From f8fe689a81f906d1b91bb3220acde2a4ecb14c5b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 6 May 2024 05:01:46 +0200 Subject: Adding upstream version 6.0.4-dfsg. Signed-off-by: Daniel Baumann --- src/VBox/VMM/VMMR3/APIC.cpp | 1735 ++++ src/VBox/VMM/VMMR3/CFGM.cpp | 3282 +++++++ src/VBox/VMM/VMMR3/CPUM.cpp | 4228 +++++++++ src/VBox/VMM/VMMR3/CPUMDbg.cpp | 1524 +++ src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp | 7471 +++++++++++++++ src/VBox/VMM/VMMR3/CPUMR3Db.cpp | 1123 +++ src/VBox/VMM/VMMR3/CSAM.cpp | 2998 ++++++ src/VBox/VMM/VMMR3/DBGF.cpp | 2119 +++++ src/VBox/VMM/VMMR3/DBGFAddr.cpp | 538 ++ src/VBox/VMM/VMMR3/DBGFAddrSpace.cpp | 1357 +++ src/VBox/VMM/VMMR3/DBGFBp.cpp | 1450 +++ src/VBox/VMM/VMMR3/DBGFCoreWrite.cpp | 664 ++ src/VBox/VMM/VMMR3/DBGFCpu.cpp | 163 + src/VBox/VMM/VMMR3/DBGFDisas.cpp | 872 ++ src/VBox/VMM/VMMR3/DBGFInfo.cpp | 1052 +++ src/VBox/VMM/VMMR3/DBGFLog.cpp | 186 + src/VBox/VMM/VMMR3/DBGFMem.cpp | 682 ++ src/VBox/VMM/VMMR3/DBGFModule.cpp | 290 + src/VBox/VMM/VMMR3/DBGFOS.cpp | 661 ++ src/VBox/VMM/VMMR3/DBGFR3BugCheck.cpp | 920 ++ src/VBox/VMM/VMMR3/DBGFR3Flow.cpp | 2266 +++++ src/VBox/VMM/VMMR3/DBGFR3ModInMem.cpp | 707 ++ src/VBox/VMM/VMMR3/DBGFR3PlugIn.cpp | 616 ++ src/VBox/VMM/VMMR3/DBGFR3Trace.cpp | 450 + src/VBox/VMM/VMMR3/DBGFR3Type.cpp | 1278 +++ src/VBox/VMM/VMMR3/DBGFReg.cpp | 2719 ++++++ src/VBox/VMM/VMMR3/DBGFStack.cpp | 1153 +++ src/VBox/VMM/VMMR3/EM.cpp | 3089 +++++++ src/VBox/VMM/VMMR3/EMHM.cpp | 510 ++ src/VBox/VMM/VMMR3/EMR3Dbg.cpp | 338 + src/VBox/VMM/VMMR3/EMR3Nem.cpp | 501 + src/VBox/VMM/VMMR3/EMRaw.cpp | 1518 +++ src/VBox/VMM/VMMR3/FTM.cpp | 1368 +++ src/VBox/VMM/VMMR3/GIM.cpp | 724 ++ src/VBox/VMM/VMMR3/GIMHv.cpp | 2318 +++++ src/VBox/VMM/VMMR3/GIMKvm.cpp | 535 ++ src/VBox/VMM/VMMR3/GIMMinimal.cpp | 131 + src/VBox/VMM/VMMR3/GMM.cpp | 451 + src/VBox/VMM/VMMR3/HM.cpp | 3322 +++++++ src/VBox/VMM/VMMR3/IEMR3.cpp | 214 + src/VBox/VMM/VMMR3/IOM.cpp | 2388 +++++ src/VBox/VMM/VMMR3/MM.cpp | 856 ++ src/VBox/VMM/VMMR3/MMHeap.cpp | 696 ++ src/VBox/VMM/VMMR3/MMHyper.cpp | 1509 +++ src/VBox/VMM/VMMR3/MMPagePool.cpp | 527 ++ src/VBox/VMM/VMMR3/MMUkHeap.cpp | 427 + src/VBox/VMM/VMMR3/Makefile.kup | 0 src/VBox/VMM/VMMR3/NEMR3.cpp | 508 + src/VBox/VMM/VMMR3/NEMR3Native-win.cpp | 2780 ++++++ src/VBox/VMM/VMMR3/PATM.cpp | 6887 ++++++++++++++ src/VBox/VMM/VMMR3/PATMA.asm | 2600 ++++++ src/VBox/VMM/VMMR3/PATMA.mac | 164 + src/VBox/VMM/VMMR3/PATMGuest.cpp | 247 + src/VBox/VMM/VMMR3/PATMPatch.cpp | 1627 ++++ src/VBox/VMM/VMMR3/PATMPatch.h | 156 + src/VBox/VMM/VMMR3/PATMR3Dbg.cpp | 404 + src/VBox/VMM/VMMR3/PATMSSM.cpp | 1549 ++++ src/VBox/VMM/VMMR3/PDM.cpp | 2972 ++++++ src/VBox/VMM/VMMR3/PDMAsyncCompletion.cpp | 1808 ++++ src/VBox/VMM/VMMR3/PDMAsyncCompletionFile.cpp | 1293 +++ .../VMM/VMMR3/PDMAsyncCompletionFileFailsafe.cpp | 268 + .../VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp | 1732 ++++ src/VBox/VMM/VMMR3/PDMBlkCache.cpp | 2805 ++++++ src/VBox/VMM/VMMR3/PDMCritSect.cpp | 1078 +++ src/VBox/VMM/VMMR3/PDMDevHlp.cpp | 4080 +++++++++ src/VBox/VMM/VMMR3/PDMDevMiscHlp.cpp | 557 ++ src/VBox/VMM/VMMR3/PDMDevice.cpp | 1089 +++ src/VBox/VMM/VMMR3/PDMDriver.cpp | 1870 ++++ src/VBox/VMM/VMMR3/PDMLdr.cpp | 1735 ++++ src/VBox/VMM/VMMR3/PDMNetShaper.cpp | 554 ++ src/VBox/VMM/VMMR3/PDMQueue.cpp | 880 ++ src/VBox/VMM/VMMR3/PDMThread.cpp | 1090 +++ src/VBox/VMM/VMMR3/PDMUsb.cpp | 2005 ++++ src/VBox/VMM/VMMR3/PGM.cpp | 3013 ++++++ src/VBox/VMM/VMMR3/PGMDbg.cpp | 2856 ++++++ src/VBox/VMM/VMMR3/PGMHandler.cpp | 862 ++ src/VBox/VMM/VMMR3/PGMMap.cpp | 1470 +++ 
src/VBox/VMM/VMMR3/PGMPhys.cpp | 5498 +++++++++++ src/VBox/VMM/VMMR3/PGMPhysRWTmpl.h | 61 + src/VBox/VMM/VMMR3/PGMPool.cpp | 982 ++ src/VBox/VMM/VMMR3/PGMR3DbgA.asm | 475 + src/VBox/VMM/VMMR3/PGMSavedState.cpp | 3328 +++++++ src/VBox/VMM/VMMR3/PGMSharedPage.cpp | 442 + src/VBox/VMM/VMMR3/SELM.cpp | 2715 ++++++ src/VBox/VMM/VMMR3/SSM.cpp | 9683 ++++++++++++++++++++ src/VBox/VMM/VMMR3/STAM.cpp | 2916 ++++++ src/VBox/VMM/VMMR3/TM.cpp | 3713 ++++++++ src/VBox/VMM/VMMR3/TRPM.cpp | 1664 ++++ src/VBox/VMM/VMMR3/VM.cpp | 4705 ++++++++++ src/VBox/VMM/VMMR3/VMEmt.cpp | 1443 +++ src/VBox/VMM/VMMR3/VMM.cpp | 3233 +++++++ src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp | 790 ++ src/VBox/VMM/VMMR3/VMMR3.def | 447 + src/VBox/VMM/VMMR3/VMMSwitcher.cpp | 1188 +++ src/VBox/VMM/VMMR3/VMMTests.cpp | 960 ++ src/VBox/VMM/VMMR3/VMReq.cpp | 1333 +++ src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h | 224 + .../VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h | 232 + src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h | 383 + src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h | 272 + src/VBox/VMM/VMMR3/cpus/Intel_80186.h | 75 + src/VBox/VMM/VMMR3/cpus/Intel_80286.h | 75 + src/VBox/VMM/VMMR3/cpus/Intel_80386.h | 75 + src/VBox/VMM/VMMR3/cpus/Intel_80486.h | 73 + src/VBox/VMM/VMMR3/cpus/Intel_8086.h | 75 + src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h | 210 + .../VMM/VMMR3/cpus/Intel_Core2_T7600_2_33GHz.h | 195 + .../VMM/VMMR3/cpus/Intel_Core2_X6800_2_93GHz.h | 260 + .../VMM/VMMR3/cpus/Intel_Core_Duo_T2600_2_16GHz.h | 225 + src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h | 339 + src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h | 332 + src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3820QM.h | 386 + src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h | 369 + src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h | 368 + src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h | 510 ++ src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h | 277 + .../VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h | 216 + .../VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h | 265 + src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h | 248 + src/VBox/VMM/VMMR3/cpus/Makefile.kup | 0 .../VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h | 270 + .../VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h | 404 + .../VMMR3/cpus/ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h | 417 + 123 files changed, 167216 insertions(+) create mode 100644 src/VBox/VMM/VMMR3/APIC.cpp create mode 100644 src/VBox/VMM/VMMR3/CFGM.cpp create mode 100644 src/VBox/VMM/VMMR3/CPUM.cpp create mode 100644 src/VBox/VMM/VMMR3/CPUMDbg.cpp create mode 100644 src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp create mode 100644 src/VBox/VMM/VMMR3/CPUMR3Db.cpp create mode 100644 src/VBox/VMM/VMMR3/CSAM.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGF.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFAddr.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFAddrSpace.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFBp.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFCoreWrite.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFCpu.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFDisas.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFInfo.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFLog.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFMem.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFModule.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFOS.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFR3BugCheck.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFR3Flow.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFR3ModInMem.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFR3PlugIn.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFR3Trace.cpp create 
mode 100644 src/VBox/VMM/VMMR3/DBGFR3Type.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFReg.cpp create mode 100644 src/VBox/VMM/VMMR3/DBGFStack.cpp create mode 100644 src/VBox/VMM/VMMR3/EM.cpp create mode 100644 src/VBox/VMM/VMMR3/EMHM.cpp create mode 100644 src/VBox/VMM/VMMR3/EMR3Dbg.cpp create mode 100644 src/VBox/VMM/VMMR3/EMR3Nem.cpp create mode 100644 src/VBox/VMM/VMMR3/EMRaw.cpp create mode 100644 src/VBox/VMM/VMMR3/FTM.cpp create mode 100644 src/VBox/VMM/VMMR3/GIM.cpp create mode 100644 src/VBox/VMM/VMMR3/GIMHv.cpp create mode 100644 src/VBox/VMM/VMMR3/GIMKvm.cpp create mode 100644 src/VBox/VMM/VMMR3/GIMMinimal.cpp create mode 100644 src/VBox/VMM/VMMR3/GMM.cpp create mode 100644 src/VBox/VMM/VMMR3/HM.cpp create mode 100644 src/VBox/VMM/VMMR3/IEMR3.cpp create mode 100644 src/VBox/VMM/VMMR3/IOM.cpp create mode 100644 src/VBox/VMM/VMMR3/MM.cpp create mode 100644 src/VBox/VMM/VMMR3/MMHeap.cpp create mode 100644 src/VBox/VMM/VMMR3/MMHyper.cpp create mode 100644 src/VBox/VMM/VMMR3/MMPagePool.cpp create mode 100644 src/VBox/VMM/VMMR3/MMUkHeap.cpp create mode 100644 src/VBox/VMM/VMMR3/Makefile.kup create mode 100644 src/VBox/VMM/VMMR3/NEMR3.cpp create mode 100644 src/VBox/VMM/VMMR3/NEMR3Native-win.cpp create mode 100644 src/VBox/VMM/VMMR3/PATM.cpp create mode 100644 src/VBox/VMM/VMMR3/PATMA.asm create mode 100644 src/VBox/VMM/VMMR3/PATMA.mac create mode 100644 src/VBox/VMM/VMMR3/PATMGuest.cpp create mode 100644 src/VBox/VMM/VMMR3/PATMPatch.cpp create mode 100644 src/VBox/VMM/VMMR3/PATMPatch.h create mode 100644 src/VBox/VMM/VMMR3/PATMR3Dbg.cpp create mode 100644 src/VBox/VMM/VMMR3/PATMSSM.cpp create mode 100644 src/VBox/VMM/VMMR3/PDM.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMAsyncCompletion.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMAsyncCompletionFile.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMAsyncCompletionFileFailsafe.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMBlkCache.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMCritSect.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMDevHlp.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMDevMiscHlp.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMDevice.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMDriver.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMLdr.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMNetShaper.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMQueue.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMThread.cpp create mode 100644 src/VBox/VMM/VMMR3/PDMUsb.cpp create mode 100644 src/VBox/VMM/VMMR3/PGM.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMDbg.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMHandler.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMMap.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMPhys.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMPhysRWTmpl.h create mode 100644 src/VBox/VMM/VMMR3/PGMPool.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMR3DbgA.asm create mode 100644 src/VBox/VMM/VMMR3/PGMSavedState.cpp create mode 100644 src/VBox/VMM/VMMR3/PGMSharedPage.cpp create mode 100644 src/VBox/VMM/VMMR3/SELM.cpp create mode 100644 src/VBox/VMM/VMMR3/SSM.cpp create mode 100644 src/VBox/VMM/VMMR3/STAM.cpp create mode 100644 src/VBox/VMM/VMMR3/TM.cpp create mode 100644 src/VBox/VMM/VMMR3/TRPM.cpp create mode 100644 src/VBox/VMM/VMMR3/VM.cpp create mode 100644 src/VBox/VMM/VMMR3/VMEmt.cpp create mode 100644 src/VBox/VMM/VMMR3/VMM.cpp create mode 100644 src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp create mode 100644 src/VBox/VMM/VMMR3/VMMR3.def create mode 100644 
src/VBox/VMM/VMMR3/VMMSwitcher.cpp create mode 100644 src/VBox/VMM/VMMR3/VMMTests.cpp create mode 100644 src/VBox/VMM/VMMR3/VMReq.cpp create mode 100644 src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h create mode 100644 src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h create mode 100644 src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h create mode 100644 src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_80186.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_80286.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_80386.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_80486.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_8086.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core2_T7600_2_33GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core2_X6800_2_93GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_Duo_T2600_2_16GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3820QM.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/Makefile.kup create mode 100644 src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h create mode 100644 src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h create mode 100644 src/VBox/VMM/VMMR3/cpus/ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h (limited to 'src/VBox/VMM/VMMR3') diff --git a/src/VBox/VMM/VMMR3/APIC.cpp b/src/VBox/VMM/VMMR3/APIC.cpp new file mode 100644 index 00000000..1395a4a0 --- /dev/null +++ b/src/VBox/VMM/VMMR3/APIC.cpp @@ -0,0 +1,1735 @@ +/* $Id: APIC.cpp $ */ +/** @file + * APIC - Advanced Programmable Interrupt Controller. + */ + +/* + * Copyright (C) 2016-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_DEV_APIC
+#include <VBox/log.h>
+#include "APICInternal.h"
+#include <VBox/vmm/cpum.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pdmdev.h>
+#include <VBox/vmm/ssm.h>
+#include <VBox/vmm/vm.h>
+
+
+#ifndef VBOX_DEVICE_STRUCT_TESTCASE
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+/** The current APIC saved state version. */
+#define APIC_SAVED_STATE_VERSION                  5
+/** VirtualBox 5.1 beta2 - pre fActiveLintX. */
+#define APIC_SAVED_STATE_VERSION_VBOX_51_BETA2    4
+/** The saved state version used by VirtualBox 5.0 and
+ *  earlier. */
+#define APIC_SAVED_STATE_VERSION_VBOX_50          3
+/** The saved state version used by VirtualBox v3 and earlier.
+ *  This does not include the config. */
+#define APIC_SAVED_STATE_VERSION_VBOX_30          2
+/** Some ancient version... */
+#define APIC_SAVED_STATE_VERSION_ANCIENT          1
+
+#ifdef VBOX_WITH_STATISTICS
+# define X2APIC_MSRRANGE(a_uFirst, a_uLast, a_szName) \
+    { (a_uFirst), (a_uLast), kCpumMsrRdFn_Ia32X2ApicN, kCpumMsrWrFn_Ia32X2ApicN, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
+# define X2APIC_MSRRANGE_INVALID(a_uFirst, a_uLast, a_szName) \
+    { (a_uFirst), (a_uLast), kCpumMsrRdFn_WriteOnly, kCpumMsrWrFn_ReadOnly, 0, 0, 0, 0, UINT64_MAX /*fWrGpMask*/, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
+#else
+# define X2APIC_MSRRANGE(a_uFirst, a_uLast, a_szName) \
+    { (a_uFirst), (a_uLast), kCpumMsrRdFn_Ia32X2ApicN, kCpumMsrWrFn_Ia32X2ApicN, 0, 0, 0, 0, 0, a_szName }
+# define X2APIC_MSRRANGE_INVALID(a_uFirst, a_uLast, a_szName) \
+    { (a_uFirst), (a_uLast), kCpumMsrRdFn_WriteOnly, kCpumMsrWrFn_ReadOnly, 0, 0, 0, 0, UINT64_MAX /*fWrGpMask*/, a_szName }
+#endif
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/**
+ * MSR range supported by the x2APIC.
+ * See Intel spec. 10.12.2 "x2APIC Register Availability".
+ */
+static CPUMMSRRANGE const g_MsrRange_x2Apic         = X2APIC_MSRRANGE(MSR_IA32_X2APIC_START, MSR_IA32_X2APIC_END, "x2APIC range");
+static CPUMMSRRANGE const g_MsrRange_x2Apic_Invalid = X2APIC_MSRRANGE_INVALID(MSR_IA32_X2APIC_START, MSR_IA32_X2APIC_END, "x2APIC range invalid");
+#undef X2APIC_MSRRANGE
+#undef X2APIC_MSRRANGE_GP
+
+/** Saved state field descriptors for XAPICPAGE.
*/ +static const SSMFIELD g_aXApicPageFields[] = +{ + SSMFIELD_ENTRY( XAPICPAGE, id.u8ApicId), + SSMFIELD_ENTRY( XAPICPAGE, version.all.u32Version), + SSMFIELD_ENTRY( XAPICPAGE, tpr.u8Tpr), + SSMFIELD_ENTRY( XAPICPAGE, apr.u8Apr), + SSMFIELD_ENTRY( XAPICPAGE, ppr.u8Ppr), + SSMFIELD_ENTRY( XAPICPAGE, ldr.all.u32Ldr), + SSMFIELD_ENTRY( XAPICPAGE, dfr.all.u32Dfr), + SSMFIELD_ENTRY( XAPICPAGE, svr.all.u32Svr), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[0].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[1].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[2].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[3].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[4].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[5].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[6].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, isr.u[7].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[0].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[1].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[2].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[3].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[4].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[5].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[6].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, tmr.u[7].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[0].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[1].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[2].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[3].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[4].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[5].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[6].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, irr.u[7].u32Reg), + SSMFIELD_ENTRY( XAPICPAGE, esr.all.u32Errors), + SSMFIELD_ENTRY( XAPICPAGE, icr_lo.all.u32IcrLo), + SSMFIELD_ENTRY( XAPICPAGE, icr_hi.all.u32IcrHi), + SSMFIELD_ENTRY( XAPICPAGE, lvt_timer.all.u32LvtTimer), + SSMFIELD_ENTRY( XAPICPAGE, lvt_thermal.all.u32LvtThermal), + SSMFIELD_ENTRY( XAPICPAGE, lvt_perf.all.u32LvtPerf), + SSMFIELD_ENTRY( XAPICPAGE, lvt_lint0.all.u32LvtLint0), + SSMFIELD_ENTRY( XAPICPAGE, lvt_lint1.all.u32LvtLint1), + SSMFIELD_ENTRY( XAPICPAGE, lvt_error.all.u32LvtError), + SSMFIELD_ENTRY( XAPICPAGE, timer_icr.u32InitialCount), + SSMFIELD_ENTRY( XAPICPAGE, timer_ccr.u32CurrentCount), + SSMFIELD_ENTRY( XAPICPAGE, timer_dcr.all.u32DivideValue), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for X2APICPAGE. 
*/ +static const SSMFIELD g_aX2ApicPageFields[] = +{ + SSMFIELD_ENTRY(X2APICPAGE, id.u32ApicId), + SSMFIELD_ENTRY(X2APICPAGE, version.all.u32Version), + SSMFIELD_ENTRY(X2APICPAGE, tpr.u8Tpr), + SSMFIELD_ENTRY(X2APICPAGE, ppr.u8Ppr), + SSMFIELD_ENTRY(X2APICPAGE, ldr.u32LogicalApicId), + SSMFIELD_ENTRY(X2APICPAGE, svr.all.u32Svr), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[0].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[1].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[2].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[3].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[4].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[5].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[6].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, isr.u[7].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[0].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[1].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[2].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[3].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[4].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[5].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[6].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, tmr.u[7].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[0].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[1].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[2].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[3].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[4].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[5].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[6].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, irr.u[7].u32Reg), + SSMFIELD_ENTRY(X2APICPAGE, esr.all.u32Errors), + SSMFIELD_ENTRY(X2APICPAGE, icr_lo.all.u32IcrLo), + SSMFIELD_ENTRY(X2APICPAGE, icr_hi.u32IcrHi), + SSMFIELD_ENTRY(X2APICPAGE, lvt_timer.all.u32LvtTimer), + SSMFIELD_ENTRY(X2APICPAGE, lvt_thermal.all.u32LvtThermal), + SSMFIELD_ENTRY(X2APICPAGE, lvt_perf.all.u32LvtPerf), + SSMFIELD_ENTRY(X2APICPAGE, lvt_lint0.all.u32LvtLint0), + SSMFIELD_ENTRY(X2APICPAGE, lvt_lint1.all.u32LvtLint1), + SSMFIELD_ENTRY(X2APICPAGE, lvt_error.all.u32LvtError), + SSMFIELD_ENTRY(X2APICPAGE, timer_icr.u32InitialCount), + SSMFIELD_ENTRY(X2APICPAGE, timer_ccr.u32CurrentCount), + SSMFIELD_ENTRY(X2APICPAGE, timer_dcr.all.u32DivideValue), + SSMFIELD_ENTRY_TERM() +}; + + +/** + * Sets the CPUID feature bits for the APIC mode. + * + * @param pVM The cross context VM structure. + * @param enmMode The APIC mode. + */ +static void apicR3SetCpuIdFeatureLevel(PVM pVM, PDMAPICMODE enmMode) +{ + switch (enmMode) + { + case PDMAPICMODE_NONE: + CPUMR3ClearGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_X2APIC); + CPUMR3ClearGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_APIC); + break; + + case PDMAPICMODE_APIC: + CPUMR3ClearGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_X2APIC); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_APIC); + break; + + case PDMAPICMODE_X2APIC: + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_APIC); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_X2APIC); + break; + + default: + AssertMsgFailed(("Unknown/invalid APIC mode: %d\n", (int)enmMode)); + } +} + + +/** + * Receives an INIT IPI. + * + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(void) APICR3InitIpi(PVMCPU pVCpu) +{ + VMCPU_ASSERT_EMT(pVCpu); + LogFlow(("APIC%u: APICR3InitIpi\n", pVCpu->idCpu)); + apicInitIpi(pVCpu); +} + + +/** + * Sets whether Hyper-V compatibility mode (MSR interface) is enabled or not. + * + * This mode is a hybrid of xAPIC and x2APIC modes, some caveats: + * 1. MSRs are used even ones that are missing (illegal) in x2APIC like DFR. + * 2. A single ICR is used by the guest to send IPIs rather than 2 ICR writes. + * 3. 
It is unclear what the behaviour will be when invalid bits are set, + * currently we follow x2APIC behaviour of causing a \#GP. + * + * @param pVM The cross context VM structure. + * @param fHyperVCompatMode Whether the compatibility mode is enabled. + */ +VMMR3_INT_DECL(void) APICR3HvSetCompatMode(PVM pVM, bool fHyperVCompatMode) +{ + Assert(pVM); + PAPIC pApic = VM_TO_APIC(pVM); + pApic->fHyperVCompatMode = fHyperVCompatMode; + + if (fHyperVCompatMode) + LogRel(("APIC: Enabling Hyper-V x2APIC compatibility mode\n")); + + int rc = CPUMR3MsrRangesInsert(pVM, &g_MsrRange_x2Apic); + AssertLogRelRC(rc); +} + + +/** + * Helper for dumping an APIC 256-bit sparse register. + * + * @param pApicReg The APIC 256-bit spare register. + * @param pHlp The debug output helper. + */ +static void apicR3DbgInfo256BitReg(volatile const XAPIC256BITREG *pApicReg, PCDBGFINFOHLP pHlp) +{ + ssize_t const cFragments = RT_ELEMENTS(pApicReg->u); + unsigned const cBitsPerFragment = sizeof(pApicReg->u[0].u32Reg) * 8; + XAPIC256BITREG ApicReg; + RT_ZERO(ApicReg); + + pHlp->pfnPrintf(pHlp, " "); + for (ssize_t i = cFragments - 1; i >= 0; i--) + { + uint32_t const uFragment = pApicReg->u[i].u32Reg; + ApicReg.u[i].u32Reg = uFragment; + pHlp->pfnPrintf(pHlp, "%08x", uFragment); + } + pHlp->pfnPrintf(pHlp, "\n"); + + uint32_t cPending = 0; + pHlp->pfnPrintf(pHlp, " Pending:"); + for (ssize_t i = cFragments - 1; i >= 0; i--) + { + uint32_t uFragment = ApicReg.u[i].u32Reg; + if (uFragment) + { + do + { + unsigned idxSetBit = ASMBitLastSetU32(uFragment); + --idxSetBit; + ASMBitClear(&uFragment, idxSetBit); + + idxSetBit += (i * cBitsPerFragment); + pHlp->pfnPrintf(pHlp, " %#02x", idxSetBit); + ++cPending; + } while (uFragment); + } + } + if (!cPending) + pHlp->pfnPrintf(pHlp, " None"); + pHlp->pfnPrintf(pHlp, "\n"); +} + + +/** + * Helper for dumping an APIC pending-interrupt bitmap. + * + * @param pApicPib The pending-interrupt bitmap. + * @param pHlp The debug output helper. + */ +static void apicR3DbgInfoPib(PCAPICPIB pApicPib, PCDBGFINFOHLP pHlp) +{ + /* Copy the pending-interrupt bitmap as an APIC 256-bit sparse register. */ + XAPIC256BITREG ApicReg; + RT_ZERO(ApicReg); + ssize_t const cFragmentsDst = RT_ELEMENTS(ApicReg.u); + ssize_t const cFragmentsSrc = RT_ELEMENTS(pApicPib->au64VectorBitmap); + AssertCompile(RT_ELEMENTS(ApicReg.u) == 2 * RT_ELEMENTS(pApicPib->au64VectorBitmap)); + for (ssize_t idxPib = cFragmentsSrc - 1, idxReg = cFragmentsDst - 1; idxPib >= 0; idxPib--, idxReg -= 2) + { + uint64_t const uFragment = pApicPib->au64VectorBitmap[idxPib]; + uint32_t const uFragmentLo = RT_LO_U32(uFragment); + uint32_t const uFragmentHi = RT_HI_U32(uFragment); + ApicReg.u[idxReg].u32Reg = uFragmentHi; + ApicReg.u[idxReg - 1].u32Reg = uFragmentLo; + } + + /* Dump it. */ + apicR3DbgInfo256BitReg(&ApicReg, pHlp); +} + + +/** + * Dumps basic APIC state. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. 
+ */ +static DECLCALLBACK(void) apicR3Info(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + PCAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + PCX2APICPAGE pX2ApicPage = VMCPU_TO_CX2APICPAGE(pVCpu); + + uint64_t const uBaseMsr = pApicCpu->uApicBaseMsr; + APICMODE const enmMode = apicGetMode(uBaseMsr); + bool const fX2ApicMode = XAPIC_IN_X2APIC_MODE(pVCpu); + + pHlp->pfnPrintf(pHlp, "APIC%u:\n", pVCpu->idCpu); + pHlp->pfnPrintf(pHlp, " APIC Base MSR = %#RX64 (Addr=%#RX64)\n", uBaseMsr, + MSR_IA32_APICBASE_GET_ADDR(uBaseMsr)); + pHlp->pfnPrintf(pHlp, " Mode = %u (%s)\n", enmMode, apicGetModeName(enmMode)); + if (fX2ApicMode) + { + pHlp->pfnPrintf(pHlp, " APIC ID = %u (%#x)\n", pX2ApicPage->id.u32ApicId, + pX2ApicPage->id.u32ApicId); + } + else + pHlp->pfnPrintf(pHlp, " APIC ID = %u (%#x)\n", pXApicPage->id.u8ApicId, pXApicPage->id.u8ApicId); + pHlp->pfnPrintf(pHlp, " Version = %#x\n", pXApicPage->version.all.u32Version); + pHlp->pfnPrintf(pHlp, " APIC Version = %#x\n", pXApicPage->version.u.u8Version); + pHlp->pfnPrintf(pHlp, " Max LVT entry index (0..N) = %u\n", pXApicPage->version.u.u8MaxLvtEntry); + pHlp->pfnPrintf(pHlp, " EOI Broadcast supression = %RTbool\n", pXApicPage->version.u.fEoiBroadcastSupression); + if (!fX2ApicMode) + pHlp->pfnPrintf(pHlp, " APR = %u (%#x)\n", pXApicPage->apr.u8Apr, pXApicPage->apr.u8Apr); + pHlp->pfnPrintf(pHlp, " TPR = %u (%#x)\n", pXApicPage->tpr.u8Tpr, pXApicPage->tpr.u8Tpr); + pHlp->pfnPrintf(pHlp, " Task-priority class = %#x\n", XAPIC_TPR_GET_TP(pXApicPage->tpr.u8Tpr) >> 4); + pHlp->pfnPrintf(pHlp, " Task-priority subclass = %#x\n", XAPIC_TPR_GET_TP_SUBCLASS(pXApicPage->tpr.u8Tpr)); + pHlp->pfnPrintf(pHlp, " PPR = %u (%#x)\n", pXApicPage->ppr.u8Ppr, pXApicPage->ppr.u8Ppr); + pHlp->pfnPrintf(pHlp, " Processor-priority class = %#x\n", XAPIC_PPR_GET_PP(pXApicPage->ppr.u8Ppr) >> 4); + pHlp->pfnPrintf(pHlp, " Processor-priority subclass = %#x\n", XAPIC_PPR_GET_PP_SUBCLASS(pXApicPage->ppr.u8Ppr)); + if (!fX2ApicMode) + pHlp->pfnPrintf(pHlp, " RRD = %u (%#x)\n", pXApicPage->rrd.u32Rrd, pXApicPage->rrd.u32Rrd); + pHlp->pfnPrintf(pHlp, " LDR = %#x\n", pXApicPage->ldr.all.u32Ldr); + pHlp->pfnPrintf(pHlp, " Logical APIC ID = %#x\n", fX2ApicMode ? 
pX2ApicPage->ldr.u32LogicalApicId + : pXApicPage->ldr.u.u8LogicalApicId); + if (!fX2ApicMode) + { + pHlp->pfnPrintf(pHlp, " DFR = %#x\n", pXApicPage->dfr.all.u32Dfr); + pHlp->pfnPrintf(pHlp, " Model = %#x (%s)\n", pXApicPage->dfr.u.u4Model, + apicGetDestFormatName((XAPICDESTFORMAT)pXApicPage->dfr.u.u4Model)); + } + pHlp->pfnPrintf(pHlp, " SVR = %#x\n", pXApicPage->svr.all.u32Svr); + pHlp->pfnPrintf(pHlp, " Vector = %u (%#x)\n", pXApicPage->svr.u.u8SpuriousVector, + pXApicPage->svr.u.u8SpuriousVector); + pHlp->pfnPrintf(pHlp, " Software Enabled = %RTbool\n", RT_BOOL(pXApicPage->svr.u.fApicSoftwareEnable)); + pHlp->pfnPrintf(pHlp, " Supress EOI broadcast = %RTbool\n", RT_BOOL(pXApicPage->svr.u.fSupressEoiBroadcast)); + pHlp->pfnPrintf(pHlp, " ISR\n"); + apicR3DbgInfo256BitReg(&pXApicPage->isr, pHlp); + pHlp->pfnPrintf(pHlp, " TMR\n"); + apicR3DbgInfo256BitReg(&pXApicPage->tmr, pHlp); + pHlp->pfnPrintf(pHlp, " IRR\n"); + apicR3DbgInfo256BitReg(&pXApicPage->irr, pHlp); + pHlp->pfnPrintf(pHlp, " PIB\n"); + apicR3DbgInfoPib((PCAPICPIB)pApicCpu->pvApicPibR3, pHlp); + pHlp->pfnPrintf(pHlp, " Level PIB\n"); + apicR3DbgInfoPib(&pApicCpu->ApicPibLevel, pHlp); + pHlp->pfnPrintf(pHlp, " ESR Internal = %#x\n", pApicCpu->uEsrInternal); + pHlp->pfnPrintf(pHlp, " ESR = %#x\n", pXApicPage->esr.all.u32Errors); + pHlp->pfnPrintf(pHlp, " Redirectable IPI = %RTbool\n", pXApicPage->esr.u.fRedirectableIpi); + pHlp->pfnPrintf(pHlp, " Send Illegal Vector = %RTbool\n", pXApicPage->esr.u.fSendIllegalVector); + pHlp->pfnPrintf(pHlp, " Recv Illegal Vector = %RTbool\n", pXApicPage->esr.u.fRcvdIllegalVector); + pHlp->pfnPrintf(pHlp, " Illegal Register Address = %RTbool\n", pXApicPage->esr.u.fIllegalRegAddr); + pHlp->pfnPrintf(pHlp, " ICR Low = %#x\n", pXApicPage->icr_lo.all.u32IcrLo); + pHlp->pfnPrintf(pHlp, " Vector = %u (%#x)\n", pXApicPage->icr_lo.u.u8Vector, + pXApicPage->icr_lo.u.u8Vector); + pHlp->pfnPrintf(pHlp, " Delivery Mode = %#x (%s)\n", pXApicPage->icr_lo.u.u3DeliveryMode, + apicGetDeliveryModeName((XAPICDELIVERYMODE)pXApicPage->icr_lo.u.u3DeliveryMode)); + pHlp->pfnPrintf(pHlp, " Destination Mode = %#x (%s)\n", pXApicPage->icr_lo.u.u1DestMode, + apicGetDestModeName((XAPICDESTMODE)pXApicPage->icr_lo.u.u1DestMode)); + if (!fX2ApicMode) + pHlp->pfnPrintf(pHlp, " Delivery Status = %u\n", pXApicPage->icr_lo.u.u1DeliveryStatus); + pHlp->pfnPrintf(pHlp, " Level = %u\n", pXApicPage->icr_lo.u.u1Level); + pHlp->pfnPrintf(pHlp, " Trigger Mode = %u (%s)\n", pXApicPage->icr_lo.u.u1TriggerMode, + apicGetTriggerModeName((XAPICTRIGGERMODE)pXApicPage->icr_lo.u.u1TriggerMode)); + pHlp->pfnPrintf(pHlp, " Destination shorthand = %#x (%s)\n", pXApicPage->icr_lo.u.u2DestShorthand, + apicGetDestShorthandName((XAPICDESTSHORTHAND)pXApicPage->icr_lo.u.u2DestShorthand)); + pHlp->pfnPrintf(pHlp, " ICR High = %#x\n", pXApicPage->icr_hi.all.u32IcrHi); + pHlp->pfnPrintf(pHlp, " Destination field/mask = %#x\n", fX2ApicMode ? pX2ApicPage->icr_hi.u32IcrHi + : pXApicPage->icr_hi.u.u8Dest); +} + + +/** + * Helper for dumping the LVT timer. + * + * @param pVCpu The cross context virtual CPU structure. + * @param pHlp The debug output helper. 
+ */ +static void apicR3InfoLvtTimer(PVMCPU pVCpu, PCDBGFINFOHLP pHlp) +{ + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + uint32_t const uLvtTimer = pXApicPage->lvt_timer.all.u32LvtTimer; + pHlp->pfnPrintf(pHlp, "LVT Timer = %#RX32\n", uLvtTimer); + pHlp->pfnPrintf(pHlp, " Vector = %u (%#x)\n", pXApicPage->lvt_timer.u.u8Vector, pXApicPage->lvt_timer.u.u8Vector); + pHlp->pfnPrintf(pHlp, " Delivery status = %u\n", pXApicPage->lvt_timer.u.u1DeliveryStatus); + pHlp->pfnPrintf(pHlp, " Masked = %RTbool\n", XAPIC_LVT_IS_MASKED(uLvtTimer)); + pHlp->pfnPrintf(pHlp, " Timer Mode = %#x (%s)\n", pXApicPage->lvt_timer.u.u2TimerMode, + apicGetTimerModeName((XAPICTIMERMODE)pXApicPage->lvt_timer.u.u2TimerMode)); +} + + +/** + * Dumps APIC Local Vector Table (LVT) information. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) apicR3InfoLvt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + + /* + * Delivery modes available in the LVT entries. They're different (more reserved stuff) from the + * ICR delivery modes and hence we don't use apicGetDeliveryMode but mostly because we want small, + * fixed-length strings to fit our formatting needs here. + */ + static const char * const s_apszLvtDeliveryModes[] = + { + "Fixed ", + "Rsvd ", + "SMI ", + "Rsvd ", + "NMI ", + "INIT ", + "Rsvd ", + "ExtINT" + }; + /* Delivery Status. */ + static const char * const s_apszLvtDeliveryStatus[] = + { + "Idle", + "Pend" + }; + const char *pszNotApplicable = ""; + + pHlp->pfnPrintf(pHlp, "VCPU[%u] APIC Local Vector Table (LVT):\n", pVCpu->idCpu); + pHlp->pfnPrintf(pHlp, "lvt timermode mask trigger rirr polarity dlvr_st dlvr_mode vector\n"); + /* Timer. */ + { + /* Timer modes. */ + static const char * const s_apszLvtTimerModes[] = + { + "One-shot ", + "Periodic ", + "TSC-dline" + }; + const uint32_t uLvtTimer = pXApicPage->lvt_timer.all.u32LvtTimer; + const XAPICTIMERMODE enmTimerMode = XAPIC_LVT_GET_TIMER_MODE(uLvtTimer); + const char *pszTimerMode = s_apszLvtTimerModes[enmTimerMode]; + const uint8_t uMask = XAPIC_LVT_IS_MASKED(uLvtTimer); + const uint8_t uDeliveryStatus = uLvtTimer & XAPIC_LVT_DELIVERY_STATUS; + const char *pszDeliveryStatus = s_apszLvtDeliveryStatus[uDeliveryStatus]; + const uint8_t uVector = XAPIC_LVT_GET_VECTOR(uLvtTimer); + + pHlp->pfnPrintf(pHlp, "%-7s %9s %u %5s %1s %8s %4s %6s %3u (%#x)\n", + "Timer", + pszTimerMode, + uMask, + pszNotApplicable, /* TriggerMode */ + pszNotApplicable, /* Remote IRR */ + pszNotApplicable, /* Polarity */ + pszDeliveryStatus, + pszNotApplicable, /* Delivery Mode */ + uVector, + uVector); + } + +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 + /* Thermal sensor. 
*/ + { + uint32_t const uLvtThermal = pXApicPage->lvt_thermal.all.u32LvtThermal; + const uint8_t uMask = XAPIC_LVT_IS_MASKED(uLvtThermal); + const uint8_t uDeliveryStatus = uLvtThermal & XAPIC_LVT_DELIVERY_STATUS; + const char *pszDeliveryStatus = s_apszLvtDeliveryStatus[uDeliveryStatus]; + const XAPICDELIVERYMODE enmDeliveryMode = XAPIC_LVT_GET_DELIVERY_MODE(uLvtThermal); + const char *pszDeliveryMode = s_apszLvtDeliveryModes[enmDeliveryMode]; + const uint8_t uVector = XAPIC_LVT_GET_VECTOR(uLvtThermal); + + pHlp->pfnPrintf(pHlp, "%-7s %9s %u %5s %1s %8s %4s %6s %3u (%#x)\n", + "Thermal", + pszNotApplicable, /* Timer mode */ + uMask, + pszNotApplicable, /* TriggerMode */ + pszNotApplicable, /* Remote IRR */ + pszNotApplicable, /* Polarity */ + pszDeliveryStatus, + pszDeliveryMode, + uVector, + uVector); + } +#endif + + /* Performance Monitor Counters. */ + { + uint32_t const uLvtPerf = pXApicPage->lvt_thermal.all.u32LvtThermal; + const uint8_t uMask = XAPIC_LVT_IS_MASKED(uLvtPerf); + const uint8_t uDeliveryStatus = uLvtPerf & XAPIC_LVT_DELIVERY_STATUS; + const char *pszDeliveryStatus = s_apszLvtDeliveryStatus[uDeliveryStatus]; + const XAPICDELIVERYMODE enmDeliveryMode = XAPIC_LVT_GET_DELIVERY_MODE(uLvtPerf); + const char *pszDeliveryMode = s_apszLvtDeliveryModes[enmDeliveryMode]; + const uint8_t uVector = XAPIC_LVT_GET_VECTOR(uLvtPerf); + + pHlp->pfnPrintf(pHlp, "%-7s %9s %u %5s %1s %8s %4s %6s %3u (%#x)\n", + "Perf", + pszNotApplicable, /* Timer mode */ + uMask, + pszNotApplicable, /* TriggerMode */ + pszNotApplicable, /* Remote IRR */ + pszNotApplicable, /* Polarity */ + pszDeliveryStatus, + pszDeliveryMode, + uVector, + uVector); + } + + /* LINT0, LINT1. */ + { + /* LINTx name. */ + static const char * const s_apszLvtLint[] = + { + "LINT0", + "LINT1" + }; + /* Trigger mode. */ + static const char * const s_apszLvtTriggerModes[] = + { + "Edge ", + "Level" + }; + /* Polarity. */ + static const char * const s_apszLvtPolarity[] = + { + "ActiveHi", + "ActiveLo" + }; + + uint32_t aLvtLint[2]; + aLvtLint[0] = pXApicPage->lvt_lint0.all.u32LvtLint0; + aLvtLint[1] = pXApicPage->lvt_lint1.all.u32LvtLint1; + for (size_t i = 0; i < RT_ELEMENTS(aLvtLint); i++) + { + uint32_t const uLvtLint = aLvtLint[i]; + const char *pszLint = s_apszLvtLint[i]; + const uint8_t uMask = XAPIC_LVT_IS_MASKED(uLvtLint); + const XAPICTRIGGERMODE enmTriggerMode = XAPIC_LVT_GET_TRIGGER_MODE(uLvtLint); + const char *pszTriggerMode = s_apszLvtTriggerModes[enmTriggerMode]; + const uint8_t uRemoteIrr = XAPIC_LVT_GET_REMOTE_IRR(uLvtLint); + const uint8_t uPolarity = XAPIC_LVT_GET_POLARITY(uLvtLint); + const char *pszPolarity = s_apszLvtPolarity[uPolarity]; + const uint8_t uDeliveryStatus = uLvtLint & XAPIC_LVT_DELIVERY_STATUS; + const char *pszDeliveryStatus = s_apszLvtDeliveryStatus[uDeliveryStatus]; + const XAPICDELIVERYMODE enmDeliveryMode = XAPIC_LVT_GET_DELIVERY_MODE(uLvtLint); + const char *pszDeliveryMode = s_apszLvtDeliveryModes[enmDeliveryMode]; + const uint8_t uVector = XAPIC_LVT_GET_VECTOR(uLvtLint); + + pHlp->pfnPrintf(pHlp, "%-7s %9s %u %5s %u %8s %4s %6s %3u (%#x)\n", + pszLint, + pszNotApplicable, /* Timer mode */ + uMask, + pszTriggerMode, + uRemoteIrr, + pszPolarity, + pszDeliveryStatus, + pszDeliveryMode, + uVector, + uVector); + } + } + + /* Error. 
*/ + { + uint32_t const uLvtError = pXApicPage->lvt_thermal.all.u32LvtThermal; + const uint8_t uMask = XAPIC_LVT_IS_MASKED(uLvtError); + const uint8_t uDeliveryStatus = uLvtError & XAPIC_LVT_DELIVERY_STATUS; + const char *pszDeliveryStatus = s_apszLvtDeliveryStatus[uDeliveryStatus]; + const XAPICDELIVERYMODE enmDeliveryMode = XAPIC_LVT_GET_DELIVERY_MODE(uLvtError); + const char *pszDeliveryMode = s_apszLvtDeliveryModes[enmDeliveryMode]; + const uint8_t uVector = XAPIC_LVT_GET_VECTOR(uLvtError); + + pHlp->pfnPrintf(pHlp, "%-7s %9s %u %5s %1s %8s %4s %6s %3u (%#x)\n", + "Error", + pszNotApplicable, /* Timer mode */ + uMask, + pszNotApplicable, /* TriggerMode */ + pszNotApplicable, /* Remote IRR */ + pszNotApplicable, /* Polarity */ + pszDeliveryStatus, + pszDeliveryMode, + uVector, + uVector); + } +} + + +/** + * Dumps the APIC timer information. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) apicR3InfoTimer(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + PCAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + pHlp->pfnPrintf(pHlp, "VCPU[%u] Local APIC timer:\n", pVCpu->idCpu); + pHlp->pfnPrintf(pHlp, " ICR = %#RX32\n", pXApicPage->timer_icr.u32InitialCount); + pHlp->pfnPrintf(pHlp, " CCR = %#RX32\n", pXApicPage->timer_ccr.u32CurrentCount); + pHlp->pfnPrintf(pHlp, " DCR = %#RX32\n", pXApicPage->timer_dcr.all.u32DivideValue); + pHlp->pfnPrintf(pHlp, " Timer shift = %#x\n", apicGetTimerShift(pXApicPage)); + pHlp->pfnPrintf(pHlp, " Timer initial TS = %#RU64\n", pApicCpu->u64TimerInitial); + apicR3InfoLvtTimer(pVCpu, pHlp); +} + + +#ifdef APIC_FUZZY_SSM_COMPAT_TEST + +/** + * Reads a 32-bit register at a specified offset. + * + * @returns The value at the specified offset. + * @param pXApicPage The xAPIC page. + * @param offReg The offset of the register being read. + * + * @remarks Duplicate of apicReadRaw32()! + */ +static uint32_t apicR3ReadRawR32(PCXAPICPAGE pXApicPage, uint16_t offReg) +{ + Assert(offReg < sizeof(*pXApicPage) - sizeof(uint32_t)); + uint8_t const *pbXApic = (const uint8_t *)pXApicPage; + uint32_t const uValue = *(const uint32_t *)(pbXApic + offReg); + return uValue; +} + + +/** + * Helper for dumping per-VCPU APIC state to the release logger. + * + * This is primarily concerned about the APIC state relevant for saved-states. + * + * @param pVCpu The cross context virtual CPU structure. + * @param pszPrefix A caller supplied prefix before dumping the state. + * @param uVersion Data layout version. + */ +static void apicR3DumpState(PVMCPU pVCpu, const char *pszPrefix, uint32_t uVersion) +{ + PCAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + LogRel(("APIC%u: %s (version %u):\n", pVCpu->idCpu, pszPrefix, uVersion)); + + switch (uVersion) + { + case APIC_SAVED_STATE_VERSION: + case APIC_SAVED_STATE_VERSION_VBOX_51_BETA2: + { + /* The auxiliary state. */ + LogRel(("APIC%u: uApicBaseMsr = %#RX64\n", pVCpu->idCpu, pApicCpu->uApicBaseMsr)); + LogRel(("APIC%u: uEsrInternal = %#RX64\n", pVCpu->idCpu, pApicCpu->uEsrInternal)); + + /* The timer. 
*/ + LogRel(("APIC%u: u64TimerInitial = %#RU64\n", pVCpu->idCpu, pApicCpu->u64TimerInitial)); + LogRel(("APIC%u: uHintedTimerInitialCount = %#RU64\n", pVCpu->idCpu, pApicCpu->uHintedTimerInitialCount)); + LogRel(("APIC%u: uHintedTimerShift = %#RU64\n", pVCpu->idCpu, pApicCpu->uHintedTimerShift)); + + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + LogRel(("APIC%u: uTimerICR = %#RX32\n", pVCpu->idCpu, pXApicPage->timer_icr.u32InitialCount)); + LogRel(("APIC%u: uTimerCCR = %#RX32\n", pVCpu->idCpu, pXApicPage->timer_ccr.u32CurrentCount)); + + /* The PIBs. */ + LogRel(("APIC%u: Edge PIB : %.*Rhxs\n", pVCpu->idCpu, sizeof(APICPIB), pApicCpu->pvApicPibR3)); + LogRel(("APIC%u: Level PIB: %.*Rhxs\n", pVCpu->idCpu, sizeof(APICPIB), &pApicCpu->ApicPibLevel)); + + /* The LINT0, LINT1 interrupt line active states. */ + LogRel(("APIC%u: fActiveLint0 = %RTbool\n", pVCpu->idCpu, pApicCpu->fActiveLint0)); + LogRel(("APIC%u: fActiveLint1 = %RTbool\n", pVCpu->idCpu, pApicCpu->fActiveLint1)); + + /* The APIC page. */ + LogRel(("APIC%u: APIC page: %.*Rhxs\n", pVCpu->idCpu, sizeof(XAPICPAGE), pApicCpu->pvApicPageR3)); + break; + } + + case APIC_SAVED_STATE_VERSION_VBOX_50: + case APIC_SAVED_STATE_VERSION_VBOX_30: + case APIC_SAVED_STATE_VERSION_ANCIENT: + { + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + LogRel(("APIC%u: uApicBaseMsr = %#RX32\n", pVCpu->idCpu, RT_LO_U32(pApicCpu->uApicBaseMsr))); + LogRel(("APIC%u: uId = %#RX32\n", pVCpu->idCpu, pXApicPage->id.u8ApicId)); + LogRel(("APIC%u: uPhysId = N/A\n", pVCpu->idCpu)); + LogRel(("APIC%u: uArbId = N/A\n", pVCpu->idCpu)); + LogRel(("APIC%u: uTpr = %#RX32\n", pVCpu->idCpu, pXApicPage->tpr.u8Tpr)); + LogRel(("APIC%u: uSvr = %#RX32\n", pVCpu->idCpu, pXApicPage->svr.all.u32Svr)); + LogRel(("APIC%u: uLdr = %#x\n", pVCpu->idCpu, pXApicPage->ldr.all.u32Ldr)); + LogRel(("APIC%u: uDfr = %#x\n", pVCpu->idCpu, pXApicPage->dfr.all.u32Dfr)); + + for (size_t i = 0; i < 8; i++) + { + LogRel(("APIC%u: Isr[%u].u32Reg = %#RX32\n", pVCpu->idCpu, i, pXApicPage->isr.u[i].u32Reg)); + LogRel(("APIC%u: Tmr[%u].u32Reg = %#RX32\n", pVCpu->idCpu, i, pXApicPage->tmr.u[i].u32Reg)); + LogRel(("APIC%u: Irr[%u].u32Reg = %#RX32\n", pVCpu->idCpu, i, pXApicPage->irr.u[i].u32Reg)); + } + + for (size_t i = 0; i < XAPIC_MAX_LVT_ENTRIES_P4; i++) + { + uint16_t const offReg = XAPIC_OFF_LVT_START + (i << 4); + LogRel(("APIC%u: Lvt[%u].u32Reg = %#RX32\n", pVCpu->idCpu, i, apicR3ReadRawR32(pXApicPage, offReg))); + } + + LogRel(("APIC%u: uEsr = %#RX32\n", pVCpu->idCpu, pXApicPage->esr.all.u32Errors)); + LogRel(("APIC%u: uIcr_Lo = %#RX32\n", pVCpu->idCpu, pXApicPage->icr_lo.all.u32IcrLo)); + LogRel(("APIC%u: uIcr_Hi = %#RX32\n", pVCpu->idCpu, pXApicPage->icr_hi.all.u32IcrHi)); + LogRel(("APIC%u: uTimerDcr = %#RX32\n", pVCpu->idCpu, pXApicPage->timer_dcr.all.u32DivideValue)); + LogRel(("APIC%u: uCountShift = %#RX32\n", pVCpu->idCpu, apicGetTimerShift(pXApicPage))); + LogRel(("APIC%u: uInitialCount = %#RX32\n", pVCpu->idCpu, pXApicPage->timer_icr.u32InitialCount)); + LogRel(("APIC%u: u64InitialCountLoadTime = %#RX64\n", pVCpu->idCpu, pApicCpu->u64TimerInitial)); + LogRel(("APIC%u: u64NextTime / TimerCCR = %#RX64\n", pVCpu->idCpu, pXApicPage->timer_ccr.u32CurrentCount)); + break; + } + + default: + { + LogRel(("APIC: apicR3DumpState: Invalid/unrecognized saved-state version %u (%#x)\n", uVersion, uVersion)); + break; + } + } +} + +#endif /* APIC_FUZZY_SSM_COMPAT_TEST */ + +/** + * Worker for saving per-VM APIC data. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + */ +static int apicR3SaveVMData(PVM pVM, PSSMHANDLE pSSM) +{ + PAPIC pApic = VM_TO_APIC(pVM); + SSMR3PutU32(pSSM, pVM->cCpus); + SSMR3PutBool(pSSM, pApic->fIoApicPresent); + return SSMR3PutU32(pSSM, pApic->enmMaxMode); +} + + +/** + * Worker for loading per-VM APIC data. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + */ +static int apicR3LoadVMData(PVM pVM, PSSMHANDLE pSSM) +{ + PAPIC pApic = VM_TO_APIC(pVM); + + /* Load and verify number of CPUs. */ + uint32_t cCpus; + int rc = SSMR3GetU32(pSSM, &cCpus); + AssertRCReturn(rc, rc); + if (cCpus != pVM->cCpus) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Config mismatch - cCpus: saved=%u config=%u"), cCpus, pVM->cCpus); + + /* Load and verify I/O APIC presence. */ + bool fIoApicPresent; + rc = SSMR3GetBool(pSSM, &fIoApicPresent); + AssertRCReturn(rc, rc); + if (fIoApicPresent != pApic->fIoApicPresent) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Config mismatch - fIoApicPresent: saved=%RTbool config=%RTbool"), + fIoApicPresent, pApic->fIoApicPresent); + + /* Load and verify configured max APIC mode. */ + uint32_t uSavedMaxApicMode; + rc = SSMR3GetU32(pSSM, &uSavedMaxApicMode); + AssertRCReturn(rc, rc); + if (uSavedMaxApicMode != (uint32_t)pApic->enmMaxMode) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Config mismatch - uApicMode: saved=%u config=%u"), + uSavedMaxApicMode, pApic->enmMaxMode); + return VINF_SUCCESS; +} + + +/** + * Worker for loading per-VCPU APIC data for legacy (old) saved-states. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param pSSM The SSM handle. + * @param uVersion Data layout version. + */ +static int apicR3LoadLegacyVCpuData(PVMCPU pVCpu, PSSMHANDLE pSSM, uint32_t uVersion) +{ + AssertReturn(uVersion <= APIC_SAVED_STATE_VERSION_VBOX_50, VERR_NOT_SUPPORTED); + + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + PXAPICPAGE pXApicPage = VMCPU_TO_XAPICPAGE(pVCpu); + + uint32_t uApicBaseLo; + int rc = SSMR3GetU32(pSSM, &uApicBaseLo); + AssertRCReturn(rc, rc); + pApicCpu->uApicBaseMsr = uApicBaseLo; + Log2(("APIC%u: apicR3LoadLegacyVCpuData: uApicBaseMsr=%#RX64\n", pVCpu->idCpu, pApicCpu->uApicBaseMsr)); + + switch (uVersion) + { + case APIC_SAVED_STATE_VERSION_VBOX_50: + case APIC_SAVED_STATE_VERSION_VBOX_30: + { + uint32_t uApicId, uPhysApicId, uArbId; + SSMR3GetU32(pSSM, &uApicId); pXApicPage->id.u8ApicId = uApicId; + SSMR3GetU32(pSSM, &uPhysApicId); NOREF(uPhysApicId); /* PhysId == pVCpu->idCpu */ + SSMR3GetU32(pSSM, &uArbId); NOREF(uArbId); /* ArbID is & was unused. 
*/ + break; + } + + case APIC_SAVED_STATE_VERSION_ANCIENT: + { + uint8_t uPhysApicId; + SSMR3GetU8(pSSM, &pXApicPage->id.u8ApicId); + SSMR3GetU8(pSSM, &uPhysApicId); NOREF(uPhysApicId); /* PhysId == pVCpu->idCpu */ + break; + } + + default: + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + uint32_t u32Tpr; + SSMR3GetU32(pSSM, &u32Tpr); + pXApicPage->tpr.u8Tpr = u32Tpr & XAPIC_TPR_VALID; + + SSMR3GetU32(pSSM, &pXApicPage->svr.all.u32Svr); + SSMR3GetU8(pSSM, &pXApicPage->ldr.u.u8LogicalApicId); + + uint8_t uDfr; + SSMR3GetU8(pSSM, &uDfr); + pXApicPage->dfr.u.u4Model = uDfr >> 4; + + AssertCompile(RT_ELEMENTS(pXApicPage->isr.u) == 8); + AssertCompile(RT_ELEMENTS(pXApicPage->tmr.u) == 8); + AssertCompile(RT_ELEMENTS(pXApicPage->irr.u) == 8); + for (size_t i = 0; i < 8; i++) + { + SSMR3GetU32(pSSM, &pXApicPage->isr.u[i].u32Reg); + SSMR3GetU32(pSSM, &pXApicPage->tmr.u[i].u32Reg); + SSMR3GetU32(pSSM, &pXApicPage->irr.u[i].u32Reg); + } + + SSMR3GetU32(pSSM, &pXApicPage->lvt_timer.all.u32LvtTimer); + SSMR3GetU32(pSSM, &pXApicPage->lvt_thermal.all.u32LvtThermal); + SSMR3GetU32(pSSM, &pXApicPage->lvt_perf.all.u32LvtPerf); + SSMR3GetU32(pSSM, &pXApicPage->lvt_lint0.all.u32LvtLint0); + SSMR3GetU32(pSSM, &pXApicPage->lvt_lint1.all.u32LvtLint1); + SSMR3GetU32(pSSM, &pXApicPage->lvt_error.all.u32LvtError); + + SSMR3GetU32(pSSM, &pXApicPage->esr.all.u32Errors); + SSMR3GetU32(pSSM, &pXApicPage->icr_lo.all.u32IcrLo); + SSMR3GetU32(pSSM, &pXApicPage->icr_hi.all.u32IcrHi); + + uint32_t u32TimerShift; + SSMR3GetU32(pSSM, &pXApicPage->timer_dcr.all.u32DivideValue); + SSMR3GetU32(pSSM, &u32TimerShift); + /* + * Old implementation may have left the timer shift uninitialized until + * the timer configuration register was written. Unfortunately zero is + * also a valid timer shift value, so we're just going to ignore it + * completely. The shift count can always be derived from the DCR. + * See @bugref{8245#c98}. + */ + uint8_t const uTimerShift = apicGetTimerShift(pXApicPage); + + SSMR3GetU32(pSSM, &pXApicPage->timer_icr.u32InitialCount); + SSMR3GetU64(pSSM, &pApicCpu->u64TimerInitial); + uint64_t uNextTS; + rc = SSMR3GetU64(pSSM, &uNextTS); AssertRCReturn(rc, rc); + if (uNextTS >= pApicCpu->u64TimerInitial + ((pXApicPage->timer_icr.u32InitialCount + 1) << uTimerShift)) + pXApicPage->timer_ccr.u32CurrentCount = pXApicPage->timer_icr.u32InitialCount; + + rc = TMR3TimerLoad(pApicCpu->pTimerR3, pSSM); + AssertRCReturn(rc, rc); + Assert(pApicCpu->uHintedTimerInitialCount == 0); + Assert(pApicCpu->uHintedTimerShift == 0); + if (TMTimerIsActive(pApicCpu->pTimerR3)) + { + uint32_t const uInitialCount = pXApicPage->timer_icr.u32InitialCount; + apicHintTimerFreq(pApicCpu, uInitialCount, uTimerShift); + } + + return rc; +} + + +/** + * @copydoc FNSSMDEVSAVEEXEC + */ +static DECLCALLBACK(int) apicR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PVM pVM = PDMDevHlpGetVM(pDevIns); + AssertReturn(pVM, VERR_INVALID_VM_HANDLE); + + LogFlow(("APIC: apicR3SaveExec\n")); + + /* Save per-VM data. */ + int rc = apicR3SaveVMData(pVM, pSSM); + AssertRCReturn(rc, rc); + + /* Save per-VCPU data.*/ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PCAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + /* Update interrupts from the pending-interrupts bitmaps to the IRR. */ + APICUpdatePendingInterrupts(pVCpu); + + /* Save the auxiliary data. */ + SSMR3PutU64(pSSM, pApicCpu->uApicBaseMsr); + SSMR3PutU32(pSSM, pApicCpu->uEsrInternal); + + /* Save the APIC page. 
*/ + if (XAPIC_IN_X2APIC_MODE(pVCpu)) + SSMR3PutStruct(pSSM, (const void *)pApicCpu->pvApicPageR3, &g_aX2ApicPageFields[0]); + else + SSMR3PutStruct(pSSM, (const void *)pApicCpu->pvApicPageR3, &g_aXApicPageFields[0]); + + /* Save the timer. */ + SSMR3PutU64(pSSM, pApicCpu->u64TimerInitial); + TMR3TimerSave(pApicCpu->pTimerR3, pSSM); + + /* Save the LINT0, LINT1 interrupt line states. */ + SSMR3PutBool(pSSM, pApicCpu->fActiveLint0); + SSMR3PutBool(pSSM, pApicCpu->fActiveLint1); + +#if defined(APIC_FUZZY_SSM_COMPAT_TEST) || defined(DEBUG_ramshankar) + apicR3DumpState(pVCpu, "Saved state", APIC_SAVED_STATE_VERSION); +#endif + } + +#ifdef APIC_FUZZY_SSM_COMPAT_TEST + /* The state is fuzzy, don't even bother trying to load the guest. */ + return VERR_INVALID_STATE; +#else + return rc; +#endif +} + + +/** + * @copydoc FNSSMDEVLOADEXEC + */ +static DECLCALLBACK(int) apicR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PVM pVM = PDMDevHlpGetVM(pDevIns); + + AssertReturn(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(uPass == SSM_PASS_FINAL, VERR_WRONG_ORDER); + + LogFlow(("APIC: apicR3LoadExec: uVersion=%u uPass=%#x\n", uVersion, uPass)); + + /* Weed out invalid versions. */ + if ( uVersion != APIC_SAVED_STATE_VERSION + && uVersion != APIC_SAVED_STATE_VERSION_VBOX_51_BETA2 + && uVersion != APIC_SAVED_STATE_VERSION_VBOX_50 + && uVersion != APIC_SAVED_STATE_VERSION_VBOX_30 + && uVersion != APIC_SAVED_STATE_VERSION_ANCIENT) + { + LogRel(("APIC: apicR3LoadExec: Invalid/unrecognized saved-state version %u (%#x)\n", uVersion, uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + int rc = VINF_SUCCESS; + if (uVersion > APIC_SAVED_STATE_VERSION_VBOX_30) + { + rc = apicR3LoadVMData(pVM, pSSM); + AssertRCReturn(rc, rc); + + if (uVersion == APIC_SAVED_STATE_VERSION) + { /* Load any new additional per-VM data. */ } + } + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + if (uVersion > APIC_SAVED_STATE_VERSION_VBOX_50) + { + /* Load the auxiliary data. */ + SSMR3GetU64(pSSM, (uint64_t *)&pApicCpu->uApicBaseMsr); + SSMR3GetU32(pSSM, &pApicCpu->uEsrInternal); + + /* Load the APIC page. */ + if (XAPIC_IN_X2APIC_MODE(pVCpu)) + SSMR3GetStruct(pSSM, pApicCpu->pvApicPageR3, &g_aX2ApicPageFields[0]); + else + SSMR3GetStruct(pSSM, pApicCpu->pvApicPageR3, &g_aXApicPageFields[0]); + + /* Load the timer. */ + rc = SSMR3GetU64(pSSM, &pApicCpu->u64TimerInitial); AssertRCReturn(rc, rc); + rc = TMR3TimerLoad(pApicCpu->pTimerR3, pSSM); AssertRCReturn(rc, rc); + Assert(pApicCpu->uHintedTimerShift == 0); + Assert(pApicCpu->uHintedTimerInitialCount == 0); + if (TMTimerIsActive(pApicCpu->pTimerR3)) + { + PCXAPICPAGE pXApicPage = VMCPU_TO_CXAPICPAGE(pVCpu); + uint32_t const uInitialCount = pXApicPage->timer_icr.u32InitialCount; + uint8_t const uTimerShift = apicGetTimerShift(pXApicPage); + apicHintTimerFreq(pApicCpu, uInitialCount, uTimerShift); + } + + /* Load the LINT0, LINT1 interrupt line states. */ + if (uVersion > APIC_SAVED_STATE_VERSION_VBOX_51_BETA2) + { + SSMR3GetBool(pSSM, (bool *)&pApicCpu->fActiveLint0); + SSMR3GetBool(pSSM, (bool *)&pApicCpu->fActiveLint1); + } + } + else + { + rc = apicR3LoadLegacyVCpuData(pVCpu, pSSM, uVersion); + AssertRCReturn(rc, rc); + } + + /* + * Check that we're still good wrt restored data, then tell CPUM about the current CPUID[1].EDX[9] visibility. 
+ */ + rc = SSMR3HandleGetStatus(pSSM); + AssertRCReturn(rc, rc); + CPUMSetGuestCpuIdPerCpuApicFeature(pVCpu, RT_BOOL(pApicCpu->uApicBaseMsr & MSR_IA32_APICBASE_EN)); + +#if defined(APIC_FUZZY_SSM_COMPAT_TEST) || defined(DEBUG_ramshankar) + apicR3DumpState(pVCpu, "Loaded state", uVersion); +#endif + } + + return rc; +} + + +/** + * The timer callback. + * + * @param pDevIns The device instance. + * @param pTimer The timer handle. + * @param pvUser Opaque pointer to the VMCPU. + * + * @thread Any. + * @remarks Currently this function is invoked on the last EMT, see @c + * idTimerCpu in tmR3TimerCallback(). However, the code does -not- + * rely on this and is designed to work with being invoked on any + * thread. + */ +static DECLCALLBACK(void) apicR3TimerCallback(PPDMDEVINS pDevIns, PTMTIMER pTimer, void *pvUser) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + Assert(TMTimerIsLockOwner(pTimer)); + Assert(pVCpu); + LogFlow(("APIC%u: apicR3TimerCallback\n", pVCpu->idCpu)); + RT_NOREF2(pDevIns, pTimer); + + PXAPICPAGE pXApicPage = VMCPU_TO_XAPICPAGE(pVCpu); + uint32_t const uLvtTimer = pXApicPage->lvt_timer.all.u32LvtTimer; +#ifdef VBOX_WITH_STATISTICS + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + STAM_COUNTER_INC(&pApicCpu->StatTimerCallback); +#endif + if (!XAPIC_LVT_IS_MASKED(uLvtTimer)) + { + uint8_t uVector = XAPIC_LVT_GET_VECTOR(uLvtTimer); + Log2(("APIC%u: apicR3TimerCallback: Raising timer interrupt. uVector=%#x\n", pVCpu->idCpu, uVector)); + apicPostInterrupt(pVCpu, uVector, XAPICTRIGGERMODE_EDGE, 0 /* uSrcTag */); + } + + XAPICTIMERMODE enmTimerMode = XAPIC_LVT_GET_TIMER_MODE(uLvtTimer); + switch (enmTimerMode) + { + case XAPICTIMERMODE_PERIODIC: + { + /* The initial-count register determines if the periodic timer is re-armed. */ + uint32_t const uInitialCount = pXApicPage->timer_icr.u32InitialCount; + pXApicPage->timer_ccr.u32CurrentCount = uInitialCount; + if (uInitialCount) + { + Log2(("APIC%u: apicR3TimerCallback: Re-arming timer. uInitialCount=%#RX32\n", pVCpu->idCpu, uInitialCount)); + apicStartTimer(pVCpu, uInitialCount); + } + break; + } + + case XAPICTIMERMODE_ONESHOT: + { + pXApicPage->timer_ccr.u32CurrentCount = 0; + break; + } + + case XAPICTIMERMODE_TSC_DEADLINE: + { + /** @todo implement TSC deadline. */ + AssertMsgFailed(("APIC: TSC deadline mode unimplemented\n")); + break; + } + } +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnReset} + */ +static DECLCALLBACK(void) apicR3Reset(PPDMDEVINS pDevIns) +{ + PVM pVM = PDMDevHlpGetVM(pDevIns); + VM_ASSERT_EMT0(pVM); + VM_ASSERT_IS_NOT_RUNNING(pVM); + + LogFlow(("APIC: apicR3Reset\n")); + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpuDest = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpuDest); + + if (TMTimerIsActive(pApicCpu->pTimerR3)) + TMTimerStop(pApicCpu->pTimerR3); + + apicResetCpu(pVCpuDest, true /* fResetApicBaseMsr */); + + /* Clear the interrupt pending force flag. 
*/ + apicClearInterruptFF(pVCpuDest, PDMAPICIRQ_HARDWARE); + } +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnRelocate} + */ +static DECLCALLBACK(void) apicR3Relocate(PPDMDEVINS pDevIns, RTGCINTPTR offDelta) +{ + PVM pVM = PDMDevHlpGetVM(pDevIns); + PAPIC pApic = VM_TO_APIC(pVM); + PAPICDEV pApicDev = PDMINS_2_DATA(pDevIns, PAPICDEV); + + LogFlow(("APIC: apicR3Relocate: pVM=%p pDevIns=%p offDelta=%RGi\n", pVM, pDevIns, offDelta)); + + pApicDev->pDevInsRC = PDMDEVINS_2_RCPTR(pDevIns); + + pApic->pApicDevRC = PDMINS_2_DATA_RCPTR(pDevIns); + pApic->pvApicPibRC += offDelta; + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + pApicCpu->pTimerRC = TMTimerRCPtr(pApicCpu->pTimerR3); + + pApicCpu->pvApicPageRC += offDelta; + pApicCpu->pvApicPibRC += offDelta; + Log2(("APIC%u: apicR3Relocate: APIC PIB at %RGv\n", pVCpu->idCpu, pApicCpu->pvApicPibRC)); + } +} + + +/** + * Terminates the APIC state. + * + * @param pVM The cross context VM structure. + */ +static void apicR3TermState(PVM pVM) +{ + PAPIC pApic = VM_TO_APIC(pVM); + LogFlow(("APIC: apicR3TermState: pVM=%p\n", pVM)); + + /* Unmap and free the PIB. */ + if (pApic->pvApicPibR3 != NIL_RTR3PTR) + { + size_t const cPages = pApic->cbApicPib >> PAGE_SHIFT; + if (cPages == 1) + SUPR3PageFreeEx(pApic->pvApicPibR3, cPages); + else + SUPR3ContFree(pApic->pvApicPibR3, cPages); + pApic->pvApicPibR3 = NIL_RTR3PTR; + pApic->pvApicPibR0 = NIL_RTR0PTR; + pApic->pvApicPibRC = NIL_RTRCPTR; + } + + /* Unmap and free the virtual-APIC pages. */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + pApicCpu->pvApicPibR3 = NIL_RTR3PTR; + pApicCpu->pvApicPibR0 = NIL_RTR0PTR; + pApicCpu->pvApicPibRC = NIL_RTRCPTR; + + if (pApicCpu->pvApicPageR3 != NIL_RTR3PTR) + { + SUPR3PageFreeEx(pApicCpu->pvApicPageR3, 1 /* cPages */); + pApicCpu->pvApicPageR3 = NIL_RTR3PTR; + pApicCpu->pvApicPageR0 = NIL_RTR0PTR; + pApicCpu->pvApicPageRC = NIL_RTRCPTR; + } + } +} + + +/** + * Initializes the APIC state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int apicR3InitState(PVM pVM) +{ + PAPIC pApic = VM_TO_APIC(pVM); + LogFlow(("APIC: apicR3InitState: pVM=%p\n", pVM)); + + /* With hardware virtualization, we don't need to map the APIC in GC. */ + bool const fNeedsGCMapping = VM_IS_RAW_MODE_ENABLED(pVM); + + /* + * Allocate and map the pending-interrupt bitmap (PIB). + * + * We allocate all the VCPUs' PIBs contiguously in order to save space as + * physically contiguous allocations are rounded to a multiple of page size. 
+ */ + Assert(pApic->pvApicPibR3 == NIL_RTR3PTR); + Assert(pApic->pvApicPibR0 == NIL_RTR0PTR); + Assert(pApic->pvApicPibRC == NIL_RTRCPTR); + pApic->cbApicPib = RT_ALIGN_Z(pVM->cCpus * sizeof(APICPIB), PAGE_SIZE); + size_t const cPages = pApic->cbApicPib >> PAGE_SHIFT; + if (cPages == 1) + { + SUPPAGE SupApicPib; + RT_ZERO(SupApicPib); + SupApicPib.Phys = NIL_RTHCPHYS; + int rc = SUPR3PageAllocEx(1 /* cPages */, 0 /* fFlags */, &pApic->pvApicPibR3, &pApic->pvApicPibR0, &SupApicPib); + if (RT_SUCCESS(rc)) + { + pApic->HCPhysApicPib = SupApicPib.Phys; + AssertLogRelReturn(pApic->pvApicPibR3, VERR_INTERNAL_ERROR); + } + else + { + LogRel(("APIC: Failed to allocate %u bytes for the pending-interrupt bitmap, rc=%Rrc\n", pApic->cbApicPib, rc)); + return rc; + } + } + else + pApic->pvApicPibR3 = SUPR3ContAlloc(cPages, &pApic->pvApicPibR0, &pApic->HCPhysApicPib); + + if (pApic->pvApicPibR3) + { + AssertLogRelReturn(pApic->pvApicPibR0 != NIL_RTR0PTR, VERR_INTERNAL_ERROR); + AssertLogRelReturn(pApic->HCPhysApicPib != NIL_RTHCPHYS, VERR_INTERNAL_ERROR); + + /* Initialize the PIB. */ + RT_BZERO(pApic->pvApicPibR3, pApic->cbApicPib); + + /* Map the PIB into GC. */ + if (fNeedsGCMapping) + { + pApic->pvApicPibRC = NIL_RTRCPTR; + int rc = MMR3HyperMapHCPhys(pVM, pApic->pvApicPibR3, NIL_RTR0PTR, pApic->HCPhysApicPib, pApic->cbApicPib, + "APIC PIB", (PRTGCPTR)&pApic->pvApicPibRC); + if (RT_FAILURE(rc)) + { + LogRel(("APIC: Failed to map %u bytes for the pending-interrupt bitmap into GC, rc=%Rrc\n", pApic->cbApicPib, + rc)); + apicR3TermState(pVM); + return rc; + } + + AssertLogRelReturn(pApic->pvApicPibRC != NIL_RTRCPTR, VERR_INTERNAL_ERROR); + } + + /* + * Allocate and map the virtual-APIC pages. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + SUPPAGE SupApicPage; + RT_ZERO(SupApicPage); + SupApicPage.Phys = NIL_RTHCPHYS; + + Assert(pVCpu->idCpu == idCpu); + Assert(pApicCpu->pvApicPageR3 == NIL_RTR3PTR); + Assert(pApicCpu->pvApicPageR0 == NIL_RTR0PTR); + Assert(pApicCpu->pvApicPageRC == NIL_RTRCPTR); + AssertCompile(sizeof(XAPICPAGE) == PAGE_SIZE); + pApicCpu->cbApicPage = sizeof(XAPICPAGE); + int rc = SUPR3PageAllocEx(1 /* cPages */, 0 /* fFlags */, &pApicCpu->pvApicPageR3, &pApicCpu->pvApicPageR0, + &SupApicPage); + if (RT_SUCCESS(rc)) + { + AssertLogRelReturn(pApicCpu->pvApicPageR3 != NIL_RTR3PTR, VERR_INTERNAL_ERROR); + pApicCpu->HCPhysApicPage = SupApicPage.Phys; + AssertLogRelReturn(pApicCpu->HCPhysApicPage != NIL_RTHCPHYS, VERR_INTERNAL_ERROR); + + /* Map the virtual-APIC page into GC. */ + if (fNeedsGCMapping) + { + rc = MMR3HyperMapHCPhys(pVM, pApicCpu->pvApicPageR3, NIL_RTR0PTR, pApicCpu->HCPhysApicPage, + pApicCpu->cbApicPage, "APIC", (PRTGCPTR)&pApicCpu->pvApicPageRC); + if (RT_FAILURE(rc)) + { + LogRel(("APIC%u: Failed to map %u bytes for the virtual-APIC page into GC, rc=%Rrc", idCpu, + pApicCpu->cbApicPage, rc)); + apicR3TermState(pVM); + return rc; + } + + AssertLogRelReturn(pApicCpu->pvApicPageRC != NIL_RTRCPTR, VERR_INTERNAL_ERROR); + } + + /* Associate the per-VCPU PIB pointers to the per-VM PIB mapping. */ + uint32_t const offApicPib = idCpu * sizeof(APICPIB); + pApicCpu->pvApicPibR0 = (RTR0PTR)((RTR0UINTPTR)pApic->pvApicPibR0 + offApicPib); + pApicCpu->pvApicPibR3 = (RTR3PTR)((RTR3UINTPTR)pApic->pvApicPibR3 + offApicPib); + if (fNeedsGCMapping) + pApicCpu->pvApicPibRC = (RTRCPTR)((RTRCUINTPTR)pApic->pvApicPibRC + offApicPib); + + /* Initialize the virtual-APIC state. 
*/ + RT_BZERO(pApicCpu->pvApicPageR3, pApicCpu->cbApicPage); + apicResetCpu(pVCpu, true /* fResetApicBaseMsr */); + +#ifdef DEBUG_ramshankar + Assert(pApicCpu->pvApicPibR3 != NIL_RTR3PTR); + Assert(pApicCpu->pvApicPibR0 != NIL_RTR0PTR); + Assert(!fNeedsGCMapping || pApicCpu->pvApicPibRC != NIL_RTRCPTR); + Assert(pApicCpu->pvApicPageR3 != NIL_RTR3PTR); + Assert(pApicCpu->pvApicPageR0 != NIL_RTR0PTR); + Assert(!fNeedsGCMapping || pApicCpu->pvApicPageRC != NIL_RTRCPTR); + Assert(!fNeedsGCMapping || pApic->pvApicPibRC == pVM->aCpus[0].apic.s.pvApicPibRC); +#endif + } + else + { + LogRel(("APIC%u: Failed to allocate %u bytes for the virtual-APIC page, rc=%Rrc\n", idCpu, pApicCpu->cbApicPage, rc)); + apicR3TermState(pVM); + return rc; + } + } + +#ifdef DEBUG_ramshankar + Assert(pApic->pvApicPibR3 != NIL_RTR3PTR); + Assert(pApic->pvApicPibR0 != NIL_RTR0PTR); + Assert(!fNeedsGCMapping || pApic->pvApicPibRC != NIL_RTRCPTR); +#endif + return VINF_SUCCESS; + } + + LogRel(("APIC: Failed to allocate %u bytes of physically contiguous memory for the pending-interrupt bitmap\n", + pApic->cbApicPib)); + return VERR_NO_MEMORY; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) apicR3Destruct(PPDMDEVINS pDevIns) +{ + PVM pVM = PDMDevHlpGetVM(pDevIns); + LogFlow(("APIC: apicR3Destruct: pVM=%p\n", pVM)); + + apicR3TermState(pVM); + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnInitComplete} + */ +static DECLCALLBACK(int) apicR3InitComplete(PPDMDEVINS pDevIns) +{ + PVM pVM = PDMDevHlpGetVM(pDevIns); + PAPIC pApic = VM_TO_APIC(pVM); + + /* + * Init APIC settings that rely on HM and CPUM configurations. + */ + CPUMCPUIDLEAF CpuLeaf; + int rc = CPUMR3CpuIdGetLeaf(pVM, &CpuLeaf, 1, 0); + AssertRCReturn(rc, rc); + + pApic->fSupportsTscDeadline = RT_BOOL(CpuLeaf.uEcx & X86_CPUID_FEATURE_ECX_TSCDEADL); + pApic->fPostedIntrsEnabled = HMR3IsPostedIntrsEnabled(pVM->pUVM); + pApic->fVirtApicRegsEnabled = HMR3IsVirtApicRegsEnabled(pVM->pUVM); + + LogRel(("APIC: fPostedIntrsEnabled=%RTbool fVirtApicRegsEnabled=%RTbool fSupportsTscDeadline=%RTbool\n", + pApic->fPostedIntrsEnabled, pApic->fVirtApicRegsEnabled, pApic->fSupportsTscDeadline)); + + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) apicR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + /* + * Validate inputs. + */ + Assert(iInstance == 0); NOREF(iInstance); + Assert(pDevIns); + + PAPICDEV pApicDev = PDMINS_2_DATA(pDevIns, PAPICDEV); + PVM pVM = PDMDevHlpGetVM(pDevIns); + PAPIC pApic = VM_TO_APIC(pVM); + + /* + * Init the data. + */ + pApicDev->pDevInsR3 = pDevIns; + pApicDev->pDevInsR0 = PDMDEVINS_2_R0PTR(pDevIns); + pApicDev->pDevInsRC = PDMDEVINS_2_RCPTR(pDevIns); + + pApic->pApicDevR0 = PDMINS_2_DATA_R0PTR(pDevIns); + pApic->pApicDevR3 = (PAPICDEV)PDMINS_2_DATA_R3PTR(pDevIns); + pApic->pApicDevRC = PDMINS_2_DATA_RCPTR(pDevIns); + + /* + * Validate APIC settings. + */ + if (!CFGMR3AreValuesValid(pCfg, "RZEnabled\0" + "Mode\0" + "IOAPIC\0" + "NumCPUs\0")) + { + return PDMDEV_SET_ERROR(pDevIns, VERR_PDM_DEVINS_UNKNOWN_CFG_VALUES, + N_("APIC configuration error: unknown option specified")); + } + + int rc = CFGMR3QueryBoolDef(pCfg, "RZEnabled", &pApic->fRZEnabled, true); + AssertLogRelRCReturn(rc, rc); + + rc = CFGMR3QueryBoolDef(pCfg, "IOAPIC", &pApic->fIoApicPresent, true); + AssertLogRelRCReturn(rc, rc); + + /* Max APIC feature level. 
*/ + uint8_t uMaxMode; + rc = CFGMR3QueryU8Def(pCfg, "Mode", &uMaxMode, PDMAPICMODE_APIC); + AssertLogRelRCReturn(rc, rc); + switch ((PDMAPICMODE)uMaxMode) + { + case PDMAPICMODE_NONE: + LogRel(("APIC: APIC maximum mode configured as 'None', effectively disabled/not-present!\n")); + case PDMAPICMODE_APIC: + case PDMAPICMODE_X2APIC: + break; + default: + return VMR3SetError(pVM->pUVM, VERR_INVALID_PARAMETER, RT_SRC_POS, "APIC mode %d unknown.", uMaxMode); + } + pApic->enmMaxMode = (PDMAPICMODE)uMaxMode; + + /* + * Disable automatic PDM locking for this device. + */ + rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* + * Register the APIC with PDM. + */ + rc = PDMDevHlpAPICRegister(pDevIns); + AssertLogRelRCReturn(rc, rc); + + /* + * Initialize the APIC state. + */ + if (pApic->enmMaxMode == PDMAPICMODE_X2APIC) + { + rc = CPUMR3MsrRangesInsert(pVM, &g_MsrRange_x2Apic); + AssertLogRelRCReturn(rc, rc); + } + else + { + /* We currently don't have a function to remove the range, so we register an range which will cause a #GP. */ + rc = CPUMR3MsrRangesInsert(pVM, &g_MsrRange_x2Apic_Invalid); + AssertLogRelRCReturn(rc, rc); + } + + /* Tell CPUM about the APIC feature level so it can adjust APICBASE MSR GP mask and CPUID bits. */ + apicR3SetCpuIdFeatureLevel(pVM, pApic->enmMaxMode); + /* Finally, initialize the state. */ + rc = apicR3InitState(pVM); + AssertRCReturn(rc, rc); + + /* + * Register the MMIO range. + */ + PAPICCPU pApicCpu0 = VMCPU_TO_APICCPU(&pVM->aCpus[0]); + RTGCPHYS GCPhysApicBase = MSR_IA32_APICBASE_GET_ADDR(pApicCpu0->uApicBaseMsr); + + rc = PDMDevHlpMMIORegister(pDevIns, GCPhysApicBase, sizeof(XAPICPAGE), NULL /* pvUser */, + IOMMMIO_FLAGS_READ_DWORD | IOMMMIO_FLAGS_WRITE_DWORD_ZEROED, + apicWriteMmio, apicReadMmio, "APIC"); + if (RT_FAILURE(rc)) + return rc; + + if (pApic->fRZEnabled) + { + rc = PDMDevHlpMMIORegisterRC(pDevIns, GCPhysApicBase, sizeof(XAPICPAGE), NIL_RTRCPTR /*pvUser*/, + "apicWriteMmio", "apicReadMmio"); + if (RT_FAILURE(rc)) + return rc; + + rc = PDMDevHlpMMIORegisterR0(pDevIns, GCPhysApicBase, sizeof(XAPICPAGE), NIL_RTR0PTR /*pvUser*/, + "apicWriteMmio", "apicReadMmio"); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Create the APIC timers. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + RTStrPrintf(&pApicCpu->szTimerDesc[0], sizeof(pApicCpu->szTimerDesc), "APIC Timer %u", pVCpu->idCpu); + rc = PDMDevHlpTMTimerCreate(pDevIns, TMCLOCK_VIRTUAL_SYNC, apicR3TimerCallback, pVCpu, TMTIMER_FLAGS_NO_CRIT_SECT, + pApicCpu->szTimerDesc, &pApicCpu->pTimerR3); + if (RT_SUCCESS(rc)) + { + pApicCpu->pTimerR0 = TMTimerR0Ptr(pApicCpu->pTimerR3); + pApicCpu->pTimerRC = TMTimerRCPtr(pApicCpu->pTimerR3); + } + else + return rc; + } + + /* + * Register saved state callbacks. + */ + rc = PDMDevHlpSSMRegister3(pDevIns, APIC_SAVED_STATE_VERSION, sizeof(*pApicDev), NULL /*pfnLiveExec*/, apicR3SaveExec, + apicR3LoadExec); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register debugger info callbacks. + * + * We use separate callbacks rather than arguments so they can also be + * dumped in an automated fashion while collecting crash diagnostics and + * not just used during live debugging via the VM debugger. 
+ */ + rc = DBGFR3InfoRegisterInternalEx(pVM, "apic", "Dumps APIC basic information.", apicR3Info, DBGFINFO_FLAGS_ALL_EMTS); + rc |= DBGFR3InfoRegisterInternalEx(pVM, "apiclvt", "Dumps APIC LVT information.", apicR3InfoLvt, DBGFINFO_FLAGS_ALL_EMTS); + rc |= DBGFR3InfoRegisterInternalEx(pVM, "apictimer", "Dumps APIC timer information.", apicR3InfoTimer, DBGFINFO_FLAGS_ALL_EMTS); + AssertRCReturn(rc, rc); + +#ifdef VBOX_WITH_STATISTICS + /* + * Statistics. + */ +#define APIC_REG_COUNTER(a_Reg, a_Desc, a_Key) \ + do { \ + rc = STAMR3RegisterF(pVM, a_Reg, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, a_Desc, a_Key, idCpu); \ + AssertRCReturn(rc, rc); \ + } while(0) + +#define APIC_PROF_COUNTER(a_Reg, a_Desc, a_Key) \ + do { \ + rc = STAMR3RegisterF(pVM, a_Reg, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL, a_Desc, a_Key, \ + idCpu); \ + AssertRCReturn(rc, rc); \ + } while(0) + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PAPICCPU pApicCpu = VMCPU_TO_APICCPU(pVCpu); + + APIC_REG_COUNTER(&pApicCpu->StatMmioReadRZ, "Number of APIC MMIO reads in RZ.", "/Devices/APIC/%u/RZ/MmioRead"); + APIC_REG_COUNTER(&pApicCpu->StatMmioWriteRZ, "Number of APIC MMIO writes in RZ.", "/Devices/APIC/%u/RZ/MmioWrite"); + APIC_REG_COUNTER(&pApicCpu->StatMsrReadRZ, "Number of APIC MSR reads in RZ.", "/Devices/APIC/%u/RZ/MsrRead"); + APIC_REG_COUNTER(&pApicCpu->StatMsrWriteRZ, "Number of APIC MSR writes in RZ.", "/Devices/APIC/%u/RZ/MsrWrite"); + + APIC_REG_COUNTER(&pApicCpu->StatMmioReadR3, "Number of APIC MMIO reads in R3.", "/Devices/APIC/%u/R3/MmioReadR3"); + APIC_REG_COUNTER(&pApicCpu->StatMmioWriteR3, "Number of APIC MMIO writes in R3.", "/Devices/APIC/%u/R3/MmioWriteR3"); + APIC_REG_COUNTER(&pApicCpu->StatMsrReadR3, "Number of APIC MSR reads in R3.", "/Devices/APIC/%u/R3/MsrReadR3"); + APIC_REG_COUNTER(&pApicCpu->StatMsrWriteR3, "Number of APIC MSR writes in R3.", "/Devices/APIC/%u/R3/MsrWriteR3"); + + APIC_PROF_COUNTER(&pApicCpu->StatUpdatePendingIntrs, "Profiling of APICUpdatePendingInterrupts", + "/PROF/CPU%d/APIC/UpdatePendingInterrupts"); + APIC_PROF_COUNTER(&pApicCpu->StatPostIntr, "Profiling of APICPostInterrupt", "/PROF/CPU%d/APIC/PostInterrupt"); + + APIC_REG_COUNTER(&pApicCpu->StatPostIntrAlreadyPending, "Number of times an interrupt is already pending.", + "/Devices/APIC/%u/PostInterruptAlreadyPending"); + APIC_REG_COUNTER(&pApicCpu->StatTimerCallback, "Number of times the timer callback is invoked.", + "/Devices/APIC/%u/TimerCallback"); + + APIC_REG_COUNTER(&pApicCpu->StatTprWrite, "Number of TPR writes.", "/Devices/APIC/%u/TprWrite"); + APIC_REG_COUNTER(&pApicCpu->StatTprRead, "Number of TPR reads.", "/Devices/APIC/%u/TprRead"); + APIC_REG_COUNTER(&pApicCpu->StatEoiWrite, "Number of EOI writes.", "/Devices/APIC/%u/EoiWrite"); + APIC_REG_COUNTER(&pApicCpu->StatMaskedByTpr, "Number of times TPR masks an interrupt in apicGetInterrupt.", + "/Devices/APIC/%u/MaskedByTpr"); + APIC_REG_COUNTER(&pApicCpu->StatMaskedByPpr, "Number of times PPR masks an interrupt in apicGetInterrupt.", + "/Devices/APIC/%u/MaskedByPpr"); + APIC_REG_COUNTER(&pApicCpu->StatTimerIcrWrite, "Number of times the timer ICR is written.", + "/Devices/APIC/%u/TimerIcrWrite"); + APIC_REG_COUNTER(&pApicCpu->StatIcrLoWrite, "Number of times the ICR Lo (send IPI) is written.", + "/Devices/APIC/%u/IcrLoWrite"); + APIC_REG_COUNTER(&pApicCpu->StatIcrHiWrite, "Number of times the ICR Hi is written.", + "/Devices/APIC/%u/IcrHiWrite"); + 
APIC_REG_COUNTER(&pApicCpu->StatIcrFullWrite, "Number of times the ICR full (send IPI, x2APIC) is written.", + "/Devices/APIC/%u/IcrFullWrite"); + } +# undef APIC_PROF_COUNTER +# undef APIC_REG_ACCESS_COUNTER +#endif + + return VINF_SUCCESS; +} + + +/** + * APIC device registration structure. + */ +static const PDMDEVREG g_DeviceAPIC = +{ + /* u32Version */ + PDM_DEVREG_VERSION, + /* szName */ + "apic", + /* szRCMod */ + "VMMRC.rc", + /* szR0Mod */ + "VMMR0.r0", + /* pszDescription */ + "Advanced Programmable Interrupt Controller", + /* fFlags */ + PDM_DEVREG_FLAGS_HOST_BITS_DEFAULT | PDM_DEVREG_FLAGS_GUEST_BITS_32_64 | PDM_DEVREG_FLAGS_PAE36 + | PDM_DEVREG_FLAGS_RC | PDM_DEVREG_FLAGS_R0, + /* fClass */ + PDM_DEVREG_CLASS_PIC, + /* cMaxInstances */ + 1, + /* cbInstance */ + sizeof(APICDEV), + /* pfnConstruct */ + apicR3Construct, + /* pfnDestruct */ + apicR3Destruct, + /* pfnRelocate */ + apicR3Relocate, + /* pfnMemSetup */ + NULL, + /* pfnPowerOn */ + NULL, + /* pfnReset */ + apicR3Reset, + /* pfnSuspend */ + NULL, + /* pfnResume */ + NULL, + /* pfnAttach */ + NULL, + /* pfnDetach */ + NULL, + /* pfnQueryInterface. */ + NULL, + /* pfnInitComplete */ + apicR3InitComplete, + /* pfnPowerOff */ + NULL, + /* pfnSoftReset */ + NULL, + /* u32VersionEnd */ + PDM_DEVREG_VERSION +}; + + +/** + * Called by PDM to register the APIC device. + */ +VMMR3_INT_DECL(int) APICR3RegisterDevice(PPDMDEVREGCB pCallbacks) +{ + return pCallbacks->pfnRegister(pCallbacks, &g_DeviceAPIC); +} + +#endif /* !VBOX_DEVICE_STRUCT_TESTCASE */ + diff --git a/src/VBox/VMM/VMMR3/CFGM.cpp b/src/VBox/VMM/VMMR3/CFGM.cpp new file mode 100644 index 00000000..0e6d90e9 --- /dev/null +++ b/src/VBox/VMM/VMMR3/CFGM.cpp @@ -0,0 +1,3282 @@ +/* $Id: CFGM.cpp $ */ +/** @file + * CFGM - Configuration Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_cfgm CFGM - The Configuration Manager + * + * The configuration manager is a directory containing the VM configuration at + * run time. It works in a manner similar to the windows registry - it's like a + * file system hierarchy, but the files (values) live in a separate name space + * and can include the path separators. + * + * The configuration is normally created via a callback passed to VMR3Create() + * via the pfnCFGMConstructor parameter. To make testcase writing a bit simpler, + * we allow the callback to be NULL, in which case a simple default + * configuration will be created by CFGMR3ConstructDefaultTree(). The + * Console::configConstructor() method in Main/ConsoleImpl2.cpp creates the + * configuration from the XML. + * + * Devices, drivers, services and other PDM stuff are given their own subtree + * where they are protected from accessing information of any parents. This is + * is implemented via the CFGMR3SetRestrictedRoot() API. + * + * Data validation beyond the basic primitives is left to the caller. The caller + * is in a better position to know the proper validation rules of the individual + * properties. 
+ * + * @see grp_cfgm + * + * + * @section sec_cfgm_primitives Data Primitives + * + * CFGM supports the following data primitives: + * - Integers. Representation is unsigned 64-bit. Boolean, unsigned and + * small integers, and pointers are all represented using this primitive. + * - Zero terminated character strings. These are of course UTF-8. + * - Variable length byte strings. This can be used to get/put binary + * objects like for instance RTMAC. + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_CFGM +#include +#include +#include +#include "CFGMInternal.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void cfgmR3DumpPath(PCFGMNODE pNode, PCDBGFINFOHLP pHlp); +static void cfgmR3Dump(PCFGMNODE pRoot, unsigned iLevel, PCDBGFINFOHLP pHlp); +static DECLCALLBACK(void) cfgmR3Info(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static int cfgmR3ResolveNode(PCFGMNODE pNode, const char *pszPath, PCFGMNODE *ppChild); +static int cfgmR3ResolveLeaf(PCFGMNODE pNode, const char *pszName, PCFGMLEAF *ppLeaf); +static int cfgmR3InsertLeaf(PCFGMNODE pNode, const char *pszName, PCFGMLEAF *ppLeaf); +static void cfgmR3RemoveLeaf(PCFGMNODE pNode, PCFGMLEAF pLeaf); +static void cfgmR3FreeValue(PVM pVM, PCFGMLEAF pLeaf); + + +/** @todo replace pVM for pUVM !*/ + +/** + * Allocator wrapper. + * + * @returns Pointer to the allocated memory, NULL on failure. + * @param pVM The cross context VM structure, if the tree + * is associated with one. + * @param enmTag The allocation tag. + * @param cb The size of the allocation. + */ +static void *cfgmR3MemAlloc(PVM pVM, MMTAG enmTag, size_t cb) +{ + if (pVM) + return MMR3HeapAlloc(pVM, enmTag, cb); + return RTMemAlloc(cb); +} + + +/** + * Free wrapper. + * + * @returns Pointer to the allocated memory, NULL on failure. + * @param pVM The cross context VM structure, if the tree + * is associated with one. + * @param pv The memory block to free. + */ +static void cfgmR3MemFree(PVM pVM, void *pv) +{ + if (pVM) + MMR3HeapFree(pv); + else + RTMemFree(pv); +} + + +/** + * String allocator wrapper. + * + * @returns Pointer to the allocated memory, NULL on failure. + * @param pVM The cross context VM structure, if the tree + * is associated with one. + * @param enmTag The allocation tag. + * @param cbString The size of the allocation, terminator included. + */ +static char *cfgmR3StrAlloc(PVM pVM, MMTAG enmTag, size_t cbString) +{ + if (pVM) + return (char *)MMR3HeapAlloc(pVM, enmTag, cbString); + return (char *)RTStrAlloc(cbString); +} + + +/** + * String free wrapper. + * + * @returns Pointer to the allocated memory, NULL on failure. + * @param pVM The cross context VM structure, if the tree + * is associated with one. + * @param pszString The memory block to free. 
+ */ +static void cfgmR3StrFree(PVM pVM, char *pszString) +{ + if (pVM) + MMR3HeapFree(pszString); + else + RTStrFree(pszString); +} + + +/** + * Frees one node, leaving any children or leaves to the caller. + * + * @param pNode The node structure to free. + */ +static void cfgmR3FreeNodeOnly(PCFGMNODE pNode) +{ + pNode->pFirstLeaf = NULL; + pNode->pFirstChild = NULL; + pNode->pNext = NULL; + pNode->pPrev = NULL; + if (!pNode->pVM) + RTMemFree(pNode); + else + { + pNode->pVM = NULL; + MMR3HeapFree(pNode); + } +} + + + + +/** + * Constructs the configuration for the VM. + * + * This should only be called once. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pfnCFGMConstructor Pointer to callback function for constructing + * the VM configuration tree. This is called on + * the EMT. + * @param pvUser The user argument passed to pfnCFGMConstructor. + * @thread EMT. + * @internal + */ +VMMR3DECL(int) CFGMR3Init(PVM pVM, PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUser) +{ + LogFlow(("CFGMR3Init: pfnCFGMConstructor=%p pvUser=%p\n", pfnCFGMConstructor, pvUser)); + + /* + * Init data members. + */ + pVM->cfgm.s.pRoot = NULL; + + /* + * Register DBGF info item. + */ + int rc = DBGFR3InfoRegisterInternal(pVM, "cfgm", "Dumps a part of the CFGM tree. The argument indicates where to start.", + cfgmR3Info); + AssertRCReturn(rc,rc); + + /* + * Root Node. + */ + PCFGMNODE pRoot = (PCFGMNODE)MMR3HeapAllocZ(pVM, MM_TAG_CFGM, sizeof(*pRoot)); + if (!pRoot) + return VERR_NO_MEMORY; + pRoot->pVM = pVM; + pRoot->cchName = 0; + pVM->cfgm.s.pRoot = pRoot; + + /* + * Call the constructor if specified, if not use the default one. + */ + if (pfnCFGMConstructor) + rc = pfnCFGMConstructor(pVM->pUVM, pVM, pvUser); + else + rc = CFGMR3ConstructDefaultTree(pVM); + if (RT_SUCCESS(rc)) + { + Log(("CFGMR3Init: Successfully constructed the configuration\n")); + CFGMR3Dump(CFGMR3GetRoot(pVM)); + } + else + LogRel(("Constructor failed with rc=%Rrc pfnCFGMConstructor=%p\n", rc, pfnCFGMConstructor)); + + return rc; +} + + +/** + * Terminates the configuration manager. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @internal + */ +VMMR3DECL(int) CFGMR3Term(PVM pVM) +{ + CFGMR3RemoveNode(pVM->cfgm.s.pRoot); + pVM->cfgm.s.pRoot = NULL; + return 0; +} + + +/** + * Gets the root node for the VM. + * + * @returns Pointer to root node. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetRoot(PVM pVM) +{ + return pVM->cfgm.s.pRoot; +} + + +/** + * Gets the root node for the VM. + * + * @returns Pointer to root node. + * @param pUVM The user mode VM structure. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetRootU(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + PVM pVM = pUVM->pVM; + AssertReturn(pVM, NULL); + return pVM->cfgm.s.pRoot; +} + + +/** + * Gets the parent of a CFGM node. + * + * @returns Pointer to the parent node. + * @returns NULL if pNode is Root or pNode is the start of a + * restricted subtree (use CFGMR3GetParentEx() for that). + * + * @param pNode The node which parent we query. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetParent(PCFGMNODE pNode) +{ + if (pNode && !pNode->fRestrictedRoot) + return pNode->pParent; + return NULL; +} + + +/** + * Gets the parent of a CFGM node. + * + * @returns Pointer to the parent node. + * @returns NULL if pNode is Root or pVM is not correct. + * + * @param pVM The cross context VM structure. Used as token that + * the caller is trusted. 
+ * @param pNode The node which parent we query. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetParentEx(PVM pVM, PCFGMNODE pNode) +{ + if (pNode && pNode->pVM == pVM) + return pNode->pParent; + return NULL; +} + + +/** + * Query a child node. + * + * @returns Pointer to the specified node. + * @returns NULL if node was not found or pNode is NULL. + * @param pNode Node pszPath is relative to. + * @param pszPath Path to the child node or pNode. + * It's good style to end this with '/'. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetChild(PCFGMNODE pNode, const char *pszPath) +{ + PCFGMNODE pChild; + int rc = cfgmR3ResolveNode(pNode, pszPath, &pChild); + if (RT_SUCCESS(rc)) + return pChild; + return NULL; +} + + +/** + * Query a child node by a format string. + * + * @returns Pointer to the specified node. + * @returns NULL if node was not found or pNode is NULL. + * @param pNode Node pszPath is relative to. + * @param pszPathFormat Path to the child node or pNode. + * It's good style to end this with '/'. + * @param ... Arguments to pszPathFormat. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetChildF(PCFGMNODE pNode, const char *pszPathFormat, ...) +{ + va_list Args; + va_start(Args, pszPathFormat); + PCFGMNODE pRet = CFGMR3GetChildFV(pNode, pszPathFormat, Args); + va_end(Args); + return pRet; +} + + +/** + * Query a child node by a format string. + * + * @returns Pointer to the specified node. + * @returns NULL if node was not found or pNode is NULL. + * @param pNode Node pszPath is relative to. + * @param pszPathFormat Path to the child node or pNode. + * It's good style to end this with '/'. + * @param Args Arguments to pszPathFormat. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetChildFV(PCFGMNODE pNode, const char *pszPathFormat, va_list Args) +{ + char *pszPath; + RTStrAPrintfV(&pszPath, pszPathFormat, Args); + if (pszPath) + { + PCFGMNODE pChild; + int rc = cfgmR3ResolveNode(pNode, pszPath, &pChild); + RTStrFree(pszPath); + if (RT_SUCCESS(rc)) + return pChild; + } + return NULL; +} + + +/** + * Gets the first child node. + * Use this to start an enumeration of child nodes. + * + * @returns Pointer to the first child. + * @returns NULL if no children. + * @param pNode Node to enumerate children for. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetFirstChild(PCFGMNODE pNode) +{ + return pNode ? pNode->pFirstChild : NULL; +} + + +/** + * Gets the next sibling node. + * Use this to continue an enumeration. + * + * @returns Pointer to the first child. + * @returns NULL if no children. + * @param pCur Node to returned by a call to CFGMR3GetFirstChild() + * or successive calls to this function. + */ +VMMR3DECL(PCFGMNODE) CFGMR3GetNextChild(PCFGMNODE pCur) +{ + return pCur ? pCur->pNext : NULL; +} + + +/** + * Gets the name of the current node. + * (Needed for enumeration.) + * + * @returns VBox status code. + * @param pCur Node to returned by a call to CFGMR3GetFirstChild() + * or successive calls to CFGMR3GetNextChild(). + * @param pszName Where to store the node name. + * @param cchName Size of the buffer pointed to by pszName (with terminator). + */ +VMMR3DECL(int) CFGMR3GetName(PCFGMNODE pCur, char *pszName, size_t cchName) +{ + int rc; + if (pCur) + { + if (cchName > pCur->cchName) + { + rc = VINF_SUCCESS; + memcpy(pszName, pCur->szName, pCur->cchName + 1); + } + else + rc = VERR_CFGM_NOT_ENOUGH_SPACE; + } + else + rc = VERR_CFGM_NO_NODE; + return rc; +} + + +/** + * Gets the length of the current node's name. + * (Needed for enumeration.) + * + * @returns Node name length in bytes including the terminating null char. 
+ * @returns 0 if pCur is NULL. + * @param pCur Node to returned by a call to CFGMR3GetFirstChild() + * or successive calls to CFGMR3GetNextChild(). + */ +VMMR3DECL(size_t) CFGMR3GetNameLen(PCFGMNODE pCur) +{ + return pCur ? pCur->cchName + 1 : 0; +} + + +/** + * Validates that the child nodes are within a set of valid names. + * + * @returns true if all names are found in pszzAllowed. + * @returns false if not. + * @param pNode The node which children should be examined. + * @param pszzValid List of valid names separated by '\\0' and ending with + * a double '\\0'. + * + * @deprecated Use CFGMR3ValidateConfig. + */ +VMMR3DECL(bool) CFGMR3AreChildrenValid(PCFGMNODE pNode, const char *pszzValid) +{ + if (pNode) + { + for (PCFGMNODE pChild = pNode->pFirstChild; pChild; pChild = pChild->pNext) + { + /* search pszzValid for the name */ + const char *psz = pszzValid; + while (*psz) + { + size_t cch = strlen(psz); + if ( cch == pChild->cchName + && !memcmp(psz, pChild->szName, cch)) + break; + + /* next */ + psz += cch + 1; + } + + /* if at end of pszzValid we didn't find it => failure */ + if (!*psz) + { + AssertMsgFailed(("Couldn't find '%s' in the valid values\n", pChild->szName)); + return false; + } + } + } + + /* all ok. */ + return true; +} + + +/** + * Gets the first value of a node. + * Use this to start an enumeration of values. + * + * @returns Pointer to the first value. + * @param pCur The node (Key) which values to enumerate. + */ +VMMR3DECL(PCFGMLEAF) CFGMR3GetFirstValue(PCFGMNODE pCur) +{ + return pCur ? pCur->pFirstLeaf : NULL; +} + +/** + * Gets the next value in enumeration. + * + * @returns Pointer to the next value. + * @param pCur The current value as returned by this function or CFGMR3GetFirstValue(). + */ +VMMR3DECL(PCFGMLEAF) CFGMR3GetNextValue(PCFGMLEAF pCur) +{ + return pCur ? pCur->pNext : NULL; +} + +/** + * Get the value name. + * (Needed for enumeration.) + * + * @returns VBox status code. + * @param pCur Value returned by a call to CFGMR3GetFirstValue() + * or successive calls to CFGMR3GetNextValue(). + * @param pszName Where to store the value name. + * @param cchName Size of the buffer pointed to by pszName (with terminator). + */ +VMMR3DECL(int) CFGMR3GetValueName(PCFGMLEAF pCur, char *pszName, size_t cchName) +{ + int rc; + if (pCur) + { + if (cchName > pCur->cchName) + { + rc = VINF_SUCCESS; + memcpy(pszName, pCur->szName, pCur->cchName + 1); + } + else + rc = VERR_CFGM_NOT_ENOUGH_SPACE; + } + else + rc = VERR_CFGM_NO_NODE; + return rc; +} + + +/** + * Gets the length of the current node's name. + * (Needed for enumeration.) + * + * @returns Value name length in bytes including the terminating null char. + * @returns 0 if pCur is NULL. + * @param pCur Value returned by a call to CFGMR3GetFirstValue() + * or successive calls to CFGMR3GetNextValue(). + */ +VMMR3DECL(size_t) CFGMR3GetValueNameLen(PCFGMLEAF pCur) +{ + return pCur ? pCur->cchName + 1 : 0; +} + + +/** + * Gets the value type. + * (For enumeration.) + * + * @returns VBox status code. + * @param pCur Value returned by a call to CFGMR3GetFirstValue() + * or successive calls to CFGMR3GetNextValue(). + */ +VMMR3DECL(CFGMVALUETYPE) CFGMR3GetValueType(PCFGMLEAF pCur) +{ + Assert(pCur); + return pCur->enmType; +} + + +/** + * Validates that the values are within a set of valid names. + * + * @returns true if all names are found in pszzValid. + * @returns false if not. + * @param pNode The node which values should be examined. 
+ * @param pszzValid List of valid names separated by '\\0' and ending with + * a double '\\0'. + * @deprecated Use CFGMR3ValidateConfig. + */ +VMMR3DECL(bool) CFGMR3AreValuesValid(PCFGMNODE pNode, const char *pszzValid) +{ + if (pNode) + { + for (PCFGMLEAF pLeaf = pNode->pFirstLeaf; pLeaf; pLeaf = pLeaf->pNext) + { + /* search pszzValid for the name */ + const char *psz = pszzValid; + while (*psz) + { + size_t cch = strlen(psz); + if ( cch == pLeaf->cchName + && !memcmp(psz, pLeaf->szName, cch)) + break; + + /* next */ + psz += cch + 1; + } + + /* if at end of pszzValid we didn't find it => failure */ + if (!*psz) + { + AssertMsgFailed(("Couldn't find '%s' in the valid values\n", pLeaf->szName)); + return false; + } + } + } + + /* all ok. */ + return true; +} + + +/** + * Checks if the given value exists. + * + * @returns true if it exists, false if not. + * @param pNode Which node to search for pszName in. + * @param pszName The name of the value we seek. + */ +VMMR3DECL(bool) CFGMR3Exists(PCFGMNODE pNode, const char *pszName) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + return RT_SUCCESS_NP(rc); +} + + +/** + * Query value type. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param penmType Where to store the type. + */ +VMMR3DECL(int) CFGMR3QueryType(PCFGMNODE pNode, const char *pszName, PCFGMVALUETYPE penmType) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (penmType) + *penmType = pLeaf->enmType; + } + return rc; +} + + +/** + * Query value size. + * This works on all types of values. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pcb Where to store the value size. + */ +VMMR3DECL(int) CFGMR3QuerySize(PCFGMNODE pNode, const char *pszName, size_t *pcb) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + switch (pLeaf->enmType) + { + case CFGMVALUETYPE_INTEGER: + *pcb = sizeof(pLeaf->Value.Integer.u64); + break; + + case CFGMVALUETYPE_STRING: + *pcb = pLeaf->Value.String.cb; + break; + + case CFGMVALUETYPE_BYTES: + *pcb = pLeaf->Value.Bytes.cb; + break; + + default: + rc = VERR_CFGM_IPE_1; + AssertMsgFailed(("Invalid value type %d\n", pLeaf->enmType)); + break; + } + } + return rc; +} + + +/** + * Query integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu64 Where to store the integer value. + */ +VMMR3DECL(int) CFGMR3QueryInteger(PCFGMNODE pNode, const char *pszName, uint64_t *pu64) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (pLeaf->enmType == CFGMVALUETYPE_INTEGER) + *pu64 = pLeaf->Value.Integer.u64; + else + rc = VERR_CFGM_NOT_INTEGER; + } + return rc; +} + + +/** + * Query integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu64 Where to store the integer value. This is set to the default on failure. + * @param u64Def The default value. This is always set. 
+ */ +VMMR3DECL(int) CFGMR3QueryIntegerDef(PCFGMNODE pNode, const char *pszName, uint64_t *pu64, uint64_t u64Def) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (pLeaf->enmType == CFGMVALUETYPE_INTEGER) + *pu64 = pLeaf->Value.Integer.u64; + else + rc = VERR_CFGM_NOT_INTEGER; + } + + if (RT_FAILURE(rc)) + { + *pu64 = u64Def; + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + rc = VINF_SUCCESS; + } + + return rc; +} + + +/** + * Query zero terminated character value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of a zero terminate character value. + * @param pszString Where to store the string. + * @param cchString Size of the string buffer. (Includes terminator.) + */ +VMMR3DECL(int) CFGMR3QueryString(PCFGMNODE pNode, const char *pszName, char *pszString, size_t cchString) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (pLeaf->enmType == CFGMVALUETYPE_STRING) + { + size_t cbSrc = pLeaf->Value.String.cb; + if (cchString >= cbSrc) + { + memcpy(pszString, pLeaf->Value.String.psz, cbSrc); + memset(pszString + cbSrc, 0, cchString - cbSrc); + } + else + rc = VERR_CFGM_NOT_ENOUGH_SPACE; + } + else + rc = VERR_CFGM_NOT_STRING; + } + return rc; +} + + +/** + * Query zero terminated character value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of a zero terminate character value. + * @param pszString Where to store the string. This will not be set on overflow error. + * @param cchString Size of the string buffer. (Includes terminator.) + * @param pszDef The default value. + */ +VMMR3DECL(int) CFGMR3QueryStringDef(PCFGMNODE pNode, const char *pszName, char *pszString, size_t cchString, const char *pszDef) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (pLeaf->enmType == CFGMVALUETYPE_STRING) + { + size_t cbSrc = pLeaf->Value.String.cb; + if (cchString >= cbSrc) + { + memcpy(pszString, pLeaf->Value.String.psz, cbSrc); + memset(pszString + cbSrc, 0, cchString - cbSrc); + } + else + rc = VERR_CFGM_NOT_ENOUGH_SPACE; + } + else + rc = VERR_CFGM_NOT_STRING; + } + + if (RT_FAILURE(rc) && rc != VERR_CFGM_NOT_ENOUGH_SPACE) + { + size_t cchDef = strlen(pszDef); + if (cchString > cchDef) + { + memcpy(pszString, pszDef, cchDef); + memset(pszString + cchDef, 0, cchString - cchDef); + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + rc = VINF_SUCCESS; + } + else if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + rc = VERR_CFGM_NOT_ENOUGH_SPACE; + } + + return rc; +} + + +/** + * Query byte string value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of a byte string value. + * @param pvData Where to store the binary data. + * @param cbData Size of buffer pvData points too. 
+ */ +VMMR3DECL(int) CFGMR3QueryBytes(PCFGMNODE pNode, const char *pszName, void *pvData, size_t cbData) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (pLeaf->enmType == CFGMVALUETYPE_BYTES) + { + if (cbData >= pLeaf->Value.Bytes.cb) + { + memcpy(pvData, pLeaf->Value.Bytes.pau8, pLeaf->Value.Bytes.cb); + memset((char *)pvData + pLeaf->Value.Bytes.cb, 0, cbData - pLeaf->Value.Bytes.cb); + } + else + rc = VERR_CFGM_NOT_ENOUGH_SPACE; + } + else + rc = VERR_CFGM_NOT_BYTES; + } + return rc; +} + + +/** + * Validate one level of a configuration node. + * + * This replaces the CFGMR3AreChildrenValid and CFGMR3AreValuesValid APIs. + * + * @returns VBox status code. + * + * When an error is returned, both VMSetError and AssertLogRelMsgFailed + * have been called. So, all the caller needs to do is to propagate + * the error status up to PDM. + * + * @param pNode The node to validate. + * @param pszNode The node path, always ends with a slash. Use + * "/" for the root config node. + * @param pszValidValues Patterns describing the valid value names. See + * RTStrSimplePatternMultiMatch for details on the + * pattern syntax. + * @param pszValidNodes Patterns describing the valid node (key) names. + * See RTStrSimplePatternMultiMatch for details on + * the pattern syntax. + * @param pszWho Who is calling. + * @param uInstance The instance number of the caller. + */ +VMMR3DECL(int) CFGMR3ValidateConfig(PCFGMNODE pNode, const char *pszNode, + const char *pszValidValues, const char *pszValidNodes, + const char *pszWho, uint32_t uInstance) +{ + /* Input validation. */ + AssertPtrNullReturn(pNode, VERR_INVALID_POINTER); + AssertPtrReturn(pszNode, VERR_INVALID_POINTER); + Assert(*pszNode && pszNode[strlen(pszNode) - 1] == '/'); + AssertPtrReturn(pszValidValues, VERR_INVALID_POINTER); + AssertPtrReturn(pszValidNodes, VERR_INVALID_POINTER); + AssertPtrReturn(pszWho, VERR_INVALID_POINTER); + + if (pNode) + { + /* + * Enumerate the leaves and check them against pszValidValues. + */ + for (PCFGMLEAF pLeaf = pNode->pFirstLeaf; pLeaf; pLeaf = pLeaf->pNext) + { + if (!RTStrSimplePatternMultiMatch(pszValidValues, RTSTR_MAX, + pLeaf->szName, pLeaf->cchName, + NULL)) + { + AssertLogRelMsgFailed(("%s/%u: Value '%s%s' didn't match '%s'\n", + pszWho, uInstance, pszNode, pLeaf->szName, pszValidValues)); + return VMSetError(pNode->pVM, VERR_CFGM_CONFIG_UNKNOWN_VALUE, RT_SRC_POS, + N_("Unknown configuration value '%s%s' found in the configuration of %s instance #%u"), + pszNode, pLeaf->szName, pszWho, uInstance); + } + + } + + /* + * Enumerate the child nodes and check them against pszValidNodes. + */ + for (PCFGMNODE pChild = pNode->pFirstChild; pChild; pChild = pChild->pNext) + { + if (!RTStrSimplePatternMultiMatch(pszValidNodes, RTSTR_MAX, + pChild->szName, pChild->cchName, + NULL)) + { + AssertLogRelMsgFailed(("%s/%u: Node '%s%s' didn't match '%s'\n", + pszWho, uInstance, pszNode, pChild->szName, pszValidNodes)); + return VMSetError(pNode->pVM, VERR_CFGM_CONFIG_UNKNOWN_NODE, RT_SRC_POS, + N_("Unknown configuration node '%s%s' found in the configuration of %s instance #%u"), + pszNode, pChild->szName, pszWho, uInstance); + } + } + } + + /* All is well. */ + return VINF_SUCCESS; +} + + + +/** + * Populates the CFGM tree with the default configuration. + * + * This assumes an empty tree and is intended for testcases and such that only + * need to do very small adjustments to the config. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @internal + */ +VMMR3DECL(int) CFGMR3ConstructDefaultTree(PVM pVM) +{ + int rc; + int rcAll = VINF_SUCCESS; +#define UPDATERC() do { if (RT_FAILURE(rc) && RT_SUCCESS(rcAll)) rcAll = rc; } while (0) + + PCFGMNODE pRoot = CFGMR3GetRoot(pVM); + AssertReturn(pRoot, VERR_WRONG_ORDER); + + /* + * Create VM default values. + */ + rc = CFGMR3InsertString(pRoot, "Name", "Default VM"); + UPDATERC(); + rc = CFGMR3InsertInteger(pRoot, "RamSize", 128U * _1M); + UPDATERC(); + rc = CFGMR3InsertInteger(pRoot, "RamHoleSize", 512U * _1M); + UPDATERC(); + rc = CFGMR3InsertInteger(pRoot, "TimerMillies", 10); + UPDATERC(); + rc = CFGMR3InsertInteger(pRoot, "RawR3Enabled", 1); + UPDATERC(); + /** @todo CFGM Defaults: RawR0, PATMEnabled and CASMEnabled needs attention later. */ + rc = CFGMR3InsertInteger(pRoot, "RawR0Enabled", 1); + UPDATERC(); + rc = CFGMR3InsertInteger(pRoot, "PATMEnabled", 1); + UPDATERC(); + rc = CFGMR3InsertInteger(pRoot, "CSAMEnabled", 1); + UPDATERC(); + + /* + * PDM. + */ + PCFGMNODE pPdm; + rc = CFGMR3InsertNode(pRoot, "PDM", &pPdm); + UPDATERC(); + PCFGMNODE pDevices = NULL; + rc = CFGMR3InsertNode(pPdm, "Devices", &pDevices); + UPDATERC(); + rc = CFGMR3InsertInteger(pDevices, "LoadBuiltin", 1); /* boolean */ + UPDATERC(); + PCFGMNODE pDrivers = NULL; + rc = CFGMR3InsertNode(pPdm, "Drivers", &pDrivers); + UPDATERC(); + rc = CFGMR3InsertInteger(pDrivers, "LoadBuiltin", 1); /* boolean */ + UPDATERC(); + + + /* + * Devices + */ + pDevices = NULL; + rc = CFGMR3InsertNode(pRoot, "Devices", &pDevices); + UPDATERC(); + /* device */ + PCFGMNODE pDev = NULL; + PCFGMNODE pInst = NULL; + PCFGMNODE pCfg = NULL; +#if 0 + PCFGMNODE pLunL0 = NULL; + PCFGMNODE pLunL1 = NULL; +#endif + + /* + * PC Arch. + */ + rc = CFGMR3InsertNode(pDevices, "pcarch", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + + /* + * PC Bios. + */ + rc = CFGMR3InsertNode(pDevices, "pcbios", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "BootDevice0", "IDE"); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "BootDevice1", "NONE"); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "BootDevice2", "NONE"); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "BootDevice3", "NONE"); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "HardDiskDevice", "piix3ide"); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "FloppyDevice", ""); + UPDATERC(); + RTUUID Uuid; + RTUuidClear(&Uuid); + rc = CFGMR3InsertBytes(pCfg, "UUID", &Uuid, sizeof(Uuid)); + UPDATERC(); + + /* + * PCI bus. 
+ */ + rc = CFGMR3InsertNode(pDevices, "pci", &pDev); /* piix3 */ + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + + /* + * PS/2 keyboard & mouse + */ + rc = CFGMR3InsertNode(pDevices, "pckbd", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); +#if 0 + rc = CFGMR3InsertNode(pInst, "LUN#0", &pLunL0); + UPDATERC(); + rc = CFGMR3InsertString(pLunL0, "Driver", "KeyboardQueue"); + UPDATERC(); + rc = CFGMR3InsertNode(pLunL0, "Config", &pCfg); + UPDATERC(); + rc = CFGMR3InsertInteger(pCfg, "QueueSize", 64); + UPDATERC(); + rc = CFGMR3InsertNode(pLunL0, "AttachedDriver", &pLunL1); + UPDATERC(); + rc = CFGMR3InsertString(pLunL1, "Driver", "MainKeyboard"); + UPDATERC(); + rc = CFGMR3InsertNode(pLunL1, "Config", &pCfg); + UPDATERC(); +#endif +#if 0 + rc = CFGMR3InsertNode(pInst, "LUN#1", &pLunL0); + UPDATERC(); + rc = CFGMR3InsertString(pLunL0, "Driver", "MouseQueue"); + UPDATERC(); + rc = CFGMR3InsertNode(pLunL0, "Config", &pCfg); + UPDATERC(); + rc = CFGMR3InsertInteger(pCfg, "QueueSize", 128); + UPDATERC(); + rc = CFGMR3InsertNode(pLunL0, "AttachedDriver", &pLunL1); + UPDATERC(); + rc = CFGMR3InsertString(pLunL1, "Driver", "MainMouse"); + UPDATERC(); + rc = CFGMR3InsertNode(pLunL1, "Config", &pCfg); + UPDATERC(); +#endif + + /* + * i8254 Programmable Interval Timer And Dummy Speaker + */ + rc = CFGMR3InsertNode(pDevices, "i8254", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); +#ifdef DEBUG + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); +#endif + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + + /* + * i8259 Programmable Interrupt Controller. + */ + rc = CFGMR3InsertNode(pDevices, "i8259", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + + /* + * RTC MC146818. + */ + rc = CFGMR3InsertNode(pDevices, "mc146818", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + + /* + * VGA. + */ + rc = CFGMR3InsertNode(pDevices, "vga", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + rc = CFGMR3InsertInteger(pCfg, "VRamSize", 4 * _1M); + UPDATERC(); + + /* Bios logo. */ + rc = CFGMR3InsertInteger(pCfg, "FadeIn", 1); + UPDATERC(); + rc = CFGMR3InsertInteger(pCfg, "FadeOut", 1); + UPDATERC(); + rc = CFGMR3InsertInteger(pCfg, "LogoTime", 0); + UPDATERC(); + rc = CFGMR3InsertString(pCfg, "LogoFile", ""); + UPDATERC(); + +#if 0 + rc = CFGMR3InsertNode(pInst, "LUN#0", &pLunL0); + UPDATERC(); + rc = CFGMR3InsertString(pLunL0, "Driver", "MainDisplay"); + UPDATERC(); +#endif + + /* + * IDE controller. + */ + rc = CFGMR3InsertNode(pDevices, "piix3ide", &pDev); /* piix3 */ + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + + /* + * VMMDev. 
+ */ + rc = CFGMR3InsertNode(pDevices, "VMMDev", &pDev); + UPDATERC(); + rc = CFGMR3InsertNode(pDev, "0", &pInst); + UPDATERC(); + rc = CFGMR3InsertNode(pInst, "Config", &pCfg); + UPDATERC(); + rc = CFGMR3InsertInteger(pInst, "Trusted", 1); /* boolean */ + UPDATERC(); + + + /* + * ... + */ + +#undef UPDATERC + return rcAll; +} + + + + +/** + * Resolves a path reference to a child node. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszPath Path to the child node. + * @param ppChild Where to store the pointer to the child node. + */ +static int cfgmR3ResolveNode(PCFGMNODE pNode, const char *pszPath, PCFGMNODE *ppChild) +{ + *ppChild = NULL; + if (!pNode) + return VERR_CFGM_NO_PARENT; + PCFGMNODE pChild = NULL; + for (;;) + { + /* skip leading slashes. */ + while (*pszPath == '/') + pszPath++; + + /* End of path? */ + if (!*pszPath) + { + if (!pChild) + return VERR_CFGM_INVALID_CHILD_PATH; + *ppChild = pChild; + return VINF_SUCCESS; + } + + /* find end of component. */ + const char *pszNext = strchr(pszPath, '/'); + if (!pszNext) + pszNext = strchr(pszPath, '\0'); + RTUINT cchName = pszNext - pszPath; + + /* search child list. */ + pChild = pNode->pFirstChild; + for ( ; pChild; pChild = pChild->pNext) + if (pChild->cchName == cchName) + { + int iDiff = memcmp(pszPath, pChild->szName, cchName); + if (iDiff <= 0) + { + if (iDiff != 0) + pChild = NULL; + break; + } + } + if (!pChild) + return VERR_CFGM_CHILD_NOT_FOUND; + + /* next iteration */ + pNode = pChild; + pszPath = pszNext; + } + + /* won't get here */ +} + + +/** + * Resolves a path reference to a child node. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of a byte string value. + * @param ppLeaf Where to store the pointer to the leaf node. + */ +static int cfgmR3ResolveLeaf(PCFGMNODE pNode, const char *pszName, PCFGMLEAF *ppLeaf) +{ + *ppLeaf = NULL; + if (!pNode) + return VERR_CFGM_NO_PARENT; + + size_t cchName = strlen(pszName); + PCFGMLEAF pLeaf = pNode->pFirstLeaf; + while (pLeaf) + { + if (cchName == pLeaf->cchName) + { + int iDiff = memcmp(pszName, pLeaf->szName, cchName); + if (iDiff <= 0) + { + if (iDiff != 0) + break; + *ppLeaf = pLeaf; + return VINF_SUCCESS; + } + } + + /* next */ + pLeaf = pLeaf->pNext; + } + return VERR_CFGM_VALUE_NOT_FOUND; +} + + + +/** + * Creates a CFGM tree. + * + * This is intended for creating device/driver configs can be + * passed around and later attached to the main tree in the + * correct location. + * + * @returns Pointer to the root node, NULL on error (out of memory or invalid + * VM handle). + * @param pUVM The user mode VM handle. For testcase (and other + * purposes, NULL can be used. However, the resulting + * tree cannot be inserted into a tree that has a + * non-NULL value. Using NULL can be usedful for + * testcases and similar, non VMM uses. + */ +VMMR3DECL(PCFGMNODE) CFGMR3CreateTree(PUVM pUVM) +{ + if (pUVM) + { + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, NULL); + } + + PCFGMNODE pNew; + if (pUVM) + pNew = (PCFGMNODE)MMR3HeapAllocU(pUVM, MM_TAG_CFGM, sizeof(*pNew)); + else + pNew = (PCFGMNODE)RTMemAlloc(sizeof(*pNew)); + if (pNew) + { + pNew->pPrev = NULL; + pNew->pNext = NULL; + pNew->pParent = NULL; + pNew->pFirstChild = NULL; + pNew->pFirstLeaf = NULL; + pNew->pVM = pUVM ? 
pUVM->pVM : NULL; + pNew->fRestrictedRoot = false; + pNew->cchName = 0; + pNew->szName[0] = 0; + } + return pNew; +} + + +/** + * Duplicates a CFGM sub-tree or a full tree. + * + * @returns VBox status code, VERR_NO_MEMORY if we run out of memory or + * VERR_INVALID_POINTER if the input parameter is NULL. + * @param pRoot The root of the tree to duplicate. + * @param ppCopy Where to return the root of the duplicate. + */ +VMMR3DECL(int) CFGMR3DuplicateSubTree(PCFGMNODE pRoot, PCFGMNODE *ppCopy) +{ + AssertPtrReturn(pRoot, VERR_INVALID_POINTER); + + /* + * Create a new tree. + */ + PCFGMNODE pNewRoot = CFGMR3CreateTree(pRoot->pVM ? pRoot->pVM->pUVM : NULL); + if (!pNewRoot) + return VERR_NO_MEMORY; + + /* + * Duplicate the content. + */ + int rc = VINF_SUCCESS; + PCFGMNODE pSrcCur = pRoot; + PCFGMNODE pDstCur = pNewRoot; + for (;;) + { + if ( !pDstCur->pFirstChild + && !pDstCur->pFirstLeaf) + { + /* + * Values first. + */ + /** @todo this isn't the most efficient way to do it. */ + for (PCFGMLEAF pLeaf = pSrcCur->pFirstLeaf; pLeaf && RT_SUCCESS(rc); pLeaf = pLeaf->pNext) + rc = CFGMR3InsertValue(pDstCur, pLeaf); + + /* + * Insert immediate child nodes. + */ + /** @todo this isn't the most efficient way to do it. */ + for (PCFGMNODE pChild = pSrcCur->pFirstChild; pChild && RT_SUCCESS(rc); pChild = pChild->pNext) + rc = CFGMR3InsertNode(pDstCur, pChild->szName, NULL); + + AssertLogRelRCBreak(rc); + } + + /* + * Deep copy of the children. + */ + if (pSrcCur->pFirstChild) + { + Assert(pDstCur->pFirstChild && !strcmp(pDstCur->pFirstChild->szName, pSrcCur->pFirstChild->szName)); + pSrcCur = pSrcCur->pFirstChild; + pDstCur = pDstCur->pFirstChild; + } + /* + * If it's the root node, we're done. + */ + else if (pSrcCur == pRoot) + break; + else + { + /* + * Upon reaching the end of a sibling list, we must ascend and + * resume the sibling walk on a previous level. + */ + if (!pSrcCur->pNext) + { + do + { + pSrcCur = pSrcCur->pParent; + pDstCur = pDstCur->pParent; + } while (!pSrcCur->pNext && pSrcCur != pRoot); + if (pSrcCur == pRoot) + break; + } + + /* + * Next sibling. + */ + Assert(pDstCur->pNext && !strcmp(pDstCur->pNext->szName, pSrcCur->pNext->szName)); + pSrcCur = pSrcCur->pNext; + pDstCur = pDstCur->pNext; + } + } + + if (RT_FAILURE(rc)) + { + CFGMR3RemoveNode(pNewRoot); + return rc; + } + + *ppCopy = pNewRoot; + return VINF_SUCCESS; +} + + +/** + * Insert subtree. + * + * This function inserts (no duplication) a tree created by CFGMR3CreateTree() + * into the main tree. + * + * The root node of the inserted subtree will need to be reallocated, which + * effectively means that the passed in pSubTree handle becomes invalid + * upon successful return. Use the value returned in ppChild instead + * of pSubTree. + * + * @returns VBox status code. + * @returns VERR_CFGM_NODE_EXISTS if the final child node name component exists. + * @param pNode Parent node. + * @param pszName Name or path of the new child node. + * @param pSubTree The subtree to insert. Must be returned by CFGMR3CreateTree(). + * @param ppChild Where to store the address of the new child node. (optional) + */ +VMMR3DECL(int) CFGMR3InsertSubTree(PCFGMNODE pNode, const char *pszName, PCFGMNODE pSubTree, PCFGMNODE *ppChild) +{ + /* + * Validate input. 
+ */ + AssertPtrReturn(pNode, VERR_INVALID_POINTER); + AssertPtrReturn(pSubTree, VERR_INVALID_POINTER); + AssertReturn(pNode != pSubTree, VERR_INVALID_PARAMETER); + AssertReturn(!pSubTree->pParent, VERR_INVALID_PARAMETER); + AssertReturn(pNode->pVM == pSubTree->pVM, VERR_INVALID_PARAMETER); + Assert(!pSubTree->pNext); + Assert(!pSubTree->pPrev); + + /* + * Use CFGMR3InsertNode to create a new node and then + * re-attach the children and leaves of the subtree to it. + */ + PCFGMNODE pNewChild; + int rc = CFGMR3InsertNode(pNode, pszName, &pNewChild); + if (RT_SUCCESS(rc)) + { + Assert(!pNewChild->pFirstChild); + Assert(!pNewChild->pFirstLeaf); + + pNewChild->pFirstChild = pSubTree->pFirstChild; + pNewChild->pFirstLeaf = pSubTree->pFirstLeaf; + for (PCFGMNODE pChild = pNewChild->pFirstChild; pChild; pChild = pChild->pNext) + pChild->pParent = pNewChild; + + if (ppChild) + *ppChild = pNewChild; + + /* free the old subtree root */ + cfgmR3FreeNodeOnly(pSubTree); + } + return rc; +} + + +/** + * Replaces a (sub-)tree with a new one. + * + * This function removes the existing (sub-)tree, completely freeing it in the + * process, and inserts (no duplication) the specified tree. The tree can + * either be created by CFGMR3CreateTree or CFGMR3DuplicateSubTree. + * + * @returns VBox status code. + * @param pRoot The sub-tree to replace. This node will remain valid + * after the call. + * @param pNewRoot The tree to replace @a pRoot with. This node will + * become invalid after a successful call. + */ +VMMR3DECL(int) CFGMR3ReplaceSubTree(PCFGMNODE pRoot, PCFGMNODE pNewRoot) +{ + /* + * Validate input. + */ + AssertPtrReturn(pRoot, VERR_INVALID_POINTER); + AssertPtrReturn(pNewRoot, VERR_INVALID_POINTER); + AssertReturn(pRoot != pNewRoot, VERR_INVALID_PARAMETER); + AssertReturn(!pNewRoot->pParent, VERR_INVALID_PARAMETER); + AssertReturn(pNewRoot->pVM == pRoot->pVM, VERR_INVALID_PARAMETER); + AssertReturn(!pNewRoot->pNext, VERR_INVALID_PARAMETER); + AssertReturn(!pNewRoot->pPrev, VERR_INVALID_PARAMETER); + + /* + * Free the current properties of pRoot. + */ + while (pRoot->pFirstChild) + CFGMR3RemoveNode(pRoot->pFirstChild); + + while (pRoot->pFirstLeaf) + cfgmR3RemoveLeaf(pRoot, pRoot->pFirstLeaf); + + /* + * Copy all the properties from the new root to the current one. + */ + pRoot->pFirstLeaf = pNewRoot->pFirstLeaf; + pRoot->pFirstChild = pNewRoot->pFirstChild; + for (PCFGMNODE pChild = pRoot->pFirstChild; pChild; pChild = pChild->pNext) + pChild->pParent = pRoot; + + cfgmR3FreeNodeOnly(pNewRoot); + + return VINF_SUCCESS; +} + + +/** + * Copies all values and keys from one tree onto another. + * + * The flags control what happens to keys and values with the same name + * existing in both source and destination. + * + * @returns VBox status code. + * @param pDstTree The destination tree. + * @param pSrcTree The source tree. + * @param fFlags Copy flags, see CFGM_COPY_FLAGS_XXX. + */ +VMMR3DECL(int) CFGMR3CopyTree(PCFGMNODE pDstTree, PCFGMNODE pSrcTree, uint32_t fFlags) +{ + /* + * Input validation. 
+ */ + AssertPtrReturn(pSrcTree, VERR_INVALID_POINTER); + AssertPtrReturn(pDstTree, VERR_INVALID_POINTER); + AssertReturn(pDstTree != pSrcTree, VERR_INVALID_PARAMETER); + AssertReturn(!(fFlags & ~(CFGM_COPY_FLAGS_VALUE_DISP_MASK | CFGM_COPY_FLAGS_KEY_DISP_MASK)), VERR_INVALID_PARAMETER); + AssertReturn( (fFlags & CFGM_COPY_FLAGS_VALUE_DISP_MASK) != CFGM_COPY_FLAGS_RESERVED_VALUE_DISP_0 + && (fFlags & CFGM_COPY_FLAGS_VALUE_DISP_MASK) != CFGM_COPY_FLAGS_RESERVED_VALUE_DISP_1, + VERR_INVALID_PARAMETER); + AssertReturn((fFlags & CFGM_COPY_FLAGS_KEY_DISP_MASK) != CFGM_COPY_FLAGS_RESERVED_KEY_DISP, + VERR_INVALID_PARAMETER); + + /* + * Copy the values. + */ + int rc; + for (PCFGMLEAF pValue = CFGMR3GetFirstValue(pSrcTree); pValue; pValue = CFGMR3GetNextValue(pValue)) + { + rc = CFGMR3InsertValue(pDstTree, pValue); + if (rc == VERR_CFGM_LEAF_EXISTS) + { + if ((fFlags & CFGM_COPY_FLAGS_VALUE_DISP_MASK) == CFGM_COPY_FLAGS_REPLACE_VALUES) + { + rc = CFGMR3RemoveValue(pDstTree, pValue->szName); + if (RT_FAILURE(rc)) + break; + rc = CFGMR3InsertValue(pDstTree, pValue); + } + else + rc = VINF_SUCCESS; + } + AssertRCReturn(rc, rc); + } + + /* + * Copy/merge the keys - merging results in recursion. + */ + for (PCFGMNODE pSrcChild = CFGMR3GetFirstChild(pSrcTree); pSrcChild; pSrcChild = CFGMR3GetNextChild(pSrcChild)) + { + PCFGMNODE pDstChild = CFGMR3GetChild(pDstTree, pSrcChild->szName); + if ( pDstChild + && (fFlags & CFGM_COPY_FLAGS_KEY_DISP_MASK) == CFGM_COPY_FLAGS_REPLACE_KEYS) + { + CFGMR3RemoveNode(pDstChild); + pDstChild = NULL; + } + if (!pDstChild) + { + PCFGMNODE pChildCopy; + rc = CFGMR3DuplicateSubTree(pSrcChild, &pChildCopy); + AssertRCReturn(rc, rc); + rc = CFGMR3InsertSubTree(pDstTree, pSrcChild->szName, pChildCopy, NULL); + AssertRCReturnStmt(rc, CFGMR3RemoveNode(pChildCopy), rc); + } + else if ((fFlags & CFGM_COPY_FLAGS_KEY_DISP_MASK) == CFGM_COPY_FLAGS_MERGE_KEYS) + { + rc = CFGMR3CopyTree(pDstChild, pSrcChild, fFlags); + AssertRCReturn(rc, rc); + } + } + + return VINF_SUCCESS; +} + + + +/** + * Compares two names. + * + * @returns Similar to memcpy. + * @param pszName1 The first name. + * @param cchName1 The length of the first name. + * @param pszName2 The second name. + * @param cchName2 The length of the second name. + */ +DECLINLINE(int) cfgmR3CompareNames(const char *pszName1, size_t cchName1, const char *pszName2, size_t cchName2) +{ + int iDiff; + if (cchName1 <= cchName2) + { + iDiff = memcmp(pszName1, pszName2, cchName1); + if (!iDiff && cchName1 < cchName2) + iDiff = -1; + } + else + { + iDiff = memcmp(pszName1, pszName2, cchName2); + if (!iDiff) + iDiff = 1; + } + return iDiff; +} + + +/** + * Insert a node. + * + * @returns VBox status code. + * @returns VERR_CFGM_NODE_EXISTS if the final child node name component exists. + * @param pNode Parent node. + * @param pszName Name or path of the new child node. + * @param ppChild Where to store the address of the new child node. (optional) + */ +VMMR3DECL(int) CFGMR3InsertNode(PCFGMNODE pNode, const char *pszName, PCFGMNODE *ppChild) +{ + int rc; + if (pNode) + { + /* + * If given a path we have to deal with it component by component. + */ + while (*pszName == '/') + pszName++; + if (strchr(pszName, '/')) + { + char *pszDup = RTStrDup(pszName); + if (pszDup) + { + char *psz = pszDup; + for (;;) + { + /* Terminate at '/' and find the next component. */ + char *pszNext = strchr(psz, '/'); + if (pszNext) + { + *pszNext++ = '\0'; + while (*pszNext == '/') + pszNext++; + if (*pszNext == '\0') + pszNext = NULL; + } + + /* does it exist? 
*/ + PCFGMNODE pChild = CFGMR3GetChild(pNode, psz); + if (!pChild) + { + /* no, insert it */ + rc = CFGMR3InsertNode(pNode, psz, &pChild); + if (RT_FAILURE(rc)) + break; + if (!pszNext) + { + if (ppChild) + *ppChild = pChild; + break; + } + + } + /* if last component fail */ + else if (!pszNext) + { + rc = VERR_CFGM_NODE_EXISTS; + break; + } + + /* next */ + pNode = pChild; + psz = pszNext; + } + RTStrFree(pszDup); + } + else + rc = VERR_NO_TMP_MEMORY; + } + /* + * Not multicomponent, just make sure it's a non-zero name. + */ + else if (*pszName) + { + /* + * Check if already exists and find last node in chain. + */ + size_t cchName = strlen(pszName); + PCFGMNODE pPrev = NULL; + PCFGMNODE pNext = pNode->pFirstChild; + if (pNext) + { + for ( ; pNext; pPrev = pNext, pNext = pNext->pNext) + { + int iDiff = cfgmR3CompareNames(pszName, cchName, pNext->szName, pNext->cchName); + if (iDiff <= 0) + { + if (!iDiff) + return VERR_CFGM_NODE_EXISTS; + break; + } + } + } + + /* + * Allocate and init node. + */ + PCFGMNODE pNew = (PCFGMNODE)cfgmR3MemAlloc(pNode->pVM, MM_TAG_CFGM, sizeof(*pNew) + cchName); + if (pNew) + { + pNew->pParent = pNode; + pNew->pFirstChild = NULL; + pNew->pFirstLeaf = NULL; + pNew->pVM = pNode->pVM; + pNew->fRestrictedRoot = false; + pNew->cchName = cchName; + memcpy(pNew->szName, pszName, cchName + 1); + + /* + * Insert into child list. + */ + pNew->pPrev = pPrev; + if (pPrev) + pPrev->pNext = pNew; + else + pNode->pFirstChild = pNew; + pNew->pNext = pNext; + if (pNext) + pNext->pPrev = pNew; + + if (ppChild) + *ppChild = pNew; + rc = VINF_SUCCESS; + } + else + rc = VERR_NO_MEMORY; + } + else + { + rc = VERR_CFGM_INVALID_NODE_PATH; + AssertMsgFailed(("Invalid path %s\n", pszName)); + } + } + else + { + rc = VERR_CFGM_NO_PARENT; + AssertMsgFailed(("No parent! path %s\n", pszName)); + } + + return rc; +} + + +/** + * Insert a node, format string name. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param ppChild Where to store the address of the new child node. (optional) + * @param pszNameFormat Name of or path the new child node. + * @param ... Name format arguments. + */ +VMMR3DECL(int) CFGMR3InsertNodeF(PCFGMNODE pNode, PCFGMNODE *ppChild, const char *pszNameFormat, ...) +{ + va_list Args; + va_start(Args, pszNameFormat); + int rc = CFGMR3InsertNodeFV(pNode, ppChild, pszNameFormat, Args); + va_end(Args); + return rc; +} + + +/** + * Insert a node, format string name. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param ppChild Where to store the address of the new child node. (optional) + * @param pszNameFormat Name or path of the new child node. + * @param Args Name format arguments. + */ +VMMR3DECL(int) CFGMR3InsertNodeFV(PCFGMNODE pNode, PCFGMNODE *ppChild, const char *pszNameFormat, va_list Args) +{ + int rc; + char *pszName; + RTStrAPrintfV(&pszName, pszNameFormat, Args); + if (pszName) + { + rc = CFGMR3InsertNode(pNode, pszName, ppChild); + RTStrFree(pszName); + } + else + rc = VERR_NO_MEMORY; + return rc; +} + + +/** + * Marks the node as the root of a restricted subtree, i.e. the end of + * a CFGMR3GetParent() journey. + * + * @param pNode The node to mark. + */ +VMMR3DECL(void) CFGMR3SetRestrictedRoot(PCFGMNODE pNode) +{ + if (pNode) + pNode->fRestrictedRoot = true; +} + + +/** + * Insert a node. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Name of the new child node. + * @param ppLeaf Where to store the new leaf. + * The caller must fill in the enmType and Value fields! 
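+ *
+ *              Typical calling pattern (this mirrors CFGMR3InsertInteger below;
+ *              u64Value stands for whatever data the caller wants to store):
+ * @code
+ *     PCFGMLEAF pLeaf;
+ *     int rc = cfgmR3InsertLeaf(pNode, pszName, &pLeaf);
+ *     if (RT_SUCCESS(rc))
+ *     {
+ *         pLeaf->enmType           = CFGMVALUETYPE_INTEGER;
+ *         pLeaf->Value.Integer.u64 = u64Value;
+ *     }
+ * @endcode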
+ */ +static int cfgmR3InsertLeaf(PCFGMNODE pNode, const char *pszName, PCFGMLEAF *ppLeaf) +{ + int rc; + if (*pszName) + { + if (pNode) + { + /* + * Check if already exists and find last node in chain. + */ + size_t cchName = strlen(pszName); + PCFGMLEAF pPrev = NULL; + PCFGMLEAF pNext = pNode->pFirstLeaf; + if (pNext) + { + for ( ; pNext; pPrev = pNext, pNext = pNext->pNext) + { + int iDiff = cfgmR3CompareNames(pszName, cchName, pNext->szName, pNext->cchName); + if (iDiff <= 0) + { + if (!iDiff) + return VERR_CFGM_LEAF_EXISTS; + break; + } + } + } + + /* + * Allocate and init node. + */ + PCFGMLEAF pNew = (PCFGMLEAF)cfgmR3MemAlloc(pNode->pVM, MM_TAG_CFGM, sizeof(*pNew) + cchName); + if (pNew) + { + pNew->cchName = cchName; + memcpy(pNew->szName, pszName, cchName + 1); + + /* + * Insert into child list. + */ + pNew->pPrev = pPrev; + if (pPrev) + pPrev->pNext = pNew; + else + pNode->pFirstLeaf = pNew; + pNew->pNext = pNext; + if (pNext) + pNext->pPrev = pNew; + + *ppLeaf = pNew; + rc = VINF_SUCCESS; + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_CFGM_NO_PARENT; + } + else + rc = VERR_CFGM_INVALID_CHILD_PATH; + return rc; +} + + +/** + * Removes a node. + * + * @param pNode The node to remove. + */ +VMMR3DECL(void) CFGMR3RemoveNode(PCFGMNODE pNode) +{ + if (pNode) + { + /* + * Free children. + */ + while (pNode->pFirstChild) + CFGMR3RemoveNode(pNode->pFirstChild); + + /* + * Free leaves. + */ + while (pNode->pFirstLeaf) + cfgmR3RemoveLeaf(pNode, pNode->pFirstLeaf); + + /* + * Unlink ourselves. + */ + if (pNode->pPrev) + pNode->pPrev->pNext = pNode->pNext; + else + { + if (pNode->pParent) + pNode->pParent->pFirstChild = pNode->pNext; + else if ( pNode->pVM /* might be a different tree */ + && pNode == pNode->pVM->cfgm.s.pRoot) + pNode->pVM->cfgm.s.pRoot = NULL; + } + if (pNode->pNext) + pNode->pNext->pPrev = pNode->pPrev; + + /* + * Free ourselves. + */ + cfgmR3FreeNodeOnly(pNode); + } +} + + +/** + * Removes a leaf. + * + * @param pNode Parent node. + * @param pLeaf Leaf to remove. + */ +static void cfgmR3RemoveLeaf(PCFGMNODE pNode, PCFGMLEAF pLeaf) +{ + if (pNode && pLeaf) + { + /* + * Unlink. + */ + if (pLeaf->pPrev) + pLeaf->pPrev->pNext = pLeaf->pNext; + else + pNode->pFirstLeaf = pLeaf->pNext; + if (pLeaf->pNext) + pLeaf->pNext->pPrev = pLeaf->pPrev; + + /* + * Free value and node. + */ + cfgmR3FreeValue(pNode->pVM, pLeaf); + pLeaf->pNext = NULL; + pLeaf->pPrev = NULL; + cfgmR3MemFree(pNode->pVM, pLeaf); + } +} + + +/** + * Frees whatever resources the leaf value is owning. + * + * Use this before assigning a new value to a leaf. + * The caller must either free the leaf or assign a new value to it. + * + * @param pVM The cross context VM structure, if the tree + * is associated with one. + * @param pLeaf Pointer to the leaf which value should be free. + */ +static void cfgmR3FreeValue(PVM pVM, PCFGMLEAF pLeaf) +{ + if (pLeaf) + { + switch (pLeaf->enmType) + { + case CFGMVALUETYPE_BYTES: + cfgmR3MemFree(pVM, pLeaf->Value.Bytes.pau8); + pLeaf->Value.Bytes.pau8 = NULL; + pLeaf->Value.Bytes.cb = 0; + break; + + case CFGMVALUETYPE_STRING: + cfgmR3StrFree(pVM, pLeaf->Value.String.psz); + pLeaf->Value.String.psz = NULL; + pLeaf->Value.String.cb = 0; + break; + + case CFGMVALUETYPE_INTEGER: + break; + } + pLeaf->enmType = (CFGMVALUETYPE)0; + } +} + +/** + * Destroys a tree created with CFGMR3CreateTree or CFGMR3DuplicateSubTree. + * + * @returns VBox status code. + * @param pRoot The root node of the tree. 
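+ *
+ * Illustrative sketch of the expected life cycle (pUVM and pParent are assumed
+ * to come from the caller's context; on a successful CFGMR3InsertSubTree the
+ * temporary root is consumed and must not be destroyed):
+ * @code
+ *     PCFGMNODE pTmp = CFGMR3CreateTree(pUVM);
+ *     int rc = pTmp ? CFGMR3InsertInteger(pTmp, "Answer", 42) : VERR_NO_MEMORY;
+ *     if (RT_SUCCESS(rc))
+ *         rc = CFGMR3InsertSubTree(pParent, "MyNode", pTmp, NULL);
+ *     if (RT_FAILURE(rc))
+ *         CFGMR3DestroyTree(pTmp);
+ * @endcode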
+ */ +VMMR3DECL(int) CFGMR3DestroyTree(PCFGMNODE pRoot) +{ + if (!pRoot) + return VINF_SUCCESS; + AssertReturn(!pRoot->pParent, VERR_INVALID_PARAMETER); + AssertReturn(!pRoot->pVM || pRoot != pRoot->pVM->cfgm.s.pRoot, VERR_ACCESS_DENIED); + + CFGMR3RemoveNode(pRoot); + return VINF_SUCCESS; +} + + +/** + * Inserts a new integer value. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param u64Integer The value. + */ +VMMR3DECL(int) CFGMR3InsertInteger(PCFGMNODE pNode, const char *pszName, uint64_t u64Integer) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3InsertLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + pLeaf->enmType = CFGMVALUETYPE_INTEGER; + pLeaf->Value.Integer.u64 = u64Integer; + } + return rc; +} + + +/** + * Inserts a new string value. This variant expects that the caller know the length + * of the string already so we can avoid calling strlen() here. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param pszString The value. Must not be NULL. + * @param cchString The length of the string excluding the + * terminator. + */ +VMMR3DECL(int) CFGMR3InsertStringN(PCFGMNODE pNode, const char *pszName, const char *pszString, size_t cchString) +{ + Assert(RTStrNLen(pszString, cchString) == cchString); + + int rc; + if (pNode) + { + /* + * Allocate string object first. + */ + char *pszStringCopy = (char *)cfgmR3StrAlloc(pNode->pVM, MM_TAG_CFGM_STRING, cchString + 1); + if (pszStringCopy) + { + memcpy(pszStringCopy, pszString, cchString); + pszStringCopy[cchString] = '\0'; + + /* + * Create value leaf and set it to string type. + */ + PCFGMLEAF pLeaf; + rc = cfgmR3InsertLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + pLeaf->enmType = CFGMVALUETYPE_STRING; + pLeaf->Value.String.psz = pszStringCopy; + pLeaf->Value.String.cb = cchString + 1; + } + else + cfgmR3StrFree(pNode->pVM, pszStringCopy); + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_CFGM_NO_PARENT; + + return rc; +} + + +/** + * Inserts a new string value. Calls strlen(pszString) internally; if you know the + * length of the string, CFGMR3InsertStringLengthKnown() is faster. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param pszString The value. + */ +VMMR3DECL(int) CFGMR3InsertString(PCFGMNODE pNode, const char *pszName, const char *pszString) +{ + return CFGMR3InsertStringN(pNode, pszName, pszString, strlen(pszString)); +} + + +/** + * Same as CFGMR3InsertString except the string value given in RTStrPrintfV + * fashion. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param pszFormat The value given as a format string. + * @param va Argument to pszFormat. + */ +VMMR3DECL(int) CFGMR3InsertStringFV(PCFGMNODE pNode, const char *pszName, const char *pszFormat, va_list va) +{ + int rc; + if (pNode) + { + /* + * Allocate string object first. + */ + char *pszString; + if (!pNode->pVM) + pszString = RTStrAPrintf2(pszFormat, va); + else + pszString = MMR3HeapAPrintfVU(pNode->pVM->pUVM, MM_TAG_CFGM_STRING, pszFormat, va); + if (pszString) + { + /* + * Create value leaf and set it to string type. 
+ */ + PCFGMLEAF pLeaf; + rc = cfgmR3InsertLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + pLeaf->enmType = CFGMVALUETYPE_STRING; + pLeaf->Value.String.psz = pszString; + pLeaf->Value.String.cb = strlen(pszString) + 1; + } + else + cfgmR3StrFree(pNode->pVM, pszString); + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_CFGM_NO_PARENT; + + return rc; +} + + +/** + * Same as CFGMR3InsertString except the string value given in RTStrPrintf + * fashion. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param pszFormat The value given as a format string. + * @param ... Argument to pszFormat. + */ +VMMR3DECL(int) CFGMR3InsertStringF(PCFGMNODE pNode, const char *pszName, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + int rc = CFGMR3InsertStringFV(pNode, pszName, pszFormat, va); + va_end(va); + return rc; +} + + +/** + * Same as CFGMR3InsertString except the string value given as a UTF-16 string. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param pwszValue The string value (UTF-16). + */ +VMMR3DECL(int) CFGMR3InsertStringW(PCFGMNODE pNode, const char *pszName, PCRTUTF16 pwszValue) +{ + char *pszValue; + int rc = RTUtf16ToUtf8(pwszValue, &pszValue); + if (RT_SUCCESS(rc)) + { + rc = CFGMR3InsertString(pNode, pszName, pszValue); + RTStrFree(pszValue); + } + return rc; +} + + +/** + * Inserts a new integer value. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Value name. + * @param pvBytes The value. + * @param cbBytes The value size. + */ +VMMR3DECL(int) CFGMR3InsertBytes(PCFGMNODE pNode, const char *pszName, const void *pvBytes, size_t cbBytes) +{ + int rc; + if (pNode) + { + if (cbBytes == (RTUINT)cbBytes) + { + /* + * Allocate string object first. + */ + void *pvCopy = cfgmR3MemAlloc(pNode->pVM, MM_TAG_CFGM_STRING, cbBytes); + if (pvCopy || !cbBytes) + { + memcpy(pvCopy, pvBytes, cbBytes); + + /* + * Create value leaf and set it to string type. + */ + PCFGMLEAF pLeaf; + rc = cfgmR3InsertLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + pLeaf->enmType = CFGMVALUETYPE_BYTES; + pLeaf->Value.Bytes.cb = cbBytes; + pLeaf->Value.Bytes.pau8 = (uint8_t *)pvCopy; + } + else + cfgmR3MemFree(pNode->pVM, pvCopy); + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_OUT_OF_RANGE; + } + else + rc = VERR_CFGM_NO_PARENT; + + return rc; +} + + +/** + * Make a copy of the specified value under the given node. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pValue The value to copy and insert. + */ +VMMR3DECL(int) CFGMR3InsertValue(PCFGMNODE pNode, PCFGMLEAF pValue) +{ + int rc; + switch (pValue->enmType) + { + case CFGMVALUETYPE_INTEGER: + rc = CFGMR3InsertInteger(pNode, pValue->szName, pValue->Value.Integer.u64); + break; + + case CFGMVALUETYPE_BYTES: + rc = CFGMR3InsertBytes(pNode, pValue->szName, pValue->Value.Bytes.pau8, pValue->Value.Bytes.cb); + break; + + case CFGMVALUETYPE_STRING: + rc = CFGMR3InsertStringN(pNode, pValue->szName, pValue->Value.String.psz, pValue->Value.String.cb - 1); + break; + + default: + rc = VERR_CFGM_IPE_1; + AssertMsgFailed(("Invalid value type %d\n", pValue->enmType)); + break; + } + return rc; +} + + +/** + * Remove a value. + * + * @returns VBox status code. + * @param pNode Parent node. + * @param pszName Name of the new child node. 
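+ *                      (That is, the name of the value to remove.)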
+ */ +VMMR3DECL(int) CFGMR3RemoveValue(PCFGMNODE pNode, const char *pszName) +{ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + cfgmR3RemoveLeaf(pNode, pLeaf); + return rc; +} + + + +/* + * -+- helper apis -+- + */ + + +/** + * Query unsigned 64-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu64 Where to store the integer value. + */ +VMMR3DECL(int) CFGMR3QueryU64(PCFGMNODE pNode, const char *pszName, uint64_t *pu64) +{ + return CFGMR3QueryInteger(pNode, pszName, pu64); +} + + +/** + * Query unsigned 64-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu64 Where to store the integer value. Set to default on failure. + * @param u64Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryU64Def(PCFGMNODE pNode, const char *pszName, uint64_t *pu64, uint64_t u64Def) +{ + return CFGMR3QueryIntegerDef(pNode, pszName, pu64, u64Def); +} + + +/** + * Query signed 64-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi64 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryS64(PCFGMNODE pNode, const char *pszName, int64_t *pi64) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + *pi64 = (int64_t)u64; + return rc; +} + + +/** + * Query signed 64-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi64 Where to store the value. Set to default on failure. + * @param i64Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryS64Def(PCFGMNODE pNode, const char *pszName, int64_t *pi64, int64_t i64Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, i64Def); + *pi64 = (int64_t)u64; + return rc; +} + + +/** + * Query unsigned 32-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu32 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryU32(PCFGMNODE pNode, const char *pszName, uint32_t *pu32) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + if (!(u64 & UINT64_C(0xffffffff00000000))) + *pu32 = (uint32_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query unsigned 32-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu32 Where to store the value. Set to default on failure. + * @param u32Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryU32Def(PCFGMNODE pNode, const char *pszName, uint32_t *pu32, uint32_t u32Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, u32Def); + if (RT_SUCCESS(rc)) + { + if (!(u64 & UINT64_C(0xffffffff00000000))) + *pu32 = (uint32_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pu32 = u32Def; + return rc; +} + + +/** + * Query signed 32-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. 
+ * @param pi32 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryS32(PCFGMNODE pNode, const char *pszName, int32_t *pi32) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + if ( !(u64 & UINT64_C(0xffffffff80000000)) + || (u64 & UINT64_C(0xffffffff80000000)) == UINT64_C(0xffffffff80000000)) + *pi32 = (int32_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query signed 32-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi32 Where to store the value. Set to default on failure. + * @param i32Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryS32Def(PCFGMNODE pNode, const char *pszName, int32_t *pi32, int32_t i32Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, i32Def); + if (RT_SUCCESS(rc)) + { + if ( !(u64 & UINT64_C(0xffffffff80000000)) + || (u64 & UINT64_C(0xffffffff80000000)) == UINT64_C(0xffffffff80000000)) + *pi32 = (int32_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pi32 = i32Def; + return rc; +} + + +/** + * Query unsigned 16-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu16 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryU16(PCFGMNODE pNode, const char *pszName, uint16_t *pu16) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + if (!(u64 & UINT64_C(0xffffffffffff0000))) + *pu16 = (int16_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query unsigned 16-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu16 Where to store the value. Set to default on failure. + * @param u16Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryU16Def(PCFGMNODE pNode, const char *pszName, uint16_t *pu16, uint16_t u16Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, u16Def); + if (RT_SUCCESS(rc)) + { + if (!(u64 & UINT64_C(0xffffffffffff0000))) + *pu16 = (int16_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pu16 = u16Def; + return rc; +} + + +/** + * Query signed 16-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi16 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryS16(PCFGMNODE pNode, const char *pszName, int16_t *pi16) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + if ( !(u64 & UINT64_C(0xffffffffffff8000)) + || (u64 & UINT64_C(0xffffffffffff8000)) == UINT64_C(0xffffffffffff8000)) + *pi16 = (int16_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query signed 16-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi16 Where to store the value. Set to default on failure. + * @param i16Def The default value. 
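+ *
+ * @remarks The range check below accepts exactly those 64-bit values that are
+ *          valid sign extensions of an int16_t, i.e. bits 15 thru 63 must be
+ *          either all zero or all set.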
+ */ +VMMR3DECL(int) CFGMR3QueryS16Def(PCFGMNODE pNode, const char *pszName, int16_t *pi16, int16_t i16Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, i16Def); + if (RT_SUCCESS(rc)) + { + if ( !(u64 & UINT64_C(0xffffffffffff8000)) + || (u64 & UINT64_C(0xffffffffffff8000)) == UINT64_C(0xffffffffffff8000)) + *pi16 = (int16_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pi16 = i16Def; + return rc; +} + + +/** + * Query unsigned 8-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu8 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryU8(PCFGMNODE pNode, const char *pszName, uint8_t *pu8) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + if (!(u64 & UINT64_C(0xffffffffffffff00))) + *pu8 = (uint8_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query unsigned 8-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu8 Where to store the value. Set to default on failure. + * @param u8Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryU8Def(PCFGMNODE pNode, const char *pszName, uint8_t *pu8, uint8_t u8Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, u8Def); + if (RT_SUCCESS(rc)) + { + if (!(u64 & UINT64_C(0xffffffffffffff00))) + *pu8 = (uint8_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pu8 = u8Def; + return rc; +} + + +/** + * Query signed 8-bit integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi8 Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryS8(PCFGMNODE pNode, const char *pszName, int8_t *pi8) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + if ( !(u64 & UINT64_C(0xffffffffffffff80)) + || (u64 & UINT64_C(0xffffffffffffff80)) == UINT64_C(0xffffffffffffff80)) + *pi8 = (int8_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query signed 8-bit integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi8 Where to store the value. Set to default on failure. + * @param i8Def The default value. + */ +VMMR3DECL(int) CFGMR3QueryS8Def(PCFGMNODE pNode, const char *pszName, int8_t *pi8, int8_t i8Def) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, i8Def); + if (RT_SUCCESS(rc)) + { + if ( !(u64 & UINT64_C(0xffffffffffffff80)) + || (u64 & UINT64_C(0xffffffffffffff80)) == UINT64_C(0xffffffffffffff80)) + *pi8 = (int8_t)u64; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pi8 = i8Def; + return rc; +} + + +/** + * Query boolean integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pf Where to store the value. + * @remark This function will interpret any non-zero value as true. + */ +VMMR3DECL(int) CFGMR3QueryBool(PCFGMNODE pNode, const char *pszName, bool *pf) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + *pf = u64 ? 
true : false; + return rc; +} + + +/** + * Query boolean integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pf Where to store the value. Set to default on failure. + * @param fDef The default value. + * @remark This function will interpret any non-zero value as true. + */ +VMMR3DECL(int) CFGMR3QueryBoolDef(PCFGMNODE pNode, const char *pszName, bool *pf, bool fDef) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, fDef); + *pf = u64 ? true : false; + return rc; +} + + +/** + * Query I/O port address value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pPort Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryPort(PCFGMNODE pNode, const char *pszName, PRTIOPORT pPort) +{ + AssertCompileSize(RTIOPORT, 2); + return CFGMR3QueryU16(pNode, pszName, pPort); +} + + +/** + * Query I/O port address value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pPort Where to store the value. Set to default on failure. + * @param PortDef The default value. + */ +VMMR3DECL(int) CFGMR3QueryPortDef(PCFGMNODE pNode, const char *pszName, PRTIOPORT pPort, RTIOPORT PortDef) +{ + AssertCompileSize(RTIOPORT, 2); + return CFGMR3QueryU16Def(pNode, pszName, pPort, PortDef); +} + + +/** + * Query unsigned int address value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryUInt(PCFGMNODE pNode, const char *pszName, unsigned int *pu) +{ + AssertCompileSize(unsigned int, 4); + return CFGMR3QueryU32(pNode, pszName, (uint32_t *)pu); +} + + +/** + * Query unsigned int address value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pu Where to store the value. Set to default on failure. + * @param uDef The default value. + */ +VMMR3DECL(int) CFGMR3QueryUIntDef(PCFGMNODE pNode, const char *pszName, unsigned int *pu, unsigned int uDef) +{ + AssertCompileSize(unsigned int, 4); + return CFGMR3QueryU32Def(pNode, pszName, (uint32_t *)pu, uDef); +} + + +/** + * Query signed int address value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi Where to store the value. + */ +VMMR3DECL(int) CFGMR3QuerySInt(PCFGMNODE pNode, const char *pszName, signed int *pi) +{ + AssertCompileSize(signed int, 4); + return CFGMR3QueryS32(pNode, pszName, (int32_t *)pi); +} + + +/** + * Query unsigned int address value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pi Where to store the value. Set to default on failure. + * @param iDef The default value. + */ +VMMR3DECL(int) CFGMR3QuerySIntDef(PCFGMNODE pNode, const char *pszName, signed int *pi, signed int iDef) +{ + AssertCompileSize(signed int, 4); + return CFGMR3QueryS32Def(pNode, pszName, (int32_t *)pi, iDef); +} + + +/** + * Query pointer integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. 
+ * @param pszName Name of an integer value. + * @param ppv Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryPtr(PCFGMNODE pNode, const char *pszName, void **ppv) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + uintptr_t u = (uintptr_t)u64; + if (u64 == u) + *ppv = (void *)u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query pointer integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param ppv Where to store the value. Set to default on failure. + * @param pvDef The default value. + */ +VMMR3DECL(int) CFGMR3QueryPtrDef(PCFGMNODE pNode, const char *pszName, void **ppv, void *pvDef) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, (uintptr_t)pvDef); + if (RT_SUCCESS(rc)) + { + uintptr_t u = (uintptr_t)u64; + if (u64 == u) + *ppv = (void *)u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *ppv = pvDef; + return rc; +} + + +/** + * Query Guest Context pointer integer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pGCPtr Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryGCPtr(PCFGMNODE pNode, const char *pszName, PRTGCPTR pGCPtr) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + RTGCPTR u = (RTGCPTR)u64; + if (u64 == u) + *pGCPtr = u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query Guest Context pointer integer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pGCPtr Where to store the value. Set to default on failure. + * @param GCPtrDef The default value. + */ +VMMR3DECL(int) CFGMR3QueryGCPtrDef(PCFGMNODE pNode, const char *pszName, PRTGCPTR pGCPtr, RTGCPTR GCPtrDef) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, GCPtrDef); + if (RT_SUCCESS(rc)) + { + RTGCPTR u = (RTGCPTR)u64; + if (u64 == u) + *pGCPtr = u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pGCPtr = GCPtrDef; + return rc; +} + + +/** + * Query Guest Context unsigned pointer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pGCPtr Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryGCPtrU(PCFGMNODE pNode, const char *pszName, PRTGCUINTPTR pGCPtr) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + RTGCUINTPTR u = (RTGCUINTPTR)u64; + if (u64 == u) + *pGCPtr = u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query Guest Context unsigned pointer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pGCPtr Where to store the value. Set to default on failure. + * @param GCPtrDef The default value. 
+ */ +VMMR3DECL(int) CFGMR3QueryGCPtrUDef(PCFGMNODE pNode, const char *pszName, PRTGCUINTPTR pGCPtr, RTGCUINTPTR GCPtrDef) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, GCPtrDef); + if (RT_SUCCESS(rc)) + { + RTGCUINTPTR u = (RTGCUINTPTR)u64; + if (u64 == u) + *pGCPtr = u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pGCPtr = GCPtrDef; + return rc; +} + + +/** + * Query Guest Context signed pointer value. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pGCPtr Where to store the value. + */ +VMMR3DECL(int) CFGMR3QueryGCPtrS(PCFGMNODE pNode, const char *pszName, PRTGCINTPTR pGCPtr) +{ + uint64_t u64; + int rc = CFGMR3QueryInteger(pNode, pszName, &u64); + if (RT_SUCCESS(rc)) + { + RTGCINTPTR u = (RTGCINTPTR)u64; + if (u64 == (uint64_t)u) + *pGCPtr = u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + return rc; +} + + +/** + * Query Guest Context signed pointer value with default. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Name of an integer value. + * @param pGCPtr Where to store the value. Set to default on failure. + * @param GCPtrDef The default value. + */ +VMMR3DECL(int) CFGMR3QueryGCPtrSDef(PCFGMNODE pNode, const char *pszName, PRTGCINTPTR pGCPtr, RTGCINTPTR GCPtrDef) +{ + uint64_t u64; + int rc = CFGMR3QueryIntegerDef(pNode, pszName, &u64, GCPtrDef); + if (RT_SUCCESS(rc)) + { + RTGCINTPTR u = (RTGCINTPTR)u64; + if (u64 == (uint64_t)u) + *pGCPtr = u; + else + rc = VERR_CFGM_INTEGER_TOO_BIG; + } + if (RT_FAILURE(rc)) + *pGCPtr = GCPtrDef; + return rc; +} + + +/** + * Query zero terminated character value storing it in a + * buffer allocated from the MM heap. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. + * @param pszName Value name. This value must be of zero terminated character string type. + * @param ppszString Where to store the string pointer. + * Free this using MMR3HeapFree() (or RTStrFree if not + * associated with a pUVM - see CFGMR3CreateTree). + */ +VMMR3DECL(int) CFGMR3QueryStringAlloc(PCFGMNODE pNode, const char *pszName, char **ppszString) +{ + size_t cbString; + int rc = CFGMR3QuerySize(pNode, pszName, &cbString); + if (RT_SUCCESS(rc)) + { + char *pszString = cfgmR3StrAlloc(pNode->pVM, MM_TAG_CFGM_USER, cbString); + if (pszString) + { + rc = CFGMR3QueryString(pNode, pszName, pszString, cbString); + if (RT_SUCCESS(rc)) + *ppszString = pszString; + else + cfgmR3StrFree(pNode->pVM, pszString); + } + else + rc = VERR_NO_MEMORY; + } + return rc; +} + + +/** + * Query zero terminated character value storing it in a + * buffer allocated from the MM heap. + * + * @returns VBox status code. + * @param pNode Which node to search for pszName in. This cannot be + * NULL if @a pszDef is not NULL, because we need + * somewhere way to get to the VM in order to call + * MMR3HeapStrDup. + * @param pszName Value name. This value must be of zero terminated character string type. + * @param ppszString Where to store the string pointer. Not set on failure. + * Free this using MMR3HeapFree() (or RTStrFree if not + * associated with a pUVM - see CFGMR3CreateTree). + * @param pszDef The default return value. This can be NULL. + */ +VMMR3DECL(int) CFGMR3QueryStringAllocDef(PCFGMNODE pNode, const char *pszName, char **ppszString, const char *pszDef) +{ + Assert(pNode || !pszDef); /* We need pVM if we need to duplicate the string later. 
*/ + + /* + * (Don't call CFGMR3QuerySize and CFGMR3QueryStringDef here as the latter + * cannot handle pszDef being NULL.) + */ + PCFGMLEAF pLeaf; + int rc = cfgmR3ResolveLeaf(pNode, pszName, &pLeaf); + if (RT_SUCCESS(rc)) + { + if (pLeaf->enmType == CFGMVALUETYPE_STRING) + { + size_t const cbSrc = pLeaf->Value.String.cb; + char *pszString = cfgmR3StrAlloc(pNode->pVM, MM_TAG_CFGM_USER, cbSrc); + if (pszString) + { + memcpy(pszString, pLeaf->Value.String.psz, cbSrc); + *ppszString = pszString; + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_CFGM_NOT_STRING; + } + if (RT_FAILURE(rc)) + { + if (!pszDef) + *ppszString = NULL; + else + { + size_t const cbDef = strlen(pszDef) + 1; + *ppszString = cfgmR3StrAlloc(pNode->pVM, MM_TAG_CFGM_USER, cbDef); + memcpy(*ppszString, pszDef, cbDef); + } + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + rc = VINF_SUCCESS; + } + + return rc; +} + + +/** + * Dumps the configuration (sub)tree to the release log. + * + * @param pRoot The root node of the dump. + */ +VMMR3DECL(void) CFGMR3Dump(PCFGMNODE pRoot) +{ + bool fOldBuffered = RTLogRelSetBuffering(true /*fBuffered*/); + LogRel(("************************* CFGM dump *************************\n")); + cfgmR3Dump(pRoot, 0, DBGFR3InfoLogRelHlp()); + LogRel(("********************* End of CFGM dump **********************\n")); + RTLogRelSetBuffering(fOldBuffered); +} + + +/** + * Info handler, internal version. + * + * @param pVM The cross context VM structure. + * @param pHlp Callback functions for doing output. + * @param pszArgs Argument string. Optional and specific to the handler. + */ +static DECLCALLBACK(void) cfgmR3Info(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /* + * Figure where to start. + */ + PCFGMNODE pRoot = pVM->cfgm.s.pRoot; + if (pszArgs && *pszArgs) + { + int rc = cfgmR3ResolveNode(pRoot, pszArgs, &pRoot); + if (RT_FAILURE(rc)) + { + pHlp->pfnPrintf(pHlp, "Failed to resolve CFGM path '%s', %Rrc", pszArgs, rc); + return; + } + } + + /* + * Dump the specified tree. + */ + pHlp->pfnPrintf(pHlp, "pRoot=%p:{", pRoot); + cfgmR3DumpPath(pRoot, pHlp); + pHlp->pfnPrintf(pHlp, "}\n"); + cfgmR3Dump(pRoot, 0, pHlp); +} + + +/** + * Recursively prints a path name. + */ +static void cfgmR3DumpPath(PCFGMNODE pNode, PCDBGFINFOHLP pHlp) +{ + if (pNode->pParent) + cfgmR3DumpPath(pNode->pParent, pHlp); + pHlp->pfnPrintf(pHlp, "%s/", pNode->szName); +} + + +/** + * Dumps a branch of a tree. + */ +static void cfgmR3Dump(PCFGMNODE pRoot, unsigned iLevel, PCDBGFINFOHLP pHlp) +{ + /* + * Path. + */ + pHlp->pfnPrintf(pHlp, "["); + cfgmR3DumpPath(pRoot, pHlp); + pHlp->pfnPrintf(pHlp, "] (level %d)%s\n", iLevel, pRoot->fRestrictedRoot ? " (restricted root)" : ""); + + /* + * Values. 
+ */ + PCFGMLEAF pLeaf; + size_t cchMax = 0; + for (pLeaf = CFGMR3GetFirstValue(pRoot); pLeaf; pLeaf = CFGMR3GetNextValue(pLeaf)) + cchMax = RT_MAX(cchMax, pLeaf->cchName); + for (pLeaf = CFGMR3GetFirstValue(pRoot); pLeaf; pLeaf = CFGMR3GetNextValue(pLeaf)) + { + switch (CFGMR3GetValueType(pLeaf)) + { + case CFGMVALUETYPE_INTEGER: + { + pHlp->pfnPrintf(pHlp, " %-*s = %#018llx (%'lld", (int)cchMax, pLeaf->szName, pLeaf->Value.Integer.u64, pLeaf->Value.Integer.u64); + if ( ( pLeaf->cchName >= 4 + && !RTStrCmp(&pLeaf->szName[pLeaf->cchName - 4], "Size")) + || ( pLeaf->cchName >= 2 + && !RTStrNCmp(pLeaf->szName, "cb", 2)) ) + { + if (pLeaf->Value.Integer.u64 > _2M) + pHlp->pfnPrintf(pHlp, ", %'lld MB", pLeaf->Value.Integer.u64 / _1M); + else if (pLeaf->Value.Integer.u64 > _2K) + pHlp->pfnPrintf(pHlp, ", %'lld KB", pLeaf->Value.Integer.u64 / _1K); + if (pLeaf->Value.Integer.u64 > _2G) + pHlp->pfnPrintf(pHlp, ", %'lld.%lld GB", + pLeaf->Value.Integer.u64 / _1G, + (pLeaf->Value.Integer.u64 % _1G) / (_1G / 10)); + } + pHlp->pfnPrintf(pHlp, ")\n"); + break; + } + + case CFGMVALUETYPE_STRING: + pHlp->pfnPrintf(pHlp, " %-*s = \"%s\" (cb=%zu)\n", (int)cchMax, pLeaf->szName, pLeaf->Value.String.psz, pLeaf->Value.String.cb); + break; + + case CFGMVALUETYPE_BYTES: + pHlp->pfnPrintf(pHlp, " %-*s = \"%.*Rhxs\" (cb=%zu)\n", (int)cchMax, pLeaf->szName, pLeaf->Value.Bytes.cb, pLeaf->Value.Bytes.pau8, pLeaf->Value.Bytes.cb); + break; + + default: + AssertMsgFailed(("bad leaf!\n")); + break; + } + } + pHlp->pfnPrintf(pHlp, "\n"); + + /* + * Children. + */ + for (PCFGMNODE pChild = CFGMR3GetFirstChild(pRoot); pChild; pChild = CFGMR3GetNextChild(pChild)) + { + Assert(pChild->pNext != pChild); + Assert(pChild->pPrev != pChild); + Assert(pChild->pPrev != pChild->pNext || !pChild->pPrev); + Assert(pChild->pFirstChild != pChild); + Assert(pChild->pParent == pRoot); + cfgmR3Dump(pChild, iLevel + 1, pHlp); + } +} + diff --git a/src/VBox/VMM/VMMR3/CPUM.cpp b/src/VBox/VMM/VMMR3/CPUM.cpp new file mode 100644 index 00000000..9f301998 --- /dev/null +++ b/src/VBox/VMM/VMMR3/CPUM.cpp @@ -0,0 +1,4228 @@ +/* $Id: CPUM.cpp $ */ +/** @file + * CPUM - CPU Monitor / Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_cpum CPUM - CPU Monitor / Manager + * + * The CPU Monitor / Manager keeps track of all the CPU registers. It is + * also responsible for lazy FPU handling and some of the context loading + * in raw mode. + * + * There are three CPU contexts, the most important one is the guest one (GC). + * When running in raw-mode (RC) there is a special hyper context for the VMM + * part that floats around inside the guest address space. When running in + * raw-mode, CPUM also maintains a host context for saving and restoring + * registers across world switches. This latter is done in cooperation with the + * world switcher (@see pg_vmm). + * + * @see grp_cpum + * + * @section sec_cpum_fpu FPU / SSE / AVX / ++ state. + * + * TODO: proper write up, currently just some notes. 
+ *
+ * The ring-0 FPU handling per OS:
+ *
+ * - 64-bit Windows uses XMM registers in the kernel as part of the calling
+ *   convention (Visual C++ doesn't seem to have a way to disable
+ *   generating such code either), so CR0.TS/EM are always zero from what I
+ *   can tell. We are also forced to always load/save the guest XMM0-XMM15
+ *   registers when entering/leaving guest context. Interrupt handlers
+ *   using FPU/SSE will officially have to call save and restore functions
+ *   exported by the kernel, if they really, really have to use the state.
+ *
+ * - 32-bit Windows does lazy FPU handling, I think, probably including
+ *   lazy saving. The Windows Internals book states that it's a bad
+ *   idea to use the FPU in kernel space. However, it looks like it will
+ *   restore the FPU state of the current thread in case of a kernel \#NM.
+ *   Interrupt handlers should be the same as for 64-bit.
+ *
+ * - Darwin allows taking \#NM in kernel space, restoring the current thread's
+ *   state if I read the code correctly. It saves the FPU state of the
+ *   outgoing thread, and uses CR0.TS to lazily load the state of the
+ *   incoming one. No idea yet how the FPU is treated by interrupt
+ *   handlers, i.e. whether they are allowed to disable the state or
+ *   something.
+ *
+ * - Linux also allows \#NM in kernel space (don't know since when), and
+ *   uses CR0.TS for lazy loading. It saves the outgoing thread's state and
+ *   lazily loads the incoming one unless configured to aggressively load it.
+ *   Interrupt handlers can ask whether they're allowed to use the FPU, and may
+ *   freely trash the state if Linux thinks it has saved the thread's state
+ *   already. This is a problem.
+ *
+ * - Solaris will, from what I can tell, panic if it gets an \#NM in kernel
+ *   context. When switching threads, the kernel will save the state of
+ *   the outgoing thread and lazily load the incoming one using CR0.TS.
+ *   There are a few routines in sseblk.s which use the SSE unit in ring-0
+ *   to do stuff; HAT is among the users. The routines there will
+ *   manually clear CR0.TS and save the XMM registers they use only if
+ *   CR0.TS was zero upon entry. They will skip it when not, because as
+ *   mentioned above, the FPU state is saved when switching away from a
+ *   thread and CR0.TS set to 1, so when CR0.TS is 1 there is nothing to
+ *   preserve. This is a problem if we restore CR0.TS to 1 after loading
+ *   the guest state.
+ *
+ * - FreeBSD - no idea yet.
+ *
+ * - OS/2 does not allow \#NMs in kernel space IIRC. Does lazy loading,
+ *   possibly also lazy saving. Interrupts must preserve the CR0.TS+EM &
+ *   FPU states.
+ *
+ * Up to r107425 (2016-05-24) we would only temporarily modify CR0.TS/EM while
+ * saving and restoring the host and guest states. The motivation for this
+ * change is that we want to be able to emulate SSE instructions in ring-0 (IEM).
+ *
+ * Starting with that change, we will leave CR0.TS=EM=0 after saving the host
+ * state and only restore it once we've restored the host FPU state. This has the
+ * accidental side effect of triggering Solaris to preserve XMM registers in
+ * sseblk.s. When CR0 is changed by saving the FPU state, CPUM must now inform
+ * the VT-x (HMVMX) code about it as it caches the CR0 value in the VMCS.
+ *
+ *
+ * @section sec_cpum_logging Logging Level Assignments.
+ *
+ * The following log level assignments are used:
+ *      - Log6 is used for FPU state management.
+ *      - Log7 is used for FPU state actualization.
+ * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_CPUM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "CPUMInternal.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** + * This was used in the saved state up to the early life of version 14. + * + * It indicates that we may have some out-of-sync hidden segement registers. + * It is only relevant for raw-mode. + */ +#define CPUM_CHANGED_HIDDEN_SEL_REGS_INVALID RT_BIT(12) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ + +/** + * What kind of cpu info dump to perform. + */ +typedef enum CPUMDUMPTYPE +{ + CPUMDUMPTYPE_TERSE, + CPUMDUMPTYPE_DEFAULT, + CPUMDUMPTYPE_VERBOSE +} CPUMDUMPTYPE; +/** Pointer to a cpu info dump type. */ +typedef CPUMDUMPTYPE *PCPUMDUMPTYPE; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) cpumR3LiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass); +static DECLCALLBACK(int) cpumR3SaveExec(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) cpumR3LoadPrep(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) cpumR3LoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(int) cpumR3LoadDone(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(void) cpumR3InfoAll(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) cpumR3InfoGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) cpumR3InfoGuestHwvirt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) cpumR3InfoGuestInstr(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) cpumR3InfoHyper(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) cpumR3InfoHost(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** Saved state field descriptors for CPUMCTX. 
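+ *
+ * Each SSMFIELD_ENTRY() names a CPUMCTX member that is saved and loaded as-is;
+ * the SSMFIELD_ENTRY_VER() entries only apply from the indicated saved-state
+ * version on, and SSMFIELD_ENTRY_TERM() terminates the table.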
*/ +static const SSMFIELD g_aCpumCtxFields[] = +{ + SSMFIELD_ENTRY( CPUMCTX, rdi), + SSMFIELD_ENTRY( CPUMCTX, rsi), + SSMFIELD_ENTRY( CPUMCTX, rbp), + SSMFIELD_ENTRY( CPUMCTX, rax), + SSMFIELD_ENTRY( CPUMCTX, rbx), + SSMFIELD_ENTRY( CPUMCTX, rdx), + SSMFIELD_ENTRY( CPUMCTX, rcx), + SSMFIELD_ENTRY( CPUMCTX, rsp), + SSMFIELD_ENTRY( CPUMCTX, rflags), + SSMFIELD_ENTRY( CPUMCTX, rip), + SSMFIELD_ENTRY( CPUMCTX, r8), + SSMFIELD_ENTRY( CPUMCTX, r9), + SSMFIELD_ENTRY( CPUMCTX, r10), + SSMFIELD_ENTRY( CPUMCTX, r11), + SSMFIELD_ENTRY( CPUMCTX, r12), + SSMFIELD_ENTRY( CPUMCTX, r13), + SSMFIELD_ENTRY( CPUMCTX, r14), + SSMFIELD_ENTRY( CPUMCTX, r15), + SSMFIELD_ENTRY( CPUMCTX, es.Sel), + SSMFIELD_ENTRY( CPUMCTX, es.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, es.fFlags), + SSMFIELD_ENTRY( CPUMCTX, es.u64Base), + SSMFIELD_ENTRY( CPUMCTX, es.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, es.Attr), + SSMFIELD_ENTRY( CPUMCTX, cs.Sel), + SSMFIELD_ENTRY( CPUMCTX, cs.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, cs.fFlags), + SSMFIELD_ENTRY( CPUMCTX, cs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, cs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, cs.Attr), + SSMFIELD_ENTRY( CPUMCTX, ss.Sel), + SSMFIELD_ENTRY( CPUMCTX, ss.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, ss.fFlags), + SSMFIELD_ENTRY( CPUMCTX, ss.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ss.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ss.Attr), + SSMFIELD_ENTRY( CPUMCTX, ds.Sel), + SSMFIELD_ENTRY( CPUMCTX, ds.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, ds.fFlags), + SSMFIELD_ENTRY( CPUMCTX, ds.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ds.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ds.Attr), + SSMFIELD_ENTRY( CPUMCTX, fs.Sel), + SSMFIELD_ENTRY( CPUMCTX, fs.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, fs.fFlags), + SSMFIELD_ENTRY( CPUMCTX, fs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, fs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, fs.Attr), + SSMFIELD_ENTRY( CPUMCTX, gs.Sel), + SSMFIELD_ENTRY( CPUMCTX, gs.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, gs.fFlags), + SSMFIELD_ENTRY( CPUMCTX, gs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, gs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, gs.Attr), + SSMFIELD_ENTRY( CPUMCTX, cr0), + SSMFIELD_ENTRY( CPUMCTX, cr2), + SSMFIELD_ENTRY( CPUMCTX, cr3), + SSMFIELD_ENTRY( CPUMCTX, cr4), + SSMFIELD_ENTRY( CPUMCTX, dr[0]), + SSMFIELD_ENTRY( CPUMCTX, dr[1]), + SSMFIELD_ENTRY( CPUMCTX, dr[2]), + SSMFIELD_ENTRY( CPUMCTX, dr[3]), + SSMFIELD_ENTRY( CPUMCTX, dr[6]), + SSMFIELD_ENTRY( CPUMCTX, dr[7]), + SSMFIELD_ENTRY( CPUMCTX, gdtr.cbGdt), + SSMFIELD_ENTRY( CPUMCTX, gdtr.pGdt), + SSMFIELD_ENTRY( CPUMCTX, idtr.cbIdt), + SSMFIELD_ENTRY( CPUMCTX, idtr.pIdt), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.cs), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.eip), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.esp), + SSMFIELD_ENTRY( CPUMCTX, msrEFER), + SSMFIELD_ENTRY( CPUMCTX, msrSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrPAT), + SSMFIELD_ENTRY( CPUMCTX, msrLSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrCSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrSFMASK), + SSMFIELD_ENTRY( CPUMCTX, msrKERNELGSBASE), + SSMFIELD_ENTRY( CPUMCTX, ldtr.Sel), + SSMFIELD_ENTRY( CPUMCTX, ldtr.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, ldtr.fFlags), + SSMFIELD_ENTRY( CPUMCTX, ldtr.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ldtr.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ldtr.Attr), + SSMFIELD_ENTRY( CPUMCTX, tr.Sel), + SSMFIELD_ENTRY( CPUMCTX, tr.ValidSel), + SSMFIELD_ENTRY( CPUMCTX, tr.fFlags), + SSMFIELD_ENTRY( CPUMCTX, tr.u64Base), + SSMFIELD_ENTRY( CPUMCTX, tr.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, tr.Attr), + SSMFIELD_ENTRY_VER( CPUMCTX, aXcr[0], CPUM_SAVED_STATE_VERSION_XSAVE), + SSMFIELD_ENTRY_VER( CPUMCTX, aXcr[1], 
CPUM_SAVED_STATE_VERSION_XSAVE), + SSMFIELD_ENTRY_VER( CPUMCTX, fXStateMask, CPUM_SAVED_STATE_VERSION_XSAVE), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for SVM nested hardware-virtualization + * Host State. */ +static const SSMFIELD g_aSvmHwvirtHostState[] = +{ + SSMFIELD_ENTRY( SVMHOSTSTATE, uEferMsr), + SSMFIELD_ENTRY( SVMHOSTSTATE, uCr0), + SSMFIELD_ENTRY( SVMHOSTSTATE, uCr4), + SSMFIELD_ENTRY( SVMHOSTSTATE, uCr3), + SSMFIELD_ENTRY( SVMHOSTSTATE, uRip), + SSMFIELD_ENTRY( SVMHOSTSTATE, uRsp), + SSMFIELD_ENTRY( SVMHOSTSTATE, uRax), + SSMFIELD_ENTRY( SVMHOSTSTATE, rflags), + SSMFIELD_ENTRY( SVMHOSTSTATE, es.Sel), + SSMFIELD_ENTRY( SVMHOSTSTATE, es.ValidSel), + SSMFIELD_ENTRY( SVMHOSTSTATE, es.fFlags), + SSMFIELD_ENTRY( SVMHOSTSTATE, es.u64Base), + SSMFIELD_ENTRY( SVMHOSTSTATE, es.u32Limit), + SSMFIELD_ENTRY( SVMHOSTSTATE, es.Attr), + SSMFIELD_ENTRY( SVMHOSTSTATE, cs.Sel), + SSMFIELD_ENTRY( SVMHOSTSTATE, cs.ValidSel), + SSMFIELD_ENTRY( SVMHOSTSTATE, cs.fFlags), + SSMFIELD_ENTRY( SVMHOSTSTATE, cs.u64Base), + SSMFIELD_ENTRY( SVMHOSTSTATE, cs.u32Limit), + SSMFIELD_ENTRY( SVMHOSTSTATE, cs.Attr), + SSMFIELD_ENTRY( SVMHOSTSTATE, ss.Sel), + SSMFIELD_ENTRY( SVMHOSTSTATE, ss.ValidSel), + SSMFIELD_ENTRY( SVMHOSTSTATE, ss.fFlags), + SSMFIELD_ENTRY( SVMHOSTSTATE, ss.u64Base), + SSMFIELD_ENTRY( SVMHOSTSTATE, ss.u32Limit), + SSMFIELD_ENTRY( SVMHOSTSTATE, ss.Attr), + SSMFIELD_ENTRY( SVMHOSTSTATE, ds.Sel), + SSMFIELD_ENTRY( SVMHOSTSTATE, ds.ValidSel), + SSMFIELD_ENTRY( SVMHOSTSTATE, ds.fFlags), + SSMFIELD_ENTRY( SVMHOSTSTATE, ds.u64Base), + SSMFIELD_ENTRY( SVMHOSTSTATE, ds.u32Limit), + SSMFIELD_ENTRY( SVMHOSTSTATE, ds.Attr), + SSMFIELD_ENTRY( SVMHOSTSTATE, gdtr.cbGdt), + SSMFIELD_ENTRY( SVMHOSTSTATE, gdtr.pGdt), + SSMFIELD_ENTRY( SVMHOSTSTATE, idtr.cbIdt), + SSMFIELD_ENTRY( SVMHOSTSTATE, idtr.pIdt), + SSMFIELD_ENTRY_IGNORE(SVMHOSTSTATE, abPadding), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for CPUMCTX. 
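+ * (These describe the X86FXSTATE portion, i.e. the x87 and SSE register area.)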
*/ +static const SSMFIELD g_aCpumX87Fields[] = +{ + SSMFIELD_ENTRY( X86FXSTATE, FCW), + SSMFIELD_ENTRY( X86FXSTATE, FSW), + SSMFIELD_ENTRY( X86FXSTATE, FTW), + SSMFIELD_ENTRY( X86FXSTATE, FOP), + SSMFIELD_ENTRY( X86FXSTATE, FPUIP), + SSMFIELD_ENTRY( X86FXSTATE, CS), + SSMFIELD_ENTRY( X86FXSTATE, Rsrvd1), + SSMFIELD_ENTRY( X86FXSTATE, FPUDP), + SSMFIELD_ENTRY( X86FXSTATE, DS), + SSMFIELD_ENTRY( X86FXSTATE, Rsrvd2), + SSMFIELD_ENTRY( X86FXSTATE, MXCSR), + SSMFIELD_ENTRY( X86FXSTATE, MXCSR_MASK), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[0]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[1]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[2]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[3]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[4]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[5]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[6]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[7]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[0]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[1]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[2]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[3]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[4]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[5]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[6]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[7]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[8]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[9]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[10]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[11]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[12]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[13]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[14]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[15]), + SSMFIELD_ENTRY_VER( X86FXSTATE, au32RsrvdForSoftware[0], CPUM_SAVED_STATE_VERSION_XSAVE), /* 32-bit/64-bit hack */ + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for X86XSAVEHDR. */ +static const SSMFIELD g_aCpumXSaveHdrFields[] = +{ + SSMFIELD_ENTRY( X86XSAVEHDR, bmXState), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for X86XSAVEYMMHI. */ +static const SSMFIELD g_aCpumYmmHiFields[] = +{ + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[0]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[1]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[2]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[3]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[4]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[5]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[6]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[7]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[8]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[9]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[10]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[11]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[12]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[13]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[14]), + SSMFIELD_ENTRY( X86XSAVEYMMHI, aYmmHi[15]), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for X86XSAVEBNDREGS. */ +static const SSMFIELD g_aCpumBndRegsFields[] = +{ + SSMFIELD_ENTRY( X86XSAVEBNDREGS, aRegs[0]), + SSMFIELD_ENTRY( X86XSAVEBNDREGS, aRegs[1]), + SSMFIELD_ENTRY( X86XSAVEBNDREGS, aRegs[2]), + SSMFIELD_ENTRY( X86XSAVEBNDREGS, aRegs[3]), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for X86XSAVEBNDCFG. */ +static const SSMFIELD g_aCpumBndCfgFields[] = +{ + SSMFIELD_ENTRY( X86XSAVEBNDCFG, fConfig), + SSMFIELD_ENTRY( X86XSAVEBNDCFG, fStatus), + SSMFIELD_ENTRY_TERM() +}; + +#if 0 /** @todo */ +/** Saved state field descriptors for X86XSAVEOPMASK. 
*/ +static const SSMFIELD g_aCpumOpmaskFields[] = +{ + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[0]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[1]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[2]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[3]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[4]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[5]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[6]), + SSMFIELD_ENTRY( X86XSAVEOPMASK, aKRegs[7]), + SSMFIELD_ENTRY_TERM() +}; +#endif + +/** Saved state field descriptors for X86XSAVEZMMHI256. */ +static const SSMFIELD g_aCpumZmmHi256Fields[] = +{ + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[0]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[1]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[2]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[3]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[4]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[5]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[6]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[7]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[8]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[9]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[10]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[11]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[12]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[13]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[14]), + SSMFIELD_ENTRY( X86XSAVEZMMHI256, aHi256Regs[15]), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for X86XSAVEZMM16HI. */ +static const SSMFIELD g_aCpumZmm16HiFields[] = +{ + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[0]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[1]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[2]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[3]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[4]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[5]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[6]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[7]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[8]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[9]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[10]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[11]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[12]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[13]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[14]), + SSMFIELD_ENTRY( X86XSAVEZMM16HI, aRegs[15]), + SSMFIELD_ENTRY_TERM() +}; + + + +/** Saved state field descriptors for CPUMCTX in V4.1 before the hidden selector + * registeres changed. 
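 * (Illustrative note, not part of the upstream source: the *FieldsMem tables here and the
 * *FieldsV16 tables further down mirror the in-memory layouts used by older saved-state
 * formats, so snapshots written by earlier VirtualBox versions can still be loaded with
 * the same SSM field machinery.)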
*/ +static const SSMFIELD g_aCpumX87FieldsMem[] = +{ + SSMFIELD_ENTRY( X86FXSTATE, FCW), + SSMFIELD_ENTRY( X86FXSTATE, FSW), + SSMFIELD_ENTRY( X86FXSTATE, FTW), + SSMFIELD_ENTRY( X86FXSTATE, FOP), + SSMFIELD_ENTRY( X86FXSTATE, FPUIP), + SSMFIELD_ENTRY( X86FXSTATE, CS), + SSMFIELD_ENTRY( X86FXSTATE, Rsrvd1), + SSMFIELD_ENTRY( X86FXSTATE, FPUDP), + SSMFIELD_ENTRY( X86FXSTATE, DS), + SSMFIELD_ENTRY( X86FXSTATE, Rsrvd2), + SSMFIELD_ENTRY( X86FXSTATE, MXCSR), + SSMFIELD_ENTRY( X86FXSTATE, MXCSR_MASK), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[0]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[1]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[2]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[3]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[4]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[5]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[6]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[7]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[0]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[1]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[2]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[3]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[4]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[5]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[6]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[7]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[8]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[9]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[10]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[11]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[12]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[13]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[14]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[15]), + SSMFIELD_ENTRY_IGNORE( X86FXSTATE, au32RsrvdRest), + SSMFIELD_ENTRY_IGNORE( X86FXSTATE, au32RsrvdForSoftware), +}; + +/** Saved state field descriptors for CPUMCTX in V4.1 before the hidden selector + * registeres changed. */ +static const SSMFIELD g_aCpumCtxFieldsMem[] = +{ + SSMFIELD_ENTRY( CPUMCTX, rdi), + SSMFIELD_ENTRY( CPUMCTX, rsi), + SSMFIELD_ENTRY( CPUMCTX, rbp), + SSMFIELD_ENTRY( CPUMCTX, rax), + SSMFIELD_ENTRY( CPUMCTX, rbx), + SSMFIELD_ENTRY( CPUMCTX, rdx), + SSMFIELD_ENTRY( CPUMCTX, rcx), + SSMFIELD_ENTRY( CPUMCTX, rsp), + SSMFIELD_ENTRY_OLD( lss_esp, sizeof(uint32_t)), + SSMFIELD_ENTRY( CPUMCTX, ss.Sel), + SSMFIELD_ENTRY_OLD( ssPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, gs.Sel), + SSMFIELD_ENTRY_OLD( gsPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, fs.Sel), + SSMFIELD_ENTRY_OLD( fsPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, es.Sel), + SSMFIELD_ENTRY_OLD( esPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, ds.Sel), + SSMFIELD_ENTRY_OLD( dsPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, cs.Sel), + SSMFIELD_ENTRY_OLD( csPadding, sizeof(uint16_t)*3), + SSMFIELD_ENTRY( CPUMCTX, rflags), + SSMFIELD_ENTRY( CPUMCTX, rip), + SSMFIELD_ENTRY( CPUMCTX, r8), + SSMFIELD_ENTRY( CPUMCTX, r9), + SSMFIELD_ENTRY( CPUMCTX, r10), + SSMFIELD_ENTRY( CPUMCTX, r11), + SSMFIELD_ENTRY( CPUMCTX, r12), + SSMFIELD_ENTRY( CPUMCTX, r13), + SSMFIELD_ENTRY( CPUMCTX, r14), + SSMFIELD_ENTRY( CPUMCTX, r15), + SSMFIELD_ENTRY( CPUMCTX, es.u64Base), + SSMFIELD_ENTRY( CPUMCTX, es.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, es.Attr), + SSMFIELD_ENTRY( CPUMCTX, cs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, cs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, cs.Attr), + SSMFIELD_ENTRY( CPUMCTX, ss.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ss.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ss.Attr), + SSMFIELD_ENTRY( CPUMCTX, ds.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ds.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ds.Attr), + SSMFIELD_ENTRY( CPUMCTX, fs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, fs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, fs.Attr), + SSMFIELD_ENTRY( CPUMCTX, 
gs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, gs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, gs.Attr), + SSMFIELD_ENTRY( CPUMCTX, cr0), + SSMFIELD_ENTRY( CPUMCTX, cr2), + SSMFIELD_ENTRY( CPUMCTX, cr3), + SSMFIELD_ENTRY( CPUMCTX, cr4), + SSMFIELD_ENTRY( CPUMCTX, dr[0]), + SSMFIELD_ENTRY( CPUMCTX, dr[1]), + SSMFIELD_ENTRY( CPUMCTX, dr[2]), + SSMFIELD_ENTRY( CPUMCTX, dr[3]), + SSMFIELD_ENTRY_OLD( dr[4], sizeof(uint64_t)), + SSMFIELD_ENTRY_OLD( dr[5], sizeof(uint64_t)), + SSMFIELD_ENTRY( CPUMCTX, dr[6]), + SSMFIELD_ENTRY( CPUMCTX, dr[7]), + SSMFIELD_ENTRY( CPUMCTX, gdtr.cbGdt), + SSMFIELD_ENTRY( CPUMCTX, gdtr.pGdt), + SSMFIELD_ENTRY_OLD( gdtrPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, idtr.cbIdt), + SSMFIELD_ENTRY( CPUMCTX, idtr.pIdt), + SSMFIELD_ENTRY_OLD( idtrPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, ldtr.Sel), + SSMFIELD_ENTRY_OLD( ldtrPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, tr.Sel), + SSMFIELD_ENTRY_OLD( trPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.cs), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.eip), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.esp), + SSMFIELD_ENTRY( CPUMCTX, msrEFER), + SSMFIELD_ENTRY( CPUMCTX, msrSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrPAT), + SSMFIELD_ENTRY( CPUMCTX, msrLSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrCSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrSFMASK), + SSMFIELD_ENTRY( CPUMCTX, msrKERNELGSBASE), + SSMFIELD_ENTRY( CPUMCTX, ldtr.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ldtr.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ldtr.Attr), + SSMFIELD_ENTRY( CPUMCTX, tr.u64Base), + SSMFIELD_ENTRY( CPUMCTX, tr.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, tr.Attr), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for CPUMCTX_VER1_6. */ +static const SSMFIELD g_aCpumX87FieldsV16[] = +{ + SSMFIELD_ENTRY( X86FXSTATE, FCW), + SSMFIELD_ENTRY( X86FXSTATE, FSW), + SSMFIELD_ENTRY( X86FXSTATE, FTW), + SSMFIELD_ENTRY( X86FXSTATE, FOP), + SSMFIELD_ENTRY( X86FXSTATE, FPUIP), + SSMFIELD_ENTRY( X86FXSTATE, CS), + SSMFIELD_ENTRY( X86FXSTATE, Rsrvd1), + SSMFIELD_ENTRY( X86FXSTATE, FPUDP), + SSMFIELD_ENTRY( X86FXSTATE, DS), + SSMFIELD_ENTRY( X86FXSTATE, Rsrvd2), + SSMFIELD_ENTRY( X86FXSTATE, MXCSR), + SSMFIELD_ENTRY( X86FXSTATE, MXCSR_MASK), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[0]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[1]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[2]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[3]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[4]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[5]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[6]), + SSMFIELD_ENTRY( X86FXSTATE, aRegs[7]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[0]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[1]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[2]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[3]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[4]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[5]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[6]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[7]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[8]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[9]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[10]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[11]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[12]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[13]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[14]), + SSMFIELD_ENTRY( X86FXSTATE, aXMM[15]), + SSMFIELD_ENTRY_IGNORE( X86FXSTATE, au32RsrvdRest), + SSMFIELD_ENTRY_IGNORE( X86FXSTATE, au32RsrvdForSoftware), + SSMFIELD_ENTRY_TERM() +}; + +/** Saved state field descriptors for CPUMCTX_VER1_6. 
*/ +static const SSMFIELD g_aCpumCtxFieldsV16[] = +{ + SSMFIELD_ENTRY( CPUMCTX, rdi), + SSMFIELD_ENTRY( CPUMCTX, rsi), + SSMFIELD_ENTRY( CPUMCTX, rbp), + SSMFIELD_ENTRY( CPUMCTX, rax), + SSMFIELD_ENTRY( CPUMCTX, rbx), + SSMFIELD_ENTRY( CPUMCTX, rdx), + SSMFIELD_ENTRY( CPUMCTX, rcx), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, rsp), + SSMFIELD_ENTRY( CPUMCTX, ss.Sel), + SSMFIELD_ENTRY_OLD( ssPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY_OLD( CPUMCTX, sizeof(uint64_t) /*rsp_notused*/), + SSMFIELD_ENTRY( CPUMCTX, gs.Sel), + SSMFIELD_ENTRY_OLD( gsPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, fs.Sel), + SSMFIELD_ENTRY_OLD( fsPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, es.Sel), + SSMFIELD_ENTRY_OLD( esPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, ds.Sel), + SSMFIELD_ENTRY_OLD( dsPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, cs.Sel), + SSMFIELD_ENTRY_OLD( csPadding, sizeof(uint16_t)*3), + SSMFIELD_ENTRY( CPUMCTX, rflags), + SSMFIELD_ENTRY( CPUMCTX, rip), + SSMFIELD_ENTRY( CPUMCTX, r8), + SSMFIELD_ENTRY( CPUMCTX, r9), + SSMFIELD_ENTRY( CPUMCTX, r10), + SSMFIELD_ENTRY( CPUMCTX, r11), + SSMFIELD_ENTRY( CPUMCTX, r12), + SSMFIELD_ENTRY( CPUMCTX, r13), + SSMFIELD_ENTRY( CPUMCTX, r14), + SSMFIELD_ENTRY( CPUMCTX, r15), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, es.u64Base), + SSMFIELD_ENTRY( CPUMCTX, es.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, es.Attr), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, cs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, cs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, cs.Attr), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, ss.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ss.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ss.Attr), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, ds.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ds.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ds.Attr), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, fs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, fs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, fs.Attr), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, gs.u64Base), + SSMFIELD_ENTRY( CPUMCTX, gs.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, gs.Attr), + SSMFIELD_ENTRY( CPUMCTX, cr0), + SSMFIELD_ENTRY( CPUMCTX, cr2), + SSMFIELD_ENTRY( CPUMCTX, cr3), + SSMFIELD_ENTRY( CPUMCTX, cr4), + SSMFIELD_ENTRY_OLD( cr8, sizeof(uint64_t)), + SSMFIELD_ENTRY( CPUMCTX, dr[0]), + SSMFIELD_ENTRY( CPUMCTX, dr[1]), + SSMFIELD_ENTRY( CPUMCTX, dr[2]), + SSMFIELD_ENTRY( CPUMCTX, dr[3]), + SSMFIELD_ENTRY_OLD( dr[4], sizeof(uint64_t)), + SSMFIELD_ENTRY_OLD( dr[5], sizeof(uint64_t)), + SSMFIELD_ENTRY( CPUMCTX, dr[6]), + SSMFIELD_ENTRY( CPUMCTX, dr[7]), + SSMFIELD_ENTRY( CPUMCTX, gdtr.cbGdt), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, gdtr.pGdt), + SSMFIELD_ENTRY_OLD( gdtrPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY_OLD( gdtrPadding64, sizeof(uint64_t)), + SSMFIELD_ENTRY( CPUMCTX, idtr.cbIdt), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, idtr.pIdt), + SSMFIELD_ENTRY_OLD( idtrPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY_OLD( idtrPadding64, sizeof(uint64_t)), + SSMFIELD_ENTRY( CPUMCTX, ldtr.Sel), + SSMFIELD_ENTRY_OLD( ldtrPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, tr.Sel), + SSMFIELD_ENTRY_OLD( trPadding, sizeof(uint16_t)), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.cs), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.eip), + SSMFIELD_ENTRY( CPUMCTX, SysEnter.esp), + SSMFIELD_ENTRY( CPUMCTX, msrEFER), + SSMFIELD_ENTRY( CPUMCTX, msrSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrPAT), + SSMFIELD_ENTRY( CPUMCTX, msrLSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrCSTAR), + SSMFIELD_ENTRY( CPUMCTX, msrSFMASK), + SSMFIELD_ENTRY_OLD( msrFSBASE, sizeof(uint64_t)), + SSMFIELD_ENTRY_OLD( msrGSBASE, sizeof(uint64_t)), + 
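    /* (Illustrative note, not part of the upstream source: in this 1.6-format table the
       SSMFIELD_ENTRY_U32_ZX_U64 entries load a 32-bit value from the old saved state and
       zero-extend it into the current 64-bit CPUMCTX member, while SSMFIELD_ENTRY_OLD
       entries only skip saved data that no longer has a corresponding structure member.) */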
SSMFIELD_ENTRY( CPUMCTX, msrKERNELGSBASE), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, ldtr.u64Base), + SSMFIELD_ENTRY( CPUMCTX, ldtr.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, ldtr.Attr), + SSMFIELD_ENTRY_U32_ZX_U64( CPUMCTX, tr.u64Base), + SSMFIELD_ENTRY( CPUMCTX, tr.u32Limit), + SSMFIELD_ENTRY( CPUMCTX, tr.Attr), + SSMFIELD_ENTRY_OLD( padding, sizeof(uint32_t)*2), + SSMFIELD_ENTRY_TERM() +}; + + +/** + * Checks for partial/leaky FXSAVE/FXRSTOR handling on AMD CPUs. + * + * AMD K7, K8 and newer AMD CPUs do not save/restore the x87 error pointers + * (last instruction pointer, last data pointer, last opcode) except when the ES + * bit (Exception Summary) in x87 FSW (FPU Status Word) is set. Thus if we don't + * clear these registers there is potential, local FPU leakage from a process + * using the FPU to another. + * + * See AMD Instruction Reference for FXSAVE, FXRSTOR. + * + * @param pVM The cross context VM structure. + */ +static void cpumR3CheckLeakyFpu(PVM pVM) +{ + uint32_t u32CpuVersion = ASMCpuId_EAX(1); + uint32_t const u32Family = u32CpuVersion >> 8; + if ( u32Family >= 6 /* K7 and higher */ + && ASMIsAmdCpu()) + { + uint32_t cExt = ASMCpuId_EAX(0x80000000); + if (ASMIsValidExtRange(cExt)) + { + uint32_t fExtFeaturesEDX = ASMCpuId_EDX(0x80000001); + if (fExtFeaturesEDX & X86_CPUID_AMD_FEATURE_EDX_FFXSR) + { + for (VMCPUID i = 0; i < pVM->cCpus; i++) + pVM->aCpus[i].cpum.s.fUseFlags |= CPUM_USE_FFXSR_LEAKY; + Log(("CPUM: Host CPU has leaky fxsave/fxrstor behaviour\n")); + } + } + } +} + + +/** + * Frees memory allocated for the SVM hardware virtualization state. + * + * @param pVM The cross context VM structure. + */ +static void cpumR3FreeSvmHwVirtState(PVM pVM) +{ + Assert(pVM->cpum.s.GuestFeatures.fSvm); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + if (pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR3, SVM_VMCB_PAGES); + pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR3 = NULL; + } + pVCpu->cpum.s.Guest.hwvirt.svm.HCPhysVmcb = NIL_RTHCPHYS; + + if (pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR3, SVM_MSRPM_PAGES); + pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR3 = NULL; + } + + if (pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR3, SVM_IOPM_PAGES); + pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR3 = NULL; + } + } +} + + +/** + * Allocates memory for the SVM hardware virtualization state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int cpumR3AllocSvmHwVirtState(PVM pVM) +{ + Assert(pVM->cpum.s.GuestFeatures.fSvm); + + int rc = VINF_SUCCESS; + LogRel(("CPUM: Allocating %u pages for the nested-guest SVM MSR and IO permission bitmaps\n", + pVM->cCpus * (SVM_MSRPM_PAGES + SVM_IOPM_PAGES))); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->cpum.s.Guest.hwvirt.enmHwvirt = CPUMHWVIRT_SVM; + + /* + * Allocate the nested-guest VMCB. 
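     * (Illustrative note, not part of the upstream source: SUPR3PageAllocEx returns both a
     * ring-3 and a ring-0 mapping, and the SUPPAGE filled in below also yields the
     * host-physical address that is stored in HCPhysVmcb; the MSR and IO permission
     * bitmaps allocated afterwards pass NULL for paPages because their physical
     * addresses are not needed here.)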
+ */ + SUPPAGE SupNstGstVmcbPage; + RT_ZERO(SupNstGstVmcbPage); + SupNstGstVmcbPage.Phys = NIL_RTHCPHYS; + Assert(SVM_VMCB_PAGES == 1); + Assert(!pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR3); + rc = SUPR3PageAllocEx(SVM_VMCB_PAGES, 0 /* fFlags */, (void **)&pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR3, + &pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR0, &SupNstGstVmcbPage); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.svm.pVmcbR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's VMCB\n", pVCpu->idCpu, SVM_VMCB_PAGES)); + break; + } + pVCpu->cpum.s.Guest.hwvirt.svm.HCPhysVmcb = SupNstGstVmcbPage.Phys; + + /* + * Allocate the MSRPM (MSR Permission bitmap). + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR3); + rc = SUPR3PageAllocEx(SVM_MSRPM_PAGES, 0 /* fFlags */, &pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR3, + &pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.svm.pvMsrBitmapR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's MSR permission bitmap\n", pVCpu->idCpu, + SVM_MSRPM_PAGES)); + break; + } + + /* + * Allocate the IOPM (IO Permission bitmap). + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR3); + rc = SUPR3PageAllocEx(SVM_IOPM_PAGES, 0 /* fFlags */, &pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR3, + &pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.svm.pvIoBitmapR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's IO permission bitmap\n", pVCpu->idCpu, + SVM_IOPM_PAGES)); + break; + } + } + + /* On any failure, cleanup. */ + if (RT_FAILURE(rc)) + cpumR3FreeSvmHwVirtState(pVM); + + return rc; +} + + +/** + * Resets per-VCPU SVM hardware virtualization state. + * + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(void) cpumR3ResetSvmHwVirtState(PVMCPU pVCpu) +{ + PCPUMCTX pCtx = &pVCpu->cpum.s.Guest; + Assert(pCtx->hwvirt.enmHwvirt == CPUMHWVIRT_SVM); + Assert(pCtx->hwvirt.svm.CTX_SUFF(pVmcb)); + + memset(pCtx->hwvirt.svm.CTX_SUFF(pVmcb), 0, SVM_VMCB_PAGES << PAGE_SHIFT); + pCtx->hwvirt.svm.uMsrHSavePa = 0; + pCtx->hwvirt.svm.uPrevPauseTick = 0; +} + + +/** + * Frees memory allocated for the VMX hardware virtualization state. + * + * @param pVM The cross context VM structure. 
+ */ +static void cpumR3FreeVmxHwVirtState(PVM pVM) +{ + Assert(pVM->cpum.s.GuestFeatures.fVmx); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR3, VMX_V_VMCS_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR3, VMX_V_VMCS_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR3, VMX_V_VIRT_APIC_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR3, VMX_V_VMREAD_VMWRITE_BITMAP_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR3, VMX_V_VMREAD_VMWRITE_BITMAP_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR3, VMX_V_AUTOMSR_AREA_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR3, VMX_V_MSR_BITMAP_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR3 = NULL; + } + if (pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR3) + { + SUPR3PageFreeEx(pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR3, VMX_V_IO_BITMAP_A_PAGES + VMX_V_IO_BITMAP_B_PAGES); + pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR3 = NULL; + } + } +} + + +/** + * Allocates memory for the VMX hardware virtualization state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int cpumR3AllocVmxHwVirtState(PVM pVM) +{ + int rc = VINF_SUCCESS; + LogRel(("CPUM: Allocating %u pages for the nested-guest VMCS and related structures\n", + pVM->cCpus * ( VMX_V_VMCS_PAGES + VMX_V_VIRT_APIC_PAGES + VMX_V_VMREAD_VMWRITE_BITMAP_PAGES * 2 + + VMX_V_AUTOMSR_AREA_PAGES))); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->cpum.s.Guest.hwvirt.enmHwvirt = CPUMHWVIRT_VMX; + + /* + * Allocate the nested-guest current VMCS. + */ + Assert(VMX_V_VMCS_PAGES == 1); + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR3); + rc = SUPR3PageAllocEx(VMX_V_VMCS_PAGES, 0 /* fFlags */, (void **)&pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pVmcsR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's VMCS\n", pVCpu->idCpu, VMX_V_VMCS_PAGES)); + break; + } + + /* + * Allocate the nested-guest shadow VMCS. 
+ */ + Assert(VMX_V_VMCS_PAGES == 1); + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR3); + rc = SUPR3PageAllocEx(VMX_V_VMCS_PAGES, 0 /* fFlags */, (void **)&pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pShadowVmcsR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's shadow VMCS\n", pVCpu->idCpu, VMX_V_VMCS_PAGES)); + break; + } + + /* + * Allocate the Virtual-APIC page. + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR3); + rc = SUPR3PageAllocEx(VMX_V_VIRT_APIC_PAGES, 0 /* fFlags */, &pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvVirtApicPageR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's Virtual-APIC page\n", pVCpu->idCpu, + VMX_V_VIRT_APIC_PAGES)); + break; + } + + /* + * Allocate the VMREAD-bitmap. + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR3); + rc = SUPR3PageAllocEx(VMX_V_VMREAD_VMWRITE_BITMAP_PAGES, 0 /* fFlags */, &pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmreadBitmapR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's VMREAD-bitmap\n", pVCpu->idCpu, + VMX_V_VMREAD_VMWRITE_BITMAP_PAGES)); + break; + } + + /* + * Allocatge the VMWRITE-bitmap. + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR3); + rc = SUPR3PageAllocEx(VMX_V_VMREAD_VMWRITE_BITMAP_PAGES, 0 /* fFlags */, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvVmwriteBitmapR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's VMWRITE-bitmap\n", pVCpu->idCpu, + VMX_V_VMREAD_VMWRITE_BITMAP_PAGES)); + break; + } + + /* + * Allocate the MSR auto-load/store area. + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR3); + rc = SUPR3PageAllocEx(VMX_V_AUTOMSR_AREA_PAGES, 0 /* fFlags */, (void **)&pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pAutoMsrAreaR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's auto-load/store MSR area\n", pVCpu->idCpu, + VMX_V_AUTOMSR_AREA_PAGES)); + break; + } + + /* + * Allocate the MSR bitmap. + */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR3); + rc = SUPR3PageAllocEx(VMX_V_MSR_BITMAP_PAGES, 0 /* fFlags */, (void **)&pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvMsrBitmapR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's MSR bitmap\n", pVCpu->idCpu, + VMX_V_MSR_BITMAP_PAGES)); + break; + } + + /* + * Allocate the I/O bitmaps (A and B). 
+ */ + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR3); + rc = SUPR3PageAllocEx(VMX_V_IO_BITMAP_A_PAGES + VMX_V_IO_BITMAP_B_PAGES, 0 /* fFlags */, + (void **)&pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR3, + &pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR0, NULL /* paPages */); + if (RT_FAILURE(rc)) + { + Assert(!pVCpu->cpum.s.Guest.hwvirt.vmx.pvIoBitmapR3); + LogRel(("CPUM%u: Failed to alloc %u pages for the nested-guest's I/O bitmaps\n", pVCpu->idCpu, + VMX_V_IO_BITMAP_A_PAGES + VMX_V_IO_BITMAP_B_PAGES)); + break; + } + } + + /* On any failure, cleanup. */ + if (RT_FAILURE(rc)) + cpumR3FreeVmxHwVirtState(pVM); + + return rc; +} + + +/** + * Resets per-VCPU VMX hardware virtualization state. + * + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(void) cpumR3ResetVmxHwVirtState(PVMCPU pVCpu) +{ + PCPUMCTX pCtx = &pVCpu->cpum.s.Guest; + Assert(pCtx->hwvirt.enmHwvirt == CPUMHWVIRT_VMX); + Assert(pCtx->hwvirt.vmx.CTX_SUFF(pVmcs)); + Assert(pCtx->hwvirt.vmx.CTX_SUFF(pShadowVmcs)); + + memset(pCtx->hwvirt.vmx.CTX_SUFF(pVmcs), 0, VMX_V_VMCS_SIZE); + memset(pCtx->hwvirt.vmx.CTX_SUFF(pShadowVmcs), 0, VMX_V_VMCS_SIZE); + pCtx->hwvirt.vmx.GCPhysVmxon = NIL_RTGCPHYS; + pCtx->hwvirt.vmx.GCPhysShadowVmcs = NIL_RTGCPHYS; + pCtx->hwvirt.vmx.GCPhysVmxon = NIL_RTGCPHYS; + pCtx->hwvirt.vmx.fInVmxRootMode = false; + pCtx->hwvirt.vmx.fInVmxNonRootMode = false; + /* Don't reset diagnostics here. */ +} + + +/** + * Displays the host and guest VMX features. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs "terse", "default" or "verbose". + */ +DECLCALLBACK(void) cpumR3InfoVmxFeatures(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + RT_NOREF(pszArgs); + PCCPUMFEATURES pHostFeatures = &pVM->cpum.s.HostFeatures; + PCCPUMFEATURES pGuestFeatures = &pVM->cpum.s.GuestFeatures; + if ( pHostFeatures->enmCpuVendor == CPUMCPUVENDOR_INTEL + || pHostFeatures->enmCpuVendor == CPUMCPUVENDOR_VIA + || pHostFeatures->enmCpuVendor == CPUMCPUVENDOR_SHANGHAI) + { +#define VMXFEATDUMP(a_szDesc, a_Var) \ + pHlp->pfnPrintf(pHlp, " %s = %u (%u)\n", a_szDesc, pGuestFeatures->a_Var, pHostFeatures->a_Var) + + pHlp->pfnPrintf(pHlp, "Nested hardware virtualization - VMX features\n"); + pHlp->pfnPrintf(pHlp, " Mnemonic - Description = guest (host)\n"); + VMXFEATDUMP("VMX - Virtual-Machine Extensions ", fVmx); + /* Basic. */ + VMXFEATDUMP("InsOutInfo - INS/OUTS instruction info. ", fVmxInsOutInfo); + /* Pin-based controls. */ + VMXFEATDUMP("ExtIntExit - External interrupt exiting ", fVmxExtIntExit); + VMXFEATDUMP("NmiExit - NMI exiting ", fVmxNmiExit); + VMXFEATDUMP("VirtNmi - Virtual NMIs ", fVmxVirtNmi); + VMXFEATDUMP("PreemptTimer - VMX preemption timer ", fVmxPreemptTimer); + VMXFEATDUMP("PostedInt - Posted interrupts ", fVmxPostedInt); + /* Processor-based controls. 
*/ + VMXFEATDUMP("IntWindowExit - Interrupt-window exiting ", fVmxIntWindowExit); + VMXFEATDUMP("TscOffsetting - TSC offsetting ", fVmxTscOffsetting); + VMXFEATDUMP("HltExit - HLT exiting ", fVmxHltExit); + VMXFEATDUMP("InvlpgExit - INVLPG exiting ", fVmxInvlpgExit); + VMXFEATDUMP("MwaitExit - MWAIT exiting ", fVmxMwaitExit); + VMXFEATDUMP("RdpmcExit - RDPMC exiting ", fVmxRdpmcExit); + VMXFEATDUMP("RdtscExit - RDTSC exiting ", fVmxRdtscExit); + VMXFEATDUMP("Cr3LoadExit - CR3-load exiting ", fVmxCr3LoadExit); + VMXFEATDUMP("Cr3StoreExit - CR3-store exiting ", fVmxCr3StoreExit); + VMXFEATDUMP("Cr8LoadExit - CR8-load exiting ", fVmxCr8LoadExit); + VMXFEATDUMP("Cr8StoreExit - CR8-store exiting ", fVmxCr8StoreExit); + VMXFEATDUMP("UseTprShadow - Use TPR shadow ", fVmxUseTprShadow); + VMXFEATDUMP("NmiWindowExit - NMI-window exiting ", fVmxNmiWindowExit); + VMXFEATDUMP("MovDRxExit - Mov-DR exiting ", fVmxMovDRxExit); + VMXFEATDUMP("UncondIoExit - Unconditional I/O exiting ", fVmxUncondIoExit); + VMXFEATDUMP("UseIoBitmaps - Use I/O bitmaps ", fVmxUseIoBitmaps); + VMXFEATDUMP("MonitorTrapFlag - Monitor trap flag ", fVmxMonitorTrapFlag); + VMXFEATDUMP("UseMsrBitmaps - MSR bitmaps ", fVmxUseMsrBitmaps); + VMXFEATDUMP("MonitorExit - MONITOR exiting ", fVmxMonitorExit); + VMXFEATDUMP("PauseExit - PAUSE exiting ", fVmxPauseExit); + VMXFEATDUMP("SecondaryExecCtl - Activate secondary controls ", fVmxSecondaryExecCtls); + /* Secondary processor-based controls. */ + VMXFEATDUMP("VirtApic - Virtualize-APIC accesses ", fVmxVirtApicAccess); + VMXFEATDUMP("Ept - Extended Page Tables ", fVmxEpt); + VMXFEATDUMP("DescTableExit - Descriptor-table exiting ", fVmxDescTableExit); + VMXFEATDUMP("Rdtscp - Enable RDTSCP ", fVmxRdtscp); + VMXFEATDUMP("VirtX2ApicMode - Virtualize-x2APIC mode ", fVmxVirtX2ApicMode); + VMXFEATDUMP("Vpid - Enable VPID ", fVmxVpid); + VMXFEATDUMP("WbinvdExit - WBINVD exiting ", fVmxWbinvdExit); + VMXFEATDUMP("UnrestrictedGuest - Unrestricted guest ", fVmxUnrestrictedGuest); + VMXFEATDUMP("ApicRegVirt - APIC-register virtualization ", fVmxApicRegVirt); + VMXFEATDUMP("VirtIntDelivery - Virtual-interrupt delivery ", fVmxVirtIntDelivery); + VMXFEATDUMP("PauseLoopExit - PAUSE-loop exiting ", fVmxPauseLoopExit); + VMXFEATDUMP("RdrandExit - RDRAND exiting ", fVmxRdrandExit); + VMXFEATDUMP("Invpcid - Enable INVPCID ", fVmxInvpcid); + VMXFEATDUMP("VmFuncs - Enable VM Functions ", fVmxVmFunc); + VMXFEATDUMP("VmcsShadowing - VMCS shadowing ", fVmxVmcsShadowing); + VMXFEATDUMP("RdseedExiting - RDSEED exiting ", fVmxRdseedExit); + VMXFEATDUMP("PML - Page-Modification Log (PML) ", fVmxPml); + VMXFEATDUMP("EptVe - EPT violations can cause #VE ", fVmxEptXcptVe); + VMXFEATDUMP("XsavesXRstors - Enable XSAVES/XRSTORS ", fVmxXsavesXrstors); + /* VM-entry controls. */ + VMXFEATDUMP("EntryLoadDebugCtls - Load debug controls on VM-entry ", fVmxEntryLoadDebugCtls); + VMXFEATDUMP("Ia32eModeGuest - IA-32e mode guest ", fVmxIa32eModeGuest); + VMXFEATDUMP("EntryLoadEferMsr - Load IA32_EFER MSR on VM-entry ", fVmxEntryLoadEferMsr); + VMXFEATDUMP("EntryLoadPatMsr - Load IA32_PAT MSR on VM-entry ", fVmxEntryLoadPatMsr); + /* VM-exit controls. 
*/ + VMXFEATDUMP("ExitSaveDebugCtls - Save debug controls on VM-exit ", fVmxExitSaveDebugCtls); + VMXFEATDUMP("HostAddrSpaceSize - Host address-space size ", fVmxHostAddrSpaceSize); + VMXFEATDUMP("ExitAckExtInt - Acknowledge interrupt on VM-exit ", fVmxExitAckExtInt); + VMXFEATDUMP("ExitSavePatMsr - Save IA32_PAT MSR on VM-exit ", fVmxExitSavePatMsr); + VMXFEATDUMP("ExitLoadPatMsr - Load IA32_PAT MSR on VM-exit ", fVmxExitLoadPatMsr); + VMXFEATDUMP("ExitSaveEferMsr - Save IA32_EFER MSR on VM-exit ", fVmxExitSaveEferMsr); + VMXFEATDUMP("ExitLoadEferMsr - Load IA32_EFER MSR on VM-exit ", fVmxExitLoadEferMsr); + VMXFEATDUMP("SavePreemptTimer - Save VMX-preemption timer ", fVmxSavePreemptTimer); + /* Miscellaneous data. */ + VMXFEATDUMP("ExitSaveEferLma - Save IA32_EFER.LMA on VM-exit ", fVmxExitSaveEferLma); + VMXFEATDUMP("IntelPt - Intel PT (Processor Trace) in VMX operation ", fVmxIntelPt); + VMXFEATDUMP("VmwriteAll - Write allowed to read-only VMCS fields ", fVmxVmwriteAll); + VMXFEATDUMP("EntryInjectSoftInt - Inject softint. with 0-len instr. ", fVmxEntryInjectSoftInt); +#undef VMXFEATDUMP + } + else + pHlp->pfnPrintf(pHlp, "No VMX features present - requires an Intel or compatible CPU.\n"); +} + + +/** + * Checks whether nested-guest execution using hardware-assisted VMX (e.g, using HM + * or NEM) is allowed. + * + * @returns @c true if hardware-assisted nested-guest execution is allowed, @c false + * otherwise. + * @param pVM The cross context VM structure. + */ +static bool cpumR3IsHwAssistNstGstExecAllowed(PVM pVM) +{ + AssertMsg(pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NOT_SET, ("Calling this function too early!\n")); +#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM + if ( pVM->bMainExecutionEngine == VM_EXEC_ENGINE_HW_VIRT + || pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + return true; +#else + NOREF(pVM); +#endif + return false; +} + + +/** + * Initializes the VMX guest MSRs from guest CPU features based on the host MSRs. + * + * @param pVM The cross context VM structure. + * @param pHostVmxMsrs The host VMX MSRs. Pass NULL when fully emulating VMX + * and no hardware-assisted nested-guest execution is + * possible for this VM. + * @param pGuestFeatures The guest features to use (only VMX features are + * accessed). + * @param pGuestVmxMsrs Where to store the initialized guest VMX MSRs. + * + * @remarks This function ASSUMES the VMX guest-features are already exploded! + */ +static void cpumR3InitVmxGuestMsrs(PVM pVM, PCVMXMSRS pHostVmxMsrs, PCCPUMFEATURES pGuestFeatures, PVMXMSRS pGuestVmxMsrs) +{ + bool const fIsNstGstHwExecAllowed = cpumR3IsHwAssistNstGstExecAllowed(pVM); + + Assert(!fIsNstGstHwExecAllowed || pHostVmxMsrs); + Assert(pGuestFeatures->fVmx); + + /* + * We don't support the following MSRs yet: + * - True Pin-based VM-execution controls. + * - True Processor-based VM-execution controls. + * - True VM-entry VM-execution controls. + * - True VM-exit VM-execution controls. + * - EPT/VPID capabilities. + */ + + /* Feature control. */ + pGuestVmxMsrs->u64FeatCtrl = MSR_IA32_FEATURE_CONTROL_LOCK | MSR_IA32_FEATURE_CONTROL_VMXON; + + /* Basic information. 
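     * (Illustrative note, not part of the upstream source: each VMX control MSR constructed
     * below is packed as RT_MAKE_U64(fAllowed0, fAllowed1): the low 32 bits are the
     * allowed-0 settings (a bit set there means that control must be 1) and the high 32
     * bits are the allowed-1 settings (a bit set there means that control may be 1),
     * matching the Intel layout of e.g. IA32_VMX_PINBASED_CTLS. A feature is therefore
     * offered to the guest simply by setting its bit in fAllowed1, which can be read back
     * with e.g. RT_HI_U32(pGuestVmxMsrs->PinCtls.u).)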
*/ + { + uint64_t const u64Basic = RT_BF_MAKE(VMX_BF_BASIC_VMCS_ID, VMX_V_VMCS_REVISION_ID ) + | RT_BF_MAKE(VMX_BF_BASIC_VMCS_SIZE, VMX_V_VMCS_SIZE ) + | RT_BF_MAKE(VMX_BF_BASIC_PHYSADDR_WIDTH, !pGuestFeatures->fLongMode ) + | RT_BF_MAKE(VMX_BF_BASIC_DUAL_MON, 0 ) + | RT_BF_MAKE(VMX_BF_BASIC_VMCS_MEM_TYPE, VMX_BASIC_MEM_TYPE_WB ) + | RT_BF_MAKE(VMX_BF_BASIC_VMCS_INS_OUTS, pGuestFeatures->fVmxInsOutInfo) + | RT_BF_MAKE(VMX_BF_BASIC_TRUE_CTLS, 0 ); + pGuestVmxMsrs->u64Basic = u64Basic; + } + + /* Pin-based VM-execution controls. */ + { + uint32_t const fFeatures = (pGuestFeatures->fVmxExtIntExit << VMX_BF_PIN_CTLS_EXT_INT_EXIT_SHIFT ) + | (pGuestFeatures->fVmxNmiExit << VMX_BF_PIN_CTLS_NMI_EXIT_SHIFT ) + | (pGuestFeatures->fVmxVirtNmi << VMX_BF_PIN_CTLS_VIRT_NMI_SHIFT ) + | (pGuestFeatures->fVmxPreemptTimer << VMX_BF_PIN_CTLS_PREEMPT_TIMER_SHIFT) + | (pGuestFeatures->fVmxPostedInt << VMX_BF_PIN_CTLS_POSTED_INT_SHIFT ); + uint32_t const fAllowed0 = VMX_PIN_CTLS_DEFAULT1; + uint32_t const fAllowed1 = fFeatures | VMX_PIN_CTLS_DEFAULT1; + AssertMsg((fAllowed0 & fAllowed1) == fAllowed0, ("fAllowed0=%#RX32 fAllowed1=%#RX32 fFeatures=%#RX32\n", + fAllowed0, fAllowed1, fFeatures)); + pGuestVmxMsrs->PinCtls.u = RT_MAKE_U64(fAllowed0, fAllowed1); + } + + /* Processor-based VM-execution controls. */ + { + uint32_t const fFeatures = (pGuestFeatures->fVmxIntWindowExit << VMX_BF_PROC_CTLS_INT_WINDOW_EXIT_SHIFT ) + | (pGuestFeatures->fVmxTscOffsetting << VMX_BF_PROC_CTLS_USE_TSC_OFFSETTING_SHIFT) + | (pGuestFeatures->fVmxHltExit << VMX_BF_PROC_CTLS_HLT_EXIT_SHIFT ) + | (pGuestFeatures->fVmxInvlpgExit << VMX_BF_PROC_CTLS_INVLPG_EXIT_SHIFT ) + | (pGuestFeatures->fVmxMwaitExit << VMX_BF_PROC_CTLS_MWAIT_EXIT_SHIFT ) + | (pGuestFeatures->fVmxRdpmcExit << VMX_BF_PROC_CTLS_RDPMC_EXIT_SHIFT ) + | (pGuestFeatures->fVmxRdtscExit << VMX_BF_PROC_CTLS_RDTSC_EXIT_SHIFT ) + | (pGuestFeatures->fVmxCr3LoadExit << VMX_BF_PROC_CTLS_CR3_LOAD_EXIT_SHIFT ) + | (pGuestFeatures->fVmxCr3StoreExit << VMX_BF_PROC_CTLS_CR3_STORE_EXIT_SHIFT ) + | (pGuestFeatures->fVmxCr8LoadExit << VMX_BF_PROC_CTLS_CR8_LOAD_EXIT_SHIFT ) + | (pGuestFeatures->fVmxCr8StoreExit << VMX_BF_PROC_CTLS_CR8_STORE_EXIT_SHIFT ) + | (pGuestFeatures->fVmxUseTprShadow << VMX_BF_PROC_CTLS_USE_TPR_SHADOW_SHIFT ) + | (pGuestFeatures->fVmxNmiWindowExit << VMX_BF_PROC_CTLS_NMI_WINDOW_EXIT_SHIFT ) + | (pGuestFeatures->fVmxMovDRxExit << VMX_BF_PROC_CTLS_MOV_DR_EXIT_SHIFT ) + | (pGuestFeatures->fVmxUncondIoExit << VMX_BF_PROC_CTLS_UNCOND_IO_EXIT_SHIFT ) + | (pGuestFeatures->fVmxUseIoBitmaps << VMX_BF_PROC_CTLS_USE_IO_BITMAPS_SHIFT ) + | (pGuestFeatures->fVmxMonitorTrapFlag << VMX_BF_PROC_CTLS_MONITOR_TRAP_FLAG_SHIFT ) + | (pGuestFeatures->fVmxUseMsrBitmaps << VMX_BF_PROC_CTLS_USE_MSR_BITMAPS_SHIFT ) + | (pGuestFeatures->fVmxMonitorExit << VMX_BF_PROC_CTLS_MONITOR_EXIT_SHIFT ) + | (pGuestFeatures->fVmxPauseExit << VMX_BF_PROC_CTLS_PAUSE_EXIT_SHIFT ) + | (pGuestFeatures->fVmxSecondaryExecCtls << VMX_BF_PROC_CTLS_USE_SECONDARY_CTLS_SHIFT); + uint32_t const fAllowed0 = VMX_PROC_CTLS_DEFAULT1; + uint32_t const fAllowed1 = fFeatures | VMX_PROC_CTLS_DEFAULT1; + AssertMsg((fAllowed0 & fAllowed1) == fAllowed0, ("fAllowed0=%#RX32 fAllowed1=%#RX32 fFeatures=%#RX32\n", fAllowed0, + fAllowed1, fFeatures)); + pGuestVmxMsrs->ProcCtls.u = RT_MAKE_U64(fAllowed0, fAllowed1); + } + + /* Secondary processor-based VM-execution controls. 
*/ + if (pGuestFeatures->fVmxSecondaryExecCtls) + { + uint32_t const fFeatures = (pGuestFeatures->fVmxVirtApicAccess << VMX_BF_PROC_CTLS2_VIRT_APIC_ACCESS_SHIFT ) + | (pGuestFeatures->fVmxEpt << VMX_BF_PROC_CTLS2_EPT_SHIFT ) + | (pGuestFeatures->fVmxDescTableExit << VMX_BF_PROC_CTLS2_DESC_TABLE_EXIT_SHIFT ) + | (pGuestFeatures->fVmxRdtscp << VMX_BF_PROC_CTLS2_RDTSCP_SHIFT ) + | (pGuestFeatures->fVmxVirtX2ApicMode << VMX_BF_PROC_CTLS2_VIRT_X2APIC_MODE_SHIFT ) + | (pGuestFeatures->fVmxVpid << VMX_BF_PROC_CTLS2_VPID_SHIFT ) + | (pGuestFeatures->fVmxWbinvdExit << VMX_BF_PROC_CTLS2_WBINVD_EXIT_SHIFT ) + | (pGuestFeatures->fVmxUnrestrictedGuest << VMX_BF_PROC_CTLS2_UNRESTRICTED_GUEST_SHIFT) + | (pGuestFeatures->fVmxApicRegVirt << VMX_BF_PROC_CTLS2_APIC_REG_VIRT_SHIFT ) + | (pGuestFeatures->fVmxVirtIntDelivery << VMX_BF_PROC_CTLS2_VIRT_INT_DELIVERY_SHIFT ) + | (pGuestFeatures->fVmxPauseLoopExit << VMX_BF_PROC_CTLS2_PAUSE_LOOP_EXIT_SHIFT ) + | (pGuestFeatures->fVmxRdrandExit << VMX_BF_PROC_CTLS2_RDRAND_EXIT_SHIFT ) + | (pGuestFeatures->fVmxInvpcid << VMX_BF_PROC_CTLS2_INVPCID_SHIFT ) + | (pGuestFeatures->fVmxVmFunc << VMX_BF_PROC_CTLS2_VMFUNC_SHIFT ) + | (pGuestFeatures->fVmxVmcsShadowing << VMX_BF_PROC_CTLS2_VMCS_SHADOWING_SHIFT ) + | (pGuestFeatures->fVmxRdseedExit << VMX_BF_PROC_CTLS2_RDSEED_EXIT_SHIFT ) + | (pGuestFeatures->fVmxPml << VMX_BF_PROC_CTLS2_PML_SHIFT ) + | (pGuestFeatures->fVmxEptXcptVe << VMX_BF_PROC_CTLS2_EPT_VE_SHIFT ) + | (pGuestFeatures->fVmxXsavesXrstors << VMX_BF_PROC_CTLS2_XSAVES_XRSTORS_SHIFT ) + | (pGuestFeatures->fVmxUseTscScaling << VMX_BF_PROC_CTLS2_TSC_SCALING_SHIFT ); + uint32_t const fAllowed0 = 0; + uint32_t const fAllowed1 = fFeatures; + pGuestVmxMsrs->ProcCtls2.u = RT_MAKE_U64(fAllowed0, fAllowed1); + } + + /* VM-exit controls. */ + { + uint32_t const fFeatures = (pGuestFeatures->fVmxExitSaveDebugCtls << VMX_BF_EXIT_CTLS_SAVE_DEBUG_SHIFT ) + | (pGuestFeatures->fVmxHostAddrSpaceSize << VMX_BF_EXIT_CTLS_HOST_ADDR_SPACE_SIZE_SHIFT) + | (pGuestFeatures->fVmxExitAckExtInt << VMX_BF_EXIT_CTLS_ACK_EXT_INT_SHIFT ) + | (pGuestFeatures->fVmxExitSavePatMsr << VMX_BF_EXIT_CTLS_SAVE_PAT_MSR_SHIFT ) + | (pGuestFeatures->fVmxExitLoadPatMsr << VMX_BF_EXIT_CTLS_LOAD_PAT_MSR_SHIFT ) + | (pGuestFeatures->fVmxExitSaveEferMsr << VMX_BF_EXIT_CTLS_SAVE_EFER_MSR_SHIFT ) + | (pGuestFeatures->fVmxExitLoadEferMsr << VMX_BF_EXIT_CTLS_LOAD_EFER_MSR_SHIFT ) + | (pGuestFeatures->fVmxSavePreemptTimer << VMX_BF_EXIT_CTLS_SAVE_PREEMPT_TIMER_SHIFT ); + /* Set the default1 class bits. See Intel spec. A.4 "VM-exit Controls". */ + uint32_t const fAllowed0 = VMX_EXIT_CTLS_DEFAULT1; + uint32_t const fAllowed1 = fFeatures | VMX_EXIT_CTLS_DEFAULT1; + AssertMsg((fAllowed0 & fAllowed1) == fAllowed0, ("fAllowed0=%#RX32 fAllowed1=%#RX32 fFeatures=%#RX32\n", fAllowed0, + fAllowed1, fFeatures)); + pGuestVmxMsrs->ExitCtls.u = RT_MAKE_U64(fAllowed0, fAllowed1); + } + + /* VM-entry controls. 
*/ + { + uint32_t const fFeatures = (pGuestFeatures->fVmxEntryLoadDebugCtls << VMX_BF_ENTRY_CTLS_LOAD_DEBUG_SHIFT ) + | (pGuestFeatures->fVmxIa32eModeGuest << VMX_BF_ENTRY_CTLS_IA32E_MODE_GUEST_SHIFT) + | (pGuestFeatures->fVmxEntryLoadEferMsr << VMX_BF_ENTRY_CTLS_LOAD_EFER_MSR_SHIFT ) + | (pGuestFeatures->fVmxEntryLoadPatMsr << VMX_BF_ENTRY_CTLS_LOAD_PAT_MSR_SHIFT ); + uint32_t const fAllowed0 = VMX_ENTRY_CTLS_DEFAULT1; + uint32_t const fAllowed1 = fFeatures | VMX_ENTRY_CTLS_DEFAULT1; + AssertMsg((fAllowed0 & fAllowed1) == fAllowed0, ("fAllowed0=%#RX32 fAllowed0=%#RX32 fFeatures=%#RX32\n", fAllowed0, + fAllowed1, fFeatures)); + pGuestVmxMsrs->EntryCtls.u = RT_MAKE_U64(fAllowed0, fAllowed1); + } + + /* Miscellaneous data. */ + { + uint64_t const uHostMsr = fIsNstGstHwExecAllowed ? pHostVmxMsrs->u64Misc : 0; + + uint8_t const cMaxMsrs = RT_MIN(RT_BF_GET(uHostMsr, VMX_BF_MISC_MAX_MSRS), VMX_V_AUTOMSR_COUNT_MAX); + uint8_t const fActivityState = RT_BF_GET(uHostMsr, VMX_BF_MISC_ACTIVITY_STATES) & VMX_V_GUEST_ACTIVITY_STATE_MASK; + pGuestVmxMsrs->u64Misc = RT_BF_MAKE(VMX_BF_MISC_PREEMPT_TIMER_TSC, VMX_V_PREEMPT_TIMER_SHIFT ) + | RT_BF_MAKE(VMX_BF_MISC_EXIT_SAVE_EFER_LMA, pGuestFeatures->fVmxExitSaveEferLma ) + | RT_BF_MAKE(VMX_BF_MISC_ACTIVITY_STATES, fActivityState ) + | RT_BF_MAKE(VMX_BF_MISC_INTEL_PT, pGuestFeatures->fVmxIntelPt ) + | RT_BF_MAKE(VMX_BF_MISC_SMM_READ_SMBASE_MSR, 0 ) + | RT_BF_MAKE(VMX_BF_MISC_CR3_TARGET, VMX_V_CR3_TARGET_COUNT ) + | RT_BF_MAKE(VMX_BF_MISC_MAX_MSRS, cMaxMsrs ) + | RT_BF_MAKE(VMX_BF_MISC_VMXOFF_BLOCK_SMI, 0 ) + | RT_BF_MAKE(VMX_BF_MISC_VMWRITE_ALL, pGuestFeatures->fVmxVmwriteAll ) + | RT_BF_MAKE(VMX_BF_MISC_ENTRY_INJECT_SOFT_INT, pGuestFeatures->fVmxEntryInjectSoftInt) + | RT_BF_MAKE(VMX_BF_MISC_MSEG_ID, VMX_V_MSEG_REV_ID ); + } + + /* CR0 Fixed-0. */ + pGuestVmxMsrs->u64Cr0Fixed0 = pGuestFeatures->fVmxUnrestrictedGuest ? VMX_V_CR0_FIXED0_UX: VMX_V_CR0_FIXED0; + + /* CR0 Fixed-1. */ + { + /* + * All CPUs I've looked at so far report CR0 fixed-1 bits as 0xffffffff. + * This is different from CR4 fixed-1 bits which are reported as per the + * CPU features and/or micro-architecture/generation. Why? Ask Intel. + */ + uint64_t const uHostMsr = fIsNstGstHwExecAllowed ? pHostVmxMsrs->u64Cr0Fixed1 : 0xffffffff; + pGuestVmxMsrs->u64Cr0Fixed1 = uHostMsr | VMX_V_CR0_FIXED0; /* Make sure the CR0 MB1 bits are not clear. */ + } + + /* CR4 Fixed-0. */ + pGuestVmxMsrs->u64Cr4Fixed0 = VMX_V_CR4_FIXED0; + + /* CR4 Fixed-1. */ + { + uint64_t const uHostMsr = fIsNstGstHwExecAllowed ? pHostVmxMsrs->u64Cr4Fixed1 : CPUMGetGuestCR4ValidMask(pVM); + pGuestVmxMsrs->u64Cr4Fixed1 = uHostMsr | VMX_V_CR4_FIXED0; /* Make sure the CR4 MB1 bits are not clear. */ + } + + /* VMCS Enumeration. */ + pGuestVmxMsrs->u64VmcsEnum = VMX_V_VMCS_MAX_INDEX << VMX_BF_VMCS_ENUM_HIGHEST_IDX_SHIFT; + + /* VM Functions. */ + if (pGuestFeatures->fVmxVmFunc) + pGuestVmxMsrs->u64VmFunc = RT_BF_MAKE(VMX_BF_VMFUNC_EPTP_SWITCHING, 1); +} + + +#if 0 +/** + * Checks whether the given guest CPU VMX features are compatible with the provided + * base features. + * + * @returns @c true if compatible, @c false otherwise. + * @param pVM The cross context VM structure. + * @param pBase The base VMX CPU features. + * @param pGst The guest VMX CPU features. + * + * @remarks Only VMX feature bits are examined. 
+ */ +static bool cpumR3AreVmxCpuFeaturesCompatible(PVM pVM, PCCPUMFEATURES pBase, PCCPUMFEATURES pGst) +{ + if (cpumR3IsHwAssistVmxNstGstExecAllowed(pVM)) + { + uint64_t const fBase = ((uint64_t)pBase->fVmxInsOutInfo << 0) | ((uint64_t)pBase->fVmxExtIntExit << 1) + | ((uint64_t)pBase->fVmxNmiExit << 2) | ((uint64_t)pBase->fVmxVirtNmi << 3) + | ((uint64_t)pBase->fVmxPreemptTimer << 4) | ((uint64_t)pBase->fVmxPostedInt << 5) + | ((uint64_t)pBase->fVmxIntWindowExit << 6) | ((uint64_t)pBase->fVmxTscOffsetting << 7) + | ((uint64_t)pBase->fVmxHltExit << 8) | ((uint64_t)pBase->fVmxInvlpgExit << 9) + | ((uint64_t)pBase->fVmxMwaitExit << 10) | ((uint64_t)pBase->fVmxRdpmcExit << 11) + | ((uint64_t)pBase->fVmxRdtscExit << 12) | ((uint64_t)pBase->fVmxCr3LoadExit << 13) + | ((uint64_t)pBase->fVmxCr3StoreExit << 14) | ((uint64_t)pBase->fVmxCr8LoadExit << 15) + | ((uint64_t)pBase->fVmxCr8StoreExit << 16) | ((uint64_t)pBase->fVmxUseTprShadow << 17) + | ((uint64_t)pBase->fVmxNmiWindowExit << 18) | ((uint64_t)pBase->fVmxMovDRxExit << 19) + | ((uint64_t)pBase->fVmxUncondIoExit << 20) | ((uint64_t)pBase->fVmxUseIoBitmaps << 21) + | ((uint64_t)pBase->fVmxMonitorTrapFlag << 22) | ((uint64_t)pBase->fVmxUseMsrBitmaps << 23) + | ((uint64_t)pBase->fVmxMonitorExit << 24) | ((uint64_t)pBase->fVmxPauseExit << 25) + | ((uint64_t)pBase->fVmxSecondaryExecCtls << 26) | ((uint64_t)pBase->fVmxVirtApicAccess << 27) + | ((uint64_t)pBase->fVmxEpt << 28) | ((uint64_t)pBase->fVmxDescTableExit << 29) + | ((uint64_t)pBase->fVmxRdtscp << 30) | ((uint64_t)pBase->fVmxVirtX2ApicMode << 31) + | ((uint64_t)pBase->fVmxVpid << 32) | ((uint64_t)pBase->fVmxWbinvdExit << 33) + | ((uint64_t)pBase->fVmxUnrestrictedGuest << 34) | ((uint64_t)pBase->fVmxApicRegVirt << 35) + | ((uint64_t)pBase->fVmxVirtIntDelivery << 36) | ((uint64_t)pBase->fVmxPauseLoopExit << 37) + | ((uint64_t)pBase->fVmxRdrandExit << 38) | ((uint64_t)pBase->fVmxInvpcid << 39) + | ((uint64_t)pBase->fVmxVmFunc << 40) | ((uint64_t)pBase->fVmxVmcsShadowing << 41) + | ((uint64_t)pBase->fVmxRdseedExit << 42) | ((uint64_t)pBase->fVmxPml << 43) + | ((uint64_t)pBase->fVmxEptXcptVe << 44) | ((uint64_t)pBase->fVmxXsavesXrstors << 45) + | ((uint64_t)pBase->fVmxUseTscScaling << 46) | ((uint64_t)pBase->fVmxEntryLoadDebugCtls << 47) + | ((uint64_t)pBase->fVmxIa32eModeGuest << 48) | ((uint64_t)pBase->fVmxEntryLoadEferMsr << 49) + | ((uint64_t)pBase->fVmxEntryLoadPatMsr << 50) | ((uint64_t)pBase->fVmxExitSaveDebugCtls << 51) + | ((uint64_t)pBase->fVmxHostAddrSpaceSize << 52) | ((uint64_t)pBase->fVmxExitAckExtInt << 53) + | ((uint64_t)pBase->fVmxExitSavePatMsr << 54) | ((uint64_t)pBase->fVmxExitLoadPatMsr << 55) + | ((uint64_t)pBase->fVmxExitSaveEferMsr << 56) | ((uint64_t)pBase->fVmxExitLoadEferMsr << 57) + | ((uint64_t)pBase->fVmxSavePreemptTimer << 58) | ((uint64_t)pBase->fVmxExitSaveEferLma << 59) + | ((uint64_t)pBase->fVmxIntelPt << 60) | ((uint64_t)pBase->fVmxVmwriteAll << 61) + | ((uint64_t)pBase->fVmxEntryInjectSoftInt << 62); + + uint64_t const fGst = ((uint64_t)pGst->fVmxInsOutInfo << 0) | ((uint64_t)pGst->fVmxExtIntExit << 1) + | ((uint64_t)pGst->fVmxNmiExit << 2) | ((uint64_t)pGst->fVmxVirtNmi << 3) + | ((uint64_t)pGst->fVmxPreemptTimer << 4) | ((uint64_t)pGst->fVmxPostedInt << 5) + | ((uint64_t)pGst->fVmxIntWindowExit << 6) | ((uint64_t)pGst->fVmxTscOffsetting << 7) + | ((uint64_t)pGst->fVmxHltExit << 8) | ((uint64_t)pGst->fVmxInvlpgExit << 9) + | ((uint64_t)pGst->fVmxMwaitExit << 10) | ((uint64_t)pGst->fVmxRdpmcExit << 11) + | ((uint64_t)pGst->fVmxRdtscExit << 12) | 
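                              /* (Illustrative note, not part of the upstream source: each VMX feature flag is
                                 packed into a fixed bit of a uint64_t for both the base and the guest sets;
                                 the final test, (fBase | fGst) != fBase, is a subset check that fails exactly
                                 when the guest set contains a bit the base set lacks, e.g. fVmxEpt at bit 28.) */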
((uint64_t)pGst->fVmxCr3LoadExit << 13) + | ((uint64_t)pGst->fVmxCr3StoreExit << 14) | ((uint64_t)pGst->fVmxCr8LoadExit << 15) + | ((uint64_t)pGst->fVmxCr8StoreExit << 16) | ((uint64_t)pGst->fVmxUseTprShadow << 17) + | ((uint64_t)pGst->fVmxNmiWindowExit << 18) | ((uint64_t)pGst->fVmxMovDRxExit << 19) + | ((uint64_t)pGst->fVmxUncondIoExit << 20) | ((uint64_t)pGst->fVmxUseIoBitmaps << 21) + | ((uint64_t)pGst->fVmxMonitorTrapFlag << 22) | ((uint64_t)pGst->fVmxUseMsrBitmaps << 23) + | ((uint64_t)pGst->fVmxMonitorExit << 24) | ((uint64_t)pGst->fVmxPauseExit << 25) + | ((uint64_t)pGst->fVmxSecondaryExecCtls << 26) | ((uint64_t)pGst->fVmxVirtApicAccess << 27) + | ((uint64_t)pGst->fVmxEpt << 28) | ((uint64_t)pGst->fVmxDescTableExit << 29) + | ((uint64_t)pGst->fVmxRdtscp << 30) | ((uint64_t)pGst->fVmxVirtX2ApicMode << 31) + | ((uint64_t)pGst->fVmxVpid << 32) | ((uint64_t)pGst->fVmxWbinvdExit << 33) + | ((uint64_t)pGst->fVmxUnrestrictedGuest << 34) | ((uint64_t)pGst->fVmxApicRegVirt << 35) + | ((uint64_t)pGst->fVmxVirtIntDelivery << 36) | ((uint64_t)pGst->fVmxPauseLoopExit << 37) + | ((uint64_t)pGst->fVmxRdrandExit << 38) | ((uint64_t)pGst->fVmxInvpcid << 39) + | ((uint64_t)pGst->fVmxVmFunc << 40) | ((uint64_t)pGst->fVmxVmcsShadowing << 41) + | ((uint64_t)pGst->fVmxRdseedExit << 42) | ((uint64_t)pGst->fVmxPml << 43) + | ((uint64_t)pGst->fVmxEptXcptVe << 44) | ((uint64_t)pGst->fVmxXsavesXrstors << 45) + | ((uint64_t)pGst->fVmxUseTscScaling << 46) | ((uint64_t)pGst->fVmxEntryLoadDebugCtls << 47) + | ((uint64_t)pGst->fVmxIa32eModeGuest << 48) | ((uint64_t)pGst->fVmxEntryLoadEferMsr << 49) + | ((uint64_t)pGst->fVmxEntryLoadPatMsr << 50) | ((uint64_t)pGst->fVmxExitSaveDebugCtls << 51) + | ((uint64_t)pGst->fVmxHostAddrSpaceSize << 52) | ((uint64_t)pGst->fVmxExitAckExtInt << 53) + | ((uint64_t)pGst->fVmxExitSavePatMsr << 54) | ((uint64_t)pGst->fVmxExitLoadPatMsr << 55) + | ((uint64_t)pGst->fVmxExitSaveEferMsr << 56) | ((uint64_t)pGst->fVmxExitLoadEferMsr << 57) + | ((uint64_t)pGst->fVmxSavePreemptTimer << 58) | ((uint64_t)pGst->fVmxExitSaveEferLma << 59) + | ((uint64_t)pGst->fVmxIntelPt << 60) | ((uint64_t)pGst->fVmxVmwriteAll << 61) + | ((uint64_t)pGst->fVmxEntryInjectSoftInt << 62); + + if ((fBase | fGst) != fBase) + return false; + return true; + } + return true; +} +#endif + + +/** + * Initializes VMX guest features and MSRs. + * + * @param pVM The cross context VM structure. + * @param pHostVmxMsrs The host VMX MSRs. Pass NULL when fully emulating VMX + * and no hardware-assisted nested-guest execution is + * possible for this VM. + * @param pGuestVmxMsrs Where to store the initialized guest VMX MSRs. + */ +void cpumR3InitVmxGuestFeaturesAndMsrs(PVM pVM, PCVMXMSRS pHostVmxMsrs, PVMXMSRS pGuestVmxMsrs) +{ + Assert(pVM); + Assert(pGuestVmxMsrs); + + /* + * Initialize the set of VMX features we emulate. + * + * Note! Some bits might be reported as 1 always if they fall under the + * default1 class bits (e.g. fVmxEntryLoadDebugCtls), see @bugref{9180#c5}. + */ + CPUMFEATURES EmuFeat; + RT_ZERO(EmuFeat); + EmuFeat.fVmx = 1; + EmuFeat.fVmxInsOutInfo = 0; + EmuFeat.fVmxExtIntExit = 1; + EmuFeat.fVmxNmiExit = 1; + EmuFeat.fVmxVirtNmi = 0; + EmuFeat.fVmxPreemptTimer = 0; /** @todo NSTVMX: enable this. 
*/ + EmuFeat.fVmxPostedInt = 0; + EmuFeat.fVmxIntWindowExit = 1; + EmuFeat.fVmxTscOffsetting = 1; + EmuFeat.fVmxHltExit = 1; + EmuFeat.fVmxInvlpgExit = 1; + EmuFeat.fVmxMwaitExit = 1; + EmuFeat.fVmxRdpmcExit = 1; + EmuFeat.fVmxRdtscExit = 1; + EmuFeat.fVmxCr3LoadExit = 1; + EmuFeat.fVmxCr3StoreExit = 1; + EmuFeat.fVmxCr8LoadExit = 1; + EmuFeat.fVmxCr8StoreExit = 1; + EmuFeat.fVmxUseTprShadow = 0; + EmuFeat.fVmxNmiWindowExit = 0; + EmuFeat.fVmxMovDRxExit = 1; + EmuFeat.fVmxUncondIoExit = 1; + EmuFeat.fVmxUseIoBitmaps = 1; + EmuFeat.fVmxMonitorTrapFlag = 0; + EmuFeat.fVmxUseMsrBitmaps = 1; + EmuFeat.fVmxMonitorExit = 1; + EmuFeat.fVmxPauseExit = 1; + EmuFeat.fVmxSecondaryExecCtls = 1; + EmuFeat.fVmxVirtApicAccess = 0; + EmuFeat.fVmxEpt = 0; + EmuFeat.fVmxDescTableExit = 1; + EmuFeat.fVmxRdtscp = 1; + EmuFeat.fVmxVirtX2ApicMode = 0; + EmuFeat.fVmxVpid = 0; + EmuFeat.fVmxWbinvdExit = 1; + EmuFeat.fVmxUnrestrictedGuest = 0; + EmuFeat.fVmxApicRegVirt = 0; + EmuFeat.fVmxVirtIntDelivery = 0; + EmuFeat.fVmxPauseLoopExit = 0; + EmuFeat.fVmxRdrandExit = 0; + EmuFeat.fVmxInvpcid = 1; + EmuFeat.fVmxVmFunc = 0; + EmuFeat.fVmxVmcsShadowing = 0; + EmuFeat.fVmxRdseedExit = 0; + EmuFeat.fVmxPml = 0; + EmuFeat.fVmxEptXcptVe = 0; + EmuFeat.fVmxXsavesXrstors = 0; + EmuFeat.fVmxUseTscScaling = 0; + EmuFeat.fVmxEntryLoadDebugCtls = 1; + EmuFeat.fVmxIa32eModeGuest = 1; + EmuFeat.fVmxEntryLoadEferMsr = 1; + EmuFeat.fVmxEntryLoadPatMsr = 0; + EmuFeat.fVmxExitSaveDebugCtls = 1; + EmuFeat.fVmxHostAddrSpaceSize = 1; + EmuFeat.fVmxExitAckExtInt = 0; + EmuFeat.fVmxExitSavePatMsr = 0; + EmuFeat.fVmxExitLoadPatMsr = 0; + EmuFeat.fVmxExitSaveEferMsr = 1; + EmuFeat.fVmxExitLoadEferMsr = 1; + EmuFeat.fVmxSavePreemptTimer = 0; + EmuFeat.fVmxExitSaveEferLma = 1; + EmuFeat.fVmxIntelPt = 0; + EmuFeat.fVmxVmwriteAll = 0; + EmuFeat.fVmxEntryInjectSoftInt = 0; + + /* + * Merge guest features. + * + * When hardware-assisted VMX may be used, any feature we emulate must also be supported + * by the hardware, hence we merge our emulated features with the host features below. + */ + PCCPUMFEATURES pBaseFeat = cpumR3IsHwAssistNstGstExecAllowed(pVM) ? 
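                                     /* (Illustrative note, not part of the upstream source: when hardware-assisted
                                        nested-guest execution is possible the host feature set is used as the base,
                                        so the ANDing below can only expose VMX features the physical CPU also has;
                                        in the IEM-only case EmuFeat itself is the base and the guest sees exactly
                                        the emulated feature set.) */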
&pVM->cpum.s.HostFeatures : &EmuFeat; + PCPUMFEATURES pGuestFeat = &pVM->cpum.s.GuestFeatures; + Assert(pBaseFeat->fVmx); + pGuestFeat->fVmxInsOutInfo = (pBaseFeat->fVmxInsOutInfo & EmuFeat.fVmxInsOutInfo ); + pGuestFeat->fVmxExtIntExit = (pBaseFeat->fVmxExtIntExit & EmuFeat.fVmxExtIntExit ); + pGuestFeat->fVmxNmiExit = (pBaseFeat->fVmxNmiExit & EmuFeat.fVmxNmiExit ); + pGuestFeat->fVmxVirtNmi = (pBaseFeat->fVmxVirtNmi & EmuFeat.fVmxVirtNmi ); + pGuestFeat->fVmxPreemptTimer = (pBaseFeat->fVmxPreemptTimer & EmuFeat.fVmxPreemptTimer ); + pGuestFeat->fVmxPostedInt = (pBaseFeat->fVmxPostedInt & EmuFeat.fVmxPostedInt ); + pGuestFeat->fVmxIntWindowExit = (pBaseFeat->fVmxIntWindowExit & EmuFeat.fVmxIntWindowExit ); + pGuestFeat->fVmxTscOffsetting = (pBaseFeat->fVmxTscOffsetting & EmuFeat.fVmxTscOffsetting ); + pGuestFeat->fVmxHltExit = (pBaseFeat->fVmxHltExit & EmuFeat.fVmxHltExit ); + pGuestFeat->fVmxInvlpgExit = (pBaseFeat->fVmxInvlpgExit & EmuFeat.fVmxInvlpgExit ); + pGuestFeat->fVmxMwaitExit = (pBaseFeat->fVmxMwaitExit & EmuFeat.fVmxMwaitExit ); + pGuestFeat->fVmxRdpmcExit = (pBaseFeat->fVmxRdpmcExit & EmuFeat.fVmxRdpmcExit ); + pGuestFeat->fVmxRdtscExit = (pBaseFeat->fVmxRdtscExit & EmuFeat.fVmxRdtscExit ); + pGuestFeat->fVmxCr3LoadExit = (pBaseFeat->fVmxCr3LoadExit & EmuFeat.fVmxCr3LoadExit ); + pGuestFeat->fVmxCr3StoreExit = (pBaseFeat->fVmxCr3StoreExit & EmuFeat.fVmxCr3StoreExit ); + pGuestFeat->fVmxCr8LoadExit = (pBaseFeat->fVmxCr8LoadExit & EmuFeat.fVmxCr8LoadExit ); + pGuestFeat->fVmxCr8StoreExit = (pBaseFeat->fVmxCr8StoreExit & EmuFeat.fVmxCr8StoreExit ); + pGuestFeat->fVmxUseTprShadow = (pBaseFeat->fVmxUseTprShadow & EmuFeat.fVmxUseTprShadow ); + pGuestFeat->fVmxNmiWindowExit = (pBaseFeat->fVmxNmiWindowExit & EmuFeat.fVmxNmiWindowExit ); + pGuestFeat->fVmxMovDRxExit = (pBaseFeat->fVmxMovDRxExit & EmuFeat.fVmxMovDRxExit ); + pGuestFeat->fVmxUncondIoExit = (pBaseFeat->fVmxUncondIoExit & EmuFeat.fVmxUncondIoExit ); + pGuestFeat->fVmxUseIoBitmaps = (pBaseFeat->fVmxUseIoBitmaps & EmuFeat.fVmxUseIoBitmaps ); + pGuestFeat->fVmxMonitorTrapFlag = (pBaseFeat->fVmxMonitorTrapFlag & EmuFeat.fVmxMonitorTrapFlag ); + pGuestFeat->fVmxUseMsrBitmaps = (pBaseFeat->fVmxUseMsrBitmaps & EmuFeat.fVmxUseMsrBitmaps ); + pGuestFeat->fVmxMonitorExit = (pBaseFeat->fVmxMonitorExit & EmuFeat.fVmxMonitorExit ); + pGuestFeat->fVmxPauseExit = (pBaseFeat->fVmxPauseExit & EmuFeat.fVmxPauseExit ); + pGuestFeat->fVmxSecondaryExecCtls = (pBaseFeat->fVmxSecondaryExecCtls & EmuFeat.fVmxSecondaryExecCtls ); + pGuestFeat->fVmxVirtApicAccess = (pBaseFeat->fVmxVirtApicAccess & EmuFeat.fVmxVirtApicAccess ); + pGuestFeat->fVmxEpt = (pBaseFeat->fVmxEpt & EmuFeat.fVmxEpt ); + pGuestFeat->fVmxDescTableExit = (pBaseFeat->fVmxDescTableExit & EmuFeat.fVmxDescTableExit ); + pGuestFeat->fVmxRdtscp = (pBaseFeat->fVmxRdtscp & EmuFeat.fVmxRdtscp ); + pGuestFeat->fVmxVirtX2ApicMode = (pBaseFeat->fVmxVirtX2ApicMode & EmuFeat.fVmxVirtX2ApicMode ); + pGuestFeat->fVmxVpid = (pBaseFeat->fVmxVpid & EmuFeat.fVmxVpid ); + pGuestFeat->fVmxWbinvdExit = (pBaseFeat->fVmxWbinvdExit & EmuFeat.fVmxWbinvdExit ); + pGuestFeat->fVmxUnrestrictedGuest = (pBaseFeat->fVmxUnrestrictedGuest & EmuFeat.fVmxUnrestrictedGuest ); + pGuestFeat->fVmxApicRegVirt = (pBaseFeat->fVmxApicRegVirt & EmuFeat.fVmxApicRegVirt ); + pGuestFeat->fVmxVirtIntDelivery = (pBaseFeat->fVmxVirtIntDelivery & EmuFeat.fVmxVirtIntDelivery ); + pGuestFeat->fVmxPauseLoopExit = (pBaseFeat->fVmxPauseLoopExit & EmuFeat.fVmxPauseLoopExit ); + pGuestFeat->fVmxRdrandExit = (pBaseFeat->fVmxRdrandExit & 
EmuFeat.fVmxRdrandExit ); + pGuestFeat->fVmxInvpcid = (pBaseFeat->fVmxInvpcid & EmuFeat.fVmxInvpcid ); + pGuestFeat->fVmxVmFunc = (pBaseFeat->fVmxVmFunc & EmuFeat.fVmxVmFunc ); + pGuestFeat->fVmxVmcsShadowing = (pBaseFeat->fVmxVmcsShadowing & EmuFeat.fVmxVmcsShadowing ); + pGuestFeat->fVmxRdseedExit = (pBaseFeat->fVmxRdseedExit & EmuFeat.fVmxRdseedExit ); + pGuestFeat->fVmxPml = (pBaseFeat->fVmxPml & EmuFeat.fVmxPml ); + pGuestFeat->fVmxEptXcptVe = (pBaseFeat->fVmxEptXcptVe & EmuFeat.fVmxEptXcptVe ); + pGuestFeat->fVmxXsavesXrstors = (pBaseFeat->fVmxXsavesXrstors & EmuFeat.fVmxXsavesXrstors ); + pGuestFeat->fVmxUseTscScaling = (pBaseFeat->fVmxUseTscScaling & EmuFeat.fVmxUseTscScaling ); + pGuestFeat->fVmxEntryLoadDebugCtls = (pBaseFeat->fVmxEntryLoadDebugCtls & EmuFeat.fVmxEntryLoadDebugCtls ); + pGuestFeat->fVmxIa32eModeGuest = (pBaseFeat->fVmxIa32eModeGuest & EmuFeat.fVmxIa32eModeGuest ); + pGuestFeat->fVmxEntryLoadEferMsr = (pBaseFeat->fVmxEntryLoadEferMsr & EmuFeat.fVmxEntryLoadEferMsr ); + pGuestFeat->fVmxEntryLoadPatMsr = (pBaseFeat->fVmxEntryLoadPatMsr & EmuFeat.fVmxEntryLoadPatMsr ); + pGuestFeat->fVmxExitSaveDebugCtls = (pBaseFeat->fVmxExitSaveDebugCtls & EmuFeat.fVmxExitSaveDebugCtls ); + pGuestFeat->fVmxHostAddrSpaceSize = (pBaseFeat->fVmxHostAddrSpaceSize & EmuFeat.fVmxHostAddrSpaceSize ); + pGuestFeat->fVmxExitAckExtInt = (pBaseFeat->fVmxExitAckExtInt & EmuFeat.fVmxExitAckExtInt ); + pGuestFeat->fVmxExitSavePatMsr = (pBaseFeat->fVmxExitSavePatMsr & EmuFeat.fVmxExitSavePatMsr ); + pGuestFeat->fVmxExitLoadPatMsr = (pBaseFeat->fVmxExitLoadPatMsr & EmuFeat.fVmxExitLoadPatMsr ); + pGuestFeat->fVmxExitSaveEferMsr = (pBaseFeat->fVmxExitSaveEferMsr & EmuFeat.fVmxExitSaveEferMsr ); + pGuestFeat->fVmxExitLoadEferMsr = (pBaseFeat->fVmxExitLoadEferMsr & EmuFeat.fVmxExitLoadEferMsr ); + pGuestFeat->fVmxSavePreemptTimer = (pBaseFeat->fVmxSavePreemptTimer & EmuFeat.fVmxSavePreemptTimer ); + pGuestFeat->fVmxExitSaveEferLma = (pBaseFeat->fVmxExitSaveEferLma & EmuFeat.fVmxExitSaveEferLma ); + pGuestFeat->fVmxIntelPt = (pBaseFeat->fVmxIntelPt & EmuFeat.fVmxIntelPt ); + pGuestFeat->fVmxVmwriteAll = (pBaseFeat->fVmxVmwriteAll & EmuFeat.fVmxVmwriteAll ); + pGuestFeat->fVmxEntryInjectSoftInt = (pBaseFeat->fVmxEntryInjectSoftInt & EmuFeat.fVmxEntryInjectSoftInt ); + + /* Paranoia. */ + if (!pGuestFeat->fVmxSecondaryExecCtls) + { + Assert(!pGuestFeat->fVmxVirtApicAccess); + Assert(!pGuestFeat->fVmxEpt); + Assert(!pGuestFeat->fVmxDescTableExit); + Assert(!pGuestFeat->fVmxRdtscp); + Assert(!pGuestFeat->fVmxVirtX2ApicMode); + Assert(!pGuestFeat->fVmxVpid); + Assert(!pGuestFeat->fVmxWbinvdExit); + Assert(!pGuestFeat->fVmxUnrestrictedGuest); + Assert(!pGuestFeat->fVmxApicRegVirt); + Assert(!pGuestFeat->fVmxVirtIntDelivery); + Assert(!pGuestFeat->fVmxPauseLoopExit); + Assert(!pGuestFeat->fVmxRdrandExit); + Assert(!pGuestFeat->fVmxInvpcid); + Assert(!pGuestFeat->fVmxVmFunc); + Assert(!pGuestFeat->fVmxVmcsShadowing); + Assert(!pGuestFeat->fVmxRdseedExit); + Assert(!pGuestFeat->fVmxPml); + Assert(!pGuestFeat->fVmxEptXcptVe); + Assert(!pGuestFeat->fVmxXsavesXrstors); + Assert(!pGuestFeat->fVmxUseTscScaling); + } + if (pGuestFeat->fVmxUnrestrictedGuest) + { + /* See footnote in Intel spec. 27.2 "Recording VM-Exit Information And Updating VM-entry Control Fields". */ + Assert(pGuestFeat->fVmxExitSaveEferLma); + } + + /* + * Finally initialize the VMX guest MSRs. + */ + cpumR3InitVmxGuestMsrs(pVM, pHostVmxMsrs, pGuestFeat, pGuestVmxMsrs); +} + + +/** + * Gets the host hardware-virtualization MSRs. 
+ * + * @returns VBox status code. + * @param pMsrs Where to store the MSRs. + */ +static int cpumR3GetHostHwvirtMsrs(PCPUMMSRS pMsrs) +{ + Assert(pMsrs); + + uint32_t fCaps = 0; + int rc = SUPR3QueryVTCaps(&fCaps); + if (RT_SUCCESS(rc)) + { + if (fCaps & (SUPVTCAPS_VT_X | SUPVTCAPS_AMD_V)) + { + SUPHWVIRTMSRS HwvirtMsrs; + rc = SUPR3GetHwvirtMsrs(&HwvirtMsrs, false /* fForceRequery */); + if (RT_SUCCESS(rc)) + { + if (fCaps & SUPVTCAPS_VT_X) + HMGetVmxMsrsFromHwvirtMsrs(&HwvirtMsrs, &pMsrs->hwvirt.vmx); + else + HMGetSvmMsrsFromHwvirtMsrs(&HwvirtMsrs, &pMsrs->hwvirt.svm); + return VINF_SUCCESS; + } + + LogRel(("CPUM: Querying hardware-virtualization MSRs failed. rc=%Rrc\n", rc)); + return rc; + } + else + { + LogRel(("CPUM: Querying hardware-virtualization capability succeeded but did not find VT-x or AMD-V\n")); + return VERR_INTERNAL_ERROR_5; + } + } + else + LogRel(("CPUM: No hardware-virtualization capability detected\n")); + + return VINF_SUCCESS; +} + + +/** + * Initializes the CPUM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) CPUMR3Init(PVM pVM) +{ + LogFlow(("CPUMR3Init\n")); + + /* + * Assert alignment, sizes and tables. + */ + AssertCompileMemberAlignment(VM, cpum.s, 32); + AssertCompile(sizeof(pVM->cpum.s) <= sizeof(pVM->cpum.padding)); + AssertCompileSizeAlignment(CPUMCTX, 64); + AssertCompileSizeAlignment(CPUMCTXMSRS, 64); + AssertCompileSizeAlignment(CPUMHOSTCTX, 64); + AssertCompileMemberAlignment(VM, cpum, 64); + AssertCompileMemberAlignment(VM, aCpus, 64); + AssertCompileMemberAlignment(VMCPU, cpum.s, 64); + AssertCompileMemberSizeAlignment(VM, aCpus[0].cpum.s, 64); +#ifdef VBOX_STRICT + int rc2 = cpumR3MsrStrictInitChecks(); + AssertRCReturn(rc2, rc2); +#endif + + /* + * Initialize offsets. + */ + + /* Calculate the offset from CPUM to CPUMCPU for the first CPU. */ + pVM->cpum.s.offCPUMCPU0 = RT_UOFFSETOF(VM, aCpus[0].cpum) - RT_UOFFSETOF(VM, cpum); + Assert((uintptr_t)&pVM->cpum + pVM->cpum.s.offCPUMCPU0 == (uintptr_t)&pVM->aCpus[0].cpum); + + + /* Calculate the offset from CPUMCPU to CPUM. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->cpum.s.offCPUM = RT_UOFFSETOF_DYN(VM, aCpus[i].cpum) - RT_UOFFSETOF(VM, cpum); + Assert((uintptr_t)&pVCpu->cpum - pVCpu->cpum.s.offCPUM == (uintptr_t)&pVM->cpum); + } + + /* + * Gather info about the host CPU. + */ + if (!ASMHasCpuId()) + { + LogRel(("The CPU doesn't support CPUID!\n")); + return VERR_UNSUPPORTED_CPU; + } + + pVM->cpum.s.fHostMxCsrMask = CPUMR3DeterminHostMxCsrMask(); + + CPUMMSRS HostMsrs; + RT_ZERO(HostMsrs); + int rc = cpumR3GetHostHwvirtMsrs(&HostMsrs); + AssertLogRelRCReturn(rc, rc); + + PCPUMCPUIDLEAF paLeaves; + uint32_t cLeaves; + rc = CPUMR3CpuIdCollectLeaves(&paLeaves, &cLeaves); + AssertLogRelRCReturn(rc, rc); + + rc = cpumR3CpuIdExplodeFeatures(paLeaves, cLeaves, &HostMsrs, &pVM->cpum.s.HostFeatures); + RTMemFree(paLeaves); + AssertLogRelRCReturn(rc, rc); + pVM->cpum.s.GuestFeatures.enmCpuVendor = pVM->cpum.s.HostFeatures.enmCpuVendor; + + /* + * Check that the CPU supports the minimum features we require. 
+ */ + if (!pVM->cpum.s.HostFeatures.fFxSaveRstor) + return VMSetError(pVM, VERR_UNSUPPORTED_CPU, RT_SRC_POS, "Host CPU does not support the FXSAVE/FXRSTOR instruction."); + if (!pVM->cpum.s.HostFeatures.fMmx) + return VMSetError(pVM, VERR_UNSUPPORTED_CPU, RT_SRC_POS, "Host CPU does not support MMX."); + if (!pVM->cpum.s.HostFeatures.fTsc) + return VMSetError(pVM, VERR_UNSUPPORTED_CPU, RT_SRC_POS, "Host CPU does not support RDTSC."); + + /* + * Setup the CR4 AND and OR masks used in the raw-mode switcher. + */ + pVM->cpum.s.CR4.AndMask = X86_CR4_OSXMMEEXCPT | X86_CR4_PVI | X86_CR4_VME; + pVM->cpum.s.CR4.OrMask = X86_CR4_OSFXSR; + + /* + * Figure out which XSAVE/XRSTOR features are available on the host. + */ + uint64_t fXcr0Host = 0; + uint64_t fXStateHostMask = 0; + if ( pVM->cpum.s.HostFeatures.fXSaveRstor + && pVM->cpum.s.HostFeatures.fOpSysXSaveRstor) + { + fXStateHostMask = fXcr0Host = ASMGetXcr0(); + fXStateHostMask &= XSAVE_C_X87 | XSAVE_C_SSE | XSAVE_C_YMM | XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI; + AssertLogRelMsgStmt((fXStateHostMask & (XSAVE_C_X87 | XSAVE_C_SSE)) == (XSAVE_C_X87 | XSAVE_C_SSE), + ("%#llx\n", fXStateHostMask), fXStateHostMask = 0); + } + pVM->cpum.s.fXStateHostMask = fXStateHostMask; + if (VM_IS_RAW_MODE_ENABLED(pVM)) /* For raw-mode, we only use XSAVE/XRSTOR when the guest starts using it (CPUID/CR4 visibility). */ + fXStateHostMask = 0; + LogRel(("CPUM: fXStateHostMask=%#llx; initial: %#llx; host XCR0=%#llx\n", + pVM->cpum.s.fXStateHostMask, fXStateHostMask, fXcr0Host)); + + /* + * Allocate memory for the extended CPU state and initialize the host XSAVE/XRSTOR mask. + */ + uint32_t cbMaxXState = pVM->cpum.s.HostFeatures.cbMaxExtendedState; + cbMaxXState = RT_ALIGN(cbMaxXState, 128); + AssertLogRelReturn(cbMaxXState >= sizeof(X86FXSTATE) && cbMaxXState <= _8K, VERR_CPUM_IPE_2); + + uint8_t *pbXStates; + rc = MMR3HyperAllocOnceNoRelEx(pVM, cbMaxXState * 3 * pVM->cCpus, PAGE_SIZE, MM_TAG_CPUM_CTX, + MMHYPER_AONR_FLAGS_KERNEL_MAPPING, (void **)&pbXStates); + AssertLogRelRCReturn(rc, rc); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->cpum.s.Guest.pXStateR3 = (PX86XSAVEAREA)pbXStates; + pVCpu->cpum.s.Guest.pXStateR0 = MMHyperR3ToR0(pVM, pbXStates); + pVCpu->cpum.s.Guest.pXStateRC = MMHyperR3ToR0(pVM, pbXStates); + pbXStates += cbMaxXState; + + pVCpu->cpum.s.Host.pXStateR3 = (PX86XSAVEAREA)pbXStates; + pVCpu->cpum.s.Host.pXStateR0 = MMHyperR3ToR0(pVM, pbXStates); + pVCpu->cpum.s.Host.pXStateRC = MMHyperR3ToR0(pVM, pbXStates); + pbXStates += cbMaxXState; + + pVCpu->cpum.s.Hyper.pXStateR3 = (PX86XSAVEAREA)pbXStates; + pVCpu->cpum.s.Hyper.pXStateR0 = MMHyperR3ToR0(pVM, pbXStates); + pVCpu->cpum.s.Hyper.pXStateRC = MMHyperR3ToR0(pVM, pbXStates); + pbXStates += cbMaxXState; + + pVCpu->cpum.s.Host.fXStateMask = fXStateHostMask; + } + + /* + * Register saved state data item. + */ + rc = SSMR3RegisterInternal(pVM, "cpum", 1, CPUM_SAVED_STATE_VERSION, sizeof(CPUM), + NULL, cpumR3LiveExec, NULL, + NULL, cpumR3SaveExec, NULL, + cpumR3LoadPrep, cpumR3LoadExec, cpumR3LoadDone); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register info handlers and registers with the debugger facility. + */ + DBGFR3InfoRegisterInternalEx(pVM, "cpum", "Displays the all the cpu states.", + &cpumR3InfoAll, DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternalEx(pVM, "cpumguest", "Displays the guest cpu state.", + &cpumR3InfoGuest, DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternalEx(pVM, "cpumguesthwvirt", "Displays the guest hwvirt. 
cpu state.", + &cpumR3InfoGuestHwvirt, DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternalEx(pVM, "cpumhyper", "Displays the hypervisor cpu state.", + &cpumR3InfoHyper, DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternalEx(pVM, "cpumhost", "Displays the host cpu state.", + &cpumR3InfoHost, DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternalEx(pVM, "cpumguestinstr", "Displays the current guest instruction.", + &cpumR3InfoGuestInstr, DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternal( pVM, "cpuid", "Displays the guest cpuid leaves.", &cpumR3CpuIdInfo); + DBGFR3InfoRegisterInternal( pVM, "cpumvmxfeat", "Displays the host and guest VMX hwvirt. features.", + &cpumR3InfoVmxFeatures); + + rc = cpumR3DbgInit(pVM); + if (RT_FAILURE(rc)) + return rc; + + /* + * Check if we need to workaround partial/leaky FPU handling. + */ + cpumR3CheckLeakyFpu(pVM); + + /* + * Initialize the Guest CPUID and MSR states. + */ + rc = cpumR3InitCpuIdAndMsrs(pVM, &HostMsrs); + if (RT_FAILURE(rc)) + return rc; + + /* + * Allocate memory required by the guest hardware-virtualization structures. + * This must be done after initializing CPUID/MSR features as we access the + * the VMX/SVM guest features below. + */ + if (pVM->cpum.s.GuestFeatures.fVmx) + rc = cpumR3AllocVmxHwVirtState(pVM); + else if (pVM->cpum.s.GuestFeatures.fSvm) + rc = cpumR3AllocSvmHwVirtState(pVM); + else + Assert(pVM->aCpus[0].cpum.s.Guest.hwvirt.enmHwvirt == CPUMHWVIRT_NONE); + if (RT_FAILURE(rc)) + return rc; + + /* + * Workaround for missing cpuid(0) patches when leaf 4 returns GuestInfo.DefCpuId: + * If we miss to patch a cpuid(0).eax then Linux tries to determine the number + * of processors from (cpuid(4).eax >> 26) + 1. + * + * Note: this code is obsolete, but let's keep it here for reference. + * Purpose is valid when we artificially cap the max std id to less than 4. + * + * Note: This used to be a separate function CPUMR3SetHwVirt that was called + * after VMINITCOMPLETED_HM. + */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + Assert( (pVM->cpum.s.aGuestCpuIdPatmStd[4].uEax & UINT32_C(0xffffc000)) == 0 + || pVM->cpum.s.aGuestCpuIdPatmStd[0].uEax < 0x4); + pVM->cpum.s.aGuestCpuIdPatmStd[4].uEax &= UINT32_C(0x00003fff); + } + + CPUMR3Reset(pVM); + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * The CPUM will update the addresses used by the switcher. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) CPUMR3Relocate(PVM pVM) +{ + LogFlow(("CPUMR3Relocate\n")); + + pVM->cpum.s.GuestInfo.paMsrRangesRC = MMHyperR3ToRC(pVM, pVM->cpum.s.GuestInfo.paMsrRangesR3); + pVM->cpum.s.GuestInfo.paCpuIdLeavesRC = MMHyperR3ToRC(pVM, pVM->cpum.s.GuestInfo.paCpuIdLeavesR3); + + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + pVCpu->cpum.s.Guest.pXStateRC = MMHyperR3ToRC(pVM, pVCpu->cpum.s.Guest.pXStateR3); + pVCpu->cpum.s.Host.pXStateRC = MMHyperR3ToRC(pVM, pVCpu->cpum.s.Host.pXStateR3); + pVCpu->cpum.s.Hyper.pXStateRC = MMHyperR3ToRC(pVM, pVCpu->cpum.s.Hyper.pXStateR3); /** @todo remove me */ + + /* Recheck the guest DRx values in raw-mode. */ + CPUMRecalcHyperDRx(pVCpu, UINT8_MAX, false); + } +} + + +/** + * Terminates the CPUM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) CPUMR3Term(PVM pVM) +{ +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + + memset(pVCpu->cpum.s.aMagic, 0, sizeof(pVCpu->cpum.s.aMagic)); + pVCpu->cpum.s.uMagic = 0; + pCtx->dr[5] = 0; + } +#endif + + if (pVM->cpum.s.GuestFeatures.fVmx) + cpumR3FreeVmxHwVirtState(pVM); + else if (pVM->cpum.s.GuestFeatures.fSvm) + cpumR3FreeSvmHwVirtState(pVM); + return VINF_SUCCESS; +} + + +/** + * Resets a virtual CPU. + * + * Used by CPUMR3Reset and CPU hot plugging. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the CPU that is + * being reset. This may differ from the current EMT. + */ +VMMR3DECL(void) CPUMR3ResetCpu(PVM pVM, PVMCPU pVCpu) +{ + /** @todo anything different for VCPU > 0? */ + PCPUMCTX pCtx = &pVCpu->cpum.s.Guest; + + /* + * Initialize everything to ZERO first. + */ + uint32_t fUseFlags = pVCpu->cpum.s.fUseFlags & ~CPUM_USED_FPU_SINCE_REM; + + AssertCompile(RTASSERT_OFFSET_OF(CPUMCTX, pXStateR0) < RTASSERT_OFFSET_OF(CPUMCTX, pXStateR3)); + AssertCompile(RTASSERT_OFFSET_OF(CPUMCTX, pXStateR0) < RTASSERT_OFFSET_OF(CPUMCTX, pXStateRC)); + memset(pCtx, 0, RT_UOFFSETOF(CPUMCTX, pXStateR0)); + + pVCpu->cpum.s.fUseFlags = fUseFlags; + + pCtx->cr0 = X86_CR0_CD | X86_CR0_NW | X86_CR0_ET; //0x60000010 + pCtx->eip = 0x0000fff0; + pCtx->edx = 0x00000600; /* P6 processor */ + pCtx->eflags.Bits.u1Reserved0 = 1; + + pCtx->cs.Sel = 0xf000; + pCtx->cs.ValidSel = 0xf000; + pCtx->cs.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->cs.u64Base = UINT64_C(0xffff0000); + pCtx->cs.u32Limit = 0x0000ffff; + pCtx->cs.Attr.n.u1DescType = 1; /* code/data segment */ + pCtx->cs.Attr.n.u1Present = 1; + pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_ER_ACC; + + pCtx->ds.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ds.u32Limit = 0x0000ffff; + pCtx->ds.Attr.n.u1DescType = 1; /* code/data segment */ + pCtx->ds.Attr.n.u1Present = 1; + pCtx->ds.Attr.n.u4Type = X86_SEL_TYPE_RW_ACC; + + pCtx->es.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->es.u32Limit = 0x0000ffff; + pCtx->es.Attr.n.u1DescType = 1; /* code/data segment */ + pCtx->es.Attr.n.u1Present = 1; + pCtx->es.Attr.n.u4Type = X86_SEL_TYPE_RW_ACC; + + pCtx->fs.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->fs.u32Limit = 0x0000ffff; + pCtx->fs.Attr.n.u1DescType = 1; /* code/data segment */ + pCtx->fs.Attr.n.u1Present = 1; + pCtx->fs.Attr.n.u4Type = X86_SEL_TYPE_RW_ACC; + + pCtx->gs.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->gs.u32Limit = 0x0000ffff; + pCtx->gs.Attr.n.u1DescType = 1; /* code/data segment */ + pCtx->gs.Attr.n.u1Present = 1; + pCtx->gs.Attr.n.u4Type = X86_SEL_TYPE_RW_ACC; + + pCtx->ss.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ss.u32Limit = 0x0000ffff; + pCtx->ss.Attr.n.u1Present = 1; + pCtx->ss.Attr.n.u1DescType = 1; /* code/data segment */ + pCtx->ss.Attr.n.u4Type = X86_SEL_TYPE_RW_ACC; + + pCtx->idtr.cbIdt = 0xffff; + pCtx->gdtr.cbGdt = 0xffff; + + pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ldtr.u32Limit = 0xffff; + pCtx->ldtr.Attr.n.u1Present = 1; + pCtx->ldtr.Attr.n.u4Type = X86_SEL_TYPE_SYS_LDT; + + pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->tr.u32Limit = 0xffff; + pCtx->tr.Attr.n.u1Present = 1; + pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY; /* Deduction, not properly documented by Intel. 
*/ + + pCtx->dr[6] = X86_DR6_INIT_VAL; + pCtx->dr[7] = X86_DR7_INIT_VAL; + + PX86FXSTATE pFpuCtx = &pCtx->pXStateR3->x87; AssertReleaseMsg(RT_VALID_PTR(pFpuCtx), ("%p\n", pFpuCtx)); + pFpuCtx->FTW = 0x00; /* All empty (abbridged tag reg edition). */ + pFpuCtx->FCW = 0x37f; + + /* Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3A, Table 8-1. + IA-32 Processor States Following Power-up, Reset, or INIT */ + pFpuCtx->MXCSR = 0x1F80; + pFpuCtx->MXCSR_MASK = pVM->cpum.s.GuestInfo.fMxCsrMask; /** @todo check if REM messes this up... */ + + pCtx->aXcr[0] = XSAVE_C_X87; + if (pVM->cpum.s.HostFeatures.cbMaxExtendedState >= RT_UOFFSETOF(X86XSAVEAREA, Hdr)) + { + /* The entire FXSAVE state needs loading when we switch to XSAVE/XRSTOR + as we don't know what happened before. (Bother optimize later?) */ + pCtx->pXStateR3->Hdr.bmXState = XSAVE_C_X87 | XSAVE_C_SSE; + } + + /* + * MSRs. + */ + /* Init PAT MSR */ + pCtx->msrPAT = MSR_IA32_CR_PAT_INIT_VAL; + + /* EFER MBZ; see AMD64 Architecture Programmer's Manual Volume 2: Table 14-1. Initial Processor State. + * The Intel docs don't mention it. */ + Assert(!pCtx->msrEFER); + + /* IA32_MISC_ENABLE - not entirely sure what the init/reset state really + is supposed to be here, just trying provide useful/sensible values. */ + PCPUMMSRRANGE pRange = cpumLookupMsrRange(pVM, MSR_IA32_MISC_ENABLE); + if (pRange) + { + pVCpu->cpum.s.GuestMsrs.msr.MiscEnable = MSR_IA32_MISC_ENABLE_BTS_UNAVAIL + | MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL + | (pVM->cpum.s.GuestFeatures.fMonitorMWait ? MSR_IA32_MISC_ENABLE_MONITOR : 0) + | MSR_IA32_MISC_ENABLE_FAST_STRINGS; + pRange->fWrIgnMask |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL + | MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL; + pRange->fWrGpMask &= ~pVCpu->cpum.s.GuestMsrs.msr.MiscEnable; + } + + /** @todo Wire IA32_MISC_ENABLE bit 22 to our NT 4 CPUID trick. */ + + /** @todo r=ramshankar: Currently broken for SMP as TMCpuTickSet() expects to be + * called from each EMT while we're getting called by CPUMR3Reset() + * iteratively on the same thread. Fix later. */ +#if 0 /** @todo r=bird: This we will do in TM, not here. */ + /* TSC must be 0. Intel spec. Table 9-1. "IA-32 Processor States Following Power-up, Reset, or INIT." */ + CPUMSetGuestMsr(pVCpu, MSR_IA32_TSC, 0); +#endif + + + /* C-state control. Guesses. */ + pVCpu->cpum.s.GuestMsrs.msr.PkgCStateCfgCtrl = 1 /*C1*/ | RT_BIT_32(25) | RT_BIT_32(26) | RT_BIT_32(27) | RT_BIT_32(28); + /* For Nehalem+ and Atoms, the 0xE2 MSR (MSR_PKG_CST_CONFIG_CONTROL) is documented. For Core 2, + * it's undocumented but exists as MSR_PMG_CST_CONFIG_CONTROL and has similar but not identical + * functionality. The default value must be different due to incompatible write mask. + */ + if (CPUMMICROARCH_IS_INTEL_CORE2(pVM->cpum.s.GuestFeatures.enmMicroarch)) + pVCpu->cpum.s.GuestMsrs.msr.PkgCStateCfgCtrl = 0x202a01; /* From Mac Pro Harpertown, unlocked. */ + else if (pVM->cpum.s.GuestFeatures.enmMicroarch == kCpumMicroarch_Intel_Core_Yonah) + pVCpu->cpum.s.GuestMsrs.msr.PkgCStateCfgCtrl = 0x26740c; /* From MacBookPro1,1. */ + + /* + * Hardware virtualization state. + */ + CPUMSetGuestGif(pCtx, true); + Assert(!pVM->cpum.s.GuestFeatures.fVmx || !pVM->cpum.s.GuestFeatures.fSvm); /* Paranoia. */ + if (pVM->cpum.s.GuestFeatures.fVmx) + cpumR3ResetVmxHwVirtState(pVCpu); + else if (pVM->cpum.s.GuestFeatures.fSvm) + cpumR3ResetSvmHwVirtState(pVCpu); +} + + +/** + * Resets the CPU. + * + * @returns VINF_SUCCESS. + * @param pVM The cross context VM structure. 
+ */ +VMMR3DECL(void) CPUMR3Reset(PVM pVM) +{ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + CPUMR3ResetCpu(pVM, &pVM->aCpus[i]); + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + PCPUMCTX pCtx = &pVM->aCpus[i].cpum.s.Guest; + + /* Magic marker for searching in crash dumps. */ + strcpy((char *)pVM->aCpus[i].cpum.s.aMagic, "CPUMCPU Magic"); + pVM->aCpus[i].cpum.s.uMagic = UINT64_C(0xDEADBEEFDEADBEEF); + pCtx->dr[5] = UINT64_C(0xDEADBEEFDEADBEEF); +#endif + } +} + + + + +/** + * Pass 0 live exec callback. + * + * @returns VINF_SSM_DONT_CALL_AGAIN. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uPass The pass (0). + */ +static DECLCALLBACK(int) cpumR3LiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + AssertReturn(uPass == 0, VERR_SSM_UNEXPECTED_PASS); + cpumR3SaveCpuId(pVM, pSSM); + return VINF_SSM_DONT_CALL_AGAIN; +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) cpumR3SaveExec(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Save. + */ + SSMR3PutU32(pSSM, pVM->cCpus); + SSMR3PutU32(pSSM, sizeof(pVM->aCpus[0].cpum.s.GuestMsrs.msr)); + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + + SSMR3PutStructEx(pSSM, &pVCpu->cpum.s.Hyper, sizeof(pVCpu->cpum.s.Hyper), 0, g_aCpumCtxFields, NULL); + + PCPUMCTX pGstCtx = &pVCpu->cpum.s.Guest; + SSMR3PutStructEx(pSSM, pGstCtx, sizeof(*pGstCtx), 0, g_aCpumCtxFields, NULL); + SSMR3PutStructEx(pSSM, &pGstCtx->pXStateR3->x87, sizeof(pGstCtx->pXStateR3->x87), 0, g_aCpumX87Fields, NULL); + if (pGstCtx->fXStateMask != 0) + SSMR3PutStructEx(pSSM, &pGstCtx->pXStateR3->Hdr, sizeof(pGstCtx->pXStateR3->Hdr), 0, g_aCpumXSaveHdrFields, NULL); + if (pGstCtx->fXStateMask & XSAVE_C_YMM) + { + PCX86XSAVEYMMHI pYmmHiCtx = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_YMM_BIT, PCX86XSAVEYMMHI); + SSMR3PutStructEx(pSSM, pYmmHiCtx, sizeof(*pYmmHiCtx), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumYmmHiFields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_BNDREGS) + { + PCX86XSAVEBNDREGS pBndRegs = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_BNDREGS_BIT, PCX86XSAVEBNDREGS); + SSMR3PutStructEx(pSSM, pBndRegs, sizeof(*pBndRegs), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumBndRegsFields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_BNDCSR) + { + PCX86XSAVEBNDCFG pBndCfg = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_BNDCSR_BIT, PCX86XSAVEBNDCFG); + SSMR3PutStructEx(pSSM, pBndCfg, sizeof(*pBndCfg), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumBndCfgFields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_ZMM_HI256) + { + PCX86XSAVEZMMHI256 pZmmHi256 = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_ZMM_HI256_BIT, PCX86XSAVEZMMHI256); + SSMR3PutStructEx(pSSM, pZmmHi256, sizeof(*pZmmHi256), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumZmmHi256Fields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_ZMM_16HI) + { + PCX86XSAVEZMM16HI pZmm16Hi = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_ZMM_16HI_BIT, PCX86XSAVEZMM16HI); + SSMR3PutStructEx(pSSM, pZmm16Hi, sizeof(*pZmm16Hi), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumZmm16HiFields, NULL); + } + if (pVM->cpum.s.GuestFeatures.fSvm) + { + Assert(pGstCtx->hwvirt.svm.CTX_SUFF(pVmcb)); + SSMR3PutU64(pSSM, pGstCtx->hwvirt.svm.uMsrHSavePa); + SSMR3PutGCPhys(pSSM, pGstCtx->hwvirt.svm.GCPhysVmcb); + SSMR3PutU64(pSSM, pGstCtx->hwvirt.svm.uPrevPauseTick); + SSMR3PutU16(pSSM, pGstCtx->hwvirt.svm.cPauseFilter); + SSMR3PutU16(pSSM, pGstCtx->hwvirt.svm.cPauseFilterThreshold); + SSMR3PutBool(pSSM, 
pGstCtx->hwvirt.svm.fInterceptEvents); + SSMR3PutStructEx(pSSM, &pGstCtx->hwvirt.svm.HostState, sizeof(pGstCtx->hwvirt.svm.HostState), 0 /* fFlags */, + g_aSvmHwvirtHostState, NULL /* pvUser */); + SSMR3PutMem(pSSM, pGstCtx->hwvirt.svm.pVmcbR3, SVM_VMCB_PAGES << X86_PAGE_4K_SHIFT); + SSMR3PutMem(pSSM, pGstCtx->hwvirt.svm.pvMsrBitmapR3, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT); + SSMR3PutMem(pSSM, pGstCtx->hwvirt.svm.pvIoBitmapR3, SVM_IOPM_PAGES << X86_PAGE_4K_SHIFT); + SSMR3PutU32(pSSM, pGstCtx->hwvirt.fLocalForcedActions); + SSMR3PutBool(pSSM, pGstCtx->hwvirt.fGif); + } + SSMR3PutU32(pSSM, pVCpu->cpum.s.fUseFlags); + SSMR3PutU32(pSSM, pVCpu->cpum.s.fChanged); + AssertCompileSizeAlignment(pVCpu->cpum.s.GuestMsrs.msr, sizeof(uint64_t)); + SSMR3PutMem(pSSM, &pVCpu->cpum.s.GuestMsrs, sizeof(pVCpu->cpum.s.GuestMsrs.msr)); + } + + cpumR3SaveCpuId(pVM, pSSM); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMINTLOADPREP} + */ +static DECLCALLBACK(int) cpumR3LoadPrep(PVM pVM, PSSMHANDLE pSSM) +{ + NOREF(pSSM); + pVM->cpum.s.fPendingRestore = true; + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMINTLOADEXEC} + */ +static DECLCALLBACK(int) cpumR3LoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + int rc; /* Only for AssertRCReturn use. */ + + /* + * Validate version. + */ + if ( uVersion != CPUM_SAVED_STATE_VERSION_HWVIRT_SVM + && uVersion != CPUM_SAVED_STATE_VERSION_XSAVE + && uVersion != CPUM_SAVED_STATE_VERSION_GOOD_CPUID_COUNT + && uVersion != CPUM_SAVED_STATE_VERSION_BAD_CPUID_COUNT + && uVersion != CPUM_SAVED_STATE_VERSION_PUT_STRUCT + && uVersion != CPUM_SAVED_STATE_VERSION_MEM + && uVersion != CPUM_SAVED_STATE_VERSION_NO_MSR_SIZE + && uVersion != CPUM_SAVED_STATE_VERSION_VER3_2 + && uVersion != CPUM_SAVED_STATE_VERSION_VER3_0 + && uVersion != CPUM_SAVED_STATE_VERSION_VER2_1_NOMSR + && uVersion != CPUM_SAVED_STATE_VERSION_VER2_0 + && uVersion != CPUM_SAVED_STATE_VERSION_VER1_6) + { + AssertMsgFailed(("cpumR3LoadExec: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + if (uPass == SSM_PASS_FINAL) + { + /* + * Set the size of RTGCPTR for SSMR3GetGCPtr. (Only necessary for + * really old SSM file versions.) + */ + if (uVersion == CPUM_SAVED_STATE_VERSION_VER1_6) + SSMR3HandleSetGCPtrSize(pSSM, sizeof(RTGCPTR32)); + else if (uVersion <= CPUM_SAVED_STATE_VERSION_VER3_0) + SSMR3HandleSetGCPtrSize(pSSM, HC_ARCH_BITS == 32 ? sizeof(RTGCPTR32) : sizeof(RTGCPTR)); + + /* + * Figure x86 and ctx field definitions to use for older states. + */ + uint32_t const fLoad = uVersion > CPUM_SAVED_STATE_VERSION_MEM ? 0 : SSMSTRUCT_FLAGS_MEM_BAND_AID_RELAXED; + PCSSMFIELD paCpumCtx1Fields = g_aCpumX87Fields; + PCSSMFIELD paCpumCtx2Fields = g_aCpumCtxFields; + if (uVersion == CPUM_SAVED_STATE_VERSION_VER1_6) + { + paCpumCtx1Fields = g_aCpumX87FieldsV16; + paCpumCtx2Fields = g_aCpumCtxFieldsV16; + } + else if (uVersion <= CPUM_SAVED_STATE_VERSION_MEM) + { + paCpumCtx1Fields = g_aCpumX87FieldsMem; + paCpumCtx2Fields = g_aCpumCtxFieldsMem; + } + + /* + * The hyper state used to preceed the CPU count. Starting with + * XSAVE it was moved down till after we've got the count. 
+ */ + if (uVersion < CPUM_SAVED_STATE_VERSION_XSAVE) + { + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + X86FXSTATE Ign; + SSMR3GetStructEx(pSSM, &Ign, sizeof(Ign), fLoad | SSMSTRUCT_FLAGS_NO_TAIL_MARKER, paCpumCtx1Fields, NULL); + uint64_t uCR3 = pVCpu->cpum.s.Hyper.cr3; + uint64_t uRSP = pVCpu->cpum.s.Hyper.rsp; /* see VMMR3Relocate(). */ + SSMR3GetStructEx(pSSM, &pVCpu->cpum.s.Hyper, sizeof(pVCpu->cpum.s.Hyper), + fLoad | SSMSTRUCT_FLAGS_NO_LEAD_MARKER, paCpumCtx2Fields, NULL); + pVCpu->cpum.s.Hyper.cr3 = uCR3; + pVCpu->cpum.s.Hyper.rsp = uRSP; + } + } + + if (uVersion >= CPUM_SAVED_STATE_VERSION_VER2_1_NOMSR) + { + uint32_t cCpus; + rc = SSMR3GetU32(pSSM, &cCpus); AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(cCpus == pVM->cCpus, ("Mismatching CPU counts: saved: %u; configured: %u \n", cCpus, pVM->cCpus), + VERR_SSM_UNEXPECTED_DATA); + } + AssertLogRelMsgReturn( uVersion > CPUM_SAVED_STATE_VERSION_VER2_0 + || pVM->cCpus == 1, + ("cCpus=%u\n", pVM->cCpus), + VERR_SSM_UNEXPECTED_DATA); + + uint32_t cbMsrs = 0; + if (uVersion > CPUM_SAVED_STATE_VERSION_NO_MSR_SIZE) + { + rc = SSMR3GetU32(pSSM, &cbMsrs); AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(RT_ALIGN(cbMsrs, sizeof(uint64_t)) == cbMsrs, ("Size of MSRs is misaligned: %#x\n", cbMsrs), + VERR_SSM_UNEXPECTED_DATA); + AssertLogRelMsgReturn(cbMsrs <= sizeof(CPUMCTXMSRS) && cbMsrs > 0, ("Size of MSRs is out of range: %#x\n", cbMsrs), + VERR_SSM_UNEXPECTED_DATA); + } + + /* + * Do the per-CPU restoring. + */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + PCPUMCTX pGstCtx = &pVCpu->cpum.s.Guest; + + if (uVersion >= CPUM_SAVED_STATE_VERSION_XSAVE) + { + /* + * The XSAVE saved state layout moved the hyper state down here. + */ + uint64_t uCR3 = pVCpu->cpum.s.Hyper.cr3; + uint64_t uRSP = pVCpu->cpum.s.Hyper.rsp; /* see VMMR3Relocate(). */ + rc = SSMR3GetStructEx(pSSM, &pVCpu->cpum.s.Hyper, sizeof(pVCpu->cpum.s.Hyper), 0, g_aCpumCtxFields, NULL); + pVCpu->cpum.s.Hyper.cr3 = uCR3; + pVCpu->cpum.s.Hyper.rsp = uRSP; + AssertRCReturn(rc, rc); + + /* + * Start by restoring the CPUMCTX structure and the X86FXSAVE bits of the extended state. + */ + rc = SSMR3GetStructEx(pSSM, pGstCtx, sizeof(*pGstCtx), 0, g_aCpumCtxFields, NULL); + rc = SSMR3GetStructEx(pSSM, &pGstCtx->pXStateR3->x87, sizeof(pGstCtx->pXStateR3->x87), 0, g_aCpumX87Fields, NULL); + AssertRCReturn(rc, rc); + + /* Check that the xsave/xrstor mask is valid (invalid results in #GP). 
*/ + if (pGstCtx->fXStateMask != 0) + { + AssertLogRelMsgReturn(!(pGstCtx->fXStateMask & ~pVM->cpum.s.fXStateGuestMask), + ("fXStateMask=%#RX64 fXStateGuestMask=%#RX64\n", + pGstCtx->fXStateMask, pVM->cpum.s.fXStateGuestMask), + VERR_CPUM_INCOMPATIBLE_XSAVE_COMP_MASK); + AssertLogRelMsgReturn(pGstCtx->fXStateMask & XSAVE_C_X87, + ("fXStateMask=%#RX64\n", pGstCtx->fXStateMask), VERR_CPUM_INVALID_XSAVE_COMP_MASK); + AssertLogRelMsgReturn((pGstCtx->fXStateMask & (XSAVE_C_SSE | XSAVE_C_YMM)) != XSAVE_C_YMM, + ("fXStateMask=%#RX64\n", pGstCtx->fXStateMask), VERR_CPUM_INVALID_XSAVE_COMP_MASK); + AssertLogRelMsgReturn( (pGstCtx->fXStateMask & (XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI)) == 0 + || (pGstCtx->fXStateMask & (XSAVE_C_SSE | XSAVE_C_YMM | XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI)) + == (XSAVE_C_SSE | XSAVE_C_YMM | XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI), + ("fXStateMask=%#RX64\n", pGstCtx->fXStateMask), VERR_CPUM_INVALID_XSAVE_COMP_MASK); + } + + /* Check that the XCR0 mask is valid (invalid results in #GP). */ + AssertLogRelMsgReturn(pGstCtx->aXcr[0] & XSAVE_C_X87, ("xcr0=%#RX64\n", pGstCtx->aXcr[0]), VERR_CPUM_INVALID_XCR0); + if (pGstCtx->aXcr[0] != XSAVE_C_X87) + { + AssertLogRelMsgReturn(!(pGstCtx->aXcr[0] & ~(pGstCtx->fXStateMask | XSAVE_C_X87)), + ("xcr0=%#RX64 fXStateMask=%#RX64\n", pGstCtx->aXcr[0], pGstCtx->fXStateMask), + VERR_CPUM_INVALID_XCR0); + AssertLogRelMsgReturn(pGstCtx->aXcr[0] & XSAVE_C_X87, + ("xcr0=%#RX64\n", pGstCtx->aXcr[0]), VERR_CPUM_INVALID_XSAVE_COMP_MASK); + AssertLogRelMsgReturn((pGstCtx->aXcr[0] & (XSAVE_C_SSE | XSAVE_C_YMM)) != XSAVE_C_YMM, + ("xcr0=%#RX64\n", pGstCtx->aXcr[0]), VERR_CPUM_INVALID_XSAVE_COMP_MASK); + AssertLogRelMsgReturn( (pGstCtx->aXcr[0] & (XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI)) == 0 + || (pGstCtx->aXcr[0] & (XSAVE_C_SSE | XSAVE_C_YMM | XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI)) + == (XSAVE_C_SSE | XSAVE_C_YMM | XSAVE_C_OPMASK | XSAVE_C_ZMM_HI256 | XSAVE_C_ZMM_16HI), + ("xcr0=%#RX64\n", pGstCtx->aXcr[0]), VERR_CPUM_INVALID_XSAVE_COMP_MASK); + } + + /* Check that the XCR1 is zero, as we don't implement it yet. */ + AssertLogRelMsgReturn(!pGstCtx->aXcr[1], ("xcr1=%#RX64\n", pGstCtx->aXcr[1]), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + /* + * Restore the individual extended state components we support. 
+ */ + if (pGstCtx->fXStateMask != 0) + { + rc = SSMR3GetStructEx(pSSM, &pGstCtx->pXStateR3->Hdr, sizeof(pGstCtx->pXStateR3->Hdr), + 0, g_aCpumXSaveHdrFields, NULL); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(!(pGstCtx->pXStateR3->Hdr.bmXState & ~pGstCtx->fXStateMask), + ("bmXState=%#RX64 fXStateMask=%#RX64\n", + pGstCtx->pXStateR3->Hdr.bmXState, pGstCtx->fXStateMask), + VERR_CPUM_INVALID_XSAVE_HDR); + } + if (pGstCtx->fXStateMask & XSAVE_C_YMM) + { + PX86XSAVEYMMHI pYmmHiCtx = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_YMM_BIT, PX86XSAVEYMMHI); + SSMR3GetStructEx(pSSM, pYmmHiCtx, sizeof(*pYmmHiCtx), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumYmmHiFields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_BNDREGS) + { + PX86XSAVEBNDREGS pBndRegs = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_BNDREGS_BIT, PX86XSAVEBNDREGS); + SSMR3GetStructEx(pSSM, pBndRegs, sizeof(*pBndRegs), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumBndRegsFields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_BNDCSR) + { + PX86XSAVEBNDCFG pBndCfg = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_BNDCSR_BIT, PX86XSAVEBNDCFG); + SSMR3GetStructEx(pSSM, pBndCfg, sizeof(*pBndCfg), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumBndCfgFields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_ZMM_HI256) + { + PX86XSAVEZMMHI256 pZmmHi256 = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_ZMM_HI256_BIT, PX86XSAVEZMMHI256); + SSMR3GetStructEx(pSSM, pZmmHi256, sizeof(*pZmmHi256), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumZmmHi256Fields, NULL); + } + if (pGstCtx->fXStateMask & XSAVE_C_ZMM_16HI) + { + PX86XSAVEZMM16HI pZmm16Hi = CPUMCTX_XSAVE_C_PTR(pGstCtx, XSAVE_C_ZMM_16HI_BIT, PX86XSAVEZMM16HI); + SSMR3GetStructEx(pSSM, pZmm16Hi, sizeof(*pZmm16Hi), SSMSTRUCT_FLAGS_FULL_STRUCT, g_aCpumZmm16HiFields, NULL); + } + if (uVersion >= CPUM_SAVED_STATE_VERSION_HWVIRT_SVM) + { + if (pVM->cpum.s.GuestFeatures.fSvm) + { + Assert(pGstCtx->hwvirt.svm.CTX_SUFF(pVmcb)); + SSMR3GetU64(pSSM, &pGstCtx->hwvirt.svm.uMsrHSavePa); + SSMR3GetGCPhys(pSSM, &pGstCtx->hwvirt.svm.GCPhysVmcb); + SSMR3GetU64(pSSM, &pGstCtx->hwvirt.svm.uPrevPauseTick); + SSMR3GetU16(pSSM, &pGstCtx->hwvirt.svm.cPauseFilter); + SSMR3GetU16(pSSM, &pGstCtx->hwvirt.svm.cPauseFilterThreshold); + SSMR3GetBool(pSSM, &pGstCtx->hwvirt.svm.fInterceptEvents); + SSMR3GetStructEx(pSSM, &pGstCtx->hwvirt.svm.HostState, sizeof(pGstCtx->hwvirt.svm.HostState), + 0 /* fFlags */, g_aSvmHwvirtHostState, NULL /* pvUser */); + SSMR3GetMem(pSSM, pGstCtx->hwvirt.svm.pVmcbR3, SVM_VMCB_PAGES << X86_PAGE_4K_SHIFT); + SSMR3GetMem(pSSM, pGstCtx->hwvirt.svm.pvMsrBitmapR3, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT); + SSMR3GetMem(pSSM, pGstCtx->hwvirt.svm.pvIoBitmapR3, SVM_IOPM_PAGES << X86_PAGE_4K_SHIFT); + SSMR3GetU32(pSSM, &pGstCtx->hwvirt.fLocalForcedActions); + SSMR3GetBool(pSSM, &pGstCtx->hwvirt.fGif); + } + } + /** @todo NSTVMX: Load VMX state. */ + } + else + { + /* + * Pre XSAVE saved state. + */ + SSMR3GetStructEx(pSSM, &pGstCtx->pXStateR3->x87, sizeof(pGstCtx->pXStateR3->x87), + fLoad | SSMSTRUCT_FLAGS_NO_TAIL_MARKER, paCpumCtx1Fields, NULL); + SSMR3GetStructEx(pSSM, pGstCtx, sizeof(*pGstCtx), fLoad | SSMSTRUCT_FLAGS_NO_LEAD_MARKER, paCpumCtx2Fields, NULL); + } + + /* + * Restore a couple of flags and the MSRs. 
+ */ + SSMR3GetU32(pSSM, &pVCpu->cpum.s.fUseFlags); + SSMR3GetU32(pSSM, &pVCpu->cpum.s.fChanged); + + rc = VINF_SUCCESS; + if (uVersion > CPUM_SAVED_STATE_VERSION_NO_MSR_SIZE) + rc = SSMR3GetMem(pSSM, &pVCpu->cpum.s.GuestMsrs.au64[0], cbMsrs); + else if (uVersion >= CPUM_SAVED_STATE_VERSION_VER3_0) + { + SSMR3GetMem(pSSM, &pVCpu->cpum.s.GuestMsrs.au64[0], 2 * sizeof(uint64_t)); /* Restore two MSRs. */ + rc = SSMR3Skip(pSSM, 62 * sizeof(uint64_t)); + } + AssertRCReturn(rc, rc); + + /* REM and other may have cleared must-be-one fields in DR6 and + DR7, fix these. */ + pGstCtx->dr[6] &= ~(X86_DR6_RAZ_MASK | X86_DR6_MBZ_MASK); + pGstCtx->dr[6] |= X86_DR6_RA1_MASK; + pGstCtx->dr[7] &= ~(X86_DR7_RAZ_MASK | X86_DR7_MBZ_MASK); + pGstCtx->dr[7] |= X86_DR7_RA1_MASK; + } + + /* Older states does not have the internal selector register flags + and valid selector value. Supply those. */ + if (uVersion <= CPUM_SAVED_STATE_VERSION_MEM) + { + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + bool const fValid = !VM_IS_RAW_MODE_ENABLED(pVM) + || ( uVersion > CPUM_SAVED_STATE_VERSION_VER3_2 + && !(pVCpu->cpum.s.fChanged & CPUM_CHANGED_HIDDEN_SEL_REGS_INVALID)); + PCPUMSELREG paSelReg = CPUMCTX_FIRST_SREG(&pVCpu->cpum.s.Guest); + if (fValid) + { + for (uint32_t iSelReg = 0; iSelReg < X86_SREG_COUNT; iSelReg++) + { + paSelReg[iSelReg].fFlags = CPUMSELREG_FLAGS_VALID; + paSelReg[iSelReg].ValidSel = paSelReg[iSelReg].Sel; + } + + pVCpu->cpum.s.Guest.ldtr.fFlags = CPUMSELREG_FLAGS_VALID; + pVCpu->cpum.s.Guest.ldtr.ValidSel = pVCpu->cpum.s.Guest.ldtr.Sel; + } + else + { + for (uint32_t iSelReg = 0; iSelReg < X86_SREG_COUNT; iSelReg++) + { + paSelReg[iSelReg].fFlags = 0; + paSelReg[iSelReg].ValidSel = 0; + } + + /* This might not be 104% correct, but I think it's close + enough for all practical purposes... (REM always loaded + LDTR registers.) */ + pVCpu->cpum.s.Guest.ldtr.fFlags = CPUMSELREG_FLAGS_VALID; + pVCpu->cpum.s.Guest.ldtr.ValidSel = pVCpu->cpum.s.Guest.ldtr.Sel; + } + pVCpu->cpum.s.Guest.tr.fFlags = CPUMSELREG_FLAGS_VALID; + pVCpu->cpum.s.Guest.tr.ValidSel = pVCpu->cpum.s.Guest.tr.Sel; + } + } + + /* Clear CPUM_CHANGED_HIDDEN_SEL_REGS_INVALID. */ + if ( uVersion > CPUM_SAVED_STATE_VERSION_VER3_2 + && uVersion <= CPUM_SAVED_STATE_VERSION_MEM) + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + pVM->aCpus[iCpu].cpum.s.fChanged &= CPUM_CHANGED_HIDDEN_SEL_REGS_INVALID; + + /* + * A quick sanity check. + */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + AssertLogRelReturn(!(pVCpu->cpum.s.Guest.es.fFlags & ~CPUMSELREG_FLAGS_VALID_MASK), VERR_SSM_UNEXPECTED_DATA); + AssertLogRelReturn(!(pVCpu->cpum.s.Guest.cs.fFlags & ~CPUMSELREG_FLAGS_VALID_MASK), VERR_SSM_UNEXPECTED_DATA); + AssertLogRelReturn(!(pVCpu->cpum.s.Guest.ss.fFlags & ~CPUMSELREG_FLAGS_VALID_MASK), VERR_SSM_UNEXPECTED_DATA); + AssertLogRelReturn(!(pVCpu->cpum.s.Guest.ds.fFlags & ~CPUMSELREG_FLAGS_VALID_MASK), VERR_SSM_UNEXPECTED_DATA); + AssertLogRelReturn(!(pVCpu->cpum.s.Guest.fs.fFlags & ~CPUMSELREG_FLAGS_VALID_MASK), VERR_SSM_UNEXPECTED_DATA); + AssertLogRelReturn(!(pVCpu->cpum.s.Guest.gs.fFlags & ~CPUMSELREG_FLAGS_VALID_MASK), VERR_SSM_UNEXPECTED_DATA); + } + } + + pVM->cpum.s.fPendingRestore = false; + + /* + * Guest CPUIDs. 
+ */ + if (uVersion >= CPUM_SAVED_STATE_VERSION_VER3_2) + { + CPUMMSRS GuestMsrs; + RT_ZERO(GuestMsrs); + if (pVM->cpum.s.GuestFeatures.fVmx) + GuestMsrs.hwvirt.vmx = pVM->aCpus[0].cpum.s.Guest.hwvirt.vmx.Msrs; + return cpumR3LoadCpuId(pVM, pSSM, uVersion, &GuestMsrs); + } + return cpumR3LoadCpuIdPre32(pVM, pSSM, uVersion); +} + + +/** + * @callback_method_impl{FNSSMINTLOADDONE} + */ +static DECLCALLBACK(int) cpumR3LoadDone(PVM pVM, PSSMHANDLE pSSM) +{ + if (RT_FAILURE(SSMR3HandleGetStatus(pSSM))) + return VINF_SUCCESS; + + /* just check this since we can. */ /** @todo Add a SSM unit flag for indicating that it's mandatory during a restore. */ + if (pVM->cpum.s.fPendingRestore) + { + LogRel(("CPUM: Missing state!\n")); + return VERR_INTERNAL_ERROR_2; + } + + bool const fSupportsLongMode = VMR3IsLongModeAllowed(pVM); + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + + /* Notify PGM of the NXE states in case they've changed. */ + PGMNotifyNxeChanged(pVCpu, RT_BOOL(pVCpu->cpum.s.Guest.msrEFER & MSR_K6_EFER_NXE)); + + /* During init. this is done in CPUMR3InitCompleted(). */ + if (fSupportsLongMode) + pVCpu->cpum.s.fUseFlags |= CPUM_USE_SUPPORTS_LONGMODE; + } + return VINF_SUCCESS; +} + + +/** + * Checks if the CPUM state restore is still pending. + * + * @returns true / false. + * @param pVM The cross context VM structure. + */ +VMMDECL(bool) CPUMR3IsStateRestorePending(PVM pVM) +{ + return pVM->cpum.s.fPendingRestore; +} + + +/** + * Formats the EFLAGS value into mnemonics. + * + * @param pszEFlags Where to write the mnemonics. (Assumes sufficient buffer space.) + * @param efl The EFLAGS value. + */ +static void cpumR3InfoFormatFlags(char *pszEFlags, uint32_t efl) +{ + /* + * Format the flags. + */ + static const struct + { + const char *pszSet; const char *pszClear; uint32_t fFlag; + } s_aFlags[] = + { + { "vip",NULL, X86_EFL_VIP }, + { "vif",NULL, X86_EFL_VIF }, + { "ac", NULL, X86_EFL_AC }, + { "vm", NULL, X86_EFL_VM }, + { "rf", NULL, X86_EFL_RF }, + { "nt", NULL, X86_EFL_NT }, + { "ov", "nv", X86_EFL_OF }, + { "dn", "up", X86_EFL_DF }, + { "ei", "di", X86_EFL_IF }, + { "tf", NULL, X86_EFL_TF }, + { "nt", "pl", X86_EFL_SF }, + { "nz", "zr", X86_EFL_ZF }, + { "ac", "na", X86_EFL_AF }, + { "po", "pe", X86_EFL_PF }, + { "cy", "nc", X86_EFL_CF }, + }; + char *psz = pszEFlags; + for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++) + { + const char *pszAdd = s_aFlags[i].fFlag & efl ? s_aFlags[i].pszSet : s_aFlags[i].pszClear; + if (pszAdd) + { + strcpy(psz, pszAdd); + psz += strlen(pszAdd); + *psz++ = ' '; + } + } + psz[-1] = '\0'; +} + + +/** + * Formats a full register dump. + * + * @param pVM The cross context VM structure. + * @param pCtx The context to format. + * @param pCtxCore The context core to format. + * @param pHlp Output functions. + * @param enmType The dump type. + * @param pszPrefix Register name prefix. + */ +static void cpumR3InfoOne(PVM pVM, PCPUMCTX pCtx, PCCPUMCTXCORE pCtxCore, PCDBGFINFOHLP pHlp, CPUMDUMPTYPE enmType, + const char *pszPrefix) +{ + NOREF(pVM); + + /* + * Format the EFLAGS. + */ + uint32_t efl = pCtxCore->eflags.u32; + char szEFlags[80]; + cpumR3InfoFormatFlags(&szEFlags[0], efl); + + /* + * Format the registers. 
+ */ + switch (enmType) + { + case CPUMDUMPTYPE_TERSE: + if (CPUMIsGuestIn64BitCodeEx(pCtx)) + pHlp->pfnPrintf(pHlp, + "%srax=%016RX64 %srbx=%016RX64 %srcx=%016RX64 %srdx=%016RX64\n" + "%srsi=%016RX64 %srdi=%016RX64 %sr8 =%016RX64 %sr9 =%016RX64\n" + "%sr10=%016RX64 %sr11=%016RX64 %sr12=%016RX64 %sr13=%016RX64\n" + "%sr14=%016RX64 %sr15=%016RX64\n" + "%srip=%016RX64 %srsp=%016RX64 %srbp=%016RX64 %siopl=%d %*s\n" + "%scs=%04x %sss=%04x %sds=%04x %ses=%04x %sfs=%04x %sgs=%04x %seflags=%08x\n", + pszPrefix, pCtxCore->rax, pszPrefix, pCtxCore->rbx, pszPrefix, pCtxCore->rcx, pszPrefix, pCtxCore->rdx, pszPrefix, pCtxCore->rsi, pszPrefix, pCtxCore->rdi, + pszPrefix, pCtxCore->r8, pszPrefix, pCtxCore->r9, pszPrefix, pCtxCore->r10, pszPrefix, pCtxCore->r11, pszPrefix, pCtxCore->r12, pszPrefix, pCtxCore->r13, + pszPrefix, pCtxCore->r14, pszPrefix, pCtxCore->r15, + pszPrefix, pCtxCore->rip, pszPrefix, pCtxCore->rsp, pszPrefix, pCtxCore->rbp, pszPrefix, X86_EFL_GET_IOPL(efl), *pszPrefix ? 33 : 31, szEFlags, + pszPrefix, pCtxCore->cs.Sel, pszPrefix, pCtxCore->ss.Sel, pszPrefix, pCtxCore->ds.Sel, pszPrefix, pCtxCore->es.Sel, + pszPrefix, pCtxCore->fs.Sel, pszPrefix, pCtxCore->gs.Sel, pszPrefix, efl); + else + pHlp->pfnPrintf(pHlp, + "%seax=%08x %sebx=%08x %secx=%08x %sedx=%08x %sesi=%08x %sedi=%08x\n" + "%seip=%08x %sesp=%08x %sebp=%08x %siopl=%d %*s\n" + "%scs=%04x %sss=%04x %sds=%04x %ses=%04x %sfs=%04x %sgs=%04x %seflags=%08x\n", + pszPrefix, pCtxCore->eax, pszPrefix, pCtxCore->ebx, pszPrefix, pCtxCore->ecx, pszPrefix, pCtxCore->edx, pszPrefix, pCtxCore->esi, pszPrefix, pCtxCore->edi, + pszPrefix, pCtxCore->eip, pszPrefix, pCtxCore->esp, pszPrefix, pCtxCore->ebp, pszPrefix, X86_EFL_GET_IOPL(efl), *pszPrefix ? 33 : 31, szEFlags, + pszPrefix, pCtxCore->cs.Sel, pszPrefix, pCtxCore->ss.Sel, pszPrefix, pCtxCore->ds.Sel, pszPrefix, pCtxCore->es.Sel, + pszPrefix, pCtxCore->fs.Sel, pszPrefix, pCtxCore->gs.Sel, pszPrefix, efl); + break; + + case CPUMDUMPTYPE_DEFAULT: + if (CPUMIsGuestIn64BitCodeEx(pCtx)) + pHlp->pfnPrintf(pHlp, + "%srax=%016RX64 %srbx=%016RX64 %srcx=%016RX64 %srdx=%016RX64\n" + "%srsi=%016RX64 %srdi=%016RX64 %sr8 =%016RX64 %sr9 =%016RX64\n" + "%sr10=%016RX64 %sr11=%016RX64 %sr12=%016RX64 %sr13=%016RX64\n" + "%sr14=%016RX64 %sr15=%016RX64\n" + "%srip=%016RX64 %srsp=%016RX64 %srbp=%016RX64 %siopl=%d %*s\n" + "%scs=%04x %sss=%04x %sds=%04x %ses=%04x %sfs=%04x %sgs=%04x %str=%04x %seflags=%08x\n" + "%scr0=%08RX64 %scr2=%08RX64 %scr3=%08RX64 %scr4=%08RX64 %sgdtr=%016RX64:%04x %sldtr=%04x\n" + , + pszPrefix, pCtxCore->rax, pszPrefix, pCtxCore->rbx, pszPrefix, pCtxCore->rcx, pszPrefix, pCtxCore->rdx, pszPrefix, pCtxCore->rsi, pszPrefix, pCtxCore->rdi, + pszPrefix, pCtxCore->r8, pszPrefix, pCtxCore->r9, pszPrefix, pCtxCore->r10, pszPrefix, pCtxCore->r11, pszPrefix, pCtxCore->r12, pszPrefix, pCtxCore->r13, + pszPrefix, pCtxCore->r14, pszPrefix, pCtxCore->r15, + pszPrefix, pCtxCore->rip, pszPrefix, pCtxCore->rsp, pszPrefix, pCtxCore->rbp, pszPrefix, X86_EFL_GET_IOPL(efl), *pszPrefix ? 
33 : 31, szEFlags, + pszPrefix, pCtxCore->cs.Sel, pszPrefix, pCtxCore->ss.Sel, pszPrefix, pCtxCore->ds.Sel, pszPrefix, pCtxCore->es.Sel, + pszPrefix, pCtxCore->fs.Sel, pszPrefix, pCtxCore->gs.Sel, pszPrefix, pCtx->tr.Sel, pszPrefix, efl, + pszPrefix, pCtx->cr0, pszPrefix, pCtx->cr2, pszPrefix, pCtx->cr3, pszPrefix, pCtx->cr4, + pszPrefix, pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pszPrefix, pCtx->ldtr.Sel); + else + pHlp->pfnPrintf(pHlp, + "%seax=%08x %sebx=%08x %secx=%08x %sedx=%08x %sesi=%08x %sedi=%08x\n" + "%seip=%08x %sesp=%08x %sebp=%08x %siopl=%d %*s\n" + "%scs=%04x %sss=%04x %sds=%04x %ses=%04x %sfs=%04x %sgs=%04x %str=%04x %seflags=%08x\n" + "%scr0=%08RX64 %scr2=%08RX64 %scr3=%08RX64 %scr4=%08RX64 %sgdtr=%08RX64:%04x %sldtr=%04x\n" + , + pszPrefix, pCtxCore->eax, pszPrefix, pCtxCore->ebx, pszPrefix, pCtxCore->ecx, pszPrefix, pCtxCore->edx, pszPrefix, pCtxCore->esi, pszPrefix, pCtxCore->edi, + pszPrefix, pCtxCore->eip, pszPrefix, pCtxCore->esp, pszPrefix, pCtxCore->ebp, pszPrefix, X86_EFL_GET_IOPL(efl), *pszPrefix ? 33 : 31, szEFlags, + pszPrefix, pCtxCore->cs.Sel, pszPrefix, pCtxCore->ss.Sel, pszPrefix, pCtxCore->ds.Sel, pszPrefix, pCtxCore->es.Sel, + pszPrefix, pCtxCore->fs.Sel, pszPrefix, pCtxCore->gs.Sel, pszPrefix, pCtx->tr.Sel, pszPrefix, efl, + pszPrefix, pCtx->cr0, pszPrefix, pCtx->cr2, pszPrefix, pCtx->cr3, pszPrefix, pCtx->cr4, + pszPrefix, pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pszPrefix, pCtx->ldtr.Sel); + break; + + case CPUMDUMPTYPE_VERBOSE: + if (CPUMIsGuestIn64BitCodeEx(pCtx)) + pHlp->pfnPrintf(pHlp, + "%srax=%016RX64 %srbx=%016RX64 %srcx=%016RX64 %srdx=%016RX64\n" + "%srsi=%016RX64 %srdi=%016RX64 %sr8 =%016RX64 %sr9 =%016RX64\n" + "%sr10=%016RX64 %sr11=%016RX64 %sr12=%016RX64 %sr13=%016RX64\n" + "%sr14=%016RX64 %sr15=%016RX64\n" + "%srip=%016RX64 %srsp=%016RX64 %srbp=%016RX64 %siopl=%d %*s\n" + "%scs={%04x base=%016RX64 limit=%08x flags=%08x}\n" + "%sds={%04x base=%016RX64 limit=%08x flags=%08x}\n" + "%ses={%04x base=%016RX64 limit=%08x flags=%08x}\n" + "%sfs={%04x base=%016RX64 limit=%08x flags=%08x}\n" + "%sgs={%04x base=%016RX64 limit=%08x flags=%08x}\n" + "%sss={%04x base=%016RX64 limit=%08x flags=%08x}\n" + "%scr0=%016RX64 %scr2=%016RX64 %scr3=%016RX64 %scr4=%016RX64\n" + "%sdr0=%016RX64 %sdr1=%016RX64 %sdr2=%016RX64 %sdr3=%016RX64\n" + "%sdr4=%016RX64 %sdr5=%016RX64 %sdr6=%016RX64 %sdr7=%016RX64\n" + "%sgdtr=%016RX64:%04x %sidtr=%016RX64:%04x %seflags=%08x\n" + "%sldtr={%04x base=%08RX64 limit=%08x flags=%08x}\n" + "%str ={%04x base=%08RX64 limit=%08x flags=%08x}\n" + "%sSysEnter={cs=%04llx eip=%016RX64 esp=%016RX64}\n" + , + pszPrefix, pCtxCore->rax, pszPrefix, pCtxCore->rbx, pszPrefix, pCtxCore->rcx, pszPrefix, pCtxCore->rdx, pszPrefix, pCtxCore->rsi, pszPrefix, pCtxCore->rdi, + pszPrefix, pCtxCore->r8, pszPrefix, pCtxCore->r9, pszPrefix, pCtxCore->r10, pszPrefix, pCtxCore->r11, pszPrefix, pCtxCore->r12, pszPrefix, pCtxCore->r13, + pszPrefix, pCtxCore->r14, pszPrefix, pCtxCore->r15, + pszPrefix, pCtxCore->rip, pszPrefix, pCtxCore->rsp, pszPrefix, pCtxCore->rbp, pszPrefix, X86_EFL_GET_IOPL(efl), *pszPrefix ? 
33 : 31, szEFlags, + pszPrefix, pCtxCore->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u, + pszPrefix, pCtxCore->ds.Sel, pCtx->ds.u64Base, pCtx->ds.u32Limit, pCtx->ds.Attr.u, + pszPrefix, pCtxCore->es.Sel, pCtx->es.u64Base, pCtx->es.u32Limit, pCtx->es.Attr.u, + pszPrefix, pCtxCore->fs.Sel, pCtx->fs.u64Base, pCtx->fs.u32Limit, pCtx->fs.Attr.u, + pszPrefix, pCtxCore->gs.Sel, pCtx->gs.u64Base, pCtx->gs.u32Limit, pCtx->gs.Attr.u, + pszPrefix, pCtxCore->ss.Sel, pCtx->ss.u64Base, pCtx->ss.u32Limit, pCtx->ss.Attr.u, + pszPrefix, pCtx->cr0, pszPrefix, pCtx->cr2, pszPrefix, pCtx->cr3, pszPrefix, pCtx->cr4, + pszPrefix, pCtx->dr[0], pszPrefix, pCtx->dr[1], pszPrefix, pCtx->dr[2], pszPrefix, pCtx->dr[3], + pszPrefix, pCtx->dr[4], pszPrefix, pCtx->dr[5], pszPrefix, pCtx->dr[6], pszPrefix, pCtx->dr[7], + pszPrefix, pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pszPrefix, pCtx->idtr.pIdt, pCtx->idtr.cbIdt, pszPrefix, efl, + pszPrefix, pCtx->ldtr.Sel, pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit, pCtx->ldtr.Attr.u, + pszPrefix, pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u, + pszPrefix, pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp); + else + pHlp->pfnPrintf(pHlp, + "%seax=%08x %sebx=%08x %secx=%08x %sedx=%08x %sesi=%08x %sedi=%08x\n" + "%seip=%08x %sesp=%08x %sebp=%08x %siopl=%d %*s\n" + "%scs={%04x base=%016RX64 limit=%08x flags=%08x} %sdr0=%08RX64 %sdr1=%08RX64\n" + "%sds={%04x base=%016RX64 limit=%08x flags=%08x} %sdr2=%08RX64 %sdr3=%08RX64\n" + "%ses={%04x base=%016RX64 limit=%08x flags=%08x} %sdr4=%08RX64 %sdr5=%08RX64\n" + "%sfs={%04x base=%016RX64 limit=%08x flags=%08x} %sdr6=%08RX64 %sdr7=%08RX64\n" + "%sgs={%04x base=%016RX64 limit=%08x flags=%08x} %scr0=%08RX64 %scr2=%08RX64\n" + "%sss={%04x base=%016RX64 limit=%08x flags=%08x} %scr3=%08RX64 %scr4=%08RX64\n" + "%sgdtr=%016RX64:%04x %sidtr=%016RX64:%04x %seflags=%08x\n" + "%sldtr={%04x base=%08RX64 limit=%08x flags=%08x}\n" + "%str ={%04x base=%08RX64 limit=%08x flags=%08x}\n" + "%sSysEnter={cs=%04llx eip=%08llx esp=%08llx}\n" + , + pszPrefix, pCtxCore->eax, pszPrefix, pCtxCore->ebx, pszPrefix, pCtxCore->ecx, pszPrefix, pCtxCore->edx, pszPrefix, pCtxCore->esi, pszPrefix, pCtxCore->edi, + pszPrefix, pCtxCore->eip, pszPrefix, pCtxCore->esp, pszPrefix, pCtxCore->ebp, pszPrefix, X86_EFL_GET_IOPL(efl), *pszPrefix ? 
33 : 31, szEFlags, + pszPrefix, pCtxCore->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u, pszPrefix, pCtx->dr[0], pszPrefix, pCtx->dr[1], + pszPrefix, pCtxCore->ds.Sel, pCtx->ds.u64Base, pCtx->ds.u32Limit, pCtx->ds.Attr.u, pszPrefix, pCtx->dr[2], pszPrefix, pCtx->dr[3], + pszPrefix, pCtxCore->es.Sel, pCtx->es.u64Base, pCtx->es.u32Limit, pCtx->es.Attr.u, pszPrefix, pCtx->dr[4], pszPrefix, pCtx->dr[5], + pszPrefix, pCtxCore->fs.Sel, pCtx->fs.u64Base, pCtx->fs.u32Limit, pCtx->fs.Attr.u, pszPrefix, pCtx->dr[6], pszPrefix, pCtx->dr[7], + pszPrefix, pCtxCore->gs.Sel, pCtx->gs.u64Base, pCtx->gs.u32Limit, pCtx->gs.Attr.u, pszPrefix, pCtx->cr0, pszPrefix, pCtx->cr2, + pszPrefix, pCtxCore->ss.Sel, pCtx->ss.u64Base, pCtx->ss.u32Limit, pCtx->ss.Attr.u, pszPrefix, pCtx->cr3, pszPrefix, pCtx->cr4, + pszPrefix, pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pszPrefix, pCtx->idtr.pIdt, pCtx->idtr.cbIdt, pszPrefix, efl, + pszPrefix, pCtx->ldtr.Sel, pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit, pCtx->ldtr.Attr.u, + pszPrefix, pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u, + pszPrefix, pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp); + + pHlp->pfnPrintf(pHlp, "%sxcr=%016RX64 %sxcr1=%016RX64 %sxss=%016RX64 (fXStateMask=%016RX64)\n", + pszPrefix, pCtx->aXcr[0], pszPrefix, pCtx->aXcr[1], + pszPrefix, UINT64_C(0) /** @todo XSS */, pCtx->fXStateMask); + if (pCtx->CTX_SUFF(pXState)) + { + PX86FXSTATE pFpuCtx = &pCtx->CTX_SUFF(pXState)->x87; + pHlp->pfnPrintf(pHlp, + "%sFCW=%04x %sFSW=%04x %sFTW=%04x %sFOP=%04x %sMXCSR=%08x %sMXCSR_MASK=%08x\n" + "%sFPUIP=%08x %sCS=%04x %sRsrvd1=%04x %sFPUDP=%08x %sDS=%04x %sRsvrd2=%04x\n" + , + pszPrefix, pFpuCtx->FCW, pszPrefix, pFpuCtx->FSW, pszPrefix, pFpuCtx->FTW, pszPrefix, pFpuCtx->FOP, + pszPrefix, pFpuCtx->MXCSR, pszPrefix, pFpuCtx->MXCSR_MASK, + pszPrefix, pFpuCtx->FPUIP, pszPrefix, pFpuCtx->CS, pszPrefix, pFpuCtx->Rsrvd1, + pszPrefix, pFpuCtx->FPUDP, pszPrefix, pFpuCtx->DS, pszPrefix, pFpuCtx->Rsrvd2 + ); + /* + * The FSAVE style memory image contains ST(0)-ST(7) at increasing addresses, + * not (FP)R0-7 as Intel SDM suggests. + */ + unsigned iShift = (pFpuCtx->FSW >> 11) & 7; + for (unsigned iST = 0; iST < RT_ELEMENTS(pFpuCtx->aRegs); iST++) + { + unsigned iFPR = (iST + iShift) % RT_ELEMENTS(pFpuCtx->aRegs); + unsigned uTag = (pFpuCtx->FTW >> (2 * iFPR)) & 3; + char chSign = pFpuCtx->aRegs[iST].au16[4] & 0x8000 ? '-' : '+'; + unsigned iInteger = (unsigned)(pFpuCtx->aRegs[iST].au64[0] >> 63); + uint64_t u64Fraction = pFpuCtx->aRegs[iST].au64[0] & UINT64_C(0x7fffffffffffffff); + int iExponent = pFpuCtx->aRegs[iST].au16[4] & 0x7fff; + iExponent -= 16383; /* subtract bias */ + /** @todo This isn't entirenly correct and needs more work! */ + pHlp->pfnPrintf(pHlp, + "%sST(%u)=%sFPR%u={%04RX16'%08RX32'%08RX32} t%d %c%u.%022llu * 2 ^ %d (*)", + pszPrefix, iST, pszPrefix, iFPR, + pFpuCtx->aRegs[iST].au16[4], pFpuCtx->aRegs[iST].au32[1], pFpuCtx->aRegs[iST].au32[0], + uTag, chSign, iInteger, u64Fraction, iExponent); + if (pFpuCtx->aRegs[iST].au16[5] || pFpuCtx->aRegs[iST].au16[6] || pFpuCtx->aRegs[iST].au16[7]) + pHlp->pfnPrintf(pHlp, " res={%04RX16,%04RX16,%04RX16}\n", + pFpuCtx->aRegs[iST].au16[5], pFpuCtx->aRegs[iST].au16[6], pFpuCtx->aRegs[iST].au16[7]); + else + pHlp->pfnPrintf(pHlp, "\n"); + } + + /* XMM/YMM/ZMM registers. 
*/ + if (pCtx->fXStateMask & XSAVE_C_YMM) + { + PCX86XSAVEYMMHI pYmmHiCtx = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_YMM_BIT, PCX86XSAVEYMMHI); + if (!(pCtx->fXStateMask & XSAVE_C_ZMM_HI256)) + for (unsigned i = 0; i < RT_ELEMENTS(pFpuCtx->aXMM); i++) + pHlp->pfnPrintf(pHlp, "%sYMM%u%s=%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32\n", + pszPrefix, i, i < 10 ? " " : "", + pYmmHiCtx->aYmmHi[i].au32[3], + pYmmHiCtx->aYmmHi[i].au32[2], + pYmmHiCtx->aYmmHi[i].au32[1], + pYmmHiCtx->aYmmHi[i].au32[0], + pFpuCtx->aXMM[i].au32[3], + pFpuCtx->aXMM[i].au32[2], + pFpuCtx->aXMM[i].au32[1], + pFpuCtx->aXMM[i].au32[0]); + else + { + PCX86XSAVEZMMHI256 pZmmHi256 = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_ZMM_HI256_BIT, PCX86XSAVEZMMHI256); + for (unsigned i = 0; i < RT_ELEMENTS(pFpuCtx->aXMM); i++) + pHlp->pfnPrintf(pHlp, + "%sZMM%u%s=%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32''%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32\n", + pszPrefix, i, i < 10 ? " " : "", + pZmmHi256->aHi256Regs[i].au32[7], + pZmmHi256->aHi256Regs[i].au32[6], + pZmmHi256->aHi256Regs[i].au32[5], + pZmmHi256->aHi256Regs[i].au32[4], + pZmmHi256->aHi256Regs[i].au32[3], + pZmmHi256->aHi256Regs[i].au32[2], + pZmmHi256->aHi256Regs[i].au32[1], + pZmmHi256->aHi256Regs[i].au32[0], + pYmmHiCtx->aYmmHi[i].au32[3], + pYmmHiCtx->aYmmHi[i].au32[2], + pYmmHiCtx->aYmmHi[i].au32[1], + pYmmHiCtx->aYmmHi[i].au32[0], + pFpuCtx->aXMM[i].au32[3], + pFpuCtx->aXMM[i].au32[2], + pFpuCtx->aXMM[i].au32[1], + pFpuCtx->aXMM[i].au32[0]); + + PCX86XSAVEZMM16HI pZmm16Hi = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_ZMM_16HI_BIT, PCX86XSAVEZMM16HI); + for (unsigned i = 0; i < RT_ELEMENTS(pZmm16Hi->aRegs); i++) + pHlp->pfnPrintf(pHlp, + "%sZMM%u=%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32''%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32'%08RX32\n", + pszPrefix, i + 16, + pZmm16Hi->aRegs[i].au32[15], + pZmm16Hi->aRegs[i].au32[14], + pZmm16Hi->aRegs[i].au32[13], + pZmm16Hi->aRegs[i].au32[12], + pZmm16Hi->aRegs[i].au32[11], + pZmm16Hi->aRegs[i].au32[10], + pZmm16Hi->aRegs[i].au32[9], + pZmm16Hi->aRegs[i].au32[8], + pZmm16Hi->aRegs[i].au32[7], + pZmm16Hi->aRegs[i].au32[6], + pZmm16Hi->aRegs[i].au32[5], + pZmm16Hi->aRegs[i].au32[4], + pZmm16Hi->aRegs[i].au32[3], + pZmm16Hi->aRegs[i].au32[2], + pZmm16Hi->aRegs[i].au32[1], + pZmm16Hi->aRegs[i].au32[0]); + } + } + else + for (unsigned i = 0; i < RT_ELEMENTS(pFpuCtx->aXMM); i++) + pHlp->pfnPrintf(pHlp, + i & 1 + ? "%sXMM%u%s=%08RX32'%08RX32'%08RX32'%08RX32\n" + : "%sXMM%u%s=%08RX32'%08RX32'%08RX32'%08RX32 ", + pszPrefix, i, i < 10 ? 
" " : "", + pFpuCtx->aXMM[i].au32[3], + pFpuCtx->aXMM[i].au32[2], + pFpuCtx->aXMM[i].au32[1], + pFpuCtx->aXMM[i].au32[0]); + + if (pCtx->fXStateMask & XSAVE_C_OPMASK) + { + PCX86XSAVEOPMASK pOpMask = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_OPMASK_BIT, PCX86XSAVEOPMASK); + for (unsigned i = 0; i < RT_ELEMENTS(pOpMask->aKRegs); i += 4) + pHlp->pfnPrintf(pHlp, "%sK%u=%016RX64 %sK%u=%016RX64 %sK%u=%016RX64 %sK%u=%016RX64\n", + pszPrefix, i + 0, pOpMask->aKRegs[i + 0], + pszPrefix, i + 1, pOpMask->aKRegs[i + 1], + pszPrefix, i + 2, pOpMask->aKRegs[i + 2], + pszPrefix, i + 3, pOpMask->aKRegs[i + 3]); + } + + if (pCtx->fXStateMask & XSAVE_C_BNDREGS) + { + PCX86XSAVEBNDREGS pBndRegs = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_BNDREGS_BIT, PCX86XSAVEBNDREGS); + for (unsigned i = 0; i < RT_ELEMENTS(pBndRegs->aRegs); i += 2) + pHlp->pfnPrintf(pHlp, "%sBNDREG%u=%016RX64/%016RX64 %sBNDREG%u=%016RX64/%016RX64\n", + pszPrefix, i, pBndRegs->aRegs[i].uLowerBound, pBndRegs->aRegs[i].uUpperBound, + pszPrefix, i + 1, pBndRegs->aRegs[i + 1].uLowerBound, pBndRegs->aRegs[i + 1].uUpperBound); + } + + if (pCtx->fXStateMask & XSAVE_C_BNDCSR) + { + PCX86XSAVEBNDCFG pBndCfg = CPUMCTX_XSAVE_C_PTR(pCtx, XSAVE_C_BNDCSR_BIT, PCX86XSAVEBNDCFG); + pHlp->pfnPrintf(pHlp, "%sBNDCFG.CONFIG=%016RX64 %sBNDCFG.STATUS=%016RX64\n", + pszPrefix, pBndCfg->fConfig, pszPrefix, pBndCfg->fStatus); + } + + for (unsigned i = 0; i < RT_ELEMENTS(pFpuCtx->au32RsrvdRest); i++) + if (pFpuCtx->au32RsrvdRest[i]) + pHlp->pfnPrintf(pHlp, "%sRsrvdRest[%u]=%RX32 (offset=%#x)\n", + pszPrefix, i, pFpuCtx->au32RsrvdRest[i], RT_UOFFSETOF_DYN(X86FXSTATE, au32RsrvdRest[i]) ); + } + + pHlp->pfnPrintf(pHlp, + "%sEFER =%016RX64\n" + "%sPAT =%016RX64\n" + "%sSTAR =%016RX64\n" + "%sCSTAR =%016RX64\n" + "%sLSTAR =%016RX64\n" + "%sSFMASK =%016RX64\n" + "%sKERNELGSBASE =%016RX64\n", + pszPrefix, pCtx->msrEFER, + pszPrefix, pCtx->msrPAT, + pszPrefix, pCtx->msrSTAR, + pszPrefix, pCtx->msrCSTAR, + pszPrefix, pCtx->msrLSTAR, + pszPrefix, pCtx->msrSFMASK, + pszPrefix, pCtx->msrKERNELGSBASE); + break; + } +} + + +/** + * Display all cpu states and any other cpum info. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) cpumR3InfoAll(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + cpumR3InfoGuest(pVM, pHlp, pszArgs); + cpumR3InfoGuestInstr(pVM, pHlp, pszArgs); + cpumR3InfoGuestHwvirt(pVM, pHlp, pszArgs); + cpumR3InfoHyper(pVM, pHlp, pszArgs); + cpumR3InfoHost(pVM, pHlp, pszArgs); +} + + +/** + * Parses the info argument. + * + * The argument starts with 'verbose', 'terse' or 'default' and then + * continues with the comment string. + * + * @param pszArgs The pointer to the argument string. + * @param penmType Where to store the dump type request. + * @param ppszComment Where to store the pointer to the comment string. 
+ */ +static void cpumR3InfoParseArg(const char *pszArgs, CPUMDUMPTYPE *penmType, const char **ppszComment) +{ + if (!pszArgs) + { + *penmType = CPUMDUMPTYPE_DEFAULT; + *ppszComment = ""; + } + else + { + if (!strncmp(pszArgs, RT_STR_TUPLE("verbose"))) + { + pszArgs += 7; + *penmType = CPUMDUMPTYPE_VERBOSE; + } + else if (!strncmp(pszArgs, RT_STR_TUPLE("terse"))) + { + pszArgs += 5; + *penmType = CPUMDUMPTYPE_TERSE; + } + else if (!strncmp(pszArgs, RT_STR_TUPLE("default"))) + { + pszArgs += 7; + *penmType = CPUMDUMPTYPE_DEFAULT; + } + else + *penmType = CPUMDUMPTYPE_DEFAULT; + *ppszComment = RTStrStripL(pszArgs); + } +} + + +/** + * Display the guest cpu state. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments. + */ +static DECLCALLBACK(void) cpumR3InfoGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + CPUMDUMPTYPE enmType; + const char *pszComment; + cpumR3InfoParseArg(pszArgs, &enmType, &pszComment); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + pHlp->pfnPrintf(pHlp, "Guest CPUM (VCPU %d) state: %s\n", pVCpu->idCpu, pszComment); + + PCPUMCTX pCtx = &pVCpu->cpum.s.Guest; + cpumR3InfoOne(pVM, pCtx, CPUMCTX2CORE(pCtx), pHlp, enmType, ""); +} + + +/** + * Displays an SVM VMCB control area. + * + * @param pHlp The info helper functions. + * @param pVmcbCtrl Pointer to a SVM VMCB controls area. + * @param pszPrefix Caller specified string prefix. + */ +static void cpumR3InfoSvmVmcbCtrl(PCDBGFINFOHLP pHlp, PCSVMVMCBCTRL pVmcbCtrl, const char *pszPrefix) +{ + AssertReturnVoid(pHlp); + AssertReturnVoid(pVmcbCtrl); + + pHlp->pfnPrintf(pHlp, "%sCRX-read intercepts = %#RX16\n", pszPrefix, pVmcbCtrl->u16InterceptRdCRx); + pHlp->pfnPrintf(pHlp, "%sCRX-write intercepts = %#RX16\n", pszPrefix, pVmcbCtrl->u16InterceptWrCRx); + pHlp->pfnPrintf(pHlp, "%sDRX-read intercepts = %#RX16\n", pszPrefix, pVmcbCtrl->u16InterceptRdDRx); + pHlp->pfnPrintf(pHlp, "%sDRX-write intercepts = %#RX16\n", pszPrefix, pVmcbCtrl->u16InterceptWrDRx); + pHlp->pfnPrintf(pHlp, "%sException intercepts = %#RX32\n", pszPrefix, pVmcbCtrl->u32InterceptXcpt); + pHlp->pfnPrintf(pHlp, "%sControl intercepts = %#RX64\n", pszPrefix, pVmcbCtrl->u64InterceptCtrl); + pHlp->pfnPrintf(pHlp, "%sPause-filter threshold = %#RX16\n", pszPrefix, pVmcbCtrl->u16PauseFilterThreshold); + pHlp->pfnPrintf(pHlp, "%sPause-filter count = %#RX16\n", pszPrefix, pVmcbCtrl->u16PauseFilterCount); + pHlp->pfnPrintf(pHlp, "%sIOPM bitmap physaddr = %#RX64\n", pszPrefix, pVmcbCtrl->u64IOPMPhysAddr); + pHlp->pfnPrintf(pHlp, "%sMSRPM bitmap physaddr = %#RX64\n", pszPrefix, pVmcbCtrl->u64MSRPMPhysAddr); + pHlp->pfnPrintf(pHlp, "%sTSC offset = %#RX64\n", pszPrefix, pVmcbCtrl->u64TSCOffset); + pHlp->pfnPrintf(pHlp, "%sTLB Control\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sASID = %#RX32\n", pszPrefix, pVmcbCtrl->TLBCtrl.n.u32ASID); + pHlp->pfnPrintf(pHlp, " %sTLB-flush type = %u\n", pszPrefix, pVmcbCtrl->TLBCtrl.n.u8TLBFlush); + pHlp->pfnPrintf(pHlp, "%sInterrupt Control\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sVTPR = %#RX8 (%u)\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u8VTPR, pVmcbCtrl->IntCtrl.n.u8VTPR); + pHlp->pfnPrintf(pHlp, " %sVIRQ (Pending) = %RTbool\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u1VIrqPending); + pHlp->pfnPrintf(pHlp, " %sVINTR vector = %#RX8\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u8VIntrVector); + pHlp->pfnPrintf(pHlp, " %sVGIF = %u\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u1VGif); + pHlp->pfnPrintf(pHlp, " %sVINTR priority = %#RX8\n", pszPrefix, 
pVmcbCtrl->IntCtrl.n.u4VIntrPrio); + pHlp->pfnPrintf(pHlp, " %sIgnore TPR = %RTbool\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u1IgnoreTPR); + pHlp->pfnPrintf(pHlp, " %sVINTR masking = %RTbool\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u1VIntrMasking); + pHlp->pfnPrintf(pHlp, " %sVGIF enable = %RTbool\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u1VGifEnable); + pHlp->pfnPrintf(pHlp, " %sAVIC enable = %RTbool\n", pszPrefix, pVmcbCtrl->IntCtrl.n.u1AvicEnable); + pHlp->pfnPrintf(pHlp, "%sInterrupt Shadow\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sInterrupt shadow = %RTbool\n", pszPrefix, pVmcbCtrl->IntShadow.n.u1IntShadow); + pHlp->pfnPrintf(pHlp, " %sGuest-interrupt Mask = %RTbool\n", pszPrefix, pVmcbCtrl->IntShadow.n.u1GuestIntMask); + pHlp->pfnPrintf(pHlp, "%sExit Code = %#RX64\n", pszPrefix, pVmcbCtrl->u64ExitCode); + pHlp->pfnPrintf(pHlp, "%sEXITINFO1 = %#RX64\n", pszPrefix, pVmcbCtrl->u64ExitInfo1); + pHlp->pfnPrintf(pHlp, "%sEXITINFO2 = %#RX64\n", pszPrefix, pVmcbCtrl->u64ExitInfo2); + pHlp->pfnPrintf(pHlp, "%sExit Interrupt Info\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sValid = %RTbool\n", pszPrefix, pVmcbCtrl->ExitIntInfo.n.u1Valid); + pHlp->pfnPrintf(pHlp, " %sVector = %#RX8 (%u)\n", pszPrefix, pVmcbCtrl->ExitIntInfo.n.u8Vector, pVmcbCtrl->ExitIntInfo.n.u8Vector); + pHlp->pfnPrintf(pHlp, " %sType = %u\n", pszPrefix, pVmcbCtrl->ExitIntInfo.n.u3Type); + pHlp->pfnPrintf(pHlp, " %sError-code valid = %RTbool\n", pszPrefix, pVmcbCtrl->ExitIntInfo.n.u1ErrorCodeValid); + pHlp->pfnPrintf(pHlp, " %sError-code = %#RX32\n", pszPrefix, pVmcbCtrl->ExitIntInfo.n.u32ErrorCode); + pHlp->pfnPrintf(pHlp, "%sNested paging and SEV\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sNested paging = %RTbool\n", pszPrefix, pVmcbCtrl->NestedPagingCtrl.n.u1NestedPaging); + pHlp->pfnPrintf(pHlp, " %sSEV (Secure Encrypted VM) = %RTbool\n", pszPrefix, pVmcbCtrl->NestedPagingCtrl.n.u1Sev); + pHlp->pfnPrintf(pHlp, " %sSEV-ES (Encrypted State) = %RTbool\n", pszPrefix, pVmcbCtrl->NestedPagingCtrl.n.u1SevEs); + pHlp->pfnPrintf(pHlp, "%sEvent Inject\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sValid = %RTbool\n", pszPrefix, pVmcbCtrl->EventInject.n.u1Valid); + pHlp->pfnPrintf(pHlp, " %sVector = %#RX32 (%u)\n", pszPrefix, pVmcbCtrl->EventInject.n.u8Vector, pVmcbCtrl->EventInject.n.u8Vector); + pHlp->pfnPrintf(pHlp, " %sType = %u\n", pszPrefix, pVmcbCtrl->EventInject.n.u3Type); + pHlp->pfnPrintf(pHlp, " %sError-code valid = %RTbool\n", pszPrefix, pVmcbCtrl->EventInject.n.u1ErrorCodeValid); + pHlp->pfnPrintf(pHlp, " %sError-code = %#RX32\n", pszPrefix, pVmcbCtrl->EventInject.n.u32ErrorCode); + pHlp->pfnPrintf(pHlp, "%sNested-paging CR3 = %#RX64\n", pszPrefix, pVmcbCtrl->u64NestedPagingCR3); + pHlp->pfnPrintf(pHlp, "%sLBR Virtualization\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sLBR virt = %RTbool\n", pszPrefix, pVmcbCtrl->LbrVirt.n.u1LbrVirt); + pHlp->pfnPrintf(pHlp, " %sVirt. 
VMSAVE/VMLOAD = %RTbool\n", pszPrefix, pVmcbCtrl->LbrVirt.n.u1VirtVmsaveVmload); + pHlp->pfnPrintf(pHlp, "%sVMCB Clean Bits = %#RX32\n", pszPrefix, pVmcbCtrl->u32VmcbCleanBits); + pHlp->pfnPrintf(pHlp, "%sNext-RIP = %#RX64\n", pszPrefix, pVmcbCtrl->u64NextRIP); + pHlp->pfnPrintf(pHlp, "%sInstruction bytes fetched = %u\n", pszPrefix, pVmcbCtrl->cbInstrFetched); + pHlp->pfnPrintf(pHlp, "%sInstruction bytes = %.*Rhxs\n", pszPrefix, sizeof(pVmcbCtrl->abInstr), pVmcbCtrl->abInstr); + pHlp->pfnPrintf(pHlp, "%sAVIC\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sBar addr = %#RX64\n", pszPrefix, pVmcbCtrl->AvicBar.n.u40Addr); + pHlp->pfnPrintf(pHlp, " %sBacking page addr = %#RX64\n", pszPrefix, pVmcbCtrl->AvicBackingPagePtr.n.u40Addr); + pHlp->pfnPrintf(pHlp, " %sLogical table addr = %#RX64\n", pszPrefix, pVmcbCtrl->AvicLogicalTablePtr.n.u40Addr); + pHlp->pfnPrintf(pHlp, " %sPhysical table addr = %#RX64\n", pszPrefix, pVmcbCtrl->AvicPhysicalTablePtr.n.u40Addr); + pHlp->pfnPrintf(pHlp, " %sLast guest core Id = %u\n", pszPrefix, pVmcbCtrl->AvicPhysicalTablePtr.n.u8LastGuestCoreId); +} + + +/** + * Helper for dumping the SVM VMCB selector registers. + * + * @param pHlp The info helper functions. + * @param pSel Pointer to the SVM selector register. + * @param pszName Name of the selector. + * @param pszPrefix Caller specified string prefix. + */ +DECLINLINE(void) cpumR3InfoSvmVmcbSelReg(PCDBGFINFOHLP pHlp, PCSVMSELREG pSel, const char *pszName, const char *pszPrefix) +{ + /* The string width of 4 used below is to handle 'LDTR'. Change later if longer register names are used. */ + pHlp->pfnPrintf(pHlp, "%s%-4s = {%04x base=%016RX64 limit=%08x flags=%04x}\n", pszPrefix, + pszName, pSel->u16Sel, pSel->u64Base, pSel->u32Limit, pSel->u16Attr); +} + + +/** + * Helper for dumping the SVM VMCB GDTR/IDTR registers. + * + * @param pHlp The info helper functions. + * @param pXdtr Pointer to the descriptor table register. + * @param pszName Name of the descriptor table register. + * @param pszPrefix Caller specified string prefix. + */ +DECLINLINE(void) cpumR3InfoSvmVmcbXdtr(PCDBGFINFOHLP pHlp, PCSVMXDTR pXdtr, const char *pszName, const char *pszPrefix) +{ + /* The string width of 4 used below is to cover 'GDTR', 'IDTR'. Change later if longer register names are used. */ + pHlp->pfnPrintf(pHlp, "%s%-4s = %016RX64:%04x\n", pszPrefix, pszName, pXdtr->u64Base, pXdtr->u32Limit); +} + + +/** + * Displays an SVM VMCB state-save area. + * + * @param pHlp The info helper functions. + * @param pVmcbStateSave Pointer to a SVM VMCB controls area. + * @param pszPrefix Caller specified string prefix. 
+ */ +static void cpumR3InfoSvmVmcbStateSave(PCDBGFINFOHLP pHlp, PCSVMVMCBSTATESAVE pVmcbStateSave, const char *pszPrefix) +{ + AssertReturnVoid(pHlp); + AssertReturnVoid(pVmcbStateSave); + + char szEFlags[80]; + cpumR3InfoFormatFlags(&szEFlags[0], pVmcbStateSave->u64RFlags); + + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->CS, "CS", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->SS, "SS", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->ES, "ES", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->DS, "DS", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->FS, "FS", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->GS, "GS", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->LDTR, "LDTR", pszPrefix); + cpumR3InfoSvmVmcbSelReg(pHlp, &pVmcbStateSave->TR, "TR", pszPrefix); + cpumR3InfoSvmVmcbXdtr(pHlp, &pVmcbStateSave->GDTR, "GDTR", pszPrefix); + cpumR3InfoSvmVmcbXdtr(pHlp, &pVmcbStateSave->IDTR, "IDTR", pszPrefix); + pHlp->pfnPrintf(pHlp, "%sCPL = %u\n", pszPrefix, pVmcbStateSave->u8CPL); + pHlp->pfnPrintf(pHlp, "%sEFER = %#RX64\n", pszPrefix, pVmcbStateSave->u64EFER); + pHlp->pfnPrintf(pHlp, "%sCR4 = %#RX64\n", pszPrefix, pVmcbStateSave->u64CR4); + pHlp->pfnPrintf(pHlp, "%sCR3 = %#RX64\n", pszPrefix, pVmcbStateSave->u64CR3); + pHlp->pfnPrintf(pHlp, "%sCR0 = %#RX64\n", pszPrefix, pVmcbStateSave->u64CR0); + pHlp->pfnPrintf(pHlp, "%sDR7 = %#RX64\n", pszPrefix, pVmcbStateSave->u64DR7); + pHlp->pfnPrintf(pHlp, "%sDR6 = %#RX64\n", pszPrefix, pVmcbStateSave->u64DR6); + pHlp->pfnPrintf(pHlp, "%sRFLAGS = %#RX64 %31s\n", pszPrefix, pVmcbStateSave->u64RFlags, szEFlags); + pHlp->pfnPrintf(pHlp, "%sRIP = %#RX64\n", pszPrefix, pVmcbStateSave->u64RIP); + pHlp->pfnPrintf(pHlp, "%sRSP = %#RX64\n", pszPrefix, pVmcbStateSave->u64RSP); + pHlp->pfnPrintf(pHlp, "%sRAX = %#RX64\n", pszPrefix, pVmcbStateSave->u64RAX); + pHlp->pfnPrintf(pHlp, "%sSTAR = %#RX64\n", pszPrefix, pVmcbStateSave->u64STAR); + pHlp->pfnPrintf(pHlp, "%sLSTAR = %#RX64\n", pszPrefix, pVmcbStateSave->u64LSTAR); + pHlp->pfnPrintf(pHlp, "%sCSTAR = %#RX64\n", pszPrefix, pVmcbStateSave->u64CSTAR); + pHlp->pfnPrintf(pHlp, "%sSFMASK = %#RX64\n", pszPrefix, pVmcbStateSave->u64SFMASK); + pHlp->pfnPrintf(pHlp, "%sKERNELGSBASE = %#RX64\n", pszPrefix, pVmcbStateSave->u64KernelGSBase); + pHlp->pfnPrintf(pHlp, "%sSysEnter CS = %#RX64\n", pszPrefix, pVmcbStateSave->u64SysEnterCS); + pHlp->pfnPrintf(pHlp, "%sSysEnter EIP = %#RX64\n", pszPrefix, pVmcbStateSave->u64SysEnterEIP); + pHlp->pfnPrintf(pHlp, "%sSysEnter ESP = %#RX64\n", pszPrefix, pVmcbStateSave->u64SysEnterESP); + pHlp->pfnPrintf(pHlp, "%sCR2 = %#RX64\n", pszPrefix, pVmcbStateSave->u64CR2); + pHlp->pfnPrintf(pHlp, "%sPAT = %#RX64\n", pszPrefix, pVmcbStateSave->u64PAT); + pHlp->pfnPrintf(pHlp, "%sDBGCTL = %#RX64\n", pszPrefix, pVmcbStateSave->u64DBGCTL); + pHlp->pfnPrintf(pHlp, "%sBR_FROM = %#RX64\n", pszPrefix, pVmcbStateSave->u64BR_FROM); + pHlp->pfnPrintf(pHlp, "%sBR_TO = %#RX64\n", pszPrefix, pVmcbStateSave->u64BR_TO); + pHlp->pfnPrintf(pHlp, "%sLASTXCPT_FROM = %#RX64\n", pszPrefix, pVmcbStateSave->u64LASTEXCPFROM); + pHlp->pfnPrintf(pHlp, "%sLASTXCPT_TO = %#RX64\n", pszPrefix, pVmcbStateSave->u64LASTEXCPTO); +} + + +/** + * Displays a virtual-VMCS. + * + * @param pHlp The info helper functions. + * @param pVmcs Pointer to a virtual VMCS. + * @param pszPrefix Caller specified string prefix. 
+ */ +static void cpumR3InfoVmxVmcs(PCDBGFINFOHLP pHlp, PCVMXVVMCS pVmcs, const char *pszPrefix) +{ + AssertReturnVoid(pHlp); + AssertReturnVoid(pVmcs); + + /* The string width of -4 used in the macros below to cover 'LDTR', 'GDTR', 'IDTR. */ +#define CPUMVMX_DUMP_HOST_XDTR(a_pHlp, a_pVmcs, a_Seg, a_SegName, a_pszPrefix) \ + do { \ + (a_pHlp)->pfnPrintf((a_pHlp), " %s%-4s = {base=%016RX64}\n", \ + (a_pszPrefix), (a_SegName), (a_pVmcs)->u64Host##a_Seg##Base.u); \ + } while (0) + +#define CPUMVMX_DUMP_HOST_FS_GS_TR(a_pHlp, a_pVmcs, a_Seg, a_SegName, a_pszPrefix) \ + do { \ + (a_pHlp)->pfnPrintf((a_pHlp), " %s%-4s = {%04x base=%016RX64}\n", \ + (a_pszPrefix), (a_SegName), (a_pVmcs)->Host##a_Seg, (a_pVmcs)->u64Host##a_Seg##Base.u); \ + } while (0) + +#define CPUMVMX_DUMP_GUEST_SEGREG(a_pHlp, a_pVmcs, a_Seg, a_SegName, a_pszPrefix) \ + do { \ + (a_pHlp)->pfnPrintf((a_pHlp), " %s%-4s = {%04x base=%016RX64 limit=%08x flags=%04x}\n", \ + (a_pszPrefix), (a_SegName), (a_pVmcs)->Guest##a_Seg, (a_pVmcs)->u64Guest##a_Seg##Base.u, \ + (a_pVmcs)->u32Guest##a_Seg##Limit, (a_pVmcs)->u32Guest##a_Seg##Attr); \ + } while (0) + +#define CPUMVMX_DUMP_GUEST_XDTR(a_pHlp, a_pVmcs, a_Seg, a_SegName, a_pszPrefix) \ + do { \ + (a_pHlp)->pfnPrintf((a_pHlp), " %s%-4s = {base=%016RX64 limit=%08x}\n", \ + (a_pszPrefix), (a_SegName), (a_pVmcs)->u64Guest##a_Seg##Base.u, (a_pVmcs)->u32Guest##a_Seg##Limit); \ + } while (0) + + /* Header. */ + { + pHlp->pfnPrintf(pHlp, "%sHeader:\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sVMCS revision id = %#RX32\n", pszPrefix, pVmcs->u32VmcsRevId); + pHlp->pfnPrintf(pHlp, " %sVMX-abort id = %#RX32 (%s)\n", pszPrefix, pVmcs->enmVmxAbort, HMGetVmxAbortDesc(pVmcs->enmVmxAbort)); + pHlp->pfnPrintf(pHlp, " %sVMCS state = %#x (%s)\n", pszPrefix, pVmcs->fVmcsState, HMGetVmxVmcsStateDesc(pVmcs->fVmcsState)); + } + + /* Control fields. */ + { + /* 16-bit. */ + pHlp->pfnPrintf(pHlp, "%sControl:\n", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sVPID = %#RX16\n", pszPrefix, pVmcs->u16Vpid); + pHlp->pfnPrintf(pHlp, " %sPosted intr notify vector = %#RX16\n", pszPrefix, pVmcs->u16PostIntNotifyVector); + pHlp->pfnPrintf(pHlp, " %sEPTP index = %#RX16\n", pszPrefix, pVmcs->u16EptpIndex); + + /* 32-bit. 
*/ + pHlp->pfnPrintf(pHlp, " %sPinCtls = %#RX32\n", pszPrefix, pVmcs->u32PinCtls); + pHlp->pfnPrintf(pHlp, " %sProcCtls = %#RX32\n", pszPrefix, pVmcs->u32ProcCtls); + pHlp->pfnPrintf(pHlp, " %sProcCtls2 = %#RX32\n", pszPrefix, pVmcs->u32ProcCtls2); + pHlp->pfnPrintf(pHlp, " %sExitCtls = %#RX32\n", pszPrefix, pVmcs->u32ExitCtls); + pHlp->pfnPrintf(pHlp, " %sEntryCtls = %#RX32\n", pszPrefix, pVmcs->u32EntryCtls); + pHlp->pfnPrintf(pHlp, " %sException bitmap = %#RX32\n", pszPrefix, pVmcs->u32XcptBitmap); + pHlp->pfnPrintf(pHlp, " %sPage-fault mask = %#RX32\n", pszPrefix, pVmcs->u32XcptPFMask); + pHlp->pfnPrintf(pHlp, " %sPage-fault match = %#RX32\n", pszPrefix, pVmcs->u32XcptPFMatch); + pHlp->pfnPrintf(pHlp, " %sCR3-target count = %RU32\n", pszPrefix, pVmcs->u32Cr3TargetCount); + pHlp->pfnPrintf(pHlp, " %sVM-exit MSR store count = %RU32\n", pszPrefix, pVmcs->u32ExitMsrStoreCount); + pHlp->pfnPrintf(pHlp, " %sVM-exit MSR load count = %RU32\n", pszPrefix, pVmcs->u32ExitMsrLoadCount); + pHlp->pfnPrintf(pHlp, " %sVM-entry MSR load count = %RU32\n", pszPrefix, pVmcs->u32EntryMsrLoadCount); + pHlp->pfnPrintf(pHlp, " %sVM-entry interruption info = %#RX32\n", pszPrefix, pVmcs->u32EntryIntInfo); + { + uint32_t const fInfo = pVmcs->u32EntryIntInfo; + uint8_t const uType = VMX_ENTRY_INT_INFO_TYPE(fInfo); + pHlp->pfnPrintf(pHlp, " %sValid = %RTbool\n", pszPrefix, VMX_ENTRY_INT_INFO_IS_VALID(fInfo)); + pHlp->pfnPrintf(pHlp, " %sType = %#x (%s)\n", pszPrefix, uType, HMGetVmxEntryIntInfoTypeDesc(uType)); + pHlp->pfnPrintf(pHlp, " %sVector = %#x\n", pszPrefix, VMX_ENTRY_INT_INFO_VECTOR(fInfo)); + pHlp->pfnPrintf(pHlp, " %sNMI-unblocking-IRET = %RTbool\n", pszPrefix, VMX_ENTRY_INT_INFO_IS_NMI_UNBLOCK_IRET(fInfo)); + pHlp->pfnPrintf(pHlp, " %sError-code valid = %RTbool\n", pszPrefix, VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(fInfo)); + } + pHlp->pfnPrintf(pHlp, " %sVM-entry xcpt error-code = %#RX32\n", pszPrefix, pVmcs->u32EntryXcptErrCode); + pHlp->pfnPrintf(pHlp, " %sVM-entry instruction len = %u bytes\n", pszPrefix, pVmcs->u32EntryInstrLen); + pHlp->pfnPrintf(pHlp, " %sTPR threshold = %#RX32\n", pszPrefix, pVmcs->u32TprThreshold); + pHlp->pfnPrintf(pHlp, " %sPLE gap = %#RX32\n", pszPrefix, pVmcs->u32PleGap); + pHlp->pfnPrintf(pHlp, " %sPLE window = %#RX32\n", pszPrefix, pVmcs->u32PleWindow); + + /* 64-bit. 
*/ + pHlp->pfnPrintf(pHlp, " %sIO-bitmap A addr = %#RX64\n", pszPrefix, pVmcs->u64AddrIoBitmapA.u); + pHlp->pfnPrintf(pHlp, " %sIO-bitmap B addr = %#RX64\n", pszPrefix, pVmcs->u64AddrIoBitmapB.u); + pHlp->pfnPrintf(pHlp, " %sMSR-bitmap addr = %#RX64\n", pszPrefix, pVmcs->u64AddrMsrBitmap.u); + pHlp->pfnPrintf(pHlp, " %sVM-exit MSR store addr = %#RX64\n", pszPrefix, pVmcs->u64AddrExitMsrStore.u); + pHlp->pfnPrintf(pHlp, " %sVM-exit MSR load addr = %#RX64\n", pszPrefix, pVmcs->u64AddrExitMsrLoad.u); + pHlp->pfnPrintf(pHlp, " %sVM-entry MSR load addr = %#RX64\n", pszPrefix, pVmcs->u64AddrEntryMsrLoad.u); + pHlp->pfnPrintf(pHlp, " %sExecutive VMCS ptr = %#RX64\n", pszPrefix, pVmcs->u64ExecVmcsPtr.u); + pHlp->pfnPrintf(pHlp, " %sPML addr = %#RX64\n", pszPrefix, pVmcs->u64AddrPml.u); + pHlp->pfnPrintf(pHlp, " %sTSC offset = %#RX64\n", pszPrefix, pVmcs->u64TscOffset.u); + pHlp->pfnPrintf(pHlp, " %sVirtual-APIC addr = %#RX64\n", pszPrefix, pVmcs->u64AddrVirtApic.u); + pHlp->pfnPrintf(pHlp, " %sAPIC-access addr = %#RX64\n", pszPrefix, pVmcs->u64AddrApicAccess.u); + pHlp->pfnPrintf(pHlp, " %sPosted-intr desc addr = %#RX64\n", pszPrefix, pVmcs->u64AddrPostedIntDesc.u); + pHlp->pfnPrintf(pHlp, " %sVM-functions control = %#RX64\n", pszPrefix, pVmcs->u64VmFuncCtls.u); + pHlp->pfnPrintf(pHlp, " %sEPTP ptr = %#RX64\n", pszPrefix, pVmcs->u64EptpPtr.u); + pHlp->pfnPrintf(pHlp, " %sEOI-exit bitmap 0 addr = %#RX64\n", pszPrefix, pVmcs->u64EoiExitBitmap0.u); + pHlp->pfnPrintf(pHlp, " %sEOI-exit bitmap 1 addr = %#RX64\n", pszPrefix, pVmcs->u64EoiExitBitmap1.u); + pHlp->pfnPrintf(pHlp, " %sEOI-exit bitmap 2 addr = %#RX64\n", pszPrefix, pVmcs->u64EoiExitBitmap2.u); + pHlp->pfnPrintf(pHlp, " %sEOI-exit bitmap 3 addr = %#RX64\n", pszPrefix, pVmcs->u64EoiExitBitmap3.u); + pHlp->pfnPrintf(pHlp, " %sEPTP-list addr = %#RX64\n", pszPrefix, pVmcs->u64AddrEptpList.u); + pHlp->pfnPrintf(pHlp, " %sVMREAD-bitmap addr = %#RX64\n", pszPrefix, pVmcs->u64AddrVmreadBitmap.u); + pHlp->pfnPrintf(pHlp, " %sVMWRITE-bitmap addr = %#RX64\n", pszPrefix, pVmcs->u64AddrVmwriteBitmap.u); + pHlp->pfnPrintf(pHlp, " %sVirt-Xcpt info addr = %#RX64\n", pszPrefix, pVmcs->u64AddrXcptVeInfo.u); + pHlp->pfnPrintf(pHlp, " %sXSS-bitmap = %#RX64\n", pszPrefix, pVmcs->u64XssBitmap.u); + pHlp->pfnPrintf(pHlp, " %sENCLS-exiting bitmap addr = %#RX64\n", pszPrefix, pVmcs->u64AddrEnclsBitmap.u); + pHlp->pfnPrintf(pHlp, " %sTSC multiplier = %#RX64\n", pszPrefix, pVmcs->u64TscMultiplier.u); + + /* Natural width. */ + pHlp->pfnPrintf(pHlp, " %sCR0 guest/host mask = %#RX64\n", pszPrefix, pVmcs->u64Cr0Mask.u); + pHlp->pfnPrintf(pHlp, " %sCR4 guest/host mask = %#RX64\n", pszPrefix, pVmcs->u64Cr4Mask.u); + pHlp->pfnPrintf(pHlp, " %sCR0 read shadow = %#RX64\n", pszPrefix, pVmcs->u64Cr0ReadShadow.u); + pHlp->pfnPrintf(pHlp, " %sCR4 read shadow = %#RX64\n", pszPrefix, pVmcs->u64Cr4ReadShadow.u); + pHlp->pfnPrintf(pHlp, " %sCR3-target 0 = %#RX64\n", pszPrefix, pVmcs->u64Cr3Target0.u); + pHlp->pfnPrintf(pHlp, " %sCR3-target 1 = %#RX64\n", pszPrefix, pVmcs->u64Cr3Target1.u); + pHlp->pfnPrintf(pHlp, " %sCR3-target 2 = %#RX64\n", pszPrefix, pVmcs->u64Cr3Target2.u); + pHlp->pfnPrintf(pHlp, " %sCR3-target 3 = %#RX64\n", pszPrefix, pVmcs->u64Cr3Target3.u); + } + + /* Guest state. */ + { + char szEFlags[80]; + cpumR3InfoFormatFlags(&szEFlags[0], pVmcs->u64GuestRFlags.u); + pHlp->pfnPrintf(pHlp, "%sGuest state:\n", pszPrefix); + + /* 16-bit. 
*/ + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Cs, "cs", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Ss, "ss", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Es, "es", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Ds, "ds", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Fs, "fs", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Gs, "gs", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Ldtr, "ldtr", pszPrefix); + CPUMVMX_DUMP_GUEST_SEGREG(pHlp, pVmcs, Tr, "tr", pszPrefix); + CPUMVMX_DUMP_GUEST_XDTR(pHlp, pVmcs, Gdtr, "gdtr", pszPrefix); + CPUMVMX_DUMP_GUEST_XDTR(pHlp, pVmcs, Idtr, "idtr", pszPrefix); + pHlp->pfnPrintf(pHlp, " %sInterrupt status = %#RX16\n", pszPrefix, pVmcs->u16GuestIntStatus); + pHlp->pfnPrintf(pHlp, " %sPML index = %#RX16\n", pszPrefix, pVmcs->u16PmlIndex); + + /* 32-bit. */ + pHlp->pfnPrintf(pHlp, " %sInterruptibility state = %#RX32\n", pszPrefix, pVmcs->u32GuestIntrState); + pHlp->pfnPrintf(pHlp, " %sActivity state = %#RX32\n", pszPrefix, pVmcs->u32GuestActivityState); + pHlp->pfnPrintf(pHlp, " %sSMBASE = %#RX32\n", pszPrefix, pVmcs->u32GuestSmBase); + pHlp->pfnPrintf(pHlp, " %sSysEnter CS = %#RX32\n", pszPrefix, pVmcs->u32GuestSysenterCS); + pHlp->pfnPrintf(pHlp, " %sVMX-preemption timer value = %#RX32\n", pszPrefix, pVmcs->u32PreemptTimer); + + /* 64-bit. */ + pHlp->pfnPrintf(pHlp, " %sVMCS link ptr = %#RX64\n", pszPrefix, pVmcs->u64VmcsLinkPtr.u); + pHlp->pfnPrintf(pHlp, " %sDBGCTL = %#RX64\n", pszPrefix, pVmcs->u64GuestDebugCtlMsr.u); + pHlp->pfnPrintf(pHlp, " %sPAT = %#RX64\n", pszPrefix, pVmcs->u64GuestPatMsr.u); + pHlp->pfnPrintf(pHlp, " %sEFER = %#RX64\n", pszPrefix, pVmcs->u64GuestEferMsr.u); + pHlp->pfnPrintf(pHlp, " %sPERFGLOBALCTRL = %#RX64\n", pszPrefix, pVmcs->u64GuestPerfGlobalCtlMsr.u); + pHlp->pfnPrintf(pHlp, " %sPDPTE 0 = %#RX64\n", pszPrefix, pVmcs->u64GuestPdpte0.u); + pHlp->pfnPrintf(pHlp, " %sPDPTE 1 = %#RX64\n", pszPrefix, pVmcs->u64GuestPdpte1.u); + pHlp->pfnPrintf(pHlp, " %sPDPTE 2 = %#RX64\n", pszPrefix, pVmcs->u64GuestPdpte2.u); + pHlp->pfnPrintf(pHlp, " %sPDPTE 3 = %#RX64\n", pszPrefix, pVmcs->u64GuestPdpte3.u); + pHlp->pfnPrintf(pHlp, " %sBNDCFGS = %#RX64\n", pszPrefix, pVmcs->u64GuestBndcfgsMsr.u); + + /* Natural width. */ + pHlp->pfnPrintf(pHlp, " %scr0 = %#RX64\n", pszPrefix, pVmcs->u64GuestCr0.u); + pHlp->pfnPrintf(pHlp, " %scr3 = %#RX64\n", pszPrefix, pVmcs->u64GuestCr3.u); + pHlp->pfnPrintf(pHlp, " %scr4 = %#RX64\n", pszPrefix, pVmcs->u64GuestCr4.u); + pHlp->pfnPrintf(pHlp, " %sdr7 = %#RX64\n", pszPrefix, pVmcs->u64GuestDr7.u); + pHlp->pfnPrintf(pHlp, " %srsp = %#RX64\n", pszPrefix, pVmcs->u64GuestRsp.u); + pHlp->pfnPrintf(pHlp, " %srip = %#RX64\n", pszPrefix, pVmcs->u64GuestRip.u); + pHlp->pfnPrintf(pHlp, " %srflags = %#RX64 %31s\n",pszPrefix, pVmcs->u64GuestRFlags.u, szEFlags); + pHlp->pfnPrintf(pHlp, " %sPending debug xcpts = %#RX64\n", pszPrefix, pVmcs->u64GuestPendingDbgXcpt.u); + pHlp->pfnPrintf(pHlp, " %sSysEnter ESP = %#RX64\n", pszPrefix, pVmcs->u64GuestSysenterEsp.u); + pHlp->pfnPrintf(pHlp, " %sSysEnter EIP = %#RX64\n", pszPrefix, pVmcs->u64GuestSysenterEip.u); + } + + /* Host state. */ + { + pHlp->pfnPrintf(pHlp, "%sHost state:\n", pszPrefix); + + /* 16-bit. 
*/ + pHlp->pfnPrintf(pHlp, " %scs = %#RX16\n", pszPrefix, pVmcs->HostCs); + pHlp->pfnPrintf(pHlp, " %sss = %#RX16\n", pszPrefix, pVmcs->HostSs); + pHlp->pfnPrintf(pHlp, " %sds = %#RX16\n", pszPrefix, pVmcs->HostDs); + pHlp->pfnPrintf(pHlp, " %ses = %#RX16\n", pszPrefix, pVmcs->HostEs); + CPUMVMX_DUMP_HOST_FS_GS_TR(pHlp, pVmcs, Fs, "fs", pszPrefix); + CPUMVMX_DUMP_HOST_FS_GS_TR(pHlp, pVmcs, Gs, "gs", pszPrefix); + CPUMVMX_DUMP_HOST_FS_GS_TR(pHlp, pVmcs, Tr, "tr", pszPrefix); + CPUMVMX_DUMP_HOST_XDTR(pHlp, pVmcs, Gdtr, "gdtr", pszPrefix); + CPUMVMX_DUMP_HOST_XDTR(pHlp, pVmcs, Idtr, "idtr", pszPrefix); + + /* 32-bit. */ + pHlp->pfnPrintf(pHlp, " %sSysEnter CS = %#RX32\n", pszPrefix, pVmcs->u32HostSysenterCs); + + /* 64-bit. */ + pHlp->pfnPrintf(pHlp, " %sEFER = %#RX64\n", pszPrefix, pVmcs->u64HostEferMsr.u); + pHlp->pfnPrintf(pHlp, " %sPAT = %#RX64\n", pszPrefix, pVmcs->u64HostPatMsr.u); + pHlp->pfnPrintf(pHlp, " %sPERFGLOBALCTRL = %#RX64\n", pszPrefix, pVmcs->u64HostPerfGlobalCtlMsr.u); + + /* Natural width. */ + pHlp->pfnPrintf(pHlp, " %scr0 = %#RX64\n", pszPrefix, pVmcs->u64HostCr0.u); + pHlp->pfnPrintf(pHlp, " %scr3 = %#RX64\n", pszPrefix, pVmcs->u64HostCr3.u); + pHlp->pfnPrintf(pHlp, " %scr4 = %#RX64\n", pszPrefix, pVmcs->u64HostCr4.u); + pHlp->pfnPrintf(pHlp, " %sSysEnter ESP = %#RX64\n", pszPrefix, pVmcs->u64HostSysenterEsp.u); + pHlp->pfnPrintf(pHlp, " %sSysEnter EIP = %#RX64\n", pszPrefix, pVmcs->u64HostSysenterEip.u); + pHlp->pfnPrintf(pHlp, " %srsp = %#RX64\n", pszPrefix, pVmcs->u64HostRsp.u); + pHlp->pfnPrintf(pHlp, " %srip = %#RX64\n", pszPrefix, pVmcs->u64HostRip.u); + } + + /* Read-only fields. */ + { + pHlp->pfnPrintf(pHlp, "%sRead-only data fields:\n", pszPrefix); + + /* 16-bit (none currently). */ + + /* 32-bit. */ + pHlp->pfnPrintf(pHlp, " %sExit reason = %u (%s)\n", pszPrefix, pVmcs->u32RoExitReason, HMGetVmxExitName(pVmcs->u32RoExitReason)); + pHlp->pfnPrintf(pHlp, " %sExit qualification = %#RX64\n", pszPrefix, pVmcs->u64RoExitQual.u); + pHlp->pfnPrintf(pHlp, " %sVM-instruction error = %#RX32\n", pszPrefix, pVmcs->u32RoVmInstrError); + pHlp->pfnPrintf(pHlp, " %sVM-exit intr info = %#RX32\n", pszPrefix, pVmcs->u32RoExitIntInfo); + { + uint32_t const fInfo = pVmcs->u32RoExitIntInfo; + uint8_t const uType = VMX_EXIT_INT_INFO_TYPE(fInfo); + pHlp->pfnPrintf(pHlp, " %sValid = %RTbool\n", pszPrefix, VMX_EXIT_INT_INFO_IS_VALID(fInfo)); + pHlp->pfnPrintf(pHlp, " %sType = %#x (%s)\n", pszPrefix, uType, HMGetVmxExitIntInfoTypeDesc(uType)); + pHlp->pfnPrintf(pHlp, " %sVector = %#x\n", pszPrefix, VMX_EXIT_INT_INFO_VECTOR(fInfo)); + pHlp->pfnPrintf(pHlp, " %sNMI-unblocking-IRET = %RTbool\n", pszPrefix, VMX_EXIT_INT_INFO_IS_NMI_UNBLOCK_IRET(fInfo)); + pHlp->pfnPrintf(pHlp, " %sError-code valid = %RTbool\n", pszPrefix, VMX_EXIT_INT_INFO_IS_ERROR_CODE_VALID(fInfo)); + } + pHlp->pfnPrintf(pHlp, " %sVM-exit intr error-code = %#RX32\n", pszPrefix, pVmcs->u32RoExitIntErrCode); + pHlp->pfnPrintf(pHlp, " %sIDT-vectoring info = %#RX32\n", pszPrefix, pVmcs->u32RoIdtVectoringInfo); + { + uint32_t const fInfo = pVmcs->u32RoIdtVectoringInfo; + uint8_t const uType = VMX_IDT_VECTORING_INFO_TYPE(fInfo); + pHlp->pfnPrintf(pHlp, " %sValid = %RTbool\n", pszPrefix, VMX_IDT_VECTORING_INFO_IS_VALID(fInfo)); + pHlp->pfnPrintf(pHlp, " %sType = %#x (%s)\n", pszPrefix, uType, HMGetVmxIdtVectoringInfoTypeDesc(uType)); + pHlp->pfnPrintf(pHlp, " %sVector = %#x\n", pszPrefix, VMX_IDT_VECTORING_INFO_VECTOR(fInfo)); + pHlp->pfnPrintf(pHlp, " %sError-code valid = %RTbool\n", pszPrefix, 
VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(fInfo)); + } + pHlp->pfnPrintf(pHlp, " %sIDT-vectoring error-code = %#RX32\n", pszPrefix, pVmcs->u32RoIdtVectoringErrCode); + pHlp->pfnPrintf(pHlp, " %sVM-exit instruction length = %u bytes\n", pszPrefix, pVmcs->u32RoExitInstrLen); + pHlp->pfnPrintf(pHlp, " %sVM-exit instruction info = %#RX64\n", pszPrefix, pVmcs->u32RoExitInstrInfo); + + /* 64-bit. */ + pHlp->pfnPrintf(pHlp, " %sGuest-physical addr = %#RX64\n", pszPrefix, pVmcs->u64RoGuestPhysAddr.u); + + /* Natural width. */ + pHlp->pfnPrintf(pHlp, " %sI/O RCX = %#RX64\n", pszPrefix, pVmcs->u64RoIoRcx.u); + pHlp->pfnPrintf(pHlp, " %sI/O RSI = %#RX64\n", pszPrefix, pVmcs->u64RoIoRsi.u); + pHlp->pfnPrintf(pHlp, " %sI/O RDI = %#RX64\n", pszPrefix, pVmcs->u64RoIoRdi.u); + pHlp->pfnPrintf(pHlp, " %sI/O RIP = %#RX64\n", pszPrefix, pVmcs->u64RoIoRip.u); + pHlp->pfnPrintf(pHlp, " %sGuest-linear addr = %#RX64\n", pszPrefix, pVmcs->u64RoGuestLinearAddr.u); + } + +#undef CPUMVMX_DUMP_HOST_XDTR +#undef CPUMVMX_DUMP_HOST_FS_GS_TR +#undef CPUMVMX_DUMP_GUEST_SEGREG +#undef CPUMVMX_DUMP_GUEST_XDTR +} + + +/** + * Display the guest's hardware-virtualization cpu state. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) cpumR3InfoGuestHwvirt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + RT_NOREF(pszArgs); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + /* + * Figure out what to dump. + * + * In the future we may need to dump everything whether or not we're actively in nested-guest mode + * or not, hence the reason why we use a mask to determine what needs dumping. Currently, we only + * dump hwvirt. state when the guest CPU is executing a nested-guest. + */ + /** @todo perhaps make this configurable through pszArgs, depending on how much + * noise we wish to accept when nested hwvirt. isn't used. */ +#define CPUMHWVIRTDUMP_NONE (0) +#define CPUMHWVIRTDUMP_SVM RT_BIT(0) +#define CPUMHWVIRTDUMP_VMX RT_BIT(1) +#define CPUMHWVIRTDUMP_COMMON RT_BIT(2) +#define CPUMHWVIRTDUMP_LAST CPUMHWVIRTDUMP_VMX + + PCPUMCTX pCtx = &pVCpu->cpum.s.Guest; + static const char *const s_aHwvirtModes[] = { "No/inactive", "SVM", "VMX", "Common" }; + bool const fSvm = pVM->cpum.s.GuestFeatures.fSvm; + bool const fVmx = pVM->cpum.s.GuestFeatures.fVmx; + uint8_t const idxHwvirtState = fSvm ? CPUMHWVIRTDUMP_SVM : (fVmx ? CPUMHWVIRTDUMP_VMX : CPUMHWVIRTDUMP_NONE); + AssertCompile(CPUMHWVIRTDUMP_LAST <= RT_ELEMENTS(s_aHwvirtModes)); + Assert(idxHwvirtState < RT_ELEMENTS(s_aHwvirtModes)); + const char *pcszHwvirtMode = s_aHwvirtModes[idxHwvirtState]; + uint32_t fDumpState = idxHwvirtState | CPUMHWVIRTDUMP_COMMON; + + /* + * Dump it. + */ + pHlp->pfnPrintf(pHlp, "VCPU[%u] hardware virtualization state:\n", pVCpu->idCpu); + + if (fDumpState & CPUMHWVIRTDUMP_COMMON) + pHlp->pfnPrintf(pHlp, "fLocalForcedActions = %#RX32\n", pCtx->hwvirt.fLocalForcedActions); + + pHlp->pfnPrintf(pHlp, "%s hwvirt state%s\n", pcszHwvirtMode, (fDumpState & (CPUMHWVIRTDUMP_SVM | CPUMHWVIRTDUMP_VMX)) ? 
+ ":" : ""); + if (fDumpState & CPUMHWVIRTDUMP_SVM) + { + pHlp->pfnPrintf(pHlp, " fGif = %RTbool\n", pCtx->hwvirt.fGif); + + char szEFlags[80]; + cpumR3InfoFormatFlags(&szEFlags[0], pCtx->hwvirt.svm.HostState.rflags.u); + pHlp->pfnPrintf(pHlp, " uMsrHSavePa = %#RX64\n", pCtx->hwvirt.svm.uMsrHSavePa); + pHlp->pfnPrintf(pHlp, " GCPhysVmcb = %#RGp\n", pCtx->hwvirt.svm.GCPhysVmcb); + pHlp->pfnPrintf(pHlp, " VmcbCtrl:\n"); + cpumR3InfoSvmVmcbCtrl(pHlp, &pCtx->hwvirt.svm.pVmcbR3->ctrl, " " /* pszPrefix */); + pHlp->pfnPrintf(pHlp, " VmcbStateSave:\n"); + cpumR3InfoSvmVmcbStateSave(pHlp, &pCtx->hwvirt.svm.pVmcbR3->guest, " " /* pszPrefix */); + pHlp->pfnPrintf(pHlp, " HostState:\n"); + pHlp->pfnPrintf(pHlp, " uEferMsr = %#RX64\n", pCtx->hwvirt.svm.HostState.uEferMsr); + pHlp->pfnPrintf(pHlp, " uCr0 = %#RX64\n", pCtx->hwvirt.svm.HostState.uCr0); + pHlp->pfnPrintf(pHlp, " uCr4 = %#RX64\n", pCtx->hwvirt.svm.HostState.uCr4); + pHlp->pfnPrintf(pHlp, " uCr3 = %#RX64\n", pCtx->hwvirt.svm.HostState.uCr3); + pHlp->pfnPrintf(pHlp, " uRip = %#RX64\n", pCtx->hwvirt.svm.HostState.uRip); + pHlp->pfnPrintf(pHlp, " uRsp = %#RX64\n", pCtx->hwvirt.svm.HostState.uRsp); + pHlp->pfnPrintf(pHlp, " uRax = %#RX64\n", pCtx->hwvirt.svm.HostState.uRax); + pHlp->pfnPrintf(pHlp, " rflags = %#RX64 %31s\n", pCtx->hwvirt.svm.HostState.rflags.u64, szEFlags); + PCPUMSELREG pSel = &pCtx->hwvirt.svm.HostState.es; + pHlp->pfnPrintf(pHlp, " es = {%04x base=%016RX64 limit=%08x flags=%08x}\n", + pSel->Sel, pSel->u64Base, pSel->u32Limit, pSel->Attr.u); + pSel = &pCtx->hwvirt.svm.HostState.cs; + pHlp->pfnPrintf(pHlp, " cs = {%04x base=%016RX64 limit=%08x flags=%08x}\n", + pSel->Sel, pSel->u64Base, pSel->u32Limit, pSel->Attr.u); + pSel = &pCtx->hwvirt.svm.HostState.ss; + pHlp->pfnPrintf(pHlp, " ss = {%04x base=%016RX64 limit=%08x flags=%08x}\n", + pSel->Sel, pSel->u64Base, pSel->u32Limit, pSel->Attr.u); + pSel = &pCtx->hwvirt.svm.HostState.ds; + pHlp->pfnPrintf(pHlp, " ds = {%04x base=%016RX64 limit=%08x flags=%08x}\n", + pSel->Sel, pSel->u64Base, pSel->u32Limit, pSel->Attr.u); + pHlp->pfnPrintf(pHlp, " gdtr = %016RX64:%04x\n", pCtx->hwvirt.svm.HostState.gdtr.pGdt, + pCtx->hwvirt.svm.HostState.gdtr.cbGdt); + pHlp->pfnPrintf(pHlp, " idtr = %016RX64:%04x\n", pCtx->hwvirt.svm.HostState.idtr.pIdt, + pCtx->hwvirt.svm.HostState.idtr.cbIdt); + pHlp->pfnPrintf(pHlp, " cPauseFilter = %RU16\n", pCtx->hwvirt.svm.cPauseFilter); + pHlp->pfnPrintf(pHlp, " cPauseFilterThreshold = %RU32\n", pCtx->hwvirt.svm.cPauseFilterThreshold); + pHlp->pfnPrintf(pHlp, " fInterceptEvents = %u\n", pCtx->hwvirt.svm.fInterceptEvents); + pHlp->pfnPrintf(pHlp, " pvMsrBitmapR3 = %p\n", pCtx->hwvirt.svm.pvMsrBitmapR3); + pHlp->pfnPrintf(pHlp, " pvMsrBitmapR0 = %RKv\n", pCtx->hwvirt.svm.pvMsrBitmapR0); + pHlp->pfnPrintf(pHlp, " pvIoBitmapR3 = %p\n", pCtx->hwvirt.svm.pvIoBitmapR3); + pHlp->pfnPrintf(pHlp, " pvIoBitmapR0 = %RKv\n", pCtx->hwvirt.svm.pvIoBitmapR0); + } + + if (fDumpState & CPUMHWVIRTDUMP_VMX) + { + pHlp->pfnPrintf(pHlp, " GCPhysVmxon = %#RGp\n", pCtx->hwvirt.vmx.GCPhysVmxon); + pHlp->pfnPrintf(pHlp, " GCPhysVmcs = %#RGp\n", pCtx->hwvirt.vmx.GCPhysVmcs); + pHlp->pfnPrintf(pHlp, " GCPhysShadowVmcs = %#RGp\n", pCtx->hwvirt.vmx.GCPhysShadowVmcs); + pHlp->pfnPrintf(pHlp, " enmDiag = %u (%s)\n", pCtx->hwvirt.vmx.enmDiag, HMGetVmxDiagDesc(pCtx->hwvirt.vmx.enmDiag)); + pHlp->pfnPrintf(pHlp, " enmAbort = %u (%s)\n", pCtx->hwvirt.vmx.enmAbort, HMGetVmxAbortDesc(pCtx->hwvirt.vmx.enmAbort)); + pHlp->pfnPrintf(pHlp, " uAbortAux = %u (%#x)\n", pCtx->hwvirt.vmx.uAbortAux, 
pCtx->hwvirt.vmx.uAbortAux); + pHlp->pfnPrintf(pHlp, " fInVmxRootMode = %RTbool\n", pCtx->hwvirt.vmx.fInVmxRootMode); + pHlp->pfnPrintf(pHlp, " fInVmxNonRootMode = %RTbool\n", pCtx->hwvirt.vmx.fInVmxNonRootMode); + pHlp->pfnPrintf(pHlp, " fInterceptEvents = %RTbool\n", pCtx->hwvirt.vmx.fInterceptEvents); + pHlp->pfnPrintf(pHlp, " fNmiUnblockingIret = %RTbool\n", pCtx->hwvirt.vmx.fNmiUnblockingIret); + pHlp->pfnPrintf(pHlp, " uFirstPauseLoopTick = %RX64\n", pCtx->hwvirt.vmx.uFirstPauseLoopTick); + pHlp->pfnPrintf(pHlp, " uPrevPauseTick = %RX64\n", pCtx->hwvirt.vmx.uPrevPauseTick); + pHlp->pfnPrintf(pHlp, " uVmentryTick = %RX64\n", pCtx->hwvirt.vmx.uVmentryTick); + pHlp->pfnPrintf(pHlp, " offVirtApicWrite = %#RX16\n", pCtx->hwvirt.vmx.offVirtApicWrite); + pHlp->pfnPrintf(pHlp, " VMCS cache:\n"); + cpumR3InfoVmxVmcs(pHlp, pCtx->hwvirt.vmx.pVmcsR3, " " /* pszPrefix */); + } + +#undef CPUMHWVIRTDUMP_NONE +#undef CPUMHWVIRTDUMP_COMMON +#undef CPUMHWVIRTDUMP_SVM +#undef CPUMHWVIRTDUMP_VMX +#undef CPUMHWVIRTDUMP_LAST +#undef CPUMHWVIRTDUMP_ALL +} + +/** + * Display the current guest instruction + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) cpumR3InfoGuestInstr(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + char szInstruction[256]; + szInstruction[0] = '\0'; + DBGFR3DisasInstrCurrent(pVCpu, szInstruction, sizeof(szInstruction)); + pHlp->pfnPrintf(pHlp, "\nCPUM%u: %s\n\n", pVCpu->idCpu, szInstruction); +} + + +/** + * Display the hypervisor cpu state. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) cpumR3InfoHyper(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + CPUMDUMPTYPE enmType; + const char *pszComment; + cpumR3InfoParseArg(pszArgs, &enmType, &pszComment); + pHlp->pfnPrintf(pHlp, "Hypervisor CPUM state: %s\n", pszComment); + cpumR3InfoOne(pVM, &pVCpu->cpum.s.Hyper, CPUMCTX2CORE(&pVCpu->cpum.s.Hyper), pHlp, enmType, "."); + pHlp->pfnPrintf(pHlp, "CR4OrMask=%#x CR4AndMask=%#x\n", pVM->cpum.s.CR4.OrMask, pVM->cpum.s.CR4.AndMask); +} + + +/** + * Display the host cpu state. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) cpumR3InfoHost(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + CPUMDUMPTYPE enmType; + const char *pszComment; + cpumR3InfoParseArg(pszArgs, &enmType, &pszComment); + pHlp->pfnPrintf(pHlp, "Host CPUM state: %s\n", pszComment); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + PCPUMHOSTCTX pCtx = &pVCpu->cpum.s.Host; + + /* + * Format the EFLAGS. + */ +#if HC_ARCH_BITS == 32 + uint32_t efl = pCtx->eflags.u32; +#else + uint64_t efl = pCtx->rflags; +#endif + char szEFlags[80]; + cpumR3InfoFormatFlags(&szEFlags[0], efl); + + /* + * Format the registers. 
+ */ +#if HC_ARCH_BITS == 32 + pHlp->pfnPrintf(pHlp, + "eax=xxxxxxxx ebx=%08x ecx=xxxxxxxx edx=xxxxxxxx esi=%08x edi=%08x\n" + "eip=xxxxxxxx esp=%08x ebp=%08x iopl=%d %31s\n" + "cs=%04x ds=%04x es=%04x fs=%04x gs=%04x eflags=%08x\n" + "cr0=%08RX64 cr2=xxxxxxxx cr3=%08RX64 cr4=%08RX64 gdtr=%08x:%04x ldtr=%04x\n" + "dr[0]=%08RX64 dr[1]=%08RX64x dr[2]=%08RX64 dr[3]=%08RX64x dr[6]=%08RX64 dr[7]=%08RX64\n" + "SysEnter={cs=%04x eip=%08x esp=%08x}\n" + , + /*pCtx->eax,*/ pCtx->ebx, /*pCtx->ecx, pCtx->edx,*/ pCtx->esi, pCtx->edi, + /*pCtx->eip,*/ pCtx->esp, pCtx->ebp, X86_EFL_GET_IOPL(efl), szEFlags, + pCtx->cs, pCtx->ds, pCtx->es, pCtx->fs, pCtx->gs, efl, + pCtx->cr0, /*pCtx->cr2,*/ pCtx->cr3, pCtx->cr4, + pCtx->dr0, pCtx->dr1, pCtx->dr2, pCtx->dr3, pCtx->dr6, pCtx->dr7, + (uint32_t)pCtx->gdtr.uAddr, pCtx->gdtr.cb, pCtx->ldtr, + pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp); +#else + pHlp->pfnPrintf(pHlp, + "rax=xxxxxxxxxxxxxxxx rbx=%016RX64 rcx=xxxxxxxxxxxxxxxx\n" + "rdx=xxxxxxxxxxxxxxxx rsi=%016RX64 rdi=%016RX64\n" + "rip=xxxxxxxxxxxxxxxx rsp=%016RX64 rbp=%016RX64\n" + " r8=xxxxxxxxxxxxxxxx r9=xxxxxxxxxxxxxxxx r10=%016RX64\n" + "r11=%016RX64 r12=%016RX64 r13=%016RX64\n" + "r14=%016RX64 r15=%016RX64\n" + "iopl=%d %31s\n" + "cs=%04x ds=%04x es=%04x fs=%04x gs=%04x eflags=%08RX64\n" + "cr0=%016RX64 cr2=xxxxxxxxxxxxxxxx cr3=%016RX64\n" + "cr4=%016RX64 ldtr=%04x tr=%04x\n" + "dr[0]=%016RX64 dr[1]=%016RX64 dr[2]=%016RX64\n" + "dr[3]=%016RX64 dr[6]=%016RX64 dr[7]=%016RX64\n" + "gdtr=%016RX64:%04x idtr=%016RX64:%04x\n" + "SysEnter={cs=%04x eip=%08x esp=%08x}\n" + "FSbase=%016RX64 GSbase=%016RX64 efer=%08RX64\n" + , + /*pCtx->rax,*/ pCtx->rbx, /*pCtx->rcx, + pCtx->rdx,*/ pCtx->rsi, pCtx->rdi, + /*pCtx->rip,*/ pCtx->rsp, pCtx->rbp, + /*pCtx->r8, pCtx->r9,*/ pCtx->r10, + pCtx->r11, pCtx->r12, pCtx->r13, + pCtx->r14, pCtx->r15, + X86_EFL_GET_IOPL(efl), szEFlags, + pCtx->cs, pCtx->ds, pCtx->es, pCtx->fs, pCtx->gs, efl, + pCtx->cr0, /*pCtx->cr2,*/ pCtx->cr3, + pCtx->cr4, pCtx->ldtr, pCtx->tr, + pCtx->dr0, pCtx->dr1, pCtx->dr2, + pCtx->dr3, pCtx->dr6, pCtx->dr7, + pCtx->gdtr.uAddr, pCtx->gdtr.cb, pCtx->idtr.uAddr, pCtx->idtr.cb, + pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp, + pCtx->FSbase, pCtx->GSbase, pCtx->efer); +#endif +} + +/** + * Structure used when disassembling and instructions in DBGF. + * This is used so the reader function can get the stuff it needs. + */ +typedef struct CPUMDISASSTATE +{ + /** Pointer to the CPU structure. */ + PDISCPUSTATE pCpu; + /** Pointer to the VM. */ + PVM pVM; + /** Pointer to the VMCPU. */ + PVMCPU pVCpu; + /** Pointer to the first byte in the segment. */ + RTGCUINTPTR GCPtrSegBase; + /** Pointer to the byte after the end of the segment. (might have wrapped!) */ + RTGCUINTPTR GCPtrSegEnd; + /** The size of the segment minus 1. */ + RTGCUINTPTR cbSegLimit; + /** Pointer to the current page - R3 Ptr. */ + void const *pvPageR3; + /** Pointer to the current page - GC Ptr. */ + RTGCPTR pvPageGC; + /** The lock information that PGMPhysReleasePageMappingLock needs. */ + PGMPAGEMAPLOCK PageMapLock; + /** Whether the PageMapLock is valid or not. */ + bool fLocked; + /** 64 bits mode or not. 
*/ + bool f64Bits; +} CPUMDISASSTATE, *PCPUMDISASSTATE; + + +/** + * @callback_method_impl{FNDISREADBYTES} + */ +static DECLCALLBACK(int) cpumR3DisasInstrRead(PDISCPUSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead) +{ + PCPUMDISASSTATE pState = (PCPUMDISASSTATE)pDis->pvUser; + for (;;) + { + RTGCUINTPTR GCPtr = pDis->uInstrAddr + offInstr + pState->GCPtrSegBase; + + /* + * Need to update the page translation? + */ + if ( !pState->pvPageR3 + || (GCPtr >> PAGE_SHIFT) != (pState->pvPageGC >> PAGE_SHIFT)) + { + int rc = VINF_SUCCESS; + + /* translate the address */ + pState->pvPageGC = GCPtr & PAGE_BASE_GC_MASK; + if ( VM_IS_RAW_MODE_ENABLED(pState->pVM) + && MMHyperIsInsideArea(pState->pVM, pState->pvPageGC)) + { + pState->pvPageR3 = MMHyperRCToR3(pState->pVM, (RTRCPTR)pState->pvPageGC); + if (!pState->pvPageR3) + rc = VERR_INVALID_POINTER; + } + else + { + /* Release mapping lock previously acquired. */ + if (pState->fLocked) + PGMPhysReleasePageMappingLock(pState->pVM, &pState->PageMapLock); + rc = PGMPhysGCPtr2CCPtrReadOnly(pState->pVCpu, pState->pvPageGC, &pState->pvPageR3, &pState->PageMapLock); + pState->fLocked = RT_SUCCESS_NP(rc); + } + if (RT_FAILURE(rc)) + { + pState->pvPageR3 = NULL; + return rc; + } + } + + /* + * Check the segment limit. + */ + if (!pState->f64Bits && pDis->uInstrAddr + offInstr > pState->cbSegLimit) + return VERR_OUT_OF_SELECTOR_BOUNDS; + + /* + * Calc how much we can read. + */ + uint32_t cb = PAGE_SIZE - (GCPtr & PAGE_OFFSET_MASK); + if (!pState->f64Bits) + { + RTGCUINTPTR cbSeg = pState->GCPtrSegEnd - GCPtr; + if (cb > cbSeg && cbSeg) + cb = cbSeg; + } + if (cb > cbMaxRead) + cb = cbMaxRead; + + /* + * Read and advance or exit. + */ + memcpy(&pDis->abInstr[offInstr], (uint8_t *)pState->pvPageR3 + (GCPtr & PAGE_OFFSET_MASK), cb); + offInstr += (uint8_t)cb; + if (cb >= cbMinRead) + { + pDis->cbCachedInstr = offInstr; + return VINF_SUCCESS; + } + cbMinRead -= (uint8_t)cb; + cbMaxRead -= (uint8_t)cb; + } +} + + +/** + * Disassemble an instruction and return the information in the provided structure. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pCtx Pointer to the guest CPU context. + * @param GCPtrPC Program counter (relative to CS) to disassemble from. + * @param pCpu Disassembly state. + * @param pszPrefix String prefix for logging (debug only). + * + */ +VMMR3DECL(int) CPUMR3DisasmInstrCPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTGCPTR GCPtrPC, PDISCPUSTATE pCpu, + const char *pszPrefix) +{ + CPUMDISASSTATE State; + int rc; + + const PGMMODE enmMode = PGMGetGuestMode(pVCpu); + State.pCpu = pCpu; + State.pvPageGC = 0; + State.pvPageR3 = NULL; + State.pVM = pVM; + State.pVCpu = pVCpu; + State.fLocked = false; + State.f64Bits = false; + + /* + * Get selector information. + */ + DISCPUMODE enmDisCpuMode; + if ( (pCtx->cr0 & X86_CR0_PE) + && pCtx->eflags.Bits.u1VM == 0) + { + if (!CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, &pCtx->cs)) + { +# ifdef VBOX_WITH_RAW_MODE_NOT_R0 + CPUMGuestLazyLoadHiddenSelectorReg(pVCpu, &pCtx->cs); +# endif + if (!CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, &pCtx->cs)) + return VERR_CPUM_HIDDEN_CS_LOAD_ERROR; + } + State.f64Bits = enmMode >= PGMMODE_AMD64 && pCtx->cs.Attr.n.u1Long; + State.GCPtrSegBase = pCtx->cs.u64Base; + State.GCPtrSegEnd = pCtx->cs.u32Limit + 1 + (RTGCUINTPTR)pCtx->cs.u64Base; + State.cbSegLimit = pCtx->cs.u32Limit; + enmDisCpuMode = (State.f64Bits) + ? 
DISCPUMODE_64BIT + : pCtx->cs.Attr.n.u1DefBig + ? DISCPUMODE_32BIT + : DISCPUMODE_16BIT; + } + else + { + /* real or V86 mode */ + enmDisCpuMode = DISCPUMODE_16BIT; + State.GCPtrSegBase = pCtx->cs.Sel * 16; + State.GCPtrSegEnd = 0xFFFFFFFF; + State.cbSegLimit = 0xFFFFFFFF; + } + + /* + * Disassemble the instruction. + */ + uint32_t cbInstr; +#ifndef LOG_ENABLED + RT_NOREF_PV(pszPrefix); + rc = DISInstrWithReader(GCPtrPC, enmDisCpuMode, cpumR3DisasInstrRead, &State, pCpu, &cbInstr); + if (RT_SUCCESS(rc)) + { +#else + char szOutput[160]; + rc = DISInstrToStrWithReader(GCPtrPC, enmDisCpuMode, cpumR3DisasInstrRead, &State, + pCpu, &cbInstr, szOutput, sizeof(szOutput)); + if (RT_SUCCESS(rc)) + { + /* log it */ + if (pszPrefix) + Log(("%s-CPU%d: %s", pszPrefix, pVCpu->idCpu, szOutput)); + else + Log(("%s", szOutput)); +#endif + rc = VINF_SUCCESS; + } + else + Log(("CPUMR3DisasmInstrCPU: DISInstr failed for %04X:%RGv rc=%Rrc\n", pCtx->cs.Sel, GCPtrPC, rc)); + + /* Release mapping lock acquired in cpumR3DisasInstrRead. */ + if (State.fLocked) + PGMPhysReleasePageMappingLock(pVM, &State.PageMapLock); + + return rc; +} + + + +/** + * API for controlling a few of the CPU features found in CR4. + * + * Currently only X86_CR4_TSD is accepted as input. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param fOr The CR4 OR mask. + * @param fAnd The CR4 AND mask. + */ +VMMR3DECL(int) CPUMR3SetCR4Feature(PVM pVM, RTHCUINTREG fOr, RTHCUINTREG fAnd) +{ + AssertMsgReturn(!(fOr & ~(X86_CR4_TSD)), ("%#x\n", fOr), VERR_INVALID_PARAMETER); + AssertMsgReturn((fAnd & ~(X86_CR4_TSD)) == ~(X86_CR4_TSD), ("%#x\n", fAnd), VERR_INVALID_PARAMETER); + + pVM->cpum.s.CR4.OrMask &= fAnd; + pVM->cpum.s.CR4.OrMask |= fOr; + + return VINF_SUCCESS; +} + + +/** + * Enters REM, gets and resets the changed flags (CPUM_CHANGED_*). + * + * Only REM should ever call this function! + * + * @returns The changed flags. + * @param pVCpu The cross context virtual CPU structure. + * @param puCpl Where to return the current privilege level (CPL). + */ +VMMR3DECL(uint32_t) CPUMR3RemEnter(PVMCPU pVCpu, uint32_t *puCpl) +{ + Assert(!pVCpu->cpum.s.fRawEntered); + Assert(!pVCpu->cpum.s.fRemEntered); + + /* + * Get the CPL first. + */ + *puCpl = CPUMGetGuestCPL(pVCpu); + + /* + * Get and reset the flags. + */ + uint32_t fFlags = pVCpu->cpum.s.fChanged; + pVCpu->cpum.s.fChanged = 0; + + /** @todo change the switcher to use the fChanged flags. */ + if (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_SINCE_REM) + { + fFlags |= CPUM_CHANGED_FPU_REM; + pVCpu->cpum.s.fUseFlags &= ~CPUM_USED_FPU_SINCE_REM; + } + + pVCpu->cpum.s.fRemEntered = true; + return fFlags; +} + + +/** + * Leaves REM. + * + * @param pVCpu The cross context virtual CPU structure. + * @param fNoOutOfSyncSels This is @c false if there are out of sync + * registers. + */ +VMMR3DECL(void) CPUMR3RemLeave(PVMCPU pVCpu, bool fNoOutOfSyncSels) +{ + Assert(!pVCpu->cpum.s.fRawEntered); + Assert(pVCpu->cpum.s.fRemEntered); + + RT_NOREF_PV(fNoOutOfSyncSels); + + pVCpu->cpum.s.fRemEntered = false; +} + + +/** + * Called when the ring-3 init phase completes. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat Which init phase. + */ +VMMR3DECL(int) CPUMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + switch (enmWhat) + { + case VMINITCOMPLETED_RING3: + { + /* + * Figure out if the guest uses 32-bit or 64-bit FPU state at runtime for 64-bit capable VMs. 
+ * Only applicable/used on 64-bit hosts, refer CPUMR0A.asm. See @bugref{7138}. + */ + bool const fSupportsLongMode = VMR3IsLongModeAllowed(pVM); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + /* While loading a saved-state we fix it up in, cpumR3LoadDone(). */ + if (fSupportsLongMode) + pVCpu->cpum.s.fUseFlags |= CPUM_USE_SUPPORTS_LONGMODE; + } + + /* Register statistic counters for MSRs. */ + cpumR3MsrRegStats(pVM); + break; + } + + default: + break; + } + return VINF_SUCCESS; +} + + +/** + * Called when the ring-0 init phases completed. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) CPUMR3LogCpuIdAndMsrFeatures(PVM pVM) +{ + /* + * Enable log buffering as we're going to log a lot of lines. + */ + bool const fOldBuffered = RTLogRelSetBuffering(true /*fBuffered*/); + + /* + * Log the cpuid. + */ + RTCPUSET OnlineSet; + LogRel(("CPUM: Logical host processors: %u present, %u max, %u online, online mask: %016RX64\n", + (unsigned)RTMpGetPresentCount(), (unsigned)RTMpGetCount(), (unsigned)RTMpGetOnlineCount(), + RTCpuSetToU64(RTMpGetOnlineSet(&OnlineSet)) )); + RTCPUID cCores = RTMpGetCoreCount(); + if (cCores) + LogRel(("CPUM: Physical host cores: %u\n", (unsigned)cCores)); + LogRel(("************************* CPUID dump ************************\n")); + DBGFR3Info(pVM->pUVM, "cpuid", "verbose", DBGFR3InfoLogRelHlp()); + LogRel(("\n")); + DBGFR3_INFO_LOG_SAFE(pVM, "cpuid", "verbose"); /* macro */ + LogRel(("******************** End of CPUID dump **********************\n")); + + /* + * Log VT-x extended features. + * + * SVM features are currently all covered under CPUID so there is nothing + * to do here for SVM. + */ + if (pVM->cpum.s.HostFeatures.fVmx) + { + LogRel(("*********************** VT-x features ***********************\n")); + DBGFR3Info(pVM->pUVM, "cpumvmxfeat", "default", DBGFR3InfoLogRelHlp()); + LogRel(("\n")); + LogRel(("******************* End of VT-x features ********************\n")); + } + + /* + * Restore the log buffering state to what it was previously. + */ + RTLogRelSetBuffering(fOldBuffered); +} + diff --git a/src/VBox/VMM/VMMR3/CPUMDbg.cpp b/src/VBox/VMM/VMMR3/CPUMDbg.cpp new file mode 100644 index 00000000..8f29d5ab --- /dev/null +++ b/src/VBox/VMM/VMMR3/CPUMDbg.cpp @@ -0,0 +1,1524 @@ +/* $Id: CPUMDbg.cpp $ */ +/** @file + * CPUM - CPU Monitor / Manager, Debugger & Debugging APIs. + */ + +/* + * Copyright (C) 2010-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include "CPUMInternal.h" +#include +#include +#include +#include +#include +#include +#include + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_Generic(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + void const *pv = (uint8_t const *)&pVCpu->cpum + pDesc->offRegister; + + VMCPU_ASSERT_EMT(pVCpu); + + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = *(uint8_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U16: pValue->u16 = *(uint16_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U32: pValue->u32 = *(uint32_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U64: pValue->u64 = *(uint64_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U128: pValue->u128 = *(PCRTUINT128U )pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U256: pValue->u256 = *(PCRTUINT256U )pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U512: pValue->u512 = *(PCRTUINT512U )pv; return VINF_SUCCESS; + default: + AssertMsgFailedReturn(("%d %s\n", pDesc->enmType, pDesc->pszName), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegSet_Generic(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + void *pv = (uint8_t *)&pVCpu->cpum + pDesc->offRegister; + + VMCPU_ASSERT_EMT(pVCpu); + + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U8: + *(uint8_t *)pv &= ~pfMask->u8; + *(uint8_t *)pv |= pValue->u8 & pfMask->u8; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U16: + *(uint16_t *)pv &= ~pfMask->u16; + *(uint16_t *)pv |= pValue->u16 & pfMask->u16; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U32: + *(uint32_t *)pv &= ~pfMask->u32; + *(uint32_t *)pv |= pValue->u32 & pfMask->u32; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U64: + *(uint64_t *)pv &= ~pfMask->u64; + *(uint64_t *)pv |= pValue->u64 & pfMask->u64; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U128: + { + RTUINT128U Val; + RTUInt128AssignAnd((PRTUINT128U)pv, RTUInt128AssignBitwiseNot(RTUInt128Assign(&Val, &pfMask->u128))); + RTUInt128AssignOr((PRTUINT128U)pv, RTUInt128AssignAnd(RTUInt128Assign(&Val, &pValue->u128), &pfMask->u128)); + return VINF_SUCCESS; + } + + default: + AssertMsgFailedReturn(("%d %s\n", pDesc->enmType, pDesc->pszName), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_XStateGeneric(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + void const *pv = (uint8_t const *)&pVCpu->cpum.s.Guest.pXStateR3 + pDesc->offRegister; + + VMCPU_ASSERT_EMT(pVCpu); + + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = *(uint8_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U16: pValue->u16 = *(uint16_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U32: pValue->u32 = *(uint32_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U64: pValue->u64 = *(uint64_t const *)pv; return VINF_SUCCESS; + case DBGFREGVALTYPE_U128: pValue->u128 = *(PCRTUINT128U )pv; return 
VINF_SUCCESS; + default: + AssertMsgFailedReturn(("%d %s\n", pDesc->enmType, pDesc->pszName), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegSet_XStateGeneric(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + void *pv = (uint8_t *)&pVCpu->cpum.s.Guest.pXStateR3 + pDesc->offRegister; + + VMCPU_ASSERT_EMT(pVCpu); + + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U8: + *(uint8_t *)pv &= ~pfMask->u8; + *(uint8_t *)pv |= pValue->u8 & pfMask->u8; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U16: + *(uint16_t *)pv &= ~pfMask->u16; + *(uint16_t *)pv |= pValue->u16 & pfMask->u16; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U32: + *(uint32_t *)pv &= ~pfMask->u32; + *(uint32_t *)pv |= pValue->u32 & pfMask->u32; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U64: + *(uint64_t *)pv &= ~pfMask->u64; + *(uint64_t *)pv |= pValue->u64 & pfMask->u64; + return VINF_SUCCESS; + + case DBGFREGVALTYPE_U128: + { + RTUINT128U Val; + RTUInt128AssignAnd((PRTUINT128U)pv, RTUInt128AssignBitwiseNot(RTUInt128Assign(&Val, &pfMask->u128))); + RTUInt128AssignOr((PRTUINT128U)pv, RTUInt128AssignAnd(RTUInt128Assign(&Val, &pValue->u128), &pfMask->u128)); + return VINF_SUCCESS; + } + + default: + AssertMsgFailedReturn(("%d %s\n", pDesc->enmType, pDesc->pszName), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } +} + + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegSet_seg(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + /** @todo perform a selector load, updating hidden selectors and stuff. */ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_NOT_IMPLEMENTED; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_gdtr(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VBOXGDTR const *pGdtr = (VBOXGDTR const *)((uint8_t const *)&pVCpu->cpum + pDesc->offRegister); + + VMCPU_ASSERT_EMT(pVCpu); + Assert(pDesc->enmType == DBGFREGVALTYPE_DTR); + + pValue->dtr.u32Limit = pGdtr->cbGdt; + pValue->dtr.u64Base = pGdtr->pGdt; + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegSet_gdtr(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_NOT_IMPLEMENTED; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_idtr(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VBOXIDTR const *pIdtr = (VBOXIDTR const *)((uint8_t const *)&pVCpu->cpum + pDesc->offRegister); + + VMCPU_ASSERT_EMT(pVCpu); + Assert(pDesc->enmType == DBGFREGVALTYPE_DTR); + + pValue->dtr.u32Limit = pIdtr->cbIdt; + pValue->dtr.u64Base = pIdtr->pIdt; + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegSet_idtr(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_NOT_IMPLEMENTED; +} + + +/** + * Determins the tag register value for a CPU register when the FPU state + * format is FXSAVE. + * + * @returns The tag register value. + * @param pFpu Pointer to the guest FPU. + * @param iReg The register number (0..7). 
+ */ +DECLINLINE(uint16_t) cpumR3RegCalcFpuTagFromFxSave(PCX86FXSTATE pFpu, unsigned iReg) +{ + /* + * See table 11-1 in the AMD docs. + */ + if (!(pFpu->FTW & RT_BIT_32(iReg))) + return 3; /* b11 - empty */ + + uint16_t const uExp = pFpu->aRegs[iReg].au16[4]; + if (uExp == 0) + { + if (pFpu->aRegs[iReg].au64[0] == 0) /* J & M == 0 */ + return 1; /* b01 - zero */ + return 2; /* b10 - special */ + } + + if (uExp == UINT16_C(0xffff)) + return 2; /* b10 - special */ + + if (!(pFpu->aRegs[iReg].au64[0] >> 63)) /* J == 0 */ + return 2; /* b10 - special */ + + return 0; /* b00 - valid (normal) */ +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_ftw(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + PCX86FXSTATE pFpu = (PCX86FXSTATE)((uint8_t const *)&pVCpu->cpum + pDesc->offRegister); + + VMCPU_ASSERT_EMT(pVCpu); + Assert(pDesc->enmType == DBGFREGVALTYPE_U16); + + pValue->u16 = cpumR3RegCalcFpuTagFromFxSave(pFpu, 0) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 1) << 2) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 2) << 4) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 3) << 6) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 4) << 8) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 5) << 10) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 6) << 12) + | (cpumR3RegCalcFpuTagFromFxSave(pFpu, 7) << 14); + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegSet_ftw(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_DBGF_READ_ONLY_REGISTER; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_Dummy(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + RT_NOREF_PV(pvUser); + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = 0; return VINF_SUCCESS; + case DBGFREGVALTYPE_U16: pValue->u16 = 0; return VINF_SUCCESS; + case DBGFREGVALTYPE_U32: pValue->u32 = 0; return VINF_SUCCESS; + case DBGFREGVALTYPE_U64: pValue->u64 = 0; return VINF_SUCCESS; + case DBGFREGVALTYPE_U128: + RT_ZERO(pValue->u128); + return VINF_SUCCESS; + case DBGFREGVALTYPE_DTR: + pValue->dtr.u32Limit = 0; + pValue->dtr.u64Base = 0; + return VINF_SUCCESS; + case DBGFREGVALTYPE_R80: + RT_ZERO(pValue->r80Ex); + return VINF_SUCCESS; + default: + AssertMsgFailedReturn(("%d %s\n", pDesc->enmType, pDesc->pszName), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegSet_Dummy(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_DBGF_READ_ONLY_REGISTER; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGet_ymm(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + uint32_t iReg = pDesc->offRegister; + + Assert(pDesc->enmType == DBGFREGVALTYPE_U256); + VMCPU_ASSERT_EMT(pVCpu); + + if (iReg < 16) + { + pValue->u256.DQWords.dqw0 = pVCpu->cpum.s.Guest.pXStateR3->x87.aXMM[iReg].uXmm; + pValue->u256.DQWords.dqw1 = pVCpu->cpum.s.Guest.pXStateR3->u.YmmHi.aYmmHi[iReg].uXmm; + return VINF_SUCCESS; + } + return VERR_NOT_IMPLEMENTED; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegSet_ymm(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL 
pValue, PCDBGFREGVAL pfMask) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + uint32_t iReg = pDesc->offRegister; + + Assert(pDesc->enmType == DBGFREGVALTYPE_U256); + VMCPU_ASSERT_EMT(pVCpu); + + if (iReg < 16) + { + RTUINT128U Val; + RTUInt128AssignAnd(&pVCpu->cpum.s.Guest.pXStateR3->x87.aXMM[iReg].uXmm, + RTUInt128AssignBitwiseNot(RTUInt128Assign(&Val, &pfMask->u256.DQWords.dqw0))); + RTUInt128AssignOr(&pVCpu->cpum.s.Guest.pXStateR3->u.YmmHi.aYmmHi[iReg].uXmm, + RTUInt128AssignAnd(RTUInt128Assign(&Val, &pValue->u128), &pfMask->u128)); + + } + return VERR_NOT_IMPLEMENTED; +} + + +/* + * + * Guest register access functions. + * + */ + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGstGet_crX(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VMCPU_ASSERT_EMT(pVCpu); + + uint64_t u64Value; + int rc = CPUMGetGuestCRx(pVCpu, pDesc->offRegister, &u64Value); + if (rc == VERR_PDM_NO_APIC_INSTANCE) /* CR8 might not be available, see @bugref{8868}.*/ + u64Value = 0; + else + AssertRCReturn(rc, rc); + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: pValue->u64 = u64Value; break; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)u64Value; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegGstSet_crX(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + int rc; + PVMCPU pVCpu = (PVMCPU)pvUser; + + VMCPU_ASSERT_EMT(pVCpu); + + /* + * Calculate the new value. + */ + uint64_t u64Value; + uint64_t fMask; + uint64_t fMaskMax; + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: + u64Value = pValue->u64; + fMask = pfMask->u64; + fMaskMax = UINT64_MAX; + break; + case DBGFREGVALTYPE_U32: + u64Value = pValue->u32; + fMask = pfMask->u32; + fMaskMax = UINT32_MAX; + break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + if (fMask != fMaskMax) + { + uint64_t u64FullValue; + rc = CPUMGetGuestCRx(pVCpu, pDesc->offRegister, &u64FullValue); + if (RT_FAILURE(rc)) + return rc; + u64Value = (u64FullValue & ~fMask) + | (u64Value & fMask); + } + + /* + * Perform the assignment.
+ */ + switch (pDesc->offRegister) + { + case 0: rc = CPUMSetGuestCR0(pVCpu, u64Value); break; + case 2: rc = CPUMSetGuestCR2(pVCpu, u64Value); break; + case 3: rc = CPUMSetGuestCR3(pVCpu, u64Value); break; + case 4: rc = CPUMSetGuestCR4(pVCpu, u64Value); break; + case 8: rc = APICSetTpr(pVCpu, (uint8_t)(u64Value << 4)); break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return rc; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGstGet_drX(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VMCPU_ASSERT_EMT(pVCpu); + + uint64_t u64Value; + int rc = CPUMGetGuestDRx(pVCpu, pDesc->offRegister, &u64Value); + AssertRCReturn(rc, rc); + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: pValue->u64 = u64Value; break; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)u64Value; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegGstSet_drX(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + int rc; + PVMCPU pVCpu = (PVMCPU)pvUser; + + VMCPU_ASSERT_EMT(pVCpu); + + /* + * Calculate the new value. + */ + uint64_t u64Value; + uint64_t fMask; + uint64_t fMaskMax; + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: + u64Value = pValue->u64; + fMask = pfMask->u64; + fMaskMax = UINT64_MAX; + break; + case DBGFREGVALTYPE_U32: + u64Value = pValue->u32; + fMask = pfMask->u32; + fMaskMax = UINT32_MAX; + break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + if (fMask != fMaskMax) + { + uint64_t u64FullValue; + rc = CPUMGetGuestDRx(pVCpu, pDesc->offRegister, &u64FullValue); + if (RT_FAILURE(rc)) + return rc; + u64Value = (u64FullValue & ~fMask) + | (u64Value & fMask); + } + + /* + * Perform the assignment. + */ + return CPUMSetGuestDRx(pVCpu, pDesc->offRegister, u64Value); +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGstGet_msr(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VMCPU_ASSERT_EMT(pVCpu); + + uint64_t u64Value; + VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(pVCpu, pDesc->offRegister, &u64Value); + if (rcStrict == VINF_SUCCESS) + { + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: pValue->u64 = u64Value; break; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)u64Value; break; + case DBGFREGVALTYPE_U16: pValue->u16 = (uint16_t)u64Value; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return VBOXSTRICTRC_VAL(rcStrict); + } + + /** @todo what to do about errors? */ + Assert(RT_FAILURE_NP(rcStrict)); + return VBOXSTRICTRC_VAL(rcStrict); +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegGstSet_msr(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + + VMCPU_ASSERT_EMT(pVCpu); + + /* + * Calculate the new value.
+ */ + uint64_t u64Value; + uint64_t fMask; + uint64_t fMaskMax; + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: + u64Value = pValue->u64; + fMask = pfMask->u64; + fMaskMax = UINT64_MAX; + break; + case DBGFREGVALTYPE_U32: + u64Value = pValue->u32; + fMask = pfMask->u32; + fMaskMax = UINT32_MAX; + break; + case DBGFREGVALTYPE_U16: + u64Value = pValue->u16; + fMask = pfMask->u16; + fMaskMax = UINT16_MAX; + break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + if (fMask != fMaskMax) + { + uint64_t u64FullValue; + VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(pVCpu, pDesc->offRegister, &u64FullValue); + if (rcStrict != VINF_SUCCESS) + { + AssertRC(RT_FAILURE_NP(rcStrict)); + return VBOXSTRICTRC_VAL(rcStrict); + } + u64Value = (u64FullValue & ~fMask) + | (u64Value & fMask); + } + + /* + * Perform the assignment. + */ + VBOXSTRICTRC rcStrict = CPUMSetGuestMsr(pVCpu, pDesc->offRegister, u64Value); + if (rcStrict == VINF_SUCCESS) + return VINF_SUCCESS; + AssertRC(RT_FAILURE_NP(rcStrict)); + return VBOXSTRICTRC_VAL(rcStrict); +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegGstGet_stN(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VMCPU_ASSERT_EMT(pVCpu); + Assert(pDesc->enmType == DBGFREGVALTYPE_R80); + + PX86FXSTATE pFpuCtx = &pVCpu->cpum.s.Guest.CTX_SUFF(pXState)->x87; + unsigned iReg = (pFpuCtx->FSW >> 11) & 7; + iReg += pDesc->offRegister; + iReg &= 7; + pValue->r80Ex = pFpuCtx->aRegs[iReg].r80Ex; + + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegGstSet_stN(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_NOT_IMPLEMENTED; +} + + + +/* + * + * Hypervisor register access functions. + * + */ + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegHyperGet_crX(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VMCPU_ASSERT_EMT(pVCpu); + + uint64_t u64Value; + switch (pDesc->offRegister) + { + case 0: u64Value = UINT64_MAX; break; + case 2: u64Value = UINT64_MAX; break; + case 3: u64Value = CPUMGetHyperCR3(pVCpu); break; + case 4: u64Value = UINT64_MAX; break; + case 8: u64Value = UINT64_MAX; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: pValue->u64 = u64Value; break; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)u64Value; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegHyperSet_crX(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + /* Not settable, prevents killing your host.
*/ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_ACCESS_DENIED; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegHyperGet_drX(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + PVMCPU pVCpu = (PVMCPU)pvUser; + VMCPU_ASSERT_EMT(pVCpu); + + uint64_t u64Value; + switch (pDesc->offRegister) + { + case 0: u64Value = CPUMGetHyperDR0(pVCpu); break; + case 1: u64Value = CPUMGetHyperDR1(pVCpu); break; + case 2: u64Value = CPUMGetHyperDR2(pVCpu); break; + case 3: u64Value = CPUMGetHyperDR3(pVCpu); break; + case 6: u64Value = CPUMGetHyperDR6(pVCpu); break; + case 7: u64Value = CPUMGetHyperDR7(pVCpu); break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: pValue->u64 = u64Value; break; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)u64Value; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegHyperSet_drX(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + /* Not settable, prevents killing your host. */ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_ACCESS_DENIED; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnGet} + */ +static DECLCALLBACK(int) cpumR3RegHyperGet_msr(void *pvUser, PCDBGFREGDESC pDesc, PDBGFREGVAL pValue) +{ + NOREF(pvUser); + + /* Not available at present, return all FFs to keep things quiet */ + uint64_t u64Value = UINT64_MAX; + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U64: pValue->u64 = u64Value; break; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)u64Value; break; + case DBGFREGVALTYPE_U16: pValue->u16 = (uint16_t)u64Value; break; + default: + AssertFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{DBGFREGDESC,pfnSet} + */ +static DECLCALLBACK(int) cpumR3RegHyperSet_msr(void *pvUser, PCDBGFREGDESC pDesc, PCDBGFREGVAL pValue, PCDBGFREGVAL pfMask) +{ + /* Not settable, return failure. */ + NOREF(pvUser); NOREF(pDesc); NOREF(pValue); NOREF(pfMask); + return VERR_ACCESS_DENIED; +} + + +/* + * Set up aliases.
+ */ +#define CPUMREGALIAS_STD(Name, psz32, psz16, psz8) \ + static DBGFREGALIAS const g_aCpumRegAliases_##Name[] = \ + { \ + { psz32, DBGFREGVALTYPE_U32 }, \ + { psz16, DBGFREGVALTYPE_U16 }, \ + { psz8, DBGFREGVALTYPE_U8 }, \ + { NULL, DBGFREGVALTYPE_INVALID } \ + } +CPUMREGALIAS_STD(rax, "eax", "ax", "al"); +CPUMREGALIAS_STD(rcx, "ecx", "cx", "cl"); +CPUMREGALIAS_STD(rdx, "edx", "dx", "dl"); +CPUMREGALIAS_STD(rbx, "ebx", "bx", "bl"); +CPUMREGALIAS_STD(rsp, "esp", "sp", NULL); +CPUMREGALIAS_STD(rbp, "ebp", "bp", NULL); +CPUMREGALIAS_STD(rsi, "esi", "si", "sil"); +CPUMREGALIAS_STD(rdi, "edi", "di", "dil"); +CPUMREGALIAS_STD(r8, "r8d", "r8w", "r8b"); +CPUMREGALIAS_STD(r9, "r9d", "r9w", "r9b"); +CPUMREGALIAS_STD(r10, "r10d", "r10w", "r10b"); +CPUMREGALIAS_STD(r11, "r11d", "r11w", "r11b"); +CPUMREGALIAS_STD(r12, "r12d", "r12w", "r12b"); +CPUMREGALIAS_STD(r13, "r13d", "r13w", "r13b"); +CPUMREGALIAS_STD(r14, "r14d", "r14w", "r14b"); +CPUMREGALIAS_STD(r15, "r15d", "r15w", "r15b"); +CPUMREGALIAS_STD(rip, "eip", "ip", NULL); +CPUMREGALIAS_STD(rflags, "eflags", "flags", NULL); +#undef CPUMREGALIAS_STD + +static DBGFREGALIAS const g_aCpumRegAliases_fpuip[] = +{ + { "fpuip16", DBGFREGVALTYPE_U16 }, + { NULL, DBGFREGVALTYPE_INVALID } +}; + +static DBGFREGALIAS const g_aCpumRegAliases_fpudp[] = +{ + { "fpudp16", DBGFREGVALTYPE_U16 }, + { NULL, DBGFREGVALTYPE_INVALID } +}; + +static DBGFREGALIAS const g_aCpumRegAliases_cr0[] = +{ + { "msw", DBGFREGVALTYPE_U16 }, + { NULL, DBGFREGVALTYPE_INVALID } +}; + +/* + * Sub fields. + */ +/** Sub-fields for the (hidden) segment attribute register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_seg[] = +{ + DBGFREGSUBFIELD_RW("type", 0, 4, 0), + DBGFREGSUBFIELD_RW("s", 4, 1, 0), + DBGFREGSUBFIELD_RW("dpl", 5, 2, 0), + DBGFREGSUBFIELD_RW("p", 7, 1, 0), + DBGFREGSUBFIELD_RW("avl", 12, 1, 0), + DBGFREGSUBFIELD_RW("l", 13, 1, 0), + DBGFREGSUBFIELD_RW("d", 14, 1, 0), + DBGFREGSUBFIELD_RW("g", 15, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the flags register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_rflags[] = +{ + DBGFREGSUBFIELD_RW("cf", 0, 1, 0), + DBGFREGSUBFIELD_RW("pf", 2, 1, 0), + DBGFREGSUBFIELD_RW("af", 4, 1, 0), + DBGFREGSUBFIELD_RW("zf", 6, 1, 0), + DBGFREGSUBFIELD_RW("sf", 7, 1, 0), + DBGFREGSUBFIELD_RW("tf", 8, 1, 0), + DBGFREGSUBFIELD_RW("if", 9, 1, 0), + DBGFREGSUBFIELD_RW("df", 10, 1, 0), + DBGFREGSUBFIELD_RW("of", 11, 1, 0), + DBGFREGSUBFIELD_RW("iopl", 12, 2, 0), + DBGFREGSUBFIELD_RW("nt", 14, 1, 0), + DBGFREGSUBFIELD_RW("rf", 16, 1, 0), + DBGFREGSUBFIELD_RW("vm", 17, 1, 0), + DBGFREGSUBFIELD_RW("ac", 18, 1, 0), + DBGFREGSUBFIELD_RW("vif", 19, 1, 0), + DBGFREGSUBFIELD_RW("vip", 20, 1, 0), + DBGFREGSUBFIELD_RW("id", 21, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the FPU control word register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_fcw[] = +{ + DBGFREGSUBFIELD_RW("im", 1, 1, 0), + DBGFREGSUBFIELD_RW("dm", 2, 1, 0), + DBGFREGSUBFIELD_RW("zm", 3, 1, 0), + DBGFREGSUBFIELD_RW("om", 4, 1, 0), + DBGFREGSUBFIELD_RW("um", 5, 1, 0), + DBGFREGSUBFIELD_RW("pm", 6, 1, 0), + DBGFREGSUBFIELD_RW("pc", 8, 2, 0), + DBGFREGSUBFIELD_RW("rc", 10, 2, 0), + DBGFREGSUBFIELD_RW("x", 12, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the FPU status word register. 
*/ +static DBGFREGSUBFIELD const g_aCpumRegFields_fsw[] = +{ + DBGFREGSUBFIELD_RW("ie", 0, 1, 0), + DBGFREGSUBFIELD_RW("de", 1, 1, 0), + DBGFREGSUBFIELD_RW("ze", 2, 1, 0), + DBGFREGSUBFIELD_RW("oe", 3, 1, 0), + DBGFREGSUBFIELD_RW("ue", 4, 1, 0), + DBGFREGSUBFIELD_RW("pe", 5, 1, 0), + DBGFREGSUBFIELD_RW("se", 6, 1, 0), + DBGFREGSUBFIELD_RW("es", 7, 1, 0), + DBGFREGSUBFIELD_RW("c0", 8, 1, 0), + DBGFREGSUBFIELD_RW("c1", 9, 1, 0), + DBGFREGSUBFIELD_RW("c2", 10, 1, 0), + DBGFREGSUBFIELD_RW("top", 11, 3, 0), + DBGFREGSUBFIELD_RW("c3", 14, 1, 0), + DBGFREGSUBFIELD_RW("b", 15, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the FPU tag word register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_ftw[] = +{ + DBGFREGSUBFIELD_RW("tag0", 0, 2, 0), + DBGFREGSUBFIELD_RW("tag1", 2, 2, 0), + DBGFREGSUBFIELD_RW("tag2", 4, 2, 0), + DBGFREGSUBFIELD_RW("tag3", 6, 2, 0), + DBGFREGSUBFIELD_RW("tag4", 8, 2, 0), + DBGFREGSUBFIELD_RW("tag5", 10, 2, 0), + DBGFREGSUBFIELD_RW("tag6", 12, 2, 0), + DBGFREGSUBFIELD_RW("tag7", 14, 2, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the Multimedia Extensions Control and Status Register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_mxcsr[] = +{ + DBGFREGSUBFIELD_RW("ie", 0, 1, 0), + DBGFREGSUBFIELD_RW("de", 1, 1, 0), + DBGFREGSUBFIELD_RW("ze", 2, 1, 0), + DBGFREGSUBFIELD_RW("oe", 3, 1, 0), + DBGFREGSUBFIELD_RW("ue", 4, 1, 0), + DBGFREGSUBFIELD_RW("pe", 5, 1, 0), + DBGFREGSUBFIELD_RW("daz", 6, 1, 0), + DBGFREGSUBFIELD_RW("im", 7, 1, 0), + DBGFREGSUBFIELD_RW("dm", 8, 1, 0), + DBGFREGSUBFIELD_RW("zm", 9, 1, 0), + DBGFREGSUBFIELD_RW("om", 10, 1, 0), + DBGFREGSUBFIELD_RW("um", 11, 1, 0), + DBGFREGSUBFIELD_RW("pm", 12, 1, 0), + DBGFREGSUBFIELD_RW("rc", 13, 2, 0), + DBGFREGSUBFIELD_RW("fz", 14, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the ST(n) FPU registers. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_stN[] = +{ + DBGFREGSUBFIELD_RW("man", 0, 64, 0), + DBGFREGSUBFIELD_RW("exp", 64, 15, 0), + DBGFREGSUBFIELD_RW("sig", 79, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the MMX registers. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_mmN[] = +{ + DBGFREGSUBFIELD_RW("dw0", 0, 32, 0), + DBGFREGSUBFIELD_RW("dw1", 32, 32, 0), + DBGFREGSUBFIELD_RW("w0", 0, 16, 0), + DBGFREGSUBFIELD_RW("w1", 16, 16, 0), + DBGFREGSUBFIELD_RW("w2", 32, 16, 0), + DBGFREGSUBFIELD_RW("w3", 48, 16, 0), + DBGFREGSUBFIELD_RW("b0", 0, 8, 0), + DBGFREGSUBFIELD_RW("b1", 8, 8, 0), + DBGFREGSUBFIELD_RW("b2", 16, 8, 0), + DBGFREGSUBFIELD_RW("b3", 24, 8, 0), + DBGFREGSUBFIELD_RW("b4", 32, 8, 0), + DBGFREGSUBFIELD_RW("b5", 40, 8, 0), + DBGFREGSUBFIELD_RW("b6", 48, 8, 0), + DBGFREGSUBFIELD_RW("b7", 56, 8, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the XMM registers.
*/ +static DBGFREGSUBFIELD const g_aCpumRegFields_xmmN[] = +{ + DBGFREGSUBFIELD_RW("r0", 0, 32, 0), + DBGFREGSUBFIELD_RW("r0.man", 0+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r0.exp", 0+23, 8, 0), + DBGFREGSUBFIELD_RW("r0.sig", 0+31, 1, 0), + DBGFREGSUBFIELD_RW("r1", 32, 32, 0), + DBGFREGSUBFIELD_RW("r1.man", 32+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r1.exp", 32+23, 8, 0), + DBGFREGSUBFIELD_RW("r1.sig", 32+31, 1, 0), + DBGFREGSUBFIELD_RW("r2", 64, 32, 0), + DBGFREGSUBFIELD_RW("r2.man", 64+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r2.exp", 64+23, 8, 0), + DBGFREGSUBFIELD_RW("r2.sig", 64+31, 1, 0), + DBGFREGSUBFIELD_RW("r3", 96, 32, 0), + DBGFREGSUBFIELD_RW("r3.man", 96+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r3.exp", 96+23, 8, 0), + DBGFREGSUBFIELD_RW("r3.sig", 96+31, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +#if 0 /* needs special accessor, too lazy for that now. */ +/** Sub-fields for the YMM registers. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_ymmN[] = +{ + DBGFREGSUBFIELD_RW("r0", 0, 32, 0), + DBGFREGSUBFIELD_RW("r0.man", 0+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r0.exp", 0+23, 8, 0), + DBGFREGSUBFIELD_RW("r0.sig", 0+31, 1, 0), + DBGFREGSUBFIELD_RW("r1", 32, 32, 0), + DBGFREGSUBFIELD_RW("r1.man", 32+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r1.exp", 32+23, 8, 0), + DBGFREGSUBFIELD_RW("r1.sig", 32+31, 1, 0), + DBGFREGSUBFIELD_RW("r2", 64, 32, 0), + DBGFREGSUBFIELD_RW("r2.man", 64+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r2.exp", 64+23, 8, 0), + DBGFREGSUBFIELD_RW("r2.sig", 64+31, 1, 0), + DBGFREGSUBFIELD_RW("r3", 96, 32, 0), + DBGFREGSUBFIELD_RW("r3.man", 96+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r3.exp", 96+23, 8, 0), + DBGFREGSUBFIELD_RW("r3.sig", 96+31, 1, 0), + DBGFREGSUBFIELD_RW("r4", 128, 32, 0), + DBGFREGSUBFIELD_RW("r4.man", 128+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r4.exp", 128+23, 8, 0), + DBGFREGSUBFIELD_RW("r4.sig", 128+31, 1, 0), + DBGFREGSUBFIELD_RW("r5", 160, 32, 0), + DBGFREGSUBFIELD_RW("r5.man", 160+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r5.exp", 160+23, 8, 0), + DBGFREGSUBFIELD_RW("r5.sig", 160+31, 1, 0), + DBGFREGSUBFIELD_RW("r6", 192, 32, 0), + DBGFREGSUBFIELD_RW("r6.man", 192+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r6.exp", 192+23, 8, 0), + DBGFREGSUBFIELD_RW("r6.sig", 192+31, 1, 0), + DBGFREGSUBFIELD_RW("r7", 224, 32, 0), + DBGFREGSUBFIELD_RW("r7.man", 224+ 0, 23, 0), + DBGFREGSUBFIELD_RW("r7.exp", 224+23, 8, 0), + DBGFREGSUBFIELD_RW("r7.sig", 224+31, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; +#endif + +/** Sub-fields for the CR0 register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_cr0[] = +{ + DBGFREGSUBFIELD_RW("pe", 0, 1, 0), + DBGFREGSUBFIELD_RW("mp", 1, 1, 0), + DBGFREGSUBFIELD_RW("em", 2, 1, 0), + DBGFREGSUBFIELD_RW("ts", 3, 1, 0), + DBGFREGSUBFIELD_RO("et", 4, 1, 0), + DBGFREGSUBFIELD_RW("ne", 5, 1, 0), + DBGFREGSUBFIELD_RW("wp", 16, 1, 0), + DBGFREGSUBFIELD_RW("am", 18, 1, 0), + DBGFREGSUBFIELD_RW("nw", 29, 1, 0), + DBGFREGSUBFIELD_RW("cd", 30, 1, 0), + DBGFREGSUBFIELD_RW("pg", 31, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the CR3 register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_cr3[] = +{ + DBGFREGSUBFIELD_RW("pwt", 3, 1, 0), + DBGFREGSUBFIELD_RW("pcd", 4, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the CR4 register. 
*/ +static DBGFREGSUBFIELD const g_aCpumRegFields_cr4[] = +{ + DBGFREGSUBFIELD_RW("vme", 0, 1, 0), + DBGFREGSUBFIELD_RW("pvi", 1, 1, 0), + DBGFREGSUBFIELD_RW("tsd", 2, 1, 0), + DBGFREGSUBFIELD_RW("de", 3, 1, 0), + DBGFREGSUBFIELD_RW("pse", 4, 1, 0), + DBGFREGSUBFIELD_RW("pae", 5, 1, 0), + DBGFREGSUBFIELD_RW("mce", 6, 1, 0), + DBGFREGSUBFIELD_RW("pge", 7, 1, 0), + DBGFREGSUBFIELD_RW("pce", 8, 1, 0), + DBGFREGSUBFIELD_RW("osfxsr", 9, 1, 0), + DBGFREGSUBFIELD_RW("osxmmeexcpt", 10, 1, 0), + DBGFREGSUBFIELD_RW("vmxe", 13, 1, 0), + DBGFREGSUBFIELD_RW("smxe", 14, 1, 0), + DBGFREGSUBFIELD_RW("pcide", 17, 1, 0), + DBGFREGSUBFIELD_RW("osxsave", 18, 1, 0), + DBGFREGSUBFIELD_RW("smep", 20, 1, 0), + DBGFREGSUBFIELD_RW("smap", 21, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the DR6 register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_dr6[] = +{ + DBGFREGSUBFIELD_RW("b0", 0, 1, 0), + DBGFREGSUBFIELD_RW("b1", 1, 1, 0), + DBGFREGSUBFIELD_RW("b2", 2, 1, 0), + DBGFREGSUBFIELD_RW("b3", 3, 1, 0), + DBGFREGSUBFIELD_RW("bd", 13, 1, 0), + DBGFREGSUBFIELD_RW("bs", 14, 1, 0), + DBGFREGSUBFIELD_RW("bt", 15, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the DR7 register. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_dr7[] = +{ + DBGFREGSUBFIELD_RW("l0", 0, 1, 0), + DBGFREGSUBFIELD_RW("g0", 1, 1, 0), + DBGFREGSUBFIELD_RW("l1", 2, 1, 0), + DBGFREGSUBFIELD_RW("g1", 3, 1, 0), + DBGFREGSUBFIELD_RW("l2", 4, 1, 0), + DBGFREGSUBFIELD_RW("g2", 5, 1, 0), + DBGFREGSUBFIELD_RW("l3", 6, 1, 0), + DBGFREGSUBFIELD_RW("g3", 7, 1, 0), + DBGFREGSUBFIELD_RW("le", 8, 1, 0), + DBGFREGSUBFIELD_RW("ge", 9, 1, 0), + DBGFREGSUBFIELD_RW("gd", 13, 1, 0), + DBGFREGSUBFIELD_RW("rw0", 16, 2, 0), + DBGFREGSUBFIELD_RW("len0", 18, 2, 0), + DBGFREGSUBFIELD_RW("rw1", 20, 2, 0), + DBGFREGSUBFIELD_RW("len1", 22, 2, 0), + DBGFREGSUBFIELD_RW("rw2", 24, 2, 0), + DBGFREGSUBFIELD_RW("len2", 26, 2, 0), + DBGFREGSUBFIELD_RW("rw3", 28, 2, 0), + DBGFREGSUBFIELD_RW("len3", 30, 2, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the APIC_BASE MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_apic_base[] = +{ + DBGFREGSUBFIELD_RW("bsp", 8, 1, 0), + DBGFREGSUBFIELD_RW("ge", 9, 1, 0), + DBGFREGSUBFIELD_RW("base", 12, 20, 12), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the CR_PAT MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_cr_pat[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the PERF_STATUS MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_perf_status[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the EFER MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_efer[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the STAR MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_star[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the CSTAR MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_cstar[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** Sub-fields for the LSTAR MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_lstar[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; + +#if 0 /** @todo */ +/** Sub-fields for the SF_MASK MSR. */ +static DBGFREGSUBFIELD const g_aCpumRegFields_sf_mask[] = +{ + /** @todo */ + DBGFREGSUBFIELD_TERMINATOR() +}; +#endif + + +/** @name Macros for producing register descriptor table entries.
+ * @{ */ +#define CPU_REG_EX_AS(a_szName, a_RegSuff, a_TypeSuff, a_offRegister, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, 0 /*fFlags*/, a_offRegister, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } + +#define CPU_REG_REG(UName, LName) \ + CPU_REG_RW_AS(#LName, UName, U64, LName, cpumR3RegGet_Generic, cpumR3RegSet_Generic, g_aCpumRegAliases_##LName, NULL) + +#define CPU_REG_SEG(UName, LName) \ + CPU_REG_RW_AS(#LName, UName, U16, LName.Sel, cpumR3RegGet_Generic, cpumR3RegSet_seg, NULL, NULL ), \ + CPU_REG_RW_AS(#LName "_attr", UName##_ATTR, U32, LName.Attr.u, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, g_aCpumRegFields_seg), \ + CPU_REG_RW_AS(#LName "_base", UName##_BASE, U64, LName.u64Base, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), \ + CPU_REG_RW_AS(#LName "_lim", UName##_LIMIT, U32, LName.u32Limit, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ) + +#define CPU_REG_MM(n) \ + CPU_REG_XS_RW_AS("mm" #n, MM##n, U64, x87.aRegs[n].mmx, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, g_aCpumRegFields_mmN) + +#define CPU_REG_XMM(n) \ + CPU_REG_XS_RW_AS("xmm" #n, XMM##n, U128, x87.aXMM[n].xmm, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, g_aCpumRegFields_xmmN) + +#define CPU_REG_YMM(n) \ + { "ymm" #n, DBGFREG_YMM##n, DBGFREGVALTYPE_U256, 0 /*fFlags*/, n, cpumR3RegGet_ymm, cpumR3RegSet_ymm, NULL /*paAliases*/, NULL /*paSubFields*/ } + +/** @} */ + + +/** + * The guest register descriptors. + */ +static DBGFREGDESC const g_aCpumRegGstDescs[] = +{ +#define CPU_REG_RW_AS(a_szName, a_RegSuff, a_TypeSuff, a_CpumCtxMemb, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, 0 /*fFlags*/, RT_OFFSETOF(CPUMCPU, Guest.a_CpumCtxMemb), a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } +#define CPU_REG_RO_AS(a_szName, a_RegSuff, a_TypeSuff, a_CpumCtxMemb, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, DBGFREG_FLAGS_READ_ONLY, RT_OFFSETOF(CPUMCPU, Guest.a_CpumCtxMemb), a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } +#define CPU_REG_XS_RW_AS(a_szName, a_RegSuff, a_TypeSuff, a_XStateMemb, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, 0 /*fFlags*/, RT_OFFSETOF(X86XSAVEAREA, a_XStateMemb), a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } +#define CPU_REG_XS_RO_AS(a_szName, a_RegSuff, a_TypeSuff, a_XStateMemb, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, DBGFREG_FLAGS_READ_ONLY, RT_OFFSETOF(X86XSAVEAREA, a_XStateMemb), a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } +#define CPU_REG_MSR(a_szName, UName, a_TypeSuff, a_paSubFields) \ + CPU_REG_EX_AS(a_szName, MSR_##UName, a_TypeSuff, MSR_##UName, cpumR3RegGstGet_msr, cpumR3RegGstSet_msr, NULL, a_paSubFields) +#define CPU_REG_ST(n) \ + CPU_REG_EX_AS("st" #n, ST##n, R80, n, cpumR3RegGstGet_stN, cpumR3RegGstSet_stN, NULL, g_aCpumRegFields_stN) + + CPU_REG_REG(RAX, rax), + CPU_REG_REG(RCX, rcx), + CPU_REG_REG(RDX, rdx), + CPU_REG_REG(RBX, rbx), + CPU_REG_REG(RSP, rsp), + CPU_REG_REG(RBP, rbp), + CPU_REG_REG(RSI, rsi), + CPU_REG_REG(RDI, rdi), + CPU_REG_REG(R8, r8), + CPU_REG_REG(R9, r9), + CPU_REG_REG(R10, r10), + CPU_REG_REG(R11, r11), + CPU_REG_REG(R12, r12), + CPU_REG_REG(R13, r13), + CPU_REG_REG(R14, r14), + CPU_REG_REG(R15, r15), + CPU_REG_SEG(CS, cs), + CPU_REG_SEG(DS, ds), 
+ CPU_REG_SEG(ES, es), + CPU_REG_SEG(FS, fs), + CPU_REG_SEG(GS, gs), + CPU_REG_SEG(SS, ss), + CPU_REG_REG(RIP, rip), + CPU_REG_RW_AS("rflags", RFLAGS, U64, rflags, cpumR3RegGet_Generic, cpumR3RegSet_Generic, g_aCpumRegAliases_rflags, g_aCpumRegFields_rflags ), + CPU_REG_XS_RW_AS("fcw", FCW, U16, x87.FCW, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, g_aCpumRegFields_fcw ), + CPU_REG_XS_RW_AS("fsw", FSW, U16, x87.FSW, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, g_aCpumRegFields_fsw ), + CPU_REG_XS_RO_AS("ftw", FTW, U16, x87, cpumR3RegGet_ftw, cpumR3RegSet_ftw, NULL, g_aCpumRegFields_ftw ), + CPU_REG_XS_RW_AS("fop", FOP, U16, x87.FOP, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, NULL ), + CPU_REG_XS_RW_AS("fpuip", FPUIP, U32, x87.FPUIP, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, g_aCpumRegAliases_fpuip, NULL ), + CPU_REG_XS_RW_AS("fpucs", FPUCS, U16, x87.CS, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, NULL ), + CPU_REG_XS_RW_AS("fpudp", FPUDP, U32, x87.FPUDP, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, g_aCpumRegAliases_fpudp, NULL ), + CPU_REG_XS_RW_AS("fpuds", FPUDS, U16, x87.DS, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, NULL ), + CPU_REG_XS_RW_AS("mxcsr", MXCSR, U32, x87.MXCSR, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, g_aCpumRegFields_mxcsr ), + CPU_REG_XS_RW_AS("mxcsr_mask", MXCSR_MASK, U32, x87.MXCSR_MASK, cpumR3RegGet_XStateGeneric, cpumR3RegSet_XStateGeneric, NULL, g_aCpumRegFields_mxcsr ), + CPU_REG_ST(0), + CPU_REG_ST(1), + CPU_REG_ST(2), + CPU_REG_ST(3), + CPU_REG_ST(4), + CPU_REG_ST(5), + CPU_REG_ST(6), + CPU_REG_ST(7), + CPU_REG_MM(0), + CPU_REG_MM(1), + CPU_REG_MM(2), + CPU_REG_MM(3), + CPU_REG_MM(4), + CPU_REG_MM(5), + CPU_REG_MM(6), + CPU_REG_MM(7), + CPU_REG_XMM(0), + CPU_REG_XMM(1), + CPU_REG_XMM(2), + CPU_REG_XMM(3), + CPU_REG_XMM(4), + CPU_REG_XMM(5), + CPU_REG_XMM(6), + CPU_REG_XMM(7), + CPU_REG_XMM(8), + CPU_REG_XMM(9), + CPU_REG_XMM(10), + CPU_REG_XMM(11), + CPU_REG_XMM(12), + CPU_REG_XMM(13), + CPU_REG_XMM(14), + CPU_REG_XMM(15), + CPU_REG_YMM(0), + CPU_REG_YMM(1), + CPU_REG_YMM(2), + CPU_REG_YMM(3), + CPU_REG_YMM(4), + CPU_REG_YMM(5), + CPU_REG_YMM(6), + CPU_REG_YMM(7), + CPU_REG_YMM(8), + CPU_REG_YMM(9), + CPU_REG_YMM(10), + CPU_REG_YMM(11), + CPU_REG_YMM(12), + CPU_REG_YMM(13), + CPU_REG_YMM(14), + CPU_REG_YMM(15), + CPU_REG_RW_AS("gdtr_base", GDTR_BASE, U64, gdtr.pGdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("gdtr_lim", GDTR_LIMIT, U16, gdtr.cbGdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("idtr_base", IDTR_BASE, U64, idtr.pIdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("idtr_lim", IDTR_LIMIT, U16, idtr.cbIdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_SEG(LDTR, ldtr), + CPU_REG_SEG(TR, tr), + CPU_REG_EX_AS("cr0", CR0, U32, 0, cpumR3RegGstGet_crX, cpumR3RegGstSet_crX, g_aCpumRegAliases_cr0, g_aCpumRegFields_cr0 ), + CPU_REG_EX_AS("cr2", CR2, U64, 2, cpumR3RegGstGet_crX, cpumR3RegGstSet_crX, NULL, NULL ), + CPU_REG_EX_AS("cr3", CR3, U64, 3, cpumR3RegGstGet_crX, cpumR3RegGstSet_crX, NULL, g_aCpumRegFields_cr3 ), + CPU_REG_EX_AS("cr4", CR4, U32, 4, cpumR3RegGstGet_crX, cpumR3RegGstSet_crX, NULL, g_aCpumRegFields_cr4 ), + CPU_REG_EX_AS("cr8", CR8, U32, 8, cpumR3RegGstGet_crX, cpumR3RegGstSet_crX, NULL, NULL ), + CPU_REG_EX_AS("dr0", DR0, U64, 0, cpumR3RegGstGet_drX, cpumR3RegGstSet_drX, NULL, NULL ), + 
CPU_REG_EX_AS("dr1", DR1, U64, 1, cpumR3RegGstGet_drX, cpumR3RegGstSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr2", DR2, U64, 2, cpumR3RegGstGet_drX, cpumR3RegGstSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr3", DR3, U64, 3, cpumR3RegGstGet_drX, cpumR3RegGstSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr6", DR6, U32, 6, cpumR3RegGstGet_drX, cpumR3RegGstSet_drX, NULL, g_aCpumRegFields_dr6 ), + CPU_REG_EX_AS("dr7", DR7, U32, 7, cpumR3RegGstGet_drX, cpumR3RegGstSet_drX, NULL, g_aCpumRegFields_dr7 ), + CPU_REG_MSR("apic_base", IA32_APICBASE, U32, g_aCpumRegFields_apic_base ), + CPU_REG_MSR("pat", IA32_CR_PAT, U64, g_aCpumRegFields_cr_pat ), + CPU_REG_MSR("perf_status", IA32_PERF_STATUS, U64, g_aCpumRegFields_perf_status), + CPU_REG_MSR("sysenter_cs", IA32_SYSENTER_CS, U16, NULL ), + CPU_REG_MSR("sysenter_eip", IA32_SYSENTER_EIP, U32, NULL ), + CPU_REG_MSR("sysenter_esp", IA32_SYSENTER_ESP, U32, NULL ), + CPU_REG_MSR("tsc", IA32_TSC, U32, NULL ), + CPU_REG_MSR("efer", K6_EFER, U32, g_aCpumRegFields_efer ), + CPU_REG_MSR("star", K6_STAR, U64, g_aCpumRegFields_star ), + CPU_REG_MSR("cstar", K8_CSTAR, U64, g_aCpumRegFields_cstar ), + CPU_REG_MSR("msr_fs_base", K8_FS_BASE, U64, NULL ), + CPU_REG_MSR("msr_gs_base", K8_GS_BASE, U64, NULL ), + CPU_REG_MSR("krnl_gs_base", K8_KERNEL_GS_BASE, U64, NULL ), + CPU_REG_MSR("lstar", K8_LSTAR, U64, g_aCpumRegFields_lstar ), + CPU_REG_MSR("sf_mask", K8_SF_MASK, U64, NULL ), + CPU_REG_MSR("tsc_aux", K8_TSC_AUX, U64, NULL ), + CPU_REG_EX_AS("ah", AH, U8, RT_OFFSETOF(CPUMCPU, Guest.rax) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_EX_AS("ch", CH, U8, RT_OFFSETOF(CPUMCPU, Guest.rcx) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_EX_AS("dh", DH, U8, RT_OFFSETOF(CPUMCPU, Guest.rdx) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_EX_AS("bh", BH, U8, RT_OFFSETOF(CPUMCPU, Guest.rbx) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("gdtr", GDTR, DTR, gdtr, cpumR3RegGet_gdtr, cpumR3RegSet_gdtr, NULL, NULL ), + CPU_REG_RW_AS("idtr", IDTR, DTR, idtr, cpumR3RegGet_idtr, cpumR3RegSet_idtr, NULL, NULL ), + DBGFREGDESC_TERMINATOR() + +#undef CPU_REG_RW_AS +#undef CPU_REG_RO_AS +#undef CPU_REG_MSR +#undef CPU_REG_ST +}; + + +/** + * The hypervisor (raw-mode) register descriptors. 
+ */ +static DBGFREGDESC const g_aCpumRegHyperDescs[] = +{ +#define CPU_REG_RW_AS(a_szName, a_RegSuff, a_TypeSuff, a_CpumCtxMemb, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, 0 /*fFlags*/, RT_OFFSETOF(CPUMCPU, Hyper.a_CpumCtxMemb), a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } +#define CPU_REG_RO_AS(a_szName, a_RegSuff, a_TypeSuff, a_CpumCtxMemb, a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, DBGFREG_FLAGS_READ_ONLY, RT_OFFSETOF(CPUMCPU, Hyper.a_CpumCtxMemb), a_pfnGet, a_pfnSet, a_paAliases, a_paSubFields } +#define CPU_REG_DUMMY(a_szName, a_RegSuff, a_TypeSuff) \ + { a_szName, DBGFREG_##a_RegSuff, DBGFREGVALTYPE_##a_TypeSuff, DBGFREG_FLAGS_READ_ONLY, 0, cpumR3RegGet_Dummy, cpumR3RegSet_Dummy, NULL, NULL} +#define CPU_REG_MSR(a_szName, UName, a_TypeSuff, a_paSubFields) \ + CPU_REG_EX_AS(a_szName, MSR_##UName, a_TypeSuff, MSR_##UName, cpumR3RegHyperGet_msr, cpumR3RegHyperSet_msr, NULL, a_paSubFields) + + CPU_REG_REG(RAX, rax), + CPU_REG_REG(RCX, rcx), + CPU_REG_REG(RDX, rdx), + CPU_REG_REG(RBX, rbx), + CPU_REG_REG(RSP, rsp), + CPU_REG_REG(RBP, rbp), + CPU_REG_REG(RSI, rsi), + CPU_REG_REG(RDI, rdi), + CPU_REG_REG(R8, r8), + CPU_REG_REG(R9, r9), + CPU_REG_REG(R10, r10), + CPU_REG_REG(R11, r11), + CPU_REG_REG(R12, r12), + CPU_REG_REG(R13, r13), + CPU_REG_REG(R14, r14), + CPU_REG_REG(R15, r15), + CPU_REG_SEG(CS, cs), + CPU_REG_SEG(DS, ds), + CPU_REG_SEG(ES, es), + CPU_REG_SEG(FS, fs), + CPU_REG_SEG(GS, gs), + CPU_REG_SEG(SS, ss), + CPU_REG_REG(RIP, rip), + CPU_REG_RW_AS("rflags", RFLAGS, U64, rflags, cpumR3RegGet_Generic, cpumR3RegSet_Generic, g_aCpumRegAliases_rflags, g_aCpumRegFields_rflags ), + CPU_REG_DUMMY("fcw", FCW, U16), + CPU_REG_DUMMY("fsw", FSW, U16), + CPU_REG_DUMMY("ftw", FTW, U16), + CPU_REG_DUMMY("fop", FOP, U16), + CPU_REG_DUMMY("fpuip", FPUIP, U32), + CPU_REG_DUMMY("fpucs", FPUCS, U16), + CPU_REG_DUMMY("fpudp", FPUDP, U32), + CPU_REG_DUMMY("fpuds", FPUDS, U16), + CPU_REG_DUMMY("mxcsr", MXCSR, U32), + CPU_REG_DUMMY("mxcsr_mask", MXCSR_MASK, U32), + CPU_REG_DUMMY("st0", ST0, R80), + CPU_REG_DUMMY("st1", ST1, R80), + CPU_REG_DUMMY("st2", ST2, R80), + CPU_REG_DUMMY("st3", ST3, R80), + CPU_REG_DUMMY("st4", ST4, R80), + CPU_REG_DUMMY("st5", ST5, R80), + CPU_REG_DUMMY("st6", ST6, R80), + CPU_REG_DUMMY("st7", ST7, R80), + CPU_REG_DUMMY("mm0", MM0, U64), + CPU_REG_DUMMY("mm1", MM1, U64), + CPU_REG_DUMMY("mm2", MM2, U64), + CPU_REG_DUMMY("mm3", MM3, U64), + CPU_REG_DUMMY("mm4", MM4, U64), + CPU_REG_DUMMY("mm5", MM5, U64), + CPU_REG_DUMMY("mm6", MM6, U64), + CPU_REG_DUMMY("mm7", MM7, U64), + CPU_REG_DUMMY("xmm0", XMM0, U128), + CPU_REG_DUMMY("xmm1", XMM1, U128), + CPU_REG_DUMMY("xmm2", XMM2, U128), + CPU_REG_DUMMY("xmm3", XMM3, U128), + CPU_REG_DUMMY("xmm4", XMM4, U128), + CPU_REG_DUMMY("xmm5", XMM5, U128), + CPU_REG_DUMMY("xmm6", XMM6, U128), + CPU_REG_DUMMY("xmm7", XMM7, U128), + CPU_REG_DUMMY("xmm8", XMM8, U128), + CPU_REG_DUMMY("xmm9", XMM9, U128), + CPU_REG_DUMMY("xmm10", XMM10, U128), + CPU_REG_DUMMY("xmm11", XMM11, U128), + CPU_REG_DUMMY("xmm12", XMM12, U128), + CPU_REG_DUMMY("xmm13", XMM13, U128), + CPU_REG_DUMMY("xmm14", XMM14, U128), + CPU_REG_DUMMY("xmm15", XMM15, U128), + CPU_REG_DUMMY("ymm0", YMM0, U256), + CPU_REG_DUMMY("ymm1", YMM1, U256), + CPU_REG_DUMMY("ymm2", YMM2, U256), + CPU_REG_DUMMY("ymm3", YMM3, U256), + CPU_REG_DUMMY("ymm4", YMM4, U256), + CPU_REG_DUMMY("ymm5", YMM5, U256), + CPU_REG_DUMMY("ymm6", YMM6, U256), + 
CPU_REG_DUMMY("ymm7", YMM7, U256), + CPU_REG_DUMMY("ymm8", YMM8, U256), + CPU_REG_DUMMY("ymm9", YMM9, U256), + CPU_REG_DUMMY("ymm10", YMM10, U256), + CPU_REG_DUMMY("ymm11", YMM11, U256), + CPU_REG_DUMMY("ymm12", YMM12, U256), + CPU_REG_DUMMY("ymm13", YMM13, U256), + CPU_REG_DUMMY("ymm14", YMM14, U256), + CPU_REG_DUMMY("ymm15", YMM15, U256), + CPU_REG_RW_AS("gdtr_base", GDTR_BASE, U64, gdtr.pGdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("gdtr_lim", GDTR_LIMIT, U16, gdtr.cbGdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("idtr_base", IDTR_BASE, U64, idtr.pIdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("idtr_lim", IDTR_LIMIT, U16, idtr.cbIdt, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_SEG(LDTR, ldtr), + CPU_REG_SEG(TR, tr), + CPU_REG_EX_AS("cr0", CR0, U32, 0, cpumR3RegHyperGet_crX, cpumR3RegHyperSet_crX, g_aCpumRegAliases_cr0, g_aCpumRegFields_cr0 ), + CPU_REG_EX_AS("cr2", CR2, U64, 2, cpumR3RegHyperGet_crX, cpumR3RegHyperSet_crX, NULL, NULL ), + CPU_REG_EX_AS("cr3", CR3, U64, 3, cpumR3RegHyperGet_crX, cpumR3RegHyperSet_crX, NULL, g_aCpumRegFields_cr3 ), + CPU_REG_EX_AS("cr4", CR4, U32, 4, cpumR3RegHyperGet_crX, cpumR3RegHyperSet_crX, NULL, g_aCpumRegFields_cr4 ), + CPU_REG_EX_AS("cr8", CR8, U32, 8, cpumR3RegHyperGet_crX, cpumR3RegHyperSet_crX, NULL, NULL ), + CPU_REG_EX_AS("dr0", DR0, U64, 0, cpumR3RegHyperGet_drX, cpumR3RegHyperSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr1", DR1, U64, 1, cpumR3RegHyperGet_drX, cpumR3RegHyperSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr2", DR2, U64, 2, cpumR3RegHyperGet_drX, cpumR3RegHyperSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr3", DR3, U64, 3, cpumR3RegHyperGet_drX, cpumR3RegHyperSet_drX, NULL, NULL ), + CPU_REG_EX_AS("dr6", DR6, U32, 6, cpumR3RegHyperGet_drX, cpumR3RegHyperSet_drX, NULL, g_aCpumRegFields_dr6 ), + CPU_REG_EX_AS("dr7", DR7, U32, 7, cpumR3RegHyperGet_drX, cpumR3RegHyperSet_drX, NULL, g_aCpumRegFields_dr7 ), + CPU_REG_MSR("apic_base", IA32_APICBASE, U32, g_aCpumRegFields_apic_base ), + CPU_REG_MSR("pat", IA32_CR_PAT, U64, g_aCpumRegFields_cr_pat ), + CPU_REG_MSR("perf_status", IA32_PERF_STATUS, U64, g_aCpumRegFields_perf_status), + CPU_REG_MSR("sysenter_cs", IA32_SYSENTER_CS, U16, NULL ), + CPU_REG_MSR("sysenter_eip", IA32_SYSENTER_EIP, U32, NULL ), + CPU_REG_MSR("sysenter_esp", IA32_SYSENTER_ESP, U32, NULL ), + CPU_REG_MSR("tsc", IA32_TSC, U32, NULL ), + CPU_REG_MSR("efer", K6_EFER, U32, g_aCpumRegFields_efer ), + CPU_REG_MSR("star", K6_STAR, U64, g_aCpumRegFields_star ), + CPU_REG_MSR("cstar", K8_CSTAR, U64, g_aCpumRegFields_cstar ), + CPU_REG_MSR("msr_fs_base", K8_FS_BASE, U64, NULL ), + CPU_REG_MSR("msr_gs_base", K8_GS_BASE, U64, NULL ), + CPU_REG_MSR("krnl_gs_base", K8_KERNEL_GS_BASE, U64, NULL ), + CPU_REG_MSR("lstar", K8_LSTAR, U64, g_aCpumRegFields_lstar ), + CPU_REG_MSR("sf_mask", K8_SF_MASK, U64, NULL ), + CPU_REG_MSR("tsc_aux", K8_TSC_AUX, U64, NULL ), + CPU_REG_EX_AS("ah", AH, U8, RT_OFFSETOF(CPUMCPU, Hyper.rax) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_EX_AS("ch", CH, U8, RT_OFFSETOF(CPUMCPU, Hyper.rcx) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_EX_AS("dh", DH, U8, RT_OFFSETOF(CPUMCPU, Hyper.rdx) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_EX_AS("bh", BH, U8, RT_OFFSETOF(CPUMCPU, Hyper.rbx) + 1, cpumR3RegGet_Generic, cpumR3RegSet_Generic, NULL, NULL ), + CPU_REG_RW_AS("gdtr", GDTR, DTR, gdtr, cpumR3RegGet_gdtr, 
cpumR3RegSet_gdtr, NULL, NULL ), + CPU_REG_RW_AS("idtr", IDTR, DTR, idtr, cpumR3RegGet_idtr, cpumR3RegSet_idtr, NULL, NULL ), + DBGFREGDESC_TERMINATOR() +#undef CPU_REG_RW_AS +#undef CPU_REG_RO_AS +#undef CPU_REG_MSR +#undef CPU_REG_ST +}; + + +/** + * Initializes the debugger related sides of the CPUM component. + * + * Called by CPUMR3Init. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int cpumR3DbgInit(PVM pVM) +{ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + int rc = DBGFR3RegRegisterCpu(pVM, &pVM->aCpus[iCpu], g_aCpumRegGstDescs, true /*fGuestRegs*/); + AssertLogRelRCReturn(rc, rc); + rc = DBGFR3RegRegisterCpu(pVM, &pVM->aCpus[iCpu], g_aCpumRegHyperDescs, false /*fGuestRegs*/); + AssertLogRelRCReturn(rc, rc); + } + + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp b/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp new file mode 100644 index 00000000..1afb5773 --- /dev/null +++ b/src/VBox/VMM/VMMR3/CPUMR3CpuId.cpp @@ -0,0 +1,7471 @@ +/* $Id: CPUMR3CpuId.cpp $ */ +/** @file + * CPUM - CPU ID part. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_CPUM +#include +#include +#include +#include +#include +#include "CPUMInternal.h" +#include +#include +#include + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** For sanity and avoid wasting hyper heap on buggy config / saved state. */ +#define CPUM_CPUID_MAX_LEAVES 2048 +/* Max size we accept for the XSAVE area. */ +#define CPUM_MAX_XSAVE_AREA_SIZE 10240 +/* Min size we accept for the XSAVE area. */ +#define CPUM_MIN_XSAVE_AREA_SIZE 0x240 + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** + * The intel pentium family. + */ +static const CPUMMICROARCH g_aenmIntelFamily06[] = +{ + /* [ 0(0x00)] = */ kCpumMicroarch_Intel_P6, /* Pentium Pro A-step (says sandpile.org). 
*/ + /* [ 1(0x01)] = */ kCpumMicroarch_Intel_P6, /* Pentium Pro */ + /* [ 2(0x02)] = */ kCpumMicroarch_Intel_Unknown, + /* [ 3(0x03)] = */ kCpumMicroarch_Intel_P6_II, /* PII Klamath */ + /* [ 4(0x04)] = */ kCpumMicroarch_Intel_Unknown, + /* [ 5(0x05)] = */ kCpumMicroarch_Intel_P6_II, /* PII Deschutes */ + /* [ 6(0x06)] = */ kCpumMicroarch_Intel_P6_II, /* Celeron Mendocino. */ + /* [ 7(0x07)] = */ kCpumMicroarch_Intel_P6_III, /* PIII Katmai. */ + /* [ 8(0x08)] = */ kCpumMicroarch_Intel_P6_III, /* PIII Coppermine (includes Celeron). */ + /* [ 9(0x09)] = */ kCpumMicroarch_Intel_P6_M_Banias, /* Pentium/Celeron M Banias. */ + /* [10(0x0a)] = */ kCpumMicroarch_Intel_P6_III, /* PIII Xeon */ + /* [11(0x0b)] = */ kCpumMicroarch_Intel_P6_III, /* PIII Tualatin (includes Celeron). */ + /* [12(0x0c)] = */ kCpumMicroarch_Intel_Unknown, + /* [13(0x0d)] = */ kCpumMicroarch_Intel_P6_M_Dothan, /* Pentium/Celeron M Dothan. */ + /* [14(0x0e)] = */ kCpumMicroarch_Intel_Core_Yonah, /* Core Yonah (Enhanced Pentium M). */ + /* [15(0x0f)] = */ kCpumMicroarch_Intel_Core2_Merom, /* Merom */ + /* [16(0x10)] = */ kCpumMicroarch_Intel_Unknown, + /* [17(0x11)] = */ kCpumMicroarch_Intel_Unknown, + /* [18(0x12)] = */ kCpumMicroarch_Intel_Unknown, + /* [19(0x13)] = */ kCpumMicroarch_Intel_Unknown, + /* [20(0x14)] = */ kCpumMicroarch_Intel_Unknown, + /* [21(0x15)] = */ kCpumMicroarch_Intel_P6_M_Dothan, /* Tolapai - System-on-a-chip. */ + /* [22(0x16)] = */ kCpumMicroarch_Intel_Core2_Merom, + /* [23(0x17)] = */ kCpumMicroarch_Intel_Core2_Penryn, + /* [24(0x18)] = */ kCpumMicroarch_Intel_Unknown, + /* [25(0x19)] = */ kCpumMicroarch_Intel_Unknown, + /* [26(0x1a)] = */ kCpumMicroarch_Intel_Core7_Nehalem, + /* [27(0x1b)] = */ kCpumMicroarch_Intel_Unknown, + /* [28(0x1c)] = */ kCpumMicroarch_Intel_Atom_Bonnell, /* Diamonville, Pineview, */ + /* [29(0x1d)] = */ kCpumMicroarch_Intel_Core2_Penryn, + /* [30(0x1e)] = */ kCpumMicroarch_Intel_Core7_Nehalem, /* Clarksfield, Lynnfield, Jasper Forest. */ + /* [31(0x1f)] = */ kCpumMicroarch_Intel_Core7_Nehalem, /* Only listed by sandpile.org. 2 cores ABD/HVD, whatever that means. */ + /* [32(0x20)] = */ kCpumMicroarch_Intel_Unknown, + /* [33(0x21)] = */ kCpumMicroarch_Intel_Unknown, + /* [34(0x22)] = */ kCpumMicroarch_Intel_Unknown, + /* [35(0x23)] = */ kCpumMicroarch_Intel_Unknown, + /* [36(0x24)] = */ kCpumMicroarch_Intel_Unknown, + /* [37(0x25)] = */ kCpumMicroarch_Intel_Core7_Westmere, /* Arrandale, Clarksdale. */ + /* [38(0x26)] = */ kCpumMicroarch_Intel_Atom_Lincroft, + /* [39(0x27)] = */ kCpumMicroarch_Intel_Atom_Saltwell, + /* [40(0x28)] = */ kCpumMicroarch_Intel_Unknown, + /* [41(0x29)] = */ kCpumMicroarch_Intel_Unknown, + /* [42(0x2a)] = */ kCpumMicroarch_Intel_Core7_SandyBridge, + /* [43(0x2b)] = */ kCpumMicroarch_Intel_Unknown, + /* [44(0x2c)] = */ kCpumMicroarch_Intel_Core7_Westmere, /* Gulftown, Westmere-EP. */ + /* [45(0x2d)] = */ kCpumMicroarch_Intel_Core7_SandyBridge, /* SandyBridge-E, SandyBridge-EN, SandyBridge-EP. */ + /* [46(0x2e)] = */ kCpumMicroarch_Intel_Core7_Nehalem, /* Beckton (Xeon). */ + /* [47(0x2f)] = */ kCpumMicroarch_Intel_Core7_Westmere, /* Westmere-EX. */ + /* [48(0x30)] = */ kCpumMicroarch_Intel_Unknown, + /* [49(0x31)] = */ kCpumMicroarch_Intel_Unknown, + /* [50(0x32)] = */ kCpumMicroarch_Intel_Unknown, + /* [51(0x33)] = */ kCpumMicroarch_Intel_Unknown, + /* [52(0x34)] = */ kCpumMicroarch_Intel_Unknown, + /* [53(0x35)] = */ kCpumMicroarch_Intel_Atom_Saltwell, /* ?? 
*/ + /* [54(0x36)] = */ kCpumMicroarch_Intel_Atom_Saltwell, /* Cedarview, ++ */ + /* [55(0x37)] = */ kCpumMicroarch_Intel_Atom_Silvermont, + /* [56(0x38)] = */ kCpumMicroarch_Intel_Unknown, + /* [57(0x39)] = */ kCpumMicroarch_Intel_Unknown, + /* [58(0x3a)] = */ kCpumMicroarch_Intel_Core7_IvyBridge, + /* [59(0x3b)] = */ kCpumMicroarch_Intel_Unknown, + /* [60(0x3c)] = */ kCpumMicroarch_Intel_Core7_Haswell, + /* [61(0x3d)] = */ kCpumMicroarch_Intel_Core7_Broadwell, + /* [62(0x3e)] = */ kCpumMicroarch_Intel_Core7_IvyBridge, + /* [63(0x3f)] = */ kCpumMicroarch_Intel_Core7_Haswell, + /* [64(0x40)] = */ kCpumMicroarch_Intel_Unknown, + /* [65(0x41)] = */ kCpumMicroarch_Intel_Unknown, + /* [66(0x42)] = */ kCpumMicroarch_Intel_Unknown, + /* [67(0x43)] = */ kCpumMicroarch_Intel_Unknown, + /* [68(0x44)] = */ kCpumMicroarch_Intel_Unknown, + /* [69(0x45)] = */ kCpumMicroarch_Intel_Core7_Haswell, + /* [70(0x46)] = */ kCpumMicroarch_Intel_Core7_Haswell, + /* [71(0x47)] = */ kCpumMicroarch_Intel_Core7_Broadwell, /* i7-5775C */ + /* [72(0x48)] = */ kCpumMicroarch_Intel_Unknown, + /* [73(0x49)] = */ kCpumMicroarch_Intel_Unknown, + /* [74(0x4a)] = */ kCpumMicroarch_Intel_Atom_Silvermont, + /* [75(0x4b)] = */ kCpumMicroarch_Intel_Unknown, + /* [76(0x4c)] = */ kCpumMicroarch_Intel_Atom_Airmount, + /* [77(0x4d)] = */ kCpumMicroarch_Intel_Atom_Silvermont, + /* [78(0x4e)] = */ kCpumMicroarch_Intel_Core7_Skylake, /* unconfirmed */ + /* [79(0x4f)] = */ kCpumMicroarch_Intel_Core7_Broadwell, /* unconfirmed, Broadwell-E */ + /* [80(0x50)] = */ kCpumMicroarch_Intel_Unknown, + /* [81(0x51)] = */ kCpumMicroarch_Intel_Unknown, + /* [82(0x52)] = */ kCpumMicroarch_Intel_Unknown, + /* [83(0x53)] = */ kCpumMicroarch_Intel_Unknown, + /* [84(0x54)] = */ kCpumMicroarch_Intel_Unknown, + /* [85(0x55)] = */ kCpumMicroarch_Intel_Core7_Skylake, /* server cpu */ + /* [86(0x56)] = */ kCpumMicroarch_Intel_Core7_Broadwell, /* Xeon D-1540, Broadwell-DE */ + /* [87(0x57)] = */ kCpumMicroarch_Intel_Phi_KnightsLanding, + /* [88(0x58)] = */ kCpumMicroarch_Intel_Unknown, + /* [89(0x59)] = */ kCpumMicroarch_Intel_Unknown, + /* [90(0x5a)] = */ kCpumMicroarch_Intel_Atom_Silvermont, /* Moorefield */ + /* [91(0x5b)] = */ kCpumMicroarch_Intel_Unknown, + /* [92(0x5c)] = */ kCpumMicroarch_Intel_Atom_Goldmont, /* Apollo Lake */ + /* [93(0x5d)] = */ kCpumMicroarch_Intel_Atom_Silvermont, /* x3-C3230 */ + /* [94(0x5e)] = */ kCpumMicroarch_Intel_Core7_Skylake, /* i7-6700K */ + /* [95(0x5f)] = */ kCpumMicroarch_Intel_Atom_Goldmont, /* Denverton */ + /* [96(0x60)] = */ kCpumMicroarch_Intel_Unknown, + /* [97(0x61)] = */ kCpumMicroarch_Intel_Unknown, + /* [98(0x62)] = */ kCpumMicroarch_Intel_Unknown, + /* [99(0x63)] = */ kCpumMicroarch_Intel_Unknown, + /*[100(0x64)] = */ kCpumMicroarch_Intel_Unknown, + /*[101(0x65)] = */ kCpumMicroarch_Intel_Atom_Silvermont, /* SoFIA */ + /*[102(0x66)] = */ kCpumMicroarch_Intel_Core7_CannonLake, /* unconfirmed */ + /*[103(0x67)] = */ kCpumMicroarch_Intel_Unknown, + /*[104(0x68)] = */ kCpumMicroarch_Intel_Unknown, + /*[105(0x69)] = */ kCpumMicroarch_Intel_Unknown, + /*[106(0x6a)] = */ kCpumMicroarch_Intel_Unknown, + /*[107(0x6b)] = */ kCpumMicroarch_Intel_Unknown, + /*[108(0x6c)] = */ kCpumMicroarch_Intel_Unknown, + /*[109(0x6d)] = */ kCpumMicroarch_Intel_Unknown, + /*[110(0x6e)] = */ kCpumMicroarch_Intel_Unknown, + /*[111(0x6f)] = */ kCpumMicroarch_Intel_Unknown, + /*[112(0x70)] = */ kCpumMicroarch_Intel_Unknown, + /*[113(0x71)] = */ kCpumMicroarch_Intel_Unknown, + /*[114(0x72)] = */ kCpumMicroarch_Intel_Unknown, + 
/*[115(0x73)] = */ kCpumMicroarch_Intel_Unknown, + /*[116(0x74)] = */ kCpumMicroarch_Intel_Unknown, + /*[117(0x75)] = */ kCpumMicroarch_Intel_Unknown, + /*[118(0x76)] = */ kCpumMicroarch_Intel_Unknown, + /*[119(0x77)] = */ kCpumMicroarch_Intel_Unknown, + /*[120(0x78)] = */ kCpumMicroarch_Intel_Unknown, + /*[121(0x79)] = */ kCpumMicroarch_Intel_Unknown, + /*[122(0x7a)] = */ kCpumMicroarch_Intel_Atom_GoldmontPlus, + /*[123(0x7b)] = */ kCpumMicroarch_Intel_Unknown, + /*[124(0x7c)] = */ kCpumMicroarch_Intel_Unknown, + /*[125(0x7d)] = */ kCpumMicroarch_Intel_Unknown, + /*[126(0x7e)] = */ kCpumMicroarch_Intel_Core7_IceLake, /* unconfirmed */ + /*[127(0x7f)] = */ kCpumMicroarch_Intel_Unknown, + /*[128(0x80)] = */ kCpumMicroarch_Intel_Unknown, + /*[129(0x81)] = */ kCpumMicroarch_Intel_Unknown, + /*[130(0x82)] = */ kCpumMicroarch_Intel_Unknown, + /*[131(0x83)] = */ kCpumMicroarch_Intel_Unknown, + /*[132(0x84)] = */ kCpumMicroarch_Intel_Unknown, + /*[133(0x85)] = */ kCpumMicroarch_Intel_Phi_KnightsMill, + /*[134(0x86)] = */ kCpumMicroarch_Intel_Unknown, + /*[135(0x87)] = */ kCpumMicroarch_Intel_Unknown, + /*[136(0x88)] = */ kCpumMicroarch_Intel_Unknown, + /*[137(0x89)] = */ kCpumMicroarch_Intel_Unknown, + /*[138(0x8a)] = */ kCpumMicroarch_Intel_Unknown, + /*[139(0x8b)] = */ kCpumMicroarch_Intel_Unknown, + /*[140(0x8c)] = */ kCpumMicroarch_Intel_Unknown, + /*[141(0x8d)] = */ kCpumMicroarch_Intel_Unknown, + /*[142(0x8e)] = */ kCpumMicroarch_Intel_Core7_KabyLake, /* Stepping 0xA is CoffeeLake, 9 is KabyLake. */ + /*[143(0x8f)] = */ kCpumMicroarch_Intel_Unknown, + /*[144(0x90)] = */ kCpumMicroarch_Intel_Unknown, + /*[145(0x91)] = */ kCpumMicroarch_Intel_Unknown, + /*[146(0x92)] = */ kCpumMicroarch_Intel_Unknown, + /*[147(0x93)] = */ kCpumMicroarch_Intel_Unknown, + /*[148(0x94)] = */ kCpumMicroarch_Intel_Unknown, + /*[149(0x95)] = */ kCpumMicroarch_Intel_Unknown, + /*[150(0x96)] = */ kCpumMicroarch_Intel_Unknown, + /*[151(0x97)] = */ kCpumMicroarch_Intel_Unknown, + /*[152(0x98)] = */ kCpumMicroarch_Intel_Unknown, + /*[153(0x99)] = */ kCpumMicroarch_Intel_Unknown, + /*[154(0x9a)] = */ kCpumMicroarch_Intel_Unknown, + /*[155(0x9b)] = */ kCpumMicroarch_Intel_Unknown, + /*[156(0x9c)] = */ kCpumMicroarch_Intel_Unknown, + /*[157(0x9d)] = */ kCpumMicroarch_Intel_Unknown, + /*[158(0x9e)] = */ kCpumMicroarch_Intel_Core7_KabyLake, /* Stepping 0xA is CoffeeLake, 9 is KabyLake. */ + /*[159(0x9f)] = */ kCpumMicroarch_Intel_Unknown, +}; +AssertCompile(RT_ELEMENTS(g_aenmIntelFamily06) == 0x9f+1); + + +/** + * Figures out the (sub-)micro architecture given a bit of CPUID info. + * + * @returns Micro architecture. + * @param enmVendor The CPU vendor. + * @param bFamily The CPU family. + * @param bModel The CPU model. + * @param bStepping The CPU stepping. + */ +VMMR3DECL(CPUMMICROARCH) CPUMR3CpuIdDetermineMicroarchEx(CPUMCPUVENDOR enmVendor, uint8_t bFamily, + uint8_t bModel, uint8_t bStepping) +{ + if (enmVendor == CPUMCPUVENDOR_AMD) + { + switch (bFamily) + { + case 0x02: return kCpumMicroarch_AMD_Am286; /* Not really kosher... */ + case 0x03: return kCpumMicroarch_AMD_Am386; + case 0x23: return kCpumMicroarch_AMD_Am386; /* SX*/ + case 0x04: return bModel < 14 ? kCpumMicroarch_AMD_Am486 : kCpumMicroarch_AMD_Am486Enh; + case 0x05: return bModel < 6 ? kCpumMicroarch_AMD_K5 : kCpumMicroarch_AMD_K6; /* Geode LX is 0x0a, lump it with K6.
*/ + case 0x06: + switch (bModel) + { + case 0: return kCpumMicroarch_AMD_K7_Palomino; + case 1: return kCpumMicroarch_AMD_K7_Palomino; + case 2: return kCpumMicroarch_AMD_K7_Palomino; + case 3: return kCpumMicroarch_AMD_K7_Spitfire; + case 4: return kCpumMicroarch_AMD_K7_Thunderbird; + case 6: return kCpumMicroarch_AMD_K7_Palomino; + case 7: return kCpumMicroarch_AMD_K7_Morgan; + case 8: return kCpumMicroarch_AMD_K7_Thoroughbred; + case 10: return kCpumMicroarch_AMD_K7_Barton; /* Thorton too. */ + } + return kCpumMicroarch_AMD_K7_Unknown; + case 0x0f: + /* + * This family is a friggin mess. Trying my best to make some + * sense out of it. Too much happened in the 0x0f family to + * lump it all together as K8 (130nm->90nm->65nm, AMD-V, ++). + * + * Empirical CPUID.01h.EAX evidence from revision guides, wikipedia, + * cpu-world.com, and other places: + * - 130nm: + * - ClawHammer: F7A/SH-CG, F5A/-CG, F4A/-CG, F50/-B0, F48/-C0, F58/-C0, + * - SledgeHammer: F50/SH-B0, F48/-C0, F58/-C0, F4A/-CG, F5A/-CG, F7A/-CG, F51/-B3 + * - Newcastle: FC0/DH-CG (erratum #180: FE0/DH-CG), FF0/DH-CG + * - Dublin: FC0/-CG, FF0/-CG, F82/CH-CG, F4A/-CG, F48/SH-C0, + * - Odessa: FC0/DH-CG (erratum #180: FE0/DH-CG) + * - Paris: FF0/DH-CG, FC0/DH-CG (erratum #180: FE0/DH-CG), + * - 90nm: + * - Winchester: 10FF0/DH-D0, 20FF0/DH-E3. + * - Oakville: 10FC0/DH-D0. + * - Georgetown: 10FC0/DH-D0. + * - Sonora: 10FC0/DH-D0. + * - Venus: 20F71/SH-E4 + * - Troy: 20F51/SH-E4 + * - Athens: 20F51/SH-E4 + * - San Diego: 20F71/SH-E4. + * - Lancaster: 20F42/SH-E5 + * - Newark: 20F42/SH-E5. + * - Albany: 20FC2/DH-E6. + * - Roma: 20FC2/DH-E6. + * - Venice: 20FF0/DH-E3, 20FC2/DH-E6, 20FF2/DH-E6. + * - Palermo: 10FC0/DH-D0, 20FF0/DH-E3, 20FC0/DH-E3, 20FC2/DH-E6, 20FF2/DH-E6 + * - 90nm introducing Dual core: + * - Denmark: 20F30/JH-E1, 20F32/JH-E6 + * - Italy: 20F10/JH-E1, 20F12/JH-E6 + * - Egypt: 20F10/JH-E1, 20F12/JH-E6 + * - Toledo: 20F32/JH-E6, 30F72/DH-E6 (single core variant). + * - Manchester: 20FB1/BH-E4, 30FF2/BH-E4. + * - 90nm 2nd gen opteron ++, AMD-V introduced (might be missing in some cheaper models): + * - Santa Ana: 40F32/JH-F2, /-F3 + * - Santa Rosa: 40F12/JH-F2, 40F13/JH-F3 + * - Windsor: 40F32/JH-F2, 40F33/JH-F3, C0F13/JH-F3, 40FB2/BH-F2, ??20FB1/BH-E4??. + * - Manila: 50FF2/DH-F2, 40FF2/DH-F2 + * - Orleans: 40FF2/DH-F2, 50FF2/DH-F2, 50FF3/DH-F3. + * - Keene: 40FC2/DH-F2. + * - Richmond: 40FC2/DH-F2 + * - Taylor: 40F82/BH-F2 + * - Trinidad: 40F82/BH-F2 + * + * - 65nm: + * - Brisbane: 60FB1/BH-G1, 60FB2/BH-G2. + * - Tyler: 60F81/BH-G1, 60F82/BH-G2. + * - Sparta: 70FF1/DH-G1, 70FF2/DH-G2. + * - Lima: 70FF1/DH-G1, 70FF2/DH-G2. + * - Sherman: /-G1, 70FC2/DH-G2. + * - Huron: 70FF2/DH-G2. + */ + if (bModel < 0x10) + return kCpumMicroarch_AMD_K8_130nm; + if (bModel >= 0x60 && bModel < 0x80) + return kCpumMicroarch_AMD_K8_65nm; + if (bModel >= 0x40) + return kCpumMicroarch_AMD_K8_90nm_AMDV; + switch (bModel) + { + case 0x21: + case 0x23: + case 0x2b: + case 0x2f: + case 0x37: + case 0x3f: + return kCpumMicroarch_AMD_K8_90nm_DualCore; + } + return kCpumMicroarch_AMD_K8_90nm; + case 0x10: + return kCpumMicroarch_AMD_K10; + case 0x11: + return kCpumMicroarch_AMD_K10_Lion; + case 0x12: + return kCpumMicroarch_AMD_K10_Llano; + case 0x14: + return kCpumMicroarch_AMD_Bobcat; + case 0x15: + switch (bModel) + { + case 0x00: return kCpumMicroarch_AMD_15h_Bulldozer; /* Any? prerelease? */ + case 0x01: return kCpumMicroarch_AMD_15h_Bulldozer; /* Opteron 4200, FX-81xx.
*/ + case 0x02: return kCpumMicroarch_AMD_15h_Piledriver; /* Opteron 4300, FX-83xx. */ + case 0x10: return kCpumMicroarch_AMD_15h_Piledriver; /* A10-5800K for e.g. */ + case 0x11: /* ?? */ + case 0x12: /* ?? */ + case 0x13: return kCpumMicroarch_AMD_15h_Piledriver; /* A10-6800K for e.g. */ + } + return kCpumMicroarch_AMD_15h_Unknown; + case 0x16: + return kCpumMicroarch_AMD_Jaguar; + case 0x17: + return kCpumMicroarch_AMD_Zen_Ryzen; + } + return kCpumMicroarch_AMD_Unknown; + } + + if (enmVendor == CPUMCPUVENDOR_INTEL) + { + switch (bFamily) + { + case 3: + return kCpumMicroarch_Intel_80386; + case 4: + return kCpumMicroarch_Intel_80486; + case 5: + return kCpumMicroarch_Intel_P5; + case 6: + if (bModel < RT_ELEMENTS(g_aenmIntelFamily06)) + { + CPUMMICROARCH enmMicroArch = g_aenmIntelFamily06[bModel]; + if ( enmMicroArch == kCpumMicroarch_Intel_Core7_KabyLake + && bStepping >= 0xa) + enmMicroArch = kCpumMicroarch_Intel_Core7_CoffeeLake; + return enmMicroArch; + } + return kCpumMicroarch_Intel_Atom_Unknown; + case 15: + switch (bModel) + { + case 0: return kCpumMicroarch_Intel_NB_Willamette; + case 1: return kCpumMicroarch_Intel_NB_Willamette; + case 2: return kCpumMicroarch_Intel_NB_Northwood; + case 3: return kCpumMicroarch_Intel_NB_Prescott; + case 4: return kCpumMicroarch_Intel_NB_Prescott2M; /* ?? */ + case 5: return kCpumMicroarch_Intel_NB_Unknown; /*??*/ + case 6: return kCpumMicroarch_Intel_NB_CedarMill; + case 7: return kCpumMicroarch_Intel_NB_Gallatin; + default: return kCpumMicroarch_Intel_NB_Unknown; + } + break; + /* The following are not kosher but kind of follow intuitively from 6, 5 & 4. */ + case 0: + return kCpumMicroarch_Intel_8086; + case 1: + return kCpumMicroarch_Intel_80186; + case 2: + return kCpumMicroarch_Intel_80286; + } + return kCpumMicroarch_Intel_Unknown; + } + + if (enmVendor == CPUMCPUVENDOR_VIA) + { + switch (bFamily) + { + case 5: + switch (bModel) + { + case 1: return kCpumMicroarch_Centaur_C6; + case 4: return kCpumMicroarch_Centaur_C6; + case 8: return kCpumMicroarch_Centaur_C2; + case 9: return kCpumMicroarch_Centaur_C3; + } + break; + + case 6: + switch (bModel) + { + case 5: return kCpumMicroarch_VIA_C3_M2; + case 6: return kCpumMicroarch_VIA_C3_C5A; + case 7: return bStepping < 8 ? kCpumMicroarch_VIA_C3_C5B : kCpumMicroarch_VIA_C3_C5C; + case 8: return kCpumMicroarch_VIA_C3_C5N; + case 9: return bStepping < 8 ? kCpumMicroarch_VIA_C3_C5XL : kCpumMicroarch_VIA_C3_C5P; + case 10: return kCpumMicroarch_VIA_C7_C5J; + case 15: return kCpumMicroarch_VIA_Isaiah; + } + break; + } + return kCpumMicroarch_VIA_Unknown; + } + + if (enmVendor == CPUMCPUVENDOR_SHANGHAI) + { + switch (bFamily) + { + case 6: + case 7: + return kCpumMicroarch_Shanghai_Wudaokou; + default: + break; + } + return kCpumMicroarch_Shanghai_Unknown; + } + + if (enmVendor == CPUMCPUVENDOR_CYRIX) + { + switch (bFamily) + { + case 4: + switch (bModel) + { + case 9: return kCpumMicroarch_Cyrix_5x86; + } + break; + + case 5: + switch (bModel) + { + case 2: return kCpumMicroarch_Cyrix_M1; + case 4: return kCpumMicroarch_Cyrix_MediaGX; + case 5: return kCpumMicroarch_Cyrix_MediaGXm; + } + break; + + case 6: + switch (bModel) + { + case 0: return kCpumMicroarch_Cyrix_M2; + } + break; + + } + return kCpumMicroarch_Cyrix_Unknown; + } + + return kCpumMicroarch_Unknown; +} + + +/** + * Translates a microarchitecture enum value to the corresponding string + * constant. + * + * @returns Read-only string constant (omits "kCpumMicroarch_" prefix). Returns + * NULL if the value is invalid. 
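+ *
+ * Illustrative usage sketch (not from the original sources; the family, model
+ * and stepping values below are just example inputs):
+ * @code
+ *     CPUMMICROARCH enmMicroarch = CPUMR3CpuIdDetermineMicroarchEx(CPUMCPUVENDOR_INTEL,
+ *                                                                  6, 0x3c, 3);
+ *     LogRel(("CPUM: Microarch: %s\n", CPUMR3MicroarchName(enmMicroarch)));
+ * @endcode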
+ * + * @param enmMicroarch The enum value to convert. + */ +VMMR3DECL(const char *) CPUMR3MicroarchName(CPUMMICROARCH enmMicroarch) +{ + switch (enmMicroarch) + { +#define CASE_RET_STR(enmValue) case enmValue: return #enmValue + (sizeof("kCpumMicroarch_") - 1) + CASE_RET_STR(kCpumMicroarch_Intel_8086); + CASE_RET_STR(kCpumMicroarch_Intel_80186); + CASE_RET_STR(kCpumMicroarch_Intel_80286); + CASE_RET_STR(kCpumMicroarch_Intel_80386); + CASE_RET_STR(kCpumMicroarch_Intel_80486); + CASE_RET_STR(kCpumMicroarch_Intel_P5); + + CASE_RET_STR(kCpumMicroarch_Intel_P6); + CASE_RET_STR(kCpumMicroarch_Intel_P6_II); + CASE_RET_STR(kCpumMicroarch_Intel_P6_III); + + CASE_RET_STR(kCpumMicroarch_Intel_P6_M_Banias); + CASE_RET_STR(kCpumMicroarch_Intel_P6_M_Dothan); + CASE_RET_STR(kCpumMicroarch_Intel_Core_Yonah); + + CASE_RET_STR(kCpumMicroarch_Intel_Core2_Merom); + CASE_RET_STR(kCpumMicroarch_Intel_Core2_Penryn); + + CASE_RET_STR(kCpumMicroarch_Intel_Core7_Nehalem); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_Westmere); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_SandyBridge); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_IvyBridge); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_Haswell); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_Broadwell); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_Skylake); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_KabyLake); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_CoffeeLake); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_CannonLake); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_IceLake); + CASE_RET_STR(kCpumMicroarch_Intel_Core7_TigerLake); + + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Bonnell); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Lincroft); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Saltwell); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Silvermont); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Airmount); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Goldmont); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_GoldmontPlus); + CASE_RET_STR(kCpumMicroarch_Intel_Atom_Unknown); + + CASE_RET_STR(kCpumMicroarch_Intel_Phi_KnightsFerry); + CASE_RET_STR(kCpumMicroarch_Intel_Phi_KnightsCorner); + CASE_RET_STR(kCpumMicroarch_Intel_Phi_KnightsLanding); + CASE_RET_STR(kCpumMicroarch_Intel_Phi_KnightsHill); + CASE_RET_STR(kCpumMicroarch_Intel_Phi_KnightsMill); + + CASE_RET_STR(kCpumMicroarch_Intel_NB_Willamette); + CASE_RET_STR(kCpumMicroarch_Intel_NB_Northwood); + CASE_RET_STR(kCpumMicroarch_Intel_NB_Prescott); + CASE_RET_STR(kCpumMicroarch_Intel_NB_Prescott2M); + CASE_RET_STR(kCpumMicroarch_Intel_NB_CedarMill); + CASE_RET_STR(kCpumMicroarch_Intel_NB_Gallatin); + CASE_RET_STR(kCpumMicroarch_Intel_NB_Unknown); + + CASE_RET_STR(kCpumMicroarch_Intel_Unknown); + + CASE_RET_STR(kCpumMicroarch_AMD_Am286); + CASE_RET_STR(kCpumMicroarch_AMD_Am386); + CASE_RET_STR(kCpumMicroarch_AMD_Am486); + CASE_RET_STR(kCpumMicroarch_AMD_Am486Enh); + CASE_RET_STR(kCpumMicroarch_AMD_K5); + CASE_RET_STR(kCpumMicroarch_AMD_K6); + + CASE_RET_STR(kCpumMicroarch_AMD_K7_Palomino); + CASE_RET_STR(kCpumMicroarch_AMD_K7_Spitfire); + CASE_RET_STR(kCpumMicroarch_AMD_K7_Thunderbird); + CASE_RET_STR(kCpumMicroarch_AMD_K7_Morgan); + CASE_RET_STR(kCpumMicroarch_AMD_K7_Thoroughbred); + CASE_RET_STR(kCpumMicroarch_AMD_K7_Barton); + CASE_RET_STR(kCpumMicroarch_AMD_K7_Unknown); + + CASE_RET_STR(kCpumMicroarch_AMD_K8_130nm); + CASE_RET_STR(kCpumMicroarch_AMD_K8_90nm); + CASE_RET_STR(kCpumMicroarch_AMD_K8_90nm_DualCore); + CASE_RET_STR(kCpumMicroarch_AMD_K8_90nm_AMDV); + CASE_RET_STR(kCpumMicroarch_AMD_K8_65nm); + + CASE_RET_STR(kCpumMicroarch_AMD_K10); + 
CASE_RET_STR(kCpumMicroarch_AMD_K10_Llano); + CASE_RET_STR(kCpumMicroarch_AMD_Bobcat); + CASE_RET_STR(kCpumMicroarch_AMD_Jaguar); + + CASE_RET_STR(kCpumMicroarch_AMD_15h_Bulldozer); + CASE_RET_STR(kCpumMicroarch_AMD_15h_Piledriver); + CASE_RET_STR(kCpumMicroarch_AMD_15h_Steamroller); + CASE_RET_STR(kCpumMicroarch_AMD_15h_Excavator); + CASE_RET_STR(kCpumMicroarch_AMD_15h_Unknown); + + CASE_RET_STR(kCpumMicroarch_AMD_16h_First); + + CASE_RET_STR(kCpumMicroarch_AMD_Zen_Ryzen); + + CASE_RET_STR(kCpumMicroarch_AMD_Unknown); + + CASE_RET_STR(kCpumMicroarch_Centaur_C6); + CASE_RET_STR(kCpumMicroarch_Centaur_C2); + CASE_RET_STR(kCpumMicroarch_Centaur_C3); + CASE_RET_STR(kCpumMicroarch_VIA_C3_M2); + CASE_RET_STR(kCpumMicroarch_VIA_C3_C5A); + CASE_RET_STR(kCpumMicroarch_VIA_C3_C5B); + CASE_RET_STR(kCpumMicroarch_VIA_C3_C5C); + CASE_RET_STR(kCpumMicroarch_VIA_C3_C5N); + CASE_RET_STR(kCpumMicroarch_VIA_C3_C5XL); + CASE_RET_STR(kCpumMicroarch_VIA_C3_C5P); + CASE_RET_STR(kCpumMicroarch_VIA_C7_C5J); + CASE_RET_STR(kCpumMicroarch_VIA_Isaiah); + CASE_RET_STR(kCpumMicroarch_VIA_Unknown); + + CASE_RET_STR(kCpumMicroarch_Shanghai_Wudaokou); + CASE_RET_STR(kCpumMicroarch_Shanghai_Unknown); + + CASE_RET_STR(kCpumMicroarch_Cyrix_5x86); + CASE_RET_STR(kCpumMicroarch_Cyrix_M1); + CASE_RET_STR(kCpumMicroarch_Cyrix_MediaGX); + CASE_RET_STR(kCpumMicroarch_Cyrix_MediaGXm); + CASE_RET_STR(kCpumMicroarch_Cyrix_M2); + CASE_RET_STR(kCpumMicroarch_Cyrix_Unknown); + + CASE_RET_STR(kCpumMicroarch_NEC_V20); + CASE_RET_STR(kCpumMicroarch_NEC_V30); + + CASE_RET_STR(kCpumMicroarch_Unknown); + +#undef CASE_RET_STR + case kCpumMicroarch_Invalid: + case kCpumMicroarch_Intel_End: + case kCpumMicroarch_Intel_Core2_End: + case kCpumMicroarch_Intel_Core7_End: + case kCpumMicroarch_Intel_Atom_End: + case kCpumMicroarch_Intel_P6_Core_Atom_End: + case kCpumMicroarch_Intel_Phi_End: + case kCpumMicroarch_Intel_NB_End: + case kCpumMicroarch_AMD_K7_End: + case kCpumMicroarch_AMD_K8_End: + case kCpumMicroarch_AMD_15h_End: + case kCpumMicroarch_AMD_16h_End: + case kCpumMicroarch_AMD_Zen_End: + case kCpumMicroarch_AMD_End: + case kCpumMicroarch_VIA_End: + case kCpumMicroarch_Cyrix_End: + case kCpumMicroarch_NEC_End: + case kCpumMicroarch_Shanghai_End: + case kCpumMicroarch_32BitHack: + break; + /* no default! */ + } + + return NULL; +} + + +/** + * Determines the host CPU MXCSR mask. + * + * @returns MXCSR mask. + */ +VMMR3DECL(uint32_t) CPUMR3DeterminHostMxCsrMask(void) +{ + if ( ASMHasCpuId() + && ASMIsValidStdRange(ASMCpuId_EAX(0)) + && ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_FXSR) + { + uint8_t volatile abBuf[sizeof(X86FXSTATE) + 64]; + PX86FXSTATE pState = (PX86FXSTATE)&abBuf[64 - ((uintptr_t)&abBuf[0] & 63)]; + RT_ZERO(*pState); + ASMFxSave(pState); + if (pState->MXCSR_MASK == 0) + return 0xffbf; + return pState->MXCSR_MASK; + } + return 0; +} + + +/** + * Gets a matching leaf in the CPUID leaf array. + * + * @returns Pointer to the matching leaf, or NULL if not found. + * @param paLeaves The CPUID leaves to search. This is sorted. + * @param cLeaves The number of leaves in the array. + * @param uLeaf The leaf to locate. + * @param uSubLeaf The subleaf to locate. Pass 0 if no sub-leaves. + */ +static PCPUMCPUIDLEAF cpumR3CpuIdGetLeaf(PCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, uint32_t uLeaf, uint32_t uSubLeaf) +{ + /* Lazy bird does linear lookup here since this is only used for the + occasional CPUID overrides.
*/ + for (uint32_t i = 0; i < cLeaves; i++) + if ( paLeaves[i].uLeaf == uLeaf + && paLeaves[i].uSubLeaf == (uSubLeaf & paLeaves[i].fSubLeafMask)) + return &paLeaves[i]; + return NULL; +} + + +#ifndef IN_VBOX_CPU_REPORT +/** + * Gets a matching leaf in the CPUID leaf array, converted to a CPUMCPUID. + * + * @returns true if found, false if not. + * @param paLeaves The CPUID leaves to search. This is sorted. + * @param cLeaves The number of leaves in the array. + * @param uLeaf The leaf to locate. + * @param uSubLeaf The subleaf to locate. Pass 0 if no sub-leaves. + * @param pLegacy The legacy output leaf. + */ +static bool cpumR3CpuIdGetLeafLegacy(PCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, uint32_t uLeaf, uint32_t uSubLeaf, + PCPUMCPUID pLegacy) +{ + PCPUMCPUIDLEAF pLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, uLeaf, uSubLeaf); + if (pLeaf) + { + pLegacy->uEax = pLeaf->uEax; + pLegacy->uEbx = pLeaf->uEbx; + pLegacy->uEcx = pLeaf->uEcx; + pLegacy->uEdx = pLeaf->uEdx; + return true; + } + return false; +} +#endif /* IN_VBOX_CPU_REPORT */ + + +/** + * Ensures that the CPUID leaf array can hold one more leaf. + * + * @returns Pointer to the CPUID leaf array (*ppaLeaves) on success. NULL on + * failure. + * @param pVM The cross context VM structure. If NULL, use + * the process heap, otherwise the VM's hyper heap. + * @param ppaLeaves Pointer to the variable holding the array pointer + * (input/output). + * @param cLeaves The current array size. + * + * @remarks This function will automatically update the R0 and RC pointers when + * using the hyper heap, which means @a ppaLeaves and @a cLeaves must + * be the corresponding VM's CPUID arrays (which is asserted). + */ +static PCPUMCPUIDLEAF cpumR3CpuIdEnsureSpace(PVM pVM, PCPUMCPUIDLEAF *ppaLeaves, uint32_t cLeaves) +{ + /* + * If pVM is not specified, we're on the regular heap and can waste a + * little space to speed things up. + */ + uint32_t cAllocated; + if (!pVM) + { + cAllocated = RT_ALIGN(cLeaves, 16); + if (cLeaves + 1 > cAllocated) + { + void *pvNew = RTMemRealloc(*ppaLeaves, (cAllocated + 16) * sizeof(**ppaLeaves)); + if (pvNew) + *ppaLeaves = (PCPUMCPUIDLEAF)pvNew; + else + { + RTMemFree(*ppaLeaves); + *ppaLeaves = NULL; + } + } + } + /* + * Otherwise, we're on the hyper heap and are probably just inserting + * one or two leaves and should conserve space. + */ + else + { +#ifdef IN_VBOX_CPU_REPORT + AssertReleaseFailed(); +#else + Assert(ppaLeaves == &pVM->cpum.s.GuestInfo.paCpuIdLeavesR3); + Assert(cLeaves == pVM->cpum.s.GuestInfo.cCpuIdLeaves); + + size_t cb = cLeaves * sizeof(**ppaLeaves); + size_t cbNew = (cLeaves + 1) * sizeof(**ppaLeaves); + int rc = MMR3HyperRealloc(pVM, *ppaLeaves, cb, 32, MM_TAG_CPUM_CPUID, cbNew, (void **)ppaLeaves); + if (RT_SUCCESS(rc)) + { + /* Update the R0 and RC pointers. */ + pVM->cpum.s.GuestInfo.paCpuIdLeavesR0 = MMHyperR3ToR0(pVM, *ppaLeaves); + pVM->cpum.s.GuestInfo.paCpuIdLeavesRC = MMHyperR3ToRC(pVM, *ppaLeaves); + } + else + { + *ppaLeaves = NULL; + pVM->cpum.s.GuestInfo.paCpuIdLeavesR0 = NIL_RTR0PTR; + pVM->cpum.s.GuestInfo.paCpuIdLeavesRC = NIL_RTRCPTR; + LogRel(("CPUM: cpumR3CpuIdEnsureSpace: MMR3HyperRealloc failed. rc=%Rrc\n", rc)); + } +#endif + } + return *ppaLeaves; +} + + +/** + * Appends a CPUID leaf or sub-leaf. + * + * ASSUMES linear insertion order, so we won't need to do any searching or + * replace anything. Use cpumR3CpuIdInsert() for those cases. + * + * @returns VINF_SUCCESS or VERR_NO_MEMORY. On error, *ppaLeaves is freed, so + * the caller need do no more work.
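+ *
+ * Illustrative call (a sketch of how the collection code below uses it; the
+ * register values are simply whatever CPUID returned for the leaf):
+ * @code
+ *     int rc = cpumR3CollectCpuIdInfoAddOne(ppaLeaves, pcLeaves,
+ *                                           uLeaf, 0, 0, uEax, uEbx, uEcx, uEdx, 0);
+ * @endcode
+ *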
+ * @param ppaLeaves Pointer to the pointer to the array of sorted + * CPUID leaves and sub-leaves. + * @param pcLeaves Where we keep the leaf count for *ppaLeaves. + * @param uLeaf The leaf we're adding. + * @param uSubLeaf The sub-leaf number. + * @param fSubLeafMask The sub-leaf mask. + * @param uEax The EAX value. + * @param uEbx The EBX value. + * @param uEcx The ECX value. + * @param uEdx The EDX value. + * @param fFlags The flags. + */ +static int cpumR3CollectCpuIdInfoAddOne(PCPUMCPUIDLEAF *ppaLeaves, uint32_t *pcLeaves, + uint32_t uLeaf, uint32_t uSubLeaf, uint32_t fSubLeafMask, + uint32_t uEax, uint32_t uEbx, uint32_t uEcx, uint32_t uEdx, uint32_t fFlags) +{ + if (!cpumR3CpuIdEnsureSpace(NULL /* pVM */, ppaLeaves, *pcLeaves)) + return VERR_NO_MEMORY; + + PCPUMCPUIDLEAF pNew = &(*ppaLeaves)[*pcLeaves]; + Assert( *pcLeaves == 0 + || pNew[-1].uLeaf < uLeaf + || (pNew[-1].uLeaf == uLeaf && pNew[-1].uSubLeaf < uSubLeaf) ); + + pNew->uLeaf = uLeaf; + pNew->uSubLeaf = uSubLeaf; + pNew->fSubLeafMask = fSubLeafMask; + pNew->uEax = uEax; + pNew->uEbx = uEbx; + pNew->uEcx = uEcx; + pNew->uEdx = uEdx; + pNew->fFlags = fFlags; + + *pcLeaves += 1; + return VINF_SUCCESS; +} + + +/** + * Checks that we've updated the CPUID leaves array correctly. + * + * This is a no-op in non-strict builds. + * + * @param paLeaves The leaves array. + * @param cLeaves The number of leaves. + */ +static void cpumR3CpuIdAssertOrder(PCPUMCPUIDLEAF paLeaves, uint32_t cLeaves) +{ +#ifdef VBOX_STRICT + for (uint32_t i = 1; i < cLeaves; i++) + if (paLeaves[i].uLeaf != paLeaves[i - 1].uLeaf) + AssertMsg(paLeaves[i].uLeaf > paLeaves[i - 1].uLeaf, ("%#x vs %#x\n", paLeaves[i].uLeaf, paLeaves[i - 1].uLeaf)); + else + { + AssertMsg(paLeaves[i].uSubLeaf > paLeaves[i - 1].uSubLeaf, + ("%#x: %#x vs %#x\n", paLeaves[i].uLeaf, paLeaves[i].uSubLeaf, paLeaves[i - 1].uSubLeaf)); + AssertMsg(paLeaves[i].fSubLeafMask == paLeaves[i - 1].fSubLeafMask, + ("%#x/%#x: %#x vs %#x\n", paLeaves[i].uLeaf, paLeaves[i].uSubLeaf, paLeaves[i].fSubLeafMask, paLeaves[i - 1].fSubLeafMask)); + AssertMsg(paLeaves[i].fFlags == paLeaves[i - 1].fFlags, + ("%#x/%#x: %#x vs %#x\n", paLeaves[i].uLeaf, paLeaves[i].uSubLeaf, paLeaves[i].fFlags, paLeaves[i - 1].fFlags)); + } +#else + NOREF(paLeaves); + NOREF(cLeaves); +#endif +} + + +/** + * Inserts a CPU ID leaf, replacing any existing ones. + * + * When inserting a simple leaf where we already got a series of sub-leaves with + * the same leaf number (eax), the simple leaf will replace the whole series. + * + * When pVM is NULL, this ASSUMES that the leaves array is still on the normal + * host-context heap and has only been allocated/reallocated by the + * cpumR3CpuIdEnsureSpace function. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. If NULL, use + * the process heap, otherwise the VM's hyper heap. + * @param ppaLeaves Pointer to the pointer to the array of sorted + * CPUID leaves and sub-leaves. Must be NULL if using + * the hyper heap. + * @param pcLeaves Where we keep the leaf count for *ppaLeaves. Must + * be NULL if using the hyper heap. + * @param pNewLeaf Pointer to the data of the new leaf we're about to + * insert. + */ +static int cpumR3CpuIdInsert(PVM pVM, PCPUMCPUIDLEAF *ppaLeaves, uint32_t *pcLeaves, PCPUMCPUIDLEAF pNewLeaf) +{ + /* + * Validate input parameters if we are using the hyper heap and use the VM's CPUID arrays. 
+ */ + if (pVM) + { + AssertReturn(!ppaLeaves, VERR_INVALID_PARAMETER); + AssertReturn(!pcLeaves, VERR_INVALID_PARAMETER); + + ppaLeaves = &pVM->cpum.s.GuestInfo.paCpuIdLeavesR3; + pcLeaves = &pVM->cpum.s.GuestInfo.cCpuIdLeaves; + } + + PCPUMCPUIDLEAF paLeaves = *ppaLeaves; + uint32_t cLeaves = *pcLeaves; + + /* + * Validate the new leaf a little. + */ + AssertLogRelMsgReturn(!(pNewLeaf->fFlags & ~CPUMCPUIDLEAF_F_VALID_MASK), + ("%#x/%#x: %#x", pNewLeaf->uLeaf, pNewLeaf->uSubLeaf, pNewLeaf->fFlags), + VERR_INVALID_FLAGS); + AssertLogRelMsgReturn(pNewLeaf->fSubLeafMask != 0 || pNewLeaf->uSubLeaf == 0, + ("%#x/%#x: %#x", pNewLeaf->uLeaf, pNewLeaf->uSubLeaf, pNewLeaf->fSubLeafMask), + VERR_INVALID_PARAMETER); + AssertLogRelMsgReturn(RT_IS_POWER_OF_TWO(pNewLeaf->fSubLeafMask + 1), + ("%#x/%#x: %#x", pNewLeaf->uLeaf, pNewLeaf->uSubLeaf, pNewLeaf->fSubLeafMask), + VERR_INVALID_PARAMETER); + AssertLogRelMsgReturn((pNewLeaf->fSubLeafMask & pNewLeaf->uSubLeaf) == pNewLeaf->uSubLeaf, + ("%#x/%#x: %#x", pNewLeaf->uLeaf, pNewLeaf->uSubLeaf, pNewLeaf->fSubLeafMask), + VERR_INVALID_PARAMETER); + + /* + * Find insertion point. The lazy bird uses the same excuse as in + * cpumR3CpuIdGetLeaf(), but optimizes for linear insertion (saved state). + */ + uint32_t i; + if ( cLeaves > 0 + && paLeaves[cLeaves - 1].uLeaf < pNewLeaf->uLeaf) + { + /* Add at end. */ + i = cLeaves; + } + else if ( cLeaves > 0 + && paLeaves[cLeaves - 1].uLeaf == pNewLeaf->uLeaf) + { + /* Either replacing the last leaf or dealing with sub-leaves. Spool + back to the first sub-leaf to pretend we did the linear search. */ + i = cLeaves - 1; + while ( i > 0 + && paLeaves[i - 1].uLeaf == pNewLeaf->uLeaf) + i--; + } + else + { + /* Linear search from the start. */ + i = 0; + while ( i < cLeaves + && paLeaves[i].uLeaf < pNewLeaf->uLeaf) + i++; + } + if ( i < cLeaves + && paLeaves[i].uLeaf == pNewLeaf->uLeaf) + { + if (paLeaves[i].fSubLeafMask != pNewLeaf->fSubLeafMask) + { + /* + * The sub-leaf mask differs, replace all existing leaves with the + * same leaf number. + */ + uint32_t c = 1; + while ( i + c < cLeaves + && paLeaves[i + c].uLeaf == pNewLeaf->uLeaf) + c++; + if (c > 1 && i + c < cLeaves) + { + memmove(&paLeaves[i + c], &paLeaves[i + 1], (cLeaves - i - c) * sizeof(paLeaves[0])); + *pcLeaves = cLeaves -= c - 1; + } + + paLeaves[i] = *pNewLeaf; + cpumR3CpuIdAssertOrder(*ppaLeaves, *pcLeaves); + return VINF_SUCCESS; + } + + /* Find sub-leaf insertion point. */ + while ( i < cLeaves + && paLeaves[i].uSubLeaf < pNewLeaf->uSubLeaf + && paLeaves[i].uLeaf == pNewLeaf->uLeaf) + i++; + + /* + * If we've got an exactly matching leaf, replace it. + */ + if ( i < cLeaves + && paLeaves[i].uLeaf == pNewLeaf->uLeaf + && paLeaves[i].uSubLeaf == pNewLeaf->uSubLeaf) + { + paLeaves[i] = *pNewLeaf; + cpumR3CpuIdAssertOrder(*ppaLeaves, *pcLeaves); + return VINF_SUCCESS; + } + } + + /* + * Adding a new leaf at 'i'. + */ + AssertLogRelReturn(cLeaves < CPUM_CPUID_MAX_LEAVES, VERR_TOO_MANY_CPUID_LEAVES); + paLeaves = cpumR3CpuIdEnsureSpace(pVM, ppaLeaves, cLeaves); + if (!paLeaves) + return VERR_NO_MEMORY; + + if (i < cLeaves) + memmove(&paLeaves[i + 1], &paLeaves[i], (cLeaves - i) * sizeof(paLeaves[0])); + *pcLeaves += 1; + paLeaves[i] = *pNewLeaf; + + cpumR3CpuIdAssertOrder(*ppaLeaves, *pcLeaves); + return VINF_SUCCESS; +} + + +#ifndef IN_VBOX_CPU_REPORT +/** + * Removes a range of CPUID leaves. + * + * This will not reallocate the array. + * + * @param paLeaves The array of sorted CPUID leaves and sub-leaves. 
+ * @param pcLeaves Where we keep the leaf count for @a paLeaves. + * @param uFirst The first leaf. + * @param uLast The last leaf. + */ +static void cpumR3CpuIdRemoveRange(PCPUMCPUIDLEAF paLeaves, uint32_t *pcLeaves, uint32_t uFirst, uint32_t uLast) +{ + uint32_t cLeaves = *pcLeaves; + + Assert(uFirst <= uLast); + + /* + * Find the first one. + */ + uint32_t iFirst = 0; + while ( iFirst < cLeaves + && paLeaves[iFirst].uLeaf < uFirst) + iFirst++; + + /* + * Find the end (last + 1). + */ + uint32_t iEnd = iFirst; + while ( iEnd < cLeaves + && paLeaves[iEnd].uLeaf <= uLast) + iEnd++; + + /* + * Adjust the array if anything needs removing. + */ + if (iFirst < iEnd) + { + if (iEnd < cLeaves) + memmove(&paLeaves[iFirst], &paLeaves[iEnd], (cLeaves - iEnd) * sizeof(paLeaves[0])); + *pcLeaves = cLeaves -= (iEnd - iFirst); + } + + cpumR3CpuIdAssertOrder(paLeaves, *pcLeaves); +} +#endif /* IN_VBOX_CPU_REPORT */ + + +/** + * Checks if ECX make a difference when reading a given CPUID leaf. + * + * @returns @c true if it does, @c false if it doesn't. + * @param uLeaf The leaf we're reading. + * @param pcSubLeaves Number of sub-leaves accessible via ECX. + * @param pfFinalEcxUnchanged Whether ECX is passed thru when going beyond the + * final sub-leaf (for leaf 0xb only). + */ +static bool cpumR3IsEcxRelevantForCpuIdLeaf(uint32_t uLeaf, uint32_t *pcSubLeaves, bool *pfFinalEcxUnchanged) +{ + *pfFinalEcxUnchanged = false; + + uint32_t auCur[4]; + uint32_t auPrev[4]; + ASMCpuIdExSlow(uLeaf, 0, 0, 0, &auPrev[0], &auPrev[1], &auPrev[2], &auPrev[3]); + + /* Look for sub-leaves. */ + uint32_t uSubLeaf = 1; + for (;;) + { + ASMCpuIdExSlow(uLeaf, 0, uSubLeaf, 0, &auCur[0], &auCur[1], &auCur[2], &auCur[3]); + if (memcmp(auCur, auPrev, sizeof(auCur))) + break; + + /* Advance / give up. */ + uSubLeaf++; + if (uSubLeaf >= 64) + { + *pcSubLeaves = 1; + return false; + } + } + + /* Count sub-leaves. */ + uint32_t cMinLeaves = uLeaf == 0xd ? 64 : 0; + uint32_t cRepeats = 0; + uSubLeaf = 0; + for (;;) + { + ASMCpuIdExSlow(uLeaf, 0, uSubLeaf, 0, &auCur[0], &auCur[1], &auCur[2], &auCur[3]); + + /* Figuring out when to stop isn't entirely straight forward as we need + to cover undocumented behavior up to a point and implementation shortcuts. */ + + /* 1. Look for more than 4 repeating value sets. */ + if ( auCur[0] == auPrev[0] + && auCur[1] == auPrev[1] + && ( auCur[2] == auPrev[2] + || ( auCur[2] == uSubLeaf + && auPrev[2] == uSubLeaf - 1) ) + && auCur[3] == auPrev[3]) + { + if ( uLeaf != 0xd + || uSubLeaf >= 64 + || ( auCur[0] == 0 + && auCur[1] == 0 + && auCur[2] == 0 + && auCur[3] == 0 + && auPrev[2] == 0) ) + cRepeats++; + if (cRepeats > 4 && uSubLeaf >= cMinLeaves) + break; + } + else + cRepeats = 0; + + /* 2. Look for zero values. */ + if ( auCur[0] == 0 + && auCur[1] == 0 + && (auCur[2] == 0 || auCur[2] == uSubLeaf) + && (auCur[3] == 0 || uLeaf == 0xb /* edx is fixed */) + && uSubLeaf >= cMinLeaves) + { + cRepeats = 0; + break; + } + + /* 3. Leaf 0xb level type 0 check. */ + if ( uLeaf == 0xb + && (auCur[2] & 0xff00) == 0 + && (auPrev[2] & 0xff00) == 0) + { + cRepeats = 0; + break; + } + + /* 99. Give up. */ + if (uSubLeaf >= 128) + { +#ifndef IN_VBOX_CPU_REPORT + /* Ok, limit it according to the documentation if possible just to + avoid annoying users with these detection issues. 
*/ + uint32_t cDocLimit = UINT32_MAX; + if (uLeaf == 0x4) + cDocLimit = 4; + else if (uLeaf == 0x7) + cDocLimit = 1; + else if (uLeaf == 0xd) + cDocLimit = 63; + else if (uLeaf == 0xf) + cDocLimit = 2; + if (cDocLimit != UINT32_MAX) + { + *pfFinalEcxUnchanged = auCur[2] == uSubLeaf && uLeaf == 0xb; + *pcSubLeaves = cDocLimit + 3; + return true; + } +#endif + *pcSubLeaves = UINT32_MAX; + return true; + } + + /* Advance. */ + uSubLeaf++; + memcpy(auPrev, auCur, sizeof(auCur)); + } + + /* Standard exit. */ + *pfFinalEcxUnchanged = auCur[2] == uSubLeaf && uLeaf == 0xb; + *pcSubLeaves = uSubLeaf + 1 - cRepeats; + if (*pcSubLeaves == 0) + *pcSubLeaves = 1; + return true; +} + + +/** + * Gets a CPU ID leaf. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pLeaf Where to store the found leaf. + * @param uLeaf The leaf to locate. + * @param uSubLeaf The subleaf to locate. Pass 0 if no sub-leaves. + */ +VMMR3DECL(int) CPUMR3CpuIdGetLeaf(PVM pVM, PCPUMCPUIDLEAF pLeaf, uint32_t uLeaf, uint32_t uSubLeaf) +{ + PCPUMCPUIDLEAF pcLeaf = cpumR3CpuIdGetLeaf(pVM->cpum.s.GuestInfo.paCpuIdLeavesR3, pVM->cpum.s.GuestInfo.cCpuIdLeaves, + uLeaf, uSubLeaf); + if (pcLeaf) + { + memcpy(pLeaf, pcLeaf, sizeof(*pLeaf)); + return VINF_SUCCESS; + } + + return VERR_NOT_FOUND; +} + + +/** + * Inserts a CPU ID leaf, replacing any existing ones. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pNewLeaf Pointer to the leaf being inserted. + */ +VMMR3DECL(int) CPUMR3CpuIdInsert(PVM pVM, PCPUMCPUIDLEAF pNewLeaf) +{ + /* + * Validate parameters. + */ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pNewLeaf, VERR_INVALID_PARAMETER); + + /* + * Disallow replacing CPU ID leaves that this API currently cannot manage. + * These leaves have dependencies on saved-states, see PATMCpuidReplacement(). + * If you want to modify these leaves, use CPUMSetGuestCpuIdFeature(). + */ + if ( pNewLeaf->uLeaf == UINT32_C(0x00000000) /* Standard */ + || pNewLeaf->uLeaf == UINT32_C(0x00000001) + || pNewLeaf->uLeaf == UINT32_C(0x80000000) /* Extended */ + || pNewLeaf->uLeaf == UINT32_C(0x80000001) + || pNewLeaf->uLeaf == UINT32_C(0xc0000000) /* Centaur */ + || pNewLeaf->uLeaf == UINT32_C(0xc0000001) ) + { + return VERR_NOT_SUPPORTED; + } + + return cpumR3CpuIdInsert(pVM, NULL /* ppaLeaves */, NULL /* pcLeaves */, pNewLeaf); +} + +/** + * Collects CPUID leaves and sub-leaves, returning a sorted array of them. + * + * @returns VBox status code. + * @param ppaLeaves Where to return the array pointer on success. + * Use RTMemFree to release. + * @param pcLeaves Where to return the size of the array on + * success. + */ +VMMR3DECL(int) CPUMR3CpuIdCollectLeaves(PCPUMCPUIDLEAF *ppaLeaves, uint32_t *pcLeaves) +{ + *ppaLeaves = NULL; + *pcLeaves = 0; + + /* + * Try out various candidates. This must be sorted! 
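+ *
+ * Each entry is the base of a candidate CPUID leaf range: 0x00000000 is the
+ * standard range, 0x80000000 the extended range and 0xc0000000 the Centaur
+ * range, while the remaining bases are probes for possible vendor specific
+ * ranges. The two 0x8ffffffx entries are marked fSpecial and are handled
+ * separately below.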
+ */ + static struct { uint32_t uMsr; bool fSpecial; } const s_aCandidates[] = + { + { UINT32_C(0x00000000), false }, + { UINT32_C(0x10000000), false }, + { UINT32_C(0x20000000), false }, + { UINT32_C(0x30000000), false }, + { UINT32_C(0x40000000), false }, + { UINT32_C(0x50000000), false }, + { UINT32_C(0x60000000), false }, + { UINT32_C(0x70000000), false }, + { UINT32_C(0x80000000), false }, + { UINT32_C(0x80860000), false }, + { UINT32_C(0x8ffffffe), true }, + { UINT32_C(0x8fffffff), true }, + { UINT32_C(0x90000000), false }, + { UINT32_C(0xa0000000), false }, + { UINT32_C(0xb0000000), false }, + { UINT32_C(0xc0000000), false }, + { UINT32_C(0xd0000000), false }, + { UINT32_C(0xe0000000), false }, + { UINT32_C(0xf0000000), false }, + }; + + for (uint32_t iOuter = 0; iOuter < RT_ELEMENTS(s_aCandidates); iOuter++) + { + uint32_t uLeaf = s_aCandidates[iOuter].uMsr; + uint32_t uEax, uEbx, uEcx, uEdx; + ASMCpuIdExSlow(uLeaf, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx); + + /* + * Does EAX look like a typical leaf count value? + */ + if ( uEax > uLeaf + && uEax - uLeaf < UINT32_C(0xff)) /* Adjust 0xff limit when exceeded by real HW. */ + { + /* Yes, dump them. */ + uint32_t cLeaves = uEax - uLeaf + 1; + while (cLeaves-- > 0) + { + ASMCpuIdExSlow(uLeaf, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx); + + uint32_t fFlags = 0; + + /* There are currently three known leaves containing an APIC ID + that needs EMT specific attention */ + if (uLeaf == 1) + fFlags |= CPUMCPUIDLEAF_F_CONTAINS_APIC_ID; + else if (uLeaf == 0xb && uEcx != 0) + fFlags |= CPUMCPUIDLEAF_F_CONTAINS_APIC_ID; + else if ( uLeaf == UINT32_C(0x8000001e) + && ( uEax + || uEbx + || uEdx + || ASMIsAmdCpuEx((*ppaLeaves)[0].uEbx, (*ppaLeaves)[0].uEcx, (*ppaLeaves)[0].uEdx)) ) + fFlags |= CPUMCPUIDLEAF_F_CONTAINS_APIC_ID; + + /* The APIC bit is per-VCpu and needs flagging. */ + if (uLeaf == 1) + fFlags |= CPUMCPUIDLEAF_F_CONTAINS_APIC; + else if ( uLeaf == UINT32_C(0x80000001) + && ( (uEdx & X86_CPUID_AMD_FEATURE_EDX_APIC) + || ASMIsAmdCpuEx((*ppaLeaves)[0].uEbx, (*ppaLeaves)[0].uEcx, (*ppaLeaves)[0].uEdx)) ) + fFlags |= CPUMCPUIDLEAF_F_CONTAINS_APIC; + + /* Check three times here to reduce the chance of CPU migration + resulting in false positives with things like the APIC ID. */ + uint32_t cSubLeaves; + bool fFinalEcxUnchanged; + if ( cpumR3IsEcxRelevantForCpuIdLeaf(uLeaf, &cSubLeaves, &fFinalEcxUnchanged) + && cpumR3IsEcxRelevantForCpuIdLeaf(uLeaf, &cSubLeaves, &fFinalEcxUnchanged) + && cpumR3IsEcxRelevantForCpuIdLeaf(uLeaf, &cSubLeaves, &fFinalEcxUnchanged)) + { + if (cSubLeaves > (uLeaf == 0xd ? 68U : 16U)) + { + /* This shouldn't happen. But in case it does, file all + relevant details in the release log. */ + LogRel(("CPUM: VERR_CPUM_TOO_MANY_CPUID_SUBLEAVES! 
uLeaf=%#x cSubLeaves=%#x\n", uLeaf, cSubLeaves)); + LogRel(("------------------ dump of problematic sub-leaves -----------------\n")); + for (uint32_t uSubLeaf = 0; uSubLeaf < 128; uSubLeaf++) + { + uint32_t auTmp[4]; + ASMCpuIdExSlow(uLeaf, 0, uSubLeaf, 0, &auTmp[0], &auTmp[1], &auTmp[2], &auTmp[3]); + LogRel(("CPUM: %#010x, %#010x => %#010x %#010x %#010x %#010x\n", + uLeaf, uSubLeaf, auTmp[0], auTmp[1], auTmp[2], auTmp[3])); + } + LogRel(("----------------- dump of what we've found so far -----------------\n")); + for (uint32_t i = 0 ; i < *pcLeaves; i++) + LogRel(("CPUM: %#010x, %#010x/%#010x => %#010x %#010x %#010x %#010x\n", + (*ppaLeaves)[i].uLeaf, (*ppaLeaves)[i].uSubLeaf, (*ppaLeaves)[i].fSubLeafMask, + (*ppaLeaves)[i].uEax, (*ppaLeaves)[i].uEbx, (*ppaLeaves)[i].uEcx, (*ppaLeaves)[i].uEdx)); + LogRel(("\nPlease create a defect on virtualbox.org and attach this log file!\n\n")); + return VERR_CPUM_TOO_MANY_CPUID_SUBLEAVES; + } + + if (fFinalEcxUnchanged) + fFlags |= CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES; + + for (uint32_t uSubLeaf = 0; uSubLeaf < cSubLeaves; uSubLeaf++) + { + ASMCpuIdExSlow(uLeaf, 0, uSubLeaf, 0, &uEax, &uEbx, &uEcx, &uEdx); + int rc = cpumR3CollectCpuIdInfoAddOne(ppaLeaves, pcLeaves, + uLeaf, uSubLeaf, UINT32_MAX, uEax, uEbx, uEcx, uEdx, fFlags); + if (RT_FAILURE(rc)) + return rc; + } + } + else + { + int rc = cpumR3CollectCpuIdInfoAddOne(ppaLeaves, pcLeaves, + uLeaf, 0, 0, uEax, uEbx, uEcx, uEdx, fFlags); + if (RT_FAILURE(rc)) + return rc; + } + + /* next */ + uLeaf++; + } + } + /* + * Special CPUIDs need special handling as they don't follow the + * leaf count principle used above. + */ + else if (s_aCandidates[iOuter].fSpecial) + { + bool fKeep = false; + if (uLeaf == 0x8ffffffe && uEax == UINT32_C(0x00494544)) + fKeep = true; + else if ( uLeaf == 0x8fffffff + && RT_C_IS_PRINT(RT_BYTE1(uEax)) + && RT_C_IS_PRINT(RT_BYTE2(uEax)) + && RT_C_IS_PRINT(RT_BYTE3(uEax)) + && RT_C_IS_PRINT(RT_BYTE4(uEax)) + && RT_C_IS_PRINT(RT_BYTE1(uEbx)) + && RT_C_IS_PRINT(RT_BYTE2(uEbx)) + && RT_C_IS_PRINT(RT_BYTE3(uEbx)) + && RT_C_IS_PRINT(RT_BYTE4(uEbx)) + && RT_C_IS_PRINT(RT_BYTE1(uEcx)) + && RT_C_IS_PRINT(RT_BYTE2(uEcx)) + && RT_C_IS_PRINT(RT_BYTE3(uEcx)) + && RT_C_IS_PRINT(RT_BYTE4(uEcx)) + && RT_C_IS_PRINT(RT_BYTE1(uEdx)) + && RT_C_IS_PRINT(RT_BYTE2(uEdx)) + && RT_C_IS_PRINT(RT_BYTE3(uEdx)) + && RT_C_IS_PRINT(RT_BYTE4(uEdx)) ) + fKeep = true; + if (fKeep) + { + int rc = cpumR3CollectCpuIdInfoAddOne(ppaLeaves, pcLeaves, + uLeaf, 0, 0, uEax, uEbx, uEcx, uEdx, 0); + if (RT_FAILURE(rc)) + return rc; + } + } + } + + cpumR3CpuIdAssertOrder(*ppaLeaves, *pcLeaves); + return VINF_SUCCESS; +} + + +/** + * Determines the method the CPU uses to handle unknown CPUID leaves. + * + * @returns VBox status code. + * @param penmUnknownMethod Where to return the method. + * @param pDefUnknown Where to return default unknown values. This + * will be set, even if the resulting method + * doesn't actually need it.
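+ *
+ * Minimal usage sketch (illustrative only):
+ * @code
+ *     CPUMUNKNOWNCPUID enmUnknownMethod;
+ *     CPUMCPUID        DefUnknown;
+ *     int rc = CPUMR3CpuIdDetectUnknownLeafMethod(&enmUnknownMethod, &DefUnknown);
+ *     if (RT_SUCCESS(rc))
+ *         LogRel(("CPUM: Unknown CPUID leaf method: %s\n",
+ *                 CPUMR3CpuIdUnknownLeafMethodName(enmUnknownMethod)));
+ * @endcode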
+ */ +VMMR3DECL(int) CPUMR3CpuIdDetectUnknownLeafMethod(PCPUMUNKNOWNCPUID penmUnknownMethod, PCPUMCPUID pDefUnknown) +{ + uint32_t uLastStd = ASMCpuId_EAX(0); + uint32_t uLastExt = ASMCpuId_EAX(0x80000000); + if (!ASMIsValidExtRange(uLastExt)) + uLastExt = 0x80000000; + + uint32_t auChecks[] = + { + uLastStd + 1, + uLastStd + 5, + uLastStd + 8, + uLastStd + 32, + uLastStd + 251, + uLastExt + 1, + uLastExt + 8, + uLastExt + 15, + uLastExt + 63, + uLastExt + 255, + 0x7fbbffcc, + 0x833f7872, + 0xefff2353, + 0x35779456, + 0x1ef6d33e, + }; + + static const uint32_t s_auValues[] = + { + 0xa95d2156, + 0x00000001, + 0x00000002, + 0x00000008, + 0x00000000, + 0x55773399, + 0x93401769, + 0x12039587, + }; + + /* + * Simple method, all zeros. + */ + *penmUnknownMethod = CPUMUNKNOWNCPUID_DEFAULTS; + pDefUnknown->uEax = 0; + pDefUnknown->uEbx = 0; + pDefUnknown->uEcx = 0; + pDefUnknown->uEdx = 0; + + /* + * Intel has been observed returning the last standard leaf. + */ + uint32_t auLast[4]; + ASMCpuIdExSlow(uLastStd, 0, 0, 0, &auLast[0], &auLast[1], &auLast[2], &auLast[3]); + + uint32_t cChecks = RT_ELEMENTS(auChecks); + while (cChecks > 0) + { + uint32_t auCur[4]; + ASMCpuIdExSlow(auChecks[cChecks - 1], 0, 0, 0, &auCur[0], &auCur[1], &auCur[2], &auCur[3]); + if (memcmp(auCur, auLast, sizeof(auCur))) + break; + cChecks--; + } + if (cChecks == 0) + { + /* Now, what happens when the input changes? Esp. ECX. */ + uint32_t cTotal = 0; + uint32_t cSame = 0; + uint32_t cLastWithEcx = 0; + uint32_t cNeither = 0; + uint32_t cValues = RT_ELEMENTS(s_auValues); + while (cValues > 0) + { + uint32_t uValue = s_auValues[cValues - 1]; + uint32_t auLastWithEcx[4]; + ASMCpuIdExSlow(uLastStd, uValue, uValue, uValue, + &auLastWithEcx[0], &auLastWithEcx[1], &auLastWithEcx[2], &auLastWithEcx[3]); + + cChecks = RT_ELEMENTS(auChecks); + while (cChecks > 0) + { + uint32_t auCur[4]; + ASMCpuIdExSlow(auChecks[cChecks - 1], uValue, uValue, uValue, &auCur[0], &auCur[1], &auCur[2], &auCur[3]); + if (!memcmp(auCur, auLast, sizeof(auCur))) + { + cSame++; + if (!memcmp(auCur, auLastWithEcx, sizeof(auCur))) + cLastWithEcx++; + } + else if (!memcmp(auCur, auLastWithEcx, sizeof(auCur))) + cLastWithEcx++; + else + cNeither++; + cTotal++; + cChecks--; + } + cValues--; + } + + Log(("CPUM: cNeither=%d cSame=%d cLastWithEcx=%d cTotal=%d\n", cNeither, cSame, cLastWithEcx, cTotal)); + if (cSame == cTotal) + *penmUnknownMethod = CPUMUNKNOWNCPUID_LAST_STD_LEAF; + else if (cLastWithEcx == cTotal) + *penmUnknownMethod = CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX; + else + *penmUnknownMethod = CPUMUNKNOWNCPUID_LAST_STD_LEAF; + pDefUnknown->uEax = auLast[0]; + pDefUnknown->uEbx = auLast[1]; + pDefUnknown->uEcx = auLast[2]; + pDefUnknown->uEdx = auLast[3]; + return VINF_SUCCESS; + } + + /* + * Unchanged register values? + */ + cChecks = RT_ELEMENTS(auChecks); + while (cChecks > 0) + { + uint32_t const uLeaf = auChecks[cChecks - 1]; + uint32_t cValues = RT_ELEMENTS(s_auValues); + while (cValues > 0) + { + uint32_t uValue = s_auValues[cValues - 1]; + uint32_t auCur[4]; + ASMCpuIdExSlow(uLeaf, uValue, uValue, uValue, &auCur[0], &auCur[1], &auCur[2], &auCur[3]); + if ( auCur[0] != uLeaf + || auCur[1] != uValue + || auCur[2] != uValue + || auCur[3] != uValue) + break; + cValues--; + } + if (cValues != 0) + break; + cChecks--; + } + if (cChecks == 0) + { + *penmUnknownMethod = CPUMUNKNOWNCPUID_PASSTHRU; + return VINF_SUCCESS; + } + + /* + * Just go with the simple method. 
+ */ + return VINF_SUCCESS; +} + + +/** + * Translates an unknown CPUID leaf method into the constant name (sans prefix). + * + * @returns Read-only name string. + * @param enmUnknownMethod The method to translate. + */ +VMMR3DECL(const char *) CPUMR3CpuIdUnknownLeafMethodName(CPUMUNKNOWNCPUID enmUnknownMethod) +{ + switch (enmUnknownMethod) + { + case CPUMUNKNOWNCPUID_DEFAULTS: return "DEFAULTS"; + case CPUMUNKNOWNCPUID_LAST_STD_LEAF: return "LAST_STD_LEAF"; + case CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX: return "LAST_STD_LEAF_WITH_ECX"; + case CPUMUNKNOWNCPUID_PASSTHRU: return "PASSTHRU"; + + case CPUMUNKNOWNCPUID_INVALID: + case CPUMUNKNOWNCPUID_END: + case CPUMUNKNOWNCPUID_32BIT_HACK: + break; + } + return "Invalid-unknown-CPUID-method"; +} + + +/** + * Detects the CPU vendor given the CPUID(0) register values. + * + * @returns The vendor. + * @param uEAX EAX from CPUID(0). + * @param uEBX EBX from CPUID(0). + * @param uECX ECX from CPUID(0). + * @param uEDX EDX from CPUID(0). + */ +VMMR3DECL(CPUMCPUVENDOR) CPUMR3CpuIdDetectVendorEx(uint32_t uEAX, uint32_t uEBX, uint32_t uECX, uint32_t uEDX) +{ + if (ASMIsValidStdRange(uEAX)) + { + if (ASMIsAmdCpuEx(uEBX, uECX, uEDX)) + return CPUMCPUVENDOR_AMD; + + if (ASMIsIntelCpuEx(uEBX, uECX, uEDX)) + return CPUMCPUVENDOR_INTEL; + + if (ASMIsViaCentaurCpuEx(uEBX, uECX, uEDX)) + return CPUMCPUVENDOR_VIA; + + if (ASMIsShanghaiCpuEx(uEBX, uECX, uEDX)) + return CPUMCPUVENDOR_SHANGHAI; + + if ( uEBX == UINT32_C(0x69727943) /* CyrixInstead */ + && uECX == UINT32_C(0x64616574) + && uEDX == UINT32_C(0x736E4978)) + return CPUMCPUVENDOR_CYRIX; + + /* "Geode by NSC", example: family 5, model 9. */ + + /** @todo detect the other buggers... */ + } + + return CPUMCPUVENDOR_UNKNOWN; +} + + +/** + * Translates a CPU vendor enum value into the corresponding string constant. + * + * The name can be prefixed with 'CPUMCPUVENDOR_' to construct a valid enum + * value name. This can be useful when generating code. + * + * @returns Read-only name string. + * @param enmVendor The CPU vendor value. + */ +VMMR3DECL(const char *) CPUMR3CpuVendorName(CPUMCPUVENDOR enmVendor) +{ + switch (enmVendor) + { + case CPUMCPUVENDOR_INTEL: return "INTEL"; + case CPUMCPUVENDOR_AMD: return "AMD"; + case CPUMCPUVENDOR_VIA: return "VIA"; + case CPUMCPUVENDOR_CYRIX: return "CYRIX"; + case CPUMCPUVENDOR_SHANGHAI: return "SHANGHAI"; + case CPUMCPUVENDOR_UNKNOWN: return "UNKNOWN"; + + case CPUMCPUVENDOR_INVALID: + case CPUMCPUVENDOR_32BIT_HACK: + break; + } + return "Invalid-cpu-vendor"; +} + + +static PCCPUMCPUIDLEAF cpumR3CpuIdFindLeaf(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, uint32_t uLeaf) +{ + /* Could do binary search, doing linear now because I'm lazy. */ + PCCPUMCPUIDLEAF pLeaf = paLeaves; + while (cLeaves-- > 0) + { + if (pLeaf->uLeaf == uLeaf) + return pLeaf; + pLeaf++; + } + return NULL; +} + + +static PCCPUMCPUIDLEAF cpumR3CpuIdFindLeafEx(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, uint32_t uLeaf, uint32_t uSubLeaf) +{ + PCCPUMCPUIDLEAF pLeaf = cpumR3CpuIdFindLeaf(paLeaves, cLeaves, uLeaf); + if ( !pLeaf + || pLeaf->uSubLeaf != (uSubLeaf & pLeaf->fSubLeafMask)) + return pLeaf; + + /* Linear sub-leaf search. Lazy as usual. */ + cLeaves -= pLeaf - paLeaves; + while ( cLeaves-- > 0 + && pLeaf->uLeaf == uLeaf) + { + if (pLeaf->uSubLeaf == (uSubLeaf & pLeaf->fSubLeafMask)) + return pLeaf; + pLeaf++; + } + + return NULL; +} + + +static void cpumR3ExplodeVmxFeatures(PCVMXMSRS pVmxMsrs, PCPUMFEATURES pFeatures) +{ + Assert(pVmxMsrs); + Assert(pFeatures); + Assert(pFeatures->fVmx); + + /* Basic information.
*/ + { + uint64_t const u64Basic = pVmxMsrs->u64Basic; + pFeatures->fVmxInsOutInfo = RT_BF_GET(u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS); + } + + /* Pin-based VM-execution controls. */ + { + uint32_t const fPinCtls = pVmxMsrs->PinCtls.n.allowed1; + pFeatures->fVmxExtIntExit = RT_BOOL(fPinCtls & VMX_PIN_CTLS_EXT_INT_EXIT); + pFeatures->fVmxNmiExit = RT_BOOL(fPinCtls & VMX_PIN_CTLS_NMI_EXIT); + pFeatures->fVmxVirtNmi = RT_BOOL(fPinCtls & VMX_PIN_CTLS_VIRT_NMI); + pFeatures->fVmxPreemptTimer = RT_BOOL(fPinCtls & VMX_PIN_CTLS_PREEMPT_TIMER); + pFeatures->fVmxPostedInt = RT_BOOL(fPinCtls & VMX_PIN_CTLS_POSTED_INT); + } + + /* Processor-based VM-execution controls. */ + { + uint32_t const fProcCtls = pVmxMsrs->ProcCtls.n.allowed1; + pFeatures->fVmxIntWindowExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT); + pFeatures->fVmxTscOffsetting = RT_BOOL(fProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING); + pFeatures->fVmxHltExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_HLT_EXIT); + pFeatures->fVmxInvlpgExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_INVLPG_EXIT); + pFeatures->fVmxMwaitExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_MWAIT_EXIT); + pFeatures->fVmxRdpmcExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_RDPMC_EXIT); + pFeatures->fVmxRdtscExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_RDTSC_EXIT); + pFeatures->fVmxCr3LoadExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_CR3_LOAD_EXIT); + pFeatures->fVmxCr3StoreExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_CR3_STORE_EXIT); + pFeatures->fVmxCr8LoadExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_CR8_LOAD_EXIT); + pFeatures->fVmxCr8StoreExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_CR8_STORE_EXIT); + pFeatures->fVmxUseTprShadow = RT_BOOL(fProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW); + pFeatures->fVmxNmiWindowExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT); + pFeatures->fVmxMovDRxExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT); + pFeatures->fVmxUncondIoExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT); + pFeatures->fVmxUseIoBitmaps = RT_BOOL(fProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS); + pFeatures->fVmxMonitorTrapFlag = RT_BOOL(fProcCtls & VMX_PROC_CTLS_MONITOR_TRAP_FLAG); + pFeatures->fVmxUseMsrBitmaps = RT_BOOL(fProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS); + pFeatures->fVmxMonitorExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_MONITOR_EXIT); + pFeatures->fVmxPauseExit = RT_BOOL(fProcCtls & VMX_PROC_CTLS_PAUSE_EXIT); + pFeatures->fVmxSecondaryExecCtls = RT_BOOL(fProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS); + } + + /* Secondary processor-based VM-execution controls. */ + { + uint32_t const fProcCtls2 = pFeatures->fVmxSecondaryExecCtls ? 
pVmxMsrs->ProcCtls2.n.allowed1 : 0; + pFeatures->fVmxVirtApicAccess = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS); + pFeatures->fVmxEpt = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_EPT); + pFeatures->fVmxDescTableExit = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_DESC_TABLE_EXIT); + pFeatures->fVmxRdtscp = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_RDTSCP); + pFeatures->fVmxVirtX2ApicMode = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_VIRT_X2APIC_MODE); + pFeatures->fVmxVpid = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_VPID); + pFeatures->fVmxWbinvdExit = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_WBINVD_EXIT); + pFeatures->fVmxUnrestrictedGuest = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST); + pFeatures->fVmxApicRegVirt = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_APIC_REG_VIRT); + pFeatures->fVmxVirtIntDelivery = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_VIRT_INT_DELIVERY); + pFeatures->fVmxPauseLoopExit = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT); + pFeatures->fVmxRdrandExit = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_RDRAND_EXIT); + pFeatures->fVmxInvpcid = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_INVPCID); + pFeatures->fVmxVmFunc = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_VMFUNC); + pFeatures->fVmxVmcsShadowing = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING); + pFeatures->fVmxRdseedExit = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_RDSEED_EXIT); + pFeatures->fVmxPml = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_PML); + pFeatures->fVmxEptXcptVe = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_EPT_VE); + pFeatures->fVmxXsavesXrstors = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_XSAVES_XRSTORS); + pFeatures->fVmxUseTscScaling = RT_BOOL(fProcCtls2 & VMX_PROC_CTLS2_TSC_SCALING); + } + + /* VM-exit controls. */ + { + uint32_t const fExitCtls = pVmxMsrs->ExitCtls.n.allowed1; + pFeatures->fVmxExitSaveDebugCtls = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_SAVE_DEBUG); + pFeatures->fVmxHostAddrSpaceSize = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE); + pFeatures->fVmxExitAckExtInt = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_ACK_EXT_INT); + pFeatures->fVmxExitSavePatMsr = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_SAVE_PAT_MSR); + pFeatures->fVmxExitLoadPatMsr = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_LOAD_PAT_MSR); + pFeatures->fVmxExitSaveEferMsr = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_SAVE_EFER_MSR); + pFeatures->fVmxExitLoadEferMsr = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR); + pFeatures->fVmxSavePreemptTimer = RT_BOOL(fExitCtls & VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER); + } + + /* VM-entry controls. */ + { + uint32_t const fEntryCtls = pVmxMsrs->EntryCtls.n.allowed1; + pFeatures->fVmxEntryLoadDebugCtls = RT_BOOL(fEntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG); + pFeatures->fVmxIa32eModeGuest = RT_BOOL(fEntryCtls & VMX_ENTRY_CTLS_IA32E_MODE_GUEST); + pFeatures->fVmxEntryLoadEferMsr = RT_BOOL(fEntryCtls & VMX_ENTRY_CTLS_LOAD_EFER_MSR); + pFeatures->fVmxEntryLoadPatMsr = RT_BOOL(fEntryCtls & VMX_ENTRY_CTLS_LOAD_PAT_MSR); + } + + /* Miscellaneous data. 
*/ + { + uint32_t const fMiscData = pVmxMsrs->u64Misc; + pFeatures->fVmxExitSaveEferLma = RT_BOOL(fMiscData & VMX_MISC_EXIT_SAVE_EFER_LMA); + pFeatures->fVmxIntelPt = RT_BOOL(fMiscData & VMX_MISC_INTEL_PT); + pFeatures->fVmxVmwriteAll = RT_BOOL(fMiscData & VMX_MISC_VMWRITE_ALL); + pFeatures->fVmxEntryInjectSoftInt = RT_BOOL(fMiscData & VMX_MISC_ENTRY_INJECT_SOFT_INT); + } +} + + +int cpumR3CpuIdExplodeFeatures(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCCPUMMSRS pMsrs, PCPUMFEATURES pFeatures) +{ + Assert(pMsrs); + RT_ZERO(*pFeatures); + if (cLeaves >= 2) + { + AssertLogRelReturn(paLeaves[0].uLeaf == 0, VERR_CPUM_IPE_1); + AssertLogRelReturn(paLeaves[1].uLeaf == 1, VERR_CPUM_IPE_1); + PCCPUMCPUIDLEAF const pStd0Leaf = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 0, 0); + AssertLogRelReturn(pStd0Leaf, VERR_CPUM_IPE_1); + PCCPUMCPUIDLEAF const pStd1Leaf = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 1, 0); + AssertLogRelReturn(pStd1Leaf, VERR_CPUM_IPE_1); + + pFeatures->enmCpuVendor = CPUMR3CpuIdDetectVendorEx(pStd0Leaf->uEax, + pStd0Leaf->uEbx, + pStd0Leaf->uEcx, + pStd0Leaf->uEdx); + pFeatures->uFamily = ASMGetCpuFamily(pStd1Leaf->uEax); + pFeatures->uModel = ASMGetCpuModel(pStd1Leaf->uEax, pFeatures->enmCpuVendor == CPUMCPUVENDOR_INTEL); + pFeatures->uStepping = ASMGetCpuStepping(pStd1Leaf->uEax); + pFeatures->enmMicroarch = CPUMR3CpuIdDetermineMicroarchEx((CPUMCPUVENDOR)pFeatures->enmCpuVendor, + pFeatures->uFamily, + pFeatures->uModel, + pFeatures->uStepping); + + PCCPUMCPUIDLEAF const pExtLeaf8 = cpumR3CpuIdFindLeaf(paLeaves, cLeaves, 0x80000008); + if (pExtLeaf8) + { + pFeatures->cMaxPhysAddrWidth = pExtLeaf8->uEax & 0xff; + pFeatures->cMaxLinearAddrWidth = (pExtLeaf8->uEax >> 8) & 0xff; + } + else if (pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_PSE36) + { + pFeatures->cMaxPhysAddrWidth = 36; + pFeatures->cMaxLinearAddrWidth = 36; + } + else + { + pFeatures->cMaxPhysAddrWidth = 32; + pFeatures->cMaxLinearAddrWidth = 32; + } + + /* Standard features. 
*/ + pFeatures->fMsr = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_MSR); + pFeatures->fApic = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_APIC); + pFeatures->fX2Apic = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_X2APIC); + pFeatures->fPse = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_PSE); + pFeatures->fPse36 = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_PSE36); + pFeatures->fPae = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_PAE); + pFeatures->fPat = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_PAT); + pFeatures->fFxSaveRstor = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_FXSR); + pFeatures->fXSaveRstor = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_XSAVE); + pFeatures->fOpSysXSaveRstor = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_OSXSAVE); + pFeatures->fMmx = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_MMX); + pFeatures->fSse = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_SSE); + pFeatures->fSse2 = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_SSE2); + pFeatures->fSse3 = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_SSE3); + pFeatures->fSsse3 = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_SSSE3); + pFeatures->fSse41 = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_SSE4_1); + pFeatures->fSse42 = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_SSE4_2); + pFeatures->fAvx = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_AVX); + pFeatures->fTsc = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_TSC); + pFeatures->fSysEnter = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_SEP); + pFeatures->fHypervisorPresent = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_HVP); + pFeatures->fMonitorMWait = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_MONITOR); + pFeatures->fMovCmpXchg16b = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_CX16); + pFeatures->fClFlush = RT_BOOL(pStd1Leaf->uEdx & X86_CPUID_FEATURE_EDX_CLFSH); + pFeatures->fPcid = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_PCID); + pFeatures->fVmx = RT_BOOL(pStd1Leaf->uEcx & X86_CPUID_FEATURE_ECX_VMX); + if (pFeatures->fVmx) + cpumR3ExplodeVmxFeatures(&pMsrs->hwvirt.vmx, pFeatures); + + /* Structured extended features. */ + PCCPUMCPUIDLEAF const pSxfLeaf0 = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 7, 0); + if (pSxfLeaf0) + { + pFeatures->fFsGsBase = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_FSGSBASE); + pFeatures->fAvx2 = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2); + pFeatures->fAvx512Foundation = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX512F); + pFeatures->fClFlushOpt = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT); + pFeatures->fInvpcid = RT_BOOL(pSxfLeaf0->uEbx & X86_CPUID_STEXT_FEATURE_EBX_INVPCID); + + pFeatures->fIbpb = RT_BOOL(pSxfLeaf0->uEdx & X86_CPUID_STEXT_FEATURE_EDX_IBRS_IBPB); + pFeatures->fIbrs = pFeatures->fIbpb; + pFeatures->fStibp = RT_BOOL(pSxfLeaf0->uEdx & X86_CPUID_STEXT_FEATURE_EDX_STIBP); + pFeatures->fFlushCmd = RT_BOOL(pSxfLeaf0->uEdx & X86_CPUID_STEXT_FEATURE_EDX_FLUSH_CMD); + pFeatures->fArchCap = RT_BOOL(pSxfLeaf0->uEdx & X86_CPUID_STEXT_FEATURE_EDX_ARCHCAP); + } + + /* MWAIT/MONITOR leaf. */ + PCCPUMCPUIDLEAF const pMWaitLeaf = cpumR3CpuIdFindLeaf(paLeaves, cLeaves, 5); + if (pMWaitLeaf) + pFeatures->fMWaitExtensions = (pMWaitLeaf->uEcx & (X86_CPUID_MWAIT_ECX_EXT | X86_CPUID_MWAIT_ECX_BREAKIRQIF0)) + == (X86_CPUID_MWAIT_ECX_EXT | X86_CPUID_MWAIT_ECX_BREAKIRQIF0); + + /* Extended features. 
*/ + PCCPUMCPUIDLEAF const pExtLeaf = cpumR3CpuIdFindLeaf(paLeaves, cLeaves, 0x80000001); + if (pExtLeaf) + { + pFeatures->fLongMode = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE); + pFeatures->fSysCall = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_EXT_FEATURE_EDX_SYSCALL); + pFeatures->fNoExecute = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_EXT_FEATURE_EDX_NX); + pFeatures->fLahfSahf = RT_BOOL(pExtLeaf->uEcx & X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF); + pFeatures->fRdTscP = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_EXT_FEATURE_EDX_RDTSCP); + pFeatures->fMovCr8In32Bit = RT_BOOL(pExtLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_CMPL); + pFeatures->f3DNow = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_3DNOW); + pFeatures->f3DNowPrefetch = (pExtLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF) + || (pExtLeaf->uEdx & ( X86_CPUID_EXT_FEATURE_EDX_LONG_MODE + | X86_CPUID_AMD_FEATURE_EDX_3DNOW)); + } + + /* VMX (VMXON, VMCS region and related data structures') physical address width (depends on long-mode). */ + pFeatures->cVmxMaxPhysAddrWidth = pFeatures->fLongMode ? pFeatures->cMaxPhysAddrWidth : 32; + + if ( pExtLeaf + && pFeatures->enmCpuVendor == CPUMCPUVENDOR_AMD) + { + /* AMD features. */ + pFeatures->fMsr |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_MSR); + pFeatures->fApic |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_APIC); + pFeatures->fPse |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_PSE); + pFeatures->fPse36 |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_PSE36); + pFeatures->fPae |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_PAE); + pFeatures->fPat |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_PAT); + pFeatures->fFxSaveRstor |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_FXSR); + pFeatures->fMmx |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_MMX); + pFeatures->fTsc |= RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_TSC); + pFeatures->fIbpb |= pExtLeaf8 && (pExtLeaf8->uEbx & X86_CPUID_AMD_EFEID_EBX_IBPB); + pFeatures->fAmdMmxExts = RT_BOOL(pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_AXMMX); + pFeatures->fXop = RT_BOOL(pExtLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_XOP); + pFeatures->fSvm = RT_BOOL(pExtLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_SVM); + if (pFeatures->fSvm) + { + PCCPUMCPUIDLEAF pSvmLeaf = cpumR3CpuIdFindLeaf(paLeaves, cLeaves, 0x8000000a); + AssertLogRelReturn(pSvmLeaf, VERR_CPUM_IPE_1); + pFeatures->fSvmNestedPaging = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING); + pFeatures->fSvmLbrVirt = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT); + pFeatures->fSvmSvmLock = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_SVM_LOCK); + pFeatures->fSvmNextRipSave = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE); + pFeatures->fSvmTscRateMsr = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR); + pFeatures->fSvmVmcbClean = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN); + pFeatures->fSvmFlusbByAsid = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID); + pFeatures->fSvmDecodeAssists = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS); + pFeatures->fSvmPauseFilter = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER); + pFeatures->fSvmPauseFilterThreshold = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD); + pFeatures->fSvmAvic = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_AVIC); + pFeatures->fSvmVirtVmsaveVmload = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_VIRT_VMSAVE_VMLOAD); 
+ pFeatures->fSvmVGif = RT_BOOL(pSvmLeaf->uEdx & X86_CPUID_SVM_FEATURE_EDX_VGIF); + pFeatures->uSvmMaxAsid = pSvmLeaf->uEbx; + } + } + + /* + * Quirks. + */ + pFeatures->fLeakyFxSR = pExtLeaf + && (pExtLeaf->uEdx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) + && pFeatures->enmCpuVendor == CPUMCPUVENDOR_AMD + && pFeatures->uFamily >= 6 /* K7 and up */; + + /* + * Max extended (/FPU) state. + */ + pFeatures->cbMaxExtendedState = pFeatures->fFxSaveRstor ? sizeof(X86FXSTATE) : sizeof(X86FPUSTATE); + if (pFeatures->fXSaveRstor) + { + PCCPUMCPUIDLEAF const pXStateLeaf0 = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 13, 0); + if (pXStateLeaf0) + { + if ( pXStateLeaf0->uEcx >= sizeof(X86FXSTATE) + && pXStateLeaf0->uEcx <= CPUM_MAX_XSAVE_AREA_SIZE + && RT_ALIGN_32(pXStateLeaf0->uEcx, 8) == pXStateLeaf0->uEcx + && pXStateLeaf0->uEbx >= sizeof(X86FXSTATE) + && pXStateLeaf0->uEbx <= pXStateLeaf0->uEcx + && RT_ALIGN_32(pXStateLeaf0->uEbx, 8) == pXStateLeaf0->uEbx) + { + pFeatures->cbMaxExtendedState = pXStateLeaf0->uEcx; + + /* (paranoia:) */ + PCCPUMCPUIDLEAF const pXStateLeaf1 = cpumR3CpuIdFindLeafEx(paLeaves, cLeaves, 13, 1); + if ( pXStateLeaf1 + && pXStateLeaf1->uEbx > pFeatures->cbMaxExtendedState + && pXStateLeaf1->uEbx <= CPUM_MAX_XSAVE_AREA_SIZE + && (pXStateLeaf1->uEcx || pXStateLeaf1->uEdx) ) + pFeatures->cbMaxExtendedState = pXStateLeaf1->uEbx; + } + else + AssertLogRelMsgFailedStmt(("Unexpected max/cur XSAVE area sizes: %#x/%#x\n", pXStateLeaf0->uEcx, pXStateLeaf0->uEbx), + pFeatures->fXSaveRstor = 0); + } + else + AssertLogRelMsgFailedStmt(("Expected leaf eax=0xd/ecx=0 with the XSAVE/XRSTOR feature!\n"), + pFeatures->fXSaveRstor = 0); + } + } + else + AssertLogRelReturn(cLeaves == 0, VERR_CPUM_IPE_1); + return VINF_SUCCESS; +} + + +/* + * + * Init related code. + * Init related code. + * Init related code. + * + * + */ +#ifdef VBOX_IN_VMM + + +/** + * Gets an exactly matching leaf + sub-leaf in the CPUID leaf array. + * + * This ignores the fSubLeafMask. + * + * @returns Pointer to the matching leaf, or NULL if not found. + * @param pCpum The CPUM instance data; the lookup is done in the + * guest's sorted CPUID leaf array. + * @param uLeaf The leaf to locate. + * @param uSubLeaf The subleaf to locate. + */ +static PCPUMCPUIDLEAF cpumR3CpuIdGetExactLeaf(PCPUM pCpum, uint32_t uLeaf, uint32_t uSubLeaf) +{ + uint64_t uNeedle = RT_MAKE_U64(uSubLeaf, uLeaf); + PCPUMCPUIDLEAF paLeaves = pCpum->GuestInfo.paCpuIdLeavesR3; + uint32_t iEnd = pCpum->GuestInfo.cCpuIdLeaves; + if (iEnd) + { + uint32_t iBegin = 0; + for (;;) + { + uint32_t const i = (iEnd - iBegin) / 2 + iBegin; + uint64_t const uCur = RT_MAKE_U64(paLeaves[i].uSubLeaf, paLeaves[i].uLeaf); + if (uNeedle < uCur) + { + if (i > iBegin) + iEnd = i; + else + break; + } + else if (uNeedle > uCur) + { + if (i + 1 < iEnd) + iBegin = i + 1; + else + break; + } + else + return &paLeaves[i]; + } + } + return NULL; +} + + +/** + * Loads MSR range overrides. + * + * This must be called before the MSR ranges are moved from the normal heap to + * the hyper heap! + * + * @returns VBox status code (VMSetError called). + * @param pVM The cross context VM structure. + * @param pMsrNode The CFGM node with the MSR overrides. + */ +static int cpumR3LoadMsrOverrides(PVM pVM, PCFGMNODE pMsrNode) +{ + for (PCFGMNODE pNode = CFGMR3GetFirstChild(pMsrNode); pNode; pNode = CFGMR3GetNextChild(pNode)) + { + /* + * Assemble a valid MSR range.
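+ *
+ * A typical override node looks something like this (sketch; the node name
+ * and the MSR number are made up, the value names match the CFGM queries
+ * done below):
+ *     "IllustrativeMsr"
+ *       "First"     (uint32) = 0x00000010
+ *       "Last"      (uint32, optional, defaults to First)
+ *       "Type"      (string, optional, defaults to "FixedValue")
+ *       "Value"     (uint64, optional, defaults to 0)
+ *       "WrGpMask"  (uint64, optional, defaults to 0)
+ *       "WrIgnMask" (uint64, optional, defaults to 0)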
+ */ + CPUMMSRRANGE MsrRange; + MsrRange.offCpumCpu = 0; + MsrRange.fReserved = 0; + + int rc = CFGMR3GetName(pNode, MsrRange.szName, sizeof(MsrRange.szName)); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry (name is probably too long): %Rrc\n", rc); + + rc = CFGMR3QueryU32(pNode, "First", &MsrRange.uFirst); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry '%s': Error querying mandatory 'First' value: %Rrc\n", + MsrRange.szName, rc); + + rc = CFGMR3QueryU32Def(pNode, "Last", &MsrRange.uLast, MsrRange.uFirst); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry '%s': Error querying 'Last' value: %Rrc\n", + MsrRange.szName, rc); + + char szType[32]; + rc = CFGMR3QueryStringDef(pNode, "Type", szType, sizeof(szType), "FixedValue"); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry '%s': Error querying 'Type' value: %Rrc\n", + MsrRange.szName, rc); + if (!RTStrICmp(szType, "FixedValue")) + { + MsrRange.enmRdFn = kCpumMsrRdFn_FixedValue; + MsrRange.enmWrFn = kCpumMsrWrFn_IgnoreWrite; + + rc = CFGMR3QueryU64Def(pNode, "Value", &MsrRange.uValue, 0); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry '%s': Error querying 'Value' value: %Rrc\n", + MsrRange.szName, rc); + + rc = CFGMR3QueryU64Def(pNode, "WrGpMask", &MsrRange.fWrGpMask, 0); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry '%s': Error querying 'WrGpMask' value: %Rrc\n", + MsrRange.szName, rc); + + rc = CFGMR3QueryU64Def(pNode, "WrIgnMask", &MsrRange.fWrIgnMask, 0); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid MSR entry '%s': Error querying 'WrIgnMask' value: %Rrc\n", + MsrRange.szName, rc); + } + else + return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, + "Invalid MSR entry '%s': Unknown type '%s'\n", MsrRange.szName, szType); + + /* + * Insert the range into the table (replaces/splits/shrinks existing + * MSR ranges). + */ + rc = cpumR3MsrRangesInsert(NULL /* pVM */, &pVM->cpum.s.GuestInfo.paMsrRangesR3, &pVM->cpum.s.GuestInfo.cMsrRanges, + &MsrRange); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Error adding MSR entry '%s': %Rrc\n", MsrRange.szName, rc); + } + + return VINF_SUCCESS; +} + + +/** + * Loads CPUID leaf overrides. + * + * This must be called before the CPUID leaves are moved from the normal + * heap to the hyper heap! + * + * @returns VBox status code (VMSetError called). + * @param pVM The cross context VM structure. + * @param pParentNode The CFGM node with the CPUID leaves. + * @param pszLabel How to label the overrides we're loading. + */ +static int cpumR3LoadCpuIdOverrides(PVM pVM, PCFGMNODE pParentNode, const char *pszLabel) +{ + for (PCFGMNODE pNode = CFGMR3GetFirstChild(pParentNode); pNode; pNode = CFGMR3GetNextChild(pNode)) + { + /* + * Get the leaf and subleaf numbers. + */ + char szName[128]; + int rc = CFGMR3GetName(pNode, szName, sizeof(szName)); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry (name is probably too long): %Rrc\n", pszLabel, rc); + + /* The leaf number is either specified directly or thru the node name. 
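// [Editor's illustration - not part of the upstream patch] What a "FixedValue" MSR override
// from the configuration boils down to at runtime: a [First..Last] MSR index range that reads
// back a constant, with two masks describing how writes are treated. The struct and lookup
// below are simplified stand-ins for the real CPUMMSRRANGE machinery, sketched under that
// assumption:
#include <cstdint>
#include <cstddef>

struct MiniMsrRange
{
    uint32_t uFirst, uLast;     /* inclusive MSR index range                      */
    uint64_t uValue;            /* value returned on RDMSR                        */
    uint64_t fWrGpMask;         /* writing a 1 to any of these bits raises #GP    */
    uint64_t fWrIgnMask;        /* these bits are silently ignored on WRMSR       */
};

/* Returns true and the fixed value if idMsr falls inside one of the override ranges. */
static bool miniRdMsr(const MiniMsrRange *paRanges, size_t cRanges, uint32_t idMsr, uint64_t *puValue)
{
    for (size_t i = 0; i < cRanges; i++)
        if (idMsr >= paRanges[i].uFirst && idMsr <= paRanges[i].uLast)
        {
            *puValue = paRanges[i].uValue;
            return true;
        }
    return false;
}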
*/ + uint32_t uLeaf; + rc = CFGMR3QueryU32(pNode, "Leaf", &uLeaf); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + { + rc = RTStrToUInt32Full(szName, 16, &uLeaf); + if (rc != VINF_SUCCESS) + return VMSetError(pVM, VERR_INVALID_NAME, RT_SRC_POS, + "Invalid %s entry: Invalid leaf number: '%s' \n", pszLabel, szName); + } + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'Leaf' value: %Rrc\n", + pszLabel, szName, rc); + + uint32_t uSubLeaf; + rc = CFGMR3QueryU32Def(pNode, "SubLeaf", &uSubLeaf, 0); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'SubLeaf' value: %Rrc\n", + pszLabel, szName, rc); + + uint32_t fSubLeafMask; + rc = CFGMR3QueryU32Def(pNode, "SubLeafMask", &fSubLeafMask, 0); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'SubLeafMask' value: %Rrc\n", + pszLabel, szName, rc); + + /* + * Look up the specified leaf, since the output register values + * defaults to any existing values. This allows overriding a single + * register, without needing to know the other values. + */ + PCCPUMCPUIDLEAF pLeaf = cpumR3CpuIdGetExactLeaf(&pVM->cpum.s, uLeaf, uSubLeaf); + CPUMCPUIDLEAF Leaf; + if (pLeaf) + Leaf = *pLeaf; + else + RT_ZERO(Leaf); + Leaf.uLeaf = uLeaf; + Leaf.uSubLeaf = uSubLeaf; + Leaf.fSubLeafMask = fSubLeafMask; + + rc = CFGMR3QueryU32Def(pNode, "eax", &Leaf.uEax, Leaf.uEax); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'eax' value: %Rrc\n", + pszLabel, szName, rc); + rc = CFGMR3QueryU32Def(pNode, "ebx", &Leaf.uEbx, Leaf.uEbx); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'ebx' value: %Rrc\n", + pszLabel, szName, rc); + rc = CFGMR3QueryU32Def(pNode, "ecx", &Leaf.uEcx, Leaf.uEcx); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'ecx' value: %Rrc\n", + pszLabel, szName, rc); + rc = CFGMR3QueryU32Def(pNode, "edx", &Leaf.uEdx, Leaf.uEdx); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Invalid %s entry '%s': Error querying 'edx' value: %Rrc\n", + pszLabel, szName, rc); + + /* + * Insert the leaf into the table (replaces existing ones). + */ + rc = cpumR3CpuIdInsert(NULL /* pVM */, &pVM->cpum.s.GuestInfo.paCpuIdLeavesR3, &pVM->cpum.s.GuestInfo.cCpuIdLeaves, + &Leaf); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, "Error adding CPUID leaf entry '%s': %Rrc\n", szName, rc); + } + + return VINF_SUCCESS; +} + + + +/** + * Fetches overrides for a CPUID leaf. + * + * @returns VBox status code. + * @param pLeaf The leaf to load the overrides into. + * @param pCfgNode The CFGM node containing the overrides + * (/CPUM/HostCPUID/ or /CPUM/CPUID/). + * @param iLeaf The CPUID leaf number. 
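// [Editor's illustration - not part of the upstream patch] The override loader defaults each
// output register to whatever the existing leaf already holds, so a user can override a single
// register without having to specify the other three. A standalone sketch of that merge, using
// a plain "was this register given" flag in place of the CFGM query-with-default calls (all
// names below are hypothetical):
#include <cstdint>

struct MiniCpuIdRegs { uint32_t uEax, uEbx, uEcx, uEdx; };
struct MiniOverride  { bool fEax, fEbx, fEcx, fEdx; MiniCpuIdRegs Regs; };

static MiniCpuIdRegs miniMergeOverride(const MiniCpuIdRegs &Existing, const MiniOverride &Ovr)
{
    MiniCpuIdRegs Result = Existing;            /* start from the current leaf values          */
    if (Ovr.fEax) Result.uEax = Ovr.Regs.uEax;  /* then apply only the registers that were set */
    if (Ovr.fEbx) Result.uEbx = Ovr.Regs.uEbx;
    if (Ovr.fEcx) Result.uEcx = Ovr.Regs.uEcx;
    if (Ovr.fEdx) Result.uEdx = Ovr.Regs.uEdx;
    return Result;
}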
+ */ +static int cpumR3CpuIdFetchLeafOverride(PCPUMCPUID pLeaf, PCFGMNODE pCfgNode, uint32_t iLeaf) +{ + PCFGMNODE pLeafNode = CFGMR3GetChildF(pCfgNode, "%RX32", iLeaf); + if (pLeafNode) + { + uint32_t u32; + int rc = CFGMR3QueryU32(pLeafNode, "eax", &u32); + if (RT_SUCCESS(rc)) + pLeaf->uEax = u32; + else + AssertReturn(rc == VERR_CFGM_VALUE_NOT_FOUND, rc); + + rc = CFGMR3QueryU32(pLeafNode, "ebx", &u32); + if (RT_SUCCESS(rc)) + pLeaf->uEbx = u32; + else + AssertReturn(rc == VERR_CFGM_VALUE_NOT_FOUND, rc); + + rc = CFGMR3QueryU32(pLeafNode, "ecx", &u32); + if (RT_SUCCESS(rc)) + pLeaf->uEcx = u32; + else + AssertReturn(rc == VERR_CFGM_VALUE_NOT_FOUND, rc); + + rc = CFGMR3QueryU32(pLeafNode, "edx", &u32); + if (RT_SUCCESS(rc)) + pLeaf->uEdx = u32; + else + AssertReturn(rc == VERR_CFGM_VALUE_NOT_FOUND, rc); + + } + return VINF_SUCCESS; +} + + +/** + * Load the overrides for a set of CPUID leaves. + * + * @returns VBox status code. + * @param paLeaves The leaf array. + * @param cLeaves The number of leaves. + * @param uStart The start leaf number. + * @param pCfgNode The CFGM node containing the overrides + * (/CPUM/HostCPUID/ or /CPUM/CPUID/). + */ +static int cpumR3CpuIdInitLoadOverrideSet(uint32_t uStart, PCPUMCPUID paLeaves, uint32_t cLeaves, PCFGMNODE pCfgNode) +{ + for (uint32_t i = 0; i < cLeaves; i++) + { + int rc = cpumR3CpuIdFetchLeafOverride(&paLeaves[i], pCfgNode, uStart + i); + if (RT_FAILURE(rc)) + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Installs the CPUID leaves and explods the data into structures like + * GuestFeatures and CPUMCTX::aoffXState. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCpum The CPUM part of @a VM. + * @param paLeaves The leaves. These will be copied (but not freed). + * @param cLeaves The number of leaves. + * @param pMsrs The MSRs. + */ +static int cpumR3CpuIdInstallAndExplodeLeaves(PVM pVM, PCPUM pCpum, PCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCCPUMMSRS pMsrs) +{ + cpumR3CpuIdAssertOrder(paLeaves, cLeaves); + + /* + * Install the CPUID information. + */ + int rc = MMHyperDupMem(pVM, paLeaves, sizeof(paLeaves[0]) * cLeaves, 32, + MM_TAG_CPUM_CPUID, (void **)&pCpum->GuestInfo.paCpuIdLeavesR3); + + AssertLogRelRCReturn(rc, rc); + pCpum->GuestInfo.cCpuIdLeaves = cLeaves; + pCpum->GuestInfo.paCpuIdLeavesR0 = MMHyperR3ToR0(pVM, pCpum->GuestInfo.paCpuIdLeavesR3); + pCpum->GuestInfo.paCpuIdLeavesRC = MMHyperR3ToRC(pVM, pCpum->GuestInfo.paCpuIdLeavesR3); + Assert(MMHyperR0ToR3(pVM, pCpum->GuestInfo.paCpuIdLeavesR0) == (void *)pCpum->GuestInfo.paCpuIdLeavesR3); + Assert(MMHyperRCToR3(pVM, pCpum->GuestInfo.paCpuIdLeavesRC) == (void *)pCpum->GuestInfo.paCpuIdLeavesR3); + + /* + * Update the default CPUID leaf if necessary. + */ + switch (pCpum->GuestInfo.enmUnknownCpuIdMethod) + { + case CPUMUNKNOWNCPUID_LAST_STD_LEAF: + case CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX: + { + /* We don't use CPUID(0).eax here because of the NT hack that only + changes that value without actually removing any leaves. 
*/ + uint32_t i = 0; + if ( pCpum->GuestInfo.cCpuIdLeaves > 0 + && pCpum->GuestInfo.paCpuIdLeavesR3[0].uLeaf <= UINT32_C(0xff)) + { + while ( i + 1 < pCpum->GuestInfo.cCpuIdLeaves + && pCpum->GuestInfo.paCpuIdLeavesR3[i + 1].uLeaf <= UINT32_C(0xff)) + i++; + pCpum->GuestInfo.DefCpuId.uEax = pCpum->GuestInfo.paCpuIdLeavesR3[i].uEax; + pCpum->GuestInfo.DefCpuId.uEbx = pCpum->GuestInfo.paCpuIdLeavesR3[i].uEbx; + pCpum->GuestInfo.DefCpuId.uEcx = pCpum->GuestInfo.paCpuIdLeavesR3[i].uEcx; + pCpum->GuestInfo.DefCpuId.uEdx = pCpum->GuestInfo.paCpuIdLeavesR3[i].uEdx; + } + break; + } + default: + break; + } + + /* + * Explode the guest CPU features. + */ + rc = cpumR3CpuIdExplodeFeatures(pCpum->GuestInfo.paCpuIdLeavesR3, pCpum->GuestInfo.cCpuIdLeaves, pMsrs, + &pCpum->GuestFeatures); + AssertLogRelRCReturn(rc, rc); + + /* + * Adjust the scalable bus frequency according to the CPUID information + * we're now using. + */ + if (CPUMMICROARCH_IS_INTEL_CORE7(pVM->cpum.s.GuestFeatures.enmMicroarch)) + pCpum->GuestInfo.uScalableBusFreq = pCpum->GuestFeatures.enmMicroarch >= kCpumMicroarch_Intel_Core7_SandyBridge + ? UINT64_C(100000000) /* 100MHz */ + : UINT64_C(133333333); /* 133MHz */ + + /* + * Populate the legacy arrays. Currently used for everything, later only + * for patch manager. + */ + struct { PCPUMCPUID paCpuIds; uint32_t cCpuIds, uBase; } aOldRanges[] = + { + { pCpum->aGuestCpuIdPatmStd, RT_ELEMENTS(pCpum->aGuestCpuIdPatmStd), 0x00000000 }, + { pCpum->aGuestCpuIdPatmExt, RT_ELEMENTS(pCpum->aGuestCpuIdPatmExt), 0x80000000 }, + { pCpum->aGuestCpuIdPatmCentaur, RT_ELEMENTS(pCpum->aGuestCpuIdPatmCentaur), 0xc0000000 }, + }; + for (uint32_t i = 0; i < RT_ELEMENTS(aOldRanges); i++) + { + uint32_t cLeft = aOldRanges[i].cCpuIds; + uint32_t uLeaf = aOldRanges[i].uBase + cLeft; + PCPUMCPUID pLegacyLeaf = &aOldRanges[i].paCpuIds[cLeft]; + while (cLeft-- > 0) + { + uLeaf--; + pLegacyLeaf--; + + PCCPUMCPUIDLEAF pLeaf = cpumR3CpuIdGetExactLeaf(pCpum, uLeaf, 0 /* uSubLeaf */); + if (pLeaf) + { + pLegacyLeaf->uEax = pLeaf->uEax; + pLegacyLeaf->uEbx = pLeaf->uEbx; + pLegacyLeaf->uEcx = pLeaf->uEcx; + pLegacyLeaf->uEdx = pLeaf->uEdx; + } + else + *pLegacyLeaf = pCpum->GuestInfo.DefCpuId; + } + } + + /* + * Configure XSAVE offsets according to the CPUID info and set the feature flags. 
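// [Editor's illustration - not part of the upstream patch] In the standard (non-compacted)
// XSAVE layout, CPUID leaf 0xD sub-leaf N reports the size (EAX) and byte offset (EBX) of
// state component N, and the per-component offset table is filled straight from that, as the
// code below this note does. A minimal sketch with simplified stand-in types (assumes at
// least the x87 and SSE components, i.e. cComps >= 2):
#include <cstdint>
#include <cstring>

struct MiniXSaveComp { uint32_t cbSize, offArea; };   /* CPUID.0xD[N].EAX / EBX */

static void miniFillXStateOffsets(const MiniXSaveComp *paComps, uint32_t cComps,
                                  uint64_t fXcr0Mask, uint32_t *paoffXState)
{
    memset(paoffXState, 0xff, sizeof(uint32_t) * cComps);  /* mark unsupported components      */
    paoffXState[0] = 0;                                    /* x87 state lives in the legacy area */
    paoffXState[1] = 0;                                    /* SSE/XMM state does too            */
    for (uint32_t i = 2; i < cComps; i++)
        if (fXcr0Mask & (UINT64_C(1) << i))
            paoffXState[i] = paComps[i].offArea;           /* e.g. YMM_Hi128 is typically at 576 */
}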
+ */ + memset(&pVM->aCpus[0].cpum.s.Guest.aoffXState[0], 0xff, sizeof(pVM->aCpus[0].cpum.s.Guest.aoffXState)); + pVM->aCpus[0].cpum.s.Guest.aoffXState[XSAVE_C_X87_BIT] = 0; + pVM->aCpus[0].cpum.s.Guest.aoffXState[XSAVE_C_SSE_BIT] = 0; + for (uint32_t iComponent = XSAVE_C_SSE_BIT + 1; iComponent < 63; iComponent++) + if (pCpum->fXStateGuestMask & RT_BIT_64(iComponent)) + { + PCPUMCPUIDLEAF pSubLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 0xd, iComponent); + AssertLogRelMsgReturn(pSubLeaf, ("iComponent=%#x\n", iComponent), VERR_CPUM_IPE_1); + AssertLogRelMsgReturn(pSubLeaf->fSubLeafMask >= iComponent, ("iComponent=%#x\n", iComponent), VERR_CPUM_IPE_1); + AssertLogRelMsgReturn( pSubLeaf->uEax > 0 + && pSubLeaf->uEbx >= CPUM_MIN_XSAVE_AREA_SIZE + && pSubLeaf->uEax <= pCpum->GuestFeatures.cbMaxExtendedState + && pSubLeaf->uEbx <= pCpum->GuestFeatures.cbMaxExtendedState + && pSubLeaf->uEbx + pSubLeaf->uEax <= pCpum->GuestFeatures.cbMaxExtendedState, + ("iComponent=%#x eax=%#x ebx=%#x cbMax=%#x\n", iComponent, pSubLeaf->uEax, pSubLeaf->uEbx, + pCpum->GuestFeatures.cbMaxExtendedState), + VERR_CPUM_IPE_1); + pVM->aCpus[0].cpum.s.Guest.aoffXState[iComponent] = pSubLeaf->uEbx; + } + memset(&pVM->aCpus[0].cpum.s.Hyper.aoffXState[0], 0xff, sizeof(pVM->aCpus[0].cpum.s.Hyper.aoffXState)); + + /* Copy the CPU #0 data to the other CPUs. */ + for (VMCPUID iCpu = 1; iCpu < pVM->cCpus; iCpu++) + { + memcpy(&pVM->aCpus[iCpu].cpum.s.Guest.aoffXState[0], &pVM->aCpus[0].cpum.s.Guest.aoffXState[0], + sizeof(pVM->aCpus[iCpu].cpum.s.Guest.aoffXState)); + memcpy(&pVM->aCpus[iCpu].cpum.s.Hyper.aoffXState[0], &pVM->aCpus[0].cpum.s.Hyper.aoffXState[0], + sizeof(pVM->aCpus[iCpu].cpum.s.Hyper.aoffXState)); + } + + return VINF_SUCCESS; +} + + +/** @name Instruction Set Extension Options + * @{ */ +/** Configuration option type (extended boolean, really). */ +typedef uint8_t CPUMISAEXTCFG; +/** Always disable the extension. */ +#define CPUMISAEXTCFG_DISABLED false +/** Enable the extension if it's supported by the host CPU. */ +#define CPUMISAEXTCFG_ENABLED_SUPPORTED true +/** Enable the extension if it's supported by the host CPU, but don't let + * the portable CPUID feature disable it. */ +#define CPUMISAEXTCFG_ENABLED_PORTABLE UINT8_C(127) +/** Always enable the extension. */ +#define CPUMISAEXTCFG_ENABLED_ALWAYS UINT8_C(255) +/** @} */ + +/** + * CPUID Configuration (from CFGM). + * + * @remarks The members aren't document since we would only be duplicating the + * \@cfgm entries in cpumR3CpuIdReadConfig. + */ +typedef struct CPUMCPUIDCONFIG +{ + bool fNt4LeafLimit; + bool fInvariantTsc; + bool fForceVme; + bool fNestedHWVirt; + + CPUMISAEXTCFG enmCmpXchg16b; + CPUMISAEXTCFG enmMonitor; + CPUMISAEXTCFG enmMWaitExtensions; + CPUMISAEXTCFG enmSse41; + CPUMISAEXTCFG enmSse42; + CPUMISAEXTCFG enmAvx; + CPUMISAEXTCFG enmAvx2; + CPUMISAEXTCFG enmXSave; + CPUMISAEXTCFG enmAesNi; + CPUMISAEXTCFG enmPClMul; + CPUMISAEXTCFG enmPopCnt; + CPUMISAEXTCFG enmMovBe; + CPUMISAEXTCFG enmRdRand; + CPUMISAEXTCFG enmRdSeed; + CPUMISAEXTCFG enmCLFlushOpt; + CPUMISAEXTCFG enmFsGsBase; + CPUMISAEXTCFG enmPcid; + CPUMISAEXTCFG enmInvpcid; + CPUMISAEXTCFG enmFlushCmdMsr; + + CPUMISAEXTCFG enmAbm; + CPUMISAEXTCFG enmSse4A; + CPUMISAEXTCFG enmMisAlnSse; + CPUMISAEXTCFG enm3dNowPrf; + CPUMISAEXTCFG enmAmdExtMmx; + + uint32_t uMaxStdLeaf; + uint32_t uMaxExtLeaf; + uint32_t uMaxCentaurLeaf; + uint32_t uMaxIntelFamilyModelStep; + char szCpuName[128]; +} CPUMCPUIDCONFIG; +/** Pointer to CPUID config (from CFGM). 
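// [Editor's illustration - not part of the upstream patch] A sketch of how the "extended
// boolean" CPUMISAEXTCFG values defined above are typically consumed when a CPUID feature bit
// is computed: any non-zero setting requests the bit, the portable-CPUID pass may strip it
// again unless the portable/always variants were chosen, and "always" forces it on regardless
// of host support. The helper itself and its names are illustrative only.
#include <cstdint>

typedef uint8_t MiniIsaExtCfg;
static const MiniIsaExtCfg kMiniDisabled         = 0;    /* CPUMISAEXTCFG_DISABLED            */
static const MiniIsaExtCfg kMiniEnabledSupported = 1;    /* CPUMISAEXTCFG_ENABLED_SUPPORTED   */
static const MiniIsaExtCfg kMiniEnabledPortable  = 127;  /* CPUMISAEXTCFG_ENABLED_PORTABLE    */
static const MiniIsaExtCfg kMiniEnabledAlways    = 255;  /* CPUMISAEXTCFG_ENABLED_ALWAYS      */

static uint32_t miniApplyIsaExtCfg(MiniIsaExtCfg enmCfg, bool fHostSupports,
                                   uint8_t uPortableLevel, uint32_t fFeatureBit)
{
    uint32_t fResult = 0;
    if (enmCfg != kMiniDisabled && (fHostSupports || enmCfg == kMiniEnabledAlways))
        fResult = fFeatureBit;                          /* requested and available (or forced) */
    if (   uPortableLevel > 0
        && enmCfg != kMiniEnabledPortable
        && enmCfg != kMiniEnabledAlways)
        fResult = 0;                                    /* portable CPUID strips optional bits  */
    return fResult;
}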
*/ +typedef CPUMCPUIDCONFIG *PCPUMCPUIDCONFIG; + + +/** + * Mini CPU selection support for making Mac OS X happy. + * + * Executes the /CPUM/MaxIntelFamilyModelStep config. + * + * @param pCpum The CPUM instance data. + * @param pConfig The CPUID configuration we've read from CFGM. + */ +static void cpumR3CpuIdLimitIntelFamModStep(PCPUM pCpum, PCPUMCPUIDCONFIG pConfig) +{ + if (pCpum->GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_INTEL) + { + PCPUMCPUIDLEAF pStdFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 1, 0); + uint32_t uCurIntelFamilyModelStep = RT_MAKE_U32_FROM_U8(ASMGetCpuStepping(pStdFeatureLeaf->uEax), + ASMGetCpuModelIntel(pStdFeatureLeaf->uEax), + ASMGetCpuFamily(pStdFeatureLeaf->uEax), + 0); + uint32_t uMaxIntelFamilyModelStep = pConfig->uMaxIntelFamilyModelStep; + if (pConfig->uMaxIntelFamilyModelStep < uCurIntelFamilyModelStep) + { + uint32_t uNew = pStdFeatureLeaf->uEax & UINT32_C(0xf0003000); + uNew |= RT_BYTE1(uMaxIntelFamilyModelStep) & 0xf; /* stepping */ + uNew |= (RT_BYTE2(uMaxIntelFamilyModelStep) & 0xf) << 4; /* 4 low model bits */ + uNew |= (RT_BYTE2(uMaxIntelFamilyModelStep) >> 4) << 16; /* 4 high model bits */ + uNew |= (RT_BYTE3(uMaxIntelFamilyModelStep) & 0xf) << 8; /* 4 low family bits */ + if (RT_BYTE3(uMaxIntelFamilyModelStep) > 0xf) /* 8 high family bits, using intel's suggested calculation. */ + uNew |= ( (RT_BYTE3(uMaxIntelFamilyModelStep) - (RT_BYTE3(uMaxIntelFamilyModelStep) & 0xf)) & 0xff ) << 20; + LogRel(("CPU: CPUID(0).EAX %#x -> %#x (uMaxIntelFamilyModelStep=%#x, uCurIntelFamilyModelStep=%#x\n", + pStdFeatureLeaf->uEax, uNew, uMaxIntelFamilyModelStep, uCurIntelFamilyModelStep)); + pStdFeatureLeaf->uEax = uNew; + } + } +} + + + +/** + * Limit it the number of entries, zapping the remainder. + * + * The limits are masking off stuff about power saving and similar, this + * is perhaps a bit crudely done as there is probably some relatively harmless + * info too in these leaves (like words about having a constant TSC). + * + * @param pCpum The CPUM instance data. + * @param pConfig The CPUID configuration we've read from CFGM. + */ +static void cpumR3CpuIdLimitLeaves(PCPUM pCpum, PCPUMCPUIDCONFIG pConfig) +{ + /* + * Standard leaves. + */ + uint32_t uSubLeaf = 0; + PCPUMCPUIDLEAF pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 0, uSubLeaf); + if (pCurLeaf) + { + uint32_t uLimit = pCurLeaf->uEax; + if (uLimit <= UINT32_C(0x000fffff)) + { + if (uLimit > pConfig->uMaxStdLeaf) + { + pCurLeaf->uEax = uLimit = pConfig->uMaxStdLeaf; + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + uLimit + 1, UINT32_C(0x000fffff)); + } + + /* NT4 hack, no zapping of extra leaves here. */ + if (pConfig->fNt4LeafLimit && uLimit > 3) + pCurLeaf->uEax = uLimit = 3; + + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x00000000), ++uSubLeaf)) != NULL) + pCurLeaf->uEax = uLimit; + } + else + { + LogRel(("CPUID: Invalid standard range: %#x\n", uLimit)); + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + UINT32_C(0x00000000), UINT32_C(0x0fffffff)); + } + } + + /* + * Extended leaves. 
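// [Editor's illustration - not part of the upstream patch] The MaxIntelFamilyModelStep logic
// above re-packs family/model/stepping into the CPUID(1).EAX layout: stepping in bits 3:0, low
// model bits in 7:4, extended model in 19:16, low family bits in 11:8 and the remaining family
// bits in 27:20, using the same calculation as the code above. A standalone encoder for that
// layout (names are illustrative):
#include <cstdint>

static uint32_t miniEncodeFamilyModelStepping(uint32_t uFamily, uint32_t uModel, uint32_t uStepping)
{
    uint32_t uEax = uStepping & 0xf;                        /* bits 3:0   - stepping        */
    uEax |= (uModel & 0xf) << 4;                            /* bits 7:4   - low model bits  */
    uEax |= (uModel >> 4) << 16;                            /* bits 19:16 - extended model  */
    uEax |= (uFamily & 0xf) << 8;                           /* bits 11:8  - low family bits */
    if (uFamily > 0xf)                                      /* bits 27:20 - extended family */
        uEax |= ((uFamily - (uFamily & 0xf)) & 0xff) << 20;
    return uEax;
}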
+ */ + uSubLeaf = 0; + pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000000), uSubLeaf); + if (pCurLeaf) + { + uint32_t uLimit = pCurLeaf->uEax; + if ( uLimit >= UINT32_C(0x80000000) + && uLimit <= UINT32_C(0x800fffff)) + { + if (uLimit > pConfig->uMaxExtLeaf) + { + pCurLeaf->uEax = uLimit = pConfig->uMaxExtLeaf; + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + uLimit + 1, UINT32_C(0x800fffff)); + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000000), ++uSubLeaf)) != NULL) + pCurLeaf->uEax = uLimit; + } + } + else + { + LogRel(("CPUID: Invalid extended range: %#x\n", uLimit)); + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + UINT32_C(0x80000000), UINT32_C(0x8ffffffd)); + } + } + + /* + * Centaur leaves (VIA). + */ + uSubLeaf = 0; + pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0xc0000000), uSubLeaf); + if (pCurLeaf) + { + uint32_t uLimit = pCurLeaf->uEax; + if ( uLimit >= UINT32_C(0xc0000000) + && uLimit <= UINT32_C(0xc00fffff)) + { + if (uLimit > pConfig->uMaxCentaurLeaf) + { + pCurLeaf->uEax = uLimit = pConfig->uMaxCentaurLeaf; + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + uLimit + 1, UINT32_C(0xcfffffff)); + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0xc0000000), ++uSubLeaf)) != NULL) + pCurLeaf->uEax = uLimit; + } + } + else + { + LogRel(("CPUID: Invalid centaur range: %#x\n", uLimit)); + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + UINT32_C(0xc0000000), UINT32_C(0xcfffffff)); + } + } +} + + +/** + * Clears a CPUID leaf and all sub-leaves (to zero). + * + * @param pCpum The CPUM instance data. + * @param uLeaf The leaf to clear. + */ +static void cpumR3CpuIdZeroLeaf(PCPUM pCpum, uint32_t uLeaf) +{ + uint32_t uSubLeaf = 0; + PCPUMCPUIDLEAF pCurLeaf; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, uLeaf, uSubLeaf)) != NULL) + { + pCurLeaf->uEax = 0; + pCurLeaf->uEbx = 0; + pCurLeaf->uEcx = 0; + pCurLeaf->uEdx = 0; + uSubLeaf++; + } +} + + +/** + * Used by cpumR3CpuIdSanitize to ensure that we don't have any sub-leaves for + * the given leaf. + * + * @returns pLeaf. + * @param pCpum The CPUM instance data. + * @param pLeaf The leaf to ensure is alone with it's EAX input value. + */ +static PCPUMCPUIDLEAF cpumR3CpuIdMakeSingleLeaf(PCPUM pCpum, PCPUMCPUIDLEAF pLeaf) +{ + Assert((uintptr_t)(pLeaf - pCpum->GuestInfo.paCpuIdLeavesR3) < pCpum->GuestInfo.cCpuIdLeaves); + if (pLeaf->fSubLeafMask != 0) + { + /* + * Figure out how many sub-leaves in need of removal (we'll keep the first). + * Log everything while we're at it. + */ + LogRel(("CPUM:\n" + "CPUM: Unexpected CPUID sub-leaves for leaf %#x; fSubLeafMask=%#x\n", pLeaf->uLeaf, pLeaf->fSubLeafMask)); + PCPUMCPUIDLEAF pLast = &pCpum->GuestInfo.paCpuIdLeavesR3[pCpum->GuestInfo.cCpuIdLeaves - 1]; + PCPUMCPUIDLEAF pSubLeaf = pLeaf; + for (;;) + { + LogRel(("CPUM: %08x/%08x: %08x %08x %08x %08x; flags=%#x mask=%#x\n", + pSubLeaf->uLeaf, pSubLeaf->uSubLeaf, + pSubLeaf->uEax, pSubLeaf->uEbx, pSubLeaf->uEcx, pSubLeaf->uEdx, + pSubLeaf->fFlags, pSubLeaf->fSubLeafMask)); + if (pSubLeaf == pLast || pSubLeaf[1].uLeaf != pLeaf->uLeaf) + break; + pSubLeaf++; + } + LogRel(("CPUM:\n")); + + /* + * Remove the offending sub-leaves. 
+ */ + if (pSubLeaf != pLeaf) + { + if (pSubLeaf != pLast) + memmove(pLeaf + 1, pSubLeaf + 1, (uintptr_t)pLast - (uintptr_t)pSubLeaf); + pCpum->GuestInfo.cCpuIdLeaves -= (uint32_t)(pSubLeaf - pLeaf); + } + + /* + * Convert the first sub-leaf into a single leaf. + */ + pLeaf->uSubLeaf = 0; + pLeaf->fSubLeafMask = 0; + } + return pLeaf; +} + + +/** + * Sanitizes and adjust the CPUID leaves. + * + * Drop features that aren't virtualized (or virtualizable). Adjust information + * and capabilities to fit the virtualized hardware. Remove information the + * guest shouldn't have (because it's wrong in the virtual world or because it + * gives away host details) or that we don't have documentation for and no idea + * what means. + * + * @returns VBox status code. + * @param pVM The cross context VM structure (for cCpus). + * @param pCpum The CPUM instance data. + * @param pConfig The CPUID configuration we've read from CFGM. + */ +static int cpumR3CpuIdSanitize(PVM pVM, PCPUM pCpum, PCPUMCPUIDCONFIG pConfig) +{ +#define PORTABLE_CLEAR_BITS_WHEN(Lvl, a_pLeafReg, FeatNm, fMask, uValue) \ + if ( pCpum->u8PortableCpuIdLevel >= (Lvl) && ((a_pLeafReg) & (fMask)) == (uValue) ) \ + { \ + LogRel(("PortableCpuId: " #a_pLeafReg "[" #FeatNm "]: %#x -> 0\n", (a_pLeafReg) & (fMask))); \ + (a_pLeafReg) &= ~(uint32_t)(fMask); \ + } +#define PORTABLE_DISABLE_FEATURE_BIT(Lvl, a_pLeafReg, FeatNm, fBitMask) \ + if ( pCpum->u8PortableCpuIdLevel >= (Lvl) && ((a_pLeafReg) & (fBitMask)) ) \ + { \ + LogRel(("PortableCpuId: " #a_pLeafReg "[" #FeatNm "]: 1 -> 0\n")); \ + (a_pLeafReg) &= ~(uint32_t)(fBitMask); \ + } +#define PORTABLE_DISABLE_FEATURE_BIT_CFG(Lvl, a_pLeafReg, FeatNm, fBitMask, enmConfig) \ + if ( pCpum->u8PortableCpuIdLevel >= (Lvl) \ + && ((a_pLeafReg) & (fBitMask)) \ + && (enmConfig) != CPUMISAEXTCFG_ENABLED_PORTABLE ) \ + { \ + LogRel(("PortableCpuId: " #a_pLeafReg "[" #FeatNm "]: 1 -> 0\n")); \ + (a_pLeafReg) &= ~(uint32_t)(fBitMask); \ + } + Assert(pCpum->GuestFeatures.enmCpuVendor != CPUMCPUVENDOR_INVALID); + + /* Cpuid 1: + * EAX: CPU model, family and stepping. + * + * ECX + EDX: Supported features. Only report features we can support. + * Note! When enabling new features the Synthetic CPU and Portable CPUID + * options may require adjusting (i.e. stripping what was enabled). + * + * EBX: Branding, CLFLUSH line size, logical processors per package and + * initial APIC ID. + */ + PCPUMCPUIDLEAF pStdFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 1, 0); /* Note! Must refetch when used later. */ + AssertLogRelReturn(pStdFeatureLeaf, VERR_CPUM_IPE_2); + pStdFeatureLeaf = cpumR3CpuIdMakeSingleLeaf(pCpum, pStdFeatureLeaf); + + pStdFeatureLeaf->uEdx &= X86_CPUID_FEATURE_EDX_FPU + | X86_CPUID_FEATURE_EDX_VME + | X86_CPUID_FEATURE_EDX_DE + | X86_CPUID_FEATURE_EDX_PSE + | X86_CPUID_FEATURE_EDX_TSC + | X86_CPUID_FEATURE_EDX_MSR + //| X86_CPUID_FEATURE_EDX_PAE - set later if configured. + | X86_CPUID_FEATURE_EDX_MCE + | X86_CPUID_FEATURE_EDX_CX8 + //| X86_CPUID_FEATURE_EDX_APIC - set by the APIC device if present. + //| RT_BIT_32(10) - not defined + /* Note! we don't report sysenter/sysexit support due to our inability to keep the IOPL part of eflags in sync while in ring 1 (see @bugref{1757}) */ + //| X86_CPUID_FEATURE_EDX_SEP + | X86_CPUID_FEATURE_EDX_MTRR + | X86_CPUID_FEATURE_EDX_PGE + | X86_CPUID_FEATURE_EDX_MCA + | X86_CPUID_FEATURE_EDX_CMOV + | X86_CPUID_FEATURE_EDX_PAT /* 16 */ + | X86_CPUID_FEATURE_EDX_PSE36 + //| X86_CPUID_FEATURE_EDX_PSN - no serial number. 
+ | X86_CPUID_FEATURE_EDX_CLFSH + //| RT_BIT_32(20) - not defined + //| X86_CPUID_FEATURE_EDX_DS - no debug store. + //| X86_CPUID_FEATURE_EDX_ACPI - not supported (not DevAcpi, right?). + | X86_CPUID_FEATURE_EDX_MMX + | X86_CPUID_FEATURE_EDX_FXSR + | X86_CPUID_FEATURE_EDX_SSE + | X86_CPUID_FEATURE_EDX_SSE2 + //| X86_CPUID_FEATURE_EDX_SS - no self snoop. + | X86_CPUID_FEATURE_EDX_HTT + //| X86_CPUID_FEATURE_EDX_TM - no thermal monitor. + //| RT_BIT_32(30) - not defined + //| X86_CPUID_FEATURE_EDX_PBE - no pending break enabled. + ; + pStdFeatureLeaf->uEcx &= 0 + | X86_CPUID_FEATURE_ECX_SSE3 + | (pConfig->enmPClMul ? X86_CPUID_FEATURE_ECX_PCLMUL : 0) + //| X86_CPUID_FEATURE_ECX_DTES64 - not implemented yet. + /* Can't properly emulate monitor & mwait with guest SMP; force the guest to use hlt for idling VCPUs. */ + | ((pConfig->enmMonitor && pVM->cCpus == 1) ? X86_CPUID_FEATURE_ECX_MONITOR : 0) + //| X86_CPUID_FEATURE_ECX_CPLDS - no CPL qualified debug store. + | (pConfig->fNestedHWVirt ? X86_CPUID_FEATURE_ECX_VMX : 0) + //| X86_CPUID_FEATURE_ECX_SMX - not virtualized yet. + //| X86_CPUID_FEATURE_ECX_EST - no extended speed step. + //| X86_CPUID_FEATURE_ECX_TM2 - no thermal monitor 2. + | X86_CPUID_FEATURE_ECX_SSSE3 + //| X86_CPUID_FEATURE_ECX_CNTXID - no L1 context id (MSR++). + //| X86_CPUID_FEATURE_ECX_FMA - not implemented yet. + | (pConfig->enmCmpXchg16b ? X86_CPUID_FEATURE_ECX_CX16 : 0) + /* ECX Bit 14 - xTPR Update Control. Processor supports changing IA32_MISC_ENABLES[bit 23]. */ + //| X86_CPUID_FEATURE_ECX_TPRUPDATE + //| X86_CPUID_FEATURE_ECX_PDCM - not implemented yet. + | (pConfig->enmPcid ? X86_CPUID_FEATURE_ECX_PCID : 0) + //| X86_CPUID_FEATURE_ECX_DCA - not implemented yet. + | (pConfig->enmSse41 ? X86_CPUID_FEATURE_ECX_SSE4_1 : 0) + | (pConfig->enmSse42 ? X86_CPUID_FEATURE_ECX_SSE4_2 : 0) + //| X86_CPUID_FEATURE_ECX_X2APIC - turned on later by the device if enabled. + | (pConfig->enmMovBe ? X86_CPUID_FEATURE_ECX_MOVBE : 0) + | (pConfig->enmPopCnt ? X86_CPUID_FEATURE_ECX_POPCNT : 0) + //| X86_CPUID_FEATURE_ECX_TSCDEADL - not implemented yet. + | (pConfig->enmAesNi ? X86_CPUID_FEATURE_ECX_AES : 0) + | (pConfig->enmXSave ? X86_CPUID_FEATURE_ECX_XSAVE : 0 ) + //| X86_CPUID_FEATURE_ECX_OSXSAVE - mirrors CR4.OSXSAVE state, set dynamically. + | (pConfig->enmAvx ? X86_CPUID_FEATURE_ECX_AVX : 0) + //| X86_CPUID_FEATURE_ECX_F16C - not implemented yet. + | (pConfig->enmRdRand ? X86_CPUID_FEATURE_ECX_RDRAND : 0) + //| X86_CPUID_FEATURE_ECX_HVP - Set explicitly later. + ; + + /* Mask out PCID unless FSGSBASE is exposed due to a bug in Windows 10 SMP guests, see @bugref{9089#c15}. 
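// [Editor's illustration - not part of the upstream patch] The PORTABLE_DISABLE_FEATURE_BIT*
// macros used throughout this sanitizing pass boil down to: "if the portable-CPUID level is at
// least N and the bit is currently set (and, for the _CFG variant, the user did not explicitly
// ask to keep it), log the change and clear the bit". Expressed as a plain function instead of
// a macro, with printf standing in for LogRel():
#include <cstdint>
#include <cstdio>

static void miniPortableDisableBit(uint8_t uPortableLevel, uint8_t uRequiredLevel,
                                   uint32_t *pfReg, uint32_t fBitMask, const char *pszName)
{
    if (uPortableLevel >= uRequiredLevel && (*pfReg & fBitMask))
    {
        printf("PortableCpuId: %s: 1 -> 0\n", pszName);
        *pfReg &= ~fBitMask;
    }
}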
*/ + if ( !pVM->cpum.s.GuestFeatures.fFsGsBase + && (pStdFeatureLeaf->uEcx & X86_CPUID_FEATURE_ECX_PCID)) + { + pStdFeatureLeaf->uEcx &= ~X86_CPUID_FEATURE_ECX_PCID; + LogRel(("CPUM: Disabled PCID without FSGSBASE to workaround buggy guests\n")); + } + + if (pCpum->u8PortableCpuIdLevel > 0) + { + PORTABLE_CLEAR_BITS_WHEN(1, pStdFeatureLeaf->uEax, ProcessorType, (UINT32_C(3) << 12), (UINT32_C(2) << 12)); + PORTABLE_DISABLE_FEATURE_BIT( 1, pStdFeatureLeaf->uEcx, SSSE3, X86_CPUID_FEATURE_ECX_SSSE3); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, PCID, X86_CPUID_FEATURE_ECX_PCID, pConfig->enmPcid); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, SSE4_1, X86_CPUID_FEATURE_ECX_SSE4_1, pConfig->enmSse41); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, SSE4_2, X86_CPUID_FEATURE_ECX_SSE4_2, pConfig->enmSse42); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, MOVBE, X86_CPUID_FEATURE_ECX_MOVBE, pConfig->enmMovBe); + PORTABLE_DISABLE_FEATURE_BIT( 1, pStdFeatureLeaf->uEcx, AES, X86_CPUID_FEATURE_ECX_AES); + PORTABLE_DISABLE_FEATURE_BIT( 1, pStdFeatureLeaf->uEcx, VMX, X86_CPUID_FEATURE_ECX_VMX); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, PCLMUL, X86_CPUID_FEATURE_ECX_PCLMUL, pConfig->enmPClMul); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, POPCNT, X86_CPUID_FEATURE_ECX_POPCNT, pConfig->enmPopCnt); + PORTABLE_DISABLE_FEATURE_BIT( 1, pStdFeatureLeaf->uEcx, F16C, X86_CPUID_FEATURE_ECX_F16C); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, XSAVE, X86_CPUID_FEATURE_ECX_XSAVE, pConfig->enmXSave); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, AVX, X86_CPUID_FEATURE_ECX_AVX, pConfig->enmAvx); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, RDRAND, X86_CPUID_FEATURE_ECX_RDRAND, pConfig->enmRdRand); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pStdFeatureLeaf->uEcx, CX16, X86_CPUID_FEATURE_ECX_CX16, pConfig->enmCmpXchg16b); + PORTABLE_DISABLE_FEATURE_BIT( 2, pStdFeatureLeaf->uEcx, SSE3, X86_CPUID_FEATURE_ECX_SSE3); + PORTABLE_DISABLE_FEATURE_BIT( 3, pStdFeatureLeaf->uEdx, SSE2, X86_CPUID_FEATURE_EDX_SSE2); + PORTABLE_DISABLE_FEATURE_BIT( 3, pStdFeatureLeaf->uEdx, SSE, X86_CPUID_FEATURE_EDX_SSE); + PORTABLE_DISABLE_FEATURE_BIT( 3, pStdFeatureLeaf->uEdx, CLFSH, X86_CPUID_FEATURE_EDX_CLFSH); + PORTABLE_DISABLE_FEATURE_BIT( 3, pStdFeatureLeaf->uEdx, CMOV, X86_CPUID_FEATURE_EDX_CMOV); + + Assert(!(pStdFeatureLeaf->uEdx & ( X86_CPUID_FEATURE_EDX_SEP + | X86_CPUID_FEATURE_EDX_PSN + | X86_CPUID_FEATURE_EDX_DS + | X86_CPUID_FEATURE_EDX_ACPI + | X86_CPUID_FEATURE_EDX_SS + | X86_CPUID_FEATURE_EDX_TM + | X86_CPUID_FEATURE_EDX_PBE + ))); + Assert(!(pStdFeatureLeaf->uEcx & ( X86_CPUID_FEATURE_ECX_DTES64 + | X86_CPUID_FEATURE_ECX_CPLDS + | X86_CPUID_FEATURE_ECX_AES + | X86_CPUID_FEATURE_ECX_VMX + | X86_CPUID_FEATURE_ECX_SMX + | X86_CPUID_FEATURE_ECX_EST + | X86_CPUID_FEATURE_ECX_TM2 + | X86_CPUID_FEATURE_ECX_CNTXID + | X86_CPUID_FEATURE_ECX_FMA + | X86_CPUID_FEATURE_ECX_TPRUPDATE + | X86_CPUID_FEATURE_ECX_PDCM + | X86_CPUID_FEATURE_ECX_DCA + | X86_CPUID_FEATURE_ECX_OSXSAVE + ))); + } + + /* Set up APIC ID for CPU 0, configure multi core/threaded smp. */ + pStdFeatureLeaf->uEbx &= UINT32_C(0x0000ffff); /* (APIC-ID := 0 and #LogCpus := 0) */ + + /* The HTT bit is architectural and does not directly indicate hyper-threading or multiple cores; + * it was set even on single-core/non-HT Northwood P4s for example. 
The HTT bit only means that the + * information in EBX[23:16] (max number of addressable logical processor IDs) is valid. + */ +#ifdef VBOX_WITH_MULTI_CORE + if (pVM->cCpus > 1) + pStdFeatureLeaf->uEdx |= X86_CPUID_FEATURE_EDX_HTT; /* Force if emulating a multi-core CPU. */ +#endif + if (pStdFeatureLeaf->uEdx & X86_CPUID_FEATURE_EDX_HTT) + { + /* If CPUID Fn0000_0001_EDX[HTT] = 1 then LogicalProcessorCount is the number of threads per CPU + core times the number of CPU cores per processor */ +#ifdef VBOX_WITH_MULTI_CORE + pStdFeatureLeaf->uEbx |= pVM->cCpus <= 0xff ? (pVM->cCpus << 16) : UINT32_C(0x00ff0000); +#else + /* Single logical processor in a package. */ + pStdFeatureLeaf->uEbx |= (1 << 16); +#endif + } + + uint32_t uMicrocodeRev; + int rc = SUPR3QueryMicrocodeRev(&uMicrocodeRev); + if (RT_SUCCESS(rc)) + { + LogRel(("CPUM: Microcode revision 0x%08X\n", uMicrocodeRev)); + } + else + { + uMicrocodeRev = 0; + LogRel(("CPUM: Failed to query microcode revision. rc=%Rrc\n", rc)); + } + + /* Mask out the VME capability on certain CPUs, unless overridden by fForceVme. + * VME bug was fixed in AGESA 1.0.0.6, microcode patch level 8001126. + */ + if ( (pVM->cpum.s.GuestFeatures.enmMicroarch == kCpumMicroarch_AMD_Zen_Ryzen) + && uMicrocodeRev < 0x8001126 + && !pConfig->fForceVme) + { + /** @todo The above is a very coarse test but at the moment we don't know any better (see @bugref{8852}). */ + LogRel(("CPUM: Zen VME workaround engaged\n")); + pStdFeatureLeaf->uEdx &= ~X86_CPUID_FEATURE_EDX_VME; + } + + /* Force standard feature bits. */ + if (pConfig->enmPClMul == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_PCLMUL; + if (pConfig->enmMonitor == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_MONITOR; + if (pConfig->enmCmpXchg16b == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_CX16; + if (pConfig->enmSse41 == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_SSE4_1; + if (pConfig->enmSse42 == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_SSE4_2; + if (pConfig->enmMovBe == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_MOVBE; + if (pConfig->enmPopCnt == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_POPCNT; + if (pConfig->enmAesNi == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_AES; + if (pConfig->enmXSave == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_XSAVE; + if (pConfig->enmAvx == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_AVX; + if (pConfig->enmRdRand == CPUMISAEXTCFG_ENABLED_ALWAYS) + pStdFeatureLeaf->uEcx |= X86_CPUID_FEATURE_ECX_RDRAND; + + pStdFeatureLeaf = NULL; /* Must refetch! */ + + /* Cpuid 0x80000001: (Similar, but in no way identical to 0x00000001.) + * AMD: + * EAX: CPU model, family and stepping. + * + * ECX + EDX: Supported features. Only report features we can support. + * Note! When enabling new features the Synthetic CPU and Portable CPUID + * options may require adjusting (i.e. stripping what was enabled). + * ASSUMES that this is ALWAYS the AMD defined feature set if present. + * + * EBX: Branding ID and package type (or reserved). + * + * Intel and probably most others: + * EAX: 0 + * EBX: 0 + * ECX + EDX: Subset of AMD features, mainly for AMD64 support. 
+ */ + PCPUMCPUIDLEAF pExtFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000001), 0); + if (pExtFeatureLeaf) + { + pExtFeatureLeaf = cpumR3CpuIdMakeSingleLeaf(pCpum, pExtFeatureLeaf); + + pExtFeatureLeaf->uEdx &= X86_CPUID_AMD_FEATURE_EDX_FPU + | X86_CPUID_AMD_FEATURE_EDX_VME + | X86_CPUID_AMD_FEATURE_EDX_DE + | X86_CPUID_AMD_FEATURE_EDX_PSE + | X86_CPUID_AMD_FEATURE_EDX_TSC + | X86_CPUID_AMD_FEATURE_EDX_MSR //?? this means AMD MSRs.. + //| X86_CPUID_AMD_FEATURE_EDX_PAE - turned on when necessary + //| X86_CPUID_AMD_FEATURE_EDX_MCE - not virtualized yet. + | X86_CPUID_AMD_FEATURE_EDX_CX8 + //| X86_CPUID_AMD_FEATURE_EDX_APIC - set by the APIC device if present. + //| RT_BIT_32(10) - reserved + /* Note! We don't report sysenter/sysexit support due to our inability to keep the IOPL part of + eflags in sync while in ring 1 (see @bugref{1757}). HM enables them later. */ + //| X86_CPUID_EXT_FEATURE_EDX_SYSCALL + | X86_CPUID_AMD_FEATURE_EDX_MTRR + | X86_CPUID_AMD_FEATURE_EDX_PGE + | X86_CPUID_AMD_FEATURE_EDX_MCA + | X86_CPUID_AMD_FEATURE_EDX_CMOV + | X86_CPUID_AMD_FEATURE_EDX_PAT + | X86_CPUID_AMD_FEATURE_EDX_PSE36 + //| RT_BIT_32(18) - reserved + //| RT_BIT_32(19) - reserved + //| X86_CPUID_EXT_FEATURE_EDX_NX - enabled later by PGM + //| RT_BIT_32(21) - reserved + | (pConfig->enmAmdExtMmx ? X86_CPUID_AMD_FEATURE_EDX_AXMMX : 0) + | X86_CPUID_AMD_FEATURE_EDX_MMX + | X86_CPUID_AMD_FEATURE_EDX_FXSR + | X86_CPUID_AMD_FEATURE_EDX_FFXSR + //| X86_CPUID_EXT_FEATURE_EDX_PAGE1GB + | X86_CPUID_EXT_FEATURE_EDX_RDTSCP + //| RT_BIT_32(28) - reserved + //| X86_CPUID_EXT_FEATURE_EDX_LONG_MODE - turned on when necessary + | X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX + | X86_CPUID_AMD_FEATURE_EDX_3DNOW + ; + pExtFeatureLeaf->uEcx &= X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF + //| X86_CPUID_AMD_FEATURE_ECX_CMPL - set below if applicable. + | (pConfig->fNestedHWVirt ? X86_CPUID_AMD_FEATURE_ECX_SVM : 0) + //| X86_CPUID_AMD_FEATURE_ECX_EXT_APIC + /* Note: This could prevent teleporting from AMD to Intel CPUs! */ + | X86_CPUID_AMD_FEATURE_ECX_CR8L /* expose lock mov cr0 = mov cr8 hack for guests that can use this feature to access the TPR. */ + | (pConfig->enmAbm ? X86_CPUID_AMD_FEATURE_ECX_ABM : 0) + | (pConfig->enmSse4A ? X86_CPUID_AMD_FEATURE_ECX_SSE4A : 0) + | (pConfig->enmMisAlnSse ? X86_CPUID_AMD_FEATURE_ECX_MISALNSSE : 0) + | (pConfig->enm3dNowPrf ? X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF : 0) + //| X86_CPUID_AMD_FEATURE_ECX_OSVW + //| X86_CPUID_AMD_FEATURE_ECX_IBS + //| X86_CPUID_AMD_FEATURE_ECX_XOP + //| X86_CPUID_AMD_FEATURE_ECX_SKINIT + //| X86_CPUID_AMD_FEATURE_ECX_WDT + //| RT_BIT_32(14) - reserved + //| X86_CPUID_AMD_FEATURE_ECX_LWP - not supported + //| X86_CPUID_AMD_FEATURE_ECX_FMA4 - not yet virtualized. + //| RT_BIT_32(17) - reserved + //| RT_BIT_32(18) - reserved + //| X86_CPUID_AMD_FEATURE_ECX_NODEID - not yet virtualized. + //| RT_BIT_32(20) - reserved + //| X86_CPUID_AMD_FEATURE_ECX_TBM - not yet virtualized. + //| X86_CPUID_AMD_FEATURE_ECX_TOPOEXT - not yet virtualized. 
+ //| RT_BIT_32(23) - reserved + //| RT_BIT_32(24) - reserved + //| RT_BIT_32(25) - reserved + //| RT_BIT_32(26) - reserved + //| RT_BIT_32(27) - reserved + //| RT_BIT_32(28) - reserved + //| RT_BIT_32(29) - reserved + //| RT_BIT_32(30) - reserved + //| RT_BIT_32(31) - reserved + ; +#ifdef VBOX_WITH_MULTI_CORE + if ( pVM->cCpus > 1 + && pCpum->GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_CMPL; /* CmpLegacy */ +#endif + + if (pCpum->u8PortableCpuIdLevel > 0) + { + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEcx, CR8L, X86_CPUID_AMD_FEATURE_ECX_CR8L); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEcx, SVM, X86_CPUID_AMD_FEATURE_ECX_SVM); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, ABM, X86_CPUID_AMD_FEATURE_ECX_ABM, pConfig->enmAbm); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, SSE4A, X86_CPUID_AMD_FEATURE_ECX_SSE4A, pConfig->enmSse4A); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, MISALNSSE, X86_CPUID_AMD_FEATURE_ECX_MISALNSSE, pConfig->enmMisAlnSse); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEcx, 3DNOWPRF, X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF, pConfig->enm3dNowPrf); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEcx, XOP, X86_CPUID_AMD_FEATURE_ECX_XOP); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEcx, TBM, X86_CPUID_AMD_FEATURE_ECX_TBM); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEcx, FMA4, X86_CPUID_AMD_FEATURE_ECX_FMA4); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pExtFeatureLeaf->uEdx, AXMMX, X86_CPUID_AMD_FEATURE_EDX_AXMMX, pConfig->enmAmdExtMmx); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEdx, 3DNOW, X86_CPUID_AMD_FEATURE_EDX_3DNOW); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEdx, 3DNOW_EX, X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEdx, FFXSR, X86_CPUID_AMD_FEATURE_EDX_FFXSR); + PORTABLE_DISABLE_FEATURE_BIT( 1, pExtFeatureLeaf->uEdx, RDTSCP, X86_CPUID_EXT_FEATURE_EDX_RDTSCP); + PORTABLE_DISABLE_FEATURE_BIT( 2, pExtFeatureLeaf->uEcx, LAHF_SAHF, X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF); + PORTABLE_DISABLE_FEATURE_BIT( 3, pExtFeatureLeaf->uEcx, CMOV, X86_CPUID_AMD_FEATURE_EDX_CMOV); + + Assert(!(pExtFeatureLeaf->uEcx & ( X86_CPUID_AMD_FEATURE_ECX_SVM + | X86_CPUID_AMD_FEATURE_ECX_EXT_APIC + | X86_CPUID_AMD_FEATURE_ECX_OSVW + | X86_CPUID_AMD_FEATURE_ECX_IBS + | X86_CPUID_AMD_FEATURE_ECX_SKINIT + | X86_CPUID_AMD_FEATURE_ECX_WDT + | X86_CPUID_AMD_FEATURE_ECX_LWP + | X86_CPUID_AMD_FEATURE_ECX_NODEID + | X86_CPUID_AMD_FEATURE_ECX_TOPOEXT + | UINT32_C(0xff964000) + ))); + Assert(!(pExtFeatureLeaf->uEdx & ( RT_BIT(10) + | X86_CPUID_EXT_FEATURE_EDX_SYSCALL + | RT_BIT(18) + | RT_BIT(19) + | RT_BIT(21) + | X86_CPUID_AMD_FEATURE_EDX_AXMMX + | X86_CPUID_EXT_FEATURE_EDX_PAGE1GB + | RT_BIT(28) + ))); + } + + /* Force extended feature bits. */ + if (pConfig->enmAbm == CPUMISAEXTCFG_ENABLED_ALWAYS) + pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_ABM; + if (pConfig->enmSse4A == CPUMISAEXTCFG_ENABLED_ALWAYS) + pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_SSE4A; + if (pConfig->enmMisAlnSse == CPUMISAEXTCFG_ENABLED_ALWAYS) + pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_MISALNSSE; + if (pConfig->enm3dNowPrf == CPUMISAEXTCFG_ENABLED_ALWAYS) + pExtFeatureLeaf->uEcx |= X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF; + if (pConfig->enmAmdExtMmx == CPUMISAEXTCFG_ENABLED_ALWAYS) + pExtFeatureLeaf->uEdx |= X86_CPUID_AMD_FEATURE_EDX_AXMMX; + } + pExtFeatureLeaf = NULL; /* Must refetch! 
*/ + + + /* Cpuid 2: + * Intel: (Nondeterministic) Cache and TLB information + * AMD: Reserved + * VIA: Reserved + * Safe to expose. + */ + uint32_t uSubLeaf = 0; + PCPUMCPUIDLEAF pCurLeaf; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 2, uSubLeaf)) != NULL) + { + if ((pCurLeaf->uEax & 0xff) > 1) + { + LogRel(("CpuId: Std[2].al: %d -> 1\n", pCurLeaf->uEax & 0xff)); + pCurLeaf->uEax &= UINT32_C(0xffffff01); + } + uSubLeaf++; + } + + /* Cpuid 3: + * Intel: EAX, EBX - reserved (transmeta uses these) + * ECX, EDX - Processor Serial Number if available, otherwise reserved + * AMD: Reserved + * VIA: Reserved + * Safe to expose + */ + pStdFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 1, 0); + if (!(pStdFeatureLeaf->uEdx & X86_CPUID_FEATURE_EDX_PSN)) + { + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 3, uSubLeaf)) != NULL) + { + pCurLeaf->uEcx = pCurLeaf->uEdx = 0; + if (pCpum->u8PortableCpuIdLevel > 0) + pCurLeaf->uEax = pCurLeaf->uEbx = 0; + uSubLeaf++; + } + } + + /* Cpuid 4 + ECX: + * Intel: Deterministic Cache Parameters Leaf. + * AMD: Reserved + * VIA: Reserved + * Safe to expose, except for EAX: + * Bits 25-14: Maximum number of addressable IDs for logical processors sharing this cache (see note)** + * Bits 31-26: Maximum number of processor cores in this physical package** + * Note: These SMP values are constant regardless of ECX + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 4, uSubLeaf)) != NULL) + { + pCurLeaf->uEax &= UINT32_C(0x00003fff); /* Clear the #maxcores, #threads-sharing-cache (both are #-1).*/ +#ifdef VBOX_WITH_MULTI_CORE + if ( pVM->cCpus > 1 + && pCpum->GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_INTEL) + { + AssertReturn(pVM->cCpus <= 64, VERR_TOO_MANY_CPUS); + /* One logical processor with possibly multiple cores. */ + /* See http://www.intel.com/Assets/PDF/appnote/241618.pdf p. 29 */ + pCurLeaf->uEax |= pVM->cCpus <= 0x40 ? ((pVM->cCpus - 1) << 26) : UINT32_C(0xfc000000); /* 6 bits only -> 64 cores! */ + } +#endif + uSubLeaf++; + } + + /* Cpuid 5: Monitor/mwait Leaf + * Intel: ECX, EDX - reserved + * EAX, EBX - Smallest and largest monitor line size + * AMD: EDX - reserved + * EAX, EBX - Smallest and largest monitor line size + * ECX - extensions (ignored for now) + * VIA: Reserved + * Safe to expose + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 5, uSubLeaf)) != NULL) + { + pStdFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 1, 0); + if (!(pStdFeatureLeaf->uEcx & X86_CPUID_FEATURE_ECX_MONITOR)) + pCurLeaf->uEax = pCurLeaf->uEbx = 0; + + pCurLeaf->uEcx = pCurLeaf->uEdx = 0; + if (pConfig->enmMWaitExtensions) + { + pCurLeaf->uEcx = X86_CPUID_MWAIT_ECX_EXT | X86_CPUID_MWAIT_ECX_BREAKIRQIF0; + /** @todo for now we just expose host's MWAIT C-states, although conceptually + it shall be part of our power management virtualization model */ +#if 0 + /* MWAIT sub C-states */ + pCurLeaf->uEdx = + (0 << 0) /* 0 in C0 */ | + (2 << 4) /* 2 in C1 */ | + (2 << 8) /* 2 in C2 */ | + (2 << 12) /* 2 in C3 */ | + (0 << 16) /* 0 in C4 */ + ; +#endif + } + else + pCurLeaf->uEcx = pCurLeaf->uEdx = 0; + uSubLeaf++; + } + + /* Cpuid 6: Digital Thermal Sensor and Power Management Paramenters. + * Intel: Various stuff. + * AMD: EAX, EBX, EDX - reserved. + * ECX - Bit zero is EffFreq, indicating MSR_0000_00e7 and MSR_0000_00e8 + * present. Same as intel. + * VIA: ?? + * + * We clear everything here for now. 
+ */ + cpumR3CpuIdZeroLeaf(pCpum, 6); + + /* Cpuid 7 + ECX: Structured Extended Feature Flags Enumeration + * EAX: Number of sub leaves. + * EBX+ECX+EDX: Feature flags + * + * We only have documentation for one sub-leaf, so clear all other (no need + * to remove them as such, just set them to zero). + * + * Note! When enabling new features the Synthetic CPU and Portable CPUID + * options may require adjusting (i.e. stripping what was enabled). + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 7, uSubLeaf)) != NULL) + { + switch (uSubLeaf) + { + case 0: + { + pCurLeaf->uEax = 0; /* Max ECX input is 0. */ + pCurLeaf->uEbx &= 0 + | (pConfig->enmFsGsBase ? X86_CPUID_STEXT_FEATURE_EBX_FSGSBASE : 0) + //| X86_CPUID_STEXT_FEATURE_EBX_TSC_ADJUST RT_BIT(1) + //| X86_CPUID_STEXT_FEATURE_EBX_SGX RT_BIT(2) + //| X86_CPUID_STEXT_FEATURE_EBX_BMI1 RT_BIT(3) + //| X86_CPUID_STEXT_FEATURE_EBX_HLE RT_BIT(4) + | (pConfig->enmAvx2 ? X86_CPUID_STEXT_FEATURE_EBX_AVX2 : 0) + | X86_CPUID_STEXT_FEATURE_EBX_FDP_EXCPTN_ONLY + //| X86_CPUID_STEXT_FEATURE_EBX_SMEP RT_BIT(7) + //| X86_CPUID_STEXT_FEATURE_EBX_BMI2 RT_BIT(8) + //| X86_CPUID_STEXT_FEATURE_EBX_ERMS RT_BIT(9) + | (pConfig->enmInvpcid ? X86_CPUID_STEXT_FEATURE_EBX_INVPCID : 0) + //| X86_CPUID_STEXT_FEATURE_EBX_RTM RT_BIT(11) + //| X86_CPUID_STEXT_FEATURE_EBX_PQM RT_BIT(12) + | X86_CPUID_STEXT_FEATURE_EBX_DEPR_FPU_CS_DS + //| X86_CPUID_STEXT_FEATURE_EBX_MPE RT_BIT(14) + //| X86_CPUID_STEXT_FEATURE_EBX_PQE RT_BIT(15) + //| X86_CPUID_STEXT_FEATURE_EBX_AVX512F RT_BIT(16) + //| RT_BIT(17) - reserved + | (pConfig->enmRdSeed ? X86_CPUID_STEXT_FEATURE_EBX_RDSEED : 0) + //| X86_CPUID_STEXT_FEATURE_EBX_ADX RT_BIT(19) + //| X86_CPUID_STEXT_FEATURE_EBX_SMAP RT_BIT(20) + //| RT_BIT(21) - reserved + //| RT_BIT(22) - reserved + | (pConfig->enmCLFlushOpt ? X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT : 0) + //| RT_BIT(24) - reserved + //| X86_CPUID_STEXT_FEATURE_EBX_INTEL_PT RT_BIT(25) + //| X86_CPUID_STEXT_FEATURE_EBX_AVX512PF RT_BIT(26) + //| X86_CPUID_STEXT_FEATURE_EBX_AVX512ER RT_BIT(27) + //| X86_CPUID_STEXT_FEATURE_EBX_AVX512CD RT_BIT(28) + //| X86_CPUID_STEXT_FEATURE_EBX_SHA RT_BIT(29) + //| RT_BIT(30) - reserved + //| RT_BIT(31) - reserved + ; + pCurLeaf->uEcx &= 0 + //| X86_CPUID_STEXT_FEATURE_ECX_PREFETCHWT1 - we do not do vector functions yet. + ; + pCurLeaf->uEdx &= 0 + //| X86_CPUID_STEXT_FEATURE_EDX_IBRS_IBPB RT_BIT(26) + //| X86_CPUID_STEXT_FEATURE_EDX_STIBP RT_BIT(27) + | (pConfig->enmFlushCmdMsr ? X86_CPUID_STEXT_FEATURE_EDX_FLUSH_CMD : 0) + //| X86_CPUID_STEXT_FEATURE_EDX_ARCHCAP RT_BIT(29) + ; + + /* Mask out INVPCID unless FSGSBASE is exposed due to a bug in Windows 10 SMP guests, see @bugref{9089#c15}. 
*/ + if ( !pVM->cpum.s.GuestFeatures.fFsGsBase + && (pCurLeaf->uEbx & X86_CPUID_STEXT_FEATURE_EBX_INVPCID)) + { + pCurLeaf->uEbx &= ~X86_CPUID_STEXT_FEATURE_EBX_INVPCID; + LogRel(("CPUM: Disabled INVPCID without FSGSBASE to work around buggy guests\n")); + } + + if (pCpum->u8PortableCpuIdLevel > 0) + { + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pCurLeaf->uEbx, FSGSBASE, X86_CPUID_STEXT_FEATURE_EBX_FSGSBASE, pConfig->enmFsGsBase); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, SGX, X86_CPUID_STEXT_FEATURE_EBX_SGX); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pCurLeaf->uEbx, AVX2, X86_CPUID_STEXT_FEATURE_EBX_AVX2, pConfig->enmAvx2); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, SMEP, X86_CPUID_STEXT_FEATURE_EBX_SMEP); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, BMI2, X86_CPUID_STEXT_FEATURE_EBX_BMI2); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pCurLeaf->uEbx, INVPCID, X86_CPUID_STEXT_FEATURE_EBX_INVPCID, pConfig->enmInvpcid); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, AVX512F, X86_CPUID_STEXT_FEATURE_EBX_AVX512F); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pCurLeaf->uEbx, RDSEED, X86_CPUID_STEXT_FEATURE_EBX_RDSEED, pConfig->enmRdSeed); + PORTABLE_DISABLE_FEATURE_BIT_CFG(1, pCurLeaf->uEbx, CLFLUSHOPT, X86_CPUID_STEXT_FEATURE_EBX_RDSEED, pConfig->enmCLFlushOpt); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, AVX512PF, X86_CPUID_STEXT_FEATURE_EBX_AVX512PF); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, AVX512ER, X86_CPUID_STEXT_FEATURE_EBX_AVX512ER); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, AVX512CD, X86_CPUID_STEXT_FEATURE_EBX_AVX512CD); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, SMAP, X86_CPUID_STEXT_FEATURE_EBX_SMAP); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEbx, SHA, X86_CPUID_STEXT_FEATURE_EBX_SHA); + PORTABLE_DISABLE_FEATURE_BIT( 1, pCurLeaf->uEcx, PREFETCHWT1, X86_CPUID_STEXT_FEATURE_ECX_PREFETCHWT1); + PORTABLE_DISABLE_FEATURE_BIT_CFG(3, pCurLeaf->uEdx, FLUSH_CMD, X86_CPUID_STEXT_FEATURE_EDX_FLUSH_CMD, pConfig->enmFlushCmdMsr); + } + + /* Force standard feature bits. */ + if (pConfig->enmFsGsBase == CPUMISAEXTCFG_ENABLED_ALWAYS) + pCurLeaf->uEbx |= X86_CPUID_STEXT_FEATURE_EBX_FSGSBASE; + if (pConfig->enmAvx2 == CPUMISAEXTCFG_ENABLED_ALWAYS) + pCurLeaf->uEbx |= X86_CPUID_STEXT_FEATURE_EBX_AVX2; + if (pConfig->enmRdSeed == CPUMISAEXTCFG_ENABLED_ALWAYS) + pCurLeaf->uEbx |= X86_CPUID_STEXT_FEATURE_EBX_RDSEED; + if (pConfig->enmCLFlushOpt == CPUMISAEXTCFG_ENABLED_ALWAYS) + pCurLeaf->uEbx |= X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT; + if (pConfig->enmInvpcid == CPUMISAEXTCFG_ENABLED_ALWAYS) + pCurLeaf->uEbx |= X86_CPUID_STEXT_FEATURE_EBX_INVPCID; + if (pConfig->enmFlushCmdMsr == CPUMISAEXTCFG_ENABLED_ALWAYS) + pCurLeaf->uEdx |= X86_CPUID_STEXT_FEATURE_EDX_FLUSH_CMD; + break; + } + + default: + /* Invalid index, all values are zero. */ + pCurLeaf->uEax = 0; + pCurLeaf->uEbx = 0; + pCurLeaf->uEcx = 0; + pCurLeaf->uEdx = 0; + break; + } + uSubLeaf++; + } + + /* Cpuid 8: Marked as reserved by Intel and AMD. + * We zero this since we don't know what it may have been used for. + */ + cpumR3CpuIdZeroLeaf(pCpum, 8); + + /* Cpuid 9: Direct Cache Access (DCA) Parameters + * Intel: EAX - Value of PLATFORM_DCA_CAP bits. + * EBX, ECX, EDX - reserved. + * AMD: Reserved + * VIA: ?? + * + * We zero this. + */ + cpumR3CpuIdZeroLeaf(pCpum, 9); + + /* Cpuid 0xa: Architectural Performance Monitor Features + * Intel: EAX - Value of PLATFORM_DCA_CAP bits. + * EBX, ECX, EDX - reserved. + * AMD: Reserved + * VIA: ?? + * + * We zero this, for now at least. 
+ */ + cpumR3CpuIdZeroLeaf(pCpum, 10); + + /* Cpuid 0xb+ECX: x2APIC Features / Processor Topology. + * Intel: EAX - APCI ID shift right for next level. + * EBX - Factory configured cores/threads at this level. + * ECX - Level number (same as input) and level type (1,2,0). + * EDX - Extended initial APIC ID. + * AMD: Reserved + * VIA: ?? + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 11, uSubLeaf)) != NULL) + { + if (pCurLeaf->fFlags & CPUMCPUIDLEAF_F_CONTAINS_APIC_ID) + { + uint8_t bLevelType = RT_BYTE2(pCurLeaf->uEcx); + if (bLevelType == 1) + { + /* Thread level - we don't do threads at the moment. */ + pCurLeaf->uEax = 0; /** @todo is this correct? Real CPUs never do 0 here, I think... */ + pCurLeaf->uEbx = 1; + } + else if (bLevelType == 2) + { + /* Core level. */ + pCurLeaf->uEax = 1; /** @todo real CPUs are supposed to be in the 4-6 range, not 1. Our APIC ID assignments are a little special... */ +#ifdef VBOX_WITH_MULTI_CORE + while (RT_BIT_32(pCurLeaf->uEax) < pVM->cCpus) + pCurLeaf->uEax++; +#endif + pCurLeaf->uEbx = pVM->cCpus; + } + else + { + AssertLogRelMsg(bLevelType == 0, ("bLevelType=%#x uSubLeaf=%#x\n", bLevelType, uSubLeaf)); + pCurLeaf->uEax = 0; + pCurLeaf->uEbx = 0; + pCurLeaf->uEcx = 0; + } + pCurLeaf->uEcx = (pCurLeaf->uEcx & UINT32_C(0xffffff00)) | (uSubLeaf & 0xff); + pCurLeaf->uEdx = 0; /* APIC ID is filled in by CPUMGetGuestCpuId() at runtime. Init for EMT(0) as usual. */ + } + else + { + pCurLeaf->uEax = 0; + pCurLeaf->uEbx = 0; + pCurLeaf->uEcx = 0; + pCurLeaf->uEdx = 0; + } + uSubLeaf++; + } + + /* Cpuid 0xc: Marked as reserved by Intel and AMD. + * We zero this since we don't know what it may have been used for. + */ + cpumR3CpuIdZeroLeaf(pCpum, 12); + + /* Cpuid 0xd + ECX: Processor Extended State Enumeration + * ECX=0: EAX - Valid bits in XCR0[31:0]. + * EBX - Maximum state size as per current XCR0 value. + * ECX - Maximum state size for all supported features. + * EDX - Valid bits in XCR0[63:32]. + * ECX=1: EAX - Various X-features. + * EBX - Maximum state size as per current XCR0|IA32_XSS value. + * ECX - Valid bits in IA32_XSS[31:0]. + * EDX - Valid bits in IA32_XSS[63:32]. + * ECX=N, where N in 2..63 and indicates a bit in XCR0 and/or IA32_XSS, + * if the bit invalid all four registers are set to zero. + * EAX - The state size for this feature. + * EBX - The state byte offset of this feature. + * ECX - Bit 0 indicates whether this sub-leaf maps to a valid IA32_XSS bit (=1) or a valid XCR0 bit (=0). + * EDX - Reserved, but is set to zero if invalid sub-leaf index. + * + * Clear them all as we don't currently implement extended CPU state. + */ + /* Figure out the supported XCR0/XSS mask component and make sure CPUID[1].ECX[27] = CR4.OSXSAVE. */ + uint64_t fGuestXcr0Mask = 0; + pStdFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 1, 0); + if (pStdFeatureLeaf && (pStdFeatureLeaf->uEcx & X86_CPUID_FEATURE_ECX_XSAVE)) + { + fGuestXcr0Mask = XSAVE_C_X87 | XSAVE_C_SSE; + if (pStdFeatureLeaf && (pStdFeatureLeaf->uEcx & X86_CPUID_FEATURE_ECX_AVX)) + fGuestXcr0Mask |= XSAVE_C_YMM; + pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 7, 0); + if (pCurLeaf && (pCurLeaf->uEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX512F)) + fGuestXcr0Mask |= XSAVE_C_ZMM_16HI | XSAVE_C_ZMM_HI256 | XSAVE_C_OPMASK; + fGuestXcr0Mask &= pCpum->fXStateHostMask; + + pStdFeatureLeaf->fFlags |= CPUMCPUIDLEAF_F_CONTAINS_OSXSAVE; + } + pStdFeatureLeaf = NULL; + pCpum->fXStateGuestMask = fGuestXcr0Mask; + + /* Work the sub-leaves. 
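// [Editor's illustration - not part of the upstream patch] A sketch of how the guest XCR0 mask
// computed above is derived from the sanitized feature bits: x87 and SSE are always included
// once XSAVE is exposed, AVX adds the YMM component, AVX-512 adds the opmask/ZMM components,
// and the result is clipped to what the host actually offers. The XCR0 component bit numbers
// are architectural; the function itself is illustrative.
#include <cstdint>

static uint64_t miniGuestXcr0Mask(bool fXSave, bool fAvx, bool fAvx512f, uint64_t fHostXcr0)
{
    if (!fXSave)
        return 0;
    uint64_t fMask = UINT64_C(0x1) | UINT64_C(0x2);                 /* X87 (bit 0) + SSE (bit 1) */
    if (fAvx)
        fMask |= UINT64_C(0x4);                                     /* YMM_Hi128 (bit 2)         */
    if (fAvx512f)
        fMask |= UINT64_C(0x20) | UINT64_C(0x40) | UINT64_C(0x80);  /* opmask, ZMM_Hi256, Hi16_ZMM */
    return fMask & fHostXcr0;                                       /* never beyond host support */
}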
*/ + uint32_t cbXSaveMaxActual = CPUM_MIN_XSAVE_AREA_SIZE; + uint32_t cbXSaveMaxReport = CPUM_MIN_XSAVE_AREA_SIZE; + for (uSubLeaf = 0; uSubLeaf < 63; uSubLeaf++) + { + pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 13, uSubLeaf); + if (pCurLeaf) + { + if (fGuestXcr0Mask) + { + switch (uSubLeaf) + { + case 0: + pCurLeaf->uEax &= RT_LO_U32(fGuestXcr0Mask); + pCurLeaf->uEdx &= RT_HI_U32(fGuestXcr0Mask); + AssertLogRelMsgReturn((pCurLeaf->uEax & (XSAVE_C_X87 | XSAVE_C_SSE)) == (XSAVE_C_X87 | XSAVE_C_SSE), + ("CPUID(0xd/0).EAX missing mandatory X87 or SSE bits: %#RX32", pCurLeaf->uEax), + VERR_CPUM_IPE_1); + cbXSaveMaxActual = pCurLeaf->uEcx; + AssertLogRelMsgReturn(cbXSaveMaxActual <= CPUM_MAX_XSAVE_AREA_SIZE && cbXSaveMaxActual >= CPUM_MIN_XSAVE_AREA_SIZE, + ("%#x max=%#x\n", cbXSaveMaxActual, CPUM_MAX_XSAVE_AREA_SIZE), VERR_CPUM_IPE_2); + AssertLogRelMsgReturn(pCurLeaf->uEbx >= CPUM_MIN_XSAVE_AREA_SIZE && pCurLeaf->uEbx <= cbXSaveMaxActual, + ("ebx=%#x cbXSaveMaxActual=%#x\n", pCurLeaf->uEbx, cbXSaveMaxActual), + VERR_CPUM_IPE_2); + continue; + case 1: + pCurLeaf->uEax &= 0; + pCurLeaf->uEcx &= 0; + pCurLeaf->uEdx &= 0; + /** @todo what about checking ebx? */ + continue; + default: + if (fGuestXcr0Mask & RT_BIT_64(uSubLeaf)) + { + AssertLogRelMsgReturn( pCurLeaf->uEax <= cbXSaveMaxActual + && pCurLeaf->uEax > 0 + && pCurLeaf->uEbx < cbXSaveMaxActual + && pCurLeaf->uEbx >= CPUM_MIN_XSAVE_AREA_SIZE + && pCurLeaf->uEbx + pCurLeaf->uEax <= cbXSaveMaxActual, + ("%#x: eax=%#x ebx=%#x cbMax=%#x\n", + uSubLeaf, pCurLeaf->uEax, pCurLeaf->uEbx, cbXSaveMaxActual), + VERR_CPUM_IPE_2); + AssertLogRel(!(pCurLeaf->uEcx & 1)); + pCurLeaf->uEcx = 0; /* Bit 0 should be zero (XCR0), the reset are reserved... */ + pCurLeaf->uEdx = 0; /* it's reserved... */ + if (pCurLeaf->uEbx + pCurLeaf->uEax > cbXSaveMaxReport) + cbXSaveMaxReport = pCurLeaf->uEbx + pCurLeaf->uEax; + continue; + } + break; + } + } + + /* Clear the leaf. */ + pCurLeaf->uEax = 0; + pCurLeaf->uEbx = 0; + pCurLeaf->uEcx = 0; + pCurLeaf->uEdx = 0; + } + } + + /* Update the max and current feature sizes to shut up annoying Linux kernels. */ + if (cbXSaveMaxReport != cbXSaveMaxActual && fGuestXcr0Mask) + { + pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 13, 0); + if (pCurLeaf) + { + LogRel(("CPUM: Changing leaf 13[0]: EBX=%#RX32 -> %#RX32, ECX=%#RX32 -> %#RX32\n", + pCurLeaf->uEbx, cbXSaveMaxReport, pCurLeaf->uEcx, cbXSaveMaxReport)); + pCurLeaf->uEbx = cbXSaveMaxReport; + pCurLeaf->uEcx = cbXSaveMaxReport; + } + } + + /* Cpuid 0xe: Marked as reserved by Intel and AMD. + * We zero this since we don't know what it may have been used for. + */ + cpumR3CpuIdZeroLeaf(pCpum, 14); + + /* Cpuid 0xf + ECX: Platform quality of service monitoring (PQM), + * also known as Intel Resource Director Technology (RDT) Monitoring + * We zero this as we don't currently virtualize PQM. + */ + cpumR3CpuIdZeroLeaf(pCpum, 15); + + /* Cpuid 0x10 + ECX: Platform quality of service enforcement (PQE), + * also known as Intel Resource Director Technology (RDT) Allocation + * We zero this as we don't currently virtualize PQE. + */ + cpumR3CpuIdZeroLeaf(pCpum, 16); + + /* Cpuid 0x11: Marked as reserved by Intel and AMD. + * We zero this since we don't know what it may have been used for. + */ + cpumR3CpuIdZeroLeaf(pCpum, 17); + + /* Cpuid 0x12 + ECX: SGX resource enumeration. + * We zero this as we don't currently virtualize this. + */ + cpumR3CpuIdZeroLeaf(pCpum, 18); + + /* Cpuid 0x13: Marked as reserved by Intel and AMD. 
+ * We zero this since we don't know what it may have been used for. + */ + cpumR3CpuIdZeroLeaf(pCpum, 19); + + /* Cpuid 0x14 + ECX: Processor Trace (PT) capability enumeration. + * We zero this as we don't currently virtualize this. + */ + cpumR3CpuIdZeroLeaf(pCpum, 20); + + /* Cpuid 0x15: Timestamp Counter / Core Crystal Clock info. + * Intel: uTscFrequency = uCoreCrystalClockFrequency * EBX / EAX. + * EAX - denominator (unsigned). + * EBX - numerator (unsigned). + * ECX, EDX - reserved. + * AMD: Reserved / undefined / not implemented. + * VIA: Reserved / undefined / not implemented. + * We zero this as we don't currently virtualize this. + */ + cpumR3CpuIdZeroLeaf(pCpum, 21); + + /* Cpuid 0x16: Processor frequency info + * Intel: EAX - Core base frequency in MHz. + * EBX - Core maximum frequency in MHz. + * ECX - Bus (reference) frequency in MHz. + * EDX - Reserved. + * AMD: Reserved / undefined / not implemented. + * VIA: Reserved / undefined / not implemented. + * We zero this as we don't currently virtualize this. + */ + cpumR3CpuIdZeroLeaf(pCpum, 22); + + /* Cpuid 0x17..0x10000000: Unknown. + * We don't know these and what they mean, so remove them. */ + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + UINT32_C(0x00000017), UINT32_C(0x0fffffff)); + + + /* CpuId 0x40000000..0x4fffffff: Reserved for hypervisor/emulator. + * We remove all these as we're a hypervisor and must provide our own. + */ + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + UINT32_C(0x40000000), UINT32_C(0x4fffffff)); + + + /* Cpuid 0x80000000 is harmless. */ + + /* Cpuid 0x80000001 is handled with cpuid 1 way up above. */ + + /* Cpuid 0x80000002...0x80000004 contains the processor name and is considered harmless. */ + + /* Cpuid 0x800000005 & 0x800000006 contain information about L1, L2 & L3 cache and TLB identifiers. + * Safe to pass on to the guest. + * + * AMD: 0x800000005 L1 cache information + * 0x800000006 L2/L3 cache information + * Intel: 0x800000005 reserved + * 0x800000006 L2 cache information + * VIA: 0x800000005 TLB and L1 cache information + * 0x800000006 L2 cache information + */ + + /* Cpuid 0x800000007: Advanced Power Management Information. + * AMD: EAX: Processor feedback capabilities. + * EBX: RAS capabilites. + * ECX: Advanced power monitoring interface. + * EDX: Enhanced power management capabilities. + * Intel: EAX, EBX, ECX - reserved. + * EDX - Invariant TSC indicator supported (bit 8), the rest is reserved. + * VIA: Reserved + * We let the guest see EDX_TSCINVAR (and later maybe EDX_EFRO). Actually, we should set EDX_TSCINVAR. + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000007), uSubLeaf)) != NULL) + { + pCurLeaf->uEax = pCurLeaf->uEbx = pCurLeaf->uEcx = 0; + if (pCpum->GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + { + /* + * Older 64-bit linux kernels blindly assume that the AMD performance counters work + * if X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR is set, see @bugref{7243#c85}. Exposing this + * bit is now configurable. 
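+ * Net effect of the masking below: an AMD guest sees at most the TscInvariant
+ * bit (bit 8) set in CPUID[0x80000007].EDX, and even that is cleared again
+ * further down when /CPUM/InvariantTsc is false.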
+ */ + pCurLeaf->uEdx &= 0 + //| X86_CPUID_AMD_ADVPOWER_EDX_TS + //| X86_CPUID_AMD_ADVPOWER_EDX_FID + //| X86_CPUID_AMD_ADVPOWER_EDX_VID + //| X86_CPUID_AMD_ADVPOWER_EDX_TTP + //| X86_CPUID_AMD_ADVPOWER_EDX_TM + //| X86_CPUID_AMD_ADVPOWER_EDX_STC + //| X86_CPUID_AMD_ADVPOWER_EDX_MC + //| X86_CPUID_AMD_ADVPOWER_EDX_HWPSTATE + | X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR + //| X86_CPUID_AMD_ADVPOWER_EDX_CPB RT_BIT(9) + //| X86_CPUID_AMD_ADVPOWER_EDX_EFRO RT_BIT(10) + //| X86_CPUID_AMD_ADVPOWER_EDX_PFI RT_BIT(11) + //| X86_CPUID_AMD_ADVPOWER_EDX_PA RT_BIT(12) + | 0; + } + else + pCurLeaf->uEdx &= X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR; + if (!pConfig->fInvariantTsc) + pCurLeaf->uEdx &= ~X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR; + uSubLeaf++; + } + + /* Cpuid 0x80000008: + * AMD: EBX, EDX - reserved + * EAX: Virtual/Physical/Guest address Size + * ECX: Number of cores + APICIdCoreIdSize + * Intel: EAX: Virtual/Physical address Size + * EBX, ECX, EDX - reserved + * VIA: EAX: Virtual/Physical address Size + * EBX, ECX, EDX - reserved + * + * We only expose the virtual+pysical address size to the guest atm. + * On AMD we set the core count, but not the apic id stuff as we're + * currently not doing the apic id assignments in a complatible manner. + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000008), uSubLeaf)) != NULL) + { + pCurLeaf->uEax &= UINT32_C(0x0000ffff); /* Virtual & physical address sizes only. */ + pCurLeaf->uEbx = 0; /* reserved - [12] == IBPB */ + pCurLeaf->uEdx = 0; /* reserved */ + + /* Set APICIdCoreIdSize to zero (use legacy method to determine the number of cores per cpu). + * Set core count to 0, indicating 1 core. Adjust if we're in multi core mode on AMD. */ + pCurLeaf->uEcx = 0; +#ifdef VBOX_WITH_MULTI_CORE + if ( pVM->cCpus > 1 + && pCpum->GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + pCurLeaf->uEcx |= (pVM->cCpus - 1) & UINT32_C(0xff); +#endif + uSubLeaf++; + } + + /* Cpuid 0x80000009: Reserved + * We zero this since we don't know what it may have been used for. + */ + cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0x80000009)); + + /* Cpuid 0x8000000a: SVM Information + * AMD: EAX - SVM revision. + * EBX - Number of ASIDs. + * ECX - Reserved. + * EDX - SVM Feature identification. + */ + pExtFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x80000001), 0); + if (pExtFeatureLeaf->uEcx & X86_CPUID_AMD_FEATURE_ECX_SVM) + { + PCPUMCPUIDLEAF pSvmFeatureLeaf = cpumR3CpuIdGetExactLeaf(pCpum, 0x8000000a, 0); + pSvmFeatureLeaf->uEax = 0x1; + pSvmFeatureLeaf->uEbx = 0x8000; /** @todo figure out virtual NASID. */ + pSvmFeatureLeaf->uEcx = 0; + pSvmFeatureLeaf->uEdx &= ( X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE /** @todo Support other SVM features */ + | X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID + | X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS); + } + else + cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0x8000000a)); + + /* Cpuid 0x8000000b thru 0x80000018: Reserved + * We clear these as we don't know what purpose they might have. */ + for (uint32_t uLeaf = UINT32_C(0x8000000b); uLeaf <= UINT32_C(0x80000018); uLeaf++) + cpumR3CpuIdZeroLeaf(pCpum, uLeaf); + + /* Cpuid 0x80000019: TLB configuration + * Seems to be harmless, pass them thru as is. */ + + /* Cpuid 0x8000001a: Peformance optimization identifiers. + * Strip anything we don't know what is or addresses feature we don't implement. */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x8000001a), uSubLeaf)) != NULL) + { + pCurLeaf->uEax &= RT_BIT_32(0) /* FP128 - use 1x128-bit instead of 2x64-bit. 
*/ + | RT_BIT_32(1) /* MOVU - Prefere unaligned MOV over MOVL + MOVH. */ + //| RT_BIT_32(2) /* FP256 - use 1x256-bit instead of 2x128-bit. */ + ; + pCurLeaf->uEbx = 0; /* reserved */ + pCurLeaf->uEcx = 0; /* reserved */ + pCurLeaf->uEdx = 0; /* reserved */ + uSubLeaf++; + } + + /* Cpuid 0x8000001b: Instruct based sampling (IBS) information. + * Clear this as we don't currently virtualize this feature. */ + cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0x8000001b)); + + /* Cpuid 0x8000001c: Lightweight profiling (LWP) information. + * Clear this as we don't currently virtualize this feature. */ + cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0x8000001c)); + + /* Cpuid 0x8000001d+ECX: Get cache configuration descriptors. + * We need to sanitize the cores per cache (EAX[25:14]). + * + * This is very much the same as Intel's CPUID(4) leaf, except EAX[31:26] + * and EDX[2] are reserved here, and EAX[14:25] is documented having a + * slightly different meaning. + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x8000001d), uSubLeaf)) != NULL) + { +#ifdef VBOX_WITH_MULTI_CORE + uint32_t cCores = ((pCurLeaf->uEax >> 14) & 0xfff) + 1; + if (cCores > pVM->cCpus) + cCores = pVM->cCpus; + pCurLeaf->uEax &= UINT32_C(0x00003fff); + pCurLeaf->uEax |= ((cCores - 1) & 0xfff) << 14; +#else + pCurLeaf->uEax &= UINT32_C(0x00003fff); +#endif + uSubLeaf++; + } + + /* Cpuid 0x8000001e: Get APIC / unit / node information. + * If AMD, we configure it for our layout (on EMT(0)). In the multi-core + * setup, we have one compute unit with all the cores in it. Single node. + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0x8000001e), uSubLeaf)) != NULL) + { + pCurLeaf->uEax = 0; /* Extended APIC ID = EMT(0).idApic (== 0). */ + if (pCurLeaf->fFlags & CPUMCPUIDLEAF_F_CONTAINS_APIC_ID) + { +#ifdef VBOX_WITH_MULTI_CORE + pCurLeaf->uEbx = pVM->cCpus < 0x100 + ? (pVM->cCpus - 1) << 8 : UINT32_C(0x0000ff00); /* Compute unit ID 0, core per unit. */ +#else + pCurLeaf->uEbx = 0; /* Compute unit ID 0, 1 core per unit. */ +#endif + pCurLeaf->uEcx = 0; /* Node ID 0, 1 node per CPU. */ + } + else + { + Assert(pCpum->GuestFeatures.enmCpuVendor != CPUMCPUVENDOR_AMD); + pCurLeaf->uEbx = 0; /* Reserved. */ + pCurLeaf->uEcx = 0; /* Reserved. */ + } + pCurLeaf->uEdx = 0; /* Reserved. */ + uSubLeaf++; + } + + /* Cpuid 0x8000001f...0x8ffffffd: Unknown. + * We don't know these and what they mean, so remove them. */ + cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves, + UINT32_C(0x8000001f), UINT32_C(0x8ffffffd)); + + /* Cpuid 0x8ffffffe: Mystery AMD K6 leaf. + * Just pass it thru for now. */ + + /* Cpuid 0x8fffffff: Mystery hammer time leaf! + * Just pass it thru for now. */ + + /* Cpuid 0xc0000000: Centaur stuff. + * Harmless, pass it thru. */ + + /* Cpuid 0xc0000001: Centaur features. + * VIA: EAX - Family, model, stepping. + * EDX - Centaur extended feature flags. Nothing interesting, except may + * FEMMS (bit 5), but VIA marks it as 'reserved', so never mind. + * EBX, ECX - reserved. + * We keep EAX but strips the rest. + */ + uSubLeaf = 0; + while ((pCurLeaf = cpumR3CpuIdGetExactLeaf(pCpum, UINT32_C(0xc0000001), uSubLeaf)) != NULL) + { + pCurLeaf->uEbx = 0; + pCurLeaf->uEcx = 0; + pCurLeaf->uEdx = 0; /* Bits 0 thru 9 are documented on sandpil.org, but we don't want them, except maybe 5 (FEMMS). */ + uSubLeaf++; + } + + /* Cpuid 0xc0000002: Old Centaur Current Performance Data. + * We only have fixed stale values, but should be harmless. 
*/
+
+ /* Cpuid 0xc0000003: Reserved.
+ * We zero this since we don't know what it may have been used for.
+ */
+ cpumR3CpuIdZeroLeaf(pCpum, UINT32_C(0xc0000003));
+
+ /* Cpuid 0xc0000004: Centaur Performance Info.
+ * We only have fixed stale values, but should be harmless. */
+
+
+ /* Cpuid 0xc0000005...0xcfffffff: Unknown.
+ * We don't know these and what they mean, so remove them. */
+ cpumR3CpuIdRemoveRange(pCpum->GuestInfo.paCpuIdLeavesR3, &pCpum->GuestInfo.cCpuIdLeaves,
+ UINT32_C(0xc0000005), UINT32_C(0xcfffffff));
+
+ return VINF_SUCCESS;
+#undef PORTABLE_DISABLE_FEATURE_BIT
+#undef PORTABLE_CLEAR_BITS_WHEN
+}
+
+
+/**
+ * Reads a value in /CPUM/IsaExts/ node.
+ *
+ * @returns VBox status code (error message raised).
+ * @param pVM The cross context VM structure. (For errors.)
+ * @param pIsaExts The /CPUM/IsaExts node (can be NULL).
+ * @param pszValueName The value / extension name.
+ * @param penmValue Where to return the choice.
+ * @param enmDefault The default choice.
+ */
+static int cpumR3CpuIdReadIsaExtCfg(PVM pVM, PCFGMNODE pIsaExts, const char *pszValueName,
+ CPUMISAEXTCFG *penmValue, CPUMISAEXTCFG enmDefault)
+{
+ /*
+ * Try integer encoding first.
+ */
+ uint64_t uValue;
+ int rc = CFGMR3QueryInteger(pIsaExts, pszValueName, &uValue);
+ if (RT_SUCCESS(rc))
+ switch (uValue)
+ {
+ case 0: *penmValue = CPUMISAEXTCFG_DISABLED; break;
+ case 1: *penmValue = CPUMISAEXTCFG_ENABLED_SUPPORTED; break;
+ case 2: *penmValue = CPUMISAEXTCFG_ENABLED_ALWAYS; break;
+ case 9: *penmValue = CPUMISAEXTCFG_ENABLED_PORTABLE; break;
+ default:
+ return VMSetError(pVM, VERR_CPUM_INVALID_CONFIG_VALUE, RT_SRC_POS,
+ "Invalid config value for '/CPUM/IsaExts/%s': %llu (expected 0/'disabled', 1/'enabled', 2/'portable', or 9/'forced')",
+ pszValueName, uValue);
+ }
+ /*
+ * If missing, use default.
+ */
+ else if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT)
+ *penmValue = enmDefault;
+ else
+ {
+ if (rc == VERR_CFGM_NOT_INTEGER)
+ {
+ /*
+ * Not an integer, try read it as a string.
+ */
+ char szValue[32];
+ rc = CFGMR3QueryString(pIsaExts, pszValueName, szValue, sizeof(szValue));
+ if (RT_SUCCESS(rc))
+ {
+ RTStrToLower(szValue);
+ size_t cchValue = strlen(szValue);
+#define EQ(a_str) (cchValue == sizeof(a_str) - 1U && !memcmp(szValue, a_str, sizeof(a_str) - 1))
+ if ( EQ("disabled") || EQ("disable") || EQ("off") || EQ("no"))
+ *penmValue = CPUMISAEXTCFG_DISABLED;
+ else if (EQ("enabled") || EQ("enable") || EQ("on") || EQ("yes"))
+ *penmValue = CPUMISAEXTCFG_ENABLED_SUPPORTED;
+ else if (EQ("forced") || EQ("force") || EQ("always"))
+ *penmValue = CPUMISAEXTCFG_ENABLED_ALWAYS;
+ else if (EQ("portable"))
+ *penmValue = CPUMISAEXTCFG_ENABLED_PORTABLE;
+ else if (EQ("default") || EQ("def"))
+ *penmValue = enmDefault;
+ else
+ return VMSetError(pVM, VERR_CPUM_INVALID_CONFIG_VALUE, RT_SRC_POS,
+ "Invalid config value for '/CPUM/IsaExts/%s': '%s' (expected 0/'disabled', 1/'enabled', 2/'portable', or 9/'forced')",
+ pszValueName, szValue);
+#undef EQ
+ }
+ }
+ if (RT_FAILURE(rc))
+ return VMSetError(pVM, rc, RT_SRC_POS, "Error reading config value '/CPUM/IsaExts/%s': %Rrc", pszValueName, rc);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads a value in /CPUM/IsaExts/ node, forcing it to DISABLED if wanted.
+ *
+ * @returns VBox status code (error message raised).
+ * @param pVM The cross context VM structure. (For errors.)
+ * @param pIsaExts The /CPUM/IsaExts node (can be NULL).
+ * @param pszValueName The value / extension name.
+ * @param penmValue Where to return the choice.
+ * @param enmDefault The default choice. + * @param fAllowed Allowed choice. Applied both to the result and to + * the default value. + */ +static int cpumR3CpuIdReadIsaExtCfgEx(PVM pVM, PCFGMNODE pIsaExts, const char *pszValueName, + CPUMISAEXTCFG *penmValue, CPUMISAEXTCFG enmDefault, bool fAllowed) +{ + int rc; + if (fAllowed) + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, pszValueName, penmValue, enmDefault); + else + { + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, pszValueName, penmValue, false /*enmDefault*/); + if (RT_SUCCESS(rc) && *penmValue == CPUMISAEXTCFG_ENABLED_ALWAYS) + LogRel(("CPUM: Ignoring forced '%s'\n", pszValueName)); + *penmValue = CPUMISAEXTCFG_DISABLED; + } + return rc; +} + + +/** + * Reads a value in /CPUM/IsaExts/ node that used to be located in /CPUM/. + * + * @returns VBox status code (error message raised). + * @param pVM The cross context VM structure. (For errors.) + * @param pIsaExts The /CPUM/IsaExts node (can be NULL). + * @param pCpumCfg The /CPUM node (can be NULL). + * @param pszValueName The value / extension name. + * @param penmValue Where to return the choice. + * @param enmDefault The default choice. + */ +static int cpumR3CpuIdReadIsaExtCfgLegacy(PVM pVM, PCFGMNODE pIsaExts, PCFGMNODE pCpumCfg, const char *pszValueName, + CPUMISAEXTCFG *penmValue, CPUMISAEXTCFG enmDefault) +{ + if (CFGMR3Exists(pCpumCfg, pszValueName)) + { + if (!CFGMR3Exists(pIsaExts, pszValueName)) + LogRel(("Warning: /CPUM/%s is deprecated, use /CPUM/IsaExts/%s instead.\n", pszValueName, pszValueName)); + else + return VMSetError(pVM, VERR_DUPLICATE, RT_SRC_POS, + "Duplicate config values '/CPUM/%s' and '/CPUM/IsaExts/%s' - please remove the former!", + pszValueName, pszValueName); + + bool fLegacy; + int rc = CFGMR3QueryBoolDef(pCpumCfg, pszValueName, &fLegacy, enmDefault != CPUMISAEXTCFG_DISABLED); + if (RT_SUCCESS(rc)) + { + *penmValue = fLegacy; + return VINF_SUCCESS; + } + return VMSetError(pVM, VERR_DUPLICATE, RT_SRC_POS, "Error querying '/CPUM/%s': %Rrc", pszValueName, rc); + } + + return cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, pszValueName, penmValue, enmDefault); +} + + +static int cpumR3CpuIdReadConfig(PVM pVM, PCPUMCPUIDCONFIG pConfig, PCFGMNODE pCpumCfg, bool fNestedPagingAndFullGuestExec) +{ + int rc; + + /** @cfgm{/CPUM/PortableCpuIdLevel, 8-bit, 0, 3, 0} + * When non-zero CPUID features that could cause portability issues will be + * stripped. The higher the value the more features gets stripped. Higher + * values should only be used when older CPUs are involved since it may + * harm performance and maybe also cause problems with specific guests. */ + rc = CFGMR3QueryU8Def(pCpumCfg, "PortableCpuIdLevel", &pVM->cpum.s.u8PortableCpuIdLevel, 0); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/GuestCpuName, string} + * The name of the CPU we're to emulate. The default is the host CPU. + * Note! CPUs other than "host" one is currently unsupported. */ + rc = CFGMR3QueryStringDef(pCpumCfg, "GuestCpuName", pConfig->szCpuName, sizeof(pConfig->szCpuName), "host"); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/NT4LeafLimit, boolean, false} + * Limit the number of standard CPUID leaves to 0..3 to prevent NT4 from + * bugchecking with MULTIPROCESSOR_CONFIGURATION_NOT_SUPPORTED (0x3e). + * This option corresponds somewhat to IA32_MISC_ENABLES.BOOT_NT4[bit 22]. 
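+ * Like other CFGM values under /CPUM/, this can typically be set from the
+ * host side through the extradata mechanism, e.g. roughly (VM name is a
+ * placeholder):
+ * VBoxManage setextradata "MyVM" "VBoxInternal/CPUM/NT4LeafLimit" 1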
+ */ + rc = CFGMR3QueryBoolDef(pCpumCfg, "NT4LeafLimit", &pConfig->fNt4LeafLimit, false); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/InvariantTsc, boolean, true} + * Pass-through the invariant TSC flag in 0x80000007 if available on the host + * CPU. On AMD CPUs, users may wish to suppress it to avoid trouble from older + * 64-bit linux guests which assume the presence of AMD performance counters + * that we do not virtualize. + */ + rc = CFGMR3QueryBoolDef(pCpumCfg, "InvariantTsc", &pConfig->fInvariantTsc, true); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/ForceVme, boolean, false} + * Always expose the VME (Virtual-8086 Mode Extensions) capability if true. + * By default the flag is passed thru as is from the host CPU, except + * on AMD Ryzen CPUs where it's masked to avoid trouble with XP/Server 2003 + * guests and DOS boxes in general. + */ + rc = CFGMR3QueryBoolDef(pCpumCfg, "ForceVme", &pConfig->fForceVme, false); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/MaxIntelFamilyModelStep, uint32_t, UINT32_MAX} + * Restrict the reported CPU family+model+stepping of intel CPUs. This is + * probably going to be a temporary hack, so don't depend on this. + * The 1st byte of the value is the stepping, the 2nd byte value is the model + * number and the 3rd byte value is the family, and the 4th value must be zero. + */ + rc = CFGMR3QueryU32Def(pCpumCfg, "MaxIntelFamilyModelStep", &pConfig->uMaxIntelFamilyModelStep, UINT32_MAX); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/MaxStdLeaf, uint32_t, 0x00000016} + * The last standard leaf to keep. The actual last value that is stored in EAX + * is RT_MAX(CPUID[0].EAX,/CPUM/MaxStdLeaf). Leaves beyond the max leaf are + * removed. (This works independently of and differently from NT4LeafLimit.) + * The default is usually set to what we're able to reasonably sanitize. + */ + rc = CFGMR3QueryU32Def(pCpumCfg, "MaxStdLeaf", &pConfig->uMaxStdLeaf, UINT32_C(0x00000016)); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/MaxExtLeaf, uint32_t, 0x8000001e} + * The last extended leaf to keep. The actual last value that is stored in EAX + * is RT_MAX(CPUID[0x80000000].EAX,/CPUM/MaxStdLeaf). Leaves beyond the max + * leaf are removed. The default is set to what we're able to sanitize. + */ + rc = CFGMR3QueryU32Def(pCpumCfg, "MaxExtLeaf", &pConfig->uMaxExtLeaf, UINT32_C(0x8000001e)); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/MaxCentaurLeaf, uint32_t, 0xc0000004} + * The last extended leaf to keep. The actual last value that is stored in EAX + * is RT_MAX(CPUID[0xc0000000].EAX,/CPUM/MaxCentaurLeaf). Leaves beyond the max + * leaf are removed. The default is set to what we're able to sanitize. + */ + rc = CFGMR3QueryU32Def(pCpumCfg, "MaxCentaurLeaf", &pConfig->uMaxCentaurLeaf, UINT32_C(0xc0000004)); + AssertLogRelRCReturn(rc, rc); + + bool fQueryNestedHwvirt = false; +#ifdef VBOX_WITH_NESTED_HWVIRT_SVM + fQueryNestedHwvirt |= RT_BOOL(pVM->cpum.s.HostFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD); +#endif +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + fQueryNestedHwvirt |= RT_BOOL( pVM->cpum.s.HostFeatures.enmCpuVendor == CPUMCPUVENDOR_INTEL + || pVM->cpum.s.HostFeatures.enmCpuVendor == CPUMCPUVENDOR_VIA); +#endif + if (fQueryNestedHwvirt) + { + /** @cfgm{/CPUM/NestedHWVirt, bool, false} + * Whether to expose the hardware virtualization (VMX/SVM) feature to the guest. + * The default is false, and when enabled requires a 64-bit CPU with support for + * nested-paging and AMD-V or unrestricted guest mode. 
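+ * Frontends normally drive this setting, e.g. roughly (VM name is a
+ * placeholder):
+ * VBoxManage modifyvm "MyVM" --nested-hw-virt on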
+ */ + rc = CFGMR3QueryBoolDef(pCpumCfg, "NestedHWVirt", &pConfig->fNestedHWVirt, false); + AssertLogRelRCReturn(rc, rc); + if ( pConfig->fNestedHWVirt + && !fNestedPagingAndFullGuestExec) + return VMSetError(pVM, VERR_CPUM_INVALID_HWVIRT_CONFIG, RT_SRC_POS, + "Cannot enable nested VT-x/AMD-V without nested-paging and unresricted guest execution!\n"); + + /** @todo Think about enabling this later with NEM/KVM. */ + if ( pConfig->fNestedHWVirt + && VM_IS_NEM_ENABLED(pVM)) + { + LogRel(("CPUM: WARNING! Can't turn on nested VT-x/AMD-V when NEM is used!\n")); + pConfig->fNestedHWVirt = false; + } + +#if HC_ARCH_BITS == 32 + /* We don't support nested hardware virtualization on 32-bit hosts. */ + if (pConfig->fNestedHWVirt) + return VMSetError(pVM, VERR_CPUM_INVALID_HWVIRT_CONFIG, RT_SRC_POS, + "Cannot enable nested VT-x/AMD-V on a 32-bit host\n"); +#endif + } + + /* + * Instruction Set Architecture (ISA) Extensions. + */ + PCFGMNODE pIsaExts = CFGMR3GetChild(pCpumCfg, "IsaExts"); + if (pIsaExts) + { + rc = CFGMR3ValidateConfig(pIsaExts, "/CPUM/IsaExts/", + "CMPXCHG16B" + "|MONITOR" + "|MWaitExtensions" + "|SSE4.1" + "|SSE4.2" + "|XSAVE" + "|AVX" + "|AVX2" + "|AESNI" + "|PCLMUL" + "|POPCNT" + "|MOVBE" + "|RDRAND" + "|RDSEED" + "|CLFLUSHOPT" + "|FSGSBASE" + "|PCID" + "|INVPCID" + "|FlushCmdMsr" + "|ABM" + "|SSE4A" + "|MISALNSSE" + "|3DNOWPRF" + "|AXMMX" + , "" /*pszValidNodes*/, "CPUM" /*pszWho*/, 0 /*uInstance*/); + if (RT_FAILURE(rc)) + return rc; + } + + /** @cfgm{/CPUM/IsaExts/CMPXCHG16B, boolean, depends} + * Expose CMPXCHG16B to the guest if supported by the host. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfgLegacy(pVM, pIsaExts, pCpumCfg, "CMPXCHG16B", &pConfig->enmCmpXchg16b, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/MONITOR, boolean, true} + * Expose MONITOR/MWAIT instructions to the guest. + */ + rc = cpumR3CpuIdReadIsaExtCfgLegacy(pVM, pIsaExts, pCpumCfg, "MONITOR", &pConfig->enmMonitor, true); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/MWaitExtensions, boolean, false} + * Expose MWAIT extended features to the guest. For now we expose just MWAIT + * break on interrupt feature (bit 1). + */ + rc = cpumR3CpuIdReadIsaExtCfgLegacy(pVM, pIsaExts, pCpumCfg, "MWaitExtensions", &pConfig->enmMWaitExtensions, false); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/SSE4.1, boolean, true} + * Expose SSE4.1 to the guest if available. + */ + rc = cpumR3CpuIdReadIsaExtCfgLegacy(pVM, pIsaExts, pCpumCfg, "SSE4.1", &pConfig->enmSse41, true); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/SSE4.2, boolean, true} + * Expose SSE4.2 to the guest if available. + */ + rc = cpumR3CpuIdReadIsaExtCfgLegacy(pVM, pIsaExts, pCpumCfg, "SSE4.2", &pConfig->enmSse42, true); + AssertLogRelRCReturn(rc, rc); + + bool const fMayHaveXSave = fNestedPagingAndFullGuestExec + && pVM->cpum.s.HostFeatures.fXSaveRstor + && pVM->cpum.s.HostFeatures.fOpSysXSaveRstor +#if HC_ARCH_BITS == 32 /* Seems this may be broken when doing 64-bit on 32-bit, just disable it for now. */ + && ( !HMIsLongModeAllowed(pVM) + || NEMHCIsLongModeAllowed(pVM)) +#endif + ; + uint64_t const fXStateHostMask = pVM->cpum.s.fXStateHostMask; + + /** @cfgm{/CPUM/IsaExts/XSAVE, boolean, depends} + * Expose XSAVE/XRSTOR to the guest if available. 
For the time being the + * default is to only expose this to VMs with nested paging and AMD-V or + * unrestricted guest execution mode. Not possible to force this one without + * host support at the moment. + */ + rc = cpumR3CpuIdReadIsaExtCfgEx(pVM, pIsaExts, "XSAVE", &pConfig->enmXSave, fNestedPagingAndFullGuestExec, + fMayHaveXSave /*fAllowed*/); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/AVX, boolean, depends} + * Expose the AVX instruction set extensions to the guest if available and + * XSAVE is exposed too. For the time being the default is to only expose this + * to VMs with nested paging and AMD-V or unrestricted guest execution mode. + */ + rc = cpumR3CpuIdReadIsaExtCfgEx(pVM, pIsaExts, "AVX", &pConfig->enmAvx, fNestedPagingAndFullGuestExec, + fMayHaveXSave && pConfig->enmXSave && (fXStateHostMask & XSAVE_C_YMM) /*fAllowed*/); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/AVX2, boolean, depends} + * Expose the AVX2 instruction set extensions to the guest if available and + * XSAVE is exposed too. For the time being the default is to only expose this + * to VMs with nested paging and AMD-V or unrestricted guest execution mode. + */ + rc = cpumR3CpuIdReadIsaExtCfgEx(pVM, pIsaExts, "AVX2", &pConfig->enmAvx2, fNestedPagingAndFullGuestExec /* temporarily */, + fMayHaveXSave && pConfig->enmXSave && (fXStateHostMask & XSAVE_C_YMM) /*fAllowed*/); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/AESNI, isaextcfg, depends} + * Whether to expose the AES instructions to the guest. For the time being the + * default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "AESNI", &pConfig->enmAesNi, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/PCLMUL, isaextcfg, depends} + * Whether to expose the PCLMULQDQ instructions to the guest. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "PCLMUL", &pConfig->enmPClMul, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/POPCNT, isaextcfg, depends} + * Whether to expose the POPCNT instructions to the guest. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "POPCNT", &pConfig->enmPopCnt, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/MOVBE, isaextcfg, depends} + * Whether to expose the MOVBE instructions to the guest. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "MOVBE", &pConfig->enmMovBe, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/RDRAND, isaextcfg, depends} + * Whether to expose the RDRAND instructions to the guest. For the time being + * the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "RDRAND", &pConfig->enmRdRand, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/RDSEED, isaextcfg, depends} + * Whether to expose the RDSEED instructions to the guest. 
For the time being + * the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "RDSEED", &pConfig->enmRdSeed, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/CLFLUSHOPT, isaextcfg, depends} + * Whether to expose the CLFLUSHOPT instructions to the guest. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "CLFLUSHOPT", &pConfig->enmCLFlushOpt, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/FSGSBASE, isaextcfg, true} + * Whether to expose the read/write FSGSBASE instructions to the guest. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "FSGSBASE", &pConfig->enmFsGsBase, true); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/PCID, isaextcfg, true} + * Whether to expose the PCID feature to the guest. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "PCID", &pConfig->enmPcid, pConfig->enmFsGsBase); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/INVPCID, isaextcfg, true} + * Whether to expose the INVPCID instruction to the guest. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "INVPCID", &pConfig->enmInvpcid, pConfig->enmFsGsBase); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/FlushCmdMsr, isaextcfg, true} + * Whether to expose the IA32_FLUSH_CMD MSR to the guest. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "FlushCmdMsr", &pConfig->enmFlushCmdMsr, CPUMISAEXTCFG_ENABLED_SUPPORTED); + AssertLogRelRCReturn(rc, rc); + + + /* AMD: */ + + /** @cfgm{/CPUM/IsaExts/ABM, isaextcfg, depends} + * Whether to expose the AMD ABM instructions to the guest. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "ABM", &pConfig->enmAbm, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/SSE4A, isaextcfg, depends} + * Whether to expose the AMD SSE4A instructions to the guest. For the time + * being the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "SSE4A", &pConfig->enmSse4A, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/MISALNSSE, isaextcfg, depends} + * Whether to expose the AMD MisAlSse feature (MXCSR flag 17) to the guest. For + * the time being the default is to only do this for VMs with nested paging and + * AMD-V or unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "MISALNSSE", &pConfig->enmMisAlnSse, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/3DNOWPRF, isaextcfg, depends} + * Whether to expose the AMD 3D Now! prefetch instructions to the guest. + * For the time being the default is to only do this for VMs with nested paging + * and AMD-V or unrestricted guest mode. + */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "3DNOWPRF", &pConfig->enm3dNowPrf, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/CPUM/IsaExts/AXMMX, isaextcfg, depends} + * Whether to expose the AMD's MMX Extensions to the guest. For the time being + * the default is to only do this for VMs with nested paging and AMD-V or + * unrestricted guest mode. 
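+ * As with the other IsaExts values, this accepts the integer encodings
+ * 0/1/2/9 as well as the strings "disabled", "enabled", "forced", "portable"
+ * and "default", as handled by cpumR3CpuIdReadIsaExtCfg above.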
+ */ + rc = cpumR3CpuIdReadIsaExtCfg(pVM, pIsaExts, "AXMMX", &pConfig->enmAmdExtMmx, fNestedPagingAndFullGuestExec); + AssertLogRelRCReturn(rc, rc); + + return VINF_SUCCESS; +} + + +/** + * Initializes the emulated CPU's CPUID & MSR information. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pHostMsrs Pointer to the host MSRs. + */ +int cpumR3InitCpuIdAndMsrs(PVM pVM, PCCPUMMSRS pHostMsrs) +{ + Assert(pHostMsrs); + + PCPUM pCpum = &pVM->cpum.s; + PCFGMNODE pCpumCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "CPUM"); + + /* + * Set the fCpuIdApicFeatureVisible flags so the APIC can assume visibility + * on construction and manage everything from here on. + */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + pVM->aCpus[iCpu].cpum.s.fCpuIdApicFeatureVisible = true; + + /* + * Read the configuration. + */ + CPUMCPUIDCONFIG Config; + RT_ZERO(Config); + + int rc = cpumR3CpuIdReadConfig(pVM, &Config, pCpumCfg, HMAreNestedPagingAndFullGuestExecEnabled(pVM)); + AssertRCReturn(rc, rc); + + /* + * Get the guest CPU data from the database and/or the host. + * + * The CPUID and MSRs are currently living on the regular heap to avoid + * fragmenting the hyper heap (and because there isn't/wasn't any realloc + * API for the hyper heap). This means special cleanup considerations. + */ + rc = cpumR3DbGetCpuInfo(Config.szCpuName, &pCpum->GuestInfo); + if (RT_FAILURE(rc)) + return rc == VERR_CPUM_DB_CPU_NOT_FOUND + ? VMSetError(pVM, rc, RT_SRC_POS, + "Info on guest CPU '%s' could not be found. Please, select a different CPU.", Config.szCpuName) + : rc; + + if (pCpum->GuestInfo.fMxCsrMask & ~pVM->cpum.s.fHostMxCsrMask) + { + LogRel(("Stripping unsupported MXCSR bits from guest mask: %#x -> %#x (host: %#x)\n", pCpum->GuestInfo.fMxCsrMask, + pCpum->GuestInfo.fMxCsrMask & pVM->cpum.s.fHostMxCsrMask, pVM->cpum.s.fHostMxCsrMask)); + pCpum->GuestInfo.fMxCsrMask &= pVM->cpum.s.fHostMxCsrMask; + } + LogRel(("CPUM: MXCSR_MASK=%#x (host: %#x)\n", pCpum->GuestInfo.fMxCsrMask, pVM->cpum.s.fHostMxCsrMask)); + + /** @cfgm{/CPUM/MSRs/[Name]/[First|Last|Type|Value|...],} + * Overrides the guest MSRs. + */ + rc = cpumR3LoadMsrOverrides(pVM, CFGMR3GetChild(pCpumCfg, "MSRs")); + + /** @cfgm{/CPUM/HostCPUID/[000000xx|800000xx|c000000x]/[eax|ebx|ecx|edx],32-bit} + * Overrides the CPUID leaf values (from the host CPU usually) used for + * calculating the guest CPUID leaves. This can be used to preserve the CPUID + * values when moving a VM to a different machine. Another use is restricting + * (or extending) the feature set exposed to the guest. */ + if (RT_SUCCESS(rc)) + rc = cpumR3LoadCpuIdOverrides(pVM, CFGMR3GetChild(pCpumCfg, "HostCPUID"), "HostCPUID"); + + if (RT_SUCCESS(rc) && CFGMR3GetChild(pCpumCfg, "CPUID")) /* 2nd override, now discontinued. */ + rc = VMSetError(pVM, VERR_CFGM_CONFIG_UNKNOWN_NODE, RT_SRC_POS, + "Found unsupported configuration node '/CPUM/CPUID/'. " + "Please use IMachine::setCPUIDLeaf() instead."); + + CPUMMSRS GuestMsrs; + RT_ZERO(GuestMsrs); + + /* + * Pre-explode the CPUID info. + */ + if (RT_SUCCESS(rc)) + { + rc = cpumR3CpuIdExplodeFeatures(pCpum->GuestInfo.paCpuIdLeavesR3, pCpum->GuestInfo.cCpuIdLeaves, &GuestMsrs, + &pCpum->GuestFeatures); + } + + /* + * Sanitize the cpuid information passed on to the guest. 
+ */ + if (RT_SUCCESS(rc)) + { + rc = cpumR3CpuIdSanitize(pVM, pCpum, &Config); + if (RT_SUCCESS(rc)) + { + cpumR3CpuIdLimitLeaves(pCpum, &Config); + cpumR3CpuIdLimitIntelFamModStep(pCpum, &Config); + } + } + + /* + * Setup MSRs introduced in microcode updates or that are otherwise not in + * the CPU profile, but are advertised in the CPUID info we just sanitized. + */ + if (RT_SUCCESS(rc)) + rc = cpumR3MsrReconcileWithCpuId(pVM); + /* + * MSR fudging. + */ + if (RT_SUCCESS(rc)) + { + /** @cfgm{/CPUM/FudgeMSRs, boolean, true} + * Fudges some common MSRs if not present in the selected CPU database entry. + * This is for trying to keep VMs running when moved between different hosts + * and different CPU vendors. */ + bool fEnable; + rc = CFGMR3QueryBoolDef(pCpumCfg, "FudgeMSRs", &fEnable, true); AssertRC(rc); + if (RT_SUCCESS(rc) && fEnable) + { + rc = cpumR3MsrApplyFudge(pVM); + AssertLogRelRC(rc); + } + } + if (RT_SUCCESS(rc)) + { + /* + * Move the MSR and CPUID arrays over on the hypervisor heap, and explode + * guest CPU features again. + */ + void *pvFree = pCpum->GuestInfo.paCpuIdLeavesR3; + int rc1 = cpumR3CpuIdInstallAndExplodeLeaves(pVM, pCpum, pCpum->GuestInfo.paCpuIdLeavesR3, + pCpum->GuestInfo.cCpuIdLeaves, &GuestMsrs); + RTMemFree(pvFree); + + pvFree = pCpum->GuestInfo.paMsrRangesR3; + int rc2 = MMHyperDupMem(pVM, pvFree, + sizeof(pCpum->GuestInfo.paMsrRangesR3[0]) * pCpum->GuestInfo.cMsrRanges, 32, + MM_TAG_CPUM_MSRS, (void **)&pCpum->GuestInfo.paMsrRangesR3); + RTMemFree(pvFree); + AssertLogRelRCReturn(rc1, rc1); + AssertLogRelRCReturn(rc2, rc2); + + pCpum->GuestInfo.paMsrRangesR0 = MMHyperR3ToR0(pVM, pCpum->GuestInfo.paMsrRangesR3); + pCpum->GuestInfo.paMsrRangesRC = MMHyperR3ToRC(pVM, pCpum->GuestInfo.paMsrRangesR3); + + /* + * Finally, initialize guest VMX MSRs. + * + * This needs to be done -after- exploding guest features and sanitizing CPUID leaves + * as constructing VMX capabilities MSRs rely on CPU feature bits like long mode, + * unrestricted-guest execution, CR4 feature bits and possibly more in the future. + */ + if (pVM->cpum.s.GuestFeatures.fVmx) + { + Assert(Config.fNestedHWVirt); + cpumR3InitVmxGuestFeaturesAndMsrs(pVM, &pHostMsrs->hwvirt.vmx, &GuestMsrs.hwvirt.vmx); + + /* Copy MSRs to all VCPUs */ + PCVMXMSRS pVmxMsrs = &GuestMsrs.hwvirt.vmx; + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + memcpy(&pVCpu->cpum.s.Guest.hwvirt.vmx.Msrs, pVmxMsrs, sizeof(*pVmxMsrs)); + } + } + + /* + * Some more configuration that we're applying at the end of everything + * via the CPUMSetGuestCpuIdFeature API. + */ + + /* Check if PAE was explicitely enabled by the user. */ + bool fEnable; + rc = CFGMR3QueryBoolDef(CFGMR3GetRoot(pVM), "EnablePAE", &fEnable, false); + AssertRCReturn(rc, rc); + if (fEnable) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE); + + /* We don't normally enable NX for raw-mode, so give the user a chance to force it on. */ + rc = CFGMR3QueryBoolDef(pCpumCfg, "EnableNX", &fEnable, false); + AssertRCReturn(rc, rc); + if (fEnable) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + + /* Check if speculation control is enabled. */ + rc = CFGMR3QueryBoolDef(pCpumCfg, "SpecCtrl", &fEnable, false); + AssertRCReturn(rc, rc); + if (fEnable) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SPEC_CTRL); + + return VINF_SUCCESS; + } + + /* + * Failed before switching to hyper heap. 
+ */ + RTMemFree(pCpum->GuestInfo.paCpuIdLeavesR3); + pCpum->GuestInfo.paCpuIdLeavesR3 = NULL; + RTMemFree(pCpum->GuestInfo.paMsrRangesR3); + pCpum->GuestInfo.paMsrRangesR3 = NULL; + return rc; +} + + +/** + * Sets a CPUID feature bit during VM initialization. + * + * Since the CPUID feature bits are generally related to CPU features, other + * CPUM configuration like MSRs can also be modified by calls to this API. + * + * @param pVM The cross context VM structure. + * @param enmFeature The feature to set. + */ +VMMR3_INT_DECL(void) CPUMR3SetGuestCpuIdFeature(PVM pVM, CPUMCPUIDFEATURE enmFeature) +{ + PCPUMCPUIDLEAF pLeaf; + PCPUMMSRRANGE pMsrRange; + + switch (enmFeature) + { + /* + * Set the APIC bit in both feature masks. + */ + case CPUMCPUIDFEATURE_APIC: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf && (pLeaf->fFlags & CPUMCPUIDLEAF_F_CONTAINS_APIC)) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx |= X86_CPUID_FEATURE_EDX_APIC; + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if (pLeaf && (pLeaf->fFlags & CPUMCPUIDLEAF_F_CONTAINS_APIC)) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_AMD_FEATURE_EDX_APIC; + + pVM->cpum.s.GuestFeatures.fApic = 1; + + /* Make sure we've got the APICBASE MSR present. */ + pMsrRange = cpumLookupMsrRange(pVM, MSR_IA32_APICBASE); + if (!pMsrRange) + { + static CPUMMSRRANGE const s_ApicBase = + { + /*.uFirst =*/ MSR_IA32_APICBASE, /*.uLast =*/ MSR_IA32_APICBASE, + /*.enmRdFn =*/ kCpumMsrRdFn_Ia32ApicBase, /*.enmWrFn =*/ kCpumMsrWrFn_Ia32ApicBase, + /*.offCpumCpu =*/ UINT16_MAX, /*.fReserved =*/ 0, /*.uValue =*/ 0, /*.fWrIgnMask =*/ 0, /*.fWrGpMask =*/ 0, + /*.szName = */ "IA32_APIC_BASE" + }; + int rc = CPUMR3MsrRangesInsert(pVM, &s_ApicBase); + AssertLogRelRC(rc); + } + + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled xAPIC\n")); + break; + + /* + * Set the x2APIC bit in the standard feature mask. + * Note! ASSUMES CPUMCPUIDFEATURE_APIC is called first. + */ + case CPUMCPUIDFEATURE_X2APIC: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEcx = pLeaf->uEcx |= X86_CPUID_FEATURE_ECX_X2APIC; + pVM->cpum.s.GuestFeatures.fX2Apic = 1; + + /* Make sure the MSR doesn't GP or ignore the EXTD bit. */ + pMsrRange = cpumLookupMsrRange(pVM, MSR_IA32_APICBASE); + if (pMsrRange) + { + pMsrRange->fWrGpMask &= ~MSR_IA32_APICBASE_EXTD; + pMsrRange->fWrIgnMask &= ~MSR_IA32_APICBASE_EXTD; + } + + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled x2APIC\n")); + break; + + /* + * Set the sysenter/sysexit bit in the standard feature mask. + * Assumes the caller knows what it's doing! (host must support these) + */ + case CPUMCPUIDFEATURE_SEP: + if (!pVM->cpum.s.HostFeatures.fSysEnter) + { + AssertMsgFailed(("ERROR: Can't turn on SEP when the host doesn't support it!!\n")); + return; + } + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx |= X86_CPUID_FEATURE_EDX_SEP; + pVM->cpum.s.GuestFeatures.fSysEnter = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled SYSENTER/EXIT\n")); + break; + + /* + * Set the syscall/sysret bit in the extended feature mask. + * Assumes the caller knows what it's doing! 
(host must support these) + */ + case CPUMCPUIDFEATURE_SYSCALL: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( !pLeaf + || !pVM->cpum.s.HostFeatures.fSysCall) + { +#if HC_ARCH_BITS == 32 + /* X86_CPUID_EXT_FEATURE_EDX_SYSCALL not set it seems in 32-bit + mode by Intel, even when the cpu is capable of doing so in + 64-bit mode. Long mode requires syscall support. */ + if (!pVM->cpum.s.HostFeatures.fLongMode) +#endif + { + LogRel(("CPUM: WARNING! Can't turn on SYSCALL/SYSRET when the host doesn't support it!\n")); + return; + } + } + + /* Valid for both Intel and AMD CPUs, although only in 64 bits mode for Intel. */ + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_EXT_FEATURE_EDX_SYSCALL; + pVM->cpum.s.GuestFeatures.fSysCall = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled SYSCALL/RET\n")); + break; + + /* + * Set the PAE bit in both feature masks. + * Assumes the caller knows what it's doing! (host must support these) + */ + case CPUMCPUIDFEATURE_PAE: + if (!pVM->cpum.s.HostFeatures.fPae) + { + LogRel(("CPUM: WARNING! Can't turn on PAE when the host doesn't support it!\n")); + return; + } + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx |= X86_CPUID_FEATURE_EDX_PAE; + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( pLeaf + && pVM->cpum.s.GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_AMD_FEATURE_EDX_PAE; + + pVM->cpum.s.GuestFeatures.fPae = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled PAE\n")); + break; + + /* + * Set the LONG MODE bit in the extended feature mask. + * Assumes the caller knows what it's doing! (host must support these) + */ + case CPUMCPUIDFEATURE_LONG_MODE: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( !pLeaf + || !pVM->cpum.s.HostFeatures.fLongMode) + { + LogRel(("CPUM: WARNING! Can't turn on LONG MODE when the host doesn't support it!\n")); + return; + } + + /* Valid for both Intel and AMD. */ + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_EXT_FEATURE_EDX_LONG_MODE; + pVM->cpum.s.GuestFeatures.fLongMode = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled LONG MODE\n")); + break; + + /* + * Set the NX/XD bit in the extended feature mask. + * Assumes the caller knows what it's doing! (host must support these) + */ + case CPUMCPUIDFEATURE_NX: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( !pLeaf + || !pVM->cpum.s.HostFeatures.fNoExecute) + { + LogRel(("CPUM: WARNING! Can't turn on NX/XD when the host doesn't support it!\n")); + return; + } + + /* Valid for both Intel and AMD. */ + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_EXT_FEATURE_EDX_NX; + pVM->cpum.s.GuestFeatures.fNoExecute = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled NX\n")); + break; + + + /* + * Set the LAHF/SAHF support in 64-bit mode. + * Assumes the caller knows what it's doing! (host must support this) + */ + case CPUMCPUIDFEATURE_LAHF: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( !pLeaf + || !pVM->cpum.s.HostFeatures.fLahfSahf) + { + LogRel(("CPUM: WARNING! Can't turn on LAHF/SAHF when the host doesn't support it!\n")); + return; + } + + /* Valid for both Intel and AMD. 
*/ + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEcx = pLeaf->uEcx |= X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF; + pVM->cpum.s.GuestFeatures.fLahfSahf = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled LAHF/SAHF\n")); + break; + + /* + * Set the page attribute table bit. This is alternative page level + * cache control that doesn't much matter when everything is + * virtualized, though it may when passing thru device memory. + */ + case CPUMCPUIDFEATURE_PAT: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx |= X86_CPUID_FEATURE_EDX_PAT; + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( pLeaf + && pVM->cpum.s.GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_AMD_FEATURE_EDX_PAT; + + pVM->cpum.s.GuestFeatures.fPat = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled PAT\n")); + break; + + /* + * Set the RDTSCP support bit. + * Assumes the caller knows what it's doing! (host must support this) + */ + case CPUMCPUIDFEATURE_RDTSCP: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( !pLeaf + || !pVM->cpum.s.HostFeatures.fRdTscP + || pVM->cpum.s.u8PortableCpuIdLevel > 0) + { + if (!pVM->cpum.s.u8PortableCpuIdLevel) + LogRel(("CPUM: WARNING! Can't turn on RDTSCP when the host doesn't support it!\n")); + return; + } + + /* Valid for both Intel and AMD. */ + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx |= X86_CPUID_EXT_FEATURE_EDX_RDTSCP; + pVM->cpum.s.HostFeatures.fRdTscP = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled RDTSCP.\n")); + break; + + /* + * Set the Hypervisor Present bit in the standard feature mask. + */ + case CPUMCPUIDFEATURE_HVP: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEcx = pLeaf->uEcx |= X86_CPUID_FEATURE_ECX_HVP; + pVM->cpum.s.GuestFeatures.fHypervisorPresent = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled Hypervisor Present bit\n")); + break; + + /* + * Set the MWAIT Extensions Present bit in the MWAIT/MONITOR leaf. + * This currently includes the Present bit and MWAITBREAK bit as well. + */ + case CPUMCPUIDFEATURE_MWAIT_EXTS: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000005)); + if ( !pLeaf + || !pVM->cpum.s.HostFeatures.fMWaitExtensions) + { + LogRel(("CPUM: WARNING! Can't turn on MWAIT Extensions when the host doesn't support it!\n")); + return; + } + + /* Valid for both Intel and AMD. */ + pVM->cpum.s.aGuestCpuIdPatmStd[5].uEcx = pLeaf->uEcx |= X86_CPUID_MWAIT_ECX_EXT | X86_CPUID_MWAIT_ECX_BREAKIRQIF0; + pVM->cpum.s.GuestFeatures.fMWaitExtensions = 1; + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled MWAIT Extensions.\n")); + break; + + /* + * Set up the speculation control CPUID bits and MSRs. This is quite complicated + * on Intel CPUs, and different on AMDs. + */ + case CPUMCPUIDFEATURE_SPEC_CTRL: + if (pVM->cpum.s.GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_INTEL) + { + pLeaf = cpumR3CpuIdGetExactLeaf(&pVM->cpum.s, UINT32_C(0x00000007), 0); + if ( !pLeaf + || !(pVM->cpum.s.HostFeatures.fIbpb || pVM->cpum.s.HostFeatures.fIbrs)) + { + LogRel(("CPUM: WARNING! Can't turn on Speculation Control when the host doesn't support it!\n")); + return; + } + + /* The feature can be enabled. Let's see what we can actually do. */ + pVM->cpum.s.GuestFeatures.fSpeculationControl = 1; + + /* We will only expose STIBP if IBRS is present to keep things simpler (simple is not an option). 
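+ * For reference (Intel SDM numbering): in CPUID[7].EDX the IBRS/IBPB bit is
+ * bit 26, STIBP is bit 27 and IA32_ARCH_CAPABILITIES support is bit 29; these
+ * correspond to the X86_CPUID_STEXT_FEATURE_EDX_* additions below.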
*/ + if (pVM->cpum.s.HostFeatures.fIbrs) + { + pLeaf->uEdx |= X86_CPUID_STEXT_FEATURE_EDX_IBRS_IBPB; + pVM->cpum.s.GuestFeatures.fIbrs = 1; + if (pVM->cpum.s.HostFeatures.fStibp) + { + pLeaf->uEdx |= X86_CPUID_STEXT_FEATURE_EDX_STIBP; + pVM->cpum.s.GuestFeatures.fStibp = 1; + } + + /* Make sure we have the speculation control MSR... */ + pMsrRange = cpumLookupMsrRange(pVM, MSR_IA32_SPEC_CTRL); + if (!pMsrRange) + { + static CPUMMSRRANGE const s_SpecCtrl = + { + /*.uFirst =*/ MSR_IA32_SPEC_CTRL, /*.uLast =*/ MSR_IA32_SPEC_CTRL, + /*.enmRdFn =*/ kCpumMsrRdFn_Ia32SpecCtrl, /*.enmWrFn =*/ kCpumMsrWrFn_Ia32SpecCtrl, + /*.offCpumCpu =*/ UINT16_MAX, /*.fReserved =*/ 0, /*.uValue =*/ 0, /*.fWrIgnMask =*/ 0, /*.fWrGpMask =*/ 0, + /*.szName = */ "IA32_SPEC_CTRL" + }; + int rc = CPUMR3MsrRangesInsert(pVM, &s_SpecCtrl); + AssertLogRelRC(rc); + } + + /* ... and the predictor command MSR. */ + pMsrRange = cpumLookupMsrRange(pVM, MSR_IA32_PRED_CMD); + if (!pMsrRange) + { + /** @todo incorrect fWrGpMask. */ + static CPUMMSRRANGE const s_SpecCtrl = + { + /*.uFirst =*/ MSR_IA32_PRED_CMD, /*.uLast =*/ MSR_IA32_PRED_CMD, + /*.enmRdFn =*/ kCpumMsrRdFn_WriteOnly, /*.enmWrFn =*/ kCpumMsrWrFn_Ia32PredCmd, + /*.offCpumCpu =*/ UINT16_MAX, /*.fReserved =*/ 0, /*.uValue =*/ 0, /*.fWrIgnMask =*/ 0, /*.fWrGpMask =*/ 0, + /*.szName = */ "IA32_PRED_CMD" + }; + int rc = CPUMR3MsrRangesInsert(pVM, &s_SpecCtrl); + AssertLogRelRC(rc); + } + + } + + if (pVM->cpum.s.HostFeatures.fArchCap) + { + pLeaf->uEdx |= X86_CPUID_STEXT_FEATURE_EDX_ARCHCAP; + + /* Install the architectural capabilities MSR. */ + pMsrRange = cpumLookupMsrRange(pVM, MSR_IA32_ARCH_CAPABILITIES); + if (!pMsrRange) + { + static CPUMMSRRANGE const s_ArchCaps = + { + /*.uFirst =*/ MSR_IA32_ARCH_CAPABILITIES, /*.uLast =*/ MSR_IA32_ARCH_CAPABILITIES, + /*.enmRdFn =*/ kCpumMsrRdFn_Ia32ArchCapabilities, /*.enmWrFn =*/ kCpumMsrWrFn_ReadOnly, + /*.offCpumCpu =*/ UINT16_MAX, /*.fReserved =*/ 0, /*.uValue =*/ 0, /*.fWrIgnMask =*/ 0, /*.fWrGpMask =*/ UINT64_MAX, + /*.szName = */ "IA32_ARCH_CAPABILITIES" + }; + int rc = CPUMR3MsrRangesInsert(pVM, &s_ArchCaps); + AssertLogRelRC(rc); + } + } + + LogRel(("CPUM: SetGuestCpuIdFeature: Enabled Speculation Control.\n")); + } + else if (pVM->cpum.s.GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + { + /* The precise details of AMD's implementation are not yet clear. */ + } + break; + + default: + AssertMsgFailed(("enmFeature=%d\n", enmFeature)); + break; + } + + /** @todo can probably kill this as this API is now init time only... */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->cpum.s.fChanged |= CPUM_CHANGED_CPUID; + } +} + + +/** + * Queries a CPUID feature bit. + * + * @returns boolean for feature presence + * @param pVM The cross context VM structure. + * @param enmFeature The feature to query. + * @deprecated Use the cpum.ro.GuestFeatures directly instead. 
+ */ +VMMR3_INT_DECL(bool) CPUMR3GetGuestCpuIdFeature(PVM pVM, CPUMCPUIDFEATURE enmFeature) +{ + switch (enmFeature) + { + case CPUMCPUIDFEATURE_APIC: return pVM->cpum.s.GuestFeatures.fApic; + case CPUMCPUIDFEATURE_X2APIC: return pVM->cpum.s.GuestFeatures.fX2Apic; + case CPUMCPUIDFEATURE_SYSCALL: return pVM->cpum.s.GuestFeatures.fSysCall; + case CPUMCPUIDFEATURE_SEP: return pVM->cpum.s.GuestFeatures.fSysEnter; + case CPUMCPUIDFEATURE_PAE: return pVM->cpum.s.GuestFeatures.fPae; + case CPUMCPUIDFEATURE_NX: return pVM->cpum.s.GuestFeatures.fNoExecute; + case CPUMCPUIDFEATURE_LAHF: return pVM->cpum.s.GuestFeatures.fLahfSahf; + case CPUMCPUIDFEATURE_LONG_MODE: return pVM->cpum.s.GuestFeatures.fLongMode; + case CPUMCPUIDFEATURE_PAT: return pVM->cpum.s.GuestFeatures.fPat; + case CPUMCPUIDFEATURE_RDTSCP: return pVM->cpum.s.GuestFeatures.fRdTscP; + case CPUMCPUIDFEATURE_HVP: return pVM->cpum.s.GuestFeatures.fHypervisorPresent; + case CPUMCPUIDFEATURE_MWAIT_EXTS: return pVM->cpum.s.GuestFeatures.fMWaitExtensions; + case CPUMCPUIDFEATURE_SPEC_CTRL: return pVM->cpum.s.GuestFeatures.fSpeculationControl; + + case CPUMCPUIDFEATURE_INVALID: + case CPUMCPUIDFEATURE_32BIT_HACK: + break; + } + AssertFailed(); + return false; +} + + +/** + * Clears a CPUID feature bit. + * + * @param pVM The cross context VM structure. + * @param enmFeature The feature to clear. + * + * @deprecated Probably better to default the feature to disabled and only allow + * setting (enabling) it during construction. + */ +VMMR3_INT_DECL(void) CPUMR3ClearGuestCpuIdFeature(PVM pVM, CPUMCPUIDFEATURE enmFeature) +{ + PCPUMCPUIDLEAF pLeaf; + switch (enmFeature) + { + case CPUMCPUIDFEATURE_APIC: + Assert(!pVM->cpum.s.GuestFeatures.fApic); /* We only expect this call during init. No MSR adjusting needed. */ + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_FEATURE_EDX_APIC; + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if (pLeaf && (pLeaf->fFlags & CPUMCPUIDLEAF_F_CONTAINS_APIC)) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_AMD_FEATURE_EDX_APIC; + + pVM->cpum.s.GuestFeatures.fApic = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled xAPIC\n")); + break; + + case CPUMCPUIDFEATURE_X2APIC: + Assert(!pVM->cpum.s.GuestFeatures.fX2Apic); /* We only expect this call during init. No MSR adjusting needed. 
*/ + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEcx = pLeaf->uEcx &= ~X86_CPUID_FEATURE_ECX_X2APIC; + pVM->cpum.s.GuestFeatures.fX2Apic = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled x2APIC\n")); + break; + + case CPUMCPUIDFEATURE_PAE: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_FEATURE_EDX_PAE; + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( pLeaf + && pVM->cpum.s.GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_AMD_FEATURE_EDX_PAE; + + pVM->cpum.s.GuestFeatures.fPae = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled PAE!\n")); + break; + + case CPUMCPUIDFEATURE_PAT: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_FEATURE_EDX_PAT; + + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if ( pLeaf + && pVM->cpum.s.GuestFeatures.enmCpuVendor == CPUMCPUVENDOR_AMD) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_AMD_FEATURE_EDX_PAT; + + pVM->cpum.s.GuestFeatures.fPat = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled PAT!\n")); + break; + + case CPUMCPUIDFEATURE_LONG_MODE: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_EXT_FEATURE_EDX_LONG_MODE; + pVM->cpum.s.GuestFeatures.fLongMode = 0; + break; + + case CPUMCPUIDFEATURE_LAHF: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEcx = pLeaf->uEcx &= ~X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF; + pVM->cpum.s.GuestFeatures.fLahfSahf = 0; + break; + + case CPUMCPUIDFEATURE_RDTSCP: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x80000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmExt[1].uEdx = pLeaf->uEdx &= ~X86_CPUID_EXT_FEATURE_EDX_RDTSCP; + pVM->cpum.s.GuestFeatures.fRdTscP = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled RDTSCP!\n")); + break; + + case CPUMCPUIDFEATURE_HVP: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000001)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[1].uEcx = pLeaf->uEcx &= ~X86_CPUID_FEATURE_ECX_HVP; + pVM->cpum.s.GuestFeatures.fHypervisorPresent = 0; + break; + + case CPUMCPUIDFEATURE_MWAIT_EXTS: + pLeaf = cpumCpuIdGetLeaf(pVM, UINT32_C(0x00000005)); + if (pLeaf) + pVM->cpum.s.aGuestCpuIdPatmStd[5].uEcx = pLeaf->uEcx &= ~(X86_CPUID_MWAIT_ECX_EXT | X86_CPUID_MWAIT_ECX_BREAKIRQIF0); + pVM->cpum.s.GuestFeatures.fMWaitExtensions = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled MWAIT Extensions!\n")); + break; + + case CPUMCPUIDFEATURE_SPEC_CTRL: + pLeaf = cpumR3CpuIdGetExactLeaf(&pVM->cpum.s, UINT32_C(0x00000007), 0); + if (pLeaf) + pLeaf->uEdx &= ~( X86_CPUID_STEXT_FEATURE_EDX_IBRS_IBPB | X86_CPUID_STEXT_FEATURE_EDX_STIBP + | X86_CPUID_STEXT_FEATURE_EDX_ARCHCAP); + pVM->cpum.s.GuestFeatures.fSpeculationControl = 0; + Log(("CPUM: ClearGuestCpuIdFeature: Disabled speculation control!\n")); + break; + + default: + AssertMsgFailed(("enmFeature=%d\n", enmFeature)); + break; + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->cpum.s.fChanged |= CPUM_CHANGED_CPUID; + } +} + + + +/* + * + * + * Saved state related code. + * Saved state related code. + * Saved state related code. + * + * + */ + +/** + * Called both in pass 0 and the final pass. + * + * @param pVM The cross context VM structure. 
+ * @param pSSM The saved state handle. + */ +void cpumR3SaveCpuId(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Save all the CPU ID leaves. + */ + SSMR3PutU32(pSSM, sizeof(pVM->cpum.s.GuestInfo.paCpuIdLeavesR3[0])); + SSMR3PutU32(pSSM, pVM->cpum.s.GuestInfo.cCpuIdLeaves); + SSMR3PutMem(pSSM, pVM->cpum.s.GuestInfo.paCpuIdLeavesR3, + sizeof(pVM->cpum.s.GuestInfo.paCpuIdLeavesR3[0]) * pVM->cpum.s.GuestInfo.cCpuIdLeaves); + + SSMR3PutMem(pSSM, &pVM->cpum.s.GuestInfo.DefCpuId, sizeof(pVM->cpum.s.GuestInfo.DefCpuId)); + + /* + * Save a good portion of the raw CPU IDs as well as they may come in + * handy when validating features for raw mode. + */ + CPUMCPUID aRawStd[16]; + for (unsigned i = 0; i < RT_ELEMENTS(aRawStd); i++) + ASMCpuIdExSlow(i, 0, 0, 0, &aRawStd[i].uEax, &aRawStd[i].uEbx, &aRawStd[i].uEcx, &aRawStd[i].uEdx); + SSMR3PutU32(pSSM, RT_ELEMENTS(aRawStd)); + SSMR3PutMem(pSSM, &aRawStd[0], sizeof(aRawStd)); + + CPUMCPUID aRawExt[32]; + for (unsigned i = 0; i < RT_ELEMENTS(aRawExt); i++) + ASMCpuIdExSlow(i | UINT32_C(0x80000000), 0, 0, 0, &aRawExt[i].uEax, &aRawExt[i].uEbx, &aRawExt[i].uEcx, &aRawExt[i].uEdx); + SSMR3PutU32(pSSM, RT_ELEMENTS(aRawExt)); + SSMR3PutMem(pSSM, &aRawExt[0], sizeof(aRawExt)); +} + + +static int cpumR3LoadOneOldGuestCpuIdArray(PSSMHANDLE pSSM, uint32_t uBase, PCPUMCPUIDLEAF *ppaLeaves, uint32_t *pcLeaves) +{ + uint32_t cCpuIds; + int rc = SSMR3GetU32(pSSM, &cCpuIds); + if (RT_SUCCESS(rc)) + { + if (cCpuIds < 64) + { + for (uint32_t i = 0; i < cCpuIds; i++) + { + CPUMCPUID CpuId; + rc = SSMR3GetMem(pSSM, &CpuId, sizeof(CpuId)); + if (RT_FAILURE(rc)) + break; + + CPUMCPUIDLEAF NewLeaf; + NewLeaf.uLeaf = uBase + i; + NewLeaf.uSubLeaf = 0; + NewLeaf.fSubLeafMask = 0; + NewLeaf.uEax = CpuId.uEax; + NewLeaf.uEbx = CpuId.uEbx; + NewLeaf.uEcx = CpuId.uEcx; + NewLeaf.uEdx = CpuId.uEdx; + NewLeaf.fFlags = 0; + rc = cpumR3CpuIdInsert(NULL /* pVM */, ppaLeaves, pcLeaves, &NewLeaf); + } + } + else + rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + if (RT_FAILURE(rc)) + { + RTMemFree(*ppaLeaves); + *ppaLeaves = NULL; + *pcLeaves = 0; + } + return rc; +} + + +static int cpumR3LoadGuestCpuIdArray(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, PCPUMCPUIDLEAF *ppaLeaves, uint32_t *pcLeaves) +{ + *ppaLeaves = NULL; + *pcLeaves = 0; + + int rc; + if (uVersion > CPUM_SAVED_STATE_VERSION_PUT_STRUCT) + { + /* + * The new format. Starts by declaring the leave size and count. + */ + uint32_t cbLeaf; + SSMR3GetU32(pSSM, &cbLeaf); + uint32_t cLeaves; + rc = SSMR3GetU32(pSSM, &cLeaves); + if (RT_SUCCESS(rc)) + { + if (cbLeaf == sizeof(**ppaLeaves)) + { + if (cLeaves <= CPUM_CPUID_MAX_LEAVES) + { + /* + * Load the leaves one by one. + * + * The uPrev stuff is a kludge for working around a week worth of bad saved + * states during the CPUID revamp in March 2015. We saved too many leaves + * due to a bug in cpumR3CpuIdInstallAndExplodeLeaves, thus ending up with + * garbage entires at the end of the array when restoring. We also had + * a subleaf insertion bug that triggered with the leaf 4 stuff below, + * this kludge doesn't deal correctly with that, but who cares... 
+ */ + uint32_t uPrev = 0; + for (uint32_t i = 0; i < cLeaves && RT_SUCCESS(rc); i++) + { + CPUMCPUIDLEAF Leaf; + rc = SSMR3GetMem(pSSM, &Leaf, sizeof(Leaf)); + if (RT_SUCCESS(rc)) + { + if ( uVersion != CPUM_SAVED_STATE_VERSION_BAD_CPUID_COUNT + || Leaf.uLeaf >= uPrev) + { + rc = cpumR3CpuIdInsert(NULL /* pVM */, ppaLeaves, pcLeaves, &Leaf); + uPrev = Leaf.uLeaf; + } + else + uPrev = UINT32_MAX; + } + } + } + else + rc = SSMR3SetLoadError(pSSM, VERR_TOO_MANY_CPUID_LEAVES, RT_SRC_POS, + "Too many CPUID leaves: %#x, max %#x", cLeaves, CPUM_CPUID_MAX_LEAVES); + } + else + rc = SSMR3SetLoadError(pSSM, VERR_SSM_DATA_UNIT_FORMAT_CHANGED, RT_SRC_POS, + "CPUMCPUIDLEAF size differs: saved=%#x, our=%#x", cbLeaf, sizeof(**ppaLeaves)); + } + } + else + { + /* + * The old format with its three inflexible arrays. + */ + rc = cpumR3LoadOneOldGuestCpuIdArray(pSSM, UINT32_C(0x00000000), ppaLeaves, pcLeaves); + if (RT_SUCCESS(rc)) + rc = cpumR3LoadOneOldGuestCpuIdArray(pSSM, UINT32_C(0x80000000), ppaLeaves, pcLeaves); + if (RT_SUCCESS(rc)) + rc = cpumR3LoadOneOldGuestCpuIdArray(pSSM, UINT32_C(0xc0000000), ppaLeaves, pcLeaves); + if (RT_SUCCESS(rc)) + { + /* + * Fake up leaf 4 on intel like we used to do in CPUMGetGuestCpuId earlier. + */ + PCPUMCPUIDLEAF pLeaf = cpumR3CpuIdGetLeaf(*ppaLeaves, *pcLeaves, 0, 0); + if ( pLeaf + && ASMIsIntelCpuEx(pLeaf->uEbx, pLeaf->uEcx, pLeaf->uEdx)) + { + CPUMCPUIDLEAF Leaf; + Leaf.uLeaf = 4; + Leaf.fSubLeafMask = UINT32_MAX; + Leaf.uSubLeaf = 0; + Leaf.uEdx = UINT32_C(0); /* 3 flags, 0 is fine. */ + Leaf.uEcx = UINT32_C(63); /* sets - 1 */ + Leaf.uEbx = (UINT32_C(7) << 22) /* associativity -1 */ + | (UINT32_C(0) << 12) /* phys line partitions - 1 */ + | UINT32_C(63); /* system coherency line size - 1 */ + Leaf.uEax = (RT_MIN(pVM->cCpus - 1, UINT32_C(0x3f)) << 26) /* cores per package - 1 */ + | (UINT32_C(0) << 14) /* threads per cache - 1 */ + | (UINT32_C(1) << 5) /* cache level */ + | UINT32_C(1); /* cache type (data) */ + Leaf.fFlags = 0; + rc = cpumR3CpuIdInsert(NULL /* pVM */, ppaLeaves, pcLeaves, &Leaf); + if (RT_SUCCESS(rc)) + { + Leaf.uSubLeaf = 1; /* Should've been cache type 2 (code), but buggy code made it data. */ + rc = cpumR3CpuIdInsert(NULL /* pVM */, ppaLeaves, pcLeaves, &Leaf); + } + if (RT_SUCCESS(rc)) + { + Leaf.uSubLeaf = 2; /* Should've been cache type 3 (unified), but buggy code made it data. */ + Leaf.uEcx = 4095; /* sets - 1 */ + Leaf.uEbx &= UINT32_C(0x003fffff); /* associativity - 1 */ + Leaf.uEbx |= UINT32_C(23) << 22; + Leaf.uEax &= UINT32_C(0xfc003fff); /* threads per cache - 1 */ + Leaf.uEax |= RT_MIN(pVM->cCpus - 1, UINT32_C(0xfff)) << 14; + Leaf.uEax &= UINT32_C(0xffffff1f); /* level */ + Leaf.uEax |= UINT32_C(2) << 5; + rc = cpumR3CpuIdInsert(NULL /* pVM */, ppaLeaves, pcLeaves, &Leaf); + } + } + } + } + return rc; +} + + +/** + * Loads the CPU ID leaves saved by pass 0, inner worker. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uVersion The format version. + * @param paLeaves Guest CPUID leaves loaded from the state. + * @param cLeaves The number of leaves in @a paLeaves. + * @param pMsrs The guest MSRs. + */ +int cpumR3LoadCpuIdInner(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, PCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCCPUMMSRS pMsrs) +{ + AssertMsgReturn(uVersion >= CPUM_SAVED_STATE_VERSION_VER3_2, ("%u\n", uVersion), VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION); + + /* + * Continue loading the state into stack buffers. 
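+ * As a rough sketch, the remainder of the pass 0 unit written by cpumR3SaveCpuId above
+ * is laid out as:
+ *     CPUMCPUID DefCpuId;
+ *     uint32_t  cRawStd;  CPUMCPUID aRawStd[cRawStd];
+ *     uint32_t  cRawExt;  CPUMCPUID aRawExt[cRawExt];
+ * which is what the SSMR3Get* calls below consume, in that order.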
+ */ + CPUMCPUID GuestDefCpuId; + int rc = SSMR3GetMem(pSSM, &GuestDefCpuId, sizeof(GuestDefCpuId)); + AssertRCReturn(rc, rc); + + CPUMCPUID aRawStd[16]; + uint32_t cRawStd; + rc = SSMR3GetU32(pSSM, &cRawStd); AssertRCReturn(rc, rc); + if (cRawStd > RT_ELEMENTS(aRawStd)) + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + rc = SSMR3GetMem(pSSM, &aRawStd[0], cRawStd * sizeof(aRawStd[0])); + AssertRCReturn(rc, rc); + for (uint32_t i = cRawStd; i < RT_ELEMENTS(aRawStd); i++) + ASMCpuIdExSlow(i, 0, 0, 0, &aRawStd[i].uEax, &aRawStd[i].uEbx, &aRawStd[i].uEcx, &aRawStd[i].uEdx); + + CPUMCPUID aRawExt[32]; + uint32_t cRawExt; + rc = SSMR3GetU32(pSSM, &cRawExt); AssertRCReturn(rc, rc); + if (cRawExt > RT_ELEMENTS(aRawExt)) + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + rc = SSMR3GetMem(pSSM, &aRawExt[0], cRawExt * sizeof(aRawExt[0])); + AssertRCReturn(rc, rc); + for (uint32_t i = cRawExt; i < RT_ELEMENTS(aRawExt); i++) + ASMCpuIdExSlow(i | UINT32_C(0x80000000), 0, 0, 0, &aRawExt[i].uEax, &aRawExt[i].uEbx, &aRawExt[i].uEcx, &aRawExt[i].uEdx); + + /* + * Get the raw CPU IDs for the current host. + */ + CPUMCPUID aHostRawStd[16]; + for (unsigned i = 0; i < RT_ELEMENTS(aHostRawStd); i++) + ASMCpuIdExSlow(i, 0, 0, 0, &aHostRawStd[i].uEax, &aHostRawStd[i].uEbx, &aHostRawStd[i].uEcx, &aHostRawStd[i].uEdx); + + CPUMCPUID aHostRawExt[32]; + for (unsigned i = 0; i < RT_ELEMENTS(aHostRawExt); i++) + ASMCpuIdExSlow(i | UINT32_C(0x80000000), 0, 0, 0, + &aHostRawExt[i].uEax, &aHostRawExt[i].uEbx, &aHostRawExt[i].uEcx, &aHostRawExt[i].uEdx); + + /* + * Get the host and guest overrides so we don't reject the state because + * some feature was enabled thru these interfaces. + * Note! We currently only need the feature leaves, so skip the rest. + */ + PCFGMNODE pOverrideCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "CPUM/HostCPUID"); + CPUMCPUID aHostOverrideStd[2]; + memcpy(&aHostOverrideStd[0], &aHostRawStd[0], sizeof(aHostOverrideStd)); + cpumR3CpuIdInitLoadOverrideSet(UINT32_C(0x00000000), &aHostOverrideStd[0], RT_ELEMENTS(aHostOverrideStd), pOverrideCfg); + + CPUMCPUID aHostOverrideExt[2]; + memcpy(&aHostOverrideExt[0], &aHostRawExt[0], sizeof(aHostOverrideExt)); + cpumR3CpuIdInitLoadOverrideSet(UINT32_C(0x80000000), &aHostOverrideExt[0], RT_ELEMENTS(aHostOverrideExt), pOverrideCfg); + + /* + * This can be skipped. + */ + bool fStrictCpuIdChecks; + CFGMR3QueryBoolDef(CFGMR3GetChild(CFGMR3GetRoot(pVM), "CPUM"), "StrictCpuIdChecks", &fStrictCpuIdChecks, true); + + /* + * Define a bunch of macros for simplifying the sanitizing/checking code below. + */ + /* Generic expression + failure message. */ +#define CPUID_CHECK_RET(expr, fmt) \ + do { \ + if (!(expr)) \ + { \ + char *pszMsg = RTStrAPrintf2 fmt; /* lack of variadic macros sucks */ \ + if (fStrictCpuIdChecks) \ + { \ + int rcCpuid = SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, "%s", pszMsg); \ + RTStrFree(pszMsg); \ + return rcCpuid; \ + } \ + LogRel(("CPUM: %s\n", pszMsg)); \ + RTStrFree(pszMsg); \ + } \ + } while (0) +#define CPUID_CHECK_WRN(expr, fmt) \ + do { \ + if (!(expr)) \ + LogRel(fmt); \ + } while (0) + + /* For comparing two values and complaining if they differ. 
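+ E.g. CPUID_CHECK2_WRN("Std CPUID max leaf", aHostRawStd[0].uEax, aRawStd[0].uEax); as
+ used further down only logs the difference, while the _RET variant fails the load
+ when fStrictCpuIdChecks is set.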
*/ +#define CPUID_CHECK2_RET(what, host, saved) \ + do { \ + if ((host) != (saved)) \ + { \ + if (fStrictCpuIdChecks) \ + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, \ + N_(#what " mismatch: host=%#x saved=%#x"), (host), (saved)); \ + LogRel(("CPUM: " #what " differs: host=%#x saved=%#x\n", (host), (saved))); \ + } \ + } while (0) +#define CPUID_CHECK2_WRN(what, host, saved) \ + do { \ + if ((host) != (saved)) \ + LogRel(("CPUM: " #what " differs: host=%#x saved=%#x\n", (host), (saved))); \ + } while (0) + + /* For checking raw cpu features (raw mode). */ +#define CPUID_RAW_FEATURE_RET(set, reg, bit) \ + do { \ + if ((aHostRaw##set [1].reg & bit) != (aRaw##set [1].reg & bit)) \ + { \ + if (fStrictCpuIdChecks) \ + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, \ + N_(#bit " mismatch: host=%d saved=%d"), \ + !!(aHostRaw##set [1].reg & (bit)), !!(aRaw##set [1].reg & (bit)) ); \ + LogRel(("CPUM: " #bit" differs: host=%d saved=%d\n", \ + !!(aHostRaw##set [1].reg & (bit)), !!(aRaw##set [1].reg & (bit)) )); \ + } \ + } while (0) +#define CPUID_RAW_FEATURE_WRN(set, reg, bit) \ + do { \ + if ((aHostRaw##set [1].reg & bit) != (aRaw##set [1].reg & bit)) \ + LogRel(("CPUM: " #bit" differs: host=%d saved=%d\n", \ + !!(aHostRaw##set [1].reg & (bit)), !!(aRaw##set [1].reg & (bit)) )); \ + } while (0) +#define CPUID_RAW_FEATURE_IGN(set, reg, bit) do { } while (0) + + /* For checking guest features. */ +#define CPUID_GST_FEATURE_RET(set, reg, bit) \ + do { \ + if ( (aGuestCpuId##set [1].reg & bit) \ + && !(aHostRaw##set [1].reg & bit) \ + && !(aHostOverride##set [1].reg & bit) \ + ) \ + { \ + if (fStrictCpuIdChecks) \ + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, \ + N_(#bit " is not supported by the host but has already exposed to the guest")); \ + LogRel(("CPUM: " #bit " is not supported by the host but has already exposed to the guest\n")); \ + } \ + } while (0) +#define CPUID_GST_FEATURE_WRN(set, reg, bit) \ + do { \ + if ( (aGuestCpuId##set [1].reg & bit) \ + && !(aHostRaw##set [1].reg & bit) \ + && !(aHostOverride##set [1].reg & bit) \ + ) \ + LogRel(("CPUM: " #bit " is not supported by the host but has already exposed to the guest\n")); \ + } while (0) +#define CPUID_GST_FEATURE_EMU(set, reg, bit) \ + do { \ + if ( (aGuestCpuId##set [1].reg & bit) \ + && !(aHostRaw##set [1].reg & bit) \ + && !(aHostOverride##set [1].reg & bit) \ + ) \ + LogRel(("CPUM: Warning - " #bit " is not supported by the host but already exposed to the guest. This may impact performance.\n")); \ + } while (0) +#define CPUID_GST_FEATURE_IGN(set, reg, bit) do { } while (0) + + /* For checking guest features if AMD guest CPU. 
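+ These only complain when the guest actually advertises the bit, the guest is
+ AMD-branded (fGuestAmd), the host lacks the bit and no CPUM/HostCPUID override
+ supplies it; e.g. CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SVM);
+ as used below.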
*/ +#define CPUID_GST_AMD_FEATURE_RET(set, reg, bit) \ + do { \ + if ( (aGuestCpuId##set [1].reg & bit) \ + && fGuestAmd \ + && (!fGuestAmd || !(aHostRaw##set [1].reg & bit)) \ + && !(aHostOverride##set [1].reg & bit) \ + ) \ + { \ + if (fStrictCpuIdChecks) \ + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, \ + N_(#bit " is not supported by the host but has already exposed to the guest")); \ + LogRel(("CPUM: " #bit " is not supported by the host but has already exposed to the guest\n")); \ + } \ + } while (0) +#define CPUID_GST_AMD_FEATURE_WRN(set, reg, bit) \ + do { \ + if ( (aGuestCpuId##set [1].reg & bit) \ + && fGuestAmd \ + && (!fGuestAmd || !(aHostRaw##set [1].reg & bit)) \ + && !(aHostOverride##set [1].reg & bit) \ + ) \ + LogRel(("CPUM: " #bit " is not supported by the host but has already exposed to the guest\n")); \ + } while (0) +#define CPUID_GST_AMD_FEATURE_EMU(set, reg, bit) \ + do { \ + if ( (aGuestCpuId##set [1].reg & bit) \ + && fGuestAmd \ + && (!fGuestAmd || !(aHostRaw##set [1].reg & bit)) \ + && !(aHostOverride##set [1].reg & bit) \ + ) \ + LogRel(("CPUM: Warning - " #bit " is not supported by the host but already exposed to the guest. This may impact performance.\n")); \ + } while (0) +#define CPUID_GST_AMD_FEATURE_IGN(set, reg, bit) do { } while (0) + + /* For checking AMD features which have a corresponding bit in the standard + range. (Intel defines very few bits in the extended feature sets.) */ +#define CPUID_GST_FEATURE2_RET(reg, ExtBit, StdBit) \ + do { \ + if ( (aGuestCpuIdExt [1].reg & (ExtBit)) \ + && !(fHostAmd \ + ? aHostRawExt[1].reg & (ExtBit) \ + : aHostRawStd[1].reg & (StdBit)) \ + && !(aHostOverrideExt[1].reg & (ExtBit)) \ + ) \ + { \ + if (fStrictCpuIdChecks) \ + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, \ + N_(#ExtBit " is not supported by the host but has already exposed to the guest")); \ + LogRel(("CPUM: " #ExtBit " is not supported by the host but has already exposed to the guest\n")); \ + } \ + } while (0) +#define CPUID_GST_FEATURE2_WRN(reg, ExtBit, StdBit) \ + do { \ + if ( (aGuestCpuId[1].reg & (ExtBit)) \ + && !(fHostAmd \ + ? aHostRawExt[1].reg & (ExtBit) \ + : aHostRawStd[1].reg & (StdBit)) \ + && !(aHostOverrideExt[1].reg & (ExtBit)) \ + ) \ + LogRel(("CPUM: " #ExtBit " is not supported by the host but has already exposed to the guest\n")); \ + } while (0) +#define CPUID_GST_FEATURE2_EMU(reg, ExtBit, StdBit) \ + do { \ + if ( (aGuestCpuIdExt [1].reg & (ExtBit)) \ + && !(fHostAmd \ + ? aHostRawExt[1].reg & (ExtBit) \ + : aHostRawStd[1].reg & (StdBit)) \ + && !(aHostOverrideExt[1].reg & (ExtBit)) \ + ) \ + LogRel(("CPUM: Warning - " #ExtBit " is not supported by the host but already exposed to the guest. This may impact performance.\n")); \ + } while (0) +#define CPUID_GST_FEATURE2_IGN(reg, ExtBit, StdBit) do { } while (0) + + /* + * For raw-mode we'll require that the CPUs are very similar since we don't + * intercept CPUID instructions for user mode applications. 
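+ * (In raw-mode guest user code executes CPUID directly on the host CPU, so the saved
+ * state host and the current host must agree on the bits checked below.)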
+ */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + /* CPUID(0) */ + CPUID_CHECK_RET( aHostRawStd[0].uEbx == aRawStd[0].uEbx + && aHostRawStd[0].uEcx == aRawStd[0].uEcx + && aHostRawStd[0].uEdx == aRawStd[0].uEdx, + (N_("CPU vendor mismatch: host='%.4s%.4s%.4s' saved='%.4s%.4s%.4s'"), + &aHostRawStd[0].uEbx, &aHostRawStd[0].uEdx, &aHostRawStd[0].uEcx, + &aRawStd[0].uEbx, &aRawStd[0].uEdx, &aRawStd[0].uEcx)); + CPUID_CHECK2_WRN("Std CPUID max leaf", aHostRawStd[0].uEax, aRawStd[0].uEax); + CPUID_CHECK2_WRN("Reserved bits 15:14", (aHostRawExt[1].uEax >> 14) & 3, (aRawExt[1].uEax >> 14) & 3); + CPUID_CHECK2_WRN("Reserved bits 31:28", aHostRawExt[1].uEax >> 28, aRawExt[1].uEax >> 28); + + bool const fIntel = ASMIsIntelCpuEx(aRawStd[0].uEbx, aRawStd[0].uEcx, aRawStd[0].uEdx); + + /* CPUID(1).eax */ + CPUID_CHECK2_RET("CPU family", ASMGetCpuFamily(aHostRawStd[1].uEax), ASMGetCpuFamily(aRawStd[1].uEax)); + CPUID_CHECK2_RET("CPU model", ASMGetCpuModel(aHostRawStd[1].uEax, fIntel), ASMGetCpuModel(aRawStd[1].uEax, fIntel)); + CPUID_CHECK2_WRN("CPU type", (aHostRawStd[1].uEax >> 12) & 3, (aRawStd[1].uEax >> 12) & 3 ); + + /* CPUID(1).ebx - completely ignore CPU count and APIC ID. */ + CPUID_CHECK2_RET("CPU brand ID", aHostRawStd[1].uEbx & 0xff, aRawStd[1].uEbx & 0xff); + CPUID_CHECK2_WRN("CLFLUSH chunk count", (aHostRawStd[1].uEbx >> 8) & 0xff, (aRawStd[1].uEbx >> 8) & 0xff); + + /* CPUID(1).ecx */ + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSE3); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_PCLMUL); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_DTES64); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_MONITOR); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_CPLDS); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_VMX); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_SMX); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_EST); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_TM2); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSSE3); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_CNTXID); + CPUID_RAW_FEATURE_RET(Std, uEcx, RT_BIT_32(11) /*reserved*/ ); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_FMA); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_CX16); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_TPRUPDATE); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_PDCM); + CPUID_RAW_FEATURE_RET(Std, uEcx, RT_BIT_32(16) /*reserved*/); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_PCID); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_DCA); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSE4_1); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSE4_2); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_X2APIC); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_MOVBE); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_POPCNT); + CPUID_RAW_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_TSCDEADL); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_AES); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_XSAVE); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_OSXSAVE); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_AVX); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_F16C); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_RDRAND); + CPUID_RAW_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_HVP); + + /* CPUID(1).edx */ + CPUID_RAW_FEATURE_RET(Std, uEdx, 
X86_CPUID_FEATURE_EDX_FPU); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_VME); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_DE); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PSE); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_TSC); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_MSR); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PAE); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_MCE); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_CX8); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_APIC); + CPUID_RAW_FEATURE_RET(Std, uEdx, RT_BIT_32(10) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_SEP); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_MTRR); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PGE); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_MCA); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_CMOV); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PAT); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PSE36); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PSN); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_CLFSH); + CPUID_RAW_FEATURE_RET(Std, uEdx, RT_BIT_32(20) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_DS); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_ACPI); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_MMX); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_FXSR); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_SSE); + CPUID_RAW_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_SSE2); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_SS); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_HTT); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_TM); + CPUID_RAW_FEATURE_RET(Std, uEdx, RT_BIT_32(30) /*JMPE/IA64*/); + CPUID_RAW_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PBE); + + /* CPUID(2) - config, mostly about caches. ignore. */ + /* CPUID(3) - processor serial number. ignore. */ + /* CPUID(4) - config, cache and topology - takes ECX as input. ignore. */ + /* CPUID(5) - mwait/monitor config. ignore. */ + /* CPUID(6) - power management. ignore. */ + /* CPUID(7) - ???. ignore. */ + /* CPUID(8) - ???. ignore. */ + /* CPUID(9) - DCA. ignore for now. */ + /* CPUID(a) - PeMo info. ignore for now. */ + /* CPUID(b) - topology info - takes ECX as input. ignore. */ + + /* CPUID(d) - XCR0 stuff - takes ECX as input. We only warn about the main level (ECX=0) for now. */ + CPUID_CHECK_WRN( aRawStd[0].uEax < UINT32_C(0x0000000d) + || aHostRawStd[0].uEax >= UINT32_C(0x0000000d), + ("CPUM: Standard leaf D was present on saved state host, not present on current.\n")); + if ( aRawStd[0].uEax >= UINT32_C(0x0000000d) + && aHostRawStd[0].uEax >= UINT32_C(0x0000000d)) + { + CPUID_CHECK2_WRN("Valid low XCR0 bits", aHostRawStd[0xd].uEax, aRawStd[0xd].uEax); + CPUID_CHECK2_WRN("Valid high XCR0 bits", aHostRawStd[0xd].uEdx, aRawStd[0xd].uEdx); + CPUID_CHECK2_WRN("Current XSAVE/XRSTOR area size", aHostRawStd[0xd].uEbx, aRawStd[0xd].uEbx); +/** @todo XSAVE: Stricter XSAVE feature checks for raw-mode. */ + CPUID_CHECK2_WRN("Max XSAVE/XRSTOR area size", aHostRawStd[0xd].uEcx, aRawStd[0xd].uEcx); + } + + /* CPUID(0x80000000) - same as CPUID(0) except for eax. + Note! Intel have/is marking many of the fields here as reserved. We + will verify them as if it's an AMD CPU. 
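+ Note also that the vendor check below simply compares EBX, ECX and EDX; the failure
+ message prints them in EBX/EDX/ECX order, which is how the 12 character vendor
+ string reads.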
*/ + CPUID_CHECK_RET( (aHostRawExt[0].uEax >= UINT32_C(0x80000001) && aHostRawExt[0].uEax <= UINT32_C(0x8000007f)) + || !(aRawExt[0].uEax >= UINT32_C(0x80000001) && aRawExt[0].uEax <= UINT32_C(0x8000007f)), + (N_("Extended leaves was present on saved state host, but is missing on the current\n"))); + if (aRawExt[0].uEax >= UINT32_C(0x80000001) && aRawExt[0].uEax <= UINT32_C(0x8000007f)) + { + CPUID_CHECK_RET( aHostRawExt[0].uEbx == aRawExt[0].uEbx + && aHostRawExt[0].uEcx == aRawExt[0].uEcx + && aHostRawExt[0].uEdx == aRawExt[0].uEdx, + (N_("CPU vendor mismatch: host='%.4s%.4s%.4s' saved='%.4s%.4s%.4s'"), + &aHostRawExt[0].uEbx, &aHostRawExt[0].uEdx, &aHostRawExt[0].uEcx, + &aRawExt[0].uEbx, &aRawExt[0].uEdx, &aRawExt[0].uEcx)); + CPUID_CHECK2_WRN("Ext CPUID max leaf", aHostRawExt[0].uEax, aRawExt[0].uEax); + + /* CPUID(0x80000001).eax - same as CPUID(0).eax. */ + CPUID_CHECK2_RET("CPU family", ASMGetCpuFamily(aHostRawExt[1].uEax), ASMGetCpuFamily(aRawExt[1].uEax)); + CPUID_CHECK2_RET("CPU model", ASMGetCpuModel(aHostRawExt[1].uEax, fIntel), ASMGetCpuModel(aRawExt[1].uEax, fIntel)); + CPUID_CHECK2_WRN("CPU type", (aHostRawExt[1].uEax >> 12) & 3, (aRawExt[1].uEax >> 12) & 3 ); + CPUID_CHECK2_WRN("Reserved bits 15:14", (aHostRawExt[1].uEax >> 14) & 3, (aRawExt[1].uEax >> 14) & 3 ); + CPUID_CHECK2_WRN("Reserved bits 31:28", aHostRawExt[1].uEax >> 28, aRawExt[1].uEax >> 28); + + /* CPUID(0x80000001).ebx - Brand ID (maybe), just warn if things differs. */ + CPUID_CHECK2_WRN("CPU BrandID", aHostRawExt[1].uEbx & 0xffff, aRawExt[1].uEbx & 0xffff); + CPUID_CHECK2_WRN("Reserved bits 16:27", (aHostRawExt[1].uEbx >> 16) & 0xfff, (aRawExt[1].uEbx >> 16) & 0xfff); + CPUID_CHECK2_WRN("PkgType", (aHostRawExt[1].uEbx >> 28) & 0xf, (aRawExt[1].uEbx >> 28) & 0xf); + + /* CPUID(0x80000001).ecx */ + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_CMPL); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SVM); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_EXT_APIC); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_CR8L); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_ABM); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SSE4A); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_MISALNSSE); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_OSVW); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_IBS); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_XOP); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SKINIT); + CPUID_RAW_FEATURE_IGN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_WDT); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(14)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(15)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(16)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(17)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(18)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(19)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(20)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(21)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(22)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(23)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(24)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(25)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(26)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(27)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(28)); + 
CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(29)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(30)); + CPUID_RAW_FEATURE_WRN(Ext, uEcx, RT_BIT_32(31)); + + /* CPUID(0x80000001).edx */ + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_FPU); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_VME); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_DE); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_PSE); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_TSC); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_MSR); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_PAE); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_MCE); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_CX8); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_APIC); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, RT_BIT_32(10) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_SEP); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_MTRR); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_PGE); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_MCA); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_CMOV); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_PAT); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_PSE36); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, RT_BIT_32(18) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, RT_BIT_32(19) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_NX); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, RT_BIT_32(21) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_AXMMX); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_MMX); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_FXSR); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_FFXSR); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_PAGE1GB); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_RDTSCP); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, RT_BIT_32(28) /*reserved*/); + CPUID_RAW_FEATURE_IGN(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_LONG_MODE); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX); + CPUID_RAW_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_3DNOW); + + /** @todo verify the rest as well. */ + } + } + + + + /* + * Verify that we can support the features already exposed to the guest on + * this host. + * + * Most of the features we're emulating requires intercepting instruction + * and doing it the slow way, so there is no need to warn when they aren't + * present in the host CPU. Thus we use IGN instead of EMU on these. + * + * Trailing comments: + * "EMU" - Possible to emulate, could be lots of work and very slow. + * "EMU?" - Can this be emulated? + */ + CPUMCPUID aGuestCpuIdStd[2]; + RT_ZERO(aGuestCpuIdStd); + cpumR3CpuIdGetLeafLegacy(paLeaves, cLeaves, 1, 0, &aGuestCpuIdStd[1]); + + /* CPUID(1).ecx */ + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSE3); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_PCLMUL); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_DTES64); // -> EMU? + CPUID_GST_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_MONITOR); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_CPLDS); // -> EMU? 
+ CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_VMX); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SMX); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_EST); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_TM2); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSSE3); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_CNTXID); // -> EMU + CPUID_GST_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_SDBG); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_FMA); // -> EMU? what's this? + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_CX16); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_TPRUPDATE);//-> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_PDCM); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, RT_BIT_32(16) /*reserved*/); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_PCID); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_DCA); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSE4_1); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_SSE4_2); // -> EMU + CPUID_GST_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_X2APIC); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_MOVBE); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_POPCNT); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_TSCDEADL); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_AES); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_XSAVE); // -> EMU + CPUID_GST_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_OSXSAVE); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_AVX); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_F16C); + CPUID_GST_FEATURE_RET(Std, uEcx, X86_CPUID_FEATURE_ECX_RDRAND); + CPUID_GST_FEATURE_IGN(Std, uEcx, X86_CPUID_FEATURE_ECX_HVP); // Normally not set by host + + /* CPUID(1).edx */ + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_FPU); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_VME); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_DE); // -> EMU? + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PSE); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_TSC); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_MSR); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_PAE); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_MCE); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_CX8); // -> EMU? + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_APIC); + CPUID_GST_FEATURE_RET(Std, uEdx, RT_BIT_32(10) /*reserved*/); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_SEP); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_MTRR); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PGE); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_MCA); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_CMOV); // -> EMU + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PAT); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PSE36); + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_PSN); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_CLFSH); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, RT_BIT_32(20) /*reserved*/); + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_DS); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_ACPI); // -> EMU? 
+ CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_MMX); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_FXSR); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_SSE); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_SSE2); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_SS); // -> EMU? + CPUID_GST_FEATURE_IGN(Std, uEdx, X86_CPUID_FEATURE_EDX_HTT); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_TM); // -> EMU? + CPUID_GST_FEATURE_RET(Std, uEdx, RT_BIT_32(30) /*JMPE/IA64*/); // -> EMU + CPUID_GST_FEATURE_RET(Std, uEdx, X86_CPUID_FEATURE_EDX_PBE); // -> EMU? + + /* CPUID(0x80000000). */ + CPUMCPUID aGuestCpuIdExt[2]; + RT_ZERO(aGuestCpuIdExt); + if (cpumR3CpuIdGetLeafLegacy(paLeaves, cLeaves, UINT32_C(0x80000001), 0, &aGuestCpuIdExt[1])) + { + /** @todo deal with no 0x80000001 on the host. */ + bool const fHostAmd = ASMIsAmdCpuEx(aHostRawStd[0].uEbx, aHostRawStd[0].uEcx, aHostRawStd[0].uEdx); + bool const fGuestAmd = ASMIsAmdCpuEx(aGuestCpuIdExt[0].uEbx, aGuestCpuIdExt[0].uEcx, aGuestCpuIdExt[0].uEdx); + + /* CPUID(0x80000001).ecx */ + CPUID_GST_FEATURE_WRN(Ext, uEcx, X86_CPUID_EXT_FEATURE_ECX_LAHF_SAHF); // -> EMU + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_CMPL); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SVM); // -> EMU + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_EXT_APIC);// ??? + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_CR8L); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_ABM); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SSE4A); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);//-> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF);// -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_OSVW); // -> EMU? 
+ CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_IBS); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_XOP); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_SKINIT); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEcx, X86_CPUID_AMD_FEATURE_ECX_WDT); // -> EMU + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(14)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(15)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(16)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(17)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(18)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(19)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(20)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(21)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(22)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(23)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(24)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(25)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(26)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(27)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(28)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(29)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(30)); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEcx, RT_BIT_32(31)); + + /* CPUID(0x80000001).edx */ + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_FPU, X86_CPUID_FEATURE_EDX_FPU); // -> EMU + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_VME, X86_CPUID_FEATURE_EDX_VME); // -> EMU + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_DE, X86_CPUID_FEATURE_EDX_DE); // -> EMU + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_PSE, X86_CPUID_FEATURE_EDX_PSE); + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_TSC, X86_CPUID_FEATURE_EDX_TSC); // -> EMU + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_MSR, X86_CPUID_FEATURE_EDX_MSR); // -> EMU + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_PAE, X86_CPUID_FEATURE_EDX_PAE); + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_MCE, X86_CPUID_FEATURE_EDX_MCE); + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_CX8, X86_CPUID_FEATURE_EDX_CX8); // -> EMU? + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_APIC, X86_CPUID_FEATURE_EDX_APIC); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEdx, RT_BIT_32(10) /*reserved*/); + CPUID_GST_FEATURE_IGN( Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_SYSCALL); // On Intel: long mode only. 
+ CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_MTRR, X86_CPUID_FEATURE_EDX_MTRR); + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_PGE, X86_CPUID_FEATURE_EDX_PGE); + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_MCA, X86_CPUID_FEATURE_EDX_MCA); + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_CMOV, X86_CPUID_FEATURE_EDX_CMOV); // -> EMU + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_PAT, X86_CPUID_FEATURE_EDX_PAT); + CPUID_GST_FEATURE2_IGN( uEdx, X86_CPUID_AMD_FEATURE_EDX_PSE36, X86_CPUID_FEATURE_EDX_PSE36); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEdx, RT_BIT_32(18) /*reserved*/); + CPUID_GST_AMD_FEATURE_WRN(Ext, uEdx, RT_BIT_32(19) /*reserved*/); + CPUID_GST_FEATURE_RET( Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_NX); + CPUID_GST_FEATURE_WRN( Ext, uEdx, RT_BIT_32(21) /*reserved*/); + CPUID_GST_FEATURE_RET( Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_AXMMX); + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_MMX, X86_CPUID_FEATURE_EDX_MMX); // -> EMU + CPUID_GST_FEATURE2_RET( uEdx, X86_CPUID_AMD_FEATURE_EDX_FXSR, X86_CPUID_FEATURE_EDX_FXSR); // -> EMU + CPUID_GST_AMD_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_FFXSR); + CPUID_GST_AMD_FEATURE_RET(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_PAGE1GB); + CPUID_GST_AMD_FEATURE_RET(Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_RDTSCP); + CPUID_GST_FEATURE_IGN( Ext, uEdx, RT_BIT_32(28) /*reserved*/); + CPUID_GST_FEATURE_RET( Ext, uEdx, X86_CPUID_EXT_FEATURE_EDX_LONG_MODE); + CPUID_GST_AMD_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX); + CPUID_GST_AMD_FEATURE_RET(Ext, uEdx, X86_CPUID_AMD_FEATURE_EDX_3DNOW); + } + + /** @todo check leaf 7 */ + + /* CPUID(d) - XCR0 stuff - takes ECX as input. + * ECX=0: EAX - Valid bits in XCR0[31:0]. + * EBX - Maximum state size as per current XCR0 value. + * ECX - Maximum state size for all supported features. + * EDX - Valid bits in XCR0[63:32]. + * ECX=1: EAX - Various X-features. + * EBX - Maximum state size as per current XCR0|IA32_XSS value. + * ECX - Valid bits in IA32_XSS[31:0]. + * EDX - Valid bits in IA32_XSS[63:32]. + * ECX=N, where N in 2..63 and indicates a bit in XCR0 and/or IA32_XSS, + * if the bit invalid all four registers are set to zero. + * EAX - The state size for this feature. + * EBX - The state byte offset of this feature. + * ECX - Bit 0 indicates whether this sub-leaf maps to a valid IA32_XSS bit (=1) or a valid XCR0 bit (=0). + * EDX - Reserved, but is set to zero if invalid sub-leaf index. + */ + uint64_t fGuestXcr0Mask = 0; + PCPUMCPUIDLEAF pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x0000000d), 0); + if ( pCurLeaf + && (aGuestCpuIdStd[1].uEcx & X86_CPUID_FEATURE_ECX_XSAVE) + && ( pCurLeaf->uEax + || pCurLeaf->uEbx + || pCurLeaf->uEcx + || pCurLeaf->uEdx) ) + { + fGuestXcr0Mask = RT_MAKE_U64(pCurLeaf->uEax, pCurLeaf->uEdx); + if (fGuestXcr0Mask & ~pVM->cpum.s.fXStateHostMask) + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, + N_("CPUID(0xd/0).EDX:EAX mismatch: %#llx saved, %#llx supported by the current host (XCR0 bits)"), + fGuestXcr0Mask, pVM->cpum.s.fXStateHostMask); + if ((fGuestXcr0Mask & (XSAVE_C_X87 | XSAVE_C_SSE)) != (XSAVE_C_X87 | XSAVE_C_SSE)) + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, + N_("CPUID(0xd/0).EDX:EAX missing mandatory X87 or SSE bits: %#RX64"), fGuestXcr0Mask); + + /* We don't support any additional features yet. 
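+ I.e. CPUID(0xd/1) must be all zeros in the saved state: no XSAVEOPT, XSAVEC,
+ XGETBV(ECX=1) or XSAVES bits in EAX (cf. g_aLeaf13Sub1EaxSubFields below) and no
+ IA32_XSS bits in ECX/EDX, which is what the checks that follow enforce.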
*/ + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x0000000d), 1); + if (pCurLeaf && pCurLeaf->uEax) + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, + N_("CPUID(0xd/1).EAX=%#x, expected zero"), pCurLeaf->uEax); + if (pCurLeaf && (pCurLeaf->uEcx || pCurLeaf->uEdx)) + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, + N_("CPUID(0xd/1).EDX:ECX=%#llx, expected zero"), + RT_MAKE_U64(pCurLeaf->uEdx, pCurLeaf->uEcx)); + + + for (uint32_t uSubLeaf = 2; uSubLeaf < 64; uSubLeaf++) + { + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x0000000d), uSubLeaf); + if (pCurLeaf) + { + /* If advertised, the state component offset and size must match the one used by host. */ + if (pCurLeaf->uEax || pCurLeaf->uEbx || pCurLeaf->uEcx || pCurLeaf->uEdx) + { + CPUMCPUID RawHost; + ASMCpuIdExSlow(UINT32_C(0x0000000d), 0, uSubLeaf, 0, + &RawHost.uEax, &RawHost.uEbx, &RawHost.uEcx, &RawHost.uEdx); + if ( RawHost.uEbx != pCurLeaf->uEbx + || RawHost.uEax != pCurLeaf->uEax) + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, + N_("CPUID(0xd/%#x).EBX/EAX=%#x/%#x, current host uses %#x/%#x (offset/size)"), + uSubLeaf, pCurLeaf->uEbx, pCurLeaf->uEax, RawHost.uEbx, RawHost.uEax); + } + } + } + } + /* Clear leaf 0xd just in case we're loading an old state... */ + else if (pCurLeaf) + { + for (uint32_t uSubLeaf = 0; uSubLeaf < 64; uSubLeaf++) + { + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x0000000d), uSubLeaf); + if (pCurLeaf) + { + AssertLogRelMsg( uVersion <= CPUM_SAVED_STATE_VERSION_PUT_STRUCT + || ( pCurLeaf->uEax == 0 + && pCurLeaf->uEbx == 0 + && pCurLeaf->uEcx == 0 + && pCurLeaf->uEdx == 0), + ("uVersion=%#x; %#x %#x %#x %#x\n", + uVersion, pCurLeaf->uEax, pCurLeaf->uEbx, pCurLeaf->uEcx, pCurLeaf->uEdx)); + pCurLeaf->uEax = pCurLeaf->uEbx = pCurLeaf->uEcx = pCurLeaf->uEdx = 0; + } + } + } + + /* Update the fXStateGuestMask value for the VM. */ + if (pVM->cpum.s.fXStateGuestMask != fGuestXcr0Mask) + { + LogRel(("CPUM: fXStateGuestMask=%#llx -> %#llx\n", pVM->cpum.s.fXStateGuestMask, fGuestXcr0Mask)); + pVM->cpum.s.fXStateGuestMask = fGuestXcr0Mask; + if (!fGuestXcr0Mask && (aGuestCpuIdStd[1].uEcx & X86_CPUID_FEATURE_ECX_XSAVE)) + return SSMR3SetLoadError(pSSM, VERR_SSM_LOAD_CPUID_MISMATCH, RT_SRC_POS, + N_("Internal Processing Error: XSAVE feature bit enabled, but leaf 0xd is empty.")); + } + +#undef CPUID_CHECK_RET +#undef CPUID_CHECK_WRN +#undef CPUID_CHECK2_RET +#undef CPUID_CHECK2_WRN +#undef CPUID_RAW_FEATURE_RET +#undef CPUID_RAW_FEATURE_WRN +#undef CPUID_RAW_FEATURE_IGN +#undef CPUID_GST_FEATURE_RET +#undef CPUID_GST_FEATURE_WRN +#undef CPUID_GST_FEATURE_EMU +#undef CPUID_GST_FEATURE_IGN +#undef CPUID_GST_FEATURE2_RET +#undef CPUID_GST_FEATURE2_WRN +#undef CPUID_GST_FEATURE2_EMU +#undef CPUID_GST_FEATURE2_IGN +#undef CPUID_GST_AMD_FEATURE_RET +#undef CPUID_GST_AMD_FEATURE_WRN +#undef CPUID_GST_AMD_FEATURE_EMU +#undef CPUID_GST_AMD_FEATURE_IGN + + /* + * We're good, commit the CPU ID leaves. + */ + MMHyperFree(pVM, pVM->cpum.s.GuestInfo.paCpuIdLeavesR3); + pVM->cpum.s.GuestInfo.paCpuIdLeavesR3 = NULL; + pVM->cpum.s.GuestInfo.paCpuIdLeavesR0 = NIL_RTR0PTR; + pVM->cpum.s.GuestInfo.paCpuIdLeavesRC = NIL_RTRCPTR; + pVM->cpum.s.GuestInfo.DefCpuId = GuestDefCpuId; + rc = cpumR3CpuIdInstallAndExplodeLeaves(pVM, &pVM->cpum.s, paLeaves, cLeaves, pMsrs); + AssertLogRelRCReturn(rc, rc); + + return VINF_SUCCESS; +} + + +/** + * Loads the CPU ID leaves saved by pass 0. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uVersion The format version. + * @param pMsrs The guest MSRs. + */ +int cpumR3LoadCpuId(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, PCCPUMMSRS pMsrs) +{ + AssertMsgReturn(uVersion >= CPUM_SAVED_STATE_VERSION_VER3_2, ("%u\n", uVersion), VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION); + + /* + * Load the CPUID leaves array first and call worker to do the rest, just so + * we can free the memory when we need to without ending up in column 1000. + */ + PCPUMCPUIDLEAF paLeaves; + uint32_t cLeaves; + int rc = cpumR3LoadGuestCpuIdArray(pVM, pSSM, uVersion, &paLeaves, &cLeaves); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + rc = cpumR3LoadCpuIdInner(pVM, pSSM, uVersion, paLeaves, cLeaves, pMsrs); + RTMemFree(paLeaves); + } + return rc; +} + + + +/** + * Loads the CPU ID leaves saved by pass 0 in an pre 3.2 saved state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uVersion The format version. + */ +int cpumR3LoadCpuIdPre32(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion) +{ + AssertMsgReturn(uVersion < CPUM_SAVED_STATE_VERSION_VER3_2, ("%u\n", uVersion), VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION); + + /* + * Restore the CPUID leaves. + * + * Note that we support restoring less than the current amount of standard + * leaves because we've been allowed more is newer version of VBox. + */ + uint32_t cElements; + int rc = SSMR3GetU32(pSSM, &cElements); AssertRCReturn(rc, rc); + if (cElements > RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmStd)) + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + SSMR3GetMem(pSSM, &pVM->cpum.s.aGuestCpuIdPatmStd[0], cElements*sizeof(pVM->cpum.s.aGuestCpuIdPatmStd[0])); + + rc = SSMR3GetU32(pSSM, &cElements); AssertRCReturn(rc, rc); + if (cElements != RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmExt)) + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + SSMR3GetMem(pSSM, &pVM->cpum.s.aGuestCpuIdPatmExt[0], sizeof(pVM->cpum.s.aGuestCpuIdPatmExt)); + + rc = SSMR3GetU32(pSSM, &cElements); AssertRCReturn(rc, rc); + if (cElements != RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmCentaur)) + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + SSMR3GetMem(pSSM, &pVM->cpum.s.aGuestCpuIdPatmCentaur[0], sizeof(pVM->cpum.s.aGuestCpuIdPatmCentaur)); + + SSMR3GetMem(pSSM, &pVM->cpum.s.GuestInfo.DefCpuId, sizeof(pVM->cpum.s.GuestInfo.DefCpuId)); + + /* + * Check that the basic cpuid id information is unchanged. + */ + /** @todo we should check the 64 bits capabilities too! */ + uint32_t au32CpuId[8] = {0,0,0,0, 0,0,0,0}; + ASMCpuIdExSlow(0, 0, 0, 0, &au32CpuId[0], &au32CpuId[1], &au32CpuId[2], &au32CpuId[3]); + ASMCpuIdExSlow(1, 0, 0, 0, &au32CpuId[4], &au32CpuId[5], &au32CpuId[6], &au32CpuId[7]); + uint32_t au32CpuIdSaved[8]; + rc = SSMR3GetMem(pSSM, &au32CpuIdSaved[0], sizeof(au32CpuIdSaved)); + if (RT_SUCCESS(rc)) + { + /* Ignore CPU stepping. */ + au32CpuId[4] &= 0xfffffff0; + au32CpuIdSaved[4] &= 0xfffffff0; + + /* Ignore APIC ID (AMD specs). */ + au32CpuId[5] &= ~0xff000000; + au32CpuIdSaved[5] &= ~0xff000000; + + /* Ignore the number of Logical CPUs (AMD specs). */ + au32CpuId[5] &= ~0x00ff0000; + au32CpuIdSaved[5] &= ~0x00ff0000; + + /* Ignore some advanced capability bits, that we don't expose to the guest. 
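+ The same mask is applied to both the live and the saved CPUID(1).ECX values below,
+ so a difference in these bits alone will not make the memcmp fail.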
*/ + au32CpuId[6] &= ~( X86_CPUID_FEATURE_ECX_DTES64 + | X86_CPUID_FEATURE_ECX_VMX + | X86_CPUID_FEATURE_ECX_SMX + | X86_CPUID_FEATURE_ECX_EST + | X86_CPUID_FEATURE_ECX_TM2 + | X86_CPUID_FEATURE_ECX_CNTXID + | X86_CPUID_FEATURE_ECX_TPRUPDATE + | X86_CPUID_FEATURE_ECX_PDCM + | X86_CPUID_FEATURE_ECX_DCA + | X86_CPUID_FEATURE_ECX_X2APIC + ); + au32CpuIdSaved[6] &= ~( X86_CPUID_FEATURE_ECX_DTES64 + | X86_CPUID_FEATURE_ECX_VMX + | X86_CPUID_FEATURE_ECX_SMX + | X86_CPUID_FEATURE_ECX_EST + | X86_CPUID_FEATURE_ECX_TM2 + | X86_CPUID_FEATURE_ECX_CNTXID + | X86_CPUID_FEATURE_ECX_TPRUPDATE + | X86_CPUID_FEATURE_ECX_PDCM + | X86_CPUID_FEATURE_ECX_DCA + | X86_CPUID_FEATURE_ECX_X2APIC + ); + + /* Make sure we don't forget to update the masks when enabling + * features in the future. + */ + AssertRelease(!(pVM->cpum.s.aGuestCpuIdPatmStd[1].uEcx & + ( X86_CPUID_FEATURE_ECX_DTES64 + | X86_CPUID_FEATURE_ECX_VMX + | X86_CPUID_FEATURE_ECX_SMX + | X86_CPUID_FEATURE_ECX_EST + | X86_CPUID_FEATURE_ECX_TM2 + | X86_CPUID_FEATURE_ECX_CNTXID + | X86_CPUID_FEATURE_ECX_TPRUPDATE + | X86_CPUID_FEATURE_ECX_PDCM + | X86_CPUID_FEATURE_ECX_DCA + | X86_CPUID_FEATURE_ECX_X2APIC + ))); + /* do the compare */ + if (memcmp(au32CpuIdSaved, au32CpuId, sizeof(au32CpuIdSaved))) + { + if (SSMR3HandleGetAfter(pSSM) == SSMAFTER_DEBUG_IT) + LogRel(("cpumR3LoadExec: CpuId mismatch! (ignored due to SSMAFTER_DEBUG_IT)\n" + "Saved=%.*Rhxs\n" + "Real =%.*Rhxs\n", + sizeof(au32CpuIdSaved), au32CpuIdSaved, + sizeof(au32CpuId), au32CpuId)); + else + { + LogRel(("cpumR3LoadExec: CpuId mismatch!\n" + "Saved=%.*Rhxs\n" + "Real =%.*Rhxs\n", + sizeof(au32CpuIdSaved), au32CpuIdSaved, + sizeof(au32CpuId), au32CpuId)); + rc = VERR_SSM_LOAD_CPUID_MISMATCH; + } + } + } + + return rc; +} + + + +/* + * + * + * CPUID Info Handler. + * CPUID Info Handler. + * CPUID Info Handler. + * + * + */ + + + +/** + * Get L1 cache / TLS associativity. + */ +static const char *getCacheAss(unsigned u, char *pszBuf) +{ + if (u == 0) + return "res0 "; + if (u == 1) + return "direct"; + if (u == 255) + return "fully"; + if (u >= 256) + return "???"; + + RTStrPrintf(pszBuf, 16, "%d way", u); + return pszBuf; +} + + +/** + * Get L2 cache associativity. + */ +const char *getL2CacheAss(unsigned u) +{ + switch (u) + { + case 0: return "off "; + case 1: return "direct"; + case 2: return "2 way "; + case 3: return "res3 "; + case 4: return "4 way "; + case 5: return "res5 "; + case 6: return "8 way "; + case 7: return "res7 "; + case 8: return "16 way"; + case 9: return "res9 "; + case 10: return "res10 "; + case 11: return "res11 "; + case 12: return "res12 "; + case 13: return "res13 "; + case 14: return "res14 "; + case 15: return "fully "; + default: return "????"; + } +} + + +/** CPUID(1).EDX field descriptions. 
*/ +static DBGFREGSUBFIELD const g_aLeaf1EdxSubFields[] = +{ + DBGFREGSUBFIELD_RO("FPU\0" "x87 FPU on Chip", 0, 1, 0), + DBGFREGSUBFIELD_RO("VME\0" "Virtual 8086 Mode Enhancements", 1, 1, 0), + DBGFREGSUBFIELD_RO("DE\0" "Debugging extensions", 2, 1, 0), + DBGFREGSUBFIELD_RO("PSE\0" "Page Size Extension", 3, 1, 0), + DBGFREGSUBFIELD_RO("TSC\0" "Time Stamp Counter", 4, 1, 0), + DBGFREGSUBFIELD_RO("MSR\0" "Model Specific Registers", 5, 1, 0), + DBGFREGSUBFIELD_RO("PAE\0" "Physical Address Extension", 6, 1, 0), + DBGFREGSUBFIELD_RO("MCE\0" "Machine Check Exception", 7, 1, 0), + DBGFREGSUBFIELD_RO("CX8\0" "CMPXCHG8B instruction", 8, 1, 0), + DBGFREGSUBFIELD_RO("APIC\0" "APIC On-Chip", 9, 1, 0), + DBGFREGSUBFIELD_RO("SEP\0" "SYSENTER and SYSEXIT Present", 11, 1, 0), + DBGFREGSUBFIELD_RO("MTRR\0" "Memory Type Range Registers", 12, 1, 0), + DBGFREGSUBFIELD_RO("PGE\0" "PTE Global Bit", 13, 1, 0), + DBGFREGSUBFIELD_RO("MCA\0" "Machine Check Architecture", 14, 1, 0), + DBGFREGSUBFIELD_RO("CMOV\0" "Conditional Move instructions", 15, 1, 0), + DBGFREGSUBFIELD_RO("PAT\0" "Page Attribute Table", 16, 1, 0), + DBGFREGSUBFIELD_RO("PSE-36\0" "36-bit Page Size Extension", 17, 1, 0), + DBGFREGSUBFIELD_RO("PSN\0" "Processor Serial Number", 18, 1, 0), + DBGFREGSUBFIELD_RO("CLFSH\0" "CLFLUSH instruction", 19, 1, 0), + DBGFREGSUBFIELD_RO("DS\0" "Debug Store", 21, 1, 0), + DBGFREGSUBFIELD_RO("ACPI\0" "Thermal Mon. & Soft. Clock Ctrl.", 22, 1, 0), + DBGFREGSUBFIELD_RO("MMX\0" "Intel MMX Technology", 23, 1, 0), + DBGFREGSUBFIELD_RO("FXSR\0" "FXSAVE and FXRSTOR instructions", 24, 1, 0), + DBGFREGSUBFIELD_RO("SSE\0" "SSE support", 25, 1, 0), + DBGFREGSUBFIELD_RO("SSE2\0" "SSE2 support", 26, 1, 0), + DBGFREGSUBFIELD_RO("SS\0" "Self Snoop", 27, 1, 0), + DBGFREGSUBFIELD_RO("HTT\0" "Hyper-Threading Technology", 28, 1, 0), + DBGFREGSUBFIELD_RO("TM\0" "Therm. Monitor", 29, 1, 0), + DBGFREGSUBFIELD_RO("PBE\0" "Pending Break Enabled", 31, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(1).ECX field descriptions. 
*/ +static DBGFREGSUBFIELD const g_aLeaf1EcxSubFields[] = +{ + DBGFREGSUBFIELD_RO("SSE3\0" "SSE3 support", 0, 1, 0), + DBGFREGSUBFIELD_RO("PCLMUL\0" "PCLMULQDQ support (for AES-GCM)", 1, 1, 0), + DBGFREGSUBFIELD_RO("DTES64\0" "DS Area 64-bit Layout", 2, 1, 0), + DBGFREGSUBFIELD_RO("MONITOR\0" "MONITOR/MWAIT instructions", 3, 1, 0), + DBGFREGSUBFIELD_RO("CPL-DS\0" "CPL Qualified Debug Store", 4, 1, 0), + DBGFREGSUBFIELD_RO("VMX\0" "Virtual Machine Extensions", 5, 1, 0), + DBGFREGSUBFIELD_RO("SMX\0" "Safer Mode Extensions", 6, 1, 0), + DBGFREGSUBFIELD_RO("EST\0" "Enhanced SpeedStep Technology", 7, 1, 0), + DBGFREGSUBFIELD_RO("TM2\0" "Thermal Monitor 2", 8, 1, 0), + DBGFREGSUBFIELD_RO("SSSE3\0" "Supplemental Streaming SIMD Extensions 3", 9, 1, 0), + DBGFREGSUBFIELD_RO("CNTX-ID\0" "L1 Context ID", 10, 1, 0), + DBGFREGSUBFIELD_RO("SDBG\0" "Silicon Debug interface", 11, 1, 0), + DBGFREGSUBFIELD_RO("FMA\0" "Fused Multiply Add extensions", 12, 1, 0), + DBGFREGSUBFIELD_RO("CX16\0" "CMPXCHG16B instruction", 13, 1, 0), + DBGFREGSUBFIELD_RO("TPRUPDATE\0" "xTPR Update Control", 14, 1, 0), + DBGFREGSUBFIELD_RO("PDCM\0" "Perf/Debug Capability MSR", 15, 1, 0), + DBGFREGSUBFIELD_RO("PCID\0" "Process Context Identifiers", 17, 1, 0), + DBGFREGSUBFIELD_RO("DCA\0" "Direct Cache Access", 18, 1, 0), + DBGFREGSUBFIELD_RO("SSE4_1\0" "SSE4_1 support", 19, 1, 0), + DBGFREGSUBFIELD_RO("SSE4_2\0" "SSE4_2 support", 20, 1, 0), + DBGFREGSUBFIELD_RO("X2APIC\0" "x2APIC support", 21, 1, 0), + DBGFREGSUBFIELD_RO("MOVBE\0" "MOVBE instruction", 22, 1, 0), + DBGFREGSUBFIELD_RO("POPCNT\0" "POPCNT instruction", 23, 1, 0), + DBGFREGSUBFIELD_RO("TSCDEADL\0" "Time Stamp Counter Deadline", 24, 1, 0), + DBGFREGSUBFIELD_RO("AES\0" "AES instructions", 25, 1, 0), + DBGFREGSUBFIELD_RO("XSAVE\0" "XSAVE instruction", 26, 1, 0), + DBGFREGSUBFIELD_RO("OSXSAVE\0" "OSXSAVE instruction", 27, 1, 0), + DBGFREGSUBFIELD_RO("AVX\0" "AVX support", 28, 1, 0), + DBGFREGSUBFIELD_RO("F16C\0" "16-bit floating point conversion instructions", 29, 1, 0), + DBGFREGSUBFIELD_RO("RDRAND\0" "RDRAND instruction", 30, 1, 0), + DBGFREGSUBFIELD_RO("HVP\0" "Hypervisor Present (we're a guest)", 31, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(7,0).EBX field descriptions. 
*/ +static DBGFREGSUBFIELD const g_aLeaf7Sub0EbxSubFields[] = +{ + DBGFREGSUBFIELD_RO("FSGSBASE\0" "RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instr.", 0, 1, 0), + DBGFREGSUBFIELD_RO("TSCADJUST\0" "Supports MSR_IA32_TSC_ADJUST", 1, 1, 0), + DBGFREGSUBFIELD_RO("SGX\0" "Supports Software Guard Extensions", 2, 1, 0), + DBGFREGSUBFIELD_RO("BMI1\0" "Advanced Bit Manipulation extension 1", 3, 1, 0), + DBGFREGSUBFIELD_RO("HLE\0" "Hardware Lock Elision", 4, 1, 0), + DBGFREGSUBFIELD_RO("AVX2\0" "Advanced Vector Extensions 2", 5, 1, 0), + DBGFREGSUBFIELD_RO("FDP_EXCPTN_ONLY\0" "FPU DP only updated on exceptions", 6, 1, 0), + DBGFREGSUBFIELD_RO("SMEP\0" "Supervisor Mode Execution Prevention", 7, 1, 0), + DBGFREGSUBFIELD_RO("BMI2\0" "Advanced Bit Manipulation extension 2", 8, 1, 0), + DBGFREGSUBFIELD_RO("ERMS\0" "Enhanced REP MOVSB/STOSB instructions", 9, 1, 0), + DBGFREGSUBFIELD_RO("INVPCID\0" "INVPCID instruction", 10, 1, 0), + DBGFREGSUBFIELD_RO("RTM\0" "Restricted Transactional Memory", 11, 1, 0), + DBGFREGSUBFIELD_RO("PQM\0" "Platform Quality of Service Monitoring", 12, 1, 0), + DBGFREGSUBFIELD_RO("DEPFPU_CS_DS\0" "Deprecates FPU CS, FPU DS values if set", 13, 1, 0), + DBGFREGSUBFIELD_RO("MPE\0" "Intel Memory Protection Extensions", 14, 1, 0), + DBGFREGSUBFIELD_RO("PQE\0" "Platform Quality of Service Enforcement", 15, 1, 0), + DBGFREGSUBFIELD_RO("AVX512F\0" "AVX512 Foundation instructions", 16, 1, 0), + DBGFREGSUBFIELD_RO("RDSEED\0" "RDSEED instruction", 18, 1, 0), + DBGFREGSUBFIELD_RO("ADX\0" "ADCX/ADOX instructions", 19, 1, 0), + DBGFREGSUBFIELD_RO("SMAP\0" "Supervisor Mode Access Prevention", 20, 1, 0), + DBGFREGSUBFIELD_RO("CLFLUSHOPT\0" "CLFLUSHOPT (Cache Line Flush) instruction", 23, 1, 0), + DBGFREGSUBFIELD_RO("INTEL_PT\0" "Intel Processor Trace", 25, 1, 0), + DBGFREGSUBFIELD_RO("AVX512PF\0" "AVX512 Prefetch instructions", 26, 1, 0), + DBGFREGSUBFIELD_RO("AVX512ER\0" "AVX512 Exponential & Reciprocal instructions", 27, 1, 0), + DBGFREGSUBFIELD_RO("AVX512CD\0" "AVX512 Conflict Detection instructions", 28, 1, 0), + DBGFREGSUBFIELD_RO("SHA\0" "Secure Hash Algorithm extensions", 29, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(7,0).ECX field descriptions. */ +static DBGFREGSUBFIELD const g_aLeaf7Sub0EcxSubFields[] = +{ + DBGFREGSUBFIELD_RO("PREFETCHWT1\0" "PREFETCHWT1 instruction", 0, 1, 0), + DBGFREGSUBFIELD_RO("UMIP\0" "User mode insturction prevention", 2, 1, 0), + DBGFREGSUBFIELD_RO("PKU\0" "Protection Key for Usermode pages", 3, 1, 0), + DBGFREGSUBFIELD_RO("OSPKE\0" "CR4.PKU mirror", 4, 1, 0), + DBGFREGSUBFIELD_RO("MAWAU\0" "Value used by BNDLDX & BNDSTX", 17, 5, 0), + DBGFREGSUBFIELD_RO("RDPID\0" "Read processor ID support", 22, 1, 0), + DBGFREGSUBFIELD_RO("SGX_LC\0" "Supports SGX Launch Configuration", 30, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(7,0).EDX field descriptions. */ +static DBGFREGSUBFIELD const g_aLeaf7Sub0EdxSubFields[] = +{ + DBGFREGSUBFIELD_RO("IBRS_IBPB\0" "IA32_SPEC_CTRL.IBRS and IA32_PRED_CMD.IBPB", 26, 1, 0), + DBGFREGSUBFIELD_RO("STIBP\0" "Supports IA32_SPEC_CTRL.STIBP", 27, 1, 0), + DBGFREGSUBFIELD_RO("FLUSH_CMD\0" "Supports IA32_FLUSH_CMD", 28, 1, 0), + DBGFREGSUBFIELD_RO("ARCHCAP\0" "Supports IA32_ARCH_CAP", 29, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + + +/** CPUID(13,0).EAX+EDX, XCR0, ++ bit descriptions. 
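+ * (These DBGFREGSUBFIELD tables drive the mnemonic dumpers below; a hypothetical call
+ *  like cpumR3CpuIdInfoMnemonicListU64(pHlp, fXcr0, g_aXSaveStateBits, "XCR0:", 8);
+ *  would print one entry per bit set in the assumed fXcr0 value.)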
*/ +static DBGFREGSUBFIELD const g_aXSaveStateBits[] = +{ + DBGFREGSUBFIELD_RO("x87\0" "Legacy FPU state", 0, 1, 0), + DBGFREGSUBFIELD_RO("SSE\0" "128-bit SSE state", 1, 1, 0), + DBGFREGSUBFIELD_RO("YMM_Hi128\0" "Upper 128 bits of YMM0-15 (AVX)", 2, 1, 0), + DBGFREGSUBFIELD_RO("BNDREGS\0" "MPX bound register state", 3, 1, 0), + DBGFREGSUBFIELD_RO("BNDCSR\0" "MPX bound config and status state", 4, 1, 0), + DBGFREGSUBFIELD_RO("Opmask\0" "opmask state", 5, 1, 0), + DBGFREGSUBFIELD_RO("ZMM_Hi256\0" "Upper 256 bits of ZMM0-15 (AVX-512)", 6, 1, 0), + DBGFREGSUBFIELD_RO("Hi16_ZMM\0" "512-bits ZMM16-31 state (AVX-512)", 7, 1, 0), + DBGFREGSUBFIELD_RO("LWP\0" "Lightweight Profiling (AMD)", 62, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(13,1).EAX field descriptions. */ +static DBGFREGSUBFIELD const g_aLeaf13Sub1EaxSubFields[] = +{ + DBGFREGSUBFIELD_RO("XSAVEOPT\0" "XSAVEOPT is available", 0, 1, 0), + DBGFREGSUBFIELD_RO("XSAVEC\0" "XSAVEC and compacted XRSTOR supported", 1, 1, 0), + DBGFREGSUBFIELD_RO("XGETBC1\0" "XGETBV with ECX=1 supported", 2, 1, 0), + DBGFREGSUBFIELD_RO("XSAVES\0" "XSAVES/XRSTORS and IA32_XSS supported", 3, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + + +/** CPUID(0x80000001,0).EDX field descriptions. */ +static DBGFREGSUBFIELD const g_aExtLeaf1EdxSubFields[] = +{ + DBGFREGSUBFIELD_RO("FPU\0" "x87 FPU on Chip", 0, 1, 0), + DBGFREGSUBFIELD_RO("VME\0" "Virtual 8086 Mode Enhancements", 1, 1, 0), + DBGFREGSUBFIELD_RO("DE\0" "Debugging extensions", 2, 1, 0), + DBGFREGSUBFIELD_RO("PSE\0" "Page Size Extension", 3, 1, 0), + DBGFREGSUBFIELD_RO("TSC\0" "Time Stamp Counter", 4, 1, 0), + DBGFREGSUBFIELD_RO("MSR\0" "K86 Model Specific Registers", 5, 1, 0), + DBGFREGSUBFIELD_RO("PAE\0" "Physical Address Extension", 6, 1, 0), + DBGFREGSUBFIELD_RO("MCE\0" "Machine Check Exception", 7, 1, 0), + DBGFREGSUBFIELD_RO("CX8\0" "CMPXCHG8B instruction", 8, 1, 0), + DBGFREGSUBFIELD_RO("APIC\0" "APIC On-Chip", 9, 1, 0), + DBGFREGSUBFIELD_RO("SEP\0" "SYSCALL/SYSRET", 11, 1, 0), + DBGFREGSUBFIELD_RO("MTRR\0" "Memory Type Range Registers", 12, 1, 0), + DBGFREGSUBFIELD_RO("PGE\0" "PTE Global Bit", 13, 1, 0), + DBGFREGSUBFIELD_RO("MCA\0" "Machine Check Architecture", 14, 1, 0), + DBGFREGSUBFIELD_RO("CMOV\0" "Conditional Move instructions", 15, 1, 0), + DBGFREGSUBFIELD_RO("PAT\0" "Page Attribute Table", 16, 1, 0), + DBGFREGSUBFIELD_RO("PSE-36\0" "36-bit Page Size Extension", 17, 1, 0), + DBGFREGSUBFIELD_RO("NX\0" "No-Execute/Execute-Disable", 20, 1, 0), + DBGFREGSUBFIELD_RO("AXMMX\0" "AMD Extensions to MMX instructions", 22, 1, 0), + DBGFREGSUBFIELD_RO("MMX\0" "Intel MMX Technology", 23, 1, 0), + DBGFREGSUBFIELD_RO("FXSR\0" "FXSAVE and FXRSTOR Instructions", 24, 1, 0), + DBGFREGSUBFIELD_RO("FFXSR\0" "AMD fast FXSAVE and FXRSTOR instructions", 25, 1, 0), + DBGFREGSUBFIELD_RO("Page1GB\0" "1 GB large page", 26, 1, 0), + DBGFREGSUBFIELD_RO("RDTSCP\0" "RDTSCP instruction", 27, 1, 0), + DBGFREGSUBFIELD_RO("LM\0" "AMD64 Long Mode", 29, 1, 0), + DBGFREGSUBFIELD_RO("3DNOWEXT\0" "AMD Extensions to 3DNow", 30, 1, 0), + DBGFREGSUBFIELD_RO("3DNOW\0" "AMD 3DNow", 31, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(0x80000001,0).ECX field descriptions. 
*/ +static DBGFREGSUBFIELD const g_aExtLeaf1EcxSubFields[] = +{ + DBGFREGSUBFIELD_RO("LahfSahf\0" "LAHF/SAHF support in 64-bit mode", 0, 1, 0), + DBGFREGSUBFIELD_RO("CmpLegacy\0" "Core multi-processing legacy mode", 1, 1, 0), + DBGFREGSUBFIELD_RO("SVM\0" "AMD Secure Virtual Machine extensions", 2, 1, 0), + DBGFREGSUBFIELD_RO("EXTAPIC\0" "AMD Extended APIC registers", 3, 1, 0), + DBGFREGSUBFIELD_RO("CR8L\0" "AMD LOCK MOV CR0 means MOV CR8", 4, 1, 0), + DBGFREGSUBFIELD_RO("ABM\0" "AMD Advanced Bit Manipulation", 5, 1, 0), + DBGFREGSUBFIELD_RO("SSE4A\0" "SSE4A instructions", 6, 1, 0), + DBGFREGSUBFIELD_RO("MISALIGNSSE\0" "AMD Misaligned SSE mode", 7, 1, 0), + DBGFREGSUBFIELD_RO("3DNOWPRF\0" "AMD PREFETCH and PREFETCHW instructions", 8, 1, 0), + DBGFREGSUBFIELD_RO("OSVW\0" "AMD OS Visible Workaround", 9, 1, 0), + DBGFREGSUBFIELD_RO("IBS\0" "Instruct Based Sampling", 10, 1, 0), + DBGFREGSUBFIELD_RO("XOP\0" "Extended Operation support", 11, 1, 0), + DBGFREGSUBFIELD_RO("SKINIT\0" "SKINIT, STGI, and DEV support", 12, 1, 0), + DBGFREGSUBFIELD_RO("WDT\0" "AMD Watchdog Timer support", 13, 1, 0), + DBGFREGSUBFIELD_RO("LWP\0" "Lightweight Profiling support", 15, 1, 0), + DBGFREGSUBFIELD_RO("FMA4\0" "Four operand FMA instruction support", 16, 1, 0), + DBGFREGSUBFIELD_RO("NodeId\0" "NodeId in MSR C001_100C", 19, 1, 0), + DBGFREGSUBFIELD_RO("TBM\0" "Trailing Bit Manipulation instructions", 21, 1, 0), + DBGFREGSUBFIELD_RO("TOPOEXT\0" "Topology Extensions", 22, 1, 0), + DBGFREGSUBFIELD_RO("PRFEXTCORE\0" "Performance Counter Extensions support", 23, 1, 0), + DBGFREGSUBFIELD_RO("PRFEXTNB\0" "NB Performance Counter Extensions support", 24, 1, 0), + DBGFREGSUBFIELD_RO("DATABPEXT\0" "Data-access Breakpoint Extension", 26, 1, 0), + DBGFREGSUBFIELD_RO("PERFTSC\0" "Performance Time Stamp Counter", 27, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(0x8000000a,0).EDX field descriptions. */ +static DBGFREGSUBFIELD const g_aExtLeafAEdxSubFields[] = +{ + DBGFREGSUBFIELD_RO("NP\0" "Nested Paging", 0, 1, 0), + DBGFREGSUBFIELD_RO("LbrVirt\0" "Last Branch Record Virtualization", 1, 1, 0), + DBGFREGSUBFIELD_RO("SVML\0" "SVM Lock", 2, 1, 0), + DBGFREGSUBFIELD_RO("NRIPS\0" "NextRIP Save", 3, 1, 0), + DBGFREGSUBFIELD_RO("TscRateMsr\0" "MSR based TSC rate control", 4, 1, 0), + DBGFREGSUBFIELD_RO("VmcbClean\0" "VMCB clean bits", 5, 1, 0), + DBGFREGSUBFIELD_RO("FlushByASID\0" "Flush by ASID", 6, 1, 0), + DBGFREGSUBFIELD_RO("DecodeAssists\0" "Decode Assists", 7, 1, 0), + DBGFREGSUBFIELD_RO("PauseFilter\0" "Pause intercept filter", 10, 1, 0), + DBGFREGSUBFIELD_RO("PauseFilterThreshold\0" "Pause filter threshold", 12, 1, 0), + DBGFREGSUBFIELD_RO("AVIC\0" "Advanced Virtual Interrupt Controller", 13, 1, 0), + DBGFREGSUBFIELD_RO("VMSAVEVirt\0" "VMSAVE and VMLOAD Virtualization", 15, 1, 0), + DBGFREGSUBFIELD_RO("VGIF\0" "Virtual Global-Interrupt Flag", 16, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + + +/** CPUID(0x80000007,0).EDX field descriptions. 
*/ +static DBGFREGSUBFIELD const g_aExtLeaf7EdxSubFields[] = +{ + DBGFREGSUBFIELD_RO("TS\0" "Temperature Sensor", 0, 1, 0), + DBGFREGSUBFIELD_RO("FID\0" "Frequency ID control", 1, 1, 0), + DBGFREGSUBFIELD_RO("VID\0" "Voltage ID control", 2, 1, 0), + DBGFREGSUBFIELD_RO("VID\0" "Voltage ID control", 2, 1, 0), + DBGFREGSUBFIELD_RO("TTP\0" "Thermal Trip", 3, 1, 0), + DBGFREGSUBFIELD_RO("TM\0" "Hardware Thermal Control (HTC)", 4, 1, 0), + DBGFREGSUBFIELD_RO("100MHzSteps\0" "100 MHz Multiplier control", 6, 1, 0), + DBGFREGSUBFIELD_RO("HwPstate\0" "Hardware P-state control", 7, 1, 0), + DBGFREGSUBFIELD_RO("TscInvariant\0" "Invariant Time Stamp Counter", 8, 1, 0), + DBGFREGSUBFIELD_RO("CBP\0" "Core Performance Boost", 9, 1, 0), + DBGFREGSUBFIELD_RO("EffFreqRO\0" "Read-only Effective Frequency Interface", 10, 1, 0), + DBGFREGSUBFIELD_RO("ProcFdbkIf\0" "Processor Feedback Interface", 11, 1, 0), + DBGFREGSUBFIELD_RO("ProcPwrRep\0" "Core power reporting interface support", 12, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + +/** CPUID(0x80000008,0).EBX field descriptions. */ +static DBGFREGSUBFIELD const g_aExtLeaf8EbxSubFields[] = +{ + DBGFREGSUBFIELD_RO("CLZERO\0" "Clear zero instruction (cacheline)", 0, 1, 0), + DBGFREGSUBFIELD_RO("IRPerf\0" "Instructions retired count support", 1, 1, 0), + DBGFREGSUBFIELD_RO("XSaveErPtr\0" "Save/restore error pointers (FXSAVE/RSTOR*)", 2, 1, 0), + DBGFREGSUBFIELD_RO("IBPB\0" "Supports the IBPB command in IA32_PRED_CMD", 12, 1, 0), + DBGFREGSUBFIELD_TERMINATOR() +}; + + +static void cpumR3CpuIdInfoMnemonicListU32(PCDBGFINFOHLP pHlp, uint32_t uVal, PCDBGFREGSUBFIELD pDesc, + const char *pszLeadIn, uint32_t cchWidth) +{ + if (pszLeadIn) + pHlp->pfnPrintf(pHlp, "%*s", cchWidth, pszLeadIn); + + for (uint32_t iBit = 0; iBit < 32; iBit++) + if (RT_BIT_32(iBit) & uVal) + { + while ( pDesc->pszName != NULL + && iBit >= (uint32_t)pDesc->iFirstBit + pDesc->cBits) + pDesc++; + if ( pDesc->pszName != NULL + && iBit - (uint32_t)pDesc->iFirstBit < (uint32_t)pDesc->cBits) + { + if (pDesc->cBits == 1) + pHlp->pfnPrintf(pHlp, " %s", pDesc->pszName); + else + { + uint32_t uFieldValue = uVal >> pDesc->iFirstBit; + if (pDesc->cBits < 32) + uFieldValue &= RT_BIT_32(pDesc->cBits) - UINT32_C(1); + pHlp->pfnPrintf(pHlp, pDesc->cBits < 4 ? " %s=%u" : " %s=%#x", pDesc->pszName, uFieldValue); + iBit = pDesc->iFirstBit + pDesc->cBits - 1; + } + } + else + pHlp->pfnPrintf(pHlp, " %u", iBit); + } + if (pszLeadIn) + pHlp->pfnPrintf(pHlp, "\n"); +} + + +static void cpumR3CpuIdInfoMnemonicListU64(PCDBGFINFOHLP pHlp, uint64_t uVal, PCDBGFREGSUBFIELD pDesc, + const char *pszLeadIn, uint32_t cchWidth) +{ + if (pszLeadIn) + pHlp->pfnPrintf(pHlp, "%*s", cchWidth, pszLeadIn); + + for (uint32_t iBit = 0; iBit < 64; iBit++) + if (RT_BIT_64(iBit) & uVal) + { + while ( pDesc->pszName != NULL + && iBit >= (uint32_t)pDesc->iFirstBit + pDesc->cBits) + pDesc++; + if ( pDesc->pszName != NULL + && iBit - (uint32_t)pDesc->iFirstBit < (uint32_t)pDesc->cBits) + { + if (pDesc->cBits == 1) + pHlp->pfnPrintf(pHlp, " %s", pDesc->pszName); + else + { + uint64_t uFieldValue = uVal >> pDesc->iFirstBit; + if (pDesc->cBits < 64) + uFieldValue &= RT_BIT_64(pDesc->cBits) - UINT64_C(1); + pHlp->pfnPrintf(pHlp, pDesc->cBits < 4 ? 
" %s=%llu" : " %s=%#llx", pDesc->pszName, uFieldValue); + iBit = pDesc->iFirstBit + pDesc->cBits - 1; + } + } + else + pHlp->pfnPrintf(pHlp, " %u", iBit); + } + if (pszLeadIn) + pHlp->pfnPrintf(pHlp, "\n"); +} + + +static void cpumR3CpuIdInfoValueWithMnemonicListU64(PCDBGFINFOHLP pHlp, uint64_t uVal, PCDBGFREGSUBFIELD pDesc, + const char *pszLeadIn, uint32_t cchWidth) +{ + if (!uVal) + pHlp->pfnPrintf(pHlp, "%*s %#010x`%08x\n", cchWidth, pszLeadIn, RT_HI_U32(uVal), RT_LO_U32(uVal)); + else + { + pHlp->pfnPrintf(pHlp, "%*s %#010x`%08x (", cchWidth, pszLeadIn, RT_HI_U32(uVal), RT_LO_U32(uVal)); + cpumR3CpuIdInfoMnemonicListU64(pHlp, uVal, pDesc, NULL, 0); + pHlp->pfnPrintf(pHlp, " )\n"); + } +} + + +static void cpumR3CpuIdInfoVerboseCompareListU32(PCDBGFINFOHLP pHlp, uint32_t uVal1, uint32_t uVal2, PCDBGFREGSUBFIELD pDesc, + uint32_t cchWidth) +{ + uint32_t uCombined = uVal1 | uVal2; + for (uint32_t iBit = 0; iBit < 32; iBit++) + if ( (RT_BIT_32(iBit) & uCombined) + || (iBit == pDesc->iFirstBit && pDesc->pszName) ) + { + while ( pDesc->pszName != NULL + && iBit >= (uint32_t)pDesc->iFirstBit + pDesc->cBits) + pDesc++; + + if ( pDesc->pszName != NULL + && iBit - (uint32_t)pDesc->iFirstBit < (uint32_t)pDesc->cBits) + { + size_t cchMnemonic = strlen(pDesc->pszName); + const char *pszDesc = pDesc->pszName + cchMnemonic + 1; + size_t cchDesc = strlen(pszDesc); + uint32_t uFieldValue1 = uVal1 >> pDesc->iFirstBit; + uint32_t uFieldValue2 = uVal2 >> pDesc->iFirstBit; + if (pDesc->cBits < 32) + { + uFieldValue1 &= RT_BIT_32(pDesc->cBits) - UINT32_C(1); + uFieldValue2 &= RT_BIT_32(pDesc->cBits) - UINT32_C(1); + } + + pHlp->pfnPrintf(pHlp, pDesc->cBits < 4 ? " %s - %s%*s= %u (%u)\n" : " %s - %s%*s= %#x (%#x)\n", + pDesc->pszName, pszDesc, + cchMnemonic + 3 + cchDesc < cchWidth ? cchWidth - (cchMnemonic + 3 + cchDesc) : 1, "", + uFieldValue1, uFieldValue2); + + iBit = pDesc->iFirstBit + pDesc->cBits - 1U; + pDesc++; + } + else + pHlp->pfnPrintf(pHlp, " %2u - Reserved%*s= %u (%u)\n", iBit, 13 < cchWidth ? cchWidth - 13 : 1, "", + RT_BOOL(uVal1 & RT_BIT_32(iBit)), RT_BOOL(uVal2 & RT_BIT_32(iBit))); + } +} + + +/** + * Produces a detailed summary of standard leaf 0x00000001. + * + * @param pHlp The info helper functions. + * @param pCurLeaf The 0x00000001 leaf. + * @param fVerbose Whether to be very verbose or not. + * @param fIntel Set if intel CPU. 
+ */ +static void cpumR3CpuIdInfoStdLeaf1Details(PCDBGFINFOHLP pHlp, PCCPUMCPUIDLEAF pCurLeaf, bool fVerbose, bool fIntel) +{ + Assert(pCurLeaf); Assert(pCurLeaf->uLeaf == 1); + static const char * const s_apszTypes[4] = { "primary", "overdrive", "MP", "reserved" }; + uint32_t uEAX = pCurLeaf->uEax; + uint32_t uEBX = pCurLeaf->uEbx; + + pHlp->pfnPrintf(pHlp, + "%36s %2d \tExtended: %d \tEffective: %d\n" + "%36s %2d \tExtended: %d \tEffective: %d\n" + "%36s %d\n" + "%36s %d (%s)\n" + "%36s %#04x\n" + "%36s %d\n" + "%36s %d\n" + "%36s %#04x\n" + , + "Family:", (uEAX >> 8) & 0xf, (uEAX >> 20) & 0x7f, ASMGetCpuFamily(uEAX), + "Model:", (uEAX >> 4) & 0xf, (uEAX >> 16) & 0x0f, ASMGetCpuModel(uEAX, fIntel), + "Stepping:", ASMGetCpuStepping(uEAX), + "Type:", (uEAX >> 12) & 3, s_apszTypes[(uEAX >> 12) & 3], + "APIC ID:", (uEBX >> 24) & 0xff, + "Logical CPUs:",(uEBX >> 16) & 0xff, + "CLFLUSH Size:",(uEBX >> 8) & 0xff, + "Brand ID:", (uEBX >> 0) & 0xff); + if (fVerbose) + { + CPUMCPUID Host; + ASMCpuIdExSlow(1, 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + pHlp->pfnPrintf(pHlp, "Features\n"); + pHlp->pfnPrintf(pHlp, " Mnemonic - Description = guest (host)\n"); + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEdx, Host.uEdx, g_aLeaf1EdxSubFields, 56); + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEcx, Host.uEcx, g_aLeaf1EcxSubFields, 56); + } + else + { + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEdx, g_aLeaf1EdxSubFields, "Features EDX:", 36); + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEcx, g_aLeaf1EcxSubFields, "Features ECX:", 36); + } +} + + +/** + * Produces a detailed summary of standard leaf 0x00000007. + * + * @param pHlp The info helper functions. + * @param paLeaves The CPUID leaves array. + * @param cLeaves The number of leaves in the array. + * @param pCurLeaf The first 0x00000007 leaf. + * @param fVerbose Whether to be very verbose or not. + */ +static void cpumR3CpuIdInfoStdLeaf7Details(PCDBGFINFOHLP pHlp, PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, + PCCPUMCPUIDLEAF pCurLeaf, bool fVerbose) +{ + Assert(pCurLeaf); Assert(pCurLeaf->uLeaf == 7); + pHlp->pfnPrintf(pHlp, "Structured Extended Feature Flags Enumeration (leaf 7):\n"); + for (;;) + { + CPUMCPUID Host; + ASMCpuIdExSlow(pCurLeaf->uLeaf, 0, pCurLeaf->uSubLeaf, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + + switch (pCurLeaf->uSubLeaf) + { + case 0: + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Mnemonic - Description = guest (host)\n"); + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEbx, Host.uEbx, g_aLeaf7Sub0EbxSubFields, 56); + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEcx, Host.uEcx, g_aLeaf7Sub0EcxSubFields, 56); + if (pCurLeaf->uEdx || Host.uEdx) + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEdx, Host.uEdx, g_aLeaf7Sub0EdxSubFields, 56); + } + else + { + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEbx, g_aLeaf7Sub0EbxSubFields, "Ext Features EBX:", 36); + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEcx, g_aLeaf7Sub0EcxSubFields, "Ext Features ECX:", 36); + if (pCurLeaf->uEdx) + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEdx, g_aLeaf7Sub0EdxSubFields, "Ext Features EDX:", 36); + } + break; + + default: + if (pCurLeaf->uEdx || pCurLeaf->uEcx || pCurLeaf->uEbx) + pHlp->pfnPrintf(pHlp, "Unknown extended feature sub-leaf #%u: EAX=%#x EBX=%#x ECX=%#x EDX=%#x\n", + pCurLeaf->uSubLeaf, pCurLeaf->uEax, pCurLeaf->uEbx, pCurLeaf->uEcx, pCurLeaf->uEdx); + break; + + } + + /* advance. 
*/ + pCurLeaf++; + if ( (uintptr_t)(pCurLeaf - paLeaves) >= cLeaves + || pCurLeaf->uLeaf != 0x7) + break; + } +} + + +/** + * Produces a detailed summary of standard leaf 0x0000000d. + * + * @param pHlp The info helper functions. + * @param paLeaves The CPUID leaves array. + * @param cLeaves The number of leaves in the array. + * @param pCurLeaf The first 0x00000007 leaf. + * @param fVerbose Whether to be very verbose or not. + */ +static void cpumR3CpuIdInfoStdLeaf13Details(PCDBGFINFOHLP pHlp, PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, + PCCPUMCPUIDLEAF pCurLeaf, bool fVerbose) +{ + RT_NOREF_PV(fVerbose); + Assert(pCurLeaf); Assert(pCurLeaf->uLeaf == 13); + pHlp->pfnPrintf(pHlp, "Processor Extended State Enumeration (leaf 0xd):\n"); + for (uint32_t uSubLeaf = 0; uSubLeaf < 64; uSubLeaf++) + { + CPUMCPUID Host; + ASMCpuIdExSlow(UINT32_C(0x0000000d), 0, uSubLeaf, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + + switch (uSubLeaf) + { + case 0: + if (pCurLeaf && pCurLeaf->uSubLeaf == uSubLeaf) + pHlp->pfnPrintf(pHlp, "%42s %#x/%#x\n", "XSAVE area cur/max size by XCR0, guest:", + pCurLeaf->uEbx, pCurLeaf->uEcx); + pHlp->pfnPrintf(pHlp, "%42s %#x/%#x\n", "XSAVE area cur/max size by XCR0, host:", Host.uEbx, Host.uEcx); + + if (pCurLeaf && pCurLeaf->uSubLeaf == uSubLeaf) + cpumR3CpuIdInfoValueWithMnemonicListU64(pHlp, RT_MAKE_U64(pCurLeaf->uEax, pCurLeaf->uEdx), g_aXSaveStateBits, + "Valid XCR0 bits, guest:", 42); + cpumR3CpuIdInfoValueWithMnemonicListU64(pHlp, RT_MAKE_U64(Host.uEax, Host.uEdx), g_aXSaveStateBits, + "Valid XCR0 bits, host:", 42); + break; + + case 1: + if (pCurLeaf && pCurLeaf->uSubLeaf == uSubLeaf) + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEax, g_aLeaf13Sub1EaxSubFields, "XSAVE features, guest:", 42); + cpumR3CpuIdInfoMnemonicListU32(pHlp, Host.uEax, g_aLeaf13Sub1EaxSubFields, "XSAVE features, host:", 42); + + if (pCurLeaf && pCurLeaf->uSubLeaf == uSubLeaf) + pHlp->pfnPrintf(pHlp, "%42s %#x\n", "XSAVE area cur size XCR0|XSS, guest:", pCurLeaf->uEbx); + pHlp->pfnPrintf(pHlp, "%42s %#x\n", "XSAVE area cur size XCR0|XSS, host:", Host.uEbx); + + if (pCurLeaf && pCurLeaf->uSubLeaf == uSubLeaf) + cpumR3CpuIdInfoValueWithMnemonicListU64(pHlp, RT_MAKE_U64(pCurLeaf->uEcx, pCurLeaf->uEdx), g_aXSaveStateBits, + " Valid IA32_XSS bits, guest:", 42); + cpumR3CpuIdInfoValueWithMnemonicListU64(pHlp, RT_MAKE_U64(Host.uEdx, Host.uEcx), g_aXSaveStateBits, + " Valid IA32_XSS bits, host:", 42); + break; + + default: + if ( pCurLeaf + && pCurLeaf->uSubLeaf == uSubLeaf + && (pCurLeaf->uEax || pCurLeaf->uEbx || pCurLeaf->uEcx || pCurLeaf->uEdx) ) + { + pHlp->pfnPrintf(pHlp, " State #%u, guest: off=%#06x, cb=%#06x %s", uSubLeaf, pCurLeaf->uEbx, + pCurLeaf->uEax, pCurLeaf->uEcx & RT_BIT_32(0) ? "XCR0-bit" : "IA32_XSS-bit"); + if (pCurLeaf->uEcx & ~RT_BIT_32(0)) + pHlp->pfnPrintf(pHlp, " ECX[reserved]=%#x\n", pCurLeaf->uEcx & ~RT_BIT_32(0)); + if (pCurLeaf->uEdx) + pHlp->pfnPrintf(pHlp, " EDX[reserved]=%#x\n", pCurLeaf->uEdx); + pHlp->pfnPrintf(pHlp, " --"); + cpumR3CpuIdInfoMnemonicListU64(pHlp, RT_BIT_64(uSubLeaf), g_aXSaveStateBits, NULL, 0); + pHlp->pfnPrintf(pHlp, "\n"); + } + if (Host.uEax || Host.uEbx || Host.uEcx || Host.uEdx) + { + pHlp->pfnPrintf(pHlp, " State #%u, host: off=%#06x, cb=%#06x %s", uSubLeaf, Host.uEbx, + Host.uEax, Host.uEcx & RT_BIT_32(0) ? 
"XCR0-bit" : "IA32_XSS-bit"); + if (Host.uEcx & ~RT_BIT_32(0)) + pHlp->pfnPrintf(pHlp, " ECX[reserved]=%#x\n", Host.uEcx & ~RT_BIT_32(0)); + if (Host.uEdx) + pHlp->pfnPrintf(pHlp, " EDX[reserved]=%#x\n", Host.uEdx); + pHlp->pfnPrintf(pHlp, " --"); + cpumR3CpuIdInfoMnemonicListU64(pHlp, RT_BIT_64(uSubLeaf), g_aXSaveStateBits, NULL, 0); + pHlp->pfnPrintf(pHlp, "\n"); + } + break; + + } + + /* advance. */ + if (pCurLeaf) + { + while ( (uintptr_t)(pCurLeaf - paLeaves) < cLeaves + && pCurLeaf->uSubLeaf <= uSubLeaf + && pCurLeaf->uLeaf == UINT32_C(0x0000000d)) + pCurLeaf++; + if ( (uintptr_t)(pCurLeaf - paLeaves) >= cLeaves + || pCurLeaf->uLeaf != UINT32_C(0x0000000d)) + pCurLeaf = NULL; + } + } +} + + +static PCCPUMCPUIDLEAF cpumR3CpuIdInfoRawRange(PCDBGFINFOHLP pHlp, PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, + PCCPUMCPUIDLEAF pCurLeaf, uint32_t uUpToLeaf, const char *pszTitle) +{ + if ( (uintptr_t)(pCurLeaf - paLeaves) < cLeaves + && pCurLeaf->uLeaf <= uUpToLeaf) + { + pHlp->pfnPrintf(pHlp, + " %s\n" + " Leaf/sub-leaf eax ebx ecx edx\n", pszTitle); + while ( (uintptr_t)(pCurLeaf - paLeaves) < cLeaves + && pCurLeaf->uLeaf <= uUpToLeaf) + { + CPUMCPUID Host; + ASMCpuIdExSlow(pCurLeaf->uLeaf, 0, pCurLeaf->uSubLeaf, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + pHlp->pfnPrintf(pHlp, + "Gst: %08x/%04x %08x %08x %08x %08x\n" + "Hst: %08x %08x %08x %08x\n", + pCurLeaf->uLeaf, pCurLeaf->uSubLeaf, pCurLeaf->uEax, pCurLeaf->uEbx, pCurLeaf->uEcx, pCurLeaf->uEdx, + Host.uEax, Host.uEbx, Host.uEcx, Host.uEdx); + pCurLeaf++; + } + } + + return pCurLeaf; +} + + +/** + * Display the guest CpuId leaves. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs "terse", "default" or "verbose". + */ +DECLCALLBACK(void) cpumR3CpuIdInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /* + * Parse the argument. + */ + unsigned iVerbosity = 1; + if (pszArgs) + { + pszArgs = RTStrStripL(pszArgs); + if (!strcmp(pszArgs, "terse")) + iVerbosity--; + else if (!strcmp(pszArgs, "verbose")) + iVerbosity++; + } + + uint32_t uLeaf; + CPUMCPUID Host; + uint32_t cLeaves = pVM->cpum.s.GuestInfo.cCpuIdLeaves; + PCPUMCPUIDLEAF paLeaves = pVM->cpum.s.GuestInfo.paCpuIdLeavesR3; + PCCPUMCPUIDLEAF pCurLeaf; + PCCPUMCPUIDLEAF pNextLeaf; + bool const fIntel = ASMIsIntelCpuEx(pVM->cpum.s.aGuestCpuIdPatmStd[0].uEbx, + pVM->cpum.s.aGuestCpuIdPatmStd[0].uEcx, + pVM->cpum.s.aGuestCpuIdPatmStd[0].uEdx); + + /* + * Standard leaves. Custom raw dump here due to ECX sub-leaves host handling. + */ + uint32_t cHstMax = ASMCpuId_EAX(0); + uint32_t cGstMax = paLeaves[0].uLeaf == 0 ? 
paLeaves[0].uEax : 0; + uint32_t cMax = RT_MAX(cGstMax, cHstMax); + pHlp->pfnPrintf(pHlp, + " Raw Standard CPUID Leaves\n" + " Leaf/sub-leaf eax ebx ecx edx\n"); + for (uLeaf = 0, pCurLeaf = paLeaves; uLeaf <= cMax; uLeaf++) + { + uint32_t cMaxSubLeaves = 1; + if (uLeaf == 4 || uLeaf == 7 || uLeaf == 0xb) + cMaxSubLeaves = 16; + else if (uLeaf == 0xd) + cMaxSubLeaves = 128; + + for (uint32_t uSubLeaf = 0; uSubLeaf < cMaxSubLeaves; uSubLeaf++) + { + ASMCpuIdExSlow(uLeaf, 0, uSubLeaf, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + if ( (uintptr_t)(pCurLeaf - paLeaves) < cLeaves + && pCurLeaf->uLeaf == uLeaf + && pCurLeaf->uSubLeaf == uSubLeaf) + { + pHlp->pfnPrintf(pHlp, + "Gst: %08x/%04x %08x %08x %08x %08x\n" + "Hst: %08x %08x %08x %08x\n", + uLeaf, uSubLeaf, pCurLeaf->uEax, pCurLeaf->uEbx, pCurLeaf->uEcx, pCurLeaf->uEdx, + Host.uEax, Host.uEbx, Host.uEcx, Host.uEdx); + pCurLeaf++; + } + else if ( uLeaf != 0xd + || uSubLeaf <= 1 + || Host.uEbx != 0 ) + pHlp->pfnPrintf(pHlp, + "Hst: %08x/%04x %08x %08x %08x %08x\n", + uLeaf, uSubLeaf, Host.uEax, Host.uEbx, Host.uEcx, Host.uEdx); + + /* Done? */ + if ( ( (uintptr_t)(pCurLeaf - paLeaves) >= cLeaves + || pCurLeaf->uLeaf != uLeaf) + && ( (uLeaf == 0x4 && ((Host.uEax & 0x000f) == 0 || (Host.uEax & 0x000f) >= 8)) + || (uLeaf == 0x7 && Host.uEax == 0) + || (uLeaf == 0xb && ((Host.uEcx & 0xff00) == 0 || (Host.uEcx & 0xff00) >= 8)) + || (uLeaf == 0xb && (Host.uEcx & 0xff) != uSubLeaf) + || (uLeaf == 0xd && uSubLeaf >= 128) + ) + ) + break; + } + } + pNextLeaf = pCurLeaf; + + /* + * If verbose, decode it. + */ + if (iVerbosity && paLeaves[0].uLeaf == 0) + pHlp->pfnPrintf(pHlp, + "%36s %.04s%.04s%.04s\n" + "%36s 0x00000000-%#010x\n" + , + "Name:", &paLeaves[0].uEbx, &paLeaves[0].uEdx, &paLeaves[0].uEcx, + "Supports:", paLeaves[0].uEax); + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x00000001), 0)) != NULL) + cpumR3CpuIdInfoStdLeaf1Details(pHlp, pCurLeaf, iVerbosity > 1, fIntel); + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x00000007), 0)) != NULL) + cpumR3CpuIdInfoStdLeaf7Details(pHlp, paLeaves, cLeaves, pCurLeaf, iVerbosity > 1); + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x0000000d), 0)) != NULL) + cpumR3CpuIdInfoStdLeaf13Details(pHlp, paLeaves, cLeaves, pCurLeaf, iVerbosity > 1); + + pCurLeaf = pNextLeaf; + + /* + * Hypervisor leaves. + * + * Unlike most of the other leaves reported, the guest hypervisor leaves + * aren't a subset of the host CPUID bits. + */ + pCurLeaf = cpumR3CpuIdInfoRawRange(pHlp, paLeaves, cLeaves, pCurLeaf, UINT32_C(0x3fffffff), "Unknown CPUID Leaves"); + + ASMCpuIdExSlow(UINT32_C(0x40000000), 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + cHstMax = Host.uEax >= UINT32_C(0x40000001) && Host.uEax <= UINT32_C(0x40000fff) ? Host.uEax : 0; + cGstMax = (uintptr_t)(pCurLeaf - paLeaves) < cLeaves && pCurLeaf->uLeaf == UINT32_C(0x40000000) + ? RT_MIN(pCurLeaf->uEax, UINT32_C(0x40000fff)) : 0; + cMax = RT_MAX(cHstMax, cGstMax); + if (cMax >= UINT32_C(0x40000000)) + { + pNextLeaf = cpumR3CpuIdInfoRawRange(pHlp, paLeaves, cLeaves, pCurLeaf, cMax, "Raw Hypervisor CPUID Leaves"); + + /** @todo dump these in more detail. */ + + pCurLeaf = pNextLeaf; + } + + + /* + * Extended. Custom raw dump here due to ECX sub-leaves host handling. + * Implemented after AMD specs. 
+ */ + pCurLeaf = cpumR3CpuIdInfoRawRange(pHlp, paLeaves, cLeaves, pCurLeaf, UINT32_C(0x7fffffff), "Unknown CPUID Leaves"); + + ASMCpuIdExSlow(UINT32_C(0x80000000), 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + cHstMax = ASMIsValidExtRange(Host.uEax) ? RT_MIN(Host.uEax, UINT32_C(0x80000fff)) : 0; + cGstMax = (uintptr_t)(pCurLeaf - paLeaves) < cLeaves && pCurLeaf->uLeaf == UINT32_C(0x80000000) + ? RT_MIN(pCurLeaf->uEax, UINT32_C(0x80000fff)) : 0; + cMax = RT_MAX(cHstMax, cGstMax); + if (cMax >= UINT32_C(0x80000000)) + { + + pHlp->pfnPrintf(pHlp, + " Raw Extended CPUID Leaves\n" + " Leaf/sub-leaf eax ebx ecx edx\n"); + PCCPUMCPUIDLEAF pExtLeaf = pCurLeaf; + for (uLeaf = UINT32_C(0x80000000); uLeaf <= cMax; uLeaf++) + { + uint32_t cMaxSubLeaves = 1; + if (uLeaf == UINT32_C(0x8000001d)) + cMaxSubLeaves = 16; + + for (uint32_t uSubLeaf = 0; uSubLeaf < cMaxSubLeaves; uSubLeaf++) + { + ASMCpuIdExSlow(uLeaf, 0, uSubLeaf, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + if ( (uintptr_t)(pCurLeaf - paLeaves) < cLeaves + && pCurLeaf->uLeaf == uLeaf + && pCurLeaf->uSubLeaf == uSubLeaf) + { + pHlp->pfnPrintf(pHlp, + "Gst: %08x/%04x %08x %08x %08x %08x\n" + "Hst: %08x %08x %08x %08x\n", + uLeaf, uSubLeaf, pCurLeaf->uEax, pCurLeaf->uEbx, pCurLeaf->uEcx, pCurLeaf->uEdx, + Host.uEax, Host.uEbx, Host.uEcx, Host.uEdx); + pCurLeaf++; + } + else if ( uLeaf != 0xd + || uSubLeaf <= 1 + || Host.uEbx != 0 ) + pHlp->pfnPrintf(pHlp, + "Hst: %08x/%04x %08x %08x %08x %08x\n", + uLeaf, uSubLeaf, Host.uEax, Host.uEbx, Host.uEcx, Host.uEdx); + + /* Done? */ + if ( ( (uintptr_t)(pCurLeaf - paLeaves) >= cLeaves + || pCurLeaf->uLeaf != uLeaf) + && (uLeaf == UINT32_C(0x8000001d) && ((Host.uEax & 0x000f) == 0 || (Host.uEax & 0x000f) >= 8)) ) + break; + } + } + pNextLeaf = pCurLeaf; + + /* + * Understandable output + */ + if (iVerbosity) + pHlp->pfnPrintf(pHlp, + "Ext Name: %.4s%.4s%.4s\n" + "Ext Supports: 0x80000000-%#010x\n", + &pExtLeaf->uEbx, &pExtLeaf->uEdx, &pExtLeaf->uEcx, pExtLeaf->uEax); + + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000001), 0); + if (iVerbosity && pCurLeaf) + { + uint32_t uEAX = pCurLeaf->uEax; + pHlp->pfnPrintf(pHlp, + "Family: %d \tExtended: %d \tEffective: %d\n" + "Model: %d \tExtended: %d \tEffective: %d\n" + "Stepping: %d\n" + "Brand ID: %#05x\n", + (uEAX >> 8) & 0xf, (uEAX >> 20) & 0x7f, ASMGetCpuFamily(uEAX), + (uEAX >> 4) & 0xf, (uEAX >> 16) & 0x0f, ASMGetCpuModel(uEAX, fIntel), + ASMGetCpuStepping(uEAX), + pCurLeaf->uEbx & 0xfff); + + if (iVerbosity == 1) + { + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEdx, g_aExtLeaf1EdxSubFields, "Ext Features EDX:", 34); + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEcx, g_aExtLeaf1EdxSubFields, "Ext Features ECX:", 34); + } + else + { + ASMCpuIdExSlow(0x80000001, 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + pHlp->pfnPrintf(pHlp, "Ext Features\n"); + pHlp->pfnPrintf(pHlp, " Mnemonic - Description = guest (host)\n"); + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEdx, Host.uEdx, g_aExtLeaf1EdxSubFields, 56); + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEcx, Host.uEcx, g_aExtLeaf1EcxSubFields, 56); + if (Host.uEcx & X86_CPUID_AMD_FEATURE_ECX_SVM) + { + pHlp->pfnPrintf(pHlp, "SVM Feature Identification (leaf A):\n"); + ASMCpuIdExSlow(0x8000000a, 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x8000000a), 0); + uint32_t const uGstEdx = pCurLeaf ? 
pCurLeaf->uEdx : 0; + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, uGstEdx, Host.uEdx, g_aExtLeafAEdxSubFields, 56); + } + } + } + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000002), 0)) != NULL) + { + char szString[4*4*3+1] = {0}; + uint32_t *pu32 = (uint32_t *)szString; + *pu32++ = pCurLeaf->uEax; + *pu32++ = pCurLeaf->uEbx; + *pu32++ = pCurLeaf->uEcx; + *pu32++ = pCurLeaf->uEdx; + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000003), 0); + if (pCurLeaf) + { + *pu32++ = pCurLeaf->uEax; + *pu32++ = pCurLeaf->uEbx; + *pu32++ = pCurLeaf->uEcx; + *pu32++ = pCurLeaf->uEdx; + } + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000004), 0); + if (pCurLeaf) + { + *pu32++ = pCurLeaf->uEax; + *pu32++ = pCurLeaf->uEbx; + *pu32++ = pCurLeaf->uEcx; + *pu32++ = pCurLeaf->uEdx; + } + pHlp->pfnPrintf(pHlp, "Full Name: \"%s\"\n", szString); + } + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000005), 0)) != NULL) + { + uint32_t uEAX = pCurLeaf->uEax; + uint32_t uEBX = pCurLeaf->uEbx; + uint32_t uECX = pCurLeaf->uEcx; + uint32_t uEDX = pCurLeaf->uEdx; + char sz1[32]; + char sz2[32]; + + pHlp->pfnPrintf(pHlp, + "TLB 2/4M Instr/Uni: %s %3d entries\n" + "TLB 2/4M Data: %s %3d entries\n", + getCacheAss((uEAX >> 8) & 0xff, sz1), (uEAX >> 0) & 0xff, + getCacheAss((uEAX >> 24) & 0xff, sz2), (uEAX >> 16) & 0xff); + pHlp->pfnPrintf(pHlp, + "TLB 4K Instr/Uni: %s %3d entries\n" + "TLB 4K Data: %s %3d entries\n", + getCacheAss((uEBX >> 8) & 0xff, sz1), (uEBX >> 0) & 0xff, + getCacheAss((uEBX >> 24) & 0xff, sz2), (uEBX >> 16) & 0xff); + pHlp->pfnPrintf(pHlp, "L1 Instr Cache Line Size: %d bytes\n" + "L1 Instr Cache Lines Per Tag: %d\n" + "L1 Instr Cache Associativity: %s\n" + "L1 Instr Cache Size: %d KB\n", + (uEDX >> 0) & 0xff, + (uEDX >> 8) & 0xff, + getCacheAss((uEDX >> 16) & 0xff, sz1), + (uEDX >> 24) & 0xff); + pHlp->pfnPrintf(pHlp, + "L1 Data Cache Line Size: %d bytes\n" + "L1 Data Cache Lines Per Tag: %d\n" + "L1 Data Cache Associativity: %s\n" + "L1 Data Cache Size: %d KB\n", + (uECX >> 0) & 0xff, + (uECX >> 8) & 0xff, + getCacheAss((uECX >> 16) & 0xff, sz1), + (uECX >> 24) & 0xff); + } + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000006), 0)) != NULL) + { + uint32_t uEAX = pCurLeaf->uEax; + uint32_t uEBX = pCurLeaf->uEbx; + uint32_t uEDX = pCurLeaf->uEdx; + + pHlp->pfnPrintf(pHlp, + "L2 TLB 2/4M Instr/Uni: %s %4d entries\n" + "L2 TLB 2/4M Data: %s %4d entries\n", + getL2CacheAss((uEAX >> 12) & 0xf), (uEAX >> 0) & 0xfff, + getL2CacheAss((uEAX >> 28) & 0xf), (uEAX >> 16) & 0xfff); + pHlp->pfnPrintf(pHlp, + "L2 TLB 4K Instr/Uni: %s %4d entries\n" + "L2 TLB 4K Data: %s %4d entries\n", + getL2CacheAss((uEBX >> 12) & 0xf), (uEBX >> 0) & 0xfff, + getL2CacheAss((uEBX >> 28) & 0xf), (uEBX >> 16) & 0xfff); + pHlp->pfnPrintf(pHlp, + "L2 Cache Line Size: %d bytes\n" + "L2 Cache Lines Per Tag: %d\n" + "L2 Cache Associativity: %s\n" + "L2 Cache Size: %d KB\n", + (uEDX >> 0) & 0xff, + (uEDX >> 8) & 0xf, + getL2CacheAss((uEDX >> 12) & 0xf), + (uEDX >> 16) & 0xffff); + } + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000007), 0)) != NULL) + { + ASMCpuIdExSlow(UINT32_C(0x80000007), 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + if (pCurLeaf->uEdx || (Host.uEdx && iVerbosity)) + { + if (iVerbosity < 1) + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEdx, g_aExtLeaf7EdxSubFields, "APM Features EDX:", 34); + else + 
cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEdx, Host.uEdx, g_aExtLeaf7EdxSubFields, 56); + } + } + + pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0x80000008), 0); + if (pCurLeaf != NULL) + { + ASMCpuIdExSlow(UINT32_C(0x80000008), 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + if (pCurLeaf->uEbx || (Host.uEbx && iVerbosity)) + { + if (iVerbosity < 1) + cpumR3CpuIdInfoMnemonicListU32(pHlp, pCurLeaf->uEbx, g_aExtLeaf8EbxSubFields, "Ext Features ext IDs EBX:", 34); + else + cpumR3CpuIdInfoVerboseCompareListU32(pHlp, pCurLeaf->uEbx, Host.uEbx, g_aExtLeaf8EbxSubFields, 56); + } + + if (iVerbosity) + { + uint32_t uEAX = pCurLeaf->uEax; + uint32_t uECX = pCurLeaf->uEcx; + + pHlp->pfnPrintf(pHlp, + "Physical Address Width: %d bits\n" + "Virtual Address Width: %d bits\n" + "Guest Physical Address Width: %d bits\n", + (uEAX >> 0) & 0xff, + (uEAX >> 8) & 0xff, + (uEAX >> 16) & 0xff); + pHlp->pfnPrintf(pHlp, + "Physical Core Count: %d\n", + ((uECX >> 0) & 0xff) + 1); + } + } + + pCurLeaf = pNextLeaf; + } + + + + /* + * Centaur. + */ + pCurLeaf = cpumR3CpuIdInfoRawRange(pHlp, paLeaves, cLeaves, pCurLeaf, UINT32_C(0xbfffffff), "Unknown CPUID Leaves"); + + ASMCpuIdExSlow(UINT32_C(0xc0000000), 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + cHstMax = Host.uEax >= UINT32_C(0xc0000001) && Host.uEax <= UINT32_C(0xc0000fff) + ? RT_MIN(Host.uEax, UINT32_C(0xc0000fff)) : 0; + cGstMax = (uintptr_t)(pCurLeaf - paLeaves) < cLeaves && pCurLeaf->uLeaf == UINT32_C(0xc0000000) + ? RT_MIN(pCurLeaf->uEax, UINT32_C(0xc0000fff)) : 0; + cMax = RT_MAX(cHstMax, cGstMax); + if (cMax >= UINT32_C(0xc0000000)) + { + pNextLeaf = cpumR3CpuIdInfoRawRange(pHlp, paLeaves, cLeaves, pCurLeaf, cMax, "Raw Centaur CPUID Leaves"); + + /* + * Understandable output + */ + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0xc0000000), 0)) != NULL) + pHlp->pfnPrintf(pHlp, + "Centaur Supports: 0xc0000000-%#010x\n", + pCurLeaf->uEax); + + if (iVerbosity && (pCurLeaf = cpumR3CpuIdGetLeaf(paLeaves, cLeaves, UINT32_C(0xc0000001), 0)) != NULL) + { + ASMCpuIdExSlow(0xc0000001, 0, 0, 0, &Host.uEax, &Host.uEbx, &Host.uEcx, &Host.uEdx); + uint32_t uEdxGst = pCurLeaf->uEdx; + uint32_t uEdxHst = Host.uEdx; + + if (iVerbosity == 1) + { + pHlp->pfnPrintf(pHlp, "Centaur Features EDX: "); + if (uEdxGst & RT_BIT(0)) pHlp->pfnPrintf(pHlp, " AIS"); + if (uEdxGst & RT_BIT(1)) pHlp->pfnPrintf(pHlp, " AIS-E"); + if (uEdxGst & RT_BIT(2)) pHlp->pfnPrintf(pHlp, " RNG"); + if (uEdxGst & RT_BIT(3)) pHlp->pfnPrintf(pHlp, " RNG-E"); + if (uEdxGst & RT_BIT(4)) pHlp->pfnPrintf(pHlp, " LH"); + if (uEdxGst & RT_BIT(5)) pHlp->pfnPrintf(pHlp, " FEMMS"); + if (uEdxGst & RT_BIT(6)) pHlp->pfnPrintf(pHlp, " ACE"); + if (uEdxGst & RT_BIT(7)) pHlp->pfnPrintf(pHlp, " ACE-E"); + /* possibly indicating MM/HE and MM/HE-E on older chips... 
*/ + if (uEdxGst & RT_BIT(8)) pHlp->pfnPrintf(pHlp, " ACE2"); + if (uEdxGst & RT_BIT(9)) pHlp->pfnPrintf(pHlp, " ACE2-E"); + if (uEdxGst & RT_BIT(10)) pHlp->pfnPrintf(pHlp, " PHE"); + if (uEdxGst & RT_BIT(11)) pHlp->pfnPrintf(pHlp, " PHE-E"); + if (uEdxGst & RT_BIT(12)) pHlp->pfnPrintf(pHlp, " PMM"); + if (uEdxGst & RT_BIT(13)) pHlp->pfnPrintf(pHlp, " PMM-E"); + for (unsigned iBit = 14; iBit < 32; iBit++) + if (uEdxGst & RT_BIT(iBit)) + pHlp->pfnPrintf(pHlp, " %d", iBit); + pHlp->pfnPrintf(pHlp, "\n"); + } + else + { + pHlp->pfnPrintf(pHlp, "Mnemonic - Description = guest (host)\n"); + pHlp->pfnPrintf(pHlp, "AIS - Alternate Instruction Set = %d (%d)\n", !!(uEdxGst & RT_BIT( 0)), !!(uEdxHst & RT_BIT( 0))); + pHlp->pfnPrintf(pHlp, "AIS-E - AIS enabled = %d (%d)\n", !!(uEdxGst & RT_BIT( 1)), !!(uEdxHst & RT_BIT( 1))); + pHlp->pfnPrintf(pHlp, "RNG - Random Number Generator = %d (%d)\n", !!(uEdxGst & RT_BIT( 2)), !!(uEdxHst & RT_BIT( 2))); + pHlp->pfnPrintf(pHlp, "RNG-E - RNG enabled = %d (%d)\n", !!(uEdxGst & RT_BIT( 3)), !!(uEdxHst & RT_BIT( 3))); + pHlp->pfnPrintf(pHlp, "LH - LongHaul MSR 0000_110Ah = %d (%d)\n", !!(uEdxGst & RT_BIT( 4)), !!(uEdxHst & RT_BIT( 4))); + pHlp->pfnPrintf(pHlp, "FEMMS - FEMMS = %d (%d)\n", !!(uEdxGst & RT_BIT( 5)), !!(uEdxHst & RT_BIT( 5))); + pHlp->pfnPrintf(pHlp, "ACE - Advanced Cryptography Engine = %d (%d)\n", !!(uEdxGst & RT_BIT( 6)), !!(uEdxHst & RT_BIT( 6))); + pHlp->pfnPrintf(pHlp, "ACE-E - ACE enabled = %d (%d)\n", !!(uEdxGst & RT_BIT( 7)), !!(uEdxHst & RT_BIT( 7))); + /* possibly indicating MM/HE and MM/HE-E on older chips... */ + pHlp->pfnPrintf(pHlp, "ACE2 - Advanced Cryptography Engine 2 = %d (%d)\n", !!(uEdxGst & RT_BIT( 8)), !!(uEdxHst & RT_BIT( 8))); + pHlp->pfnPrintf(pHlp, "ACE2-E - ACE enabled = %d (%d)\n", !!(uEdxGst & RT_BIT( 9)), !!(uEdxHst & RT_BIT( 9))); + pHlp->pfnPrintf(pHlp, "PHE - Padlock Hash Engine = %d (%d)\n", !!(uEdxGst & RT_BIT(10)), !!(uEdxHst & RT_BIT(10))); + pHlp->pfnPrintf(pHlp, "PHE-E - PHE enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(11)), !!(uEdxHst & RT_BIT(11))); + pHlp->pfnPrintf(pHlp, "PMM - Montgomery Multiplier = %d (%d)\n", !!(uEdxGst & RT_BIT(12)), !!(uEdxHst & RT_BIT(12))); + pHlp->pfnPrintf(pHlp, "PMM-E - PMM enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(13)), !!(uEdxHst & RT_BIT(13))); + pHlp->pfnPrintf(pHlp, "14 - Reserved = %d (%d)\n", !!(uEdxGst & RT_BIT(14)), !!(uEdxHst & RT_BIT(14))); + pHlp->pfnPrintf(pHlp, "15 - Reserved = %d (%d)\n", !!(uEdxGst & RT_BIT(15)), !!(uEdxHst & RT_BIT(15))); + pHlp->pfnPrintf(pHlp, "Parallax = %d (%d)\n", !!(uEdxGst & RT_BIT(16)), !!(uEdxHst & RT_BIT(16))); + pHlp->pfnPrintf(pHlp, "Parallax enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(17)), !!(uEdxHst & RT_BIT(17))); + pHlp->pfnPrintf(pHlp, "Overstress = %d (%d)\n", !!(uEdxGst & RT_BIT(18)), !!(uEdxHst & RT_BIT(18))); + pHlp->pfnPrintf(pHlp, "Overstress enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(19)), !!(uEdxHst & RT_BIT(19))); + pHlp->pfnPrintf(pHlp, "TM3 - Temperature Monitoring 3 = %d (%d)\n", !!(uEdxGst & RT_BIT(20)), !!(uEdxHst & RT_BIT(20))); + pHlp->pfnPrintf(pHlp, "TM3-E - TM3 enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(21)), !!(uEdxHst & RT_BIT(21))); + pHlp->pfnPrintf(pHlp, "RNG2 - Random Number Generator 2 = %d (%d)\n", !!(uEdxGst & RT_BIT(22)), !!(uEdxHst & RT_BIT(22))); + pHlp->pfnPrintf(pHlp, "RNG2-E - RNG2 enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(23)), !!(uEdxHst & RT_BIT(23))); + pHlp->pfnPrintf(pHlp, "24 - Reserved = %d (%d)\n", !!(uEdxGst & RT_BIT(24)), !!(uEdxHst & RT_BIT(24))); + pHlp->pfnPrintf(pHlp, "PHE2 - Padlock 
Hash Engine 2 = %d (%d)\n", !!(uEdxGst & RT_BIT(25)), !!(uEdxHst & RT_BIT(25))); + pHlp->pfnPrintf(pHlp, "PHE2-E - PHE2 enabled = %d (%d)\n", !!(uEdxGst & RT_BIT(26)), !!(uEdxHst & RT_BIT(26))); + for (unsigned iBit = 27; iBit < 32; iBit++) + if ((uEdxGst | uEdxHst) & RT_BIT(iBit)) + pHlp->pfnPrintf(pHlp, "Bit %d = %d (%d)\n", iBit, !!(uEdxGst & RT_BIT(iBit)), !!(uEdxHst & RT_BIT(iBit))); + pHlp->pfnPrintf(pHlp, "\n"); + } + } + + pCurLeaf = pNextLeaf; + } + + /* + * The remainder. + */ + pCurLeaf = cpumR3CpuIdInfoRawRange(pHlp, paLeaves, cLeaves, pCurLeaf, UINT32_C(0xffffffff), "Unknown CPUID Leaves"); +} + + + + + +/* + * + * + * PATM interfaces. + * PATM interfaces. + * PATM interfaces. + * + * + */ + + +# if defined(VBOX_WITH_RAW_MODE) || defined(DOXYGEN_RUNNING) +/** @name Patchmanager CPUID legacy table APIs + * @{ + */ + +/** + * Gets a pointer to the default CPUID leaf. + * + * @returns Raw-mode pointer to the default CPUID leaf (read-only). + * @param pVM The cross context VM structure. + * @remark Intended for PATM only. + */ +VMMR3_INT_DECL(RCPTRTYPE(PCCPUMCPUID)) CPUMR3GetGuestCpuIdPatmDefRCPtr(PVM pVM) +{ + return (RCPTRTYPE(PCCPUMCPUID))VM_RC_ADDR(pVM, &pVM->cpum.s.GuestInfo.DefCpuId); +} + + +/** + * Gets the number of standard CPUID leaves (PATM only). + * + * @returns Number of leaves. + * @param pVM The cross context VM structure. + * @remark Intended for PATM - legacy, don't use in new code. + */ +VMMR3_INT_DECL(uint32_t) CPUMR3GetGuestCpuIdPatmStdMax(PVM pVM) +{ + RT_NOREF_PV(pVM); + return RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmStd); +} + + +/** + * Gets the number of extended CPUID leaves (PATM only). + * + * @returns Number of leaves. + * @param pVM The cross context VM structure. + * @remark Intended for PATM - legacy, don't use in new code. + */ +VMMR3_INT_DECL(uint32_t) CPUMR3GetGuestCpuIdPatmExtMax(PVM pVM) +{ + RT_NOREF_PV(pVM); + return RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmExt); +} + + +/** + * Gets the number of centaur CPUID leaves. + * + * @returns Number of leaves. + * @param pVM The cross context VM structure. + * @remark Intended for PATM - legacy, don't use in new code. + */ +VMMR3_INT_DECL(uint32_t) CPUMR3GetGuestCpuIdPatmCentaurMax(PVM pVM) +{ + RT_NOREF_PV(pVM); + return RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmCentaur); +} + + +/** + * Gets a pointer to the array of standard CPUID leaves. + * + * CPUMR3GetGuestCpuIdStdMax() gives the size of the array. + * + * @returns Raw-mode pointer to the standard CPUID leaves (read-only). + * @param pVM The cross context VM structure. + * @remark Intended for PATM - legacy, don't use in new code. + */ +VMMR3_INT_DECL(RCPTRTYPE(PCCPUMCPUID)) CPUMR3GetGuestCpuIdPatmStdRCPtr(PVM pVM) +{ + return (RCPTRTYPE(PCCPUMCPUID))VM_RC_ADDR(pVM, &pVM->cpum.s.aGuestCpuIdPatmStd[0]); +} + + +/** + * Gets a pointer to the array of extended CPUID leaves. + * + * CPUMGetGuestCpuIdExtMax() gives the size of the array. + * + * @returns Raw-mode pointer to the extended CPUID leaves (read-only). + * @param pVM The cross context VM structure. + * @remark Intended for PATM - legacy, don't use in new code. + */ +VMMR3_INT_DECL(RCPTRTYPE(PCCPUMCPUID)) CPUMR3GetGuestCpuIdPatmExtRCPtr(PVM pVM) +{ + return (RCPTRTYPE(PCCPUMCPUID))VM_RC_ADDR(pVM, &pVM->cpum.s.aGuestCpuIdPatmExt[0]); +} + + +/** + * Gets a pointer to the array of centaur CPUID leaves. + * + * CPUMGetGuestCpuIdCentaurMax() gives the size of the array. + * + * @returns Raw-mode pointer to the centaur CPUID leaves (read-only). + * @param pVM The cross context VM structure.
+ * @remark Intended for PATM - legacy, don't use in new code. + */ +VMMR3_INT_DECL(RCPTRTYPE(PCCPUMCPUID)) CPUMR3GetGuestCpuIdPatmCentaurRCPtr(PVM pVM) +{ + return (RCPTRTYPE(PCCPUMCPUID))VM_RC_ADDR(pVM, &pVM->cpum.s.aGuestCpuIdPatmCentaur[0]); +} + +/** @} */ +# endif /* VBOX_WITH_RAW_MODE || DOXYGEN_RUNNING */ + +#endif /* VBOX_IN_VMM */ + diff --git a/src/VBox/VMM/VMMR3/CPUMR3Db.cpp b/src/VBox/VMM/VMMR3/CPUMR3Db.cpp new file mode 100644 index 00000000..82bcc665 --- /dev/null +++ b/src/VBox/VMM/VMMR3/CPUMR3Db.cpp @@ -0,0 +1,1123 @@ +/* $Id: CPUMR3Db.cpp $ */ +/** @file + * CPUM - CPU database part. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_CPUM +#include +#include "CPUMInternal.h" +#include +#include + +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +typedef struct CPUMDBENTRY +{ + /** The CPU name. */ + const char *pszName; + /** The full CPU name. */ + const char *pszFullName; + /** The CPU vendor (CPUMCPUVENDOR). */ + uint8_t enmVendor; + /** The CPU family. */ + uint8_t uFamily; + /** The CPU model. */ + uint8_t uModel; + /** The CPU stepping. */ + uint8_t uStepping; + /** The microarchitecture. */ + CPUMMICROARCH enmMicroarch; + /** Scalable bus frequency used for reporting other frequencies. */ + uint64_t uScalableBusFreq; + /** Flags - CPUDB_F_XXX. */ + uint32_t fFlags; + /** The maximum physical address with of the CPU. This should correspond to + * the value in CPUID leaf 0x80000008 when present. */ + uint8_t cMaxPhysAddrWidth; + /** The MXCSR mask. */ + uint32_t fMxCsrMask; + /** Pointer to an array of CPUID leaves. */ + PCCPUMCPUIDLEAF paCpuIdLeaves; + /** The number of CPUID leaves in the array paCpuIdLeaves points to. */ + uint32_t cCpuIdLeaves; + /** The method used to deal with unknown CPUID leaves. */ + CPUMUNKNOWNCPUID enmUnknownCpuId; + /** The default unknown CPUID value. */ + CPUMCPUID DefUnknownCpuId; + + /** MSR mask. Several microarchitectures ignore the higher bits of ECX in + * the RDMSR and WRMSR instructions. */ + uint32_t fMsrMask; + + /** The number of ranges in the table pointed to b paMsrRanges. */ + uint32_t cMsrRanges; + /** MSR ranges for this CPU. 
*/ + PCCPUMMSRRANGE paMsrRanges; +} CPUMDBENTRY; + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** @name CPUDB_F_XXX - CPUDBENTRY::fFlags + * @{ */ +/** Should execute all in IEM. + * @todo Implement this - currently done in Main... */ +#define CPUDB_F_EXECUTE_ALL_IN_IEM RT_BIT_32(0) +/** @} */ + + +/** @def NULL_ALONE + * For eliminating an unnecessary data dependency in standalone builds (for + * VBoxSVC). */ +/** @def ZERO_ALONE + * For eliminating an unnecessary data size dependency in standalone builds (for + * VBoxSVC). */ +#ifndef CPUM_DB_STANDALONE +# define NULL_ALONE(a_aTable) a_aTable +# define ZERO_ALONE(a_cTable) a_cTable +#else +# define NULL_ALONE(a_aTable) NULL +# define ZERO_ALONE(a_cTable) 0 +#endif + + +/** @name Short macros for the MSR range entries. + * + * These are rather cryptic, but this is to reduce the attack on the right + * margin. + * + * @{ */ +/** Alias one MSR onto another (a_uTarget). */ +#define MAL(a_uMsr, a_szName, a_uTarget) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_MsrAlias, kCpumMsrWrFn_MsrAlias, 0, a_uTarget, 0, 0, a_szName) +/** Functions handles everything. */ +#define MFN(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, 0, 0, 0, a_szName) +/** Functions handles everything, with GP mask. */ +#define MFG(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_fWrGpMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, 0, 0, a_fWrGpMask, a_szName) +/** Function handlers, read-only. */ +#define MFO(a_uMsr, a_szName, a_enmRdFnSuff) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_ReadOnly, 0, 0, 0, UINT64_MAX, a_szName) +/** Function handlers, ignore all writes. */ +#define MFI(a_uMsr, a_szName, a_enmRdFnSuff) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_IgnoreWrite, 0, 0, UINT64_MAX, 0, a_szName) +/** Function handlers, with value. */ +#define MFV(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_uValue) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, a_uValue, 0, 0, a_szName) +/** Function handlers, with write ignore mask. */ +#define MFW(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_fWrIgnMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, 0, a_fWrIgnMask, 0, a_szName) +/** Function handlers, extended version. */ +#define MFX(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_uValue, a_fWrIgnMask, a_fWrGpMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, a_uValue, a_fWrIgnMask, a_fWrGpMask, a_szName) +/** Function handlers, with CPUMCPU storage variable. */ +#define MFS(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_CpumCpuMember) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, \ + RT_OFFSETOF(CPUMCPU, a_CpumCpuMember), 0, 0, 0, a_szName) +/** Function handlers, with CPUMCPU storage variable, ignore mask and GP mask. 
*/ +#define MFZ(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_CpumCpuMember, a_fWrIgnMask, a_fWrGpMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, \ + RT_OFFSETOF(CPUMCPU, a_CpumCpuMember), 0, a_fWrIgnMask, a_fWrGpMask, a_szName) +/** Read-only fixed value. */ +#define MVO(a_uMsr, a_szName, a_uValue) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_FixedValue, kCpumMsrWrFn_ReadOnly, 0, a_uValue, 0, UINT64_MAX, a_szName) +/** Read-only fixed value, ignores all writes. */ +#define MVI(a_uMsr, a_szName, a_uValue) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_FixedValue, kCpumMsrWrFn_IgnoreWrite, 0, a_uValue, UINT64_MAX, 0, a_szName) +/** Read fixed value, ignore writes outside GP mask. */ +#define MVG(a_uMsr, a_szName, a_uValue, a_fWrGpMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_FixedValue, kCpumMsrWrFn_IgnoreWrite, 0, a_uValue, 0, a_fWrGpMask, a_szName) +/** Read fixed value, extended version with both GP and ignore masks. */ +#define MVX(a_uMsr, a_szName, a_uValue, a_fWrIgnMask, a_fWrGpMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_FixedValue, kCpumMsrWrFn_IgnoreWrite, 0, a_uValue, a_fWrIgnMask, a_fWrGpMask, a_szName) +/** The short form, no CPUM backing. */ +#define MSN(a_uMsr, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask) \ + RINT(a_uMsr, a_uMsr, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, \ + a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask, a_szName) + +/** Range: Functions handles everything. */ +#define RFN(a_uFirst, a_uLast, a_szName, a_enmRdFnSuff, a_enmWrFnSuff) \ + RINT(a_uFirst, a_uLast, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, 0, 0, 0, a_szName) +/** Range: Read fixed value, read-only. */ +#define RVO(a_uFirst, a_uLast, a_szName, a_uValue) \ + RINT(a_uFirst, a_uLast, kCpumMsrRdFn_FixedValue, kCpumMsrWrFn_ReadOnly, 0, a_uValue, 0, UINT64_MAX, a_szName) +/** Range: Read fixed value, ignore writes. */ +#define RVI(a_uFirst, a_uLast, a_szName, a_uValue) \ + RINT(a_uFirst, a_uLast, kCpumMsrRdFn_FixedValue, kCpumMsrWrFn_IgnoreWrite, 0, a_uValue, UINT64_MAX, 0, a_szName) +/** Range: The short form, no CPUM backing. */ +#define RSN(a_uFirst, a_uLast, a_szName, a_enmRdFnSuff, a_enmWrFnSuff, a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask) \ + RINT(a_uFirst, a_uLast, kCpumMsrRdFn_##a_enmRdFnSuff, kCpumMsrWrFn_##a_enmWrFnSuff, 0, \ + a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask, a_szName) + +/** Internal form used by the macros. 
*/ +#ifdef VBOX_WITH_STATISTICS +# define RINT(a_uFirst, a_uLast, a_enmRdFn, a_enmWrFn, a_offCpumCpu, a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask, a_szName) \ + { a_uFirst, a_uLast, a_enmRdFn, a_enmWrFn, a_offCpumCpu, 0, a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask, a_szName, \ + { 0 }, { 0 }, { 0 }, { 0 } } +#else +# define RINT(a_uFirst, a_uLast, a_enmRdFn, a_enmWrFn, a_offCpumCpu, a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask, a_szName) \ + { a_uFirst, a_uLast, a_enmRdFn, a_enmWrFn, a_offCpumCpu, 0, a_uInitOrReadValue, a_fWrIgnMask, a_fWrGpMask, a_szName } +#endif +/** @} */ + +#ifndef CPUM_DB_STANDALONE + +#include "cpus/Intel_Core_i7_6700K.h" +#include "cpus/Intel_Core_i7_5600U.h" +#include "cpus/Intel_Core_i7_3960X.h" +#include "cpus/Intel_Core_i5_3570.h" +#include "cpus/Intel_Core_i7_2635QM.h" +#include "cpus/Intel_Xeon_X5482_3_20GHz.h" +#include "cpus/Intel_Core2_X6800_2_93GHz.h" +#include "cpus/Intel_Core2_T7600_2_33GHz.h" +#include "cpus/Intel_Core_Duo_T2600_2_16GHz.h" +#include "cpus/Intel_Pentium_M_processor_2_00GHz.h" +#include "cpus/Intel_Pentium_4_3_00GHz.h" +#include "cpus/Intel_Pentium_N3530_2_16GHz.h" +#include "cpus/Intel_Atom_330_1_60GHz.h" +#include "cpus/Intel_80486.h" +#include "cpus/Intel_80386.h" +#include "cpus/Intel_80286.h" +#include "cpus/Intel_80186.h" +#include "cpus/Intel_8086.h" + +#include "cpus/AMD_FX_8150_Eight_Core.h" +#include "cpus/AMD_Phenom_II_X6_1100T.h" +#include "cpus/Quad_Core_AMD_Opteron_2384.h" +#include "cpus/AMD_Athlon_64_X2_Dual_Core_4200.h" +#include "cpus/AMD_Athlon_64_3200.h" + +#include "cpus/VIA_QuadCore_L4700_1_2_GHz.h" + +#include "cpus/ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h" + + + +/** + * The database entries. + * + * 1. The first entry is special. It is the fallback for unknown + * processors. Thus, it better be pretty representative. + * + * 2. The first entry for a CPU vendor is likewise important as it is + * the default entry for that vendor. + * + * Generally we put the most recent CPUs first, since these tend to have the + * most complicated and backwards compatible list of MSRs. 
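+ *
+ * Editorial note (hedged sketch, not part of the upstream change): a lookup by
+ * profile name can be expected to walk this table roughly as
+ *     for (uint32_t i = 0; i < RT_ELEMENTS(g_apCpumDbEntries); i++)
+ *         if (!RTStrICmp(pszName, g_apCpumDbEntries[i]->pszName))
+ *             return g_apCpumDbEntries[i];
+ * with the first entry doubling as the fallback for unknown processors, as
+ * noted above.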
+ */ +static CPUMDBENTRY const * const g_apCpumDbEntries[] = +{ +#ifdef VBOX_CPUDB_Intel_Core_i7_6700K_h + &g_Entry_Intel_Core_i7_6700K, +#endif +#ifdef VBOX_CPUDB_Intel_Core_i7_5600U_h + &g_Entry_Intel_Core_i7_5600U, +#endif +#ifdef VBOX_CPUDB_Intel_Core_i5_3570_h + &g_Entry_Intel_Core_i5_3570, +#endif +#ifdef VBOX_CPUDB_Intel_Core_i7_3960X_h + &g_Entry_Intel_Core_i7_3960X, +#endif +#ifdef VBOX_CPUDB_Intel_Core_i7_2635QM_h + &g_Entry_Intel_Core_i7_2635QM, +#endif +#ifdef VBOX_CPUDB_Intel_Pentium_N3530_2_16GHz_h + &g_Entry_Intel_Pentium_N3530_2_16GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Atom_330_1_60GHz_h + &g_Entry_Intel_Atom_330_1_60GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Pentium_M_processor_2_00GHz_h + &g_Entry_Intel_Pentium_M_processor_2_00GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Xeon_X5482_3_20GHz_h + &g_Entry_Intel_Xeon_X5482_3_20GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Core2_X6800_2_93GHz_h + &g_Entry_Intel_Core2_X6800_2_93GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Core2_T7600_2_33GHz_h + &g_Entry_Intel_Core2_T7600_2_33GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Core_Duo_T2600_2_16GHz_h + &g_Entry_Intel_Core_Duo_T2600_2_16GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Pentium_4_3_00GHz_h + &g_Entry_Intel_Pentium_4_3_00GHz, +#endif +#ifdef VBOX_CPUDB_Intel_Pentium_4_3_00GHz_h + &g_Entry_Intel_Pentium_4_3_00GHz, +#endif +/** @todo pentium, pentium mmx, pentium pro, pentium II, pentium III */ +#ifdef VBOX_CPUDB_Intel_80486_h + &g_Entry_Intel_80486, +#endif +#ifdef VBOX_CPUDB_Intel_80386_h + &g_Entry_Intel_80386, +#endif +#ifdef VBOX_CPUDB_Intel_80286_h + &g_Entry_Intel_80286, +#endif +#ifdef VBOX_CPUDB_Intel_80186_h + &g_Entry_Intel_80186, +#endif +#ifdef VBOX_CPUDB_Intel_8086_h + &g_Entry_Intel_8086, +#endif + +#ifdef VBOX_CPUDB_AMD_FX_8150_Eight_Core_h + &g_Entry_AMD_FX_8150_Eight_Core, +#endif +#ifdef VBOX_CPUDB_AMD_Phenom_II_X6_1100T_h + &g_Entry_AMD_Phenom_II_X6_1100T, +#endif +#ifdef VBOX_CPUDB_Quad_Core_AMD_Opteron_2384_h + &g_Entry_Quad_Core_AMD_Opteron_2384, +#endif +#ifdef VBOX_CPUDB_AMD_Athlon_64_X2_Dual_Core_4200_h + &g_Entry_AMD_Athlon_64_X2_Dual_Core_4200, +#endif +#ifdef VBOX_CPUDB_AMD_Athlon_64_3200_h + &g_Entry_AMD_Athlon_64_3200, +#endif + +#ifdef VBOX_CPUDB_ZHAOXIN_KaiXian_KX_U5581_1_8GHz_h + &g_Entry_ZHAOXIN_KaiXian_KX_U5581_1_8GHz, +#endif + +#ifdef VBOX_CPUDB_VIA_QuadCore_L4700_1_2_GHz_h + &g_Entry_VIA_QuadCore_L4700_1_2_GHz, +#endif + +#ifdef VBOX_CPUDB_NEC_V20_h + &g_Entry_NEC_V20, +#endif +}; + + + +/** + * Binary search used by cpumR3MsrRangesInsert and has some special properties + * wrt to mismatches. + * + * @returns Insert location. + * @param paMsrRanges The MSR ranges to search. + * @param cMsrRanges The number of MSR ranges. + * @param uMsr What to search for. + */ +static uint32_t cpumR3MsrRangesBinSearch(PCCPUMMSRRANGE paMsrRanges, uint32_t cMsrRanges, uint32_t uMsr) +{ + if (!cMsrRanges) + return 0; + + uint32_t iStart = 0; + uint32_t iLast = cMsrRanges - 1; + for (;;) + { + uint32_t i = iStart + (iLast - iStart + 1) / 2; + if ( uMsr >= paMsrRanges[i].uFirst + && uMsr <= paMsrRanges[i].uLast) + return i; + if (uMsr < paMsrRanges[i].uFirst) + { + if (i <= iStart) + return i; + iLast = i - 1; + } + else + { + if (i >= iLast) + { + if (i < cMsrRanges) + i++; + return i; + } + iStart = i + 1; + } + } +} + + +/** + * Ensures that there is space for at least @a cNewRanges in the table, + * reallocating the table if necessary. + * + * @returns Pointer to the MSR ranges on success, NULL on failure. On failure + * @a *ppaMsrRanges is freed and set to NULL. 
+ * @param pVM The cross context VM structure. If NULL, + * use the process heap, otherwise the VM's hyper heap. + * @param ppaMsrRanges The variable pointing to the ranges (input/output). + * @param cMsrRanges The current number of ranges. + * @param cNewRanges The number of ranges to be added. + */ +static PCPUMMSRRANGE cpumR3MsrRangesEnsureSpace(PVM pVM, PCPUMMSRRANGE *ppaMsrRanges, uint32_t cMsrRanges, uint32_t cNewRanges) +{ + uint32_t cMsrRangesAllocated; + if (!pVM) + cMsrRangesAllocated = RT_ALIGN_32(cMsrRanges, 16); + else + { + /* + * We're using the hyper heap now, but when the range array was copied over to it from + * the host-context heap, we only copy the exact size and not the ensured size. + * See @bugref{7270}. + */ + cMsrRangesAllocated = cMsrRanges; + } + if (cMsrRangesAllocated < cMsrRanges + cNewRanges) + { + void *pvNew; + uint32_t cNew = RT_ALIGN_32(cMsrRanges + cNewRanges, 16); + if (pVM) + { + Assert(ppaMsrRanges == &pVM->cpum.s.GuestInfo.paMsrRangesR3); + Assert(cMsrRanges == pVM->cpum.s.GuestInfo.cMsrRanges); + + size_t cb = cMsrRangesAllocated * sizeof(**ppaMsrRanges); + size_t cbNew = cNew * sizeof(**ppaMsrRanges); + int rc = MMR3HyperRealloc(pVM, *ppaMsrRanges, cb, 32, MM_TAG_CPUM_MSRS, cbNew, &pvNew); + if (RT_FAILURE(rc)) + { + *ppaMsrRanges = NULL; + pVM->cpum.s.GuestInfo.paMsrRangesR0 = NIL_RTR0PTR; + pVM->cpum.s.GuestInfo.paMsrRangesRC = NIL_RTRCPTR; + LogRel(("CPUM: cpumR3MsrRangesEnsureSpace: MMR3HyperRealloc failed. rc=%Rrc\n", rc)); + return NULL; + } + *ppaMsrRanges = (PCPUMMSRRANGE)pvNew; + } + else + { + pvNew = RTMemRealloc(*ppaMsrRanges, cNew * sizeof(**ppaMsrRanges)); + if (!pvNew) + { + RTMemFree(*ppaMsrRanges); + *ppaMsrRanges = NULL; + return NULL; + } + } + *ppaMsrRanges = (PCPUMMSRRANGE)pvNew; + } + + if (pVM) + { + /* Update R0 and RC pointers. */ + Assert(ppaMsrRanges == &pVM->cpum.s.GuestInfo.paMsrRangesR3); + pVM->cpum.s.GuestInfo.paMsrRangesR0 = MMHyperR3ToR0(pVM, *ppaMsrRanges); + pVM->cpum.s.GuestInfo.paMsrRangesRC = MMHyperR3ToRC(pVM, *ppaMsrRanges); + } + + return *ppaMsrRanges; +} + + +/** + * Inserts a new MSR range in into an sorted MSR range array. + * + * If the new MSR range overlaps existing ranges, the existing ones will be + * adjusted/removed to fit in the new one. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_NO_MEMORY + * + * @param pVM The cross context VM structure. If NULL, + * use the process heap, otherwise the VM's hyper heap. + * @param ppaMsrRanges The variable pointing to the ranges (input/output). + * Must be NULL if using the hyper heap. + * @param pcMsrRanges The variable holding number of ranges. Must be NULL + * if using the hyper heap. + * @param pNewRange The new range. + */ +int cpumR3MsrRangesInsert(PVM pVM, PCPUMMSRRANGE *ppaMsrRanges, uint32_t *pcMsrRanges, PCCPUMMSRRANGE pNewRange) +{ + Assert(pNewRange->uLast >= pNewRange->uFirst); + Assert(pNewRange->enmRdFn > kCpumMsrRdFn_Invalid && pNewRange->enmRdFn < kCpumMsrRdFn_End); + Assert(pNewRange->enmWrFn > kCpumMsrWrFn_Invalid && pNewRange->enmWrFn < kCpumMsrWrFn_End); + + /* + * Validate and use the VM's MSR ranges array if we are using the hyper heap. 
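+ *
+ * Editorial note (hedged usage sketch, not part of the upstream change): when
+ * called standalone (pVM == NULL) the caller owns the array, roughly:
+ *     PCPUMMSRRANGE paRanges = NULL;
+ *     uint32_t      cRanges  = 0;
+ *     int rc = cpumR3MsrRangesInsert(NULL, &paRanges, &cRanges, &NewRange);
+ * where NewRange is a fully initialized CPUMMSRRANGE; on success the array is
+ * (re)allocated with RTMemRealloc and must be freed by the caller.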
+ */ + if (pVM) + { + AssertReturn(!ppaMsrRanges, VERR_INVALID_PARAMETER); + AssertReturn(!pcMsrRanges, VERR_INVALID_PARAMETER); + + ppaMsrRanges = &pVM->cpum.s.GuestInfo.paMsrRangesR3; + pcMsrRanges = &pVM->cpum.s.GuestInfo.cMsrRanges; + } + else + { + AssertReturn(ppaMsrRanges, VERR_INVALID_POINTER); + AssertReturn(pcMsrRanges, VERR_INVALID_POINTER); + } + + uint32_t cMsrRanges = *pcMsrRanges; + PCPUMMSRRANGE paMsrRanges = *ppaMsrRanges; + + /* + * Optimize the linear insertion case where we add new entries at the end. + */ + if ( cMsrRanges > 0 + && paMsrRanges[cMsrRanges - 1].uLast < pNewRange->uFirst) + { + paMsrRanges = cpumR3MsrRangesEnsureSpace(pVM, ppaMsrRanges, cMsrRanges, 1); + if (!paMsrRanges) + return VERR_NO_MEMORY; + paMsrRanges[cMsrRanges] = *pNewRange; + *pcMsrRanges += 1; + } + else + { + uint32_t i = cpumR3MsrRangesBinSearch(paMsrRanges, cMsrRanges, pNewRange->uFirst); + Assert(i == cMsrRanges || pNewRange->uFirst <= paMsrRanges[i].uLast); + Assert(i == 0 || pNewRange->uFirst > paMsrRanges[i - 1].uLast); + + /* + * Adding an entirely new entry? + */ + if ( i >= cMsrRanges + || pNewRange->uLast < paMsrRanges[i].uFirst) + { + paMsrRanges = cpumR3MsrRangesEnsureSpace(pVM, ppaMsrRanges, cMsrRanges, 1); + if (!paMsrRanges) + return VERR_NO_MEMORY; + if (i < cMsrRanges) + memmove(&paMsrRanges[i + 1], &paMsrRanges[i], (cMsrRanges - i) * sizeof(paMsrRanges[0])); + paMsrRanges[i] = *pNewRange; + *pcMsrRanges += 1; + } + /* + * Replace existing entry? + */ + else if ( pNewRange->uFirst == paMsrRanges[i].uFirst + && pNewRange->uLast == paMsrRanges[i].uLast) + paMsrRanges[i] = *pNewRange; + /* + * Splitting an existing entry? + */ + else if ( pNewRange->uFirst > paMsrRanges[i].uFirst + && pNewRange->uLast < paMsrRanges[i].uLast) + { + paMsrRanges = cpumR3MsrRangesEnsureSpace(pVM, ppaMsrRanges, cMsrRanges, 2); + if (!paMsrRanges) + return VERR_NO_MEMORY; + if (i < cMsrRanges) + memmove(&paMsrRanges[i + 2], &paMsrRanges[i], (cMsrRanges - i) * sizeof(paMsrRanges[0])); + paMsrRanges[i + 1] = *pNewRange; + paMsrRanges[i + 2] = paMsrRanges[i]; + paMsrRanges[i ].uLast = pNewRange->uFirst - 1; + paMsrRanges[i + 2].uFirst = pNewRange->uLast + 1; + *pcMsrRanges += 2; + } + /* + * Complicated scenarios that can affect more than one range. + * + * The current code does not optimize memmove calls when replacing + * one or more existing ranges, because it's tedious to deal with and + * not expected to be a frequent usage scenario. + */ + else + { + /* Adjust start of first match? */ + if ( pNewRange->uFirst <= paMsrRanges[i].uFirst + && pNewRange->uLast < paMsrRanges[i].uLast) + paMsrRanges[i].uFirst = pNewRange->uLast + 1; + else + { + /* Adjust end of first match? */ + if (pNewRange->uFirst > paMsrRanges[i].uFirst) + { + Assert(paMsrRanges[i].uLast >= pNewRange->uFirst); + paMsrRanges[i].uLast = pNewRange->uFirst - 1; + i++; + } + /* Replace the whole first match (lazy bird). */ + else + { + if (i + 1 < cMsrRanges) + memmove(&paMsrRanges[i], &paMsrRanges[i + 1], (cMsrRanges - i - 1) * sizeof(paMsrRanges[0])); + cMsrRanges = *pcMsrRanges -= 1; + } + + /* Do the new range affect more ranges? */ + while ( i < cMsrRanges + && pNewRange->uLast >= paMsrRanges[i].uFirst) + { + if (pNewRange->uLast < paMsrRanges[i].uLast) + { + /* Adjust the start of it, then we're done. */ + paMsrRanges[i].uFirst = pNewRange->uLast + 1; + break; + } + + /* Remove it entirely. 
*/ + if (i + 1 < cMsrRanges) + memmove(&paMsrRanges[i], &paMsrRanges[i + 1], (cMsrRanges - i - 1) * sizeof(paMsrRanges[0])); + cMsrRanges = *pcMsrRanges -= 1; + } + } + + /* Now, perform a normal insertion. */ + paMsrRanges = cpumR3MsrRangesEnsureSpace(pVM, ppaMsrRanges, cMsrRanges, 1); + if (!paMsrRanges) + return VERR_NO_MEMORY; + if (i < cMsrRanges) + memmove(&paMsrRanges[i + 1], &paMsrRanges[i], (cMsrRanges - i) * sizeof(paMsrRanges[0])); + paMsrRanges[i] = *pNewRange; + *pcMsrRanges += 1; + } + } + + return VINF_SUCCESS; +} + + +/** + * Reconciles CPUID info with MSRs (selected ones). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int cpumR3MsrReconcileWithCpuId(PVM pVM) +{ + PCCPUMMSRRANGE papToAdd[10]; + uint32_t cToAdd = 0; + + /* + * The IA32_FLUSH_CMD MSR was introduced in MCUs for CVS-2018-3646 and associates. + */ + if (pVM->cpum.s.GuestFeatures.fFlushCmd && !cpumLookupMsrRange(pVM, MSR_IA32_FLUSH_CMD)) + { + static CPUMMSRRANGE const s_FlushCmd = + { + /*.uFirst =*/ MSR_IA32_FLUSH_CMD, + /*.uLast =*/ MSR_IA32_FLUSH_CMD, + /*.enmRdFn =*/ kCpumMsrRdFn_WriteOnly, + /*.enmWrFn =*/ kCpumMsrWrFn_Ia32FlushCmd, + /*.offCpumCpu =*/ UINT16_MAX, + /*.fReserved =*/ 0, + /*.uValue =*/ 0, + /*.fWrIgnMask =*/ 0, + /*.fWrGpMask =*/ ~MSR_IA32_FLUSH_CMD_F_L1D, + /*.szName = */ "IA32_FLUSH_CMD" + }; + papToAdd[cToAdd++] = &s_FlushCmd; + } + + /* + * Do the adding. + */ + for (uint32_t i = 0; i < cToAdd; i++) + { + PCCPUMMSRRANGE pRange = papToAdd[i]; + LogRel(("CPUM: MSR/CPUID reconciliation insert: %#010x %s\n", pRange->uFirst, pRange->szName)); + int rc = cpumR3MsrRangesInsert(NULL /* pVM */, &pVM->cpum.s.GuestInfo.paMsrRangesR3, &pVM->cpum.s.GuestInfo.cMsrRanges, + pRange); + if (RT_FAILURE(rc)) + return rc; + } + return VINF_SUCCESS; +} + + +/** + * Worker for cpumR3MsrApplyFudge that applies one table. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param paRanges Array of MSRs to fudge. + * @param cRanges Number of MSRs in the array. + */ +static int cpumR3MsrApplyFudgeTable(PVM pVM, PCCPUMMSRRANGE paRanges, size_t cRanges) +{ + for (uint32_t i = 0; i < cRanges; i++) + if (!cpumLookupMsrRange(pVM, paRanges[i].uFirst)) + { + LogRel(("CPUM: MSR fudge: %#010x %s\n", paRanges[i].uFirst, paRanges[i].szName)); + int rc = cpumR3MsrRangesInsert(NULL /* pVM */, &pVM->cpum.s.GuestInfo.paMsrRangesR3, &pVM->cpum.s.GuestInfo.cMsrRanges, + &paRanges[i]); + if (RT_FAILURE(rc)) + return rc; + } + return VINF_SUCCESS; +} + + +/** + * Fudges the MSRs that guest are known to access in some odd cases. + * + * A typical example is a VM that has been moved between different hosts where + * for instance the cpu vendor differs. + * + * Another example is older CPU profiles (e.g. Atom Bonnet) for newer CPUs (e.g. + * Atom Silvermont), where features reported thru CPUID aren't present in the + * MSRs (e.g. AMD64_TSC_AUX). + * + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int cpumR3MsrApplyFudge(PVM pVM) +{ + /* + * Basic. 
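The reconciliation step above follows one pattern: add a single-MSR range only when the corresponding CPUID feature is exposed and no existing range already covers the MSR. A rough standalone sketch of that pattern, using std::vector in place of the CPUM range array (isMsrCovered and reconcileOne are invented names):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct MsrRange { uint32_t uFirst, uLast; const char *pszName; };

/* True if some known range already covers uMsr (ranges sorted, non-overlapping). */
static bool isMsrCovered(const std::vector<MsrRange> &ranges, uint32_t uMsr)
{
    auto it = std::lower_bound(ranges.begin(), ranges.end(), uMsr,
                               [](const MsrRange &r, uint32_t u) { return r.uLast < u; });
    return it != ranges.end() && it->uFirst <= uMsr;
}

/* Add a single-MSR range only when the matching CPUID feature is exposed and
   the MSR is not described yet - the same "reconcile" idea as above. */
static void reconcileOne(std::vector<MsrRange> &ranges, bool fFeature, MsrRange add)
{
    if (fFeature && !isMsrCovered(ranges, add.uFirst))
    {
        std::printf("reconcile insert: %#010x %s\n", (unsigned)add.uFirst, add.pszName);
        ranges.insert(std::lower_bound(ranges.begin(), ranges.end(), add,
                                       [](const MsrRange &a, const MsrRange &b) { return a.uLast < b.uFirst; }),
                      add);
    }
}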
+ */ + static CPUMMSRRANGE const s_aFudgeMsrs[] = + { + MFO(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), + MVO(0x00000017, "IA32_PLATFORM_ID", 0), + MFN(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase), + MVI(0x0000008b, "BIOS_SIGN", 0), + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x005, 0, 0), + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, ~(uint64_t)UINT32_MAX, 0), + MFN(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable), + MFN(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl), + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, ~(uint64_t)0xc07), + MFN(0x00000400, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + }; + int rc = cpumR3MsrApplyFudgeTable(pVM, &s_aFudgeMsrs[0], RT_ELEMENTS(s_aFudgeMsrs)); + AssertLogRelRCReturn(rc, rc); + + /* + * XP might mistake opterons and other newer CPUs for P4s. + */ + if (pVM->cpum.s.GuestFeatures.uFamily >= 0xf) + { + static CPUMMSRRANGE const s_aP4FudgeMsrs[] = + { + MFX(0x0000002c, "P4_EBC_FREQUENCY_ID", IntelP4EbcFrequencyId, IntelP4EbcFrequencyId, 0xf12010f, UINT64_MAX, 0), + }; + rc = cpumR3MsrApplyFudgeTable(pVM, &s_aP4FudgeMsrs[0], RT_ELEMENTS(s_aP4FudgeMsrs)); + AssertLogRelRCReturn(rc, rc); + } + + if (pVM->cpum.s.GuestFeatures.fRdTscP) + { + static CPUMMSRRANGE const s_aRdTscPFudgeMsrs[] = + { + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), + }; + rc = cpumR3MsrApplyFudgeTable(pVM, &s_aRdTscPFudgeMsrs[0], RT_ELEMENTS(s_aRdTscPFudgeMsrs)); + AssertLogRelRCReturn(rc, rc); + } + + return rc; +} + + +/** + * Do we consider @a enmConsider a better match for @a enmTarget than + * @a enmFound? + * + * Only called when @a enmConsider isn't exactly what we're looking for. + * + * @returns true/false. + * @param enmConsider The new microarch to consider. + * @param enmTarget The target microarch. + * @param enmFound The best microarch match we've found thus far. + */ +DECLINLINE(bool) cpumR3DbIsBetterMarchMatch(CPUMMICROARCH enmConsider, CPUMMICROARCH enmTarget, CPUMMICROARCH enmFound) +{ + Assert(enmConsider != enmTarget); + + /* + * If we've got an march match, don't bother with enmConsider. + */ + if (enmFound == enmTarget) + return false; + + /* + * Found is below: Pick 'consider' if it's closer to the target or above it. + */ + if (enmFound < enmTarget) + return enmConsider > enmFound; + + /* + * Found is above: Pick 'consider' if it's also above (paranoia: or equal) + * and but closer to the target. + */ + return enmConsider >= enmTarget && enmConsider < enmFound; +} + + +/** + * Do we consider @a enmConsider a better match for @a enmTarget than + * @a enmFound? + * + * Only called for intel family 06h CPUs. + * + * @returns true/false. + * @param enmConsider The new microarch to consider. + * @param enmTarget The target microarch. + * @param enmFound The best microarch match we've found thus far. 
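cpumR3DbIsBetterMarchMatch encodes a simple preference: an exact microarchitecture match wins, candidates at or above the target beat older ones, and among newer candidates the one closest to the target wins. The same rule, restated as a standalone function over plain ordinals (Microarch here is just an illustrative typedef, not the real enum):

#include <cstdint>

/* Only the relative order of these ordinals matters for the heuristic. */
typedef uint32_t Microarch;

/* Prefer candidates at or above the target; among those below, prefer the
   newest, and among those above, prefer the oldest. */
static bool isBetterMarchMatch(Microarch enmConsider, Microarch enmTarget, Microarch enmFound)
{
    if (enmFound == enmTarget)          /* already exact - nothing beats it */
        return false;
    if (enmFound < enmTarget)           /* found is older: take anything newer */
        return enmConsider > enmFound;
    /* found is newer than the target: take a candidate that is still at or
       above the target but closer to it */
    return enmConsider >= enmTarget && enmConsider < enmFound;
}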
+ */ +static bool cpumR3DbIsBetterIntelFam06Match(CPUMMICROARCH enmConsider, CPUMMICROARCH enmTarget, CPUMMICROARCH enmFound) +{ + /* Check intel family 06h claims. */ + AssertReturn(enmConsider >= kCpumMicroarch_Intel_P6_Core_Atom_First && enmConsider <= kCpumMicroarch_Intel_P6_Core_Atom_End, + false); + AssertReturn(enmTarget >= kCpumMicroarch_Intel_P6_Core_Atom_First && enmTarget <= kCpumMicroarch_Intel_P6_Core_Atom_End, + false); + + /* Put matches out of the way. */ + if (enmConsider == enmTarget) + return true; + if (enmFound == enmTarget) + return false; + + /* If found isn't a family 06h march, whatever we're considering must be a better choice. */ + if ( enmFound < kCpumMicroarch_Intel_P6_Core_Atom_First + || enmFound > kCpumMicroarch_Intel_P6_Core_Atom_End) + return true; + + /* + * The family 06h stuff is split into three categories: + * - Common P6 heritage + * - Core + * - Atom + * + * Determin which of the three arguments are Atom marchs, because that's + * all we need to make the right choice. + */ + bool const fConsiderAtom = enmConsider >= kCpumMicroarch_Intel_Atom_First; + bool const fTargetAtom = enmTarget >= kCpumMicroarch_Intel_Atom_First; + bool const fFoundAtom = enmFound >= kCpumMicroarch_Intel_Atom_First; + + /* + * Want atom: + */ + if (fTargetAtom) + { + /* Pick the atom if we've got one of each.*/ + if (fConsiderAtom != fFoundAtom) + return fConsiderAtom; + /* If we haven't got any atoms under consideration, pick a P6 or the earlier core. + Note! Not entirely sure Dothan is the best choice, but it'll do for now. */ + if (!fConsiderAtom) + { + if (enmConsider > enmFound) + return enmConsider <= kCpumMicroarch_Intel_P6_M_Dothan; + return enmFound > kCpumMicroarch_Intel_P6_M_Dothan; + } + /* else: same category, default comparison rules. */ + Assert(fConsiderAtom && fFoundAtom); + } + /* + * Want non-atom: + */ + /* Pick the non-atom if we've got one of each. */ + else if (fConsiderAtom != fFoundAtom) + return fFoundAtom; + /* If we've only got atoms under consideration, pick the older one just to pick something. */ + else if (fConsiderAtom) + return enmConsider < enmFound; + else + Assert(!fConsiderAtom && !fFoundAtom); + + /* + * Same basic category. Do same compare as caller. + */ + return cpumR3DbIsBetterMarchMatch(enmConsider, enmTarget, enmFound); +} + + +int cpumR3DbGetCpuInfo(const char *pszName, PCPUMINFO pInfo) +{ + CPUMDBENTRY const *pEntry = NULL; + int rc; + + if (!strcmp(pszName, "host")) + { + /* + * Create a CPU database entry for the host CPU. This means getting + * the CPUID bits from the real CPU and grabbing the closest matching + * database entry for MSRs. + */ + rc = CPUMR3CpuIdDetectUnknownLeafMethod(&pInfo->enmUnknownCpuIdMethod, &pInfo->DefCpuId); + if (RT_FAILURE(rc)) + return rc; + rc = CPUMR3CpuIdCollectLeaves(&pInfo->paCpuIdLeavesR3, &pInfo->cCpuIdLeaves); + if (RT_FAILURE(rc)) + return rc; + pInfo->fMxCsrMask = CPUMR3DeterminHostMxCsrMask(); + + /* Lookup database entry for MSRs. 
*/ + CPUMCPUVENDOR const enmVendor = CPUMR3CpuIdDetectVendorEx(pInfo->paCpuIdLeavesR3[0].uEax, + pInfo->paCpuIdLeavesR3[0].uEbx, + pInfo->paCpuIdLeavesR3[0].uEcx, + pInfo->paCpuIdLeavesR3[0].uEdx); + uint32_t const uStd1Eax = pInfo->paCpuIdLeavesR3[1].uEax; + uint8_t const uFamily = ASMGetCpuFamily(uStd1Eax); + uint8_t const uModel = ASMGetCpuModel(uStd1Eax, enmVendor == CPUMCPUVENDOR_INTEL); + uint8_t const uStepping = ASMGetCpuStepping(uStd1Eax); + CPUMMICROARCH const enmMicroarch = CPUMR3CpuIdDetermineMicroarchEx(enmVendor, uFamily, uModel, uStepping); + + for (unsigned i = 0; i < RT_ELEMENTS(g_apCpumDbEntries); i++) + { + CPUMDBENTRY const *pCur = g_apCpumDbEntries[i]; + if ((CPUMCPUVENDOR)pCur->enmVendor == enmVendor) + { + /* Match against Family, Microarch, model and stepping. Except + for family, always match the closer with preference given to + the later/older ones. */ + if (pCur->uFamily == uFamily) + { + if (pCur->enmMicroarch == enmMicroarch) + { + if (pCur->uModel == uModel) + { + if (pCur->uStepping == uStepping) + { + /* Perfect match. */ + pEntry = pCur; + break; + } + + if ( !pEntry + || pEntry->uModel != uModel + || pEntry->enmMicroarch != enmMicroarch + || pEntry->uFamily != uFamily) + pEntry = pCur; + else if ( pCur->uStepping >= uStepping + ? pCur->uStepping < pEntry->uStepping || pEntry->uStepping < uStepping + : pCur->uStepping > pEntry->uStepping) + pEntry = pCur; + } + else if ( !pEntry + || pEntry->enmMicroarch != enmMicroarch + || pEntry->uFamily != uFamily) + pEntry = pCur; + else if ( pCur->uModel >= uModel + ? pCur->uModel < pEntry->uModel || pEntry->uModel < uModel + : pCur->uModel > pEntry->uModel) + pEntry = pCur; + } + else if ( !pEntry + || pEntry->uFamily != uFamily) + pEntry = pCur; + /* Special march matching rules applies to intel family 06h. */ + else if ( enmVendor == CPUMCPUVENDOR_INTEL + && uFamily == 6 + ? cpumR3DbIsBetterIntelFam06Match(pCur->enmMicroarch, enmMicroarch, pEntry->enmMicroarch) + : cpumR3DbIsBetterMarchMatch(pCur->enmMicroarch, enmMicroarch, pEntry->enmMicroarch)) + pEntry = pCur; + } + /* We don't do closeness matching on family, we use the first + entry for the CPU vendor instead. (P4 workaround.) */ + else if (!pEntry) + pEntry = pCur; + } + } + + if (pEntry) + LogRel(("CPUM: Matched host CPU %s %#x/%#x/%#x %s with CPU DB entry '%s' (%s %#x/%#x/%#x %s)\n", + CPUMR3CpuVendorName(enmVendor), uFamily, uModel, uStepping, CPUMR3MicroarchName(enmMicroarch), + pEntry->pszName, CPUMR3CpuVendorName((CPUMCPUVENDOR)pEntry->enmVendor), pEntry->uFamily, pEntry->uModel, + pEntry->uStepping, CPUMR3MicroarchName(pEntry->enmMicroarch) )); + else + { + pEntry = g_apCpumDbEntries[0]; + LogRel(("CPUM: No matching processor database entry %s %#x/%#x/%#x %s, falling back on '%s'\n", + CPUMR3CpuVendorName(enmVendor), uFamily, uModel, uStepping, CPUMR3MicroarchName(enmMicroarch), + pEntry->pszName)); + } + } + else + { + /* + * We're supposed to be emulating a specific CPU that is included in + * our CPU database. The CPUID tables needs to be copied onto the + * heap so the caller can modify them and so they can be freed like + * in the host case above. 
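The nested comparisons above effectively rank database entries by family (exact match required, with the first vendor entry as fallback), then by microarchitecture, model and stepping closeness, preferring values at or above the probed CPU. The sketch below condenses that ranking into a single closeness score; it is a simplification for illustration only and deliberately drops the special Intel family 06h rules (DbEntry, closeness and pickEntry are invented names).

#include <cstdint>
#include <cstdlib>

struct DbEntry { uint8_t uFamily, uModel, uStepping; uint32_t enmMicroarch; const char *pszName; };

/* Absolute distance with a bias: values at or above the target are preferred
   over values below it, roughly what the closeness rules above encode. */
static int closeness(int value, int target)
{
    int d = std::abs(value - target);
    return value >= target ? d : d + 1;    /* below-target loses ties */
}

/* Pick the database entry closest to the probed CPU: family must match exactly
   (first entry wins otherwise), then microarch, model and stepping are matched
   by closeness, in that order. */
static const DbEntry *pickEntry(const DbEntry *pa, size_t c,
                                uint8_t uFamily, uint8_t uModel, uint8_t uStepping, uint32_t enmMicroarch)
{
    const DbEntry *pBest = nullptr;
    for (size_t i = 0; i < c; i++)
    {
        const DbEntry *pCur = &pa[i];
        if (pCur->uFamily != uFamily)
        {
            if (!pBest)                    /* no family match yet: remember the first entry */
                pBest = pCur;
            continue;
        }
        if (   !pBest
            || pBest->uFamily != uFamily
            || closeness((int)pCur->enmMicroarch, (int)enmMicroarch) < closeness((int)pBest->enmMicroarch, (int)enmMicroarch)
            || (   pCur->enmMicroarch == pBest->enmMicroarch
                && (   closeness(pCur->uModel, uModel) < closeness(pBest->uModel, uModel)
                    || (   pCur->uModel == pBest->uModel
                        && closeness(pCur->uStepping, uStepping) < closeness(pBest->uStepping, uStepping)))))
            pBest = pCur;
    }
    return pBest;
}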
+ */ + for (unsigned i = 0; i < RT_ELEMENTS(g_apCpumDbEntries); i++) + if (!strcmp(pszName, g_apCpumDbEntries[i]->pszName)) + { + pEntry = g_apCpumDbEntries[i]; + break; + } + if (!pEntry) + { + LogRel(("CPUM: Cannot locate any CPU by the name '%s'\n", pszName)); + return VERR_CPUM_DB_CPU_NOT_FOUND; + } + + pInfo->cCpuIdLeaves = pEntry->cCpuIdLeaves; + if (pEntry->cCpuIdLeaves) + { + /* Must allocate a multiple of 16 here, matching cpumR3CpuIdEnsureSpace. */ + size_t cbExtra = sizeof(pEntry->paCpuIdLeaves[0]) * (RT_ALIGN(pEntry->cCpuIdLeaves, 16) - pEntry->cCpuIdLeaves); + pInfo->paCpuIdLeavesR3 = (PCPUMCPUIDLEAF)RTMemDupEx(pEntry->paCpuIdLeaves, + sizeof(pEntry->paCpuIdLeaves[0]) * pEntry->cCpuIdLeaves, + cbExtra); + if (!pInfo->paCpuIdLeavesR3) + return VERR_NO_MEMORY; + } + else + pInfo->paCpuIdLeavesR3 = NULL; + + pInfo->enmUnknownCpuIdMethod = pEntry->enmUnknownCpuId; + pInfo->DefCpuId = pEntry->DefUnknownCpuId; + pInfo->fMxCsrMask = pEntry->fMxCsrMask; + + LogRel(("CPUM: Using CPU DB entry '%s' (%s %#x/%#x/%#x %s)\n", + pEntry->pszName, CPUMR3CpuVendorName((CPUMCPUVENDOR)pEntry->enmVendor), + pEntry->uFamily, pEntry->uModel, pEntry->uStepping, CPUMR3MicroarchName(pEntry->enmMicroarch) )); + } + + pInfo->fMsrMask = pEntry->fMsrMask; + pInfo->iFirstExtCpuIdLeaf = 0; /* Set by caller. */ + pInfo->uScalableBusFreq = pEntry->uScalableBusFreq; + pInfo->paCpuIdLeavesR0 = NIL_RTR0PTR; + pInfo->paMsrRangesR0 = NIL_RTR0PTR; + pInfo->paCpuIdLeavesRC = NIL_RTRCPTR; + pInfo->paMsrRangesRC = NIL_RTRCPTR; + + /* + * Copy the MSR range. + */ + uint32_t cMsrs = 0; + PCPUMMSRRANGE paMsrs = NULL; + + PCCPUMMSRRANGE pCurMsr = pEntry->paMsrRanges; + uint32_t cLeft = pEntry->cMsrRanges; + while (cLeft-- > 0) + { + rc = cpumR3MsrRangesInsert(NULL /* pVM */, &paMsrs, &cMsrs, pCurMsr); + if (RT_FAILURE(rc)) + { + Assert(!paMsrs); /* The above function frees this. */ + RTMemFree(pInfo->paCpuIdLeavesR3); + pInfo->paCpuIdLeavesR3 = NULL; + return rc; + } + pCurMsr++; + } + + pInfo->paMsrRangesR3 = paMsrs; + pInfo->cMsrRanges = cMsrs; + return VINF_SUCCESS; +} + + +/** + * Insert an MSR range into the VM. + * + * If the new MSR range overlaps existing ranges, the existing ones will be + * adjusted/removed to fit in the new one. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pNewRange Pointer to the MSR range being inserted. + */ +VMMR3DECL(int) CPUMR3MsrRangesInsert(PVM pVM, PCCPUMMSRRANGE pNewRange) +{ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pNewRange, VERR_INVALID_PARAMETER); + + return cpumR3MsrRangesInsert(pVM, NULL /* ppaMsrRanges */, NULL /* pcMsrRanges */, pNewRange); +} + + +/** + * Register statistics for the MSRs. + * + * This must not be called before the MSRs have been finalized and moved to the + * hyper heap. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int cpumR3MsrRegStats(PVM pVM) +{ + /* + * Global statistics. 
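Duplicating the read-only CPUID table with cbExtra padding keeps the copy sized to a multiple of 16 entries, so later leaf insertions can use the slack without reallocating, in line with the growth policy noted earlier. A standalone equivalent using calloc/memcpy is sketched below (CpuIdLeaf and dupLeavesWithSlack are illustrative names, not the real API):

#include <cstdint>
#include <cstdlib>
#include <cstring>

struct CpuIdLeaf { uint32_t uLeaf, uEax, uEbx, uEcx, uEdx; };   /* illustrative stand-in */

/* Duplicate a read-only leaf table into a writable buffer whose size is padded
   up to a multiple of 16 entries, matching the growth policy used elsewhere. */
static CpuIdLeaf *dupLeavesWithSlack(const CpuIdLeaf *paSrc, uint32_t cLeaves)
{
    uint32_t cAlloc = (cLeaves + 15) & ~UINT32_C(15);
    CpuIdLeaf *paDup = static_cast<CpuIdLeaf *>(std::calloc(cAlloc ? cAlloc : 16, sizeof(CpuIdLeaf)));
    if (paDup && cLeaves)
        std::memcpy(paDup, paSrc, cLeaves * sizeof(CpuIdLeaf));
    return paDup;
}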
+ */ + PCPUM pCpum = &pVM->cpum.s; + STAM_REL_REG(pVM, &pCpum->cMsrReads, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/Reads", + STAMUNIT_OCCURENCES, "All RDMSRs making it to CPUM."); + STAM_REL_REG(pVM, &pCpum->cMsrReadsRaiseGp, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/ReadsRaisingGP", + STAMUNIT_OCCURENCES, "RDMSR raising #GPs, except unknown MSRs."); + STAM_REL_REG(pVM, &pCpum->cMsrReadsUnknown, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/ReadsUnknown", + STAMUNIT_OCCURENCES, "RDMSR on unknown MSRs (raises #GP)."); + STAM_REL_REG(pVM, &pCpum->cMsrWrites, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/Writes", + STAMUNIT_OCCURENCES, "All WRMSRs making it to CPUM."); + STAM_REL_REG(pVM, &pCpum->cMsrWritesRaiseGp, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/WritesRaisingGP", + STAMUNIT_OCCURENCES, "WRMSR raising #GPs, except unknown MSRs."); + STAM_REL_REG(pVM, &pCpum->cMsrWritesToIgnoredBits, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/WritesToIgnoredBits", + STAMUNIT_OCCURENCES, "Writing of ignored bits."); + STAM_REL_REG(pVM, &pCpum->cMsrWritesUnknown, STAMTYPE_COUNTER, "/CPUM/MSR-Totals/WritesUnknown", + STAMUNIT_OCCURENCES, "WRMSR on unknown MSRs (raises #GP)."); + + +# ifdef VBOX_WITH_STATISTICS + /* + * Per range. + */ + PCPUMMSRRANGE paRanges = pVM->cpum.s.GuestInfo.paMsrRangesR3; + uint32_t cRanges = pVM->cpum.s.GuestInfo.cMsrRanges; + for (uint32_t i = 0; i < cRanges; i++) + { + char szName[160]; + ssize_t cchName; + + if (paRanges[i].uFirst == paRanges[i].uLast) + cchName = RTStrPrintf(szName, sizeof(szName), "/CPUM/MSRs/%#010x-%s", + paRanges[i].uFirst, paRanges[i].szName); + else + cchName = RTStrPrintf(szName, sizeof(szName), "/CPUM/MSRs/%#010x-%#010x-%s", + paRanges[i].uFirst, paRanges[i].uLast, paRanges[i].szName); + + RTStrCopy(&szName[cchName], sizeof(szName) - cchName, "-reads"); + STAMR3Register(pVM, &paRanges[i].cReads, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_OCCURENCES, "RDMSR"); + + RTStrCopy(&szName[cchName], sizeof(szName) - cchName, "-writes"); + STAMR3Register(pVM, &paRanges[i].cWrites, STAMTYPE_COUNTER, STAMVISIBILITY_USED, szName, STAMUNIT_OCCURENCES, "WRMSR"); + + RTStrCopy(&szName[cchName], sizeof(szName) - cchName, "-GPs"); + STAMR3Register(pVM, &paRanges[i].cGps, STAMTYPE_COUNTER, STAMVISIBILITY_USED, szName, STAMUNIT_OCCURENCES, "#GPs"); + + RTStrCopy(&szName[cchName], sizeof(szName) - cchName, "-ign-bits-writes"); + STAMR3Register(pVM, &paRanges[i].cIgnoredBits, STAMTYPE_COUNTER, STAMVISIBILITY_USED, szName, STAMUNIT_OCCURENCES, "WRMSR w/ ignored bits"); + } +# endif /* VBOX_WITH_STATISTICS */ + + return VINF_SUCCESS; +} + +#endif /* !CPUM_DB_STANDALONE */ + diff --git a/src/VBox/VMM/VMMR3/CSAM.cpp b/src/VBox/VMM/VMMR3/CSAM.cpp new file mode 100644 index 00000000..aef66aed --- /dev/null +++ b/src/VBox/VMM/VMMR3/CSAM.cpp @@ -0,0 +1,2998 @@ +/* $Id: CSAM.cpp $ */ +/** @file + * CSAM - Guest OS Code Scanning and Analysis Manager + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +/** @page pg_csam CSAM - Code Scanning Analysis Manager + * + * The CSAM is responsible for scanning and marking guest OS kernel code paths + * to making safe raw-mode execution possible. + * + * It works tightly with the @ref pg_patm "patch manager" to patch code + * sequences that we could otherwise not execute in raw-mode. + * + * @sa @ref grp_csam + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_CSAM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include "CSAMInternal.h" +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +/* Enabled by default */ +#define CSAM_ENABLE + +/* Enable to monitor code pages for self-modifying code. */ +#define CSAM_MONITOR_CODE_PAGES +/* Enable to monitor all scanned pages +#define CSAM_MONITOR_CSAM_CODE_PAGES */ +/* Enable to scan beyond ret instructions. +#define CSAM_ANALYSE_BEYOND_RET */ + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) csamR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) csamR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static FNPGMR3VIRTINVALIDATE csamR3CodePageInvalidate; + +bool csamIsCodeScanned(PVM pVM, RTRCPTR pInstr, PCSAMPAGE *pPage); +int csamR3CheckPageRecord(PVM pVM, RTRCPTR pInstr); +static PCSAMPAGE csamR3CreatePageRecord(PVM pVM, RTRCPTR GCPtr, CSAMTAG enmTag, bool fCode32, bool fMonitorInvalidation = false); +static int csamRemovePageRecord(PVM pVM, RTRCPTR GCPtr); +static int csamReinit(PVM pVM); +static void csamMarkCode(PVM pVM, PCSAMPAGE pPage, RTRCPTR pInstr, uint32_t opsize, bool fScanned); +static int csamAnalyseCodeStream(PVM pVM, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, bool fCode32, + PFN_CSAMR3ANALYSE pfnCSAMR3Analyse, void *pUserData, PCSAMP2GLOOKUPREC pCacheRec); + +/** @todo "Temporary" for debugging. */ +static bool g_fInCsamR3CodePageInvalidate = false; + +#ifdef VBOX_WITH_DEBUGGER +static FNDBGCCMD csamr3CmdOn; +static FNDBGCCMD csamr3CmdOff; +#endif + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_DEBUGGER +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax, ....pszDescription */ + { "csamon", 0, 0, NULL, 0, 0, csamr3CmdOn, "", "Enable CSAM code scanning." }, + { "csamoff", 0, 0, NULL, 0, 0, csamr3CmdOff, "", "Disable CSAM code scanning." }, +}; +#endif + +/** + * SSM descriptor table for the CSAM structure (save + restore). 
+ */ +static const SSMFIELD g_aCsamFields[] = +{ + SSMFIELD_ENTRY( CSAM, aDangerousInstr), /* didn't used to restored */ + SSMFIELD_ENTRY( CSAM, cDangerousInstr), /* didn't used to restored */ + SSMFIELD_ENTRY( CSAM, iDangerousInstr), /* didn't used to restored */ + SSMFIELD_ENTRY( CSAM, savedstate.cPageRecords), + SSMFIELD_ENTRY( CSAM, savedstate.cPatchPageRecords), + SSMFIELD_ENTRY( CSAM, cDirtyPages), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvDirtyBasePage), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvDirtyFaultPage), + SSMFIELD_ENTRY( CSAM, cPossibleCodePages), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvPossibleCodePage), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvCallInstruction), /* didn't used to be restored */ + SSMFIELD_ENTRY( CSAM, iCallInstruction), /* didn't used to be restored */ + SSMFIELD_ENTRY( CSAM, fScanningStarted), + SSMFIELD_ENTRY( CSAM, fGatesChecked), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the version 5.0.0 CSAM structure. + */ +static const SSMFIELD g_aCsamFields500[] = +{ + SSMFIELD_ENTRY_IGNORE( CSAM, offVM), + SSMFIELD_ENTRY_PAD_HC64( CSAM, Alignment0, sizeof(uint32_t)), + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, pPageTree), + SSMFIELD_ENTRY( CSAM, aDangerousInstr), + SSMFIELD_ENTRY( CSAM, cDangerousInstr), + SSMFIELD_ENTRY( CSAM, iDangerousInstr), + SSMFIELD_ENTRY_RCPTR( CSAM, pPDBitmapGC), /// @todo ignore this? + SSMFIELD_ENTRY_RCPTR( CSAM, pPDHCBitmapGC), /// @todo ignore this? + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, pPDBitmapHC), + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, pPDGCBitmapHC), + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, savedstate.pSSM), + SSMFIELD_ENTRY( CSAM, savedstate.cPageRecords), + SSMFIELD_ENTRY( CSAM, savedstate.cPatchPageRecords), + SSMFIELD_ENTRY( CSAM, cDirtyPages), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvDirtyBasePage), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvDirtyFaultPage), + SSMFIELD_ENTRY( CSAM, cPossibleCodePages), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvPossibleCodePage), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvCallInstruction), + SSMFIELD_ENTRY( CSAM, iCallInstruction), + SSMFIELD_ENTRY_IGNORE( CSAM, hCodePageWriteType), /* added in 5.0 */ + SSMFIELD_ENTRY_IGNORE( CSAM, hCodePageWriteAndInvPgType), /* added in 5.0 */ + SSMFIELD_ENTRY( CSAM, fScanningStarted), + SSMFIELD_ENTRY( CSAM, fGatesChecked), + SSMFIELD_ENTRY_PAD_HC( CSAM, Alignment1, 6, 2), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrTraps), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPagesInv), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrRemovedPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPatchPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPageNPHC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPageNPGC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrFlushes), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrFlushesSkipped), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrKnownPagesHC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrKnownPagesGC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrInstr), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrBytesRead), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrOpcodeRead), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTime), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeCheckAddr), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeAddrConv), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeFlushPage), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeDisasm), + SSMFIELD_ENTRY_IGNORE( CSAM, StatFlushDirtyPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatCheckGates), + SSMFIELD_ENTRY_IGNORE( CSAM, StatCodePageModified), + SSMFIELD_ENTRY_IGNORE( CSAM, StatDangerousWrite), + SSMFIELD_ENTRY_IGNORE( CSAM, StatInstrCacheHit), + SSMFIELD_ENTRY_IGNORE( CSAM, StatInstrCacheMiss), + 
SSMFIELD_ENTRY_IGNORE( CSAM, StatPagePATM), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageCSAM), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageREM), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrUserPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageMonitor), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageRemoveREMFlush), + SSMFIELD_ENTRY_IGNORE( CSAM, StatBitmapAlloc), + SSMFIELD_ENTRY_IGNORE( CSAM, StatScanNextFunction), + SSMFIELD_ENTRY_IGNORE( CSAM, StatScanNextFunctionFailed), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the pre 5.0.0 CSAM structure. + */ +static const SSMFIELD g_aCsamFieldsBefore500[] = +{ + /** @todo there are more fields that can be ignored here. */ + SSMFIELD_ENTRY_IGNORE( CSAM, offVM), + SSMFIELD_ENTRY_PAD_HC64( CSAM, Alignment0, sizeof(uint32_t)), + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, pPageTree), + SSMFIELD_ENTRY( CSAM, aDangerousInstr), + SSMFIELD_ENTRY( CSAM, cDangerousInstr), + SSMFIELD_ENTRY( CSAM, iDangerousInstr), + SSMFIELD_ENTRY_RCPTR( CSAM, pPDBitmapGC), /// @todo ignore this? + SSMFIELD_ENTRY_RCPTR( CSAM, pPDHCBitmapGC), /// @todo ignore this? + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, pPDBitmapHC), + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, pPDGCBitmapHC), + SSMFIELD_ENTRY_IGN_HCPTR( CSAM, savedstate.pSSM), + SSMFIELD_ENTRY( CSAM, savedstate.cPageRecords), + SSMFIELD_ENTRY( CSAM, savedstate.cPatchPageRecords), + SSMFIELD_ENTRY( CSAM, cDirtyPages), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvDirtyBasePage), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvDirtyFaultPage), + SSMFIELD_ENTRY( CSAM, cPossibleCodePages), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvPossibleCodePage), + SSMFIELD_ENTRY_RCPTR_ARRAY( CSAM, pvCallInstruction), + SSMFIELD_ENTRY( CSAM, iCallInstruction), + SSMFIELD_ENTRY( CSAM, fScanningStarted), + SSMFIELD_ENTRY( CSAM, fGatesChecked), + SSMFIELD_ENTRY_PAD_HC( CSAM, Alignment1, 6, 2), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrTraps), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPagesInv), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrRemovedPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPatchPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPageNPHC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrPageNPGC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrFlushes), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrFlushesSkipped), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrKnownPagesHC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrKnownPagesGC), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrInstr), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrBytesRead), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrOpcodeRead), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTime), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeCheckAddr), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeAddrConv), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeFlushPage), + SSMFIELD_ENTRY_IGNORE( CSAM, StatTimeDisasm), + SSMFIELD_ENTRY_IGNORE( CSAM, StatFlushDirtyPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatCheckGates), + SSMFIELD_ENTRY_IGNORE( CSAM, StatCodePageModified), + SSMFIELD_ENTRY_IGNORE( CSAM, StatDangerousWrite), + SSMFIELD_ENTRY_IGNORE( CSAM, StatInstrCacheHit), + SSMFIELD_ENTRY_IGNORE( CSAM, StatInstrCacheMiss), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPagePATM), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageCSAM), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageREM), + SSMFIELD_ENTRY_IGNORE( CSAM, StatNrUserPages), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageMonitor), + SSMFIELD_ENTRY_IGNORE( CSAM, StatPageRemoveREMFlush), + SSMFIELD_ENTRY_IGNORE( CSAM, StatBitmapAlloc), + SSMFIELD_ENTRY_IGNORE( CSAM, StatScanNextFunction), + SSMFIELD_ENTRY_IGNORE( CSAM, StatScanNextFunctionFailed), + SSMFIELD_ENTRY_TERM() +}; + + +/** Fake type to 
simplify g_aCsamPDBitmapArray construction. */ +typedef struct +{ + uint8_t *a[CSAM_PGDIRBMP_CHUNKS]; +} CSAMPDBITMAPARRAY; + +/** + * SSM descriptor table for the CSAM::pPDBitmapHC array. + */ +static SSMFIELD const g_aCsamPDBitmapArray[] = +{ + SSMFIELD_ENTRY_HCPTR_NI_ARRAY(CSAMPDBITMAPARRAY, a), + SSMFIELD_ENTRY_TERM() +}; + + +/** + * SSM descriptor table for the CSAMPAGE structure. + */ +static const SSMFIELD g_aCsamPageFields[] = +{ + SSMFIELD_ENTRY_RCPTR( CSAMPAGE, pPageGC), + SSMFIELD_ENTRY_GCPHYS( CSAMPAGE, GCPhys), + SSMFIELD_ENTRY( CSAMPAGE, fFlags), + SSMFIELD_ENTRY( CSAMPAGE, uSize), + SSMFIELD_ENTRY_HCPTR_NI( CSAMPAGE, pBitmap), + SSMFIELD_ENTRY( CSAMPAGE, fCode32), + SSMFIELD_ENTRY( CSAMPAGE, fMonitorActive), + SSMFIELD_ENTRY( CSAMPAGE, fMonitorInvalidation), + SSMFIELD_ENTRY( CSAMPAGE, enmTag), + SSMFIELD_ENTRY( CSAMPAGE, u64Hash), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the CSAMPAGEREC structure, putmem fashion. + */ +static const SSMFIELD g_aCsamPageRecFields[] = +{ + SSMFIELD_ENTRY_IGN_HCPTR( CSAMPAGEREC, Core.Key), + SSMFIELD_ENTRY_IGN_HCPTR( CSAMPAGEREC, Core.pLeft), + SSMFIELD_ENTRY_IGN_HCPTR( CSAMPAGEREC, Core.pRight), + SSMFIELD_ENTRY_IGNORE( CSAMPAGEREC, Core.uchHeight), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 7), + SSMFIELD_ENTRY_RCPTR( CSAMPAGEREC, page.pPageGC), + SSMFIELD_ENTRY_PAD_HC_AUTO( 0, 4), + SSMFIELD_ENTRY_PAD_MSC32_AUTO( 4), + SSMFIELD_ENTRY_GCPHYS( CSAMPAGEREC, page.GCPhys), + SSMFIELD_ENTRY( CSAMPAGEREC, page.fFlags), + SSMFIELD_ENTRY( CSAMPAGEREC, page.uSize), + SSMFIELD_ENTRY_PAD_HC_AUTO( 0, 4), + SSMFIELD_ENTRY_HCPTR_NI( CSAMPAGEREC, page.pBitmap), + SSMFIELD_ENTRY( CSAMPAGEREC, page.fCode32), + SSMFIELD_ENTRY( CSAMPAGEREC, page.fMonitorActive), + SSMFIELD_ENTRY( CSAMPAGEREC, page.fMonitorInvalidation), + SSMFIELD_ENTRY_PAD_HC_AUTO( 1, 1), + SSMFIELD_ENTRY( CSAMPAGEREC, page.enmTag), + SSMFIELD_ENTRY( CSAMPAGEREC, page.u64Hash), + SSMFIELD_ENTRY_TERM() +}; + + +/** + * Initializes the CSAM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) CSAMR3Init(PVM pVM) +{ + int rc; + + /* + * We only need a saved state dummy loader if HM is enabled. + */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + pVM->fCSAMEnabled = false; + return SSMR3RegisterStub(pVM, "CSAM", 0); + } + + /* + * Raw-mode. + */ + LogFlow(("CSAMR3Init\n")); + + /* Allocate bitmap for the page directory. */ + rc = MMR3HyperAllocOnceNoRel(pVM, CSAM_PGDIRBMP_CHUNKS*sizeof(RTHCPTR), 0, MM_TAG_CSAM, (void **)&pVM->csam.s.pPDBitmapHC); + AssertRCReturn(rc, rc); + rc = MMR3HyperAllocOnceNoRel(pVM, CSAM_PGDIRBMP_CHUNKS*sizeof(RTRCPTR), 0, MM_TAG_CSAM, (void **)&pVM->csam.s.pPDGCBitmapHC); + AssertRCReturn(rc, rc); + pVM->csam.s.pPDBitmapGC = MMHyperR3ToRC(pVM, pVM->csam.s.pPDGCBitmapHC); + pVM->csam.s.pPDHCBitmapGC = MMHyperR3ToRC(pVM, pVM->csam.s.pPDBitmapHC); + + rc = csamReinit(pVM); + AssertRCReturn(rc, rc); + + /* + * Register virtual handler types. 
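The SSMFIELD tables above describe each saved structure as data: one descriptor per field, with host pointers and statistics either ignored or padded, so a generic reader/writer can walk the table instead of relying on hand-written (de)serialization code. A toy version of that idea is sketched below, with an invented SampleState structure and a plain offset/size descriptor; it is illustrative only and much simpler than the real SSM machinery.

#include <cstddef>
#include <cstdint>
#include <vector>

struct SampleState { uint32_t cPages; bool fStarted; void *pTree; };   /* invented example */

struct FieldDesc { size_t off; size_t cb; };

static const FieldDesc g_aSampleFields[] =
{
    { offsetof(SampleState, cPages),   sizeof(uint32_t) },
    { offsetof(SampleState, fStarted), sizeof(bool)     },
    /* pTree intentionally omitted - host pointers are not saved */
};

/* Serialize only the described fields into a byte stream. */
static void putStruct(std::vector<uint8_t> &stream, const SampleState &state)
{
    for (const FieldDesc &f : g_aSampleFields)
    {
        const uint8_t *pb = reinterpret_cast<const uint8_t *>(&state) + f.off;
        stream.insert(stream.end(), pb, pb + f.cb);
    }
}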
+ */ + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3 */, + csamCodePageWriteHandler, + "csamCodePageWriteHandler", "csamRCCodePageWritePfHandler", + "CSAM code page write handler", + &pVM->csam.s.hCodePageWriteType); + AssertLogRelRCReturn(rc, rc); + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE, false /*fRelocUserRC*/, + csamR3CodePageInvalidate, + csamCodePageWriteHandler, + "csamCodePageWriteHandler", "csamRCCodePageWritePfHandler", + "CSAM code page write and invlpg handler", + &pVM->csam.s.hCodePageWriteAndInvPgType); + AssertLogRelRCReturn(rc, rc); + + /* + * Register save and load state notifiers. + */ + rc = SSMR3RegisterInternal(pVM, "CSAM", 0, CSAM_SAVED_STATE_VERSION, sizeof(pVM->csam.s) + PAGE_SIZE*16, + NULL, NULL, NULL, + NULL, csamR3Save, NULL, + NULL, csamR3Load, NULL); + AssertRCReturn(rc, rc); + + STAM_REG(pVM, &pVM->csam.s.StatNrTraps, STAMTYPE_COUNTER, "/CSAM/PageTraps", STAMUNIT_OCCURENCES, "The number of CSAM page traps."); + STAM_REG(pVM, &pVM->csam.s.StatDangerousWrite, STAMTYPE_COUNTER, "/CSAM/DangerousWrites", STAMUNIT_OCCURENCES, "The number of dangerous writes that cause a context switch."); + + STAM_REG(pVM, &pVM->csam.s.StatNrPageNPHC, STAMTYPE_COUNTER, "/CSAM/HC/PageNotPresent", STAMUNIT_OCCURENCES, "The number of CSAM pages marked not present."); + STAM_REG(pVM, &pVM->csam.s.StatNrPageNPGC, STAMTYPE_COUNTER, "/CSAM/GC/PageNotPresent", STAMUNIT_OCCURENCES, "The number of CSAM pages marked not present."); + STAM_REG(pVM, &pVM->csam.s.StatNrPages, STAMTYPE_COUNTER, "/CSAM/PageRec/AddedRW", STAMUNIT_OCCURENCES, "The number of CSAM page records (RW monitoring)."); + STAM_REG(pVM, &pVM->csam.s.StatNrPagesInv, STAMTYPE_COUNTER, "/CSAM/PageRec/AddedRWI", STAMUNIT_OCCURENCES, "The number of CSAM page records (RW & invalidation monitoring)."); + STAM_REG(pVM, &pVM->csam.s.StatNrRemovedPages, STAMTYPE_COUNTER, "/CSAM/PageRec/Removed", STAMUNIT_OCCURENCES, "The number of removed CSAM page records."); + STAM_REG(pVM, &pVM->csam.s.StatPageRemoveREMFlush,STAMTYPE_COUNTER, "/CSAM/PageRec/Removed/REMFlush", STAMUNIT_OCCURENCES, "The number of removed CSAM page records that caused a REM flush."); + + STAM_REG(pVM, &pVM->csam.s.StatNrPatchPages, STAMTYPE_COUNTER, "/CSAM/PageRec/Patch", STAMUNIT_OCCURENCES, "The number of CSAM patch page records."); + STAM_REG(pVM, &pVM->csam.s.StatNrUserPages, STAMTYPE_COUNTER, "/CSAM/PageRec/Ignore/User", STAMUNIT_OCCURENCES, "The number of CSAM user page records (ignored)."); + STAM_REG(pVM, &pVM->csam.s.StatPagePATM, STAMTYPE_COUNTER, "/CSAM/PageRec/Type/PATM", STAMUNIT_OCCURENCES, "The number of PATM page records."); + STAM_REG(pVM, &pVM->csam.s.StatPageCSAM, STAMTYPE_COUNTER, "/CSAM/PageRec/Type/CSAM", STAMUNIT_OCCURENCES, "The number of CSAM page records."); + STAM_REG(pVM, &pVM->csam.s.StatPageREM, STAMTYPE_COUNTER, "/CSAM/PageRec/Type/REM", STAMUNIT_OCCURENCES, "The number of REM page records."); + STAM_REG(pVM, &pVM->csam.s.StatPageMonitor, STAMTYPE_COUNTER, "/CSAM/PageRec/Monitored", STAMUNIT_OCCURENCES, "The number of monitored pages."); + + STAM_REG(pVM, &pVM->csam.s.StatCodePageModified, STAMTYPE_COUNTER, "/CSAM/Monitor/DirtyPage", STAMUNIT_OCCURENCES, "The number of code page modifications."); + + STAM_REG(pVM, &pVM->csam.s.StatNrFlushes, STAMTYPE_COUNTER, "/CSAM/PageFlushes", STAMUNIT_OCCURENCES, "The number of CSAM page flushes."); + STAM_REG(pVM, &pVM->csam.s.StatNrFlushesSkipped, STAMTYPE_COUNTER, "/CSAM/PageFlushesSkipped", 
STAMUNIT_OCCURENCES, "The number of CSAM page flushes that were skipped."); + STAM_REG(pVM, &pVM->csam.s.StatNrKnownPagesHC, STAMTYPE_COUNTER, "/CSAM/HC/KnownPageRecords", STAMUNIT_OCCURENCES, "The number of known CSAM page records."); + STAM_REG(pVM, &pVM->csam.s.StatNrKnownPagesGC, STAMTYPE_COUNTER, "/CSAM/GC/KnownPageRecords", STAMUNIT_OCCURENCES, "The number of known CSAM page records."); + STAM_REG(pVM, &pVM->csam.s.StatNrInstr, STAMTYPE_COUNTER, "/CSAM/ScannedInstr", STAMUNIT_OCCURENCES, "The number of scanned instructions."); + STAM_REG(pVM, &pVM->csam.s.StatNrBytesRead, STAMTYPE_COUNTER, "/CSAM/BytesRead", STAMUNIT_OCCURENCES, "The number of bytes read for scanning."); + STAM_REG(pVM, &pVM->csam.s.StatNrOpcodeRead, STAMTYPE_COUNTER, "/CSAM/OpcodeBytesRead", STAMUNIT_OCCURENCES, "The number of opcode bytes read by the recompiler."); + + STAM_REG(pVM, &pVM->csam.s.StatBitmapAlloc, STAMTYPE_COUNTER, "/CSAM/Alloc/PageBitmap", STAMUNIT_OCCURENCES, "The number of page bitmap allocations."); + + STAM_REG(pVM, &pVM->csam.s.StatInstrCacheHit, STAMTYPE_COUNTER, "/CSAM/Cache/Hit", STAMUNIT_OCCURENCES, "The number of dangerous instruction cache hits."); + STAM_REG(pVM, &pVM->csam.s.StatInstrCacheMiss, STAMTYPE_COUNTER, "/CSAM/Cache/Miss", STAMUNIT_OCCURENCES, "The number of dangerous instruction cache misses."); + + STAM_REG(pVM, &pVM->csam.s.StatScanNextFunction, STAMTYPE_COUNTER, "/CSAM/Function/Scan/Success", STAMUNIT_OCCURENCES, "The number of found functions beyond the ret border."); + STAM_REG(pVM, &pVM->csam.s.StatScanNextFunctionFailed, STAMTYPE_COUNTER, "/CSAM/Function/Scan/Failed", STAMUNIT_OCCURENCES, "The number of refused functions beyond the ret border."); + + STAM_REG(pVM, &pVM->csam.s.StatTime, STAMTYPE_PROFILE, "/PROF/CSAM/Scan", STAMUNIT_TICKS_PER_CALL, "Scanning overhead."); + STAM_REG(pVM, &pVM->csam.s.StatTimeCheckAddr, STAMTYPE_PROFILE, "/PROF/CSAM/CheckAddr", STAMUNIT_TICKS_PER_CALL, "Address check overhead."); + STAM_REG(pVM, &pVM->csam.s.StatTimeAddrConv, STAMTYPE_PROFILE, "/PROF/CSAM/AddrConv", STAMUNIT_TICKS_PER_CALL, "Address conversion overhead."); + STAM_REG(pVM, &pVM->csam.s.StatTimeFlushPage, STAMTYPE_PROFILE, "/PROF/CSAM/FlushPage", STAMUNIT_TICKS_PER_CALL, "Page flushing overhead."); + STAM_REG(pVM, &pVM->csam.s.StatTimeDisasm, STAMTYPE_PROFILE, "/PROF/CSAM/Disasm", STAMUNIT_TICKS_PER_CALL, "Disassembly overhead."); + STAM_REG(pVM, &pVM->csam.s.StatFlushDirtyPages, STAMTYPE_PROFILE, "/PROF/CSAM/FlushDirtyPage", STAMUNIT_TICKS_PER_CALL, "Dirty page flushing overhead."); + STAM_REG(pVM, &pVM->csam.s.StatCheckGates, STAMTYPE_PROFILE, "/PROF/CSAM/CheckGates", STAMUNIT_TICKS_PER_CALL, "CSAMR3CheckGates overhead."); + + /* + * Check CFGM option and enable/disable CSAM. + */ + bool fEnabled; + rc = CFGMR3QueryBool(CFGMR3GetRoot(pVM), "CSAMEnabled", &fEnabled); + if (RT_FAILURE(rc)) +#ifdef CSAM_ENABLE + fEnabled = true; +#else + fEnabled = false; +#endif + if (fEnabled) + CSAMEnableScanning(pVM); + +#ifdef VBOX_WITH_DEBUGGER + /* + * Debugger commands. + */ + static bool fRegisteredCmds = false; + if (!fRegisteredCmds) + { + rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + if (RT_SUCCESS(rc)) + fRegisteredCmds = true; + } +#endif + + return VINF_SUCCESS; +} + +/** + * (Re)initializes CSAM + * + * @param pVM The cross context VM structure. + */ +static int csamReinit(PVM pVM) +{ + /* + * Assert alignment and sizes. 
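The CSAMEnabled handling shows the usual pattern for optional settings: query the configuration tree and, when the key is absent, fall back to a compile-time default (CSAM_ENABLE). A minimal standalone sketch of that pattern follows, using an environment variable purely as a stand-in for the CFGM tree:

#include <cstdlib>
#include <cstring>

/* Resolve a boolean setting with a compile-time default, mirroring the
   CFGMR3QueryBool + #ifdef fallback above.  getenv is only an illustrative
   substitute for the configuration tree. */
static bool queryBoolWithDefault(const char *pszName, bool fDefault)
{
    const char *pszValue = std::getenv(pszName);
    if (!pszValue)
        return fDefault;                       /* key absent: use the built-in default */
    return std::strcmp(pszValue, "0") != 0;    /* any non-"0" value counts as true */
}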
+ */ + AssertRelease(!(RT_UOFFSETOF(VM, csam.s) & 31)); + AssertRelease(sizeof(pVM->csam.s) <= sizeof(pVM->csam.padding)); + AssertRelease(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* + * Setup any fixed pointers and offsets. + */ + pVM->csam.s.offVM = RT_UOFFSETOF(VM, patm); + + pVM->csam.s.fGatesChecked = false; + pVM->csam.s.fScanningStarted = false; + + PVMCPU pVCpu = &pVM->aCpus[0]; /* raw mode implies 1 VPCU */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_CSAM_PENDING_ACTION); + pVM->csam.s.cDirtyPages = 0; + /* not necessary */ + memset(pVM->csam.s.pvDirtyBasePage, 0, sizeof(pVM->csam.s.pvDirtyBasePage)); + memset(pVM->csam.s.pvDirtyFaultPage, 0, sizeof(pVM->csam.s.pvDirtyFaultPage)); + + memset(&pVM->csam.s.aDangerousInstr, 0, sizeof(pVM->csam.s.aDangerousInstr)); + pVM->csam.s.cDangerousInstr = 0; + pVM->csam.s.iDangerousInstr = 0; + + memset(pVM->csam.s.pvCallInstruction, 0, sizeof(pVM->csam.s.pvCallInstruction)); + pVM->csam.s.iCallInstruction = 0; + + /** @note never mess with the pgdir bitmap here! */ + return VINF_SUCCESS; +} + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate itself inside the GC. + * + * The csam will update the addresses used by the switcher. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta. + */ +VMMR3_INT_DECL(void) CSAMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + if (offDelta && VM_IS_RAW_MODE_ENABLED(pVM)) + { + /* Adjust pgdir and page bitmap pointers. */ + pVM->csam.s.pPDBitmapGC = MMHyperR3ToRC(pVM, pVM->csam.s.pPDGCBitmapHC); + pVM->csam.s.pPDHCBitmapGC = MMHyperR3ToRC(pVM, pVM->csam.s.pPDBitmapHC); + + for(int i=0;icsam.s.pPDGCBitmapHC[i]) + { + pVM->csam.s.pPDGCBitmapHC[i] += offDelta; + } + } + } + return; +} + +/** + * Terminates the csam. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) CSAMR3Term(PVM pVM) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return VINF_SUCCESS; + + int rc; + + rc = CSAMR3Reset(pVM); + AssertRC(rc); + + /** @todo triggers assertion in MMHyperFree */ +#if 0 + for(int i=0;icsam.s.pPDBitmapHC[i]) + MMHyperFree(pVM, pVM->csam.s.pPDBitmapHC[i]); + } +#endif + + return VINF_SUCCESS; +} + +/** + * CSAM reset callback. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) CSAMR3Reset(PVM pVM) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return VINF_SUCCESS; + + /* Clear page bitmaps. */ + for (int i = 0; i < CSAM_PGDIRBMP_CHUNKS; i++) + { + if (pVM->csam.s.pPDBitmapHC[i]) + { + Assert((CSAM_PAGE_BITMAP_SIZE& 3) == 0); + ASMMemZero32(pVM->csam.s.pPDBitmapHC[i], CSAM_PAGE_BITMAP_SIZE); + } + } + + /* Remove all CSAM page records. */ + for (;;) + { + PCSAMPAGEREC pPageRec = (PCSAMPAGEREC)RTAvlPVGetBestFit(&pVM->csam.s.pPageTree, 0, true); + if (!pPageRec) + break; + csamRemovePageRecord(pVM, pPageRec->page.pPageGC); + } + Assert(!pVM->csam.s.pPageTree); + + csamReinit(pVM); + + return VINF_SUCCESS; +} + + +/** + * Callback function for RTAvlPVDoWithAll + * + * Counts the number of records in the tree + * + * @returns VBox status code. 
+ * @param pNode Current node + * @param pcPatches Pointer to patch counter + */ +static DECLCALLBACK(int) csamR3SaveCountRecord(PAVLPVNODECORE pNode, void *pcPatches) +{ + NOREF(pNode); + *(uint32_t *)pcPatches += 1; + return VINF_SUCCESS; +} + +/** + * Callback function for RTAvlPVDoWithAll for saving a page record. + * + * @returns VBox status code. + * @param pNode Current node + * @param pvVM Pointer to the VM + */ +static DECLCALLBACK(int) csamR3SavePageState(PAVLPVNODECORE pNode, void *pvVM) +{ + PCSAMPAGEREC pPage = (PCSAMPAGEREC)pNode; + PVM pVM = (PVM)pvVM; + PSSMHANDLE pSSM = pVM->csam.s.savedstate.pSSM; + + int rc = SSMR3PutStructEx(pSSM, &pPage->page, sizeof(pPage->page), 0 /*fFlags*/, &g_aCsamPageFields[0], NULL); + AssertLogRelRCReturn(rc, rc); + + if (pPage->page.pBitmap) + SSMR3PutMem(pSSM, pPage->page.pBitmap, CSAM_PAGE_BITMAP_SIZE); + + return VINF_SUCCESS; +} + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) csamR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + int rc; + + /* + * Count the number of page records in the tree (feeling lazy) + */ + pVM->csam.s.savedstate.cPageRecords = 0; + RTAvlPVDoWithAll(&pVM->csam.s.pPageTree, true, csamR3SaveCountRecord, &pVM->csam.s.savedstate.cPageRecords); + + /* + * Save CSAM structure. + */ + pVM->csam.s.savedstate.pSSM = pSSM; + rc = SSMR3PutStructEx(pSSM, &pVM->csam.s, sizeof(pVM->csam.s), 0 /*fFlags*/, g_aCsamFields, NULL); + AssertLogRelRCReturn(rc, rc); + + /* + * Save pgdir bitmap. + */ + SSMR3PutU32(pSSM, CSAM_PGDIRBMP_CHUNKS); + SSMR3PutU32(pSSM, CSAM_PAGE_BITMAP_SIZE); + for (uint32_t i = 0; i < CSAM_PGDIRBMP_CHUNKS; i++) + if (pVM->csam.s.pPDBitmapHC[i]) + { + SSMR3PutU32(pSSM, i); + SSMR3PutMem(pSSM, pVM->csam.s.pPDBitmapHC[i], CSAM_PAGE_BITMAP_SIZE); + } + SSMR3PutU32(pSSM, UINT32_MAX); /* terminator */ + + /* + * Save page records + */ + pVM->csam.s.savedstate.pSSM = pSSM; + rc = RTAvlPVDoWithAll(&pVM->csam.s.pPageTree, true, csamR3SavePageState, pVM); + AssertRCReturn(rc, rc); + + pVM->csam.s.savedstate.pSSM = NULL; + return VINF_SUCCESS; +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) csamR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + int rc; + + /* + * Check preconditions. + */ + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + Assert(pVM->csam.s.savedstate.pSSM == NULL); + AssertLogRelMsgReturn(uVersion >= CSAM_SAVED_STATE_VERSION_PUT_MEM && uVersion <= CSAM_SAVED_STATE_VERSION, + ("uVersion=%d (%#x)\n", uVersion, uVersion), + VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION); + + if (uVersion >= CSAM_SAVED_STATE_VERSION_PUT_STRUCT) + { + /* + * Restore the SSMR3PutStructEx fashioned state. 
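csamR3Save stores the page-directory bitmap sparsely: only allocated chunks are written, each preceded by its index, and UINT32_MAX terminates the list so the loader needs no up-front count. A standalone sketch of that chunk format, with a byte vector standing in for the SSM stream (putU32 and saveSparseChunks are invented helpers):

#include <cstdint>
#include <vector>

/* Append a 32-bit value to the stream (little endian for this sketch). */
static void putU32(std::vector<uint8_t> &s, uint32_t u)
{
    for (int i = 0; i < 4; i++)
        s.push_back(uint8_t(u >> (8 * i)));
}

/* Write only the allocated chunks as (index, payload) pairs and close the
   list with a UINT32_MAX terminator, as csamR3Save does for the bitmap. */
static void saveSparseChunks(std::vector<uint8_t> &s, uint8_t *const *papChunks,
                             uint32_t cChunks, uint32_t cbChunk)
{
    for (uint32_t i = 0; i < cChunks; i++)
        if (papChunks[i])
        {
            putU32(s, i);
            s.insert(s.end(), papChunks[i], papChunks[i] + cbChunk);
        }
    putU32(s, UINT32_MAX);                      /* terminator */
}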
+ */ + rc = SSMR3GetStructEx(pSSM, &pVM->csam.s, sizeof(pVM->csam.s), 0 /*fFlags*/, &g_aCsamFields[0], NULL); + + /* + * Restore page bitmaps + */ + uint32_t cPgDirBmpChunks = 0; + rc = SSMR3GetU32(pSSM, &cPgDirBmpChunks); + uint32_t cbPgDirBmpChunk = 0; + rc = SSMR3GetU32(pSSM, &cbPgDirBmpChunk); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(cPgDirBmpChunks <= CSAM_PGDIRBMP_CHUNKS, + ("cPgDirBmpChunks=%#x (vs %#x)\n", cPgDirBmpChunks, CSAM_PGDIRBMP_CHUNKS), + VERR_SSM_UNEXPECTED_DATA); + AssertLogRelMsgReturn(cbPgDirBmpChunk <= CSAM_PAGE_BITMAP_SIZE, + ("cbPgDirBmpChunk=%#x (vs %#x)\n", cbPgDirBmpChunk, CSAM_PAGE_BITMAP_SIZE), + VERR_SSM_UNEXPECTED_DATA); + for (uint32_t i = 0; i < CSAM_PGDIRBMP_CHUNKS; i++) + { + Assert(!pVM->csam.s.pPDBitmapHC[i]); + Assert(!pVM->csam.s.pPDGCBitmapHC[i]); + } + for (uint32_t iNext = 0;;) + { + uint32_t iThis; + rc = SSMR3GetU32(pSSM, &iThis); + AssertLogRelRCReturn(rc, rc); + AssertLogRelMsgReturn(iThis >= iNext, ("iThis=%#x iNext=%#x\n", iThis, iNext), VERR_SSM_UNEXPECTED_DATA); + if (iThis == UINT32_MAX) + break; + + rc = MMHyperAlloc(pVM, CSAM_PAGE_BITMAP_SIZE, 0, MM_TAG_CSAM, (void **)&pVM->csam.s.pPDBitmapHC[iThis]); + AssertLogRelRCReturn(rc, rc); + pVM->csam.s.pPDGCBitmapHC[iThis] = MMHyperR3ToRC(pVM, pVM->csam.s.pPDBitmapHC[iThis]); + + rc = SSMR3GetMem(pSSM, pVM->csam.s.pPDBitmapHC[iThis], CSAM_PAGE_BITMAP_SIZE); + AssertLogRelRCReturn(rc, rc); + iNext = iThis + 1; + } + + /* + * Restore page records + */ + uint32_t const cPageRecords = pVM->csam.s.savedstate.cPageRecords + pVM->csam.s.savedstate.cPatchPageRecords; + for (uint32_t iPageRec = 0; iPageRec < cPageRecords; iPageRec++) + { + CSAMPAGE PageRec; + RT_ZERO(PageRec); + rc = SSMR3GetStructEx(pSSM, &PageRec, sizeof(PageRec), 0 /*fFlags*/, &g_aCsamPageFields[0], NULL); + AssertLogRelRCReturn(rc, rc); + + /* Recreate the page record. */ + PCSAMPAGE pPage = csamR3CreatePageRecord(pVM, PageRec.pPageGC, PageRec.enmTag, PageRec.fCode32, + PageRec.fMonitorInvalidation); + AssertReturn(pPage, VERR_NO_MEMORY); + pPage->GCPhys = PageRec.GCPhys; + pPage->fFlags = PageRec.fFlags; + pPage->u64Hash = PageRec.u64Hash; + if (PageRec.pBitmap) + { + rc = SSMR3GetMem(pSSM, pPage->pBitmap, CSAM_PAGE_BITMAP_SIZE); + AssertLogRelRCReturn(rc, rc); + } + else + { + MMR3HeapFree(pPage->pBitmap); + pPage->pBitmap = NULL; + } + } + } + else + { + /* + * Restore the old SSMR3PutMem fashioned state. + */ + + /* CSAM structure first. */ + CSAM csamInfo; + RT_ZERO(csamInfo); + if ( SSMR3HandleVersion(pSSM) >= VBOX_FULL_VERSION_MAKE(4, 3, 51) + && SSMR3HandleRevision(pSSM) >= 100346) + rc = SSMR3GetStructEx(pSSM, &csamInfo, sizeof(csamInfo), SSMSTRUCT_FLAGS_MEM_BAND_AID, + &g_aCsamFields500[0], NULL); + else + rc = SSMR3GetStructEx(pSSM, &csamInfo, sizeof(csamInfo), SSMSTRUCT_FLAGS_MEM_BAND_AID_RELAXED, + &g_aCsamFieldsBefore500[0], NULL); + AssertRCReturn(rc, rc); + + pVM->csam.s.fGatesChecked = csamInfo.fGatesChecked; + pVM->csam.s.fScanningStarted = csamInfo.fScanningStarted; + + /* Restore dirty code page info. 
*/ + pVM->csam.s.cDirtyPages = csamInfo.cDirtyPages; + memcpy(pVM->csam.s.pvDirtyBasePage, csamInfo.pvDirtyBasePage, sizeof(pVM->csam.s.pvDirtyBasePage)); + memcpy(pVM->csam.s.pvDirtyFaultPage, csamInfo.pvDirtyFaultPage, sizeof(pVM->csam.s.pvDirtyFaultPage)); + + /* Restore possible code page */ + pVM->csam.s.cPossibleCodePages = csamInfo.cPossibleCodePages; + memcpy(pVM->csam.s.pvPossibleCodePage, csamInfo.pvPossibleCodePage, sizeof(pVM->csam.s.pvPossibleCodePage)); + + /* + * Restore pgdir bitmap (we'll change the pointers next). + */ + rc = SSMR3GetStructEx(pSSM, pVM->csam.s.pPDBitmapHC, sizeof(uint8_t *) * CSAM_PGDIRBMP_CHUNKS, + SSMSTRUCT_FLAGS_MEM_BAND_AID_RELAXED, &g_aCsamPDBitmapArray[0], NULL); + AssertRCReturn(rc, rc); + + /* + * Restore page bitmaps + */ + for (unsigned i = 0; i < CSAM_PGDIRBMP_CHUNKS; i++) + if (pVM->csam.s.pPDBitmapHC[i]) + { + rc = MMHyperAlloc(pVM, CSAM_PAGE_BITMAP_SIZE, 0, MM_TAG_CSAM, (void **)&pVM->csam.s.pPDBitmapHC[i]); + AssertLogRelRCReturn(rc, rc); + pVM->csam.s.pPDGCBitmapHC[i] = MMHyperR3ToRC(pVM, pVM->csam.s.pPDBitmapHC[i]); + + /* Restore the bitmap. */ + rc = SSMR3GetMem(pSSM, pVM->csam.s.pPDBitmapHC[i], CSAM_PAGE_BITMAP_SIZE); + AssertRCReturn(rc, rc); + } + else + { + Assert(!pVM->csam.s.pPDGCBitmapHC[i]); + pVM->csam.s.pPDGCBitmapHC[i] = 0; + } + + /* + * Restore page records + */ + for (uint32_t i=0;iGCPhys = page.page.GCPhys; + pPage->fFlags = page.page.fFlags; + pPage->u64Hash = page.page.u64Hash; + + if (page.page.pBitmap) + { + rc = SSMR3GetMem(pSSM, pPage->pBitmap, CSAM_PAGE_BITMAP_SIZE); + AssertRCReturn(rc, rc); + } + else + { + MMR3HeapFree(pPage->pBitmap); + pPage->pBitmap = NULL; + } + } + + /* Note: we don't restore aDangerousInstr; it will be recreated automatically. */ + memset(&pVM->csam.s.aDangerousInstr, 0, sizeof(pVM->csam.s.aDangerousInstr)); + pVM->csam.s.cDangerousInstr = 0; + pVM->csam.s.iDangerousInstr = 0; + } + return VINF_SUCCESS; +} + +/** + * Convert guest context address to host context pointer + * + * @returns Byte pointer (ring-3 context) corresponding to pGCPtr on success, + * NULL on failure. + * @param pVM The cross context VM structure. + * @param pCacheRec Address conversion cache record + * @param pGCPtr Guest context pointer + * @returns Host context pointer or NULL in case of an error + * + */ +static uint8_t *csamR3GCVirtToHCVirt(PVM pVM, PCSAMP2GLOOKUPREC pCacheRec, RCPTRTYPE(uint8_t *) pGCPtr) +{ + int rc; + void *pHCPtr; + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + STAM_PROFILE_START(&pVM->csam.s.StatTimeAddrConv, a); + + pHCPtr = PATMR3GCPtrToHCPtr(pVM, pGCPtr); + if (pHCPtr) + return (uint8_t *)pHCPtr; + + if (pCacheRec->pPageLocStartHC) + { + uint32_t offset = pGCPtr & PAGE_OFFSET_MASK; + if (pCacheRec->pGuestLoc == (pGCPtr & PAGE_BASE_GC_MASK)) + { + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeAddrConv, a); + return pCacheRec->pPageLocStartHC + offset; + } + } + + /* Release previous lock if any. 
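csamR3GCVirtToHCVirt keeps a one-entry cache in the lookup record: if the requested guest address lies on the same page as the previous translation, the cached host mapping plus the page offset is returned without another PGM call. A simplified standalone sketch of that cache (PageCache, guestToHost and translatePage are hypothetical names; the real code also tracks a mapping lock):

#include <cstdint>

static const uintptr_t kPageSize       = 0x1000;
static const uintptr_t kPageOffsetMask = kPageSize - 1;

/* One-entry translation cache: remember the host mapping of the last guest
   page and answer further lookups on the same page without re-translating. */
struct PageCache
{
    uintptr_t guestPageBase = ~uintptr_t(0);
    uint8_t  *hostPageBase  = nullptr;
};

static uint8_t *guestToHost(PageCache &cache, uintptr_t guestAddr,
                            uint8_t *(*translatePage)(uintptr_t guestPageBase))
{
    uintptr_t pageBase = guestAddr & ~kPageOffsetMask;
    uintptr_t offset   = guestAddr &  kPageOffsetMask;
    if (cache.hostPageBase && cache.guestPageBase == pageBase)
        return cache.hostPageBase + offset;          /* cache hit: same guest page */
    uint8_t *hostBase = translatePage(pageBase);     /* slow path: do the real translation */
    if (!hostBase)
        return nullptr;
    cache.guestPageBase = pageBase;
    cache.hostPageBase  = hostBase;
    return hostBase + offset;
}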
*/ + if (pCacheRec->Lock.pvMap) + { + PGMPhysReleasePageMappingLock(pVM, &pCacheRec->Lock); + pCacheRec->Lock.pvMap = NULL; + } + + rc = PGMPhysGCPtr2CCPtrReadOnly(pVCpu, pGCPtr, (const void **)&pHCPtr, &pCacheRec->Lock); + if (rc != VINF_SUCCESS) + { +//// AssertMsgRC(rc, ("MMR3PhysGCVirt2HCVirtEx failed for %RRv\n", pGCPtr)); + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeAddrConv, a); + return NULL; + } + + pCacheRec->pPageLocStartHC = (uint8_t*)((uintptr_t)pHCPtr & PAGE_BASE_HC_MASK); + pCacheRec->pGuestLoc = pGCPtr & PAGE_BASE_GC_MASK; + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeAddrConv, a); + return (uint8_t *)pHCPtr; +} + + +/** For csamR3ReadBytes. */ +typedef struct CSAMDISINFO +{ + PVM pVM; + uint8_t const *pbSrcInstr; /* aka pInstHC */ +} CSAMDISINFO, *PCSAMDISINFO; + + +/** + * @callback_method_impl{FNDISREADBYTES} + */ +static DECLCALLBACK(int) csamR3ReadBytes(PDISCPUSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead) +{ + PCSAMDISINFO pDisInfo = (PCSAMDISINFO)pDis->pvUser; + + /* + * We are not interested in patched instructions, so read the original opcode bytes. + * + * Note! single instruction patches (int3) are checked in CSAMR3AnalyseCallback + * + * Since we're decoding one instruction at the time, we don't need to be + * concerned about any patched instructions following the first one. We + * could in fact probably skip this PATM call for offInstr != 0. + */ + size_t cbRead = cbMaxRead; + RTUINTPTR uSrcAddr = pDis->uInstrAddr + offInstr; + int rc = PATMR3ReadOrgInstr(pDisInfo->pVM, pDis->uInstrAddr + offInstr, &pDis->abInstr[offInstr], cbRead, &cbRead); + if (RT_SUCCESS(rc)) + { + if (cbRead >= cbMinRead) + { + pDis->cbCachedInstr = offInstr + (uint8_t)cbRead; + return rc; + } + + cbMinRead -= (uint8_t)cbRead; + cbMaxRead -= (uint8_t)cbRead; + offInstr += (uint8_t)cbRead; + uSrcAddr += cbRead; + } + + /* + * The current byte isn't a patch instruction byte. + */ + AssertPtr(pDisInfo->pbSrcInstr); + if ((pDis->uInstrAddr >> PAGE_SHIFT) == ((uSrcAddr + cbMaxRead - 1) >> PAGE_SHIFT)) + { + memcpy(&pDis->abInstr[offInstr], &pDisInfo->pbSrcInstr[offInstr], cbMaxRead); + offInstr += cbMaxRead; + rc = VINF_SUCCESS; + } + else if ( (pDis->uInstrAddr >> PAGE_SHIFT) == ((uSrcAddr + cbMinRead - 1) >> PAGE_SHIFT) + || PATMIsPatchGCAddr(pDisInfo->pVM, uSrcAddr) /** @todo does CSAM actually analyze patch code, or is this just a copy&past check? */ + ) + { + memcpy(&pDis->abInstr[offInstr], &pDisInfo->pbSrcInstr[offInstr], cbMinRead); + offInstr += cbMinRead; + rc = VINF_SUCCESS; + } + else + { + /* Crossed page boundrary, pbSrcInstr is no good... 
*/ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pDisInfo->pVM), &pDis->abInstr[offInstr], uSrcAddr, cbMinRead); + offInstr += cbMinRead; + } + + pDis->cbCachedInstr = offInstr; + return rc; +} + +DECLINLINE(int) csamR3DISInstr(PVM pVM, RTRCPTR InstrGC, uint8_t *InstrHC, DISCPUMODE enmCpuMode, + PDISCPUSTATE pCpu, uint32_t *pcbInstr, char *pszOutput, size_t cbOutput) +{ + CSAMDISINFO DisInfo = { pVM, InstrHC }; +#ifdef DEBUG + return DISInstrToStrEx(InstrGC, enmCpuMode, csamR3ReadBytes, &DisInfo, DISOPTYPE_ALL, + pCpu, pcbInstr, pszOutput, cbOutput); +#else + /* We are interested in everything except harmless stuff */ + if (pszOutput) + return DISInstrToStrEx(InstrGC, enmCpuMode, csamR3ReadBytes, &DisInfo, + ~(DISOPTYPE_INVALID | DISOPTYPE_HARMLESS | DISOPTYPE_RRM_MASK), + pCpu, pcbInstr, pszOutput, cbOutput); + return DISInstrEx(InstrGC, enmCpuMode, ~(DISOPTYPE_INVALID | DISOPTYPE_HARMLESS | DISOPTYPE_RRM_MASK), + csamR3ReadBytes, &DisInfo, pCpu, pcbInstr); +#endif +} + +/** + * Analyses the instructions following the cli for compliance with our heuristics for cli + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCpu CPU disassembly state + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pCacheRec GC to HC cache record + * @param pUserData User pointer (callback specific) + * + */ +static DECLCALLBACK(int) CSAMR3AnalyseCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, + PCSAMP2GLOOKUPREC pCacheRec, void *pUserData) +{ + PCSAMPAGE pPage = (PCSAMPAGE)pUserData; + int rc; + NOREF(pInstrGC); + + switch (pCpu->pCurInstr->uOpcode) + { + case OP_INT: + Assert(pCpu->Param1.fUse & DISUSE_IMMEDIATE8); + if (pCpu->Param1.uValue == 3) + { + //two byte int 3 + return VINF_SUCCESS; + } + break; + + /* removing breaks win2k guests? */ + case OP_IRET: + if (EMIsRawRing1Enabled(pVM)) + break; + RT_FALL_THRU(); + + case OP_ILLUD2: + /* This appears to be some kind of kernel panic in Linux 2.4; no point to continue. */ + case OP_RETN: + case OP_INT3: + case OP_INVALID: + return VINF_SUCCESS; + } + + // Check for exit points + switch (pCpu->pCurInstr->uOpcode) + { + /* It's not a good idea to patch pushf instructions: + * - increases the chance of conflicts (code jumping to the next instruction) + * - better to patch the cli + * - code that branches before the cli will likely hit an int 3 + * - in general doesn't offer any benefits as we don't allow nested patch blocks (IF is always 1) + */ + case OP_PUSHF: + case OP_POPF: + break; + + case OP_CLI: + { + uint32_t cbInstrs = 0; + uint32_t cbCurInstr = pCpu->cbInstr; + bool fCode32 = pPage->fCode32; + + Assert(fCode32); + + PATMR3AddHint(pVM, pCurInstrGC, (fCode32) ? PATMFL_CODE32 : 0); + + /* Make sure the instructions that follow the cli have not been encountered before. */ + while (true) + { + DISCPUSTATE cpu; + + if (cbInstrs + cbCurInstr >= SIZEOF_NEARJUMP32) + break; + + if (csamIsCodeScanned(pVM, pCurInstrGC + cbCurInstr, &pPage) == true) + { + /* We've scanned the next instruction(s) already. This means we've + followed a branch that ended up there before -> dangerous!! */ + PATMR3DetectConflict(pVM, pCurInstrGC, pCurInstrGC + cbCurInstr); + break; + } + pCurInstrGC += cbCurInstr; + cbInstrs += cbCurInstr; + + { /* Force pCurInstrHC out of scope after we stop using it (page lock!) 
*/ + uint8_t *pCurInstrHC = csamR3GCVirtToHCVirt(pVM, pCacheRec, pCurInstrGC); + if (pCurInstrHC == NULL) + { + Log(("csamR3GCVirtToHCVirt failed for %RRv\n", pCurInstrGC)); + break; + } + Assert(VALID_PTR(pCurInstrHC)); + + rc = csamR3DISInstr(pVM, pCurInstrGC, pCurInstrHC, (fCode32) ? DISCPUMODE_32BIT : DISCPUMODE_16BIT, + &cpu, &cbCurInstr, NULL, 0); + } + AssertRC(rc); + if (RT_FAILURE(rc)) + break; + } + break; + } + +#ifdef VBOX_WITH_RAW_RING1 + case OP_MOV: + /* mov xx, CS is a dangerous instruction as our raw ring usage leaks through. */ + if ( EMIsRawRing1Enabled(pVM) + && (pCpu->Param2.fUse & DISUSE_REG_SEG) + && (pCpu->Param2.Base.idxSegReg == DISSELREG_CS)) + { + Log(("CSAM: Patching dangerous 'mov xx, cs' instruction at %RGv with an int3\n", pCurInstrGC)); + if (PATMR3HasBeenPatched(pVM, pCurInstrGC) == false) + { + rc = PATMR3InstallPatch(pVM, pCurInstrGC, (pPage->fCode32) ? PATMFL_CODE32 : 0); + if (RT_FAILURE(rc)) + { + Log(("PATMR3InstallPatch failed with %d\n", rc)); + return VWRN_CONTINUE_ANALYSIS; + } + } + return VWRN_CONTINUE_ANALYSIS; + } + break; +#endif + + case OP_PUSH: + /** @todo broken comparison!! should be if ((pCpu->Param1.fUse & DISUSE_REG_SEG) && (pCpu->Param1.Base.idxSegReg == DISSELREG_SS)) */ + if (pCpu->pCurInstr->fParam1 != OP_PARM_REG_CS) + break; + +#ifndef VBOX_WITH_SAFE_STR + RT_FALL_THRU(); + case OP_STR: +#endif + RT_FALL_THRU(); + case OP_LSL: + case OP_LAR: + case OP_SGDT: + case OP_SLDT: + case OP_SIDT: + case OP_SMSW: + case OP_VERW: + case OP_VERR: + case OP_CPUID: + case OP_IRET: +#ifdef DEBUG + switch(pCpu->pCurInstr->uOpcode) + { + case OP_STR: + Log(("Privileged instruction at %RRv: str!!\n", pCurInstrGC)); + break; + case OP_LSL: + Log(("Privileged instruction at %RRv: lsl!!\n", pCurInstrGC)); + break; + case OP_LAR: + Log(("Privileged instruction at %RRv: lar!!\n", pCurInstrGC)); + break; + case OP_SGDT: + Log(("Privileged instruction at %RRv: sgdt!!\n", pCurInstrGC)); + break; + case OP_SLDT: + Log(("Privileged instruction at %RRv: sldt!!\n", pCurInstrGC)); + break; + case OP_SIDT: + Log(("Privileged instruction at %RRv: sidt!!\n", pCurInstrGC)); + break; + case OP_SMSW: + Log(("Privileged instruction at %RRv: smsw!!\n", pCurInstrGC)); + break; + case OP_VERW: + Log(("Privileged instruction at %RRv: verw!!\n", pCurInstrGC)); + break; + case OP_VERR: + Log(("Privileged instruction at %RRv: verr!!\n", pCurInstrGC)); + break; + case OP_CPUID: + Log(("Privileged instruction at %RRv: cpuid!!\n", pCurInstrGC)); + break; + case OP_PUSH: + Log(("Privileged instruction at %RRv: push cs!!\n", pCurInstrGC)); + break; + case OP_IRET: + Log(("Privileged instruction at %RRv: iret!!\n", pCurInstrGC)); + break; + } +#endif + + if (PATMR3HasBeenPatched(pVM, pCurInstrGC) == false) + { + rc = PATMR3InstallPatch(pVM, pCurInstrGC, (pPage->fCode32) ? PATMFL_CODE32 : 0); + if (RT_FAILURE(rc)) + { + Log(("PATMR3InstallPatch failed with %d\n", rc)); + return VWRN_CONTINUE_ANALYSIS; + } + } + if (pCpu->pCurInstr->uOpcode == OP_IRET) + return VINF_SUCCESS; /* Look no further in this branch. 
*/ + + return VWRN_CONTINUE_ANALYSIS; + + case OP_JMP: + case OP_CALL: + { + // return or jump/call through a jump table + if (OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) != OP_PARM_J) + { +#ifdef DEBUG + switch(pCpu->pCurInstr->uOpcode) + { + case OP_JMP: + Log(("Control Flow instruction at %RRv: jmp!!\n", pCurInstrGC)); + break; + case OP_CALL: + Log(("Control Flow instruction at %RRv: call!!\n", pCurInstrGC)); + break; + } +#endif + return VWRN_CONTINUE_ANALYSIS; + } + return VWRN_CONTINUE_ANALYSIS; + } + + } + + return VWRN_CONTINUE_ANALYSIS; +} + +#ifdef CSAM_ANALYSE_BEYOND_RET +/** + * Wrapper for csamAnalyseCodeStream for call instructions. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param fCode32 16 or 32 bits code + * @param pfnCSAMR3Analyse Callback for testing the disassembled instruction + * @param pUserData User pointer (callback specific) + * + */ +static int csamAnalyseCallCodeStream(PVM pVM, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, bool fCode32, + PFN_CSAMR3ANALYSE pfnCSAMR3Analyse, void *pUserData, PCSAMP2GLOOKUPREC pCacheRec) +{ + int rc; + CSAMCALLEXITREC CallExitRec; + PCSAMCALLEXITREC pOldCallRec; + PCSAMPAGE pPage = 0; + uint32_t i; + + CallExitRec.cInstrAfterRet = 0; + + pOldCallRec = pCacheRec->pCallExitRec; + pCacheRec->pCallExitRec = &CallExitRec; + + rc = csamAnalyseCodeStream(pVM, pInstrGC, pCurInstrGC, fCode32, pfnCSAMR3Analyse, pUserData, pCacheRec); + + for (i=0;icsam.s.StatTimeDisasm, a); +#ifdef DEBUG + rc2 = csamR3DISInstr(pVM, pCurInstrGC, pCurInstrHC, (fCode32) ? DISCPUMODE_32BIT : DISCPUMODE_16BIT, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + if (RT_SUCCESS(rc2)) Log(("CSAM Call Analysis: %s", szOutput)); +#else + rc2 = csamR3DISInstr(pVM, pCurInstrGC, pCurInstrHC, (fCode32) ? DISCPUMODE_32BIT : DISCPUMODE_16BIT, + &cpu, &cbInstr, NULL, 0); +#endif + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeDisasm, a); + if (RT_FAILURE(rc2)) + { + Log(("Disassembly failed at %RRv with %Rrc (probably page not present) -> return to caller\n", pCurInstrGC, rc2)); + goto done; + } + + STAM_COUNTER_ADD(&pVM->csam.s.StatNrBytesRead, cbInstr); + + RCPTRTYPE(uint8_t *) addr = 0; + PCSAMPAGE pJmpPage = NULL; + + if (PAGE_ADDRESS(pCurInstrGC) != PAGE_ADDRESS(pCurInstrGC + cbInstr - 1)) + { + if (!PGMGstIsPagePresent(pVM, pCurInstrGC + cbInstr - 1)) + { + /// @todo fault in the page + Log(("Page for current instruction %RRv is not present!!\n", pCurInstrGC)); + goto done; + } + //all is fine, let's continue + csamR3CheckPageRecord(pVM, pCurInstrGC + cbInstr - 1); + } + + switch (cpu.pCurInstr->uOpcode) + { + case OP_NOP: + case OP_INT3: + break; /* acceptable */ + + case OP_LEA: + /* Must be similar to: + * + * lea esi, [esi] + * lea esi, [esi+0] + * Any register is allowed as long as source and destination are identical. 
+ */ + if ( cpu.Param1.fUse != DISUSE_REG_GEN32 + || ( cpu.Param2.flags != DISUSE_REG_GEN32 + && ( !(cpu.Param2.flags & DISUSE_REG_GEN32) + || !(cpu.Param2.flags & (DISUSE_DISPLACEMENT8|DISUSE_DISPLACEMENT16|DISUSE_DISPLACEMENT32)) + || cpu.Param2.uValue != 0 + ) + ) + || cpu.Param1.base.reg_gen32 != cpu.Param2.base.reg_gen32 + ) + { + STAM_COUNTER_INC(&pVM->csam.s.StatScanNextFunctionFailed); + goto next_function; + } + break; + + case OP_PUSH: + { + if ( (pCurInstrGC & 0x3) != 0 + || cpu.Param1.fUse != DISUSE_REG_GEN32 + || cpu.Param1.base.reg_gen32 != USE_REG_EBP + ) + { + STAM_COUNTER_INC(&pVM->csam.s.StatScanNextFunctionFailed); + goto next_function; + } + + if (csamIsCodeScanned(pVM, pCurInstrGC, &pPage) == false) + { + CSAMCALLEXITREC CallExitRec2; + CallExitRec2.cInstrAfterRet = 0; + + pCacheRec->pCallExitRec = &CallExitRec2; + + /* Analyse the function. */ + Log(("Found new function at %RRv\n", pCurInstrGC)); + STAM_COUNTER_INC(&pVM->csam.s.StatScanNextFunction); + csamAnalyseCallCodeStream(pVM, pInstrGC, pCurInstrGC, fCode32, pfnCSAMR3Analyse, pUserData, pCacheRec); + } + goto next_function; + } + + case OP_SUB: + { + if ( (pCurInstrGC & 0x3) != 0 + || cpu.Param1.fUse != DISUSE_REG_GEN32 + || cpu.Param1.base.reg_gen32 != USE_REG_ESP + ) + { + STAM_COUNTER_INC(&pVM->csam.s.StatScanNextFunctionFailed); + goto next_function; + } + + if (csamIsCodeScanned(pVM, pCurInstrGC, &pPage) == false) + { + CSAMCALLEXITREC CallExitRec2; + CallExitRec2.cInstrAfterRet = 0; + + pCacheRec->pCallExitRec = &CallExitRec2; + + /* Analyse the function. */ + Log(("Found new function at %RRv\n", pCurInstrGC)); + STAM_COUNTER_INC(&pVM->csam.s.StatScanNextFunction); + csamAnalyseCallCodeStream(pVM, pInstrGC, pCurInstrGC, fCode32, pfnCSAMR3Analyse, pUserData, pCacheRec); + } + goto next_function; + } + + default: + STAM_COUNTER_INC(&pVM->csam.s.StatScanNextFunctionFailed); + goto next_function; + } + /* Mark it as scanned. */ + csamMarkCode(pVM, pPage, pCurInstrGC, cbInstr, true); + pCurInstrGC += cbInstr; + } /* for at most 16 instructions */ +next_function: + ; /* MSVC complains otherwise */ + } + } +done: + pCacheRec->pCallExitRec = pOldCallRec; + return rc; +} +#else +#define csamAnalyseCallCodeStream csamAnalyseCodeStream +#endif + +/** + * Disassembles the code stream until the callback function detects a failure or decides everything is acceptable + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param fCode32 16 or 32 bits code + * @param pfnCSAMR3Analyse Callback for testing the disassembled instruction + * @param pUserData User pointer (callback specific) + * @param pCacheRec GC to HC cache record. + */ +static int csamAnalyseCodeStream(PVM pVM, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, bool fCode32, + PFN_CSAMR3ANALYSE pfnCSAMR3Analyse, void *pUserData, PCSAMP2GLOOKUPREC pCacheRec) +{ + DISCPUSTATE cpu; + PCSAMPAGE pPage = (PCSAMPAGE)pUserData; + int rc = VWRN_CONTINUE_ANALYSIS; + uint32_t cbInstr; + int rc2; + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + +#ifdef DEBUG + char szOutput[256]; +#endif + + LogFlow(("csamAnalyseCodeStream: code at %RRv depth=%d\n", pCurInstrGC, pCacheRec->depth)); + + pVM->csam.s.fScanningStarted = true; + + pCacheRec->depth++; + /* + * Limit the call depth. 
(rather arbitrary upper limit; too low and we won't detect certain + * cpuid instructions in Linux kernels; too high and we waste too much time scanning code) + * (512 is necessary to detect cpuid instructions in Red Hat EL4; see defect 1355) + * @note we are using a lot of stack here. couple of 100k when we go to the full depth (!) + */ + if (pCacheRec->depth > 512) + { + LogFlow(("CSAM: maximum calldepth reached for %RRv\n", pCurInstrGC)); + pCacheRec->depth--; + return VINF_SUCCESS; //let's not go on forever + } + + Assert(!PATMIsPatchGCAddr(pVM, pCurInstrGC)); + csamR3CheckPageRecord(pVM, pCurInstrGC); + + while(rc == VWRN_CONTINUE_ANALYSIS) + { + if (csamIsCodeScanned(pVM, pCurInstrGC, &pPage) == false) + { + if (pPage == NULL) + { + /* New address; let's take a look at it. */ + pPage = csamR3CreatePageRecord(pVM, pCurInstrGC, CSAM_TAG_CSAM, fCode32); + if (pPage == NULL) + { + rc = VERR_NO_MEMORY; + goto done; + } + } + } + else + { + LogFlow(("Code at %RRv has been scanned before\n", pCurInstrGC)); + rc = VINF_SUCCESS; + goto done; + } + + { /* Force pCurInstrHC out of scope after we stop using it (page lock!) */ + uint8_t *pCurInstrHC = csamR3GCVirtToHCVirt(pVM, pCacheRec, pCurInstrGC); + if (pCurInstrHC == NULL) + { + Log(("csamR3GCVirtToHCVirt failed for %RRv\n", pCurInstrGC)); + rc = VERR_PATCHING_REFUSED; + goto done; + } + Assert(VALID_PTR(pCurInstrHC)); + + STAM_PROFILE_START(&pVM->csam.s.StatTimeDisasm, a); +#ifdef DEBUG + rc2 = csamR3DISInstr(pVM, pCurInstrGC, pCurInstrHC, fCode32 ? DISCPUMODE_32BIT : DISCPUMODE_16BIT, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + if (RT_SUCCESS(rc2)) Log(("CSAM Analysis: %s", szOutput)); +#else + rc2 = csamR3DISInstr(pVM, pCurInstrGC, pCurInstrHC, fCode32 ? DISCPUMODE_32BIT : DISCPUMODE_16BIT, + &cpu, &cbInstr, NULL, 0); +#endif + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeDisasm, a); + } + if (RT_FAILURE(rc2)) + { + Log(("Disassembly failed at %RRv with %Rrc (probably page not present) -> return to caller\n", pCurInstrGC, rc2)); + rc = VINF_SUCCESS; + goto done; + } + + STAM_COUNTER_ADD(&pVM->csam.s.StatNrBytesRead, cbInstr); + + csamMarkCode(pVM, pPage, pCurInstrGC, cbInstr, true); + + RCPTRTYPE(uint8_t *) addr = 0; + PCSAMPAGE pJmpPage = NULL; + + if (PAGE_ADDRESS(pCurInstrGC) != PAGE_ADDRESS(pCurInstrGC + cbInstr - 1)) + { + if (!PGMGstIsPagePresent(pVCpu, pCurInstrGC + cbInstr - 1)) + { + /// @todo fault in the page + Log(("Page for current instruction %RRv is not present!!\n", pCurInstrGC)); + rc = VWRN_CONTINUE_ANALYSIS; + goto next_please; + } + //all is fine, let's continue + csamR3CheckPageRecord(pVM, pCurInstrGC + cbInstr - 1); + } + /* + * If it's harmless, then don't bother checking it (the disasm tables had better be accurate!) + */ + if ((cpu.pCurInstr->fOpType & ~DISOPTYPE_RRM_MASK) == DISOPTYPE_HARMLESS) + { + AssertMsg(pfnCSAMR3Analyse(pVM, &cpu, pInstrGC, pCurInstrGC, pCacheRec, (void *)pPage) == VWRN_CONTINUE_ANALYSIS, ("Instruction incorrectly marked harmless?!?!?\n")); + rc = VWRN_CONTINUE_ANALYSIS; + goto next_please; + } + +#ifdef CSAM_ANALYSE_BEYOND_RET + /* Remember the address of the instruction following the ret in case the parent instruction was a call. 
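+ * The csamAnalyseCallCodeStream wrapper scans these addresses afterwards, looking
+ * for functions placed directly after the ret of the calling function.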
*/ + if ( pCacheRec->pCallExitRec + && cpu.pCurInstr->uOpcode == OP_RETN + && pCacheRec->pCallExitRec->cInstrAfterRet < CSAM_MAX_CALLEXIT_RET) + { + pCacheRec->pCallExitRec->pInstrAfterRetGC[pCacheRec->pCallExitRec->cInstrAfterRet] = pCurInstrGC + cbInstr; + pCacheRec->pCallExitRec->cInstrAfterRet++; + } +#endif + + rc = pfnCSAMR3Analyse(pVM, &cpu, pInstrGC, pCurInstrGC, pCacheRec, (void *)pPage); + if (rc == VINF_SUCCESS) + goto done; + + // For our first attempt, we'll handle only simple relative jumps and calls (immediate offset coded in instruction) + if ( ((cpu.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) && (OP_PARM_VTYPE(cpu.pCurInstr->fParam1) == OP_PARM_J)) + || (cpu.pCurInstr->uOpcode == OP_CALL && cpu.Param1.fUse == DISUSE_DISPLACEMENT32)) /* simple indirect call (call dword ptr [address]) */ + { + /* We need to parse 'call dword ptr [address]' type of calls to catch cpuid instructions in some recent Linux distributions (e.g. OpenSuse 10.3) */ + if ( cpu.pCurInstr->uOpcode == OP_CALL + && cpu.Param1.fUse == DISUSE_DISPLACEMENT32) + { + addr = 0; + PGMPhysSimpleReadGCPtr(pVCpu, &addr, (RTRCUINTPTR)cpu.Param1.uDisp.i32, sizeof(addr)); + } + else + addr = CSAMResolveBranch(&cpu, pCurInstrGC); + + if (addr == 0) + { + Log(("We don't support far jumps here!! (%08X)\n", cpu.Param1.fUse)); + rc = VINF_SUCCESS; + break; + } + Assert(!PATMIsPatchGCAddr(pVM, addr)); + + /* If the target address lies in a patch generated jump, then special action needs to be taken. */ + PATMR3DetectConflict(pVM, pCurInstrGC, addr); + + /* Same page? */ + if (PAGE_ADDRESS(addr) != PAGE_ADDRESS(pCurInstrGC )) + { + if (!PGMGstIsPagePresent(pVCpu, addr)) + { + Log(("Page for current instruction %RRv is not present!!\n", addr)); + rc = VWRN_CONTINUE_ANALYSIS; + goto next_please; + } + + /* All is fine, let's continue. */ + csamR3CheckPageRecord(pVM, addr); + } + + pJmpPage = NULL; + if (csamIsCodeScanned(pVM, addr, &pJmpPage) == false) + { + if (pJmpPage == NULL) + { + /* New branch target; let's take a look at it. */ + pJmpPage = csamR3CreatePageRecord(pVM, addr, CSAM_TAG_CSAM, fCode32); + if (pJmpPage == NULL) + { + rc = VERR_NO_MEMORY; + goto done; + } + Assert(pPage); + } + if (cpu.pCurInstr->uOpcode == OP_CALL) + rc = csamAnalyseCallCodeStream(pVM, pInstrGC, addr, fCode32, pfnCSAMR3Analyse, (void *)pJmpPage, pCacheRec); + else + rc = csamAnalyseCodeStream(pVM, pInstrGC, addr, fCode32, pfnCSAMR3Analyse, (void *)pJmpPage, pCacheRec); + + if (rc != VINF_SUCCESS) { + goto done; + } + } + if (cpu.pCurInstr->uOpcode == OP_JMP) + {//unconditional jump; return to caller + rc = VINF_SUCCESS; + goto done; + } + + rc = VWRN_CONTINUE_ANALYSIS; + } //if ((cpu.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) && (OP_PARM_VTYPE(cpu.pCurInstr->fParam1) == OP_PARM_J)) +#ifdef CSAM_SCAN_JUMP_TABLE + else + if ( cpu.pCurInstr->uOpcode == OP_JMP + && (cpu.Param1.fUse & (DISUSE_DISPLACEMENT32|DISUSE_INDEX|DISUSE_SCALE)) == (DISUSE_DISPLACEMENT32|DISUSE_INDEX|DISUSE_SCALE) + ) + { + RTRCPTR pJumpTableGC = (RTRCPTR)cpu.Param1.disp32; + uint8_t *pJumpTableHC; + int rc2; + + Log(("Jump through jump table\n")); + + rc2 = PGMPhysGCPtr2CCPtrReadOnly(pVCpu, pJumpTableGC, (PRTHCPTR)&pJumpTableHC, missing page lock); + if (rc2 == VINF_SUCCESS) + { + for (uint32_t i=0;i<2;i++) + { + uint64_t fFlags; + + addr = pJumpTableGC + cpu.Param1.scale * i; + /* Same page? 
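+ * (Only the page containing the first jump table entry is mapped, so stop as soon
+ * as an entry crosses into another page.)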
*/ + if (PAGE_ADDRESS(addr) != PAGE_ADDRESS(pJumpTableGC)) + break; + + addr = *(RTRCPTR *)(pJumpTableHC + cpu.Param1.scale * i); + + rc2 = PGMGstGetPage(pVCpu, addr, &fFlags, NULL); + if ( rc2 != VINF_SUCCESS + || (fFlags & X86_PTE_US) + || !(fFlags & X86_PTE_P) + ) + break; + + Log(("Jump to %RRv\n", addr)); + + pJmpPage = NULL; + if (csamIsCodeScanned(pVM, addr, &pJmpPage) == false) + { + if (pJmpPage == NULL) + { + /* New branch target; let's take a look at it. */ + pJmpPage = csamR3CreatePageRecord(pVM, addr, CSAM_TAG_CSAM, fCode32); + if (pJmpPage == NULL) + { + rc = VERR_NO_MEMORY; + goto done; + } + Assert(pPage); + } + rc = csamAnalyseCodeStream(pVM, pInstrGC, addr, fCode32, pfnCSAMR3Analyse, (void *)pJmpPage, pCacheRec); + if (rc != VINF_SUCCESS) { + goto done; + } + } + } + } + } +#endif + if (rc != VWRN_CONTINUE_ANALYSIS) { + break; //done! + } +next_please: + if (cpu.pCurInstr->uOpcode == OP_JMP) + { + rc = VINF_SUCCESS; + goto done; + } + pCurInstrGC += cbInstr; + } +done: + pCacheRec->depth--; + return rc; +} + + +/** + * Calculates the 64 bits hash value for the current page + * + * @returns hash value + * @param pVM The cross context VM structure. + * @param pInstr Page address + */ +uint64_t csamR3CalcPageHash(PVM pVM, RTRCPTR pInstr) +{ + uint64_t hash = 0; + uint32_t val[5]; + int rc; + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + Assert((pInstr & PAGE_OFFSET_MASK) == 0); + + rc = PGMPhysSimpleReadGCPtr(pVCpu, &val[0], pInstr, sizeof(val[0])); + if (RT_SUCCESS(rc)) + { /* likely */ } + else + { + if (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT || rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS) + { + Log(("csamR3CalcPageHash: page %RRv not present/invalid!!\n", pInstr)); + return ~0ULL; + } + AssertMsgFailed(("rc = %Rrc %RRv\n", rc, pInstr)); + } + + rc = PGMPhysSimpleReadGCPtr(pVCpu, &val[1], pInstr+1024, sizeof(val[0])); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + if (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + Log(("csamR3CalcPageHash: page %RRv not present!!\n", pInstr)); + return ~0ULL; + } + + rc = PGMPhysSimpleReadGCPtr(pVCpu, &val[2], pInstr+2048, sizeof(val[0])); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + if (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + Log(("csamR3CalcPageHash: page %RRv not present!!\n", pInstr)); + return ~0ULL; + } + + rc = PGMPhysSimpleReadGCPtr(pVCpu, &val[3], pInstr+3072, sizeof(val[0])); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + if (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + Log(("csamR3CalcPageHash: page %RRv not present!!\n", pInstr)); + return ~0ULL; + } + + rc = PGMPhysSimpleReadGCPtr(pVCpu, &val[4], pInstr+4092, sizeof(val[0])); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + if (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + Log(("csamR3CalcPageHash: page %RRv not present!!\n", pInstr)); + return ~0ULL; + } + + // don't want to get division by zero traps + val[2] |= 1; + val[4] |= 1; + + hash = (uint64_t)val[0] * (uint64_t)val[1] / (uint64_t)val[2] + (val[3]%val[4]); + return (hash == ~0ULL) ? 
hash - 1 : hash; +} + + +/** + * Notify CSAM of a page flush + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param addr GC address of the page to flush + * @param fRemovePage Page removal flag + */ +static int csamFlushPage(PVM pVM, RTRCPTR addr, bool fRemovePage) +{ + PCSAMPAGEREC pPageRec; + int rc; + RTGCPHYS GCPhys = 0; + uint64_t fFlags = 0; + Assert(pVM->cCpus == 1 || !CSAMIsEnabled(pVM)); + + if (!CSAMIsEnabled(pVM)) + return VINF_SUCCESS; + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + PVMCPU pVCpu = VMMGetCpu0(pVM); + + STAM_PROFILE_START(&pVM->csam.s.StatTimeFlushPage, a); + + addr = addr & PAGE_BASE_GC_MASK; + + /* + * Note: searching for the page in our tree first is more expensive (skipped flushes are two orders of magnitude more common) + */ + if (pVM->csam.s.pPageTree == NULL) + { + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeFlushPage, a); + return VWRN_CSAM_PAGE_NOT_FOUND; + } + + rc = PGMGstGetPage(pVCpu, addr, &fFlags, &GCPhys); + /* Returned at a very early stage (no paging yet presumably). */ + if (rc == VERR_NOT_SUPPORTED) + { + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeFlushPage, a); + return rc; + } + + if (RT_SUCCESS(rc)) + { + if ( (fFlags & X86_PTE_US) + || rc == VERR_PGM_PHYS_PAGE_RESERVED + ) + { + /* User page -> not relevant for us. */ + STAM_COUNTER_ADD(&pVM->csam.s.StatNrFlushesSkipped, 1); + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeFlushPage, a); + return VINF_SUCCESS; + } + } + else + if (rc != VERR_PAGE_NOT_PRESENT && rc != VERR_PAGE_TABLE_NOT_PRESENT) + AssertMsgFailed(("PGMR3GetPage %RRv failed with %Rrc\n", addr, rc)); + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)addr); + if (pPageRec) + { + if ( GCPhys == pPageRec->page.GCPhys + && (fFlags & X86_PTE_P)) + { + STAM_COUNTER_ADD(&pVM->csam.s.StatNrFlushesSkipped, 1); + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeFlushPage, a); + return VINF_SUCCESS; + } + + Log(("CSAMR3FlushPage: page %RRv has changed -> FLUSH (rc=%Rrc) (Phys: %RGp vs %RGp)\n", addr, rc, GCPhys, pPageRec->page.GCPhys)); + + STAM_COUNTER_ADD(&pVM->csam.s.StatNrFlushes, 1); + + if (fRemovePage) + csamRemovePageRecord(pVM, addr); + else + { + CSAMMarkPage(pVM, addr, false); + pPageRec->page.GCPhys = 0; + pPageRec->page.fFlags = 0; + rc = PGMGstGetPage(pVCpu, addr, &pPageRec->page.fFlags, &pPageRec->page.GCPhys); + if (rc == VINF_SUCCESS) + pPageRec->page.u64Hash = csamR3CalcPageHash(pVM, addr); + + if (pPageRec->page.pBitmap == NULL) + { + pPageRec->page.pBitmap = (uint8_t *)MMR3HeapAllocZ(pVM, MM_TAG_CSAM_PATCH, CSAM_PAGE_BITMAP_SIZE); + Assert(pPageRec->page.pBitmap); + if (pPageRec->page.pBitmap == NULL) + return VERR_NO_MEMORY; + } + else + memset(pPageRec->page.pBitmap, 0, CSAM_PAGE_BITMAP_SIZE); + } + + + /* + * Inform patch manager about the flush; no need to repeat the above check twice. + */ + PATMR3FlushPage(pVM, addr); + + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeFlushPage, a); + return VINF_SUCCESS; + } + else + { + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeFlushPage, a); + return VWRN_CSAM_PAGE_NOT_FOUND; + } +} + +/** + * Notify CSAM of a page flush + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param addr GC address of the page to flush + */ +VMMR3_INT_DECL(int) CSAMR3FlushPage(PVM pVM, RTRCPTR addr) +{ + return csamFlushPage(pVM, addr, true /* remove page record */); +} + +/** + * Remove a CSAM monitored page. Use with care! + * + * @returns VBox status code + * @param pVM The cross context VM structure. 
+ * @param addr GC address of the page to flush + */ +VMMR3_INT_DECL(int) CSAMR3RemovePage(PVM pVM, RTRCPTR addr) +{ + PCSAMPAGEREC pPageRec; + int rc; + + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_CSAM_HM_IPE); + + addr = addr & PAGE_BASE_GC_MASK; + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)addr); + if (pPageRec) + { + rc = csamRemovePageRecord(pVM, addr); + if (RT_SUCCESS(rc)) + PATMR3FlushPage(pVM, addr); + return VINF_SUCCESS; + } + return VWRN_CSAM_PAGE_NOT_FOUND; +} + +/** + * Check a page record in case a page has been changed + * + * @returns VBox status code. (trap handled or not) + * @param pVM The cross context VM structure. + * @param pInstrGC GC instruction pointer + */ +int csamR3CheckPageRecord(PVM pVM, RTRCPTR pInstrGC) +{ + PCSAMPAGEREC pPageRec; + uint64_t u64hash; + + pInstrGC = pInstrGC & PAGE_BASE_GC_MASK; + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)pInstrGC); + if (pPageRec) + { + u64hash = csamR3CalcPageHash(pVM, pInstrGC); + if (u64hash != pPageRec->page.u64Hash) + csamFlushPage(pVM, pInstrGC, false /* don't remove page record */); + } + else + return VWRN_CSAM_PAGE_NOT_FOUND; + + return VINF_SUCCESS; +} + +/** + * Returns monitor description based on CSAM tag + * + * @return description string + * @param enmTag Owner tag + */ +const char *csamGetMonitorDescription(CSAMTAG enmTag) +{ + if (enmTag == CSAM_TAG_PATM) + return "CSAM-PATM self-modifying code monitor handler"; + else + if (enmTag == CSAM_TAG_REM) + return "CSAM-REM self-modifying code monitor handler"; + Assert(enmTag == CSAM_TAG_CSAM); + return "CSAM self-modifying code monitor handler"; +} + +/** + * Adds page record to our lookup tree + * + * @returns CSAMPAGE ptr or NULL if failure + * @param pVM The cross context VM structure. + * @param GCPtr Page address + * @param enmTag Owner tag + * @param fCode32 16 or 32 bits code + * @param fMonitorInvalidation Monitor page invalidation flag + */ +static PCSAMPAGE csamR3CreatePageRecord(PVM pVM, RTRCPTR GCPtr, CSAMTAG enmTag, bool fCode32, bool fMonitorInvalidation) +{ + PCSAMPAGEREC pPage; + int rc; + bool ret; + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + Log(("New page record for %RRv\n", GCPtr & PAGE_BASE_GC_MASK)); + + pPage = (PCSAMPAGEREC)MMR3HeapAllocZ(pVM, MM_TAG_CSAM_PATCH, sizeof(CSAMPAGEREC)); + if (pPage == NULL) + { + AssertMsgFailed(("csamR3CreatePageRecord: Out of memory!!!!\n")); + return NULL; + } + /* Round down to page boundary. */ + GCPtr = (GCPtr & PAGE_BASE_GC_MASK); + pPage->Core.Key = (AVLPVKEY)(uintptr_t)GCPtr; + pPage->page.pPageGC = GCPtr; + pPage->page.fCode32 = fCode32; + pPage->page.fMonitorInvalidation = fMonitorInvalidation; + pPage->page.enmTag = enmTag; + pPage->page.fMonitorActive = false; + pPage->page.pBitmap = (uint8_t *)MMR3HeapAllocZ(pVM, MM_TAG_CSAM_PATCH, PAGE_SIZE/sizeof(uint8_t)); + rc = PGMGstGetPage(pVCpu, GCPtr, &pPage->page.fFlags, &pPage->page.GCPhys); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + + pPage->page.u64Hash = csamR3CalcPageHash(pVM, GCPtr); + ret = RTAvlPVInsert(&pVM->csam.s.pPageTree, &pPage->Core); + Assert(ret); + +#ifdef CSAM_MONITOR_CODE_PAGES + AssertRelease(!g_fInCsamR3CodePageInvalidate); + + switch (enmTag) + { + case CSAM_TAG_PATM: + case CSAM_TAG_REM: +# ifdef CSAM_MONITOR_CSAM_CODE_PAGES + case CSAM_TAG_CSAM: +# endif + { + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, fMonitorInvalidation + ? 
pVM->csam.s.hCodePageWriteAndInvPgType : pVM->csam.s.hCodePageWriteType, + GCPtr, GCPtr + (PAGE_SIZE - 1) /* inclusive! */, + pPage, NIL_RTRCPTR, csamGetMonitorDescription(enmTag)); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT, + ("PGMR3HandlerVirtualRegister %RRv failed with %Rrc\n", GCPtr, rc)); + if (RT_FAILURE(rc)) + Log(("PGMR3HandlerVirtualRegister for %RRv failed with %Rrc\n", GCPtr, rc)); + + /* Could fail, because it's already monitored. Don't treat that condition as fatal. */ + + /* Prefetch it in case it's not there yet. */ + rc = PGMPrefetchPage(pVCpu, GCPtr); + AssertRC(rc); + + rc = PGMShwMakePageReadonly(pVCpu, GCPtr, 0 /*fFlags*/); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + + pPage->page.fMonitorActive = true; + STAM_COUNTER_INC(&pVM->csam.s.StatPageMonitor); + break; + } + default: + break; /* to shut up GCC */ + } + + Log(("csamR3CreatePageRecord %RRv GCPhys=%RGp\n", GCPtr, pPage->page.GCPhys)); + +# ifdef VBOX_WITH_STATISTICS + switch (enmTag) + { + case CSAM_TAG_CSAM: + STAM_COUNTER_INC(&pVM->csam.s.StatPageCSAM); + break; + case CSAM_TAG_PATM: + STAM_COUNTER_INC(&pVM->csam.s.StatPagePATM); + break; + case CSAM_TAG_REM: + STAM_COUNTER_INC(&pVM->csam.s.StatPageREM); + break; + default: + break; /* to shut up GCC */ + } +# endif + +#endif + + STAM_COUNTER_INC(&pVM->csam.s.StatNrPages); + if (fMonitorInvalidation) + STAM_COUNTER_INC(&pVM->csam.s.StatNrPagesInv); + + return &pPage->page; +} + +/** + * Monitors a code page (if not already monitored) + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param pPageAddrGC The page to monitor + * @param enmTag Monitor tag + */ +VMMR3DECL(int) CSAMR3MonitorPage(PVM pVM, RTRCPTR pPageAddrGC, CSAMTAG enmTag) +{ + ; + int rc; + bool fMonitorInvalidation; + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* Dirty pages must be handled before calling this function!. */ + Assert(!pVM->csam.s.cDirtyPages); + + if (pVM->csam.s.fScanningStarted == false) + return VINF_SUCCESS; /* too early */ + + pPageAddrGC &= PAGE_BASE_GC_MASK; + + Log(("CSAMR3MonitorPage %RRv %d\n", pPageAddrGC, enmTag)); + + /** @todo implicit assumption */ + fMonitorInvalidation = (enmTag == CSAM_TAG_PATM); + + PCSAMPAGEREC pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)pPageAddrGC); + if (pPageRec == NULL) + { + uint64_t fFlags; + + rc = PGMGstGetPage(pVCpu, pPageAddrGC, &fFlags, NULL); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + if ( rc == VINF_SUCCESS + && (fFlags & X86_PTE_US)) + { + /* We don't care about user pages. */ + STAM_COUNTER_INC(&pVM->csam.s.StatNrUserPages); + return VINF_SUCCESS; + } + + csamR3CreatePageRecord(pVM, pPageAddrGC, enmTag, true /* 32 bits code */, fMonitorInvalidation); + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)pPageAddrGC); + Assert(pPageRec); + } + /** @todo reference count */ + +#ifdef CSAM_MONITOR_CSAM_CODE_PAGES + Assert(pPageRec->page.fMonitorActive); +#endif + +#ifdef CSAM_MONITOR_CODE_PAGES + if (!pPageRec->page.fMonitorActive) + { + Log(("CSAMR3MonitorPage: activate monitoring for %RRv\n", pPageAddrGC)); + + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, fMonitorInvalidation + ? pVM->csam.s.hCodePageWriteAndInvPgType : pVM->csam.s.hCodePageWriteType, + pPageAddrGC, pPageAddrGC + (PAGE_SIZE - 1) /* inclusive! 
*/, + pPageRec, NIL_RTRCPTR /*pvUserRC*/, csamGetMonitorDescription(enmTag)); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT, + ("PGMR3HandlerVirtualRegister %RRv failed with %Rrc\n", pPageAddrGC, rc)); + if (RT_FAILURE(rc)) + Log(("PGMR3HandlerVirtualRegister for %RRv failed with %Rrc\n", pPageAddrGC, rc)); + + /* Could fail, because it's already monitored. Don't treat that condition as fatal. */ + + /* Prefetch it in case it's not there yet. */ + rc = PGMPrefetchPage(pVCpu, pPageAddrGC); + AssertRC(rc); + + rc = PGMShwMakePageReadonly(pVCpu, pPageAddrGC, 0 /*fFlags*/); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + + STAM_COUNTER_INC(&pVM->csam.s.StatPageMonitor); + + pPageRec->page.fMonitorActive = true; + pPageRec->page.fMonitorInvalidation = fMonitorInvalidation; + } + else + if ( !pPageRec->page.fMonitorInvalidation + && fMonitorInvalidation) + { + Assert(pPageRec->page.fMonitorActive); + rc = PGMHandlerVirtualChangeType(pVM, pPageRec->page.pPageGC, pVM->csam.s.hCodePageWriteAndInvPgType); + AssertRC(rc); + pPageRec->page.fMonitorInvalidation = true; + STAM_COUNTER_INC(&pVM->csam.s.StatNrPagesInv); + + /* Prefetch it in case it's not there yet. */ + rc = PGMPrefetchPage(pVCpu, pPageAddrGC); + AssertRC(rc); + + /* Make sure it's readonly. Page invalidation may have modified the attributes. */ + rc = PGMShwMakePageReadonly(pVCpu, pPageAddrGC, 0 /*fFlags*/); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + } + +#if 0 /* def VBOX_STRICT -> very annoying) */ + if (pPageRec->page.fMonitorActive) + { + uint64_t fPageShw; + RTHCPHYS GCPhys; + rc = PGMShwGetPage(pVCpu, pPageAddrGC, &fPageShw, &GCPhys); +// AssertMsg( (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT) +// || !(fPageShw & X86_PTE_RW) +// || (pPageRec->page.GCPhys == 0), ("Shadow page flags for %RRv (%RHp) aren't readonly (%RX64)!!\n", pPageAddrGC, GCPhys, fPageShw)); + } +#endif + + if (pPageRec->page.GCPhys == 0) + { + /* Prefetch it in case it's not there yet. */ + rc = PGMPrefetchPage(pVCpu, pPageAddrGC); + AssertRC(rc); + /* The page was changed behind our back. It won't be made read-only until the next SyncCR3, so force it here. */ + rc = PGMShwMakePageReadonly(pVCpu, pPageAddrGC, 0 /*fFlags*/); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + } +#endif /* CSAM_MONITOR_CODE_PAGES */ + return VINF_SUCCESS; +} + +/** + * Unmonitors a code page + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param pPageAddrGC The page to monitor + * @param enmTag Monitor tag + */ +VMMR3DECL(int) CSAMR3UnmonitorPage(PVM pVM, RTRCPTR pPageAddrGC, CSAMTAG enmTag) +{ + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + pPageAddrGC &= PAGE_BASE_GC_MASK; + + Log(("CSAMR3UnmonitorPage %RRv %d\n", pPageAddrGC, enmTag)); + + Assert(enmTag == CSAM_TAG_REM); RT_NOREF_PV(enmTag); + +#ifdef VBOX_STRICT + PCSAMPAGEREC pPageRec; + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)pPageAddrGC); + Assert(pPageRec && pPageRec->page.enmTag == enmTag); +#endif + return CSAMR3RemovePage(pVM, pPageAddrGC); +} + +/** + * Removes a page record from our lookup tree + * + * @returns VBox status code + * @param pVM The cross context VM structure. 
+ * @param GCPtr Page address + */ +static int csamRemovePageRecord(PVM pVM, RTRCPTR GCPtr) +{ + PCSAMPAGEREC pPageRec; + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + Log(("csamRemovePageRecord %RRv\n", GCPtr)); + pPageRec = (PCSAMPAGEREC)RTAvlPVRemove(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)GCPtr); + + if (pPageRec) + { + STAM_COUNTER_INC(&pVM->csam.s.StatNrRemovedPages); + +#ifdef CSAM_MONITOR_CODE_PAGES + if (pPageRec->page.fMonitorActive) + { + /** @todo -> this is expensive (cr3 reload)!!! + * if this happens often, then reuse it instead!!! + */ + Assert(!g_fInCsamR3CodePageInvalidate); + STAM_COUNTER_DEC(&pVM->csam.s.StatPageMonitor); + PGMHandlerVirtualDeregister(pVM, pVCpu, GCPtr, false /*fHypervisor*/); + } + if (pPageRec->page.enmTag == CSAM_TAG_PATM) + { + /* Make sure the recompiler flushes its cache as this page is no longer monitored. */ + STAM_COUNTER_INC(&pVM->csam.s.StatPageRemoveREMFlush); + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH); + } +#endif + +#ifdef VBOX_WITH_STATISTICS + switch (pPageRec->page.enmTag) + { + case CSAM_TAG_CSAM: + STAM_COUNTER_DEC(&pVM->csam.s.StatPageCSAM); + break; + case CSAM_TAG_PATM: + STAM_COUNTER_DEC(&pVM->csam.s.StatPagePATM); + break; + case CSAM_TAG_REM: + STAM_COUNTER_DEC(&pVM->csam.s.StatPageREM); + break; + default: + break; /* to shut up GCC */ + } +#endif + + if (pPageRec->page.pBitmap) MMR3HeapFree(pPageRec->page.pBitmap); + MMR3HeapFree(pPageRec); + } + else + AssertFailed(); + + return VINF_SUCCESS; +} + +#if 0 /* Unused */ +/** + * Callback for delayed writes from non-EMT threads + * + * @param pVM The cross context VM structure. + * @param GCPtr The virtual address the guest is writing to. (not correct if it's an alias!) + * @param cbBuf How much it's reading/writing. + */ +static DECLCALLBACK(void) CSAMDelayedWriteHandler(PVM pVM, RTRCPTR GCPtr, size_t cbBuf) +{ + int rc = PATMR3PatchWrite(pVM, GCPtr, (uint32_t)cbBuf); + AssertRC(rc); +} +#endif + +/** + * \#PF Handler callback for invalidation of virtual access handler ranges. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param GCPtr The virtual address the guest has changed. + * @param pvUser Ignored. + * + * @remarks Not currently called by PGM. It was actually only called for a month + * back in 2006... + */ +static DECLCALLBACK(int) csamR3CodePageInvalidate(PVM pVM, PVMCPU pVCpu, RTGCPTR GCPtr, void *pvUser) +{ + RT_NOREF2(pVCpu, pvUser); + + g_fInCsamR3CodePageInvalidate = true; + LogFlow(("csamR3CodePageInvalidate %RGv\n", GCPtr)); + /** @todo We can't remove the page (which unregisters the virtual handler) as we are called from a DoWithAll on the virtual handler tree. Argh. */ + csamFlushPage(pVM, GCPtr, false /* don't remove page! */); + g_fInCsamR3CodePageInvalidate = false; + + return VINF_SUCCESS; +} + +/** + * Check if the current instruction has already been checked before + * + * @returns VBox status code. (trap handled or not) + * @param pVM The cross context VM structure. 
+ * @param pInstr Instruction pointer + * @param pPage CSAM patch structure pointer + */ +bool csamIsCodeScanned(PVM pVM, RTRCPTR pInstr, PCSAMPAGE *pPage) +{ + PCSAMPAGEREC pPageRec; + uint32_t offset; + + STAM_PROFILE_START(&pVM->csam.s.StatTimeCheckAddr, a); + + offset = pInstr & PAGE_OFFSET_MASK; + pInstr = pInstr & PAGE_BASE_GC_MASK; + + Assert(pPage); + + if (*pPage && (*pPage)->pPageGC == pInstr) + { + if ((*pPage)->pBitmap == NULL || ASMBitTest((*pPage)->pBitmap, offset)) + { + STAM_COUNTER_ADD(&pVM->csam.s.StatNrKnownPagesHC, 1); + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeCheckAddr, a); + return true; + } + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeCheckAddr, a); + return false; + } + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)pInstr); + if (pPageRec) + { + if (pPage) *pPage= &pPageRec->page; + if (pPageRec->page.pBitmap == NULL || ASMBitTest(pPageRec->page.pBitmap, offset)) + { + STAM_COUNTER_ADD(&pVM->csam.s.StatNrKnownPagesHC, 1); + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeCheckAddr, a); + return true; + } + } + else + { + if (pPage) *pPage = NULL; + } + STAM_PROFILE_STOP(&pVM->csam.s.StatTimeCheckAddr, a); + return false; +} + +/** + * Mark an instruction in a page as scanned/not scanned + * + * @param pVM The cross context VM structure. + * @param pPage Patch structure pointer + * @param pInstr Instruction pointer + * @param cbInstr Instruction size + * @param fScanned Mark as scanned or not + */ +static void csamMarkCode(PVM pVM, PCSAMPAGE pPage, RTRCPTR pInstr, uint32_t cbInstr, bool fScanned) +{ + LogFlow(("csamMarkCodeAsScanned %RRv cbInstr=%d\n", pInstr, cbInstr)); + CSAMMarkPage(pVM, pInstr, fScanned); + + /** @todo should recreate empty bitmap if !fScanned */ + if (pPage->pBitmap == NULL) + return; + + if (fScanned) + { + // retn instructions can be scanned more than once + if (ASMBitTest(pPage->pBitmap, pInstr & PAGE_OFFSET_MASK) == 0) + { + pPage->uSize += cbInstr; + STAM_COUNTER_ADD(&pVM->csam.s.StatNrInstr, 1); + } + if (pPage->uSize >= PAGE_SIZE) + { + Log(("Scanned full page (%RRv) -> free bitmap\n", pInstr & PAGE_BASE_GC_MASK)); + MMR3HeapFree(pPage->pBitmap); + pPage->pBitmap = NULL; + } + else + ASMBitSet(pPage->pBitmap, pInstr & PAGE_OFFSET_MASK); + } + else + ASMBitClear(pPage->pBitmap, pInstr & PAGE_OFFSET_MASK); +} + +/** + * Mark an instruction in a page as scanned/not scanned + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstr Instruction pointer + * @param cbInstr Instruction size + * @param fScanned Mark as scanned or not + */ +VMMR3_INT_DECL(int) CSAMR3MarkCode(PVM pVM, RTRCPTR pInstr, uint32_t cbInstr, bool fScanned) +{ + PCSAMPAGE pPage = 0; + + Assert(!fScanned); /* other case not implemented. */ + Assert(!PATMIsPatchGCAddr(pVM, pInstr)); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + if (csamIsCodeScanned(pVM, pInstr, &pPage) == false) + { + Assert(fScanned == true); /* other case should not be possible */ + return VINF_SUCCESS; + } + + Log(("CSAMR3MarkCode: %RRv size=%d fScanned=%d\n", pInstr, cbInstr, fScanned)); + csamMarkCode(pVM, pPage, pInstr, cbInstr, fScanned); + return VINF_SUCCESS; +} + + +/** + * Scan and analyse code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCtx Guest CPU context. + * @param pInstrGC Instruction pointer. 
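+ * @remarks Assumes flat 32 bits guest code; the instruction pointer is converted to a
+ *          flat address with the guest's CS selector before the actual scan is done.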
+ */ +VMMR3_INT_DECL(int) CSAMR3CheckCodeEx(PVM pVM, PCPUMCTX pCtx, RTRCPTR pInstrGC) +{ + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + if (!EMIsRawRing0Enabled(pVM) || PATMIsPatchGCAddr(pVM, pInstrGC) == true) + { + // No use + return VINF_SUCCESS; + } + + if (CSAMIsEnabled(pVM)) + { + /* Assuming 32 bits code for now. */ + Assert(CPUMGetGuestCodeBits(VMMGetCpu0(pVM)) == 32); + + pInstrGC = SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), pInstrGC); + return CSAMR3CheckCode(pVM, pInstrGC); + } + return VINF_SUCCESS; +} + +/** + * Scan and analyse code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Instruction pointer (0:32 virtual address) + */ +VMMR3_INT_DECL(int) CSAMR3CheckCode(PVM pVM, RTRCPTR pInstrGC) +{ + int rc; + PCSAMPAGE pPage = NULL; + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + if ( !EMIsRawRing0Enabled(pVM) + || PATMIsPatchGCAddr(pVM, pInstrGC) == true) + { + /* Not active. */ + return VINF_SUCCESS; + } + + if (CSAMIsEnabled(pVM)) + { + /* Cache record for csamR3GCVirtToHCVirt */ + CSAMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + + STAM_PROFILE_START(&pVM->csam.s.StatTime, a); + rc = csamAnalyseCallCodeStream(pVM, pInstrGC, pInstrGC, true /* 32 bits code */, CSAMR3AnalyseCallback, pPage, &cacheRec); + STAM_PROFILE_STOP(&pVM->csam.s.StatTime, a); + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + if (rc != VINF_SUCCESS) + { + Log(("csamAnalyseCodeStream failed with %d\n", rc)); + return rc; + } + } + return VINF_SUCCESS; +} + +/** + * Flush dirty code pages + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int csamR3FlushDirtyPages(PVM pVM) +{ + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + STAM_PROFILE_START(&pVM->csam.s.StatFlushDirtyPages, a); + + for (uint32_t i = 0; i < pVM->csam.s.cDirtyPages; i++) + { + int rc; + PCSAMPAGEREC pPageRec; + RTRCPTR GCPtr = pVM->csam.s.pvDirtyBasePage[i] & PAGE_BASE_GC_MASK; + +#ifdef VBOX_WITH_REM + /* Notify the recompiler that this page has been changed. */ + REMR3NotifyCodePageChanged(pVM, pVCpu, GCPtr); + if (pVM->csam.s.pvDirtyFaultPage[i] != pVM->csam.s.pvDirtyBasePage[i]) + REMR3NotifyCodePageChanged(pVM, pVCpu, pVM->csam.s.pvDirtyFaultPage[i] & PAGE_BASE_GC_MASK); +#endif + + /* Enable write protection again. (use the fault address as it might be an alias) */ + rc = PGMShwMakePageReadonly(pVCpu, pVM->csam.s.pvDirtyFaultPage[i], 0 /*fFlags*/); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + + Log(("CSAMR3FlushDirtyPages: flush %RRv (modifypage rc=%Rrc)\n", pVM->csam.s.pvDirtyBasePage[i], rc)); + + pPageRec = (PCSAMPAGEREC)RTAvlPVGet(&pVM->csam.s.pPageTree, (AVLPVKEY)(uintptr_t)GCPtr); + if (pPageRec && pPageRec->page.enmTag == CSAM_TAG_REM) + { + uint64_t fFlags; + + rc = PGMGstGetPage(pVCpu, GCPtr, &fFlags, NULL); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc = %Rrc\n", rc)); + if ( rc == VINF_SUCCESS + && (fFlags & X86_PTE_US)) + { + /* We don't care about user pages. */ + csamRemovePageRecord(pVM, GCPtr); + STAM_COUNTER_INC(&pVM->csam.s.StatNrUserPages); + } + } + } + pVM->csam.s.cDirtyPages = 0; + STAM_PROFILE_STOP(&pVM->csam.s.StatFlushDirtyPages, a); + return VINF_SUCCESS; +} + +/** + * Flush potential new code pages + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
*/ +static int csamR3FlushCodePages(PVM pVM) +{ + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + for (uint32_t i=0;i<pVM->csam.s.cPossibleCodePages;i++) + { + RTRCPTR GCPtr = pVM->csam.s.pvPossibleCodePage[i]; + + GCPtr = GCPtr & PAGE_BASE_GC_MASK; + + Log(("csamR3FlushCodePages: %RRv\n", GCPtr)); + PGMShwMakePageNotPresent(pVCpu, GCPtr, 0 /*fFlags*/); + /* Resync the page to make sure instruction fetch will fault */ + CSAMMarkPage(pVM, GCPtr, false); + } + pVM->csam.s.cPossibleCodePages = 0; + return VINF_SUCCESS; +} + +/** + * Perform any pending actions + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) CSAMR3DoPendingAction(PVM pVM, PVMCPU pVCpu) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_CSAM_HM_IPE); + + csamR3FlushDirtyPages(pVM); + csamR3FlushCodePages(pVM); + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_CSAM_PENDING_ACTION); + return VINF_SUCCESS; +} + +/** + * Analyse interrupt and trap gates + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param iGate Start gate + * @param cGates Number of gates to check + */ +VMMR3_INT_DECL(int) CSAMR3CheckGates(PVM pVM, uint32_t iGate, uint32_t cGates) +{ +#ifdef VBOX_WITH_RAW_MODE + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu0(pVM); + uint16_t cbIDT; + RTRCPTR GCPtrIDT = CPUMGetGuestIDTR(pVCpu, &cbIDT); + uint32_t iGateEnd; + uint32_t maxGates; + VBOXIDTE aIDT[256]; + PVBOXIDTE pGuestIdte; + int rc; + + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_CSAM_HM_IPE); + if (!EMIsRawRing0Enabled(pVM)) + { + /* Enabling interrupt gates only works when raw ring 0 is enabled. */ + //AssertFailed(); + return VINF_SUCCESS; + } + + /* We only check all gates once during a session */ + if ( !pVM->csam.s.fGatesChecked + && cGates != 256) + return VINF_SUCCESS; /* too early */ + + /* We only check all gates once during a session */ + if ( pVM->csam.s.fGatesChecked + && cGates != 1) + return VINF_SUCCESS; /* ignored */ + + Assert(cGates <= 256); + if (!GCPtrIDT || cGates > 256) + return VERR_INVALID_PARAMETER; + + if (cGates != 1) + { + pVM->csam.s.fGatesChecked = true; + for (unsigned i=0;i<RT_ELEMENTS(pVM->csam.s.pvCallInstruction);i++) + { + RTRCPTR pHandler = pVM->csam.s.pvCallInstruction[i]; + + if (pHandler) + { + PCSAMPAGE pPage = NULL; + CSAMP2GLOOKUPREC cacheRec; /* Cache record for csamR3GCVirtToHCVirt. */ + RT_ZERO(cacheRec); + + Log(("CSAMCheckGates: checking previous call instruction %RRv\n", pHandler)); + STAM_PROFILE_START(&pVM->csam.s.StatTime, a); + rc = csamAnalyseCodeStream(pVM, pHandler, pHandler, true, CSAMR3AnalyseCallback, pPage, &cacheRec); + STAM_PROFILE_STOP(&pVM->csam.s.StatTime, a); + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + if (rc != VINF_SUCCESS) + { + Log(("CSAMCheckGates: csamAnalyseCodeStream failed with %d\n", rc)); + continue; + } + } + } + } + + /* Determine valid upper boundary. */ + maxGates = (cbIDT+1) / sizeof(VBOXIDTE); + Assert(iGate < maxGates); + if (iGate > maxGates) + return VERR_INVALID_PARAMETER; + + if (iGate + cGates > maxGates) + cGates = maxGates - iGate; + + GCPtrIDT = GCPtrIDT + iGate * sizeof(VBOXIDTE); + iGateEnd = iGate + cGates; + + STAM_PROFILE_START(&pVM->csam.s.StatCheckGates, a); + + /* + * Get IDT entries. + */ + rc = PGMPhysSimpleReadGCPtr(pVCpu, aIDT, GCPtrIDT, cGates*sizeof(VBOXIDTE)); + if (RT_FAILURE(rc)) + { + AssertMsgRC(rc, ("Failed to read IDTE!
rc=%Rrc\n", rc)); + STAM_PROFILE_STOP(&pVM->csam.s.StatCheckGates, a); + return rc; + } + pGuestIdte = &aIDT[0]; + + for (/*iGate*/; iGateGen.u1Present + && (pGuestIdte->Gen.u5Type2 == VBOX_IDTE_TYPE2_TRAP_32 || pGuestIdte->Gen.u5Type2 == VBOX_IDTE_TYPE2_INT_32) + && (pGuestIdte->Gen.u2DPL == 3 || pGuestIdte->Gen.u2DPL == 0) + ) + { + RTRCPTR pHandler; + PCSAMPAGE pPage = NULL; + DBGFSELINFO selInfo; + CSAMP2GLOOKUPREC cacheRec; /* Cache record for csamR3GCVirtToHCVirt. */ + RT_ZERO(cacheRec); + + pHandler = VBOXIDTE_OFFSET(*pGuestIdte); + pHandler = SELMToFlatBySel(pVM, pGuestIdte->Gen.u16SegSel, pHandler); + + rc = SELMR3GetSelectorInfo(pVM, pVCpu, pGuestIdte->Gen.u16SegSel, &selInfo); + if ( RT_FAILURE(rc) + || (selInfo.fFlags & (DBGFSELINFO_FLAGS_NOT_PRESENT | DBGFSELINFO_FLAGS_INVALID)) + || selInfo.GCPtrBase != 0 + || selInfo.cbLimit != ~0U + ) + { + /* Refuse to patch a handler whose idt cs selector isn't wide open. */ + Log(("CSAMCheckGates: check gate %d failed due to rc %Rrc GCPtrBase=%RRv limit=%x\n", iGate, rc, selInfo.GCPtrBase, selInfo.cbLimit)); + continue; + } + + + if (pGuestIdte->Gen.u5Type2 == VBOX_IDTE_TYPE2_TRAP_32) + { + Log(("CSAMCheckGates: check trap gate %d at %04X:%08X (flat %RRv)\n", iGate, pGuestIdte->Gen.u16SegSel, VBOXIDTE_OFFSET(*pGuestIdte), pHandler)); + } + else + { + Log(("CSAMCheckGates: check interrupt gate %d at %04X:%08X (flat %RRv)\n", iGate, pGuestIdte->Gen.u16SegSel, VBOXIDTE_OFFSET(*pGuestIdte), pHandler)); + } + + STAM_PROFILE_START(&pVM->csam.s.StatTime, b); + rc = csamAnalyseCodeStream(pVM, pHandler, pHandler, true, CSAMR3AnalyseCallback, pPage, &cacheRec); + STAM_PROFILE_STOP(&pVM->csam.s.StatTime, b); + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + if (rc != VINF_SUCCESS) + { + Log(("CSAMCheckGates: csamAnalyseCodeStream failed with %d\n", rc)); + continue; + } + /* OpenBSD guest specific patch test. */ + if (iGate >= 0x20) + { + PCPUMCTX pCtx; + DISCPUSTATE cpu; + RTGCUINTPTR32 aOpenBsdPushCSOffset[3] = {0x03, /* OpenBSD 3.7 & 3.8 */ + 0x2B, /* OpenBSD 4.0 installation ISO */ + 0x2F}; /* OpenBSD 4.0 after install */ + + pCtx = CPUMQueryGuestCtxPtr(pVCpu); + + for (unsigned i=0;iuOpcode == OP_PUSH + && cpu.pCurInstr->fParam1 == OP_PARM_REG_CS) + { + rc = PATMR3InstallPatch(pVM, pHandler - aOpenBsdPushCSOffset[i], PATMFL_CODE32 | PATMFL_GUEST_SPECIFIC); + if (RT_SUCCESS(rc)) + Log(("Installed OpenBSD interrupt handler prefix instruction (push cs) patch\n")); + } + } + } + + /* Trap gates and certain interrupt gates. */ + uint32_t fPatchFlags = PATMFL_CODE32 | PATMFL_IDTHANDLER; + + if (pGuestIdte->Gen.u5Type2 == VBOX_IDTE_TYPE2_TRAP_32) + fPatchFlags |= PATMFL_TRAPHANDLER; + else + fPatchFlags |= PATMFL_INTHANDLER; + + switch (iGate) { + case 8: + case 10: + case 11: + case 12: + case 13: + case 14: + case 17: + fPatchFlags |= PATMFL_TRAPHANDLER_WITH_ERRORCODE; + break; + default: + /* No error code. */ + break; + } + + Log(("Installing %s gate handler for 0x%X at %RRv\n", (pGuestIdte->Gen.u5Type2 == VBOX_IDTE_TYPE2_TRAP_32) ? 
"trap" : "intr", iGate, pHandler)); + + rc = PATMR3InstallPatch(pVM, pHandler, fPatchFlags); + if ( RT_SUCCESS(rc) + || rc == VERR_PATM_ALREADY_PATCHED) + { + Log(("Gate handler 0x%X is SAFE!\n", iGate)); + + RTRCPTR pNewHandlerGC = PATMR3QueryPatchGCPtr(pVM, pHandler); + if (pNewHandlerGC) + { + rc = TRPMR3SetGuestTrapHandler(pVM, iGate, pNewHandlerGC); + if (RT_FAILURE(rc)) + Log(("TRPMR3SetGuestTrapHandler %d failed with %Rrc\n", iGate, rc)); + } + } + } + } /* for */ + STAM_PROFILE_STOP(&pVM->csam.s.StatCheckGates, a); +#endif /* VBOX_WITH_RAW_MODE */ + return VINF_SUCCESS; +} + +/** + * Record previous call instruction addresses + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtrCall Call address + */ +VMMR3DECL(int) CSAMR3RecordCallAddress(PVM pVM, RTRCPTR GCPtrCall) +{ + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + for (unsigned i=0;icsam.s.pvCallInstruction);i++) + { + if (pVM->csam.s.pvCallInstruction[i] == GCPtrCall) + return VINF_SUCCESS; + } + + Log(("CSAMR3RecordCallAddress %RRv\n", GCPtrCall)); + + pVM->csam.s.pvCallInstruction[pVM->csam.s.iCallInstruction++] = GCPtrCall; + if (pVM->csam.s.iCallInstruction >= RT_ELEMENTS(pVM->csam.s.pvCallInstruction)) + pVM->csam.s.iCallInstruction = 0; + + return VINF_SUCCESS; +} + + +/** + * Query CSAM state (enabled/disabled) + * + * @returns true if enabled, false otherwise. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) CSAMR3IsEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return CSAMIsEnabled(pVM); +} + + +/** + * Enables or disables code scanning. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param fEnabled Whether to enable or disable scanning. 
+ */ +VMMR3DECL(int) CSAMR3SetScanningEnabled(PUVM pUVM, bool fEnabled) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + Assert(!pVM->fCSAMEnabled); + return VINF_SUCCESS; + } + + int rc; + if (fEnabled) + rc = CSAMEnableScanning(pVM); + else + rc = CSAMDisableScanning(pVM); + return rc; +} + + +#ifdef VBOX_WITH_DEBUGGER + +/** + * @callback_method_impl{FNDBGCCMD, The '.csamoff' command.} + */ +static DECLCALLBACK(int) csamr3CmdOff(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + NOREF(cArgs); NOREF(paArgs); + + if (HMR3IsEnabled(pUVM)) + return DBGCCmdHlpPrintf(pCmdHlp, "CSAM is permanently disabled by HM.\n"); + + int rc = CSAMR3SetScanningEnabled(pUVM, false); + if (RT_FAILURE(rc)) + return DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "CSAMR3SetScanningEnabled"); + return DBGCCmdHlpPrintf(pCmdHlp, "CSAM Scanning disabled\n"); +} + +/** + * @callback_method_impl{FNDBGCCMD, The '.csamon' command.} + */ +static DECLCALLBACK(int) csamr3CmdOn(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + NOREF(cArgs); NOREF(paArgs); + + if (HMR3IsEnabled(pUVM)) + return DBGCCmdHlpPrintf(pCmdHlp, "CSAM is permanently disabled by HM.\n"); + + int rc = CSAMR3SetScanningEnabled(pUVM, true); + if (RT_FAILURE(rc)) + return DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "CSAMR3SetScanningEnabled"); + return DBGCCmdHlpPrintf(pCmdHlp, "CSAM Scanning enabled\n"); +} + +#endif /* VBOX_WITH_DEBUGGER */ diff --git a/src/VBox/VMM/VMMR3/DBGF.cpp b/src/VBox/VMM/VMMR3/DBGF.cpp new file mode 100644 index 00000000..f3d9aa25 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGF.cpp @@ -0,0 +1,2119 @@ +/* $Id: DBGF.cpp $ */ +/** @file + * DBGF - Debugger Facility. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_dbgf DBGF - The Debugger Facility + * + * The purpose of the DBGF is to provide an interface for debuggers to + * manipulate the VMM without having to mess up the source code for each of + * them. The DBGF is always built in and will always work when a debugger + * attaches to the VM. The DBGF provides the basic debugger features, such as + * halting execution, handling breakpoints, single step execution, instruction + * disassembly, info querying, OS specific diggers, symbol and module + * management. + * + * The interface is working in a manner similar to the win32, linux and os2 + * debugger interfaces. The interface has an asynchronous nature. This comes + * from the fact that the VMM and the Debugger are running in different threads. + * They are referred to as the "emulation thread" and the "debugger thread", or + * as the "ping thread" and the "pong thread, respectivly. (The last set of + * names comes from the use of the Ping-Pong synchronization construct from the + * RTSem API.) 
+ * + * @see grp_dbgf + * + * + * @section sec_dbgf_scenario Usage Scenario + * + * The debugger starts by attaching to the VM. For practical reasons we limit the + * number of concurrently attached debuggers to 1 per VM. The action of + * attaching to the VM causes the VM to check and generate debug events. + * + * The debugger then will wait/poll for debug events and issue commands. + * + * The waiting and polling is done by the DBGFEventWait() function. It will wait + * for the emulation thread to send a ping, thus indicating that there is an + * event waiting to be processed. + * + * An event can be a response to a command issued previously, the hitting of a + * breakpoint, or running into a bad/fatal VMM condition. The debugger now has + * the ping and must respond to the event at hand - the VMM is waiting. This + * usually means that the user of the debugger must do something, but it doesn't + * have to. The debugger is free to call any DBGF function (nearly at least) + * while processing the event. + * + * Typically the user will issue a request for the execution to be resumed, so + * the debugger calls DBGFResume() and goes back to waiting/polling for events. + * + * When the user eventually terminates the debugging session or selects another + * VM, the debugger detaches from the VM. This means that breakpoints are + * disabled and that the emulation thread no longer polls for debugger commands. + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include "DBGFInternal.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Instruction type returned by dbgfStepGetCurInstrType. + */ +typedef enum DBGFSTEPINSTRTYPE +{ + DBGFSTEPINSTRTYPE_INVALID = 0, + DBGFSTEPINSTRTYPE_OTHER, + DBGFSTEPINSTRTYPE_RET, + DBGFSTEPINSTRTYPE_CALL, + DBGFSTEPINSTRTYPE_END, + DBGFSTEPINSTRTYPE_32BIT_HACK = 0x7fffffff +} DBGFSTEPINSTRTYPE; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int dbgfR3VMMWait(PVM pVM); +static int dbgfR3VMMCmd(PVM pVM, DBGFCMD enmCmd, PDBGFCMDDATA pCmdData, bool *pfResumeExecution); +static DECLCALLBACK(int) dbgfR3Attach(PVM pVM); +static DBGFSTEPINSTRTYPE dbgfStepGetCurInstrType(PVM pVM, PVMCPU pVCpu); +static bool dbgfStepAreWeThereYet(PVM pVM, PVMCPU pVCpu); + + +/** + * Sets the VMM Debug Command variable. + * + * @returns Previous command. + * @param pVM The cross context VM structure. + * @param enmCmd The command. 
+ */ +DECLINLINE(DBGFCMD) dbgfR3SetCmd(PVM pVM, DBGFCMD enmCmd) +{ + DBGFCMD rc; + if (enmCmd == DBGFCMD_NO_COMMAND) + { + Log2(("DBGF: Setting command to %d (DBGFCMD_NO_COMMAND)\n", enmCmd)); + rc = (DBGFCMD)ASMAtomicXchgU32((uint32_t volatile *)(void *)&pVM->dbgf.s.enmVMMCmd, enmCmd); + VM_FF_CLEAR(pVM, VM_FF_DBGF); + } + else + { + Log2(("DBGF: Setting command to %d\n", enmCmd)); + AssertMsg(pVM->dbgf.s.enmVMMCmd == DBGFCMD_NO_COMMAND, ("enmCmd=%d enmVMMCmd=%d\n", enmCmd, pVM->dbgf.s.enmVMMCmd)); + rc = (DBGFCMD)ASMAtomicXchgU32((uint32_t volatile *)(void *)&pVM->dbgf.s.enmVMMCmd, enmCmd); + VM_FF_SET(pVM, VM_FF_DBGF); + VMR3NotifyGlobalFFU(pVM->pUVM, 0 /* didn't notify REM */); + } + return rc; +} + + +/** + * Initializes the DBGF. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) DBGFR3Init(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + AssertCompile(sizeof(pUVM->dbgf.s) <= sizeof(pUVM->dbgf.padding)); + AssertCompile(sizeof(pUVM->aCpus[0].dbgf.s) <= sizeof(pUVM->aCpus[0].dbgf.padding)); + + pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID; + + /* + * The usual sideways mountain climbing style of init: + */ + int rc = dbgfR3InfoInit(pUVM); /* (First, initalizes the shared critical section.) */ + if (RT_SUCCESS(rc)) + { + rc = dbgfR3TraceInit(pVM); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3RegInit(pUVM); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3AsInit(pUVM); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3BpInit(pVM); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3OSInit(pUVM); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3PlugInInit(pUVM); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3BugCheckInit(pVM); + if (RT_SUCCESS(rc)) + { + return VINF_SUCCESS; + } + dbgfR3PlugInTerm(pUVM); + } + dbgfR3OSTermPart1(pUVM); + dbgfR3OSTermPart2(pUVM); + } + } + dbgfR3AsTerm(pUVM); + } + dbgfR3RegTerm(pUVM); + } + dbgfR3TraceTerm(pVM); + } + dbgfR3InfoTerm(pUVM); + } + return rc; +} + + +/** + * Terminates and cleans up resources allocated by the DBGF. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) DBGFR3Term(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + + dbgfR3OSTermPart1(pUVM); + dbgfR3PlugInTerm(pUVM); + dbgfR3OSTermPart2(pUVM); + dbgfR3AsTerm(pUVM); + dbgfR3RegTerm(pUVM); + dbgfR3TraceTerm(pVM); + dbgfR3InfoTerm(pUVM); + + return VINF_SUCCESS; +} + + +/** + * Called when the VM is powered off to detach debuggers. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) DBGFR3PowerOff(PVM pVM) +{ + + /* + * Send a termination event to any attached debugger. + */ + /* wait to become the speaker (we should already be that). */ + if ( pVM->dbgf.s.fAttached + && RTSemPingShouldWait(&pVM->dbgf.s.PingPong)) + RTSemPingWait(&pVM->dbgf.s.PingPong, 5000); + + if (pVM->dbgf.s.fAttached) + { + /* Just mark it as detached if we're not in a position to send a power + off event. It should fail later on. */ + if (!RTSemPingIsSpeaker(&pVM->dbgf.s.PingPong)) + { + ASMAtomicWriteBool(&pVM->dbgf.s.fAttached, false); + if (RTSemPingIsSpeaker(&pVM->dbgf.s.PingPong)) + ASMAtomicWriteBool(&pVM->dbgf.s.fAttached, true); + } + + if (RTSemPingIsSpeaker(&pVM->dbgf.s.PingPong)) + { + /* Try send the power off event. */ + int rc; + DBGFCMD enmCmd = dbgfR3SetCmd(pVM, DBGFCMD_NO_COMMAND); + if (enmCmd == DBGFCMD_DETACH_DEBUGGER) + /* the debugger beat us to initiating the detaching. */ + rc = VINF_SUCCESS; + else + { + /* ignore the command (if any). 
*/ + enmCmd = DBGFCMD_NO_COMMAND; + pVM->dbgf.s.DbgEvent.enmType = DBGFEVENT_POWERING_OFF; + pVM->dbgf.s.DbgEvent.enmCtx = DBGFEVENTCTX_OTHER; + rc = RTSemPing(&pVM->dbgf.s.PingPong); + } + + /* + * Process commands and priority requests until we get a command + * indicating that the debugger has detached. + */ + uint32_t cPollHack = 1; + PVMCPU pVCpu = VMMGetCpu(pVM); + while (RT_SUCCESS(rc)) + { + if (enmCmd != DBGFCMD_NO_COMMAND) + { + /* process command */ + bool fResumeExecution; + DBGFCMDDATA CmdData = pVM->dbgf.s.VMMCmdData; + rc = dbgfR3VMMCmd(pVM, enmCmd, &CmdData, &fResumeExecution); + if (enmCmd == DBGFCMD_DETACHED_DEBUGGER) + break; + enmCmd = DBGFCMD_NO_COMMAND; + } + else + { + /* Wait for new command, processing pending priority requests + first. The request processing is a bit crazy, but + unfortunately required by plugin unloading. */ + if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST)) + { + LogFlow(("DBGFR3PowerOff: Processes priority requests...\n")); + rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/); + if (rc == VINF_SUCCESS) + rc = VMR3ReqProcessU(pVM->pUVM, pVCpu->idCpu, true /*fPriorityOnly*/); + LogFlow(("DBGFR3PowerOff: VMR3ReqProcess -> %Rrc\n", rc)); + cPollHack = 1; + } + /* Need to handle rendezvous too, for generic debug event management. */ + else if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + rc = VMMR3EmtRendezvousFF(pVM, pVCpu); + AssertLogRel(rc == VINF_SUCCESS); + cPollHack = 1; + } + else if (cPollHack < 120) + cPollHack++; + + rc = RTSemPingWait(&pVM->dbgf.s.PingPong, cPollHack); + if (RT_SUCCESS(rc)) + enmCmd = dbgfR3SetCmd(pVM, DBGFCMD_NO_COMMAND); + else if (rc == VERR_TIMEOUT) + rc = VINF_SUCCESS; + } + } + + /* + * Clear the FF so we won't get confused later on. + */ + VM_FF_CLEAR(pVM, VM_FF_DBGF); + } + } +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + */ +VMMR3_INT_DECL(void) DBGFR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + dbgfR3TraceRelocate(pVM); + dbgfR3AsRelocate(pVM->pUVM, offDelta); +} + + +/** + * Waits a little while for a debuggger to attach. + * + * @returns True is a debugger have attached. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context per CPU structure. + * @param enmEvent Event. + * + * @thread EMT(pVCpu) + */ +bool dbgfR3WaitForAttach(PVM pVM, PVMCPU pVCpu, DBGFEVENTTYPE enmEvent) +{ + /* + * First a message. + */ +#ifndef RT_OS_L4 + +# if !defined(DEBUG) || defined(DEBUG_sandervl) || defined(DEBUG_frank) + int cWait = 10; +# else + int cWait = !VM_IS_RAW_MODE_ENABLED(pVM) + && ( enmEvent == DBGFEVENT_ASSERTION_HYPER + || enmEvent == DBGFEVENT_FATAL_ERROR) + && !RTEnvExist("VBOX_DBGF_WAIT_FOR_ATTACH") + ? 10 + : 150; +# endif + RTStrmPrintf(g_pStdErr, "DBGF: No debugger attached, waiting %d second%s for one to attach (event=%d)\n", + cWait / 10, cWait != 10 ? "s" : "", enmEvent); + RTStrmFlush(g_pStdErr); + while (cWait > 0) + { + RTThreadSleep(100); + if (pVM->dbgf.s.fAttached) + { + RTStrmPrintf(g_pStdErr, "Attached!\n"); + RTStrmFlush(g_pStdErr); + return true; + } + + /* Process priority stuff. 
*/ + if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST)) + { + int rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/); + if (rc == VINF_SUCCESS) + rc = VMR3ReqProcessU(pVM->pUVM, pVCpu->idCpu, true /*fPriorityOnly*/); + if (rc != VINF_SUCCESS) + { + RTStrmPrintf(g_pStdErr, "[rcReq=%Rrc, ignored!]", rc); + RTStrmFlush(g_pStdErr); + } + } + + /* next */ + if (!(cWait % 10)) + { + RTStrmPrintf(g_pStdErr, "%d.", cWait / 10); + RTStrmFlush(g_pStdErr); + } + cWait--; + } +#endif + + RTStrmPrintf(g_pStdErr, "Stopping the VM!\n"); + RTStrmFlush(g_pStdErr); + return false; +} + + +/** + * Forced action callback. + * + * The VMM will call this from it's main loop when either VM_FF_DBGF or + * VMCPU_FF_DBGF are set. + * + * The function checks for and executes pending commands from the debugger. + * Then it checks for pending debug events and serves these. + * + * @returns VINF_SUCCESS normally. + * @returns VERR_DBGF_RAISE_FATAL_ERROR to pretend a fatal error happened. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context per CPU structure. + */ +VMMR3_INT_DECL(int) DBGFR3VMMForcedAction(PVM pVM, PVMCPU pVCpu) +{ + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + + if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_DBGF)) + { + /* + * Command pending? Process it. + */ + if (pVM->dbgf.s.enmVMMCmd != DBGFCMD_NO_COMMAND) + { + bool fResumeExecution; + DBGFCMDDATA CmdData = pVM->dbgf.s.VMMCmdData; + DBGFCMD enmCmd = dbgfR3SetCmd(pVM, DBGFCMD_NO_COMMAND); + rcStrict = dbgfR3VMMCmd(pVM, enmCmd, &CmdData, &fResumeExecution); + if (!fResumeExecution) + rcStrict = dbgfR3VMMWait(pVM); + } + } + + /* + * Dispatch pending events. + */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_DBGF)) + { + if ( pVCpu->dbgf.s.cEvents > 0 + && pVCpu->dbgf.s.aEvents[pVCpu->dbgf.s.cEvents - 1].enmState == DBGFEVENTSTATE_CURRENT) + { + VBOXSTRICTRC rcStrict2 = DBGFR3EventHandlePending(pVM, pVCpu); + if ( rcStrict2 != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || RT_FAILURE(rcStrict2) + || rcStrict2 < rcStrict) ) /** @todo oversimplified? */ + rcStrict = rcStrict2; + } + } + + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Flag whether the event implies that we're stopped in the hypervisor code + * and have to block certain operations. + * + * @param pVM The cross context VM structure. + * @param enmEvent The event. + */ +static void dbgfR3EventSetStoppedInHyperFlag(PVM pVM, DBGFEVENTTYPE enmEvent) +{ + switch (enmEvent) + { + case DBGFEVENT_STEPPED_HYPER: + case DBGFEVENT_ASSERTION_HYPER: + case DBGFEVENT_BREAKPOINT_HYPER: + pVM->dbgf.s.fStoppedInHyper = true; + break; + default: + pVM->dbgf.s.fStoppedInHyper = false; + break; + } +} + + +/** + * Try to determine the event context. + * + * @returns debug event context. + * @param pVM The cross context VM structure. + */ +static DBGFEVENTCTX dbgfR3FigureEventCtx(PVM pVM) +{ + /** @todo SMP support! */ + PVMCPU pVCpu = &pVM->aCpus[0]; + + switch (EMGetState(pVCpu)) + { + case EMSTATE_RAW: + case EMSTATE_DEBUG_GUEST_RAW: + return DBGFEVENTCTX_RAW; + + case EMSTATE_REM: + case EMSTATE_DEBUG_GUEST_REM: + return DBGFEVENTCTX_REM; + + case EMSTATE_DEBUG_HYPER: + case EMSTATE_GURU_MEDITATION: + return DBGFEVENTCTX_HYPER; + + default: + return DBGFEVENTCTX_OTHER; + } +} + +/** + * The common event prologue code. + * It will set the 'stopped-in-hyper' flag, make sure someone is attached, + * and perhaps process any high priority pending actions (none yet). + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param enmEvent The event to be sent. + */ +static int dbgfR3EventPrologue(PVM pVM, DBGFEVENTTYPE enmEvent) +{ + /** @todo SMP */ + PVMCPU pVCpu = VMMGetCpu(pVM); + + /* + * Check if a debugger is attached. + */ + if ( !pVM->dbgf.s.fAttached + && !dbgfR3WaitForAttach(pVM, pVCpu, enmEvent)) + { + Log(("DBGFR3VMMEventSrc: enmEvent=%d - debugger not attached\n", enmEvent)); + return VERR_DBGF_NOT_ATTACHED; + } + + /* + * Sync back the state from the REM. + */ + dbgfR3EventSetStoppedInHyperFlag(pVM, enmEvent); +#ifdef VBOX_WITH_REM + if (!pVM->dbgf.s.fStoppedInHyper) + REMR3StateUpdate(pVM, pVCpu); +#endif + + /* + * Look thru pending commands and finish those which make sense now. + */ + /** @todo Process/purge pending commands. */ + //int rc = DBGFR3VMMForcedAction(pVM); + return VINF_SUCCESS; +} + + +/** + * Sends the event in the event buffer. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int dbgfR3SendEvent(PVM pVM) +{ + pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID; + + int rc = RTSemPing(&pVM->dbgf.s.PingPong); + if (RT_SUCCESS(rc)) + rc = dbgfR3VMMWait(pVM); + + pVM->dbgf.s.fStoppedInHyper = false; + /** @todo sync VMM -> REM after exitting the debugger. everything may change while in the debugger! */ + return rc; +} + + +/** + * Processes a pending event on the current CPU. + * + * This is called by EM in response to VINF_EM_DBG_EVENT. + * + * @returns Strict VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context per CPU structure. + * + * @thread EMT(pVCpu) + */ +VMMR3_INT_DECL(VBOXSTRICTRC) DBGFR3EventHandlePending(PVM pVM, PVMCPU pVCpu) +{ + VMCPU_ASSERT_EMT(pVCpu); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_DBGF); + + /* + * Check that we've got an event first. + */ + AssertReturn(pVCpu->dbgf.s.cEvents > 0, VINF_SUCCESS); + AssertReturn(pVCpu->dbgf.s.aEvents[pVCpu->dbgf.s.cEvents - 1].enmState == DBGFEVENTSTATE_CURRENT, VINF_SUCCESS); + PDBGFEVENT pEvent = &pVCpu->dbgf.s.aEvents[pVCpu->dbgf.s.cEvents - 1].Event; + + /* + * Make sure we've got a debugger and is allowed to speak to it. + */ + int rc = dbgfR3EventPrologue(pVM, pEvent->enmType); + if (RT_FAILURE(rc)) + { + /** @todo drop them events? */ + return rc; + } + +/** @todo SMP + debugger speaker logic */ + /* + * Copy the event over and mark it as ignore. + */ + pVM->dbgf.s.DbgEvent = *pEvent; + pVCpu->dbgf.s.aEvents[pVCpu->dbgf.s.cEvents - 1].enmState = DBGFEVENTSTATE_IGNORE; + return dbgfR3SendEvent(pVM); +} + + +/** + * Send a generic debugger event which takes no data. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmEvent The event to send. + * @internal + */ +VMMR3DECL(int) DBGFR3Event(PVM pVM, DBGFEVENTTYPE enmEvent) +{ + /* + * Do stepping filtering. + */ + /** @todo Would be better if we did some of this inside the execution + * engines. */ + if ( enmEvent == DBGFEVENT_STEPPED + || enmEvent == DBGFEVENT_STEPPED_HYPER) + { + if (!dbgfStepAreWeThereYet(pVM, VMMGetCpu(pVM))) + return VINF_EM_DBG_STEP; + } + + int rc = dbgfR3EventPrologue(pVM, enmEvent); + if (RT_FAILURE(rc)) + return rc; + + /* + * Send the event and process the reply communication. + */ + pVM->dbgf.s.DbgEvent.enmType = enmEvent; + pVM->dbgf.s.DbgEvent.enmCtx = dbgfR3FigureEventCtx(pVM); + return dbgfR3SendEvent(pVM); +} + + +/** + * Send a debugger event which takes the full source file location. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param enmEvent The event to send. + * @param pszFile Source file. + * @param uLine Line number in source file. + * @param pszFunction Function name. + * @param pszFormat Message which accompanies the event. + * @param ... Message arguments. + * @internal + */ +VMMR3DECL(int) DBGFR3EventSrc(PVM pVM, DBGFEVENTTYPE enmEvent, const char *pszFile, unsigned uLine, const char *pszFunction, const char *pszFormat, ...) +{ + va_list args; + va_start(args, pszFormat); + int rc = DBGFR3EventSrcV(pVM, enmEvent, pszFile, uLine, pszFunction, pszFormat, args); + va_end(args); + return rc; +} + + +/** + * Send a debugger event which takes the full source file location. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmEvent The event to send. + * @param pszFile Source file. + * @param uLine Line number in source file. + * @param pszFunction Function name. + * @param pszFormat Message which accompanies the event. + * @param args Message arguments. + * @internal + */ +VMMR3DECL(int) DBGFR3EventSrcV(PVM pVM, DBGFEVENTTYPE enmEvent, const char *pszFile, unsigned uLine, const char *pszFunction, const char *pszFormat, va_list args) +{ + int rc = dbgfR3EventPrologue(pVM, enmEvent); + if (RT_FAILURE(rc)) + return rc; + + /* + * Format the message. + */ + char *pszMessage = NULL; + char szMessage[8192]; + if (pszFormat && *pszFormat) + { + pszMessage = &szMessage[0]; + RTStrPrintfV(szMessage, sizeof(szMessage), pszFormat, args); + } + + /* + * Send the event and process the reply communication. + */ + pVM->dbgf.s.DbgEvent.enmType = enmEvent; + pVM->dbgf.s.DbgEvent.enmCtx = dbgfR3FigureEventCtx(pVM); + pVM->dbgf.s.DbgEvent.u.Src.pszFile = pszFile; + pVM->dbgf.s.DbgEvent.u.Src.uLine = uLine; + pVM->dbgf.s.DbgEvent.u.Src.pszFunction = pszFunction; + pVM->dbgf.s.DbgEvent.u.Src.pszMessage = pszMessage; + return dbgfR3SendEvent(pVM); +} + + +/** + * Send a debugger event which takes the two assertion messages. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmEvent The event to send. + * @param pszMsg1 First assertion message. + * @param pszMsg2 Second assertion message. + */ +VMMR3_INT_DECL(int) DBGFR3EventAssertion(PVM pVM, DBGFEVENTTYPE enmEvent, const char *pszMsg1, const char *pszMsg2) +{ + int rc = dbgfR3EventPrologue(pVM, enmEvent); + if (RT_FAILURE(rc)) + return rc; + + /* + * Send the event and process the reply communication. + */ + pVM->dbgf.s.DbgEvent.enmType = enmEvent; + pVM->dbgf.s.DbgEvent.enmCtx = dbgfR3FigureEventCtx(pVM); + pVM->dbgf.s.DbgEvent.u.Assert.pszMsg1 = pszMsg1; + pVM->dbgf.s.DbgEvent.u.Assert.pszMsg2 = pszMsg2; + return dbgfR3SendEvent(pVM); +} + + +/** + * Breakpoint was hit somewhere. + * Figure out which breakpoint it is and notify the debugger. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmEvent DBGFEVENT_BREAKPOINT_HYPER or DBGFEVENT_BREAKPOINT. + */ +VMMR3_INT_DECL(int) DBGFR3EventBreakpoint(PVM pVM, DBGFEVENTTYPE enmEvent) +{ + int rc = dbgfR3EventPrologue(pVM, enmEvent); + if (RT_FAILURE(rc)) + return rc; + + /* + * Send the event and process the reply communication. 
+ */ + /** @todo SMP */ + PVMCPU pVCpu = VMMGetCpu0(pVM); + + pVM->dbgf.s.DbgEvent.enmType = enmEvent; + RTUINT iBp = pVM->dbgf.s.DbgEvent.u.Bp.iBp = pVCpu->dbgf.s.iActiveBp; + pVCpu->dbgf.s.iActiveBp = ~0U; + if (iBp != ~0U) + pVM->dbgf.s.DbgEvent.enmCtx = DBGFEVENTCTX_RAW; + else + { + /* REM breakpoints has be been searched for. */ +#if 0 /** @todo get flat PC api! */ + uint32_t eip = CPUMGetGuestEIP(pVM); +#else + /** @todo SMP support!! */ + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(VMMGetCpu(pVM)); + RTGCPTR eip = pCtx->rip + pCtx->cs.u64Base; +#endif + for (size_t i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); i++) + if ( pVM->dbgf.s.aBreakpoints[i].enmType == DBGFBPTYPE_REM + && pVM->dbgf.s.aBreakpoints[i].u.Rem.GCPtr == eip) + { + pVM->dbgf.s.DbgEvent.u.Bp.iBp = pVM->dbgf.s.aBreakpoints[i].iBp; + break; + } + AssertMsg(pVM->dbgf.s.DbgEvent.u.Bp.iBp != ~0U, ("eip=%08x\n", eip)); + pVM->dbgf.s.DbgEvent.enmCtx = DBGFEVENTCTX_REM; + } + return dbgfR3SendEvent(pVM); +} + + +/** + * Waits for the debugger to respond. + * + * @returns VBox status code. (clearify) + * @param pVM The cross context VM structure. + */ +static int dbgfR3VMMWait(PVM pVM) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + + LogFlow(("dbgfR3VMMWait:\n")); + int rcRet = VINF_SUCCESS; + + /* + * Waits for the debugger to reply (i.e. issue an command). + */ + for (;;) + { + /* + * Wait. + */ + uint32_t cPollHack = 1; /** @todo this interface is horrible now that we're using lots of VMR3ReqCall stuff all over DBGF. */ + for (;;) + { + int rc; + if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_REQUEST) + && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST)) + { + rc = RTSemPingWait(&pVM->dbgf.s.PingPong, cPollHack); + if (RT_SUCCESS(rc)) + break; + if (rc != VERR_TIMEOUT) + { + LogFlow(("dbgfR3VMMWait: returns %Rrc\n", rc)); + return rc; + } + } + + if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + rc = VMMR3EmtRendezvousFF(pVM, pVCpu); + cPollHack = 1; + } + else if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST)) + { + LogFlow(("dbgfR3VMMWait: Processes requests...\n")); + rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, false /*fPriorityOnly*/); + if (rc == VINF_SUCCESS) + rc = VMR3ReqProcessU(pVM->pUVM, pVCpu->idCpu, false /*fPriorityOnly*/); + LogFlow(("dbgfR3VMMWait: VMR3ReqProcess -> %Rrc rcRet=%Rrc\n", rc, rcRet)); + cPollHack = 1; + } + else + { + rc = VINF_SUCCESS; + if (cPollHack < 120) + cPollHack++; + } + + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + switch (rc) + { + case VINF_EM_DBG_BREAKPOINT: + case VINF_EM_DBG_STEPPED: + case VINF_EM_DBG_STEP: + case VINF_EM_DBG_STOP: + case VINF_EM_DBG_EVENT: + AssertMsgFailed(("rc=%Rrc\n", rc)); + break; + + /* return straight away */ + case VINF_EM_TERMINATE: + case VINF_EM_OFF: + LogFlow(("dbgfR3VMMWait: returns %Rrc\n", rc)); + return rc; + + /* remember return code. */ + default: + AssertReleaseMsgFailed(("rc=%Rrc is not in the switch!\n", rc)); + RT_FALL_THRU(); + case VINF_EM_RESET: + case VINF_EM_SUSPEND: + case VINF_EM_HALT: + case VINF_EM_RESUME: + case VINF_EM_RESCHEDULE: + case VINF_EM_RESCHEDULE_REM: + case VINF_EM_RESCHEDULE_RAW: + if (rc < rcRet || rcRet == VINF_SUCCESS) + rcRet = rc; + break; + } + } + else if (RT_FAILURE(rc)) + { + LogFlow(("dbgfR3VMMWait: returns %Rrc\n", rc)); + return rc; + } + } + + /* + * Process the command. 
+ */ + bool fResumeExecution; + DBGFCMDDATA CmdData = pVM->dbgf.s.VMMCmdData; + DBGFCMD enmCmd = dbgfR3SetCmd(pVM, DBGFCMD_NO_COMMAND); + int rc = dbgfR3VMMCmd(pVM, enmCmd, &CmdData, &fResumeExecution); + if (fResumeExecution) + { + if (RT_FAILURE(rc)) + rcRet = rc; + else if ( rc >= VINF_EM_FIRST + && rc <= VINF_EM_LAST + && (rc < rcRet || rcRet == VINF_SUCCESS)) + rcRet = rc; + LogFlow(("dbgfR3VMMWait: returns %Rrc\n", rcRet)); + return rcRet; + } + } +} + + +/** + * Executes command from debugger. + * + * The caller is responsible for waiting or resuming execution based on the + * value returned in the *pfResumeExecution indicator. + * + * @returns VBox status code. (clearify!) + * @param pVM The cross context VM structure. + * @param enmCmd The command in question. + * @param pCmdData Pointer to the command data. + * @param pfResumeExecution Where to store the resume execution / continue waiting indicator. + */ +static int dbgfR3VMMCmd(PVM pVM, DBGFCMD enmCmd, PDBGFCMDDATA pCmdData, bool *pfResumeExecution) +{ + bool fSendEvent; + bool fResume; + int rc = VINF_SUCCESS; + + NOREF(pCmdData); /* for later */ + + switch (enmCmd) + { + /* + * Halt is answered by an event say that we've halted. + */ + case DBGFCMD_HALT: + { + pVM->dbgf.s.DbgEvent.enmType = DBGFEVENT_HALT_DONE; + pVM->dbgf.s.DbgEvent.enmCtx = dbgfR3FigureEventCtx(pVM); + fSendEvent = true; + fResume = false; + break; + } + + + /* + * Resume is not answered we'll just resume execution. + */ + case DBGFCMD_GO: + { + /** @todo SMP */ + PVMCPU pVCpu = VMMGetCpu0(pVM); + pVCpu->dbgf.s.fSingleSteppingRaw = false; + fSendEvent = false; + fResume = true; + break; + } + + /** @todo implement (and define) the rest of the commands. */ + + /* + * Disable breakpoints and stuff. + * Send an everythings cool event to the debugger thread and resume execution. + */ + case DBGFCMD_DETACH_DEBUGGER: + { + ASMAtomicWriteBool(&pVM->dbgf.s.fAttached, false); + pVM->dbgf.s.DbgEvent.enmType = DBGFEVENT_DETACH_DONE; + pVM->dbgf.s.DbgEvent.enmCtx = DBGFEVENTCTX_OTHER; + pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID; + fSendEvent = true; + fResume = true; + break; + } + + /* + * The debugger has detached successfully. + * There is no reply to this event. + */ + case DBGFCMD_DETACHED_DEBUGGER: + { + fSendEvent = false; + fResume = true; + break; + } + + /* + * Single step, with trace into. + */ + case DBGFCMD_SINGLE_STEP: + { + Log2(("Single step\n")); + /** @todo SMP */ + PVMCPU pVCpu = VMMGetCpu0(pVM); + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_OVER) + { + if (dbgfStepGetCurInstrType(pVM, pVCpu) == DBGFSTEPINSTRTYPE_CALL) + pVM->dbgf.s.SteppingFilter.uCallDepth++; + } + if (pVM->dbgf.s.SteppingFilter.cMaxSteps > 0) + { + pVCpu->dbgf.s.fSingleSteppingRaw = true; + fSendEvent = false; + fResume = true; + rc = VINF_EM_DBG_STEP; + } + else + { + /* Stop after zero steps. Nonsense, but whatever. */ + pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID; + pVM->dbgf.s.DbgEvent.enmCtx = dbgfR3FigureEventCtx(pVM); + pVM->dbgf.s.DbgEvent.enmType = pVM->dbgf.s.DbgEvent.enmCtx != DBGFEVENTCTX_HYPER + ? DBGFEVENT_STEPPED : DBGFEVENT_STEPPED_HYPER; + fSendEvent = false; + fResume = false; + } + break; + } + + /* + * Default is to send an invalid command event. + */ + default: + { + pVM->dbgf.s.DbgEvent.enmType = DBGFEVENT_INVALID_COMMAND; + pVM->dbgf.s.DbgEvent.enmCtx = dbgfR3FigureEventCtx(pVM); + fSendEvent = true; + fResume = false; + break; + } + } + + /* + * Send pending event. 
+ */ + if (fSendEvent) + { + Log2(("DBGF: Emulation thread: sending event %d\n", pVM->dbgf.s.DbgEvent.enmType)); + int rc2 = RTSemPing(&pVM->dbgf.s.PingPong); + if (RT_FAILURE(rc2)) + { + AssertRC(rc2); + *pfResumeExecution = true; + return rc2; + } + } + + /* + * Return. + */ + *pfResumeExecution = fResume; + return rc; +} + + +/** + * Attaches a debugger to the specified VM. + * + * Only one debugger at a time. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(int) DBGFR3Attach(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Call the VM, use EMT for serialization. + * + * Using a priority call here so we can actually attach a debugger during + * the countdown in dbgfR3WaitForAttach. + */ + /** @todo SMP */ + return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)dbgfR3Attach, 1, pVM); +} + + +/** + * EMT worker for DBGFR3Attach. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static DECLCALLBACK(int) dbgfR3Attach(PVM pVM) +{ + if (pVM->dbgf.s.fAttached) + { + Log(("dbgR3Attach: Debugger already attached\n")); + return VERR_DBGF_ALREADY_ATTACHED; + } + + /* + * Create the Ping-Pong structure. + */ + int rc = RTSemPingPongInit(&pVM->dbgf.s.PingPong); + AssertRCReturn(rc, rc); + + /* + * Set the attached flag. + */ + ASMAtomicWriteBool(&pVM->dbgf.s.fAttached, true); + return VINF_SUCCESS; +} + + +/** + * Detaches a debugger from the specified VM. + * + * Caller must be attached to the VM. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(int) DBGFR3Detach(PUVM pUVM) +{ + LogFlow(("DBGFR3Detach:\n")); + int rc; + + /* + * Validate input. The UVM handle shall be valid, the VM handle might be + * in the processes of being destroyed already, so deal quietly with that. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + if (!VM_IS_VALID_EXT(pVM)) + return VERR_INVALID_VM_HANDLE; + + /* + * Check if attached. + */ + if (!pVM->dbgf.s.fAttached) + return VERR_DBGF_NOT_ATTACHED; + + /* + * Try send the detach command. + * Keep in mind that we might be racing EMT, so, be extra careful. + */ + DBGFCMD enmCmd = dbgfR3SetCmd(pVM, DBGFCMD_DETACH_DEBUGGER); + if (RTSemPongIsSpeaker(&pVM->dbgf.s.PingPong)) + { + rc = RTSemPong(&pVM->dbgf.s.PingPong); + AssertMsgRCReturn(rc, ("Failed to signal emulation thread. rc=%Rrc\n", rc), rc); + LogRel(("DBGFR3Detach: enmCmd=%d (pong -> ping)\n", enmCmd)); + } + + /* + * Wait for the OK event. + */ + rc = RTSemPongWait(&pVM->dbgf.s.PingPong, RT_INDEFINITE_WAIT); + AssertLogRelMsgRCReturn(rc, ("Wait on detach command failed, rc=%Rrc\n", rc), rc); + + /* + * Send the notification command indicating that we're really done. + */ + enmCmd = dbgfR3SetCmd(pVM, DBGFCMD_DETACHED_DEBUGGER); + rc = RTSemPong(&pVM->dbgf.s.PingPong); + AssertMsgRCReturn(rc, ("Failed to signal emulation thread. rc=%Rrc\n", rc), rc); + + LogFlowFunc(("returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Wait for a debug event. + * + * @returns VBox status code. Will not return VBOX_INTERRUPTED. + * @param pUVM The user mode VM handle. + * @param cMillies Number of millis to wait. + * @param ppEvent Where to store the event pointer. + */ +VMMR3DECL(int) DBGFR3EventWait(PUVM pUVM, RTMSINTERVAL cMillies, PCDBGFEVENT *ppEvent) +{ + /* + * Check state. 
+ */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pVM->dbgf.s.fAttached, VERR_DBGF_NOT_ATTACHED); + *ppEvent = NULL; + + /* + * Wait. + */ + int rc = RTSemPongWait(&pVM->dbgf.s.PingPong, cMillies); + if (RT_SUCCESS(rc)) + { + *ppEvent = &pVM->dbgf.s.DbgEvent; + Log2(("DBGF: Debugger thread: receiving event %d\n", (*ppEvent)->enmType)); + return VINF_SUCCESS; + } + + return rc; +} + + +/** + * Halts VM execution. + * + * After calling this the VM isn't actually halted till an DBGFEVENT_HALT_DONE + * arrives. Until that time it's not possible to issue any new commands. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(int) DBGFR3Halt(PUVM pUVM) +{ + /* + * Check state. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pVM->dbgf.s.fAttached, VERR_DBGF_NOT_ATTACHED); + RTPINGPONGSPEAKER enmSpeaker = pVM->dbgf.s.PingPong.enmSpeaker; + if ( enmSpeaker == RTPINGPONGSPEAKER_PONG + || enmSpeaker == RTPINGPONGSPEAKER_PONG_SIGNALED) + return VWRN_DBGF_ALREADY_HALTED; + + /* + * Send command. + */ + dbgfR3SetCmd(pVM, DBGFCMD_HALT); + + return VINF_SUCCESS; +} + + +/** + * Checks if the VM is halted by the debugger. + * + * @returns True if halted. + * @returns False if not halted. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) DBGFR3IsHalted(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + AssertReturn(pVM->dbgf.s.fAttached, false); + + RTPINGPONGSPEAKER enmSpeaker = pVM->dbgf.s.PingPong.enmSpeaker; + return enmSpeaker == RTPINGPONGSPEAKER_PONG_SIGNALED + || enmSpeaker == RTPINGPONGSPEAKER_PONG; +} + + +/** + * Checks if the debugger can wait for events or not. + * + * This function is only used by lazy, multiplexing debuggers. :-) + * + * @returns VBox status code. + * @retval VINF_SUCCESS if waitable. + * @retval VERR_SEM_OUT_OF_TURN if not waitable. + * @retval VERR_INVALID_VM_HANDLE if the VM is being (/ has been) destroyed + * (not asserted) or if the handle is invalid (asserted). + * @retval VERR_DBGF_NOT_ATTACHED if not attached. + * + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(int) DBGFR3QueryWaitable(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* Note! There is a slight race here, unfortunately. */ + PVM pVM = pUVM->pVM; + if (!RT_VALID_PTR(pVM)) + return VERR_INVALID_VM_HANDLE; + if (pVM->enmVMState >= VMSTATE_DESTROYING) + return VERR_INVALID_VM_HANDLE; + if (!pVM->dbgf.s.fAttached) + return VERR_DBGF_NOT_ATTACHED; + + if (!RTSemPongShouldWait(&pVM->dbgf.s.PingPong)) + return VERR_SEM_OUT_OF_TURN; + + return VINF_SUCCESS; +} + + +/** + * Resumes VM execution. + * + * There is no receipt event on this command. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(int) DBGFR3Resume(PUVM pUVM) +{ + /* + * Check state. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pVM->dbgf.s.fAttached, VERR_DBGF_NOT_ATTACHED); + if (RT_LIKELY(RTSemPongIsSpeaker(&pVM->dbgf.s.PingPong))) + { /* likely */ } + else + return VERR_SEM_OUT_OF_TURN; + + /* + * Send the ping back to the emulation thread telling it to run. 
+ */ + dbgfR3SetCmd(pVM, DBGFCMD_GO); + int rc = RTSemPong(&pVM->dbgf.s.PingPong); + AssertRC(rc); + + return rc; +} + + +/** + * Classifies the current instruction. + * + * @returns Type of instruction. + * @param pVM The cross context VM structure. + * @param pVCpu The current CPU. + * @thread EMT(pVCpu) + */ +static DBGFSTEPINSTRTYPE dbgfStepGetCurInstrType(PVM pVM, PVMCPU pVCpu) +{ + /* + * Read the instruction. + */ + bool fIsHyper = dbgfR3FigureEventCtx(pVM) == DBGFEVENTCTX_HYPER; + size_t cbRead = 0; + uint8_t abOpcode[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int rc = PGMR3DbgReadGCPtr(pVM, abOpcode, !fIsHyper ? CPUMGetGuestFlatPC(pVCpu) : CPUMGetHyperRIP(pVCpu), + sizeof(abOpcode) - 1, 0 /*fFlags*/, &cbRead); + if (RT_SUCCESS(rc)) + { + /* + * Do minimal parsing. No real need to involve the disassembler here. + */ + uint8_t *pb = abOpcode; + for (;;) + { + switch (*pb++) + { + default: + return DBGFSTEPINSTRTYPE_OTHER; + + case 0xe8: /* call rel16/32 */ + case 0x9a: /* call farptr */ + case 0xcc: /* int3 */ + case 0xcd: /* int xx */ + // case 0xce: /* into */ + return DBGFSTEPINSTRTYPE_CALL; + + case 0xc2: /* ret xx */ + case 0xc3: /* ret */ + case 0xca: /* retf xx */ + case 0xcb: /* retf */ + case 0xcf: /* iret */ + return DBGFSTEPINSTRTYPE_RET; + + case 0xff: + if ( ((*pb >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) == 2 /* call indir */ + || ((*pb >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) == 3) /* call indir-farptr */ + return DBGFSTEPINSTRTYPE_CALL; + return DBGFSTEPINSTRTYPE_OTHER; + + case 0x0f: + switch (*pb++) + { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + return DBGFSTEPINSTRTYPE_CALL; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + return DBGFSTEPINSTRTYPE_RET; + } + break; + + /* Must handle some REX prefixes. So we do all normal prefixes. */ + case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f: + if (fIsHyper) /* ASSUMES 32-bit raw-mode! */ + return DBGFSTEPINSTRTYPE_OTHER; + if (!CPUMIsGuestIn64BitCode(pVCpu)) + return DBGFSTEPINSTRTYPE_OTHER; + break; + + case 0x2e: /* CS */ + case 0x36: /* SS */ + case 0x3e: /* DS */ + case 0x26: /* ES */ + case 0x64: /* FS */ + case 0x65: /* GS */ + case 0x66: /* op size */ + case 0x67: /* addr size */ + case 0xf0: /* lock */ + case 0xf2: /* REPNZ */ + case 0xf3: /* REPZ */ + break; + } + } + } + + return DBGFSTEPINSTRTYPE_INVALID; +} + + +/** + * Checks if the stepping has reached a stop point. + * + * Called when raising a stepped event. + * + * @returns true if the event should be raised, false if we should take one more + * step first. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context per CPU structure of the calling EMT. + * @thread EMT(pVCpu) + */ +static bool dbgfStepAreWeThereYet(PVM pVM, PVMCPU pVCpu) +{ + /* + * Check valid pVCpu and that it matches the CPU one stepping. + */ + if (pVCpu) + { + if (pVCpu->idCpu == pVM->dbgf.s.SteppingFilter.idCpu) + { + /* + * Increase the number of steps and see if we've reached the max. + */ + pVM->dbgf.s.SteppingFilter.cSteps++; + if (pVM->dbgf.s.SteppingFilter.cSteps < pVM->dbgf.s.SteppingFilter.cMaxSteps) + { + /* + * Check PC and SP address filtering. 
+ */ + if (pVM->dbgf.s.SteppingFilter.fFlags & (DBGF_STEP_F_STOP_ON_ADDRESS | DBGF_STEP_F_STOP_ON_STACK_POP)) + { + bool fIsHyper = dbgfR3FigureEventCtx(pVM) == DBGFEVENTCTX_HYPER; + if ( (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_ADDRESS) + && pVM->dbgf.s.SteppingFilter.AddrPc == (!fIsHyper ? CPUMGetGuestFlatPC(pVCpu) : CPUMGetHyperRIP(pVCpu))) + return true; + if ( (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_STACK_POP) + && (!fIsHyper ? CPUMGetGuestFlatSP(pVCpu) : (uint64_t)CPUMGetHyperESP(pVCpu)) + - pVM->dbgf.s.SteppingFilter.AddrStackPop + < pVM->dbgf.s.SteppingFilter.cbStackPop) + return true; + } + + /* + * Do step-over filtering separate from the step-into one. + */ + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_OVER) + { + DBGFSTEPINSTRTYPE enmType = dbgfStepGetCurInstrType(pVM, pVCpu); + switch (enmType) + { + default: + if ( pVM->dbgf.s.SteppingFilter.uCallDepth != 0 + || (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_FILTER_MASK)) + break; + return true; + case DBGFSTEPINSTRTYPE_CALL: + if ( (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_CALL) + && pVM->dbgf.s.SteppingFilter.uCallDepth == 0) + return true; + pVM->dbgf.s.SteppingFilter.uCallDepth++; + break; + case DBGFSTEPINSTRTYPE_RET: + if (pVM->dbgf.s.SteppingFilter.uCallDepth == 0) + { + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_RET) + return true; + /* If after return, we use the cMaxStep limit to stop the next time. */ + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_AFTER_RET) + pVM->dbgf.s.SteppingFilter.cMaxSteps = pVM->dbgf.s.SteppingFilter.cSteps + 1; + } + else if (pVM->dbgf.s.SteppingFilter.uCallDepth > 0) + pVM->dbgf.s.SteppingFilter.uCallDepth--; + break; + } + return false; + } + /* + * Filtered step-into. + */ + else if ( pVM->dbgf.s.SteppingFilter.fFlags + & (DBGF_STEP_F_STOP_ON_CALL | DBGF_STEP_F_STOP_ON_RET | DBGF_STEP_F_STOP_AFTER_RET)) + { + DBGFSTEPINSTRTYPE enmType = dbgfStepGetCurInstrType(pVM, pVCpu); + switch (enmType) + { + default: + break; + case DBGFSTEPINSTRTYPE_CALL: + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_CALL) + return true; + break; + case DBGFSTEPINSTRTYPE_RET: + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_ON_RET) + return true; + /* If after return, we use the cMaxStep limit to stop the next time. */ + if (pVM->dbgf.s.SteppingFilter.fFlags & DBGF_STEP_F_STOP_AFTER_RET) + pVM->dbgf.s.SteppingFilter.cMaxSteps = pVM->dbgf.s.SteppingFilter.cSteps + 1; + break; + } + return false; + } + } + } + } + + return true; +} + + +/** + * Step Into. + * + * A single step event is generated from this command. + * The current implementation is not reliable, so don't rely on the event coming. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU to single step on. + */ +VMMR3DECL(int) DBGFR3Step(PUVM pUVM, VMCPUID idCpu) +{ + return DBGFR3StepEx(pUVM, idCpu, DBGF_STEP_F_INTO, NULL, NULL, 0, 1); +} + + +/** + * Full fleged step. + * + * This extended stepping API allows for doing multiple steps before raising an + * event, helping implementing step over, step out and other more advanced + * features. + * + * Like the DBGFR3Step() API, this will normally generate a DBGFEVENT_STEPPED or + * DBGFEVENT_STEPPED_EVENT. However the stepping may be interrupted by other + * events, which will abort the stepping. + * + * The stop on pop area feature is for safeguarding step out. 
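Putting the stepping filter flags together, a debugger could implement a "step out of the current function" request roughly as sketched below. The sketch only shows assumed typical usage: GCPtrSp stands for the guest stack pointer read from the register state beforehand, the DBGFADDRESS helper comes from DBGFAddr.cpp further down in this patch, and the 64 KB pop range and 10000-step cap are arbitrary example values.

/* Illustrative "step out": step over calls until a return pops the stack
   above the current SP, using the stop-on-pop safeguard described above. */
static int myStepOut(PUVM pUVM, VMCPUID idCpu, RTGCUINTPTR GCPtrSp)
{
    DBGFADDRESS StackPop;
    DBGFR3AddrFromFlat(pUVM, &StackPop, GCPtrSp);

    uint32_t fFlags = DBGF_STEP_F_OVER                /* don't descend into calls */
                    | DBGF_STEP_F_STOP_AFTER_RET      /* stop right after returning */
                    | DBGF_STEP_F_STOP_ON_STACK_POP;  /* safeguard for the step out */
    return DBGFR3StepEx(pUVM, idCpu, fFlags, NULL /*pStopPcAddr*/,
                        &StackPop, _64K /*cbStopPop*/, 10000 /*cMaxSteps*/);
}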
+ * + * Please note though, that it will always use stepping and never breakpoints. + * While this allows for a much greater flexibility it can at times be rather + * slow. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU to single step on. + * @param fFlags Flags controlling the stepping, DBGF_STEP_F_XXX. + * Either DBGF_STEP_F_INTO or DBGF_STEP_F_OVER must + * always be specified. + * @param pStopPcAddr Address to stop executing at. Completely ignored + * unless DBGF_STEP_F_STOP_ON_ADDRESS is specified. + * @param pStopPopAddr Stack address that SP must be lower than when + * performing DBGF_STEP_F_STOP_ON_STACK_POP filtering. + * @param cbStopPop The range starting at @a pStopPopAddr which is + * considered to be within the same thread stack. Note + * that the API allows @a pStopPopAddr and @a cbStopPop + * to form an area that wraps around and it will + * consider the part starting at 0 as included. + * @param cMaxSteps The maximum number of steps to take. This is to + * prevent stepping for ever, so passing UINT32_MAX is + * not recommended. + * + * @remarks The two address arguments must be guest context virtual addresses, + * or HMA. The code doesn't make much of a point of out HMA, though. + */ +VMMR3DECL(int) DBGFR3StepEx(PUVM pUVM, VMCPUID idCpu, uint32_t fFlags, PCDBGFADDRESS pStopPcAddr, + PCDBGFADDRESS pStopPopAddr, RTGCUINTPTR cbStopPop, uint32_t cMaxSteps) +{ + /* + * Check state. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_PARAMETER); + AssertReturn(!(fFlags & ~DBGF_STEP_F_VALID_MASK), VERR_INVALID_FLAGS); + AssertReturn(RT_BOOL(fFlags & DBGF_STEP_F_INTO) != RT_BOOL(fFlags & DBGF_STEP_F_OVER), VERR_INVALID_FLAGS); + if (fFlags & DBGF_STEP_F_STOP_ON_ADDRESS) + { + AssertReturn(RT_VALID_PTR(pStopPcAddr), VERR_INVALID_POINTER); + AssertReturn(DBGFADDRESS_IS_VALID(pStopPcAddr), VERR_INVALID_PARAMETER); + AssertReturn(DBGFADDRESS_IS_VIRT_GC(pStopPcAddr), VERR_INVALID_PARAMETER); + } + AssertReturn(!(fFlags & DBGF_STEP_F_STOP_ON_STACK_POP) || RT_VALID_PTR(pStopPopAddr), VERR_INVALID_POINTER); + if (fFlags & DBGF_STEP_F_STOP_ON_STACK_POP) + { + AssertReturn(RT_VALID_PTR(pStopPopAddr), VERR_INVALID_POINTER); + AssertReturn(DBGFADDRESS_IS_VALID(pStopPopAddr), VERR_INVALID_PARAMETER); + AssertReturn(DBGFADDRESS_IS_VIRT_GC(pStopPopAddr), VERR_INVALID_PARAMETER); + AssertReturn(cbStopPop > 0, VERR_INVALID_PARAMETER); + } + + AssertReturn(pVM->dbgf.s.fAttached, VERR_DBGF_NOT_ATTACHED); + if (RT_LIKELY(RTSemPongIsSpeaker(&pVM->dbgf.s.PingPong))) + { /* likely */ } + else + return VERR_SEM_OUT_OF_TURN; + Assert(pVM->dbgf.s.SteppingFilter.idCpu == NIL_VMCPUID); + + /* + * Send the ping back to the emulation thread telling it to run. 
+ */ + if (fFlags == DBGF_STEP_F_INTO) + pVM->dbgf.s.SteppingFilter.idCpu = NIL_VMCPUID; + else + pVM->dbgf.s.SteppingFilter.idCpu = idCpu; + pVM->dbgf.s.SteppingFilter.fFlags = fFlags; + if (fFlags & DBGF_STEP_F_STOP_ON_ADDRESS) + pVM->dbgf.s.SteppingFilter.AddrPc = pStopPcAddr->FlatPtr; + else + pVM->dbgf.s.SteppingFilter.AddrPc = 0; + if (fFlags & DBGF_STEP_F_STOP_ON_STACK_POP) + { + pVM->dbgf.s.SteppingFilter.AddrStackPop = pStopPopAddr->FlatPtr; + pVM->dbgf.s.SteppingFilter.cbStackPop = cbStopPop; + } + else + { + pVM->dbgf.s.SteppingFilter.AddrStackPop = 0; + pVM->dbgf.s.SteppingFilter.cbStackPop = RTGCPTR_MAX; + } + + pVM->dbgf.s.SteppingFilter.cMaxSteps = cMaxSteps; + pVM->dbgf.s.SteppingFilter.cSteps = 0; + pVM->dbgf.s.SteppingFilter.uCallDepth = 0; + +/** @todo SMP (idCpu) */ + dbgfR3SetCmd(pVM, DBGFCMD_SINGLE_STEP); + int rc = RTSemPong(&pVM->dbgf.s.PingPong); + AssertRC(rc); + return rc; +} + + + +/** + * dbgfR3EventConfigEx argument packet. + */ +typedef struct DBGFR3EVENTCONFIGEXARGS +{ + PCDBGFEVENTCONFIG paConfigs; + size_t cConfigs; + int rc; +} DBGFR3EVENTCONFIGEXARGS; +/** Pointer to a dbgfR3EventConfigEx argument packet. */ +typedef DBGFR3EVENTCONFIGEXARGS *PDBGFR3EVENTCONFIGEXARGS; + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS, Worker for DBGFR3EventConfigEx.} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3EventConfigEx(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + if (pVCpu->idCpu == 0) + { + PDBGFR3EVENTCONFIGEXARGS pArgs = (PDBGFR3EVENTCONFIGEXARGS)pvUser; + DBGFEVENTCONFIG volatile const *paConfigs = pArgs->paConfigs; + size_t cConfigs = pArgs->cConfigs; + + /* + * Apply the changes. + */ + unsigned cChanges = 0; + for (uint32_t i = 0; i < cConfigs; i++) + { + DBGFEVENTTYPE enmType = paConfigs[i].enmType; + AssertReturn(enmType >= DBGFEVENT_FIRST_SELECTABLE && enmType < DBGFEVENT_END, VERR_INVALID_PARAMETER); + if (paConfigs[i].fEnabled) + cChanges += ASMAtomicBitTestAndSet(&pVM->dbgf.s.bmSelectedEvents, enmType) == false; + else + cChanges += ASMAtomicBitTestAndClear(&pVM->dbgf.s.bmSelectedEvents, enmType) == true; + } + + /* + * Inform HM about changes. + */ + if (cChanges > 0 && HMIsEnabled(pVM)) + { + HMR3NotifyDebugEventChanged(pVM); + HMR3NotifyDebugEventChangedPerCpu(pVM, pVCpu); + } + } + else if (HMIsEnabled(pVM)) + HMR3NotifyDebugEventChangedPerCpu(pVM, pVCpu); + + return VINF_SUCCESS; +} + + +/** + * Configures (enables/disables) multiple selectable debug events. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param paConfigs The event to configure and their new state. + * @param cConfigs Number of entries in @a paConfigs. + */ +VMMR3DECL(int) DBGFR3EventConfigEx(PUVM pUVM, PCDBGFEVENTCONFIG paConfigs, size_t cConfigs) +{ + /* + * Validate input. + */ + size_t i = cConfigs; + while (i-- > 0) + { + AssertReturn(paConfigs[i].enmType >= DBGFEVENT_FIRST_SELECTABLE, VERR_INVALID_PARAMETER); + AssertReturn(paConfigs[i].enmType < DBGFEVENT_END, VERR_INVALID_PARAMETER); + } + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Apply the changes in EMT(0) and rendezvous with the other CPUs so they + * can sync their data and execution with new debug state. 
+ */ + DBGFR3EVENTCONFIGEXARGS Args = { paConfigs, cConfigs, VINF_SUCCESS }; + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING | VMMEMTRENDEZVOUS_FLAGS_PRIORITY, + dbgfR3EventConfigEx, &Args); + if (RT_SUCCESS(rc)) + rc = Args.rc; + return rc; +} + + +/** + * Enables or disables a selectable debug event. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param enmEvent The selectable debug event. + * @param fEnabled The new state. + */ +VMMR3DECL(int) DBGFR3EventConfig(PUVM pUVM, DBGFEVENTTYPE enmEvent, bool fEnabled) +{ + /* + * Convert to an array call. + */ + DBGFEVENTCONFIG EvtCfg = { enmEvent, fEnabled }; + return DBGFR3EventConfigEx(pUVM, &EvtCfg, 1); +} + + +/** + * Checks if the given selectable event is enabled. + * + * @returns true if enabled, false if not or invalid input. + * @param pUVM The user mode VM handle. + * @param enmEvent The selectable debug event. + * @sa DBGFR3EventQuery + */ +VMMR3DECL(bool) DBGFR3EventIsEnabled(PUVM pUVM, DBGFEVENTTYPE enmEvent) +{ + /* + * Validate input. + */ + AssertReturn( enmEvent >= DBGFEVENT_HALT_DONE + && enmEvent < DBGFEVENT_END, false); + Assert( enmEvent >= DBGFEVENT_FIRST_SELECTABLE + || enmEvent == DBGFEVENT_BREAKPOINT + || enmEvent == DBGFEVENT_BREAKPOINT_IO + || enmEvent == DBGFEVENT_BREAKPOINT_MMIO); + + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + + /* + * Check the event status. + */ + return ASMBitTest(&pVM->dbgf.s.bmSelectedEvents, enmEvent); +} + + +/** + * Queries the status of a set of events. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param paConfigs The events to query and where to return the state. + * @param cConfigs The number of elements in @a paConfigs. + * @sa DBGFR3EventIsEnabled, DBGF_IS_EVENT_ENABLED + */ +VMMR3DECL(int) DBGFR3EventQuery(PUVM pUVM, PDBGFEVENTCONFIG paConfigs, size_t cConfigs) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + for (size_t i = 0; i < cConfigs; i++) + { + DBGFEVENTTYPE enmType = paConfigs[i].enmType; + AssertReturn( enmType >= DBGFEVENT_HALT_DONE + && enmType < DBGFEVENT_END, VERR_INVALID_PARAMETER); + Assert( enmType >= DBGFEVENT_FIRST_SELECTABLE + || enmType == DBGFEVENT_BREAKPOINT + || enmType == DBGFEVENT_BREAKPOINT_IO + || enmType == DBGFEVENT_BREAKPOINT_MMIO); + paConfigs[i].fEnabled = ASMBitTest(&pVM->dbgf.s.bmSelectedEvents, paConfigs[i].enmType); + } + + return VINF_SUCCESS; +} + + +/** + * dbgfR3InterruptConfigEx argument packet. + */ +typedef struct DBGFR3INTERRUPTCONFIGEXARGS +{ + PCDBGFINTERRUPTCONFIG paConfigs; + size_t cConfigs; + int rc; +} DBGFR3INTERRUPTCONFIGEXARGS; +/** Pointer to a dbgfR3InterruptConfigEx argument packet. */ +typedef DBGFR3INTERRUPTCONFIGEXARGS *PDBGFR3INTERRUPTCONFIGEXARGS; + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS, + * Worker for DBGFR3InterruptConfigEx.} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3InterruptConfigEx(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + if (pVCpu->idCpu == 0) + { + PDBGFR3INTERRUPTCONFIGEXARGS pArgs = (PDBGFR3INTERRUPTCONFIGEXARGS)pvUser; + PCDBGFINTERRUPTCONFIG paConfigs = pArgs->paConfigs; + size_t cConfigs = pArgs->cConfigs; + + /* + * Apply the changes. + */ + bool fChanged = false; + bool fThis; + for (uint32_t i = 0; i < cConfigs; i++) + { + /* + * Hardware interrupts. 
+ */ + if (paConfigs[i].enmHardState == DBGFINTERRUPTSTATE_ENABLED) + { + fChanged |= fThis = ASMAtomicBitTestAndSet(&pVM->dbgf.s.bmHardIntBreakpoints, paConfigs[i].iInterrupt) == false; + if (fThis) + { + Assert(pVM->dbgf.s.cHardIntBreakpoints < 256); + pVM->dbgf.s.cHardIntBreakpoints++; + } + } + else if (paConfigs[i].enmHardState == DBGFINTERRUPTSTATE_DISABLED) + { + fChanged |= fThis = ASMAtomicBitTestAndClear(&pVM->dbgf.s.bmHardIntBreakpoints, paConfigs[i].iInterrupt) == true; + if (fThis) + { + Assert(pVM->dbgf.s.cHardIntBreakpoints > 0); + pVM->dbgf.s.cHardIntBreakpoints--; + } + } + + /* + * Software interrupts. + */ + if (paConfigs[i].enmHardState == DBGFINTERRUPTSTATE_ENABLED) + { + fChanged |= fThis = ASMAtomicBitTestAndSet(&pVM->dbgf.s.bmSoftIntBreakpoints, paConfigs[i].iInterrupt) == false; + if (fThis) + { + Assert(pVM->dbgf.s.cSoftIntBreakpoints < 256); + pVM->dbgf.s.cSoftIntBreakpoints++; + } + } + else if (paConfigs[i].enmSoftState == DBGFINTERRUPTSTATE_DISABLED) + { + fChanged |= fThis = ASMAtomicBitTestAndClear(&pVM->dbgf.s.bmSoftIntBreakpoints, paConfigs[i].iInterrupt) == true; + if (fThis) + { + Assert(pVM->dbgf.s.cSoftIntBreakpoints > 0); + pVM->dbgf.s.cSoftIntBreakpoints--; + } + } + } + + /* + * Update the event bitmap entries. + */ + if (pVM->dbgf.s.cHardIntBreakpoints > 0) + fChanged |= ASMAtomicBitTestAndSet(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_INTERRUPT_HARDWARE) == false; + else + fChanged |= ASMAtomicBitTestAndClear(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_INTERRUPT_HARDWARE) == true; + + if (pVM->dbgf.s.cSoftIntBreakpoints > 0) + fChanged |= ASMAtomicBitTestAndSet(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_INTERRUPT_SOFTWARE) == false; + else + fChanged |= ASMAtomicBitTestAndClear(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_INTERRUPT_SOFTWARE) == true; + + /* + * Inform HM about changes. + */ + if (fChanged && HMIsEnabled(pVM)) + { + HMR3NotifyDebugEventChanged(pVM); + HMR3NotifyDebugEventChangedPerCpu(pVM, pVCpu); + } + } + else if (HMIsEnabled(pVM)) + HMR3NotifyDebugEventChangedPerCpu(pVM, pVCpu); + + return VINF_SUCCESS; +} + + +/** + * Changes + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param paConfigs The events to query and where to return the state. + * @param cConfigs The number of elements in @a paConfigs. + * @sa DBGFR3InterruptConfigHardware, DBGFR3InterruptConfigSoftware + */ +VMMR3DECL(int) DBGFR3InterruptConfigEx(PUVM pUVM, PCDBGFINTERRUPTCONFIG paConfigs, size_t cConfigs) +{ + /* + * Validate input. + */ + size_t i = cConfigs; + while (i-- > 0) + { + AssertReturn(paConfigs[i].enmHardState <= DBGFINTERRUPTSTATE_DONT_TOUCH, VERR_INVALID_PARAMETER); + AssertReturn(paConfigs[i].enmSoftState <= DBGFINTERRUPTSTATE_DONT_TOUCH, VERR_INVALID_PARAMETER); + } + + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Apply the changes in EMT(0) and rendezvous with the other CPUs so they + * can sync their data and execution with new debug state. + */ + DBGFR3INTERRUPTCONFIGEXARGS Args = { paConfigs, cConfigs, VINF_SUCCESS }; + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING | VMMEMTRENDEZVOUS_FLAGS_PRIORITY, + dbgfR3InterruptConfigEx, &Args); + if (RT_SUCCESS(rc)) + rc = Args.rc; + return rc; +} + + +/** + * Configures interception of a hardware interrupt. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iInterrupt The interrupt number. 
+ * @param fEnabled Whether interception is enabled or not. + * @sa DBGFR3InterruptSoftwareConfig, DBGFR3InterruptConfigEx + */ +VMMR3DECL(int) DBGFR3InterruptHardwareConfig(PUVM pUVM, uint8_t iInterrupt, bool fEnabled) +{ + /* + * Convert to DBGFR3InterruptConfigEx call. + */ + DBGFINTERRUPTCONFIG IntCfg = { iInterrupt, (uint8_t)fEnabled, DBGFINTERRUPTSTATE_DONT_TOUCH }; + return DBGFR3InterruptConfigEx(pUVM, &IntCfg, 1); +} + + +/** + * Configures interception of a software interrupt. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iInterrupt The interrupt number. + * @param fEnabled Whether interception is enabled or not. + * @sa DBGFR3InterruptHardwareConfig, DBGFR3InterruptConfigEx + */ +VMMR3DECL(int) DBGFR3InterruptSoftwareConfig(PUVM pUVM, uint8_t iInterrupt, bool fEnabled) +{ + /* + * Convert to DBGFR3InterruptConfigEx call. + */ + DBGFINTERRUPTCONFIG IntCfg = { iInterrupt, DBGFINTERRUPTSTATE_DONT_TOUCH, (uint8_t)fEnabled }; + return DBGFR3InterruptConfigEx(pUVM, &IntCfg, 1); +} + + +/** + * Checks whether interception is enabled for a hardware interrupt. + * + * @returns true if enabled, false if not or invalid input. + * @param pUVM The user mode VM handle. + * @param iInterrupt The interrupt number. + * @sa DBGFR3InterruptSoftwareIsEnabled, DBGF_IS_HARDWARE_INT_ENABLED, + * DBGF_IS_SOFTWARE_INT_ENABLED + */ +VMMR3DECL(int) DBGFR3InterruptHardwareIsEnabled(PUVM pUVM, uint8_t iInterrupt) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + + /* + * Check it. + */ + return ASMBitTest(&pVM->dbgf.s.bmHardIntBreakpoints, iInterrupt); +} + + +/** + * Checks whether interception is enabled for a software interrupt. + * + * @returns true if enabled, false if not or invalid input. + * @param pUVM The user mode VM handle. + * @param iInterrupt The interrupt number. + * @sa DBGFR3InterruptHardwareIsEnabled, DBGF_IS_SOFTWARE_INT_ENABLED, + * DBGF_IS_HARDWARE_INT_ENABLED, + */ +VMMR3DECL(int) DBGFR3InterruptSoftwareIsEnabled(PUVM pUVM, uint8_t iInterrupt) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + + /* + * Check it. + */ + return ASMBitTest(&pVM->dbgf.s.bmSoftIntBreakpoints, iInterrupt); +} + + + +/** + * Call this to single step programmatically. + * + * You must pass down the return code to the EM loop! That's + * where the actual single stepping take place (at least in the + * current implementation). + * + * @returns VINF_EM_DBG_STEP + * + * @param pVCpu The cross context virtual CPU structure. + * + * @thread VCpu EMT + * @internal + */ +VMMR3_INT_DECL(int) DBGFR3PrgStep(PVMCPU pVCpu) +{ + VMCPU_ASSERT_EMT(pVCpu); + + pVCpu->dbgf.s.fSingleSteppingRaw = true; + return VINF_EM_DBG_STEP; +} + + +/** + * Inject an NMI into a running VM (only VCPU 0!) + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param idCpu The ID of the CPU to inject the NMI on. + */ +VMMR3DECL(int) DBGFR3InjectNMI(PUVM pUVM, VMCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + + /** @todo Implement generic NMI injection. 
*/ + /** @todo NEM: NMI injection */ + if (!HMIsEnabled(pVM)) + return VERR_NOT_SUP_IN_RAW_MODE; + + VMCPU_FF_SET(&pVM->aCpus[idCpu], VMCPU_FF_INTERRUPT_NMI); + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFAddr.cpp b/src/VBox/VMM/VMMR3/DBGFAddr.cpp new file mode 100644 index 00000000..9598b621 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFAddr.cpp @@ -0,0 +1,538 @@ +/* $Id: DBGFAddr.cpp $ */ +/** @file + * DBGF - Debugger Facility, Mixed Address Methods. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include +#include +#include "DBGFInternal.h" +#include +#include + +#include +#include +#include + + + +/** + * Checks if an address is in the HMA or not. + * + * @retval true if it's inside the HMA. + * @retval flase if it's not inside the HMA. + * + * @param pUVM The user mode VM handle. + * @param FlatPtr The address in question. + */ +DECLINLINE(bool) dbgfR3IsHMA(PUVM pUVM, RTGCUINTPTR FlatPtr) +{ + return VM_IS_RAW_MODE_ENABLED(pUVM->pVM) + && MMHyperIsInsideArea(pUVM->pVM, FlatPtr); +} + + +/** + * Common worker for DBGFR3AddrFromSelOff and DBGFR3AddrFromSelInfoOff. + */ +static int dbgfR3AddrFromSelInfoOffWorker(PDBGFADDRESS pAddress, PCDBGFSELINFO pSelInfo, RTUINTPTR off) +{ + if (pSelInfo->fFlags & (DBGFSELINFO_FLAGS_INVALID | DBGFSELINFO_FLAGS_NOT_PRESENT)) + return pSelInfo->fFlags & DBGFSELINFO_FLAGS_NOT_PRESENT + ? VERR_SELECTOR_NOT_PRESENT + : VERR_INVALID_SELECTOR; + + /** @todo This all goes voodoo in long mode. */ + /* check limit. */ + if (DBGFSelInfoIsExpandDown(pSelInfo)) + { + if ( !pSelInfo->u.Raw.Gen.u1Granularity + && off > UINT32_C(0xffff)) + return VERR_OUT_OF_SELECTOR_BOUNDS; + if (off <= pSelInfo->cbLimit) + return VERR_OUT_OF_SELECTOR_BOUNDS; + } + else if (off > pSelInfo->cbLimit) + return VERR_OUT_OF_SELECTOR_BOUNDS; + + pAddress->FlatPtr = pSelInfo->GCPtrBase + off; + + /** @todo fix all these selector tests! */ + if ( !pSelInfo->GCPtrBase + && pSelInfo->u.Raw.Gen.u1Granularity + && pSelInfo->u.Raw.Gen.u1DefBig) + pAddress->fFlags = DBGFADDRESS_FLAGS_FLAT; + else if (pSelInfo->cbLimit <= UINT32_C(0xffff)) + pAddress->fFlags = DBGFADDRESS_FLAGS_FAR16; + else if (pSelInfo->cbLimit <= UINT32_C(0xffffffff)) + pAddress->fFlags = DBGFADDRESS_FLAGS_FAR32; + else + pAddress->fFlags = DBGFADDRESS_FLAGS_FAR64; + + return VINF_SUCCESS; +} + + +/** + * Creates a mixed address from a Sel:off pair. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The CPU ID. + * @param pAddress Where to store the mixed address. + * @param Sel The selector part. + * @param off The offset part. 
+ */ +VMMR3DECL(int) DBGFR3AddrFromSelOff(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddress, RTSEL Sel, RTUINTPTR off) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_PARAMETER); + + pAddress->Sel = Sel; + pAddress->off = off; + if (Sel != DBGF_SEL_FLAT) + { + DBGFSELINFO SelInfo; + int rc = DBGFR3SelQueryInfo(pUVM, idCpu, Sel, DBGFSELQI_FLAGS_DT_GUEST | DBGFSELQI_FLAGS_DT_ADJ_64BIT_MODE, &SelInfo); + if (RT_FAILURE(rc) && VM_IS_RAW_MODE_ENABLED(pUVM->pVM)) + rc = DBGFR3SelQueryInfo(pUVM, idCpu, Sel, DBGFSELQI_FLAGS_DT_SHADOW, &SelInfo); + if (RT_FAILURE(rc)) + return rc; + rc = dbgfR3AddrFromSelInfoOffWorker(pAddress, &SelInfo, off); + if (RT_FAILURE(rc)) + return rc; + if ( (SelInfo.fFlags & DBGFSELINFO_FLAGS_HYPER) + || dbgfR3IsHMA(pUVM, pAddress->FlatPtr)) + pAddress->fFlags |= DBGFADDRESS_FLAGS_HMA; + } + else + { + pAddress->FlatPtr = off; + pAddress->fFlags = DBGFADDRESS_FLAGS_FLAT; + if (dbgfR3IsHMA(pUVM, pAddress->FlatPtr)) + pAddress->fFlags |= DBGFADDRESS_FLAGS_HMA; + } + pAddress->fFlags |= DBGFADDRESS_FLAGS_VALID; + + return VINF_SUCCESS; +} + + +/** + * Creates a mixed address from selector info and an offset into the segment + * described by it. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pAddress Where to store the mixed address. + * @param pSelInfo The selector info. + * @param off The offset part. + */ +VMMR3DECL(int) DBGFR3AddrFromSelInfoOff(PUVM pUVM, PDBGFADDRESS pAddress, PCDBGFSELINFO pSelInfo, RTUINTPTR off) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + pAddress->Sel = pSelInfo->Sel; + pAddress->off = off; + int rc = dbgfR3AddrFromSelInfoOffWorker(pAddress, pSelInfo, off); + if (RT_FAILURE(rc)) + return rc; + + pAddress->fFlags |= DBGFADDRESS_FLAGS_VALID; + if (dbgfR3IsHMA(pUVM, pAddress->FlatPtr)) + pAddress->fFlags |= DBGFADDRESS_FLAGS_HMA; + + return VINF_SUCCESS; +} + + +/** + * Creates a mixed address from a flat address. + * + * @returns pAddress. + * @param pUVM The user mode VM handle. + * @param pAddress Where to store the mixed address. + * @param FlatPtr The flat pointer. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3AddrFromFlat(PUVM pUVM, PDBGFADDRESS pAddress, RTGCUINTPTR FlatPtr) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, NULL); + pAddress->Sel = DBGF_SEL_FLAT; + pAddress->off = FlatPtr; + pAddress->FlatPtr = FlatPtr; + pAddress->fFlags = DBGFADDRESS_FLAGS_FLAT | DBGFADDRESS_FLAGS_VALID; + if (dbgfR3IsHMA(pUVM, pAddress->FlatPtr)) + pAddress->fFlags |= DBGFADDRESS_FLAGS_HMA; + return pAddress; +} + + +/** + * Creates a mixed address from a guest physical address. + * + * @returns pAddress. + * @param pUVM The user mode VM handle. + * @param pAddress Where to store the mixed address. + * @param PhysAddr The guest physical address. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3AddrFromPhys(PUVM pUVM, PDBGFADDRESS pAddress, RTGCPHYS PhysAddr) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + pAddress->Sel = DBGF_SEL_FLAT; + pAddress->off = PhysAddr; + pAddress->FlatPtr = PhysAddr; + pAddress->fFlags = DBGFADDRESS_FLAGS_PHYS | DBGFADDRESS_FLAGS_VALID; + return pAddress; +} + + +/** + * Creates a mixed address from a flat host ring-0 address. + * + * @returns pAddress + * @param pAddress Where to store the mixed address. + * @param R0Ptr The host ring-0 address. 
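+ *
+ * Illustrative sketch for this internal helper (editorial addition; R0PtrStack
+ * is an assumed ring-0 pointer obtained elsewhere, not defined here):
+ * @code
+ *      DBGFADDRESS Addr;
+ *      DBGFR3AddrFromHostR0(&Addr, R0PtrStack);
+ *      Assert(Addr.fFlags & DBGFADDRESS_FLAGS_RING0);
+ * @endcode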
+ */ +VMMR3_INT_DECL(PDBGFADDRESS) DBGFR3AddrFromHostR0(PDBGFADDRESS pAddress, RTR0UINTPTR R0Ptr) +{ + pAddress->FlatPtr = R0Ptr; + pAddress->off = R0Ptr; + pAddress->fFlags = DBGFADDRESS_FLAGS_RING0 | DBGFADDRESS_FLAGS_VALID; + pAddress->Sel = DBGF_SEL_FLAT; + return pAddress; +} + + +/** + * Checks if the specified address is valid (checks the structure pointer too). + * + * @returns true if valid. + * @returns false if invalid. + * @param pUVM The user mode VM handle. + * @param pAddress The address to validate. + */ +VMMR3DECL(bool) DBGFR3AddrIsValid(PUVM pUVM, PCDBGFADDRESS pAddress) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + if (!VALID_PTR(pAddress)) + return false; + if (!DBGFADDRESS_IS_VALID(pAddress)) + return false; + /* more? */ + return true; +} + + +/** + * Called on the EMT for the VCpu. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param pAddress The address. + * @param pGCPhys Where to return the physical address. + */ +static DECLCALLBACK(int) dbgfR3AddrToPhysOnVCpu(PVMCPU pVCpu, PCDBGFADDRESS pAddress, PRTGCPHYS pGCPhys) +{ + VMCPU_ASSERT_EMT(pVCpu); + /* This is just a wrapper because we cannot pass FlatPtr thru VMR3ReqCall directly. */ + return PGMGstGetPage(pVCpu, pAddress->FlatPtr, NULL, pGCPhys); +} + + +/** + * Converts an address to a guest physical address. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_PARAMETER if the address is invalid. + * @retval VERR_INVALID_STATE if the VM is being terminated or if the virtual + * CPU handle is invalid. + * @retval VERR_NOT_SUPPORTED is the type of address cannot be converted. + * @retval VERR_PAGE_NOT_PRESENT + * @retval VERR_PAGE_TABLE_NOT_PRESENT + * @retval VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT + * @retval VERR_PAGE_MAP_LEVEL4_NOT_PRESENT + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU context to convert virtual + * addresses. + * @param pAddress The address. + * @param pGCPhys Where to return the physical address. + */ +VMMR3DECL(int) DBGFR3AddrToPhys(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, PRTGCPHYS pGCPhys) +{ + /* + * Parameter validation. + */ + AssertPtr(pGCPhys); + *pGCPhys = NIL_RTGCPHYS; + AssertPtr(pAddress); + AssertReturn(DBGFADDRESS_IS_VALID(pAddress), VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_STATE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_PARAMETER); + + /* + * Convert by address type. + */ + int rc; + if (pAddress->fFlags & DBGFADDRESS_FLAGS_HMA) + rc = VERR_NOT_SUPPORTED; + else if (pAddress->fFlags & DBGFADDRESS_FLAGS_PHYS) + { + *pGCPhys = pAddress->FlatPtr; + rc = VINF_SUCCESS; + } + else + { + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + if (VMCPU_IS_EMT(pVCpu)) + rc = dbgfR3AddrToPhysOnVCpu(pVCpu, pAddress, pGCPhys); + else + rc = VMR3ReqPriorityCallWaitU(pUVM, pVCpu->idCpu, + (PFNRT)dbgfR3AddrToPhysOnVCpu, 3, pVCpu, pAddress, pGCPhys); + } + return rc; +} + + +/** + * Converts an address to a host physical address. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_PARAMETER if the address is invalid. + * @retval VERR_INVALID_STATE if the VM is being terminated or if the virtual + * CPU handle is invalid. + * @retval VERR_NOT_SUPPORTED is the type of address cannot be converted. 
+ * @retval VERR_PAGE_NOT_PRESENT + * @retval VERR_PAGE_TABLE_NOT_PRESENT + * @retval VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT + * @retval VERR_PAGE_MAP_LEVEL4_NOT_PRESENT + * @retval VERR_PGM_PHYS_PAGE_RESERVED + * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU context to convert virtual + * addresses. + * @param pAddress The address. + * @param pHCPhys Where to return the physical address. + */ +VMMR3DECL(int) DBGFR3AddrToHostPhys(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddress, PRTHCPHYS pHCPhys) +{ + /* + * Parameter validation. + */ + AssertPtr(pHCPhys); + *pHCPhys = NIL_RTHCPHYS; + AssertPtr(pAddress); + AssertReturn(DBGFADDRESS_IS_VALID(pAddress), VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_STATE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_PARAMETER); + + /* + * Convert it if we can. + */ + int rc; + if (pAddress->fFlags & DBGFADDRESS_FLAGS_HMA) + rc = VERR_NOT_SUPPORTED; /** @todo implement this */ + else + { + RTGCPHYS GCPhys; + rc = DBGFR3AddrToPhys(pUVM, idCpu, pAddress, &GCPhys); + if (RT_SUCCESS(rc)) + rc = PGMPhysGCPhys2HCPhys(pVM, pAddress->FlatPtr, pHCPhys); + } + return rc; +} + + +/** + * Called on the EMT for the VCpu. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU context. + * @param pAddress The address. + * @param fReadOnly Whether returning a read-only page is fine or not. + * @param ppvR3Ptr Where to return the address. + */ +static DECLCALLBACK(int) dbgfR3AddrToVolatileR3PtrOnVCpu(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddress, bool fReadOnly, + void **ppvR3Ptr) +{ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + Assert(idCpu == VMMGetCpuId(pVM)); + + int rc; + if (pAddress->fFlags & DBGFADDRESS_FLAGS_HMA) + { + rc = VERR_NOT_SUPPORTED; /** @todo create some dedicated errors for this stuff. */ + /** @todo this may assert, create a debug version of this which doesn't. */ + if ( VM_IS_RAW_MODE_ENABLED(pVM) + && MMHyperIsInsideArea(pVM, pAddress->FlatPtr)) + { + void *pv = MMHyperRCToCC(pVM, (RTRCPTR)pAddress->FlatPtr); + if (pv) + { + *ppvR3Ptr = pv; + rc = VINF_SUCCESS; + } + } + } + else + { + /* + * This is a tad ugly, but it gets the job done. + */ + PGMPAGEMAPLOCK Lock; + if (pAddress->fFlags & DBGFADDRESS_FLAGS_PHYS) + { + if (fReadOnly) + rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, pAddress->FlatPtr, (void const **)ppvR3Ptr, &Lock); + else + rc = PGMPhysGCPhys2CCPtr(pVM, pAddress->FlatPtr, ppvR3Ptr, &Lock); + } + else + { + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + if (fReadOnly) + rc = PGMPhysGCPtr2CCPtrReadOnly(pVCpu, pAddress->FlatPtr, (void const **)ppvR3Ptr, &Lock); + else + rc = PGMPhysGCPtr2CCPtr(pVCpu, pAddress->FlatPtr, ppvR3Ptr, &Lock); + } + if (RT_SUCCESS(rc)) + PGMPhysReleasePageMappingLock(pVM, &Lock); + } + return rc; +} + + + + +/** + * Converts an address to a volatile host virtual address. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_PARAMETER if the address is invalid. + * @retval VERR_INVALID_STATE if the VM is being terminated or if the virtual + * CPU handle is invalid. + * @retval VERR_NOT_SUPPORTED is the type of address cannot be converted. 
+ * @retval  VERR_PAGE_NOT_PRESENT
+ * @retval  VERR_PAGE_TABLE_NOT_PRESENT
+ * @retval  VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT
+ * @retval  VERR_PAGE_MAP_LEVEL4_NOT_PRESENT
+ * @retval  VERR_PGM_PHYS_PAGE_RESERVED
+ * @retval  VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS
+ *
+ * @param   pUVM        The user mode VM handle.
+ * @param   idCpu       The ID of the CPU context to convert virtual
+ *                      addresses.
+ * @param   pAddress    The address.
+ * @param   fReadOnly   Whether returning a read-only page is fine or not.
+ *                      If clear, the page may have to be made writable
+ *                      before we return.
+ * @param   ppvR3Ptr    Where to return the address.
+ */
+VMMR3DECL(int) DBGFR3AddrToVolatileR3Ptr(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddress, bool fReadOnly, void **ppvR3Ptr)
+{
+    /*
+     * Parameter validation.
+     */
+    AssertPtr(ppvR3Ptr);
+    *ppvR3Ptr = NULL;
+    AssertPtr(pAddress);
+    AssertReturn(DBGFADDRESS_IS_VALID(pAddress), VERR_INVALID_PARAMETER);
+    UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_STATE);
+    AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_PARAMETER);
+
+    /*
+     * Convert it.
+     */
+    return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3AddrToVolatileR3PtrOnVCpu, 5,
+                                    pUVM, idCpu, pAddress, fReadOnly, ppvR3Ptr);
+}
+
+
+/**
+ * Adds an offset to an address.
+ *
+ * @returns pAddress.
+ *
+ * @param   pAddress    The address.
+ * @param   uAddend     How much to add.
+ *
+ * @remarks No address space or segment limit checks are performed.
+ */
+VMMR3DECL(PDBGFADDRESS) DBGFR3AddrAdd(PDBGFADDRESS pAddress, RTGCUINTPTR uAddend)
+{
+    /*
+     * Parameter validation.
+     */
+    AssertPtrReturn(pAddress, NULL);
+    AssertReturn(DBGFADDRESS_IS_VALID(pAddress), NULL);
+
+    /*
+     * Add the stuff.
+     */
+    pAddress->off     += uAddend;
+    pAddress->FlatPtr += uAddend;
+
+    return pAddress;
+}
+
+
+/**
+ * Subtracts an offset from an address.
+ *
+ * @returns pAddress.
+ *
+ * @param   pAddress        The address.
+ * @param   uSubtrahend     How much to subtract.
+ *
+ * @remarks No address space or segment limit checks are performed.
+ */
+VMMR3DECL(PDBGFADDRESS) DBGFR3AddrSub(PDBGFADDRESS pAddress, RTGCUINTPTR uSubtrahend)
+{
+    /*
+     * Parameter validation.
+     */
+    AssertPtrReturn(pAddress, NULL);
+    AssertReturn(DBGFADDRESS_IS_VALID(pAddress), NULL);
+
+    /*
+     * Subtract the stuff.
+     */
+    pAddress->off     -= uSubtrahend;
+    pAddress->FlatPtr -= uSubtrahend;
+
+    return pAddress;
+}
+
diff --git a/src/VBox/VMM/VMMR3/DBGFAddrSpace.cpp b/src/VBox/VMM/VMMR3/DBGFAddrSpace.cpp
new file mode 100644
index 00000000..8f418d31
--- /dev/null
+++ b/src/VBox/VMM/VMMR3/DBGFAddrSpace.cpp
@@ -0,0 +1,1357 @@
+/* $Id: DBGFAddrSpace.cpp $ */
+/** @file
+ * DBGF - Debugger Facility, Address Space Management.
+ */
+
+/*
+ * Copyright (C) 2008-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_dbgf_addr_space   DBGFAddrSpace - Address Space Management
+ *
+ * What's an address space? It's mainly a convenient way of stuffing
+ * module segments and ad-hoc symbols together. It will also help out
+ * when the debugger gets extended to deal with user processes later.
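+ *
+ * A typical consumer resolves one of the fixed DBGF_AS_XXX aliases and queries
+ * symbols through it. Illustrative sketch (editorial addition; the symbol name
+ * is only an example):
+ * @code
+ *      RTDBGSYMBOL Sym;
+ *      int rc = DBGFR3AsSymbolByName(pUVM, DBGF_AS_KERNEL, "*!ExAllocatePoolWithTag", &Sym, NULL /*phMod*/);
+ *      if (RT_SUCCESS(rc))
+ *          LogRel(("%s at %RGv\n", Sym.szName, (RTGCPTR)Sym.Value));
+ * @endcode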
+ * + * There are two standard address spaces that will always be present: + * - The physical address space. + * - The global virtual address space. + * + * Additional address spaces will be added and removed at runtime for + * guest processes. The global virtual address space will be used to + * track the kernel parts of the OS, or at least the bits of the kernel + * that is part of all address spaces (mac os x and 4G/4G patched linux). + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include +#ifdef VBOX_WITH_RAW_MODE +# include +#endif +#include "DBGFInternal.h" +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Address space database node. + */ +typedef struct DBGFASDBNODE +{ + /** The node core for DBGF::AsHandleTree, the key is the address space handle. */ + AVLPVNODECORE HandleCore; + /** The node core for DBGF::AsPidTree, the key is the process id. */ + AVLU32NODECORE PidCore; + /** The node core for DBGF::AsNameSpace, the string is the address space name. */ + RTSTRSPACECORE NameCore; + +} DBGFASDBNODE; +/** Pointer to an address space database node. */ +typedef DBGFASDBNODE *PDBGFASDBNODE; + + +/** + * For dbgfR3AsLoadImageOpenData and dbgfR3AsLoadMapOpenData. + */ +typedef struct DBGFR3ASLOADOPENDATA +{ + const char *pszModName; + RTGCUINTPTR uSubtrahend; + uint32_t fFlags; + RTDBGMOD hMod; +} DBGFR3ASLOADOPENDATA; + +#if 0 /* unused */ +/** + * Callback for dbgfR3AsSearchPath and dbgfR3AsSearchEnvPath. + * + * @returns VBox status code. If success, then the search is completed. + * @param pszFilename The file name under evaluation. + * @param pvUser The user argument. + */ +typedef int FNDBGFR3ASSEARCHOPEN(const char *pszFilename, void *pvUser); +/** Pointer to a FNDBGFR3ASSEARCHOPEN. */ +typedef FNDBGFR3ASSEARCHOPEN *PFNDBGFR3ASSEARCHOPEN; +#endif + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Locks the address space database for writing. */ +#define DBGF_AS_DB_LOCK_WRITE(pUVM) \ + do { \ + int rcSem = RTSemRWRequestWrite((pUVM)->dbgf.s.hAsDbLock, RT_INDEFINITE_WAIT); \ + AssertRC(rcSem); \ + } while (0) + +/** Unlocks the address space database after writing. */ +#define DBGF_AS_DB_UNLOCK_WRITE(pUVM) \ + do { \ + int rcSem = RTSemRWReleaseWrite((pUVM)->dbgf.s.hAsDbLock); \ + AssertRC(rcSem); \ + } while (0) + +/** Locks the address space database for reading. */ +#define DBGF_AS_DB_LOCK_READ(pUVM) \ + do { \ + int rcSem = RTSemRWRequestRead((pUVM)->dbgf.s.hAsDbLock, RT_INDEFINITE_WAIT); \ + AssertRC(rcSem); \ + } while (0) + +/** Unlocks the address space database after reading. 
*/ +#define DBGF_AS_DB_UNLOCK_READ(pUVM) \ + do { \ + int rcSem = RTSemRWReleaseRead((pUVM)->dbgf.s.hAsDbLock); \ + AssertRC(rcSem); \ + } while (0) + + + +/** + * Initializes the address space parts of DBGF. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +int dbgfR3AsInit(PUVM pUVM) +{ + Assert(pUVM->pVM); + + /* + * Create the semaphore. + */ + int rc = RTSemRWCreate(&pUVM->dbgf.s.hAsDbLock); + AssertRCReturn(rc, rc); + + /* + * Create the debugging config instance and set it up, defaulting to + * deferred loading in order to keep things fast. + */ + rc = RTDbgCfgCreate(&pUVM->dbgf.s.hDbgCfg, NULL, true /*fNativePaths*/); + AssertRCReturn(rc, rc); + rc = RTDbgCfgChangeUInt(pUVM->dbgf.s.hDbgCfg, RTDBGCFGPROP_FLAGS, RTDBGCFGOP_PREPEND, + RTDBGCFG_FLAGS_DEFERRED); + AssertRCReturn(rc, rc); + + static struct + { + RTDBGCFGPROP enmProp; + const char *pszEnvName; + const char *pszCfgName; + } const s_aProps[] = + { + { RTDBGCFGPROP_FLAGS, "VBOXDBG_FLAGS", "Flags" }, + { RTDBGCFGPROP_PATH, "VBOXDBG_PATH", "Path" }, + { RTDBGCFGPROP_SUFFIXES, "VBOXDBG_SUFFIXES", "Suffixes" }, + { RTDBGCFGPROP_SRC_PATH, "VBOXDBG_SRC_PATH", "SrcPath" }, + }; + PCFGMNODE pCfgDbgf = CFGMR3GetChild(CFGMR3GetRootU(pUVM), "/DBGF"); + for (unsigned i = 0; i < RT_ELEMENTS(s_aProps); i++) + { + char szEnvValue[8192]; + rc = RTEnvGetEx(RTENV_DEFAULT, s_aProps[i].pszEnvName, szEnvValue, sizeof(szEnvValue), NULL); + if (RT_SUCCESS(rc)) + { + rc = RTDbgCfgChangeString(pUVM->dbgf.s.hDbgCfg, s_aProps[i].enmProp, RTDBGCFGOP_PREPEND, szEnvValue); + if (RT_FAILURE(rc)) + return VMR3SetError(pUVM, rc, RT_SRC_POS, + "DBGF Config Error: %s=%s -> %Rrc", s_aProps[i].pszEnvName, szEnvValue, rc); + } + else if (rc != VERR_ENV_VAR_NOT_FOUND) + return VMR3SetError(pUVM, rc, RT_SRC_POS, + "DBGF Config Error: Error querying env.var. %s: %Rrc", s_aProps[i].pszEnvName, rc); + + char *pszCfgValue; + rc = CFGMR3QueryStringAllocDef(pCfgDbgf, s_aProps[i].pszCfgName, &pszCfgValue, NULL); + if (RT_FAILURE(rc)) + return VMR3SetError(pUVM, rc, RT_SRC_POS, + "DBGF Config Error: Querying /DBGF/%s -> %Rrc", s_aProps[i].pszCfgName, rc); + if (pszCfgValue) + { + rc = RTDbgCfgChangeString(pUVM->dbgf.s.hDbgCfg, s_aProps[i].enmProp, RTDBGCFGOP_PREPEND, pszCfgValue); + if (RT_FAILURE(rc)) + return VMR3SetError(pUVM, rc, RT_SRC_POS, + "DBGF Config Error: /DBGF/%s=%s -> %Rrc", s_aProps[i].pszCfgName, pszCfgValue, rc); + MMR3HeapFree(pszCfgValue); + } + } + + /* + * Prepend the NoArch and VBoxDbgSyms directories to the path. + */ + char szPath[RTPATH_MAX]; + rc = RTPathAppPrivateNoArch(szPath, sizeof(szPath)); + AssertRCReturn(rc, rc); +#ifdef RT_OS_DARWIN + rc = RTPathAppend(szPath, sizeof(szPath), "../Resources/VBoxDbgSyms/"); +#else + rc = RTDbgCfgChangeString(pUVM->dbgf.s.hDbgCfg, RTDBGCFGPROP_PATH, RTDBGCFGOP_PREPEND, szPath); + AssertRCReturn(rc, rc); + + rc = RTPathAppend(szPath, sizeof(szPath), "VBoxDbgSyms/"); +#endif + AssertRCReturn(rc, rc); + rc = RTDbgCfgChangeString(pUVM->dbgf.s.hDbgCfg, RTDBGCFGPROP_PATH, RTDBGCFGOP_PREPEND, szPath); + AssertRCReturn(rc, rc); + + /* + * Create the standard address spaces. 
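+     * (These are the Global space, which also backs the kernel alias, plus the
+     * Physical, HyperRawMode and HyperRing0 spaces; the alias table entries
+     * are filled in below.)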
+ */ + RTDBGAS hDbgAs; + rc = RTDbgAsCreate(&hDbgAs, 0, RTGCPTR_MAX, "Global"); + AssertRCReturn(rc, rc); + rc = DBGFR3AsAdd(pUVM, hDbgAs, NIL_RTPROCESS); + AssertRCReturn(rc, rc); + pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_GLOBAL)] = hDbgAs; + + RTDbgAsRetain(hDbgAs); + pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_KERNEL)] = hDbgAs; + + rc = RTDbgAsCreate(&hDbgAs, 0, RTGCPHYS_MAX, "Physical"); + AssertRCReturn(rc, rc); + rc = DBGFR3AsAdd(pUVM, hDbgAs, NIL_RTPROCESS); + AssertRCReturn(rc, rc); + pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_PHYS)] = hDbgAs; + + rc = RTDbgAsCreate(&hDbgAs, 0, RTRCPTR_MAX, "HyperRawMode"); + AssertRCReturn(rc, rc); + rc = DBGFR3AsAdd(pUVM, hDbgAs, NIL_RTPROCESS); + AssertRCReturn(rc, rc); + pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_RC)] = hDbgAs; + RTDbgAsRetain(hDbgAs); + pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_RC_AND_GC_GLOBAL)] = hDbgAs; + + rc = RTDbgAsCreate(&hDbgAs, 0, RTR0PTR_MAX, "HyperRing0"); + AssertRCReturn(rc, rc); + rc = DBGFR3AsAdd(pUVM, hDbgAs, NIL_RTPROCESS); + AssertRCReturn(rc, rc); + pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_R0)] = hDbgAs; + + return VINF_SUCCESS; +} + + +/** + * Callback used by dbgfR3AsTerm / RTAvlPVDestroy to release an address space. + * + * @returns 0. + * @param pNode The address space database node. + * @param pvIgnore NULL. + */ +static DECLCALLBACK(int) dbgfR3AsTermDestroyNode(PAVLPVNODECORE pNode, void *pvIgnore) +{ + PDBGFASDBNODE pDbNode = (PDBGFASDBNODE)pNode; + RTDbgAsRelease((RTDBGAS)pDbNode->HandleCore.Key); + pDbNode->HandleCore.Key = NIL_RTDBGAS; + /* Don't bother freeing it here as MM will free it soon and MM is much at + it when doing it wholesale instead of piecemeal. */ + NOREF(pvIgnore); + return 0; +} + + +/** + * Terminates the address space parts of DBGF. + * + * @param pUVM The user mode VM handle. + */ +void dbgfR3AsTerm(PUVM pUVM) +{ + /* + * Create the semaphore. + */ + int rc = RTSemRWDestroy(pUVM->dbgf.s.hAsDbLock); + AssertRC(rc); + pUVM->dbgf.s.hAsDbLock = NIL_RTSEMRW; + + /* + * Release all the address spaces. + */ + RTAvlPVDestroy(&pUVM->dbgf.s.AsHandleTree, dbgfR3AsTermDestroyNode, NULL); + for (size_t i = 0; i < RT_ELEMENTS(pUVM->dbgf.s.ahAsAliases); i++) + { + RTDbgAsRelease(pUVM->dbgf.s.ahAsAliases[i]); + pUVM->dbgf.s.ahAsAliases[i] = NIL_RTDBGAS; + } + + /* + * Release the reference to the debugging config. + */ + rc = RTDbgCfgRelease(pUVM->dbgf.s.hDbgCfg); + AssertRC(rc); +} + + +/** + * Relocates the RC address space. + * + * @param pUVM The user mode VM handle. + * @param offDelta The relocation delta. + */ +void dbgfR3AsRelocate(PUVM pUVM, RTGCUINTPTR offDelta) +{ + /* + * We will relocate the raw-mode context modules by offDelta if they have + * been injected into the DBGF_AS_RC map. + */ + if ( pUVM->dbgf.s.afAsAliasPopuplated[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_RC)] + && offDelta != 0) + { + RTDBGAS hAs = pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(DBGF_AS_RC)]; + + /* Take a snapshot of the modules as we might have overlapping + addresses between the previous and new mapping. */ + RTDbgAsLockExcl(hAs); + uint32_t cModules = RTDbgAsModuleCount(hAs); + if (cModules > 0 && cModules < _4K) + { + struct DBGFASRELOCENTRY + { + RTDBGMOD hDbgMod; + RTRCPTR uOldAddr; + } *paEntries = (struct DBGFASRELOCENTRY *)RTMemTmpAllocZ(sizeof(paEntries[0]) * cModules); + if (paEntries) + { + /* Snapshot. 
*/ + for (uint32_t i = 0; i < cModules; i++) + { + paEntries[i].hDbgMod = RTDbgAsModuleByIndex(hAs, i); + AssertLogRelMsg(paEntries[i].hDbgMod != NIL_RTDBGMOD, ("iModule=%#x\n", i)); + + RTDBGASMAPINFO aMappings[1] = { { 0, 0 } }; + uint32_t cMappings = 1; + int rc = RTDbgAsModuleQueryMapByIndex(hAs, i, &aMappings[0], &cMappings, 0 /*fFlags*/); + if (RT_SUCCESS(rc) && cMappings == 1 && aMappings[0].iSeg == NIL_RTDBGSEGIDX) + paEntries[i].uOldAddr = (RTRCPTR)aMappings[0].Address; + else + AssertLogRelMsgFailed(("iModule=%#x rc=%Rrc cMappings=%#x.\n", i, rc, cMappings)); + } + + /* Unlink them. */ + for (uint32_t i = 0; i < cModules; i++) + { + int rc = RTDbgAsModuleUnlink(hAs, paEntries[i].hDbgMod); + AssertLogRelMsg(RT_SUCCESS(rc), ("iModule=%#x rc=%Rrc hDbgMod=%p\n", i, rc, paEntries[i].hDbgMod)); + } + + /* Link them at the new locations. */ + for (uint32_t i = 0; i < cModules; i++) + { + RTRCPTR uNewAddr = paEntries[i].uOldAddr + offDelta; + int rc = RTDbgAsModuleLink(hAs, paEntries[i].hDbgMod, uNewAddr, + RTDBGASLINK_FLAGS_REPLACE); + AssertLogRelMsg(RT_SUCCESS(rc), + ("iModule=%#x rc=%Rrc hDbgMod=%p %RRv -> %RRv\n", i, rc, paEntries[i].hDbgMod, + paEntries[i].uOldAddr, uNewAddr)); + RTDbgModRelease(paEntries[i].hDbgMod); + } + + RTMemTmpFree(paEntries); + } + else + AssertLogRelMsgFailed(("No memory for %#x modules.\n", cModules)); + } + else + AssertLogRelMsgFailed(("cModules=%#x\n", cModules)); + RTDbgAsUnlockExcl(hAs); + } +} + + +/** + * Gets the IPRT debugging configuration handle (no refs retained). + * + * @returns Config handle or NIL_RTDBGCFG. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(RTDBGCFG) DBGFR3AsGetConfig(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NIL_RTDBGCFG); + return pUVM->dbgf.s.hDbgCfg; +} + + +/** + * Adds the address space to the database. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param hDbgAs The address space handle. The reference of the caller + * will NOT be consumed. + * @param ProcId The process id or NIL_RTPROCESS. + */ +VMMR3DECL(int) DBGFR3AsAdd(PUVM pUVM, RTDBGAS hDbgAs, RTPROCESS ProcId) +{ + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + const char *pszName = RTDbgAsName(hDbgAs); + if (!pszName) + return VERR_INVALID_HANDLE; + uint32_t cRefs = RTDbgAsRetain(hDbgAs); + if (cRefs == UINT32_MAX) + return VERR_INVALID_HANDLE; + + /* + * Allocate a tracking node. + */ + int rc = VERR_NO_MEMORY; + PDBGFASDBNODE pDbNode = (PDBGFASDBNODE)MMR3HeapAllocU(pUVM, MM_TAG_DBGF_AS, sizeof(*pDbNode)); + if (pDbNode) + { + pDbNode->HandleCore.Key = hDbgAs; + pDbNode->PidCore.Key = ProcId; + pDbNode->NameCore.pszString = pszName; + pDbNode->NameCore.cchString = strlen(pszName); + DBGF_AS_DB_LOCK_WRITE(pUVM); + if (RTStrSpaceInsert(&pUVM->dbgf.s.AsNameSpace, &pDbNode->NameCore)) + { + if (RTAvlPVInsert(&pUVM->dbgf.s.AsHandleTree, &pDbNode->HandleCore)) + { + DBGF_AS_DB_UNLOCK_WRITE(pUVM); + return VINF_SUCCESS; + } + + /* bail out */ + RTStrSpaceRemove(&pUVM->dbgf.s.AsNameSpace, pszName); + } + DBGF_AS_DB_UNLOCK_WRITE(pUVM); + MMR3HeapFree(pDbNode); + } + RTDbgAsRelease(hDbgAs); + return rc; +} + + +/** + * Delete an address space from the database. + * + * The address space must not be engaged as any of the standard aliases. + * + * @returns VBox status code. + * @retval VERR_SHARING_VIOLATION if in use as an alias. + * @retval VERR_NOT_FOUND if not found in the address space database. + * + * @param pUVM The user mode VM handle. 
+ * @param hDbgAs The address space handle. Aliases are not allowed. + */ +VMMR3DECL(int) DBGFR3AsDelete(PUVM pUVM, RTDBGAS hDbgAs) +{ + /* + * Input validation. Retain the address space so it can be released outside + * the lock as well as validated. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + if (hDbgAs == NIL_RTDBGAS) + return VINF_SUCCESS; + uint32_t cRefs = RTDbgAsRetain(hDbgAs); + if (cRefs == UINT32_MAX) + return VERR_INVALID_HANDLE; + RTDbgAsRelease(hDbgAs); + + DBGF_AS_DB_LOCK_WRITE(pUVM); + + /* + * You cannot delete any of the aliases. + */ + for (size_t i = 0; i < RT_ELEMENTS(pUVM->dbgf.s.ahAsAliases); i++) + if (pUVM->dbgf.s.ahAsAliases[i] == hDbgAs) + { + DBGF_AS_DB_UNLOCK_WRITE(pUVM); + return VERR_SHARING_VIOLATION; + } + + /* + * Ok, try remove it from the database. + */ + PDBGFASDBNODE pDbNode = (PDBGFASDBNODE)RTAvlPVRemove(&pUVM->dbgf.s.AsHandleTree, hDbgAs); + if (!pDbNode) + { + DBGF_AS_DB_UNLOCK_WRITE(pUVM); + return VERR_NOT_FOUND; + } + RTStrSpaceRemove(&pUVM->dbgf.s.AsNameSpace, pDbNode->NameCore.pszString); + if (pDbNode->PidCore.Key != NIL_RTPROCESS) + RTAvlU32Remove(&pUVM->dbgf.s.AsPidTree, pDbNode->PidCore.Key); + + DBGF_AS_DB_UNLOCK_WRITE(pUVM); + + /* + * Free the resources. + */ + RTDbgAsRelease(hDbgAs); + MMR3HeapFree(pDbNode); + + return VINF_SUCCESS; +} + + +/** + * Changes an alias to point to a new address space. + * + * Not all the aliases can be changed, currently it's only DBGF_AS_GLOBAL + * and DBGF_AS_KERNEL. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param hAlias The alias to change. + * @param hAliasFor The address space hAlias should be an alias for. This + * can be an alias. The caller's reference to this address + * space will NOT be consumed. + */ +VMMR3DECL(int) DBGFR3AsSetAlias(PUVM pUVM, RTDBGAS hAlias, RTDBGAS hAliasFor) +{ + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertMsgReturn(DBGF_AS_IS_ALIAS(hAlias), ("%p\n", hAlias), VERR_INVALID_PARAMETER); + AssertMsgReturn(!DBGF_AS_IS_FIXED_ALIAS(hAlias), ("%p\n", hAlias), VERR_INVALID_PARAMETER); + RTDBGAS hRealAliasFor = DBGFR3AsResolveAndRetain(pUVM, hAliasFor); + if (hRealAliasFor == NIL_RTDBGAS) + return VERR_INVALID_HANDLE; + + /* + * Make sure the handle is already in the database. + */ + int rc = VERR_NOT_FOUND; + DBGF_AS_DB_LOCK_WRITE(pUVM); + if (RTAvlPVGet(&pUVM->dbgf.s.AsHandleTree, hRealAliasFor)) + { + /* + * Update the alias table and release the current address space. + */ + RTDBGAS hAsOld; + ASMAtomicXchgHandle(&pUVM->dbgf.s.ahAsAliases[DBGF_AS_ALIAS_2_INDEX(hAlias)], hRealAliasFor, &hAsOld); + uint32_t cRefs = RTDbgAsRelease(hAsOld); + Assert(cRefs > 0); Assert(cRefs != UINT32_MAX); NOREF(cRefs); + rc = VINF_SUCCESS; + } + else + RTDbgAsRelease(hRealAliasFor); + DBGF_AS_DB_UNLOCK_WRITE(pUVM); + + return rc; +} + + +/** + * @callback_method_impl{FNPDMR3ENUM} + */ +static DECLCALLBACK(int) dbgfR3AsLazyPopulateR0Callback(PVM pVM, const char *pszFilename, const char *pszName, + RTUINTPTR ImageBase, size_t cbImage, PDMLDRCTX enmCtx, void *pvArg) +{ + NOREF(pVM); NOREF(cbImage); + + /* Only ring-0 modules. 
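+       (Raw-mode context images are handled by dbgfR3AsLazyPopulateRCCallback below.)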
*/ + if (enmCtx == PDMLDRCTX_RING_0) + { + RTDBGMOD hDbgMod; + int rc = RTDbgModCreateFromImage(&hDbgMod, pszFilename, pszName, RTLDRARCH_HOST, pVM->pUVM->dbgf.s.hDbgCfg); + if (RT_SUCCESS(rc)) + { + rc = RTDbgAsModuleLink((RTDBGAS)pvArg, hDbgMod, ImageBase, 0 /*fFlags*/); + if (RT_FAILURE(rc)) + LogRel(("DBGF: Failed to link module \"%s\" into DBGF_AS_R0 at %RTptr: %Rrc\n", + pszName, ImageBase, rc)); + } + else + LogRel(("DBGF: RTDbgModCreateFromImage failed with rc=%Rrc for module \"%s\" (%s)\n", + rc, pszName, pszFilename)); + } + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNPDMR3ENUM} + */ +static DECLCALLBACK(int) dbgfR3AsLazyPopulateRCCallback(PVM pVM, const char *pszFilename, const char *pszName, + RTUINTPTR ImageBase, size_t cbImage, PDMLDRCTX enmCtx, void *pvArg) +{ + NOREF(pVM); NOREF(cbImage); + + /* Only raw-mode modules. */ + if (enmCtx == PDMLDRCTX_RAW_MODE) + { + RTDBGMOD hDbgMod; + int rc = RTDbgModCreateFromImage(&hDbgMod, pszFilename, pszName, RTLDRARCH_X86_32, pVM->pUVM->dbgf.s.hDbgCfg); + if (RT_SUCCESS(rc)) + { + rc = RTDbgAsModuleLink((RTDBGAS)pvArg, hDbgMod, ImageBase, 0 /*fFlags*/); + if (RT_FAILURE(rc)) + LogRel(("DBGF: Failed to link module \"%s\" into DBGF_AS_RC at %RTptr: %Rrc\n", + pszName, ImageBase, rc)); + } + else + LogRel(("DBGF: RTDbgModCreateFromImage failed with rc=%Rrc for module \"%s\" (%s)\n", + rc, pszName, pszFilename)); + } + return VINF_SUCCESS; +} + + +/** + * Lazily populates the specified address space. + * + * @param pUVM The user mode VM handle. + * @param hAlias The alias. + */ +static void dbgfR3AsLazyPopulate(PUVM pUVM, RTDBGAS hAlias) +{ + DBGF_AS_DB_LOCK_WRITE(pUVM); + uintptr_t iAlias = DBGF_AS_ALIAS_2_INDEX(hAlias); + if (!pUVM->dbgf.s.afAsAliasPopuplated[iAlias]) + { + RTDBGAS hDbgAs = pUVM->dbgf.s.ahAsAliases[iAlias]; + if (hAlias == DBGF_AS_R0 && pUVM->pVM) + PDMR3LdrEnumModules(pUVM->pVM, dbgfR3AsLazyPopulateR0Callback, hDbgAs); + else if (hAlias == DBGF_AS_RC && pUVM->pVM && VM_IS_RAW_MODE_ENABLED(pUVM->pVM)) + { + LogRel(("DBGF: Lazy init of RC address space\n")); + PDMR3LdrEnumModules(pUVM->pVM, dbgfR3AsLazyPopulateRCCallback, hDbgAs); +#ifdef VBOX_WITH_RAW_MODE + PATMR3DbgPopulateAddrSpace(pUVM->pVM, hDbgAs); +#endif + } + else if (hAlias == DBGF_AS_PHYS && pUVM->pVM) + { + /** @todo Lazy load pc and vga bios symbols or the EFI stuff. */ + } + + pUVM->dbgf.s.afAsAliasPopuplated[iAlias] = true; + } + DBGF_AS_DB_UNLOCK_WRITE(pUVM); +} + + +/** + * Resolves the address space handle into a real handle if it's an alias. + * + * @returns Real address space handle. NIL_RTDBGAS if invalid handle. + * + * @param pUVM The user mode VM handle. + * @param hAlias The possibly address space alias. + * + * @remarks Doesn't take any locks. + */ +VMMR3DECL(RTDBGAS) DBGFR3AsResolve(PUVM pUVM, RTDBGAS hAlias) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + AssertCompileNS(NIL_RTDBGAS == (RTDBGAS)0); + + uintptr_t iAlias = DBGF_AS_ALIAS_2_INDEX(hAlias); + if (iAlias < DBGF_AS_COUNT) + ASMAtomicReadHandle(&pUVM->dbgf.s.ahAsAliases[iAlias], &hAlias); + return hAlias; +} + + +/** + * Resolves the address space handle into a real handle if it's an alias, + * and retains whatever it is. + * + * @returns Real address space handle. NIL_RTDBGAS if invalid handle. + * + * @param pUVM The user mode VM handle. + * @param hAlias The possibly address space alias. 
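+ *
+ * Illustrative usage sketch (editorial addition, assuming a valid pUVM); the
+ * returned handle must be released again:
+ * @code
+ *      RTDBGAS hAs = DBGFR3AsResolveAndRetain(pUVM, DBGF_AS_KERNEL);
+ *      if (hAs != NIL_RTDBGAS)
+ *      {
+ *          LogFlow(("kernel AS has %u modules\n", RTDbgAsModuleCount(hAs)));
+ *          RTDbgAsRelease(hAs);
+ *      }
+ * @endcode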
+ */ +VMMR3DECL(RTDBGAS) DBGFR3AsResolveAndRetain(PUVM pUVM, RTDBGAS hAlias) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + AssertCompileNS(NIL_RTDBGAS == (RTDBGAS)0); + + uint32_t cRefs; + uintptr_t iAlias = DBGF_AS_ALIAS_2_INDEX(hAlias); + if (iAlias < DBGF_AS_COUNT) + { + if (DBGF_AS_IS_FIXED_ALIAS(hAlias)) + { + /* Perform lazy address space population. */ + if (!pUVM->dbgf.s.afAsAliasPopuplated[iAlias]) + dbgfR3AsLazyPopulate(pUVM, hAlias); + + /* Won't ever change, no need to grab the lock. */ + hAlias = pUVM->dbgf.s.ahAsAliases[iAlias]; + cRefs = RTDbgAsRetain(hAlias); + } + else + { + /* May change, grab the lock so we can read it safely. */ + DBGF_AS_DB_LOCK_READ(pUVM); + hAlias = pUVM->dbgf.s.ahAsAliases[iAlias]; + cRefs = RTDbgAsRetain(hAlias); + DBGF_AS_DB_UNLOCK_READ(pUVM); + } + } + else + /* Not an alias, just retain it. */ + cRefs = RTDbgAsRetain(hAlias); + + return cRefs != UINT32_MAX ? hAlias : NIL_RTDBGAS; +} + + +/** + * Query an address space by name. + * + * @returns Retained address space handle if found, NIL_RTDBGAS if not. + * + * @param pUVM The user mode VM handle. + * @param pszName The name. + */ +VMMR3DECL(RTDBGAS) DBGFR3AsQueryByName(PUVM pUVM, const char *pszName) +{ + /* + * Validate the input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NIL_RTDBGAS); + AssertPtrReturn(pszName, NIL_RTDBGAS); + AssertReturn(*pszName, NIL_RTDBGAS); + + /* + * Look it up in the string space and retain the result. + */ + RTDBGAS hDbgAs = NIL_RTDBGAS; + DBGF_AS_DB_LOCK_READ(pUVM); + + PRTSTRSPACECORE pNode = RTStrSpaceGet(&pUVM->dbgf.s.AsNameSpace, pszName); + if (pNode) + { + PDBGFASDBNODE pDbNode = RT_FROM_MEMBER(pNode, DBGFASDBNODE, NameCore); + hDbgAs = (RTDBGAS)pDbNode->HandleCore.Key; + uint32_t cRefs = RTDbgAsRetain(hDbgAs); + if (RT_UNLIKELY(cRefs == UINT32_MAX)) + hDbgAs = NIL_RTDBGAS; + } + + DBGF_AS_DB_UNLOCK_READ(pUVM); + return hDbgAs; +} + + +/** + * Query an address space by process ID. + * + * @returns Retained address space handle if found, NIL_RTDBGAS if not. + * + * @param pUVM The user mode VM handle. + * @param ProcId The process ID. + */ +VMMR3DECL(RTDBGAS) DBGFR3AsQueryByPid(PUVM pUVM, RTPROCESS ProcId) +{ + /* + * Validate the input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NIL_RTDBGAS); + AssertReturn(ProcId != NIL_RTPROCESS, NIL_RTDBGAS); + + /* + * Look it up in the PID tree and retain the result. + */ + RTDBGAS hDbgAs = NIL_RTDBGAS; + DBGF_AS_DB_LOCK_READ(pUVM); + + PAVLU32NODECORE pNode = RTAvlU32Get(&pUVM->dbgf.s.AsPidTree, ProcId); + if (pNode) + { + PDBGFASDBNODE pDbNode = RT_FROM_MEMBER(pNode, DBGFASDBNODE, PidCore); + hDbgAs = (RTDBGAS)pDbNode->HandleCore.Key; + uint32_t cRefs = RTDbgAsRetain(hDbgAs); + if (RT_UNLIKELY(cRefs == UINT32_MAX)) + hDbgAs = NIL_RTDBGAS; + } + DBGF_AS_DB_UNLOCK_READ(pUVM); + + return hDbgAs; +} + +#if 0 /* unused */ + +/** + * Searches for the file in the path. + * + * The file is first tested without any path modification, then we walk the path + * looking in each directory. + * + * @returns VBox status code. + * @param pszFilename The file to search for. + * @param pszPath The search path. + * @param pfnOpen The open callback function. + * @param pvUser User argument for the callback. + */ +static int dbgfR3AsSearchPath(const char *pszFilename, const char *pszPath, PFNDBGFR3ASSEARCHOPEN pfnOpen, void *pvUser) +{ + char szFound[RTPATH_MAX]; + + /* Check the filename length. 
*/ + size_t const cchFilename = strlen(pszFilename); + if (cchFilename >= sizeof(szFound)) + return VERR_FILENAME_TOO_LONG; + const char *pszName = RTPathFilename(pszFilename); + if (!pszName) + return VERR_IS_A_DIRECTORY; + size_t const cchName = strlen(pszName); + + /* + * Try default location first. + */ + memcpy(szFound, pszFilename, cchFilename + 1); + int rc = pfnOpen(szFound, pvUser); + if (RT_SUCCESS(rc)) + return rc; + + /* + * Walk the search path. + */ + const char *psz = pszPath; + while (*psz) + { + /* Skip leading blanks - no directories with leading spaces, thank you. */ + while (RT_C_IS_BLANK(*psz)) + psz++; + + /* Find the end of this element. */ + const char *pszNext; + const char *pszEnd = strchr(psz, ';'); + if (!pszEnd) + pszEnd = pszNext = strchr(psz, '\0'); + else + pszNext = pszEnd + 1; + if (pszEnd != psz) + { + size_t const cch = pszEnd - psz; + if (cch + 1 + cchName < sizeof(szFound)) + { + /** @todo RTPathCompose, RTPathComposeN(). This code isn't right + * for 'E:' on DOS systems. It may also create unwanted double slashes. */ + memcpy(szFound, psz, cch); + szFound[cch] = '/'; + memcpy(szFound + cch + 1, pszName, cchName + 1); + int rc2 = pfnOpen(szFound, pvUser); + if (RT_SUCCESS(rc2)) + return rc2; + if ( rc2 != rc + && ( rc == VERR_FILE_NOT_FOUND + || rc == VERR_OPEN_FAILED)) + rc = rc2; + } + } + + /* advance */ + psz = pszNext; + } + + /* + * Walk the path once again, this time do a depth search. + */ + /** @todo do a depth search using the specified path. */ + + /* failed */ + return rc; +} + + +/** + * Same as dbgfR3AsSearchEnv, except that the path is taken from the environment. + * + * If the environment variable doesn't exist, the current directory is searched + * instead. + * + * @returns VBox status code. + * @param pszFilename The filename. + * @param pszEnvVar The environment variable name. + * @param pfnOpen The open callback function. + * @param pvUser User argument for the callback. + */ +static int dbgfR3AsSearchEnvPath(const char *pszFilename, const char *pszEnvVar, PFNDBGFR3ASSEARCHOPEN pfnOpen, void *pvUser) +{ + int rc; + char *pszPath = RTEnvDupEx(RTENV_DEFAULT, pszEnvVar); + if (pszPath) + { + rc = dbgfR3AsSearchPath(pszFilename, pszPath, pfnOpen, pvUser); + RTStrFree(pszPath); + } + else + rc = dbgfR3AsSearchPath(pszFilename, ".", pfnOpen, pvUser); + return rc; +} + + +/** + * Same as dbgfR3AsSearchEnv, except that the path is taken from the DBGF config + * (CFGM). + * + * Nothing is done if the CFGM variable isn't set. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszFilename The filename. + * @param pszCfgValue The name of the config variable (under /DBGF/). + * @param pfnOpen The open callback function. + * @param pvUser User argument for the callback. + */ +static int dbgfR3AsSearchCfgPath(PUVM pUVM, const char *pszFilename, const char *pszCfgValue, + PFNDBGFR3ASSEARCHOPEN pfnOpen, void *pvUser) +{ + char *pszPath; + int rc = CFGMR3QueryStringAllocDef(CFGMR3GetChild(CFGMR3GetRootU(pUVM), "/DBGF"), pszCfgValue, &pszPath, NULL); + if (RT_FAILURE(rc)) + return rc; + if (!pszPath) + return VERR_FILE_NOT_FOUND; + rc = dbgfR3AsSearchPath(pszFilename, pszPath, pfnOpen, pvUser); + MMR3HeapFree(pszPath); + return rc; +} + +#endif /* unused */ + + +/** + * Load symbols from an executable module into the specified address space. + * + * If an module exist at the specified address it will be replaced by this + * call, otherwise a new module is created. + * + * @returns VBox status code. 
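+ *
+ * Illustrative call (editorial addition; the path and the load address are
+ * made-up values):
+ * @code
+ *      DBGFADDRESS ModAddr;
+ *      DBGFR3AddrFromFlat(pUVM, &ModAddr, UINT64_C(0xfffff80001000000));
+ *      int rc = DBGFR3AsLoadImage(pUVM, DBGF_AS_KERNEL, "/symbols/ntoskrnl.exe", NULL /*pszModName*/,
+ *                                 RTLDRARCH_WHATEVER, &ModAddr, NIL_RTDBGSEGIDX, 0 /*fFlags*/);
+ * @endcode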
+ *
+ * @param   pUVM            The user mode VM handle.
+ * @param   hDbgAs          The address space.
+ * @param   pszFilename     The filename of the executable module.
+ * @param   pszModName      The module name. If NULL, then the file name
+ *                          base is used (without the extension).
+ * @param   enmArch         The desired architecture, use RTLDRARCH_WHATEVER if
+ *                          it's not relevant or known.
+ * @param   pModAddress     The load address of the module.
+ * @param   iModSeg         The segment to load, pass NIL_RTDBGSEGIDX to load
+ *                          the whole image.
+ * @param   fFlags          For DBGFR3AsLinkModule, see RTDBGASLINK_FLAGS_*.
+ */
+VMMR3DECL(int) DBGFR3AsLoadImage(PUVM pUVM, RTDBGAS hDbgAs, const char *pszFilename, const char *pszModName, RTLDRARCH enmArch,
+                                 PCDBGFADDRESS pModAddress, RTDBGSEGIDX iModSeg, uint32_t fFlags)
+{
+    /*
+     * Validate input
+     */
+    UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
+    AssertPtrReturn(pszFilename, VERR_INVALID_POINTER);
+    AssertReturn(*pszFilename, VERR_INVALID_PARAMETER);
+    AssertReturn(DBGFR3AddrIsValid(pUVM, pModAddress), VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~RTDBGASLINK_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
+    RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs);
+    if (hRealAS == NIL_RTDBGAS)
+        return VERR_INVALID_HANDLE;
+
+    RTDBGMOD hDbgMod;
+    int rc = RTDbgModCreateFromImage(&hDbgMod, pszFilename, pszModName, enmArch, pUVM->dbgf.s.hDbgCfg);
+    if (RT_SUCCESS(rc))
+    {
+        rc = DBGFR3AsLinkModule(pUVM, hRealAS, hDbgMod, pModAddress, iModSeg, fFlags & RTDBGASLINK_FLAGS_VALID_MASK);
+        if (RT_FAILURE(rc))
+            RTDbgModRelease(hDbgMod);
+    }
+
+    RTDbgAsRelease(hRealAS);
+    return rc;
+}
+
+
+/**
+ * Load symbols from a map file into a module at the specified address space.
+ *
+ * If a module exists at the specified address it will be replaced by this
+ * call, otherwise a new module is created.
+ *
+ * @returns VBox status code.
+ *
+ * @param   pUVM            The user mode VM handle.
+ * @param   hDbgAs          The address space.
+ * @param   pszFilename     The map file.
+ * @param   pszModName      The module name. If NULL, then the file name
+ *                          base is used (without the extension).
+ * @param   pModAddress     The load address of the module.
+ * @param   iModSeg         The segment to load, pass NIL_RTDBGSEGIDX to load
+ *                          the whole image.
+ * @param   uSubtrahend     Value to subtract from the symbols in the map
+ *                          file. This is useful for the Linux System.map and
+ *                          /proc/kallsyms.
+ * @param   fFlags          Flags reserved for future extensions, must be 0.
+ */
+VMMR3DECL(int) DBGFR3AsLoadMap(PUVM pUVM, RTDBGAS hDbgAs, const char *pszFilename, const char *pszModName,
+                               PCDBGFADDRESS pModAddress, RTDBGSEGIDX iModSeg, RTGCUINTPTR uSubtrahend, uint32_t fFlags)
+{
+    /*
+     * Validate input
+     */
+    UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
+    AssertPtrReturn(pszFilename, VERR_INVALID_POINTER);
+    AssertReturn(*pszFilename, VERR_INVALID_PARAMETER);
+    AssertReturn(DBGFR3AddrIsValid(pUVM, pModAddress), VERR_INVALID_PARAMETER);
+    AssertReturn(fFlags == 0, VERR_INVALID_PARAMETER);
+    RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs);
+    if (hRealAS == NIL_RTDBGAS)
+        return VERR_INVALID_HANDLE;
+
+    RTDBGMOD hDbgMod;
+    int rc = RTDbgModCreateFromMap(&hDbgMod, pszFilename, pszModName, uSubtrahend, pUVM->dbgf.s.hDbgCfg);
+    if (RT_SUCCESS(rc))
+    {
+        rc = DBGFR3AsLinkModule(pUVM, hRealAS, hDbgMod, pModAddress, iModSeg, 0);
+        if (RT_FAILURE(rc))
+            RTDbgModRelease(hDbgMod);
+    }
+
+    RTDbgAsRelease(hRealAS);
+    return rc;
+}
+
+
+/**
+ * Wrapper around RTDbgAsModuleLink, RTDbgAsModuleLinkSeg and DBGFR3AsResolve.
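+ *
+ * Illustrative sketch of a direct link (editorial addition; hMod is assumed to
+ * be a module handle obtained from e.g. RTDbgModCreateFromImage, and ModAddr a
+ * prepared DBGFADDRESS):
+ * @code
+ *      int rc = DBGFR3AsLinkModule(pUVM, DBGF_AS_GLOBAL, hMod, &ModAddr,
+ *                                  NIL_RTDBGSEGIDX, RTDBGASLINK_FLAGS_REPLACE);
+ * @endcode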
+ * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param hDbgAs The address space handle. + * @param hMod The module handle. + * @param pModAddress The link address. + * @param iModSeg The segment to link, NIL_RTDBGSEGIDX for the entire image. + * @param fFlags Flags to pass to the link functions, see RTDBGASLINK_FLAGS_*. + */ +VMMR3DECL(int) DBGFR3AsLinkModule(PUVM pUVM, RTDBGAS hDbgAs, RTDBGMOD hMod, PCDBGFADDRESS pModAddress, + RTDBGSEGIDX iModSeg, uint32_t fFlags) +{ + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(DBGFR3AddrIsValid(pUVM, pModAddress), VERR_INVALID_PARAMETER); + RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs); + if (hRealAS == NIL_RTDBGAS) + return VERR_INVALID_HANDLE; + + /* + * Do the job. + */ + int rc; + if (iModSeg == NIL_RTDBGSEGIDX) + rc = RTDbgAsModuleLink(hRealAS, hMod, pModAddress->FlatPtr, fFlags); + else + rc = RTDbgAsModuleLinkSeg(hRealAS, hMod, iModSeg, pModAddress->FlatPtr, fFlags); + + RTDbgAsRelease(hRealAS); + return rc; +} + + +/** + * Wrapper around RTDbgAsModuleByName and RTDbgAsModuleUnlink. + * + * Unlinks all mappings matching the given module name. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param hDbgAs The address space handle. + * @param pszModName The name of the module to unlink. + */ +VMMR3DECL(int) DBGFR3AsUnlinkModuleByName(PUVM pUVM, RTDBGAS hDbgAs, const char *pszModName) +{ + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs); + if (hRealAS == NIL_RTDBGAS) + return VERR_INVALID_HANDLE; + + /* + * Do the job. + */ + RTDBGMOD hMod; + int rc = RTDbgAsModuleByName(hRealAS, pszModName, 0, &hMod); + if (RT_SUCCESS(rc)) + { + for (;;) + { + rc = RTDbgAsModuleUnlink(hRealAS, hMod); + RTDbgModRelease(hMod); + if (RT_FAILURE(rc)) + break; + rc = RTDbgAsModuleByName(hRealAS, pszModName, 0, &hMod); + if (RT_FAILURE_NP(rc)) + { + if (rc == VERR_NOT_FOUND) + rc = VINF_SUCCESS; + break; + } + } + } + + RTDbgAsRelease(hRealAS); + return rc; +} + + +/** + * Adds the module name to the symbol name. + * + * @param pSymbol The symbol info (in/out). + * @param hMod The module handle. + */ +static void dbgfR3AsSymbolJoinNames(PRTDBGSYMBOL pSymbol, RTDBGMOD hMod) +{ + /* Figure the lengths, adjust them if the result is too long. */ + const char *pszModName = RTDbgModName(hMod); + size_t cchModName = strlen(pszModName); + size_t cchSymbol = strlen(pSymbol->szName); + if (cchModName + 1 + cchSymbol >= sizeof(pSymbol->szName)) + { + if (cchModName >= sizeof(pSymbol->szName) / 4) + cchModName = sizeof(pSymbol->szName) / 4; + if (cchModName + 1 + cchSymbol >= sizeof(pSymbol->szName)) + cchSymbol = sizeof(pSymbol->szName) - cchModName - 2; + Assert(cchModName + 1 + cchSymbol < sizeof(pSymbol->szName)); + } + + /* Do the moving and copying. */ + memmove(&pSymbol->szName[cchModName + 1], &pSymbol->szName[0], cchSymbol + 1); + memcpy(&pSymbol->szName[0], pszModName, cchModName); + pSymbol->szName[cchModName] = '!'; +} + + +/** + * Query a symbol by address. + * + * The returned symbol is the one we consider closes to the specified address. + * + * @returns VBox status code. See RTDbgAsSymbolByAddr. + * + * @param pUVM The user mode VM handle. + * @param hDbgAs The address space handle. + * @param pAddress The address to lookup. + * @param fFlags One of the RTDBGSYMADDR_FLAGS_XXX flags. 
+ * @param poffDisp Where to return the distance between the returned + * symbol and pAddress. Optional. + * @param pSymbol Where to return the symbol information. The returned + * symbol name will be prefixed by the module name as + * far as space allows. + * @param phMod Where to return the module handle. Optional. + */ +VMMR3DECL(int) DBGFR3AsSymbolByAddr(PUVM pUVM, RTDBGAS hDbgAs, PCDBGFADDRESS pAddress, uint32_t fFlags, + PRTGCINTPTR poffDisp, PRTDBGSYMBOL pSymbol, PRTDBGMOD phMod) +{ + /* + * Implement the special address space aliases the lazy way. + */ + if (hDbgAs == DBGF_AS_RC_AND_GC_GLOBAL) + { + int rc = DBGFR3AsSymbolByAddr(pUVM, DBGF_AS_RC, pAddress, fFlags, poffDisp, pSymbol, phMod); + if (RT_FAILURE(rc)) + rc = DBGFR3AsSymbolByAddr(pUVM, DBGF_AS_GLOBAL, pAddress, fFlags, poffDisp, pSymbol, phMod); + return rc; + } + + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(DBGFR3AddrIsValid(pUVM, pAddress), VERR_INVALID_PARAMETER); + AssertPtrNullReturn(poffDisp, VERR_INVALID_POINTER); + AssertPtrReturn(pSymbol, VERR_INVALID_POINTER); + AssertPtrNullReturn(phMod, VERR_INVALID_POINTER); + if (poffDisp) + *poffDisp = 0; + if (phMod) + *phMod = NIL_RTDBGMOD; + RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs); + if (hRealAS == NIL_RTDBGAS) + return VERR_INVALID_HANDLE; + + /* + * Do the lookup. + */ + RTDBGMOD hMod; + int rc = RTDbgAsSymbolByAddr(hRealAS, pAddress->FlatPtr, fFlags, poffDisp, pSymbol, &hMod); + if (RT_SUCCESS(rc)) + { + dbgfR3AsSymbolJoinNames(pSymbol, hMod); + if (!phMod) + RTDbgModRelease(hMod); + } + + RTDbgAsRelease(hRealAS); + return rc; +} + + +/** + * Convenience function that combines RTDbgSymbolDup and DBGFR3AsSymbolByAddr. + * + * @returns Pointer to the symbol on success. This must be free using + * RTDbgSymbolFree(). NULL is returned if not found or any error + * occurs. + * + * @param pUVM The user mode VM handle. + * @param hDbgAs See DBGFR3AsSymbolByAddr. + * @param pAddress See DBGFR3AsSymbolByAddr. + * @param fFlags See DBGFR3AsSymbolByAddr. + * @param poffDisp See DBGFR3AsSymbolByAddr. + * @param phMod See DBGFR3AsSymbolByAddr. + */ +VMMR3DECL(PRTDBGSYMBOL) DBGFR3AsSymbolByAddrA(PUVM pUVM, RTDBGAS hDbgAs, PCDBGFADDRESS pAddress, uint32_t fFlags, + PRTGCINTPTR poffDisp, PRTDBGMOD phMod) +{ + RTDBGSYMBOL SymInfo; + int rc = DBGFR3AsSymbolByAddr(pUVM, hDbgAs, pAddress, fFlags, poffDisp, &SymInfo, phMod); + if (RT_SUCCESS(rc)) + return RTDbgSymbolDup(&SymInfo); + return NULL; +} + + +/** + * Query a symbol by name. + * + * The symbol can be prefixed by a module name pattern to scope the search. The + * pattern is a simple string pattern with '*' and '?' as wild chars. See + * RTStrSimplePatternMatch(). + * + * @returns VBox status code. See RTDbgAsSymbolByAddr. + * + * @param pUVM The user mode VM handle. + * @param hDbgAs The address space handle. + * @param pszSymbol The symbol to search for, maybe prefixed by a + * module pattern. + * @param pSymbol Where to return the symbol information. + * The returned symbol name will be prefixed by + * the module name as far as space allows. + * @param phMod Where to return the module handle. Optional. + */ +VMMR3DECL(int) DBGFR3AsSymbolByName(PUVM pUVM, RTDBGAS hDbgAs, const char *pszSymbol, + PRTDBGSYMBOL pSymbol, PRTDBGMOD phMod) +{ + /* + * Implement the special address space aliases the lazy way. 
+ */ + if (hDbgAs == DBGF_AS_RC_AND_GC_GLOBAL) + { + int rc = DBGFR3AsSymbolByName(pUVM, DBGF_AS_RC, pszSymbol, pSymbol, phMod); + if (RT_FAILURE(rc)) + rc = DBGFR3AsSymbolByName(pUVM, DBGF_AS_GLOBAL, pszSymbol, pSymbol, phMod); + return rc; + } + + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pSymbol, VERR_INVALID_POINTER); + AssertPtrNullReturn(phMod, VERR_INVALID_POINTER); + if (phMod) + *phMod = NIL_RTDBGMOD; + RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs); + if (hRealAS == NIL_RTDBGAS) + return VERR_INVALID_HANDLE; + + + /* + * Do the lookup. + */ + RTDBGMOD hMod; + int rc = RTDbgAsSymbolByName(hRealAS, pszSymbol, pSymbol, &hMod); + if (RT_SUCCESS(rc)) + { + dbgfR3AsSymbolJoinNames(pSymbol, hMod); + if (!phMod) + RTDbgModRelease(hMod); + } + + RTDbgAsRelease(hRealAS); + return rc; +} + + +VMMR3DECL(int) DBGFR3AsLineByAddr(PUVM pUVM, RTDBGAS hDbgAs, PCDBGFADDRESS pAddress, + PRTGCINTPTR poffDisp, PRTDBGLINE pLine, PRTDBGMOD phMod) +{ + /* + * Implement the special address space aliases the lazy way. + */ + if (hDbgAs == DBGF_AS_RC_AND_GC_GLOBAL) + { + int rc = DBGFR3AsLineByAddr(pUVM, DBGF_AS_RC, pAddress, poffDisp, pLine, phMod); + if (RT_FAILURE(rc)) + rc = DBGFR3AsLineByAddr(pUVM, DBGF_AS_GLOBAL, pAddress, poffDisp, pLine, phMod); + return rc; + } + + /* + * Input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(DBGFR3AddrIsValid(pUVM, pAddress), VERR_INVALID_PARAMETER); + AssertPtrNullReturn(poffDisp, VERR_INVALID_POINTER); + AssertPtrReturn(pLine, VERR_INVALID_POINTER); + AssertPtrNullReturn(phMod, VERR_INVALID_POINTER); + if (poffDisp) + *poffDisp = 0; + if (phMod) + *phMod = NIL_RTDBGMOD; + RTDBGAS hRealAS = DBGFR3AsResolveAndRetain(pUVM, hDbgAs); + if (hRealAS == NIL_RTDBGAS) + return VERR_INVALID_HANDLE; + + /* + * Do the lookup. + */ + int rc = RTDbgAsLineByAddr(hRealAS, pAddress->FlatPtr, poffDisp, pLine, phMod); + + RTDbgAsRelease(hRealAS); + return rc; +} + + +VMMR3DECL(PRTDBGLINE) DBGFR3AsLineByAddrA(PUVM pUVM, RTDBGAS hDbgAs, PCDBGFADDRESS pAddress, + PRTGCINTPTR poffDisp, PRTDBGMOD phMod) +{ + RTDBGLINE Line; + int rc = DBGFR3AsLineByAddr(pUVM, hDbgAs, pAddress, poffDisp, &Line, phMod); + if (RT_SUCCESS(rc)) + return RTDbgLineDup(&Line); + return NULL; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFBp.cpp b/src/VBox/VMM/VMMR3/DBGFBp.cpp new file mode 100644 index 00000000..41d50bb0 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFBp.cpp @@ -0,0 +1,1450 @@ +/* $Id: DBGFBp.cpp $ */ +/** @file + * DBGF - Debugger Facility, Breakpoint Management. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#ifdef VBOX_WITH_REM +# include +#else +# include +#endif +#include +#include +#include +#include "DBGFInternal.h" +#include +#include + +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * DBGF INT3-breakpoint set callback arguments. + */ +typedef struct DBGFBPINT3ARGS +{ + /** The source virtual CPU ID (used for breakpoint address resolution). */ + VMCPUID idSrcCpu; + /** The breakpoint address. */ + PCDBGFADDRESS pAddress; + /** The hit count at which the breakpoint starts triggering. */ + uint64_t iHitTrigger; + /** The hit count at which disables the breakpoint. */ + uint64_t iHitDisable; + /** Where to store the breakpoint Id (optional). */ + uint32_t *piBp; +} DBGFBPINT3ARGS; +/** Pointer to a DBGF INT3 breakpoint set callback argument. */ +typedef DBGFBPINT3ARGS *PDBGFBPINT3ARGS; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +RT_C_DECLS_BEGIN +static int dbgfR3BpRegArm(PVM pVM, PDBGFBP pBp); +static int dbgfR3BpInt3Arm(PVM pVM, PDBGFBP pBp); +RT_C_DECLS_END + + + +/** + * Initialize the breakpoint stuff. + * + * @returns VINF_SUCCESS + * @param pVM The cross context VM structure. + */ +int dbgfR3BpInit(PVM pVM) +{ + /* + * Init structures. + */ + unsigned i; + for (i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); i++) + { + pVM->dbgf.s.aHwBreakpoints[i].iBp = i; + pVM->dbgf.s.aHwBreakpoints[i].enmType = DBGFBPTYPE_FREE; + pVM->dbgf.s.aHwBreakpoints[i].u.Reg.iReg = i; + } + + for (i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); i++) + { + pVM->dbgf.s.aBreakpoints[i].iBp = i + RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); + pVM->dbgf.s.aBreakpoints[i].enmType = DBGFBPTYPE_FREE; + } + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + pVCpu->dbgf.s.iActiveBp = ~0U; + } + + /* + * Register saved state. + */ + /** @todo */ + + return VINF_SUCCESS; +} + + + +/** + * Allocate a breakpoint. + * + * @returns Pointer to the allocated breakpoint. + * @returns NULL if we're out of breakpoints. + * @param pVM The cross context VM structure. + * @param enmType The type to allocate. + */ +static PDBGFBP dbgfR3BpAlloc(PVM pVM, DBGFBPTYPE enmType) +{ + /* + * Determine which array to search and where in the array to start + * searching (latter for grouping similar BPs, reducing runtime overhead). 
+ */ + unsigned iStart; + unsigned cBps; + PDBGFBP paBps; + switch (enmType) + { + case DBGFBPTYPE_REG: + cBps = RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); + paBps = &pVM->dbgf.s.aHwBreakpoints[0]; + iStart = 0; + break; + + case DBGFBPTYPE_INT3: + case DBGFBPTYPE_REM: + case DBGFBPTYPE_PORT_IO: + case DBGFBPTYPE_MMIO: + cBps = RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); + paBps = &pVM->dbgf.s.aBreakpoints[0]; + if (enmType == DBGFBPTYPE_PORT_IO) + iStart = cBps / 4 * 2; + else if (enmType == DBGFBPTYPE_MMIO) + iStart = cBps / 4 * 1; + else if (enmType == DBGFBPTYPE_REM) + iStart = cBps / 4 * 3; + else + iStart = 0; + break; + + default: + AssertMsgFailed(("enmType=%d\n", enmType)); + return NULL; + } + + /* + * Search for a free breakpoint entry. + */ + unsigned iBp; + for (iBp = iStart; iBp < cBps; iBp++) + if (paBps[iBp].enmType == DBGFBPTYPE_FREE) + break; + if (iBp >= cBps && iStart != 0) + for (iBp = 0; iBp < cBps; iBp++) + if (paBps[iBp].enmType == DBGFBPTYPE_FREE) + break; + if (iBp < cBps) + { + /* + * Return what we found. + */ + paBps[iBp].fEnabled = false; + paBps[iBp].cHits = 0; + paBps[iBp].enmType = enmType; + return &paBps[iBp]; + } + + LogFlow(("dbgfR3BpAlloc: returns NULL - we're out of breakpoint slots! cBps=%u\n", cBps)); + return NULL; +} + + +/** + * Updates the search optimization structure for enabled breakpoints of the + * specified type. + * + * @returns VINF_SUCCESS. + * @param pVM The cross context VM structure. + * @param enmType The breakpoint type. + * @param pOpt The breakpoint optimization structure to update. + */ +static int dbgfR3BpUpdateSearchOptimizations(PVM pVM, DBGFBPTYPE enmType, PDBGFBPSEARCHOPT pOpt) +{ + DBGFBPSEARCHOPT Opt = { UINT32_MAX, 0 }; + + for (uint32_t iBp = 0; iBp < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); iBp++) + if ( pVM->dbgf.s.aBreakpoints[iBp].enmType == enmType + && pVM->dbgf.s.aBreakpoints[iBp].fEnabled) + { + if (Opt.iStartSearch > iBp) + Opt.iStartSearch = iBp; + Opt.cToSearch = iBp - Opt.iStartSearch + 1; + } + + *pOpt = Opt; + return VINF_SUCCESS; +} + + +/** + * Get a breakpoint give by breakpoint id. + * + * @returns Pointer to the allocated breakpoint. + * @returns NULL if the breakpoint is invalid. + * @param pVM The cross context VM structure. + * @param iBp The breakpoint id. + */ +static PDBGFBP dbgfR3BpGet(PVM pVM, uint32_t iBp) +{ + /* Find it. */ + PDBGFBP pBp; + if (iBp < RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints)) + pBp = &pVM->dbgf.s.aHwBreakpoints[iBp]; + else + { + iBp -= RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); + if (iBp >= RT_ELEMENTS(pVM->dbgf.s.aBreakpoints)) + return NULL; + pBp = &pVM->dbgf.s.aBreakpoints[iBp]; + } + + /* check if it's valid. */ + switch (pBp->enmType) + { + case DBGFBPTYPE_FREE: + return NULL; + + case DBGFBPTYPE_REG: + case DBGFBPTYPE_INT3: + case DBGFBPTYPE_REM: + case DBGFBPTYPE_PORT_IO: + case DBGFBPTYPE_MMIO: + break; + + default: + AssertMsgFailed(("Invalid enmType=%d!\n", pBp->enmType)); + return NULL; + } + + return pBp; +} + + +/** + * Get a breakpoint give by address. + * + * @returns Pointer to the allocated breakpoint. + * @returns NULL if the breakpoint is invalid. + * @param pVM The cross context VM structure. + * @param enmType The breakpoint type. + * @param GCPtr The breakpoint address. + */ +static PDBGFBP dbgfR3BpGetByAddr(PVM pVM, DBGFBPTYPE enmType, RTGCUINTPTR GCPtr) +{ + /* + * Determine which array to search. 
+ */ + unsigned cBps; + PDBGFBP paBps; + switch (enmType) + { + case DBGFBPTYPE_REG: + cBps = RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); + paBps = &pVM->dbgf.s.aHwBreakpoints[0]; + break; + + case DBGFBPTYPE_INT3: + case DBGFBPTYPE_REM: + cBps = RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); + paBps = &pVM->dbgf.s.aBreakpoints[0]; + break; + + default: + AssertMsgFailed(("enmType=%d\n", enmType)); + return NULL; + } + + /* + * Search. + */ + for (unsigned iBp = 0; iBp < cBps; iBp++) + if ( paBps[iBp].enmType == enmType + && paBps[iBp].u.GCPtr == GCPtr) + return &paBps[iBp]; + + return NULL; +} + + +/** + * Frees a breakpoint. + * + * @param pVM The cross context VM structure. + * @param pBp The breakpoint to free. + */ +static void dbgfR3BpFree(PVM pVM, PDBGFBP pBp) +{ + switch (pBp->enmType) + { + case DBGFBPTYPE_FREE: + AssertMsgFailed(("Already freed!\n")); + return; + + case DBGFBPTYPE_REG: + Assert((uintptr_t)(pBp - &pVM->dbgf.s.aHwBreakpoints[0]) < RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints)); + break; + + case DBGFBPTYPE_INT3: + case DBGFBPTYPE_REM: + case DBGFBPTYPE_PORT_IO: + case DBGFBPTYPE_MMIO: + Assert((uintptr_t)(pBp - &pVM->dbgf.s.aBreakpoints[0]) < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints)); + break; + + default: + AssertMsgFailed(("Invalid enmType=%d!\n", pBp->enmType)); + return; + + } + pBp->enmType = DBGFBPTYPE_FREE; + NOREF(pVM); +} + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3BpEnableInt3OnCpu(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + /* + * Validate input. + */ + PDBGFBP pBp = (PDBGFBP)pvUser; + AssertReturn(pBp, VERR_INVALID_PARAMETER); + Assert(pBp->enmType == DBGFBPTYPE_INT3); + VMCPU_ASSERT_EMT(pVCpu); RT_NOREF(pVCpu); + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Arm the breakpoint. + */ + return dbgfR3BpInt3Arm(pVM, pBp); +} + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3BpSetInt3OnCpu(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + /* + * Validate input. + */ + PDBGFBPINT3ARGS pBpArgs = (PDBGFBPINT3ARGS)pvUser; + AssertReturn(pBpArgs, VERR_INVALID_PARAMETER); + VMCPU_ASSERT_EMT(pVCpu); + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + AssertMsgReturn(!pBpArgs->piBp || VALID_PTR(pBpArgs->piBp), ("piBp=%p\n", pBpArgs->piBp), VERR_INVALID_POINTER); + PCDBGFADDRESS pAddress = pBpArgs->pAddress; + if (!DBGFR3AddrIsValid(pVM->pUVM, pAddress)) + return VERR_INVALID_PARAMETER; + + if (pBpArgs->iHitTrigger > pBpArgs->iHitDisable) + return VERR_INVALID_PARAMETER; + + /* + * Check if we're on the source CPU where we can resolve the breakpoint address. + */ + if (pVCpu->idCpu == pBpArgs->idSrcCpu) + { + if (pBpArgs->piBp) + *pBpArgs->piBp = UINT32_MAX; + + /* + * Check if the breakpoint already exists. + */ + PDBGFBP pBp = dbgfR3BpGetByAddr(pVM, DBGFBPTYPE_INT3, pAddress->FlatPtr); + if (pBp) + { + int rc = VINF_SUCCESS; + if (!pBp->fEnabled) + rc = dbgfR3BpInt3Arm(pVM, pBp); + if (RT_SUCCESS(rc)) + { + if (pBpArgs->piBp) + *pBpArgs->piBp = pBp->iBp; + + /* + * Returning VINF_DBGF_BP_ALREADY_EXIST here causes a VBOXSTRICTRC out-of-range assertion + * in VMMR3EmtRendezvous(). Re-setting of an existing breakpoint shouldn't cause an assertion + * killing the VM (and debugging session), so for now we'll pretend success. + */ +#if 0 + rc = VINF_DBGF_BP_ALREADY_EXIST; +#endif + } + else + dbgfR3BpFree(pVM, pBp); + return rc; + } + + /* + * Allocate the breakpoint. 
+ */ + pBp = dbgfR3BpAlloc(pVM, DBGFBPTYPE_INT3); + if (!pBp) + return VERR_DBGF_NO_MORE_BP_SLOTS; + + /* + * Translate & save the breakpoint address into a guest-physical address. + */ + int rc = DBGFR3AddrToPhys(pVM->pUVM, pBpArgs->idSrcCpu, pAddress, &pBp->u.Int3.PhysAddr); + if (RT_SUCCESS(rc)) + { + /* The physical address from DBGFR3AddrToPhys() is the start of the page, + we need the exact byte offset into the page while writing to it in dbgfR3BpInt3Arm(). */ + pBp->u.Int3.PhysAddr |= (pAddress->FlatPtr & X86_PAGE_OFFSET_MASK); + pBp->u.Int3.GCPtr = pAddress->FlatPtr; + pBp->iHitTrigger = pBpArgs->iHitTrigger; + pBp->iHitDisable = pBpArgs->iHitDisable; + + /* + * Now set the breakpoint in guest memory. + */ + rc = dbgfR3BpInt3Arm(pVM, pBp); + if (RT_SUCCESS(rc)) + { + if (pBpArgs->piBp) + *pBpArgs->piBp = pBp->iBp; + return VINF_SUCCESS; + } + } + + dbgfR3BpFree(pVM, pBp); + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Sets a breakpoint (int 3 based). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idSrcCpu The ID of the virtual CPU used for the + * breakpoint address resolution. + * @param pAddress The address of the breakpoint. + * @param iHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param iHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint id. (optional) + * @thread Any thread. + */ +VMMR3DECL(int) DBGFR3BpSetInt3(PUVM pUVM, VMCPUID idSrcCpu, PCDBGFADDRESS pAddress, uint64_t iHitTrigger, uint64_t iHitDisable, + uint32_t *piBp) +{ + AssertReturn(idSrcCpu <= pUVM->cCpus, VERR_INVALID_CPU_ID); + + DBGFBPINT3ARGS BpArgs; + RT_ZERO(BpArgs); + BpArgs.idSrcCpu = idSrcCpu; + BpArgs.iHitTrigger = iHitTrigger; + BpArgs.iHitDisable = iHitDisable; + BpArgs.pAddress = pAddress; + BpArgs.piBp = piBp; + + int rc = VMMR3EmtRendezvous(pUVM->pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE, dbgfR3BpSetInt3OnCpu, &BpArgs); + LogFlow(("DBGFR3BpSet: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Arms an int 3 breakpoint. + * + * This is used to implement both DBGFR3BpSetInt3() and + * DBGFR3BpEnable(). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pBp The breakpoint. + */ +static int dbgfR3BpInt3Arm(PVM pVM, PDBGFBP pBp) +{ + VM_ASSERT_EMT(pVM); + + /* + * Save current byte and write the int3 instruction byte. + */ + int rc = PGMPhysSimpleReadGCPhys(pVM, &pBp->u.Int3.bOrg, pBp->u.Int3.PhysAddr, sizeof(pBp->u.Int3.bOrg)); + if (RT_SUCCESS(rc)) + { + static const uint8_t s_bInt3 = 0xcc; + rc = PGMPhysSimpleWriteGCPhys(pVM, pBp->u.Int3.PhysAddr, &s_bInt3, sizeof(s_bInt3)); + if (RT_SUCCESS(rc)) + { + pBp->fEnabled = true; + dbgfR3BpUpdateSearchOptimizations(pVM, DBGFBPTYPE_INT3, &pVM->dbgf.s.Int3); + pVM->dbgf.s.cEnabledInt3Breakpoints = pVM->dbgf.s.Int3.cToSearch; + Log(("DBGF: Set breakpoint at %RGv (Phys %RGp) cEnabledInt3Breakpoints=%u\n", pBp->u.Int3.GCPtr, + pBp->u.Int3.PhysAddr, pVM->dbgf.s.cEnabledInt3Breakpoints)); + } + } + return rc; +} + + +/** + * Disarms an int 3 breakpoint. + * + * This is used to implement both DBGFR3BpClear() and DBGFR3BpDisable(). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pBp The breakpoint. 
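+ *
+ * @remarks The original opcode byte is only restored if the breakpoint address
+ *          still reads back as 0xcc (int3); other values are left untouched.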
+ */ +static int dbgfR3BpInt3Disarm(PVM pVM, PDBGFBP pBp) +{ + VM_ASSERT_EMT(pVM); + + /* + * Check that the current byte is the int3 instruction, and restore the original one. + * We currently ignore invalid bytes. + */ + uint8_t bCurrent = 0; + int rc = PGMPhysSimpleReadGCPhys(pVM, &bCurrent, pBp->u.Int3.PhysAddr, sizeof(bCurrent)); + if ( RT_SUCCESS(rc) + && bCurrent == 0xcc) + { + rc = PGMPhysSimpleWriteGCPhys(pVM, pBp->u.Int3.PhysAddr, &pBp->u.Int3.bOrg, sizeof(pBp->u.Int3.bOrg)); + if (RT_SUCCESS(rc)) + { + pBp->fEnabled = false; + dbgfR3BpUpdateSearchOptimizations(pVM, DBGFBPTYPE_INT3, &pVM->dbgf.s.Int3); + pVM->dbgf.s.cEnabledInt3Breakpoints = pVM->dbgf.s.Int3.cToSearch; + Log(("DBGF: Removed breakpoint at %RGv (Phys %RGp) cEnabledInt3Breakpoints=%u\n", pBp->u.Int3.GCPtr, + pBp->u.Int3.PhysAddr, pVM->dbgf.s.cEnabledInt3Breakpoints)); + } + } + return rc; +} + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3BpDisableInt3OnCpu(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + /* + * Validate input. + */ + PDBGFBP pBp = (PDBGFBP)pvUser; + AssertReturn(pBp, VERR_INVALID_PARAMETER); + Assert(pBp->enmType == DBGFBPTYPE_INT3); + VMCPU_ASSERT_EMT(pVCpu); RT_NOREF(pVCpu); + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Disarm the breakpoint. + */ + return dbgfR3BpInt3Disarm(pVM, pBp); +} + + +/** + * Sets a register breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pAddress The address of the breakpoint. + * @param piHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param piHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param fType The access type (one of the X86_DR7_RW_* defines). + * @param cb The access size - 1,2,4 or 8 (the latter is AMD64 long mode only. + * Must be 1 if fType is X86_DR7_RW_EO. + * @param piBp Where to store the breakpoint id. (optional) + * @thread EMT + * @internal + */ +static DECLCALLBACK(int) dbgfR3BpSetReg(PUVM pUVM, PCDBGFADDRESS pAddress, uint64_t *piHitTrigger, uint64_t *piHitDisable, + uint8_t fType, uint8_t cb, uint32_t *piBp) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + if (!DBGFR3AddrIsValid(pUVM, pAddress)) + return VERR_INVALID_PARAMETER; + if (*piHitTrigger > *piHitDisable) + return VERR_INVALID_PARAMETER; + AssertMsgReturn(!piBp || VALID_PTR(piBp), ("piBp=%p\n", piBp), VERR_INVALID_POINTER); + if (piBp) + *piBp = UINT32_MAX; + switch (fType) + { + case X86_DR7_RW_EO: + if (cb == 1) + break; + AssertMsgFailed(("fType=%#x cb=%d != 1\n", fType, cb)); + return VERR_INVALID_PARAMETER; + case X86_DR7_RW_IO: + case X86_DR7_RW_RW: + case X86_DR7_RW_WO: + break; + default: + AssertMsgFailed(("fType=%#x\n", fType)); + return VERR_INVALID_PARAMETER; + } + switch (cb) + { + case 1: + case 2: + case 4: + break; + default: + AssertMsgFailed(("cb=%#x\n", cb)); + return VERR_INVALID_PARAMETER; + } + + /* + * Check if the breakpoint already exists. + */ + PDBGFBP pBp = dbgfR3BpGetByAddr(pVM, DBGFBPTYPE_REG, pAddress->FlatPtr); + if ( pBp + && pBp->u.Reg.cb == cb + && pBp->u.Reg.fType == fType) + { + int rc = VINF_SUCCESS; + if (!pBp->fEnabled) + rc = dbgfR3BpRegArm(pVM, pBp); + if (RT_SUCCESS(rc)) + { + rc = VINF_DBGF_BP_ALREADY_EXIST; + if (piBp) + *piBp = pBp->iBp; + } + return rc; + } + + /* + * Allocate and initialize the bp. 
+ */ + pBp = dbgfR3BpAlloc(pVM, DBGFBPTYPE_REG); + if (!pBp) + return VERR_DBGF_NO_MORE_BP_SLOTS; + pBp->iHitTrigger = *piHitTrigger; + pBp->iHitDisable = *piHitDisable; + Assert(pBp->iBp == pBp->u.Reg.iReg); + pBp->u.Reg.GCPtr = pAddress->FlatPtr; + pBp->u.Reg.fType = fType; + pBp->u.Reg.cb = cb; + ASMCompilerBarrier(); + pBp->fEnabled = true; + + /* + * Arm the breakpoint. + */ + int rc = dbgfR3BpRegArm(pVM, pBp); + if (RT_SUCCESS(rc)) + { + if (piBp) + *piBp = pBp->iBp; + } + else + dbgfR3BpFree(pVM, pBp); + + return rc; +} + + +/** + * Sets a register breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pAddress The address of the breakpoint. + * @param iHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param iHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param fType The access type (one of the X86_DR7_RW_* defines). + * @param cb The access size - 1,2,4 or 8 (the latter is AMD64 long mode only. + * Must be 1 if fType is X86_DR7_RW_EO. + * @param piBp Where to store the breakpoint id. (optional) + * @thread Any thread. + */ +VMMR3DECL(int) DBGFR3BpSetReg(PUVM pUVM, PCDBGFADDRESS pAddress, uint64_t iHitTrigger, uint64_t iHitDisable, + uint8_t fType, uint8_t cb, uint32_t *piBp) +{ + /* + * This must be done on EMT. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpSetReg, 7, + pUVM, pAddress, &iHitTrigger, &iHitDisable, fType, cb, piBp); + LogFlow(("DBGFR3BpSetReg: returns %Rrc\n", rc)); + return rc; + +} + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3BpRegRecalcOnCpu(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + NOREF(pVM); NOREF(pvUser); + + /* + * CPU 0 updates the enabled hardware breakpoint counts. + */ + if (pVCpu->idCpu == 0) + { + pVM->dbgf.s.cEnabledHwBreakpoints = 0; + pVM->dbgf.s.cEnabledHwIoBreakpoints = 0; + + for (uint32_t iBp = 0; iBp < RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); iBp++) + if ( pVM->dbgf.s.aHwBreakpoints[iBp].fEnabled + && pVM->dbgf.s.aHwBreakpoints[iBp].enmType == DBGFBPTYPE_REG) + { + pVM->dbgf.s.cEnabledHwBreakpoints += 1; + pVM->dbgf.s.cEnabledHwIoBreakpoints += pVM->dbgf.s.aHwBreakpoints[iBp].u.Reg.fType == X86_DR7_RW_IO; + } + } + + return CPUMRecalcHyperDRx(pVCpu, UINT8_MAX, false); +} + + +/** + * Arms a debug register breakpoint. + * + * This is used to implement both DBGFR3BpSetReg() and DBGFR3BpEnable(). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pBp The breakpoint. + * @thread EMT(0) + */ +static int dbgfR3BpRegArm(PVM pVM, PDBGFBP pBp) +{ + RT_NOREF_PV(pBp); + Assert(pBp->fEnabled); + return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE, dbgfR3BpRegRecalcOnCpu, NULL); +} + + +/** + * Disarms a debug register breakpoint. + * + * This is used to implement both DBGFR3BpClear() and DBGFR3BpDisable(). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pBp The breakpoint. + * @thread EMT(0) + */ +static int dbgfR3BpRegDisarm(PVM pVM, PDBGFBP pBp) +{ + RT_NOREF_PV(pBp); + Assert(!pBp->fEnabled); + return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE, dbgfR3BpRegRecalcOnCpu, NULL); +} + + +/** + * EMT worker for DBGFR3BpSetREM(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pAddress The address of the breakpoint. 
+ * @param piHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param piHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint id. (optional) + * @thread EMT(0) + * @internal + */ +static DECLCALLBACK(int) dbgfR3BpSetREM(PUVM pUVM, PCDBGFADDRESS pAddress, uint64_t *piHitTrigger, + uint64_t *piHitDisable, uint32_t *piBp) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + if (!DBGFR3AddrIsValid(pUVM, pAddress)) + return VERR_INVALID_PARAMETER; + if (*piHitTrigger > *piHitDisable) + return VERR_INVALID_PARAMETER; + AssertMsgReturn(!piBp || VALID_PTR(piBp), ("piBp=%p\n", piBp), VERR_INVALID_POINTER); + if (piBp) + *piBp = UINT32_MAX; + + /* + * Check if the breakpoint already exists. + */ + PDBGFBP pBp = dbgfR3BpGetByAddr(pVM, DBGFBPTYPE_REM, pAddress->FlatPtr); + if (pBp) + { + int rc = VINF_SUCCESS; + if (!pBp->fEnabled) +#ifdef VBOX_WITH_REM + rc = REMR3BreakpointSet(pVM, pBp->u.Rem.GCPtr); +#else + rc = IEMBreakpointSet(pVM, pBp->u.Rem.GCPtr); +#endif + if (RT_SUCCESS(rc)) + { + rc = VINF_DBGF_BP_ALREADY_EXIST; + if (piBp) + *piBp = pBp->iBp; + } + return rc; + } + + /* + * Allocate and initialize the bp. + */ + pBp = dbgfR3BpAlloc(pVM, DBGFBPTYPE_REM); + if (!pBp) + return VERR_DBGF_NO_MORE_BP_SLOTS; + pBp->u.Rem.GCPtr = pAddress->FlatPtr; + pBp->iHitTrigger = *piHitTrigger; + pBp->iHitDisable = *piHitDisable; + ASMCompilerBarrier(); + pBp->fEnabled = true; + + /* + * Now ask REM to set the breakpoint. + */ +#ifdef VBOX_WITH_REM + int rc = REMR3BreakpointSet(pVM, pAddress->FlatPtr); +#else + int rc = IEMBreakpointSet(pVM, pAddress->FlatPtr); +#endif + if (RT_SUCCESS(rc)) + { + if (piBp) + *piBp = pBp->iBp; + } + else + dbgfR3BpFree(pVM, pBp); + + return rc; +} + + +/** + * Sets a recompiler breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pAddress The address of the breakpoint. + * @param iHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param iHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint id. (optional) + * @thread Any thread. + */ +VMMR3DECL(int) DBGFR3BpSetREM(PUVM pUVM, PCDBGFADDRESS pAddress, uint64_t iHitTrigger, uint64_t iHitDisable, uint32_t *piBp) +{ + /* + * This must be done on EMT. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpSetREM, 5, + pUVM, pAddress, &iHitTrigger, &iHitDisable, piBp); + LogFlow(("DBGFR3BpSetREM: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Updates IOM on whether we've got any armed I/O port or MMIO breakpoints. + * + * @returns VINF_SUCCESS + * @param pVM The cross context VM structure. 
+ * @thread EMT(0) + */ +static int dbgfR3BpUpdateIom(PVM pVM) +{ + dbgfR3BpUpdateSearchOptimizations(pVM, DBGFBPTYPE_PORT_IO, &pVM->dbgf.s.PortIo); + if (pVM->dbgf.s.PortIo.cToSearch) + ASMAtomicBitSet(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_BREAKPOINT_IO); + else + ASMAtomicBitClear(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_BREAKPOINT_IO); + + dbgfR3BpUpdateSearchOptimizations(pVM, DBGFBPTYPE_MMIO, &pVM->dbgf.s.Mmio); + if (pVM->dbgf.s.Mmio.cToSearch) + ASMAtomicBitSet(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_BREAKPOINT_MMIO); + else + ASMAtomicBitClear(&pVM->dbgf.s.bmSelectedEvents, DBGFEVENT_BREAKPOINT_MMIO); + + IOMR3NotifyBreakpointCountChange(pVM, pVM->dbgf.s.PortIo.cToSearch != 0, pVM->dbgf.s.Mmio.cToSearch != 0); + return VINF_SUCCESS; +} + + +/** + * EMT worker for DBGFR3BpSetPortIo. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param uPort The first I/O port. + * @param cPorts The number of I/O ports. + * @param fAccess The access we want to break on. + * @param piHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param piHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint ID. + * @thread EMT(0) + */ +static DECLCALLBACK(int) dbgfR3BpSetPortIo(PUVM pUVM, RTIOPORT uPort, RTIOPORT cPorts, uint32_t fAccess, + uint64_t const *piHitTrigger, uint64_t const *piHitDisable, uint32_t *piBp) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + *piBp = UINT32_MAX; + + /* + * Check if the breakpoint already exists. + */ + for (uint32_t i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); i++) + if ( pVM->dbgf.s.aBreakpoints[i].enmType == DBGFBPTYPE_PORT_IO + && pVM->dbgf.s.aBreakpoints[i].u.PortIo.uPort == uPort + && pVM->dbgf.s.aBreakpoints[i].u.PortIo.cPorts == cPorts + && pVM->dbgf.s.aBreakpoints[i].u.PortIo.fAccess == fAccess) + { + if (!pVM->dbgf.s.aBreakpoints[i].fEnabled) + { + pVM->dbgf.s.aBreakpoints[i].fEnabled = true; + dbgfR3BpUpdateIom(pVM); + } + *piBp = pVM->dbgf.s.aBreakpoints[i].iBp; + return VINF_DBGF_BP_ALREADY_EXIST; + } + + /* + * Allocate and initialize the breakpoint. + */ + PDBGFBP pBp = dbgfR3BpAlloc(pVM, DBGFBPTYPE_PORT_IO); + if (!pBp) + return VERR_DBGF_NO_MORE_BP_SLOTS; + pBp->iHitTrigger = *piHitTrigger; + pBp->iHitDisable = *piHitDisable; + pBp->u.PortIo.uPort = uPort; + pBp->u.PortIo.cPorts = cPorts; + pBp->u.PortIo.fAccess = fAccess; + ASMCompilerBarrier(); + pBp->fEnabled = true; + + /* + * Tell IOM. + */ + dbgfR3BpUpdateIom(pVM); + *piBp = pBp->iBp; + return VINF_SUCCESS; +} + + +/** + * Sets an I/O port breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param uPort The first I/O port. + * @param cPorts The number of I/O ports, see DBGFBPIOACCESS_XXX. + * @param fAccess The access we want to break on. + * @param iHitTrigger The hit count at which the breakpoint start + * triggering. Use 0 (or 1) if it's gonna trigger at + * once. + * @param iHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint ID. Optional. + * @thread Any thread. 
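+ *
+ * @remarks A minimal, illustrative call sketch; the port number, hit counts and
+ *          the use of the full valid-access mask as fAccess are arbitrary
+ *          example choices, not taken from this file:
+ * @code
+ *      uint32_t iBp = UINT32_MAX;
+ *      int rc = DBGFR3BpSetPortIo(pUVM, 0x60 /* uPort */, 1 /* cPorts */,
+ *                                 DBGFBPIOACCESS_VALID_MASK_PORT_IO /* fAccess */,
+ *                                 0 /* iHitTrigger */, UINT64_MAX /* iHitDisable */, &iBp);
+ * @endcode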
+ */ +VMMR3DECL(int) DBGFR3BpSetPortIo(PUVM pUVM, RTIOPORT uPort, RTIOPORT cPorts, uint32_t fAccess, + uint64_t iHitTrigger, uint64_t iHitDisable, uint32_t *piBp) +{ + AssertReturn(!(fAccess & ~DBGFBPIOACCESS_VALID_MASK_PORT_IO), VERR_INVALID_FLAGS); + AssertReturn(fAccess, VERR_INVALID_FLAGS); + if (iHitTrigger > iHitDisable) + return VERR_INVALID_PARAMETER; + AssertPtrNullReturn(piBp, VERR_INVALID_POINTER); + AssertReturn(cPorts > 0, VERR_OUT_OF_RANGE); + AssertReturn((RTIOPORT)(uPort + cPorts) < uPort, VERR_OUT_OF_RANGE); + + /* + * This must be done on EMT. + */ + uint32_t iBp = UINT32_MAX; + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpSetPortIo, 7, + pUVM, uPort, cPorts, fAccess, &iHitTrigger, &iHitDisable, piBp); + if (piBp) + *piBp = iBp; + LogFlow(("DBGFR3BpSetPortIo: returns %Rrc iBp=%d\n", rc, iBp)); + return rc; +} + + +/** + * EMT worker for DBGFR3BpSetMmio. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pGCPhys The start of the MMIO range to break on. + * @param cb The size of the MMIO range. + * @param fAccess The access we want to break on. + * @param piHitTrigger The hit count at which the breakpoint start triggering. + * Use 0 (or 1) if it's gonna trigger at once. + * @param piHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint ID. + * @thread EMT(0) + */ +static DECLCALLBACK(int) dbgfR3BpSetMmio(PUVM pUVM, PCRTGCPHYS pGCPhys, uint32_t cb, uint32_t fAccess, + uint64_t const *piHitTrigger, uint64_t const *piHitDisable, uint32_t *piBp) +{ + RTGCPHYS const GCPhys = *pGCPhys; + + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + *piBp = UINT32_MAX; + + /* + * Check if the breakpoint already exists. + */ + for (uint32_t i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); i++) + if ( pVM->dbgf.s.aBreakpoints[i].enmType == DBGFBPTYPE_MMIO + && pVM->dbgf.s.aBreakpoints[i].u.Mmio.PhysAddr == GCPhys + && pVM->dbgf.s.aBreakpoints[i].u.Mmio.cb == cb + && pVM->dbgf.s.aBreakpoints[i].u.Mmio.fAccess == fAccess) + { + if (!pVM->dbgf.s.aBreakpoints[i].fEnabled) + { + pVM->dbgf.s.aBreakpoints[i].fEnabled = true; + dbgfR3BpUpdateIom(pVM); + } + *piBp = pVM->dbgf.s.aBreakpoints[i].iBp; + return VINF_DBGF_BP_ALREADY_EXIST; + } + + /* + * Allocate and initialize the breakpoint. + */ + PDBGFBP pBp = dbgfR3BpAlloc(pVM, DBGFBPTYPE_MMIO); + if (!pBp) + return VERR_DBGF_NO_MORE_BP_SLOTS; + pBp->iHitTrigger = *piHitTrigger; + pBp->iHitDisable = *piHitDisable; + pBp->u.Mmio.PhysAddr = GCPhys; + pBp->u.Mmio.cb = cb; + pBp->u.Mmio.fAccess = fAccess; + ASMCompilerBarrier(); + pBp->fEnabled = true; + + /* + * Tell IOM. + */ + dbgfR3BpUpdateIom(pVM); + *piBp = pBp->iBp; + return VINF_SUCCESS; +} + + +/** + * Sets a memory mapped I/O breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param GCPhys The first MMIO address. + * @param cb The size of the MMIO range to break on. + * @param fAccess The access we want to break on. + * @param iHitTrigger The hit count at which the breakpoint start + * triggering. Use 0 (or 1) if it's gonna trigger at + * once. + * @param iHitDisable The hit count which disables the breakpoint. + * Use ~(uint64_t) if it's never gonna be disabled. + * @param piBp Where to store the breakpoint ID. Optional. + * @thread Any thread. 
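+ *
+ * @remarks A minimal, illustrative call sketch; the physical address, size, hit
+ *          counts and the use of the full valid-access mask as fAccess are
+ *          arbitrary example choices, not taken from this file:
+ * @code
+ *      uint32_t iBp = UINT32_MAX;
+ *      int rc = DBGFR3BpSetMmio(pUVM, UINT64_C(0xfee00000) /* GCPhys */, _4K /* cb */,
+ *                               DBGFBPIOACCESS_VALID_MASK_MMIO /* fAccess */,
+ *                               0 /* iHitTrigger */, UINT64_MAX /* iHitDisable */, &iBp);
+ * @endcode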
+ */ +VMMR3DECL(int) DBGFR3BpSetMmio(PUVM pUVM, RTGCPHYS GCPhys, uint32_t cb, uint32_t fAccess, + uint64_t iHitTrigger, uint64_t iHitDisable, uint32_t *piBp) +{ + AssertReturn(!(fAccess & ~DBGFBPIOACCESS_VALID_MASK_MMIO), VERR_INVALID_FLAGS); + AssertReturn(fAccess, VERR_INVALID_FLAGS); + if (iHitTrigger > iHitDisable) + return VERR_INVALID_PARAMETER; + AssertPtrNullReturn(piBp, VERR_INVALID_POINTER); + AssertReturn(cb, VERR_OUT_OF_RANGE); + AssertReturn(GCPhys + cb < GCPhys, VERR_OUT_OF_RANGE); + + /* + * This must be done on EMT. + */ + uint32_t iBp = UINT32_MAX; + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpSetMmio, 7, + pUVM, &GCPhys, cb, fAccess, &iHitTrigger, &iHitDisable, piBp); + if (piBp) + *piBp = iBp; + LogFlow(("DBGFR3BpSetMmio: returns %Rrc iBp=%d\n", rc, iBp)); + return rc; +} + + +/** + * EMT worker for DBGFR3BpClear(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iBp The id of the breakpoint which should be removed (cleared). + * @thread EMT(0) + * @internal + */ +static DECLCALLBACK(int) dbgfR3BpClear(PUVM pUVM, uint32_t iBp) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_EMT(pVM); + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + PDBGFBP pBp = dbgfR3BpGet(pVM, iBp); + if (!pBp) + return VERR_DBGF_BP_NOT_FOUND; + + /* + * Disarm the breakpoint if it's enabled. + */ + if (pBp->fEnabled) + { + pBp->fEnabled = false; + int rc; + switch (pBp->enmType) + { + case DBGFBPTYPE_REG: + rc = dbgfR3BpRegDisarm(pVM, pBp); + break; + + case DBGFBPTYPE_INT3: + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, dbgfR3BpDisableInt3OnCpu, pBp); + break; + + case DBGFBPTYPE_REM: +#ifdef VBOX_WITH_REM + rc = REMR3BreakpointClear(pVM, pBp->u.Rem.GCPtr); +#else + rc = IEMBreakpointClear(pVM, pBp->u.Rem.GCPtr); +#endif + break; + + case DBGFBPTYPE_PORT_IO: + case DBGFBPTYPE_MMIO: + rc = dbgfR3BpUpdateIom(pVM); + break; + + default: + AssertMsgFailedReturn(("Invalid enmType=%d!\n", pBp->enmType), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + AssertRCReturn(rc, rc); + } + + /* + * Free the breakpoint. + */ + dbgfR3BpFree(pVM, pBp); + return VINF_SUCCESS; +} + + +/** + * Clears a breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iBp The id of the breakpoint which should be removed (cleared). + * @thread Any thread. + */ +VMMR3DECL(int) DBGFR3BpClear(PUVM pUVM, uint32_t iBp) +{ + /* + * This must be done on EMT. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpClear, 2, pUVM, iBp); + LogFlow(("DBGFR3BpClear: returns %Rrc\n", rc)); + return rc; +} + + +/** + * EMT worker for DBGFR3BpEnable(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iBp The id of the breakpoint which should be enabled. + * @thread EMT(0) + * @internal + */ +static DECLCALLBACK(int) dbgfR3BpEnable(PUVM pUVM, uint32_t iBp) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + PDBGFBP pBp = dbgfR3BpGet(pVM, iBp); + if (!pBp) + return VERR_DBGF_BP_NOT_FOUND; + + /* + * Already enabled? + */ + if (pBp->fEnabled) + return VINF_DBGF_BP_ALREADY_ENABLED; + + /* + * Arm the breakpoint. 
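+ * (Register and int3 breakpoints are armed through an EMT rendezvous, REM/IEM
+ * breakpoints through the recompiler breakpoint API, while I/O port and MMIO
+ * breakpoints only require updating IOM.)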
+ */ + int rc; + pBp->fEnabled = true; + switch (pBp->enmType) + { + case DBGFBPTYPE_REG: + rc = dbgfR3BpRegArm(pVM, pBp); + break; + + case DBGFBPTYPE_INT3: + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, dbgfR3BpEnableInt3OnCpu, pBp); + break; + + case DBGFBPTYPE_REM: +#ifdef VBOX_WITH_REM + rc = REMR3BreakpointSet(pVM, pBp->u.Rem.GCPtr); +#else + rc = IEMBreakpointSet(pVM, pBp->u.Rem.GCPtr); +#endif + break; + + case DBGFBPTYPE_PORT_IO: + case DBGFBPTYPE_MMIO: + rc = dbgfR3BpUpdateIom(pVM); + break; + + default: + AssertMsgFailedReturn(("Invalid enmType=%d!\n", pBp->enmType), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + if (RT_FAILURE(rc)) + pBp->fEnabled = false; + + return rc; +} + + +/** + * Enables a breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iBp The id of the breakpoint which should be enabled. + * @thread Any thread. + */ +VMMR3DECL(int) DBGFR3BpEnable(PUVM pUVM, uint32_t iBp) +{ + /* + * This must be done on EMT. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpEnable, 2, pUVM, iBp); + LogFlow(("DBGFR3BpEnable: returns %Rrc\n", rc)); + return rc; +} + + +/** + * EMT worker for DBGFR3BpDisable(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iBp The id of the breakpoint which should be disabled. + * @thread EMT(0) + * @internal + */ +static DECLCALLBACK(int) dbgfR3BpDisable(PUVM pUVM, uint32_t iBp) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + PDBGFBP pBp = dbgfR3BpGet(pVM, iBp); + if (!pBp) + return VERR_DBGF_BP_NOT_FOUND; + + /* + * Already enabled? + */ + if (!pBp->fEnabled) + return VINF_DBGF_BP_ALREADY_DISABLED; + + /* + * Remove the breakpoint. + */ + pBp->fEnabled = false; + int rc; + switch (pBp->enmType) + { + case DBGFBPTYPE_REG: + rc = dbgfR3BpRegDisarm(pVM, pBp); + break; + + case DBGFBPTYPE_INT3: + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, dbgfR3BpDisableInt3OnCpu, pBp); + break; + + case DBGFBPTYPE_REM: +#ifdef VBOX_WITH_REM + rc = REMR3BreakpointClear(pVM, pBp->u.Rem.GCPtr); +#else + rc = IEMBreakpointClear(pVM, pBp->u.Rem.GCPtr); +#endif + break; + + case DBGFBPTYPE_PORT_IO: + case DBGFBPTYPE_MMIO: + rc = dbgfR3BpUpdateIom(pVM); + break; + + default: + AssertMsgFailedReturn(("Invalid enmType=%d!\n", pBp->enmType), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + + return rc; +} + + +/** + * Disables a breakpoint. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param iBp The id of the breakpoint which should be disabled. + * @thread Any thread. + */ +VMMR3DECL(int) DBGFR3BpDisable(PUVM pUVM, uint32_t iBp) +{ + /* + * This must be done on EMT. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpDisable, 2, pUVM, iBp); + LogFlow(("DBGFR3BpDisable: returns %Rrc\n", rc)); + return rc; +} + + +/** + * EMT worker for DBGFR3BpEnum(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pfnCallback The callback function. + * @param pvUser The user argument to pass to the callback. + * @thread EMT + * @internal + */ +static DECLCALLBACK(int) dbgfR3BpEnum(PUVM pUVM, PFNDBGFBPENUM pfnCallback, void *pvUser) +{ + /* + * Validate input. + */ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pfnCallback, VERR_INVALID_POINTER); + + /* + * Enumerate the hardware breakpoints. 
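+ * (The callback may stop the enumeration early by returning a failure status
+ * or VINF_CALLBACK_RETURN.)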
+ */ + unsigned i; + for (i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); i++) + if (pVM->dbgf.s.aHwBreakpoints[i].enmType != DBGFBPTYPE_FREE) + { + int rc = pfnCallback(pUVM, pvUser, &pVM->dbgf.s.aHwBreakpoints[i]); + if (RT_FAILURE(rc) || rc == VINF_CALLBACK_RETURN) + return rc; + } + + /* + * Enumerate the other breakpoints. + */ + for (i = 0; i < RT_ELEMENTS(pVM->dbgf.s.aBreakpoints); i++) + if (pVM->dbgf.s.aBreakpoints[i].enmType != DBGFBPTYPE_FREE) + { + int rc = pfnCallback(pUVM, pvUser, &pVM->dbgf.s.aBreakpoints[i]); + if (RT_FAILURE(rc) || rc == VINF_CALLBACK_RETURN) + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Enumerate the breakpoints. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pfnCallback The callback function. + * @param pvUser The user argument to pass to the callback. + * @thread Any thread but the callback will be called from EMT. + */ +VMMR3DECL(int) DBGFR3BpEnum(PUVM pUVM, PFNDBGFBPENUM pfnCallback, void *pvUser) +{ + /* + * This must be done on EMT. + */ + int rc = VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3BpEnum, 3, pUVM, pfnCallback, pvUser); + LogFlow(("DBGFR3BpEnum: returns %Rrc\n", rc)); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFCoreWrite.cpp b/src/VBox/VMM/VMMR3/DBGFCoreWrite.cpp new file mode 100644 index 00000000..1744ba8b --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFCoreWrite.cpp @@ -0,0 +1,664 @@ +/* $Id: DBGFCoreWrite.cpp $ */ +/** @file + * DBGF - Debugger Facility, Guest Core Dump. + */ + +/* + * Copyright (C) 2010-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_dbgf_vmcore VMCore Format + * + * The VirtualBox VMCore Format: + * [ ELF 64 Header] -- Only 1 + * + * [ PT_NOTE ] -- Only 1 + * - Offset into CoreDescriptor followed by list of Notes (Note Hdr + data) of VBox CPUs. + * - (Any Additional custom Note sections). + * + * [ PT_LOAD ] -- One for each contiguous memory chunk + * - Memory offset (physical). + * - File offset. + * + * CoreDescriptor + * - Magic, VBox version. + * - Number of CPus. + * + * Per-CPU register dump + * - CPU 1 Note Hdr + Data. + * - CPU 2 Note Hdr + Data. + * ... + * (Additional custom notes Hdr+data) + * - VBox 1 Note Hdr + Data. + * - VBox 2 Note Hdr + Data. + * ... 
+ * Memory dump + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include + +#include "DBGFInternal.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +#define DBGFLOG_NAME "DBGFCoreWrite" + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +static const int g_NoteAlign = 8; +static const int g_cbNoteName = 16; + +/* The size of these strings (incl. NULL terminator) must align to 8 bytes (g_NoteAlign) and -not- 4 bytes. */ +static const char *g_pcszCoreVBoxCore = "VBCORE"; +static const char *g_pcszCoreVBoxCpu = "VBCPU"; + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Guest core writer data. + * + * Used to pass parameters from DBGFR3CoreWrite to dbgfR3CoreWriteRendezvous(). + */ +typedef struct DBGFCOREDATA +{ + /** The name of the file to write the file to. */ + const char *pszFilename; + /** Whether to replace (/overwrite) any existing file. */ + bool fReplaceFile; +} DBGFCOREDATA; +/** Pointer to the guest core writer data. */ +typedef DBGFCOREDATA *PDBGFCOREDATA; + + + +/** + * ELF function to write 64-bit ELF header. + * + * @param hFile The file to write to. + * @param cProgHdrs Number of program headers. + * @param cSecHdrs Number of section headers. + * + * @return IPRT status code. + */ +static int Elf64WriteElfHdr(RTFILE hFile, uint16_t cProgHdrs, uint16_t cSecHdrs) +{ + Elf64_Ehdr ElfHdr; + RT_ZERO(ElfHdr); + ElfHdr.e_ident[EI_MAG0] = ELFMAG0; + ElfHdr.e_ident[EI_MAG1] = ELFMAG1; + ElfHdr.e_ident[EI_MAG2] = ELFMAG2; + ElfHdr.e_ident[EI_MAG3] = ELFMAG3; + ElfHdr.e_ident[EI_DATA] = ELFDATA2LSB; + ElfHdr.e_type = ET_CORE; + ElfHdr.e_version = EV_CURRENT; + ElfHdr.e_ident[EI_CLASS] = ELFCLASS64; + /* 32-bit builds will produce cores with e_machine EM_386. */ +#ifdef RT_ARCH_AMD64 + ElfHdr.e_machine = EM_X86_64; +#else + ElfHdr.e_machine = EM_386; +#endif + ElfHdr.e_phnum = cProgHdrs; + ElfHdr.e_shnum = cSecHdrs; + ElfHdr.e_ehsize = sizeof(ElfHdr); + ElfHdr.e_phoff = sizeof(ElfHdr); + ElfHdr.e_phentsize = sizeof(Elf64_Phdr); + ElfHdr.e_shentsize = sizeof(Elf64_Shdr); + + return RTFileWrite(hFile, &ElfHdr, sizeof(ElfHdr), NULL /* all */); +} + + +/** + * ELF function to write 64-bit program header. + * + * @param hFile The file to write to. + * @param Type Type of program header (PT_*). + * @param fFlags Flags (access permissions, PF_*). + * @param offFileData File offset of contents. 
+ * @param cbFileData Size of contents in the file. + * @param cbMemData Size of contents in memory. + * @param Phys Physical address, pass zero if not applicable. + * + * @return IPRT status code. + */ +static int Elf64WriteProgHdr(RTFILE hFile, uint32_t Type, uint32_t fFlags, uint64_t offFileData, uint64_t cbFileData, + uint64_t cbMemData, RTGCPHYS Phys) +{ + Elf64_Phdr ProgHdr; + RT_ZERO(ProgHdr); + ProgHdr.p_type = Type; + ProgHdr.p_flags = fFlags; + ProgHdr.p_offset = offFileData; + ProgHdr.p_filesz = cbFileData; + ProgHdr.p_memsz = cbMemData; + ProgHdr.p_paddr = Phys; + + return RTFileWrite(hFile, &ProgHdr, sizeof(ProgHdr), NULL /* all */); +} + + +/** + * Returns the size of the NOTE section given the name and size of the data. + * + * @param pszName Name of the note section. + * @param cbData Size of the data portion of the note section. + * + * @return The size of the NOTE section as rounded to the file alignment. + */ +static uint64_t Elf64NoteSectionSize(const char *pszName, uint64_t cbData) +{ + uint64_t cbNote = sizeof(Elf64_Nhdr); + + size_t cbName = strlen(pszName) + 1; + size_t cbNameAlign = RT_ALIGN_Z(cbName, g_NoteAlign); + + cbNote += cbNameAlign; + cbNote += RT_ALIGN_64(cbData, g_NoteAlign); + return cbNote; +} + + +/** + * Elf function to write 64-bit note header. + * + * @param hFile The file to write to. + * @param Type Type of this section. + * @param pszName Name of this section. + * @param pvData Opaque pointer to the data, if NULL only computes size. + * @param cbData Size of the data. + * + * @returns IPRT status code. + */ +static int Elf64WriteNoteHdr(RTFILE hFile, uint16_t Type, const char *pszName, const void *pvData, uint64_t cbData) +{ + AssertReturn(pvData, VERR_INVALID_POINTER); + AssertReturn(cbData > 0, VERR_NO_DATA); + + char szNoteName[g_cbNoteName]; + RT_ZERO(szNoteName); + RTStrCopy(szNoteName, sizeof(szNoteName), pszName); + + size_t cbName = strlen(szNoteName) + 1; + size_t cbNameAlign = RT_ALIGN_Z(cbName, g_NoteAlign); + uint64_t cbDataAlign = RT_ALIGN_64(cbData, g_NoteAlign); + + /* + * Yell loudly and bail if we are going to be writing a core file that is not compatible with + * both Solaris and the 64-bit ELF spec. which dictates 8-byte alignment. See @bugref{5211#c3}. + */ + if (cbNameAlign - cbName > 3) + { + LogRel((DBGFLOG_NAME ": Elf64WriteNoteHdr pszName=%s cbName=%u cbNameAlign=%u, cbName aligns to 4 not 8-bytes!\n", + pszName, cbName, cbNameAlign)); + return VERR_INVALID_PARAMETER; + } + + if (cbDataAlign - cbData > 3) + { + LogRel((DBGFLOG_NAME ": Elf64WriteNoteHdr pszName=%s cbData=%u cbDataAlign=%u, cbData aligns to 4 not 8-bytes!\n", + pszName, cbData, cbDataAlign)); + return VERR_INVALID_PARAMETER; + } + + static const char s_achPad[7] = { 0, 0, 0, 0, 0, 0, 0 }; + AssertCompile(sizeof(s_achPad) >= g_NoteAlign - 1); + + Elf64_Nhdr ElfNoteHdr; + RT_ZERO(ElfNoteHdr); + ElfNoteHdr.n_namesz = (Elf64_Word)cbName - 1; /* Again, a discrepancy between ELF-64 and Solaris, + we will follow ELF-64, see @bugref{5211#c3}. */ + ElfNoteHdr.n_type = Type; + ElfNoteHdr.n_descsz = (Elf64_Word)cbDataAlign; + + /* + * Write note header. + */ + int rc = RTFileWrite(hFile, &ElfNoteHdr, sizeof(ElfNoteHdr), NULL /* all */); + if (RT_SUCCESS(rc)) + { + /* + * Write note name. + */ + rc = RTFileWrite(hFile, szNoteName, cbName, NULL /* all */); + if (RT_SUCCESS(rc)) + { + /* + * Write note name padding if required. 
+ */ + if (cbNameAlign > cbName) + rc = RTFileWrite(hFile, s_achPad, cbNameAlign - cbName, NULL); + + if (RT_SUCCESS(rc)) + { + /* + * Write note data. + */ + rc = RTFileWrite(hFile, pvData, cbData, NULL /* all */); + if (RT_SUCCESS(rc)) + { + /* + * Write note data padding if required. + */ + if (cbDataAlign > cbData) + rc = RTFileWrite(hFile, s_achPad, cbDataAlign - cbData, NULL /* all*/); + } + } + } + } + + if (RT_FAILURE(rc)) + LogRel((DBGFLOG_NAME ": RTFileWrite failed. rc=%Rrc pszName=%s cbName=%u cbNameAlign=%u cbData=%u cbDataAlign=%u\n", + rc, pszName, cbName, cbNameAlign, cbData, cbDataAlign)); + + return rc; +} + + +/** + * Count the number of memory ranges that go into the core file. + * + * We cannot do a page-by-page dump of the entire guest memory as there will be + * way too many program header entries. Also we don't want to dump MMIO regions + * which means we cannot have a 1:1 mapping between core file offset and memory + * offset. Instead we dump the memory in ranges. A memory range is a contiguous + * memory area suitable for dumping to a core file. + * + * @param pVM The cross context VM structure. + * + * @return Number of memory ranges + */ +static uint32_t dbgfR3GetRamRangeCount(PVM pVM) +{ + return PGMR3PhysGetRamRangeCount(pVM); +} + + +/** + * Gets the guest-CPU context suitable for dumping into the core file. + * + * @param pVCpu The cross context virtual CPU structure. + * @param pDbgfCpu Where to dump the guest-CPU data. + */ +static void dbgfR3GetCoreCpu(PVMCPU pVCpu, PDBGFCORECPU pDbgfCpu) +{ +#define DBGFCOPYSEL(a_dbgfsel, a_cpumselreg) \ + do { \ + (a_dbgfsel).uBase = (a_cpumselreg).u64Base; \ + (a_dbgfsel).uLimit = (a_cpumselreg).u32Limit; \ + (a_dbgfsel).uAttr = (a_cpumselreg).Attr.u; \ + (a_dbgfsel).uSel = (a_cpumselreg).Sel; \ + } while (0) + + PVM pVM = pVCpu->CTX_SUFF(pVM); + PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + pDbgfCpu->rax = pCtx->rax; + pDbgfCpu->rbx = pCtx->rbx; + pDbgfCpu->rcx = pCtx->rcx; + pDbgfCpu->rdx = pCtx->rdx; + pDbgfCpu->rsi = pCtx->rsi; + pDbgfCpu->rdi = pCtx->rdi; + pDbgfCpu->r8 = pCtx->r8; + pDbgfCpu->r9 = pCtx->r9; + pDbgfCpu->r10 = pCtx->r10; + pDbgfCpu->r11 = pCtx->r11; + pDbgfCpu->r12 = pCtx->r12; + pDbgfCpu->r13 = pCtx->r13; + pDbgfCpu->r14 = pCtx->r14; + pDbgfCpu->r15 = pCtx->r15; + pDbgfCpu->rip = pCtx->rip; + pDbgfCpu->rsp = pCtx->rsp; + pDbgfCpu->rbp = pCtx->rbp; + pDbgfCpu->rflags = pCtx->rflags.u; + DBGFCOPYSEL(pDbgfCpu->cs, pCtx->cs); + DBGFCOPYSEL(pDbgfCpu->ds, pCtx->ds); + DBGFCOPYSEL(pDbgfCpu->es, pCtx->es); + DBGFCOPYSEL(pDbgfCpu->fs, pCtx->fs); + DBGFCOPYSEL(pDbgfCpu->gs, pCtx->gs); + DBGFCOPYSEL(pDbgfCpu->ss, pCtx->ss); + pDbgfCpu->cr0 = pCtx->cr0; + pDbgfCpu->cr2 = pCtx->cr2; + pDbgfCpu->cr3 = pCtx->cr3; + pDbgfCpu->cr4 = pCtx->cr4; + AssertCompile(RT_ELEMENTS(pDbgfCpu->dr) == RT_ELEMENTS(pCtx->dr)); + for (unsigned i = 0; i < RT_ELEMENTS(pDbgfCpu->dr); i++) + pDbgfCpu->dr[i] = pCtx->dr[i]; + pDbgfCpu->gdtr.uAddr = pCtx->gdtr.pGdt; + pDbgfCpu->gdtr.cb = pCtx->gdtr.cbGdt; + pDbgfCpu->idtr.uAddr = pCtx->idtr.pIdt; + pDbgfCpu->idtr.cb = pCtx->idtr.cbIdt; + DBGFCOPYSEL(pDbgfCpu->ldtr, pCtx->ldtr); + DBGFCOPYSEL(pDbgfCpu->tr, pCtx->tr); + pDbgfCpu->sysenter.cs = pCtx->SysEnter.cs; + pDbgfCpu->sysenter.eip = pCtx->SysEnter.eip; + pDbgfCpu->sysenter.esp = pCtx->SysEnter.esp; + pDbgfCpu->msrEFER = pCtx->msrEFER; + pDbgfCpu->msrSTAR = pCtx->msrSTAR; + pDbgfCpu->msrPAT = pCtx->msrPAT; + pDbgfCpu->msrLSTAR = pCtx->msrLSTAR; + pDbgfCpu->msrCSTAR = pCtx->msrCSTAR; + pDbgfCpu->msrSFMASK = pCtx->msrSFMASK; + 
pDbgfCpu->msrKernelGSBase = pCtx->msrKERNELGSBASE; + pDbgfCpu->msrApicBase = APICGetBaseMsrNoCheck(pVCpu); + pDbgfCpu->aXcr[0] = pCtx->aXcr[0]; + pDbgfCpu->aXcr[1] = pCtx->aXcr[1]; + AssertCompile(sizeof(pDbgfCpu->ext) == sizeof(*pCtx->pXStateR3)); + pDbgfCpu->cbExt = pVM->cpum.ro.GuestFeatures.cbMaxExtendedState; + if (RT_LIKELY(pDbgfCpu->cbExt)) + memcpy(&pDbgfCpu->ext, pCtx->pXStateR3, pDbgfCpu->cbExt); + +#undef DBGFCOPYSEL +} + + +/** + * Worker function for dbgfR3CoreWrite() which does the writing. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param hFile The file to write to. Caller closes this. + */ +static int dbgfR3CoreWriteWorker(PVM pVM, RTFILE hFile) +{ + /* + * Collect core information. + */ + uint32_t const cu32MemRanges = dbgfR3GetRamRangeCount(pVM); + uint16_t const cMemRanges = cu32MemRanges < UINT16_MAX - 1 ? cu32MemRanges : UINT16_MAX - 1; /* One PT_NOTE Program header */ + uint16_t const cProgHdrs = cMemRanges + 1; + + DBGFCOREDESCRIPTOR CoreDescriptor; + RT_ZERO(CoreDescriptor); + CoreDescriptor.u32Magic = DBGFCORE_MAGIC; + CoreDescriptor.u32FmtVersion = DBGFCORE_FMT_VERSION; + CoreDescriptor.cbSelf = sizeof(CoreDescriptor); + CoreDescriptor.u32VBoxVersion = VBOX_FULL_VERSION; + CoreDescriptor.u32VBoxRevision = VMMGetSvnRev(); + CoreDescriptor.cCpus = pVM->cCpus; + + Log((DBGFLOG_NAME ": CoreDescriptor Version=%u Revision=%u\n", CoreDescriptor.u32VBoxVersion, CoreDescriptor.u32VBoxRevision)); + + /* + * Compute the file layout (see pg_dbgf_vmcore). + */ + uint64_t const offElfHdr = RTFileTell(hFile); + uint64_t const offNoteSection = offElfHdr + sizeof(Elf64_Ehdr); + uint64_t const offLoadSections = offNoteSection + sizeof(Elf64_Phdr); + uint64_t const cbLoadSections = cMemRanges * sizeof(Elf64_Phdr); + uint64_t const offCoreDescriptor = offLoadSections + cbLoadSections; + uint64_t const cbCoreDescriptor = Elf64NoteSectionSize(g_pcszCoreVBoxCore, sizeof(CoreDescriptor)); + uint64_t const offCpuDumps = offCoreDescriptor + cbCoreDescriptor; + uint64_t const cbCpuDumps = pVM->cCpus * Elf64NoteSectionSize(g_pcszCoreVBoxCpu, sizeof(DBGFCORECPU)); + uint64_t const offMemory = offCpuDumps + cbCpuDumps; + + uint64_t const offNoteSectionData = offCoreDescriptor; + uint64_t const cbNoteSectionData = cbCoreDescriptor + cbCpuDumps; + + /* + * Write ELF header. + */ + int rc = Elf64WriteElfHdr(hFile, cProgHdrs, 0 /* cSecHdrs */); + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": Elf64WriteElfHdr failed. rc=%Rrc\n", rc)); + return rc; + } + + /* + * Write PT_NOTE program header. + */ + Assert(RTFileTell(hFile) == offNoteSection); + rc = Elf64WriteProgHdr(hFile, PT_NOTE, PF_R, + offNoteSectionData, /* file offset to contents */ + cbNoteSectionData, /* size in core file */ + cbNoteSectionData, /* size in memory */ + 0); /* physical address */ + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": Elf64WritreProgHdr failed for PT_NOTE. rc=%Rrc\n", rc)); + return rc; + } + + /* + * Write PT_LOAD program header for each memory range. 
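+ * (MMIO ranges get a zero file size here so that they take up no space in the
+ * core file; only their memory size and physical address are recorded.)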
+ */ + Assert(RTFileTell(hFile) == offLoadSections); + uint64_t offMemRange = offMemory; + for (uint16_t iRange = 0; iRange < cMemRanges; iRange++) + { + RTGCPHYS GCPhysStart; + RTGCPHYS GCPhysEnd; + bool fIsMmio; + rc = PGMR3PhysGetRange(pVM, iRange, &GCPhysStart, &GCPhysEnd, NULL /* pszDesc */, &fIsMmio); + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": PGMR3PhysGetRange failed for iRange(%u) rc=%Rrc\n", iRange, rc)); + return rc; + } + + uint64_t cbMemRange = GCPhysEnd - GCPhysStart + 1; + uint64_t cbFileRange = fIsMmio ? 0 : cbMemRange; + + Log((DBGFLOG_NAME ": PGMR3PhysGetRange iRange=%u GCPhysStart=%#x GCPhysEnd=%#x cbMemRange=%u\n", + iRange, GCPhysStart, GCPhysEnd, cbMemRange)); + + rc = Elf64WriteProgHdr(hFile, PT_LOAD, PF_R, + offMemRange, /* file offset to contents */ + cbFileRange, /* size in core file */ + cbMemRange, /* size in memory */ + GCPhysStart); /* physical address */ + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": Elf64WriteProgHdr failed for memory range(%u) cbFileRange=%u cbMemRange=%u rc=%Rrc\n", + iRange, cbFileRange, cbMemRange, rc)); + return rc; + } + + offMemRange += cbFileRange; + } + + /* + * Write the Core descriptor note header and data. + */ + Assert(RTFileTell(hFile) == offCoreDescriptor); + rc = Elf64WriteNoteHdr(hFile, NT_VBOXCORE, g_pcszCoreVBoxCore, &CoreDescriptor, sizeof(CoreDescriptor)); + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": Elf64WriteNoteHdr failed for Note '%s' rc=%Rrc\n", g_pcszCoreVBoxCore, rc)); + return rc; + } + + /* + * Write the CPU context note headers and data. + * We allocate the DBGFCORECPU struct. rather than using the stack as it can be pretty large due to X86XSAVEAREA. + */ + Assert(RTFileTell(hFile) == offCpuDumps); + PDBGFCORECPU pDbgfCoreCpu = (PDBGFCORECPU)RTMemAlloc(sizeof(*pDbgfCoreCpu)); + if (RT_UNLIKELY(!pDbgfCoreCpu)) + { + LogRel((DBGFLOG_NAME ": Failed to alloc %u bytes for DBGFCORECPU\n", sizeof(*pDbgfCoreCpu))); + return VERR_NO_MEMORY; + } + + for (uint32_t iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + RT_BZERO(pDbgfCoreCpu, sizeof(*pDbgfCoreCpu)); + dbgfR3GetCoreCpu(pVCpu, pDbgfCoreCpu); + + rc = Elf64WriteNoteHdr(hFile, NT_VBOXCPU, g_pcszCoreVBoxCpu, pDbgfCoreCpu, sizeof(*pDbgfCoreCpu)); + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": Elf64WriteNoteHdr failed for vCPU[%u] rc=%Rrc\n", iCpu, rc)); + RTMemFree(pDbgfCoreCpu); + return rc; + } + } + RTMemFree(pDbgfCoreCpu); + pDbgfCoreCpu = NULL; + + /* + * Write memory ranges. + */ + Assert(RTFileTell(hFile) == offMemory); + for (uint16_t iRange = 0; iRange < cMemRanges; iRange++) + { + RTGCPHYS GCPhysStart; + RTGCPHYS GCPhysEnd; + bool fIsMmio; + rc = PGMR3PhysGetRange(pVM, iRange, &GCPhysStart, &GCPhysEnd, NULL /* pszDesc */, &fIsMmio); + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": PGMR3PhysGetRange(2) failed for iRange(%u) rc=%Rrc\n", iRange, rc)); + return rc; + } + + if (fIsMmio) + continue; + + /* + * Write page-by-page of this memory range. + * + * The read function may fail on MMIO ranges, we write these as zero + * pages for now (would be nice to have the VGA bits there though). + */ + uint64_t cbMemRange = GCPhysEnd - GCPhysStart + 1; + uint64_t cPages = cbMemRange >> PAGE_SHIFT; + for (uint64_t iPage = 0; iPage < cPages; iPage++) + { + uint8_t abPage[PAGE_SIZE]; + rc = PGMPhysSimpleReadGCPhys(pVM, abPage, GCPhysStart + (iPage << PAGE_SHIFT), sizeof(abPage)); + if (RT_FAILURE(rc)) + { + if (rc != VERR_PGM_PHYS_PAGE_RESERVED) + LogRel((DBGFLOG_NAME ": PGMPhysRead failed for iRange=%u iPage=%u. 
rc=%Rrc. Ignoring...\n", iRange, iPage, rc)); + RT_ZERO(abPage); + } + + rc = RTFileWrite(hFile, abPage, sizeof(abPage), NULL /* all */); + if (RT_FAILURE(rc)) + { + LogRel((DBGFLOG_NAME ": RTFileWrite failed. iRange=%u iPage=%u rc=%Rrc\n", iRange, iPage, rc)); + return rc; + } + } + } + + return rc; +} + + +/** + * EMT Rendezvous worker function for DBGFR3CoreWrite(). + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvData Opaque data. + * + * @return VBox status code. + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3CoreWriteRendezvous(PVM pVM, PVMCPU pVCpu, void *pvData) +{ + /* + * Validate input. + */ + AssertReturn(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pVCpu, VERR_INVALID_VMCPU_HANDLE); + AssertReturn(pvData, VERR_INVALID_POINTER); + + PDBGFCOREDATA pDbgfData = (PDBGFCOREDATA)pvData; + + /* + * Create the core file. + */ + uint32_t fFlags = (pDbgfData->fReplaceFile ? RTFILE_O_CREATE_REPLACE : RTFILE_O_CREATE) + | RTFILE_O_WRITE + | RTFILE_O_DENY_ALL + | (0600 << RTFILE_O_CREATE_MODE_SHIFT); + RTFILE hFile; + int rc = RTFileOpen(&hFile, pDbgfData->pszFilename, fFlags); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3CoreWriteWorker(pVM, hFile); + RTFileClose(hFile); + } + else + LogRel((DBGFLOG_NAME ": RTFileOpen failed for '%s' rc=%Rrc\n", pDbgfData->pszFilename, rc)); + return rc; +} + + +/** + * Write core dump of the guest. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszFilename The name of the file to which the guest core + * dump should be written. + * @param fReplaceFile Whether to replace the file or not. + * + * @remarks The VM may need to be suspended before calling this function in + * order to truly stop all device threads and drivers. This function + * only synchronizes EMTs. + */ +VMMR3DECL(int) DBGFR3CoreWrite(PUVM pUVM, const char *pszFilename, bool fReplaceFile) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pszFilename, VERR_INVALID_HANDLE); + + /* + * Pass the core write request down to EMT rendezvous which makes sure + * other EMTs, if any, are not running. IO threads could still be running + * but we don't care about them. + */ + DBGFCOREDATA CoreData; + RT_ZERO(CoreData); + CoreData.pszFilename = pszFilename; + CoreData.fReplaceFile = fReplaceFile; + + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, dbgfR3CoreWriteRendezvous, &CoreData); + if (RT_SUCCESS(rc)) + LogRel((DBGFLOG_NAME ": Successfully wrote guest core dump '%s'\n", pszFilename)); + else + LogRel((DBGFLOG_NAME ": Failed to write guest core dump '%s'. rc=%Rrc\n", pszFilename, rc)); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFCpu.cpp b/src/VBox/VMM/VMMR3/DBGFCpu.cpp new file mode 100644 index 00000000..96822a0d --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFCpu.cpp @@ -0,0 +1,163 @@ +/* $Id: DBGFCpu.cpp $ */ +/** @file + * DBGF - Debugger Facility, CPU State Accessors. + */ + +/* + * Copyright (C) 2009-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#define VMCPU_INCL_CPUM_GST_CTX /* For CPUM_IMPORT_EXTRN_RET(). */ +#include +#include +#include "DBGFInternal.h" +#include +#include +#include +#include +#include +#include + + +/** + * Wrapper around CPUMGetGuestMode. + * + * @returns VINF_SUCCESS. + * @param pVM The cross context VM structure. + * @param idCpu The current CPU ID. + * @param penmMode Where to return the mode. + */ +static DECLCALLBACK(int) dbgfR3CpuGetMode(PVM pVM, VMCPUID idCpu, CPUMMODE *penmMode) +{ + Assert(idCpu == VMMGetCpuId(pVM)); + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + CPUM_IMPORT_EXTRN_RET(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_EFER); + *penmMode = CPUMGetGuestMode(pVCpu); + return VINF_SUCCESS; +} + + +/** + * Get the current CPU mode. + * + * @returns The CPU mode on success, CPUMMODE_INVALID on failure. + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. + */ +VMMR3DECL(CPUMMODE) DBGFR3CpuGetMode(PUVM pUVM, VMCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, CPUMMODE_INVALID); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, CPUMMODE_INVALID); + AssertReturn(idCpu < pUVM->pVM->cCpus, CPUMMODE_INVALID); + + CPUMMODE enmMode; + int rc = VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3CpuGetMode, 3, pUVM->pVM, idCpu, &enmMode); + if (RT_FAILURE(rc)) + return CPUMMODE_INVALID; + return enmMode; +} + + +/** + * Wrapper around CPUMIsGuestIn64BitCode. + * + * @returns VINF_SUCCESS. + * @param pVM The cross context VM structure. + * @param idCpu The current CPU ID. + * @param pfIn64BitCode Where to return the result. + */ +static DECLCALLBACK(int) dbgfR3CpuIn64BitCode(PVM pVM, VMCPUID idCpu, bool *pfIn64BitCode) +{ + Assert(idCpu == VMMGetCpuId(pVM)); + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + CPUM_IMPORT_EXTRN_RET(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER); + *pfIn64BitCode = CPUMIsGuestIn64BitCode(pVCpu); + return VINF_SUCCESS; +} + + +/** + * Checks if the given CPU is executing 64-bit code or not. + * + * @returns true / false accordingly. + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. + */ +VMMR3DECL(bool) DBGFR3CpuIsIn64BitCode(PUVM pUVM, VMCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, false); + AssertReturn(idCpu < pUVM->pVM->cCpus, false); + + bool fIn64BitCode; + int rc = VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3CpuIn64BitCode, 3, pUVM->pVM, idCpu, &fIn64BitCode); + if (RT_FAILURE(rc)) + return false; + return fIn64BitCode; +} + + +/** + * Wrapper around CPUMIsGuestInV86Code. + * + * @returns VINF_SUCCESS. + * @param pVM The cross context VM structure. + * @param idCpu The current CPU ID. + * @param pfInV86Code Where to return the result. + */ +static DECLCALLBACK(int) dbgfR3CpuInV86Code(PVM pVM, VMCPUID idCpu, bool *pfInV86Code) +{ + Assert(idCpu == VMMGetCpuId(pVM)); + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + CPUM_IMPORT_EXTRN_RET(pVCpu, CPUMCTX_EXTRN_RFLAGS); + *pfInV86Code = CPUMIsGuestInV86ModeEx(CPUMQueryGuestCtxPtr(pVCpu)); + return VINF_SUCCESS; +} + + +/** + * Checks if the given CPU is executing V8086 code or not. 
+ * + * @returns true / false accordingly. + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. + */ +VMMR3DECL(bool) DBGFR3CpuIsInV86Code(PUVM pUVM, VMCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, false); + AssertReturn(idCpu < pUVM->pVM->cCpus, false); + + bool fInV86Code; + int rc = VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3CpuInV86Code, 3, pUVM->pVM, idCpu, &fInV86Code); + if (RT_FAILURE(rc)) + return false; + return fInV86Code; +} + + +/** + * Get the number of CPUs (or threads if you insist). + * + * @returns The number of CPUs + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(VMCPUID) DBGFR3CpuGetCount(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, 1); + return pUVM->cCpus; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFDisas.cpp b/src/VBox/VMM/VMMR3/DBGFDisas.cpp new file mode 100644 index 00000000..8947932f --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFDisas.cpp @@ -0,0 +1,872 @@ +/* $Id: DBGFDisas.cpp $ */ +/** @file + * DBGF - Debugger Facility, Disassembler. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_RAW_MODE +# include +#endif +#include "DBGFInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Structure used when disassembling and instructions in DBGF. + * This is used so the reader function can get the stuff it needs. + */ +typedef struct +{ + /** The core structure. */ + DISCPUSTATE Cpu; + /** The cross context VM structure. */ + PVM pVM; + /** The cross context virtual CPU structure. */ + PVMCPU pVCpu; + /** The address space for resolving symbol. */ + RTDBGAS hDbgAs; + /** Pointer to the first byte in the segment. */ + RTGCUINTPTR GCPtrSegBase; + /** Pointer to the byte after the end of the segment. (might have wrapped!) */ + RTGCUINTPTR GCPtrSegEnd; + /** The size of the segment minus 1. */ + RTGCUINTPTR cbSegLimit; + /** The guest paging mode. */ + PGMMODE enmMode; + /** Pointer to the current page - R3 Ptr. */ + void const *pvPageR3; + /** Pointer to the current page - GC Ptr. */ + RTGCPTR GCPtrPage; + /** Pointer to the next instruction (relative to GCPtrSegBase). */ + RTGCUINTPTR GCPtrNext; + /** The lock information that PGMPhysReleasePageMappingLock needs. 
*/ + PGMPAGEMAPLOCK PageMapLock; + /** Whether the PageMapLock is valid or not. */ + bool fLocked; + /** 64 bits mode or not. */ + bool f64Bits; + /** Read original unpatched bytes from the patch manager. */ + bool fUnpatchedBytes; + /** Set when fUnpatchedBytes is active and we encounter patched bytes. */ + bool fPatchedInstr; +} DBGFDISASSTATE, *PDBGFDISASSTATE; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static FNDISREADBYTES dbgfR3DisasInstrRead; + + + +/** + * Calls the disassembler with the proper reader functions and such for disa + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pSelInfo The selector info. + * @param enmMode The guest paging mode. + * @param fFlags DBGF_DISAS_FLAGS_XXX. + * @param GCPtr The GC pointer (selector offset). + * @param pState The disas CPU state. + */ +static int dbgfR3DisasInstrFirst(PVM pVM, PVMCPU pVCpu, PDBGFSELINFO pSelInfo, PGMMODE enmMode, + RTGCPTR GCPtr, uint32_t fFlags, PDBGFDISASSTATE pState) +{ + pState->GCPtrSegBase = pSelInfo->GCPtrBase; + pState->GCPtrSegEnd = pSelInfo->cbLimit + 1 + (RTGCUINTPTR)pSelInfo->GCPtrBase; + pState->cbSegLimit = pSelInfo->cbLimit; + pState->enmMode = enmMode; + pState->GCPtrPage = 0; + pState->pvPageR3 = NULL; + pState->hDbgAs = VM_IS_RAW_MODE_ENABLED(pVM) + ? DBGF_AS_RC_AND_GC_GLOBAL + : DBGF_AS_GLOBAL; + pState->pVM = pVM; + pState->pVCpu = pVCpu; + pState->fLocked = false; + pState->f64Bits = enmMode >= PGMMODE_AMD64 && pSelInfo->u.Raw.Gen.u1Long; +#ifdef VBOX_WITH_RAW_MODE + pState->fUnpatchedBytes = RT_BOOL(fFlags & DBGF_DISAS_FLAGS_UNPATCHED_BYTES); + pState->fPatchedInstr = false; +#endif + + DISCPUMODE enmCpuMode; + switch (fFlags & DBGF_DISAS_FLAGS_MODE_MASK) + { + default: + AssertFailed(); + RT_FALL_THRU(); + case DBGF_DISAS_FLAGS_DEFAULT_MODE: + enmCpuMode = pState->f64Bits + ? DISCPUMODE_64BIT + : pSelInfo->u.Raw.Gen.u1DefBig + ? DISCPUMODE_32BIT + : DISCPUMODE_16BIT; + break; + case DBGF_DISAS_FLAGS_16BIT_MODE: + case DBGF_DISAS_FLAGS_16BIT_REAL_MODE: + enmCpuMode = DISCPUMODE_16BIT; + break; + case DBGF_DISAS_FLAGS_32BIT_MODE: + enmCpuMode = DISCPUMODE_32BIT; + break; + case DBGF_DISAS_FLAGS_64BIT_MODE: + enmCpuMode = DISCPUMODE_64BIT; + break; + } + + uint32_t cbInstr; + int rc = DISInstrWithReader(GCPtr, + enmCpuMode, + dbgfR3DisasInstrRead, + &pState->Cpu, + &pState->Cpu, + &cbInstr); + if (RT_SUCCESS(rc)) + { + pState->GCPtrNext = GCPtr + cbInstr; + return VINF_SUCCESS; + } + + /* cleanup */ + if (pState->fLocked) + { + PGMPhysReleasePageMappingLock(pVM, &pState->PageMapLock); + pState->fLocked = false; + } + return rc; +} + + +#if 0 +/** + * Calls the disassembler for disassembling the next instruction. + * + * @returns VBox status code. + * @param pState The disas CPU state. + */ +static int dbgfR3DisasInstrNext(PDBGFDISASSTATE pState) +{ + uint32_t cbInstr; + int rc = DISInstr(&pState->Cpu, (void *)pState->GCPtrNext, 0, &cbInstr, NULL); + if (RT_SUCCESS(rc)) + { + pState->GCPtrNext = GCPtr + cbInstr; + return VINF_SUCCESS; + } + return rc; +} +#endif + + +/** + * Done with the disassembler state, free associated resources. + * + * @param pState The disas CPU state ++. 
+ */ +static void dbgfR3DisasInstrDone(PDBGFDISASSTATE pState) +{ + if (pState->fLocked) + { + PGMPhysReleasePageMappingLock(pState->pVM, &pState->PageMapLock); + pState->fLocked = false; + } +} + + +/** + * @callback_method_impl{FNDISREADBYTES} + * + * @remarks The source is relative to the base address indicated by + * DBGFDISASSTATE::GCPtrSegBase. + */ +static DECLCALLBACK(int) dbgfR3DisasInstrRead(PDISCPUSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead) +{ + PDBGFDISASSTATE pState = (PDBGFDISASSTATE)pDis; + for (;;) + { + RTGCUINTPTR GCPtr = pDis->uInstrAddr + offInstr + pState->GCPtrSegBase; + + /* + * Need to update the page translation? + */ + if ( !pState->pvPageR3 + || (GCPtr >> PAGE_SHIFT) != (pState->GCPtrPage >> PAGE_SHIFT)) + { + int rc = VINF_SUCCESS; + + /* translate the address */ + pState->GCPtrPage = GCPtr & PAGE_BASE_GC_MASK; + if ( VM_IS_RAW_MODE_ENABLED(pState->pVM) + && MMHyperIsInsideArea(pState->pVM, pState->GCPtrPage)) + { + pState->pvPageR3 = MMHyperRCToR3(pState->pVM, (RTRCPTR)pState->GCPtrPage); + if (!pState->pvPageR3) + rc = VERR_INVALID_POINTER; + } + else + { + if (pState->fLocked) + PGMPhysReleasePageMappingLock(pState->pVM, &pState->PageMapLock); + + if (pState->enmMode <= PGMMODE_PROTECTED) + rc = PGMPhysGCPhys2CCPtrReadOnly(pState->pVM, pState->GCPtrPage, &pState->pvPageR3, &pState->PageMapLock); + else + rc = PGMPhysGCPtr2CCPtrReadOnly(pState->pVCpu, pState->GCPtrPage, &pState->pvPageR3, &pState->PageMapLock); + pState->fLocked = RT_SUCCESS_NP(rc); + } + if (RT_FAILURE(rc)) + { + pState->pvPageR3 = NULL; + return rc; + } + } + + /* + * Check the segment limit. + */ + if (!pState->f64Bits && pDis->uInstrAddr + offInstr > pState->cbSegLimit) + return VERR_OUT_OF_SELECTOR_BOUNDS; + + /* + * Calc how much we can read, maxing out the read. + */ + uint32_t cb = PAGE_SIZE - (GCPtr & PAGE_OFFSET_MASK); + if (!pState->f64Bits) + { + RTGCUINTPTR cbSeg = pState->GCPtrSegEnd - GCPtr; + if (cb > cbSeg && cbSeg) + cb = cbSeg; + } + if (cb > cbMaxRead) + cb = cbMaxRead; + +#ifdef VBOX_WITH_RAW_MODE + /* + * Read original bytes from PATM if asked to do so. + */ + if (pState->fUnpatchedBytes) + { + size_t cbRead = cb; + int rc = PATMR3ReadOrgInstr(pState->pVM, GCPtr, &pDis->abInstr[offInstr], cbRead, &cbRead); + if (RT_SUCCESS(rc)) + { + pState->fPatchedInstr = true; + if (cbRead >= cbMinRead) + { + pDis->cbCachedInstr = offInstr + (uint8_t)cbRead; + return rc; + } + + cbMinRead -= (uint8_t)cbRead; + cbMaxRead -= (uint8_t)cbRead; + cb -= (uint8_t)cbRead; + offInstr += (uint8_t)cbRead; + GCPtr += cbRead; + if (!cb) + continue; + } + } +#endif /* VBOX_WITH_RAW_MODE */ + + /* + * Read and advance, + */ + memcpy(&pDis->abInstr[offInstr], (char *)pState->pvPageR3 + (GCPtr & PAGE_OFFSET_MASK), cb); + offInstr += (uint8_t)cb; + if (cb >= cbMinRead) + { + pDis->cbCachedInstr = offInstr; + return VINF_SUCCESS; + } + cbMaxRead -= (uint8_t)cb; + cbMinRead -= (uint8_t)cb; + } +} + + +/** + * @callback_method_impl{FNDISGETSYMBOL} + */ +static DECLCALLBACK(int) dbgfR3DisasGetSymbol(PCDISCPUSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress, + char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser) +{ + PDBGFDISASSTATE pState = (PDBGFDISASSTATE)pDis; + PCDBGFSELINFO pSelInfo = (PCDBGFSELINFO)pvUser; + + /* + * Address conversion + */ + DBGFADDRESS Addr; + int rc; + /* Start with CS. */ + if ( DIS_FMT_SEL_IS_REG(u32Sel) + ? 
DIS_FMT_SEL_GET_REG(u32Sel) == DISSELREG_CS + : pSelInfo->Sel == DIS_FMT_SEL_GET_VALUE(u32Sel)) + rc = DBGFR3AddrFromSelInfoOff(pState->pVM->pUVM, &Addr, pSelInfo, uAddress); + /* In long mode everything but FS and GS is easy. */ + else if ( pState->Cpu.uCpuMode == DISCPUMODE_64BIT + && DIS_FMT_SEL_IS_REG(u32Sel) + && DIS_FMT_SEL_GET_REG(u32Sel) != DISSELREG_GS + && DIS_FMT_SEL_GET_REG(u32Sel) != DISSELREG_FS) + { + DBGFR3AddrFromFlat(pState->pVM->pUVM, &Addr, uAddress); + rc = VINF_SUCCESS; + } + /* Here's a quick hack to catch patch manager SS relative access. */ + else if ( DIS_FMT_SEL_IS_REG(u32Sel) + && DIS_FMT_SEL_GET_REG(u32Sel) == DISSELREG_SS + && pSelInfo->GCPtrBase == 0 + && pSelInfo->cbLimit >= UINT32_MAX +#ifdef VBOX_WITH_RAW_MODE + && PATMIsPatchGCAddr(pState->pVM, pState->Cpu.uInstrAddr) +#endif + ) + { + DBGFR3AddrFromFlat(pState->pVM->pUVM, &Addr, uAddress); + rc = VINF_SUCCESS; + } + else + { + /** @todo implement a generic solution here. */ + rc = VERR_SYMBOL_NOT_FOUND; + } + + /* + * If we got an address, try resolve it into a symbol. + */ + if (RT_SUCCESS(rc)) + { + RTDBGSYMBOL Sym; + RTGCINTPTR off; + rc = DBGFR3AsSymbolByAddr(pState->pVM->pUVM, pState->hDbgAs, &Addr, + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + &off, &Sym, NULL /*phMod*/); + if (RT_SUCCESS(rc)) + { + /* + * Return the symbol and offset. + */ + size_t cchName = strlen(Sym.szName); + if (cchName >= cchBuf) + cchName = cchBuf - 1; + memcpy(pszBuf, Sym.szName, cchName); + pszBuf[cchName] = '\0'; + + *poff = off; + } + } + return rc; +} + + +/** + * Disassembles the one instruction according to the specified flags and + * address, internal worker executing on the EMT of the specified virtual CPU. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param Sel The code selector. This used to determine the 32/16 bit ness and + * calculation of the actual instruction address. + * @param pGCPtr Pointer to the variable holding the code address + * relative to the base of Sel. + * @param fFlags Flags controlling where to start and how to format. + * A combination of the DBGF_DISAS_FLAGS_* \#defines. + * @param pszOutput Output buffer. + * @param cbOutput Size of the output buffer. + * @param pcbInstr Where to return the size of the instruction. + * @param pDisState Where to store the disassembler state into. + */ +static DECLCALLBACK(int) +dbgfR3DisasInstrExOnVCpu(PVM pVM, PVMCPU pVCpu, RTSEL Sel, PRTGCPTR pGCPtr, uint32_t fFlags, + char *pszOutput, uint32_t cbOutput, uint32_t *pcbInstr, PDBGFDISSTATE pDisState) +{ + VMCPU_ASSERT_EMT(pVCpu); + RTGCPTR GCPtr = *pGCPtr; + int rc; + + /* + * Get the Sel and GCPtr if fFlags requests that. + */ + PCCPUMCTXCORE pCtxCore = NULL; + PCCPUMSELREG pSRegCS = NULL; + if (fFlags & DBGF_DISAS_FLAGS_CURRENT_GUEST) + { + pCtxCore = CPUMGetGuestCtxCore(pVCpu); + Sel = pCtxCore->cs.Sel; + pSRegCS = &pCtxCore->cs; + GCPtr = pCtxCore->rip; + } + else if (fFlags & DBGF_DISAS_FLAGS_CURRENT_HYPER) + { + fFlags |= DBGF_DISAS_FLAGS_HYPER; + pCtxCore = CPUMGetHyperCtxCore(pVCpu); + Sel = pCtxCore->cs.Sel; + GCPtr = pCtxCore->rip; + } + /* + * Check if the selector matches the guest CS, use the hidden + * registers from that if they are valid. Saves time and effort. 
+ */ + else + { + pCtxCore = CPUMGetGuestCtxCore(pVCpu); + if (pCtxCore->cs.Sel == Sel && Sel != DBGF_SEL_FLAT) + pSRegCS = &pCtxCore->cs; + else + pCtxCore = NULL; + } + + /* + * Read the selector info - assume no stale selectors and nasty stuff like that. + * + * Note! We CANNOT load invalid hidden selector registers since that would + * mean that log/debug statements or the debug will influence the + * guest state and make things behave differently. + */ + DBGFSELINFO SelInfo; + const PGMMODE enmMode = PGMGetGuestMode(pVCpu); + bool fRealModeAddress = false; + + if ( pSRegCS + && CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, pSRegCS)) + { + SelInfo.Sel = Sel; + SelInfo.SelGate = 0; + SelInfo.GCPtrBase = pSRegCS->u64Base; + SelInfo.cbLimit = pSRegCS->u32Limit; + SelInfo.fFlags = PGMMODE_IS_LONG_MODE(enmMode) + ? DBGFSELINFO_FLAGS_LONG_MODE + : enmMode != PGMMODE_REAL && !pCtxCore->eflags.Bits.u1VM + ? DBGFSELINFO_FLAGS_PROT_MODE + : DBGFSELINFO_FLAGS_REAL_MODE; + + SelInfo.u.Raw.au32[0] = 0; + SelInfo.u.Raw.au32[1] = 0; + SelInfo.u.Raw.Gen.u16LimitLow = 0xffff; + SelInfo.u.Raw.Gen.u4LimitHigh = 0xf; + SelInfo.u.Raw.Gen.u1Present = pSRegCS->Attr.n.u1Present; + SelInfo.u.Raw.Gen.u1Granularity = pSRegCS->Attr.n.u1Granularity;; + SelInfo.u.Raw.Gen.u1DefBig = pSRegCS->Attr.n.u1DefBig; + SelInfo.u.Raw.Gen.u1Long = pSRegCS->Attr.n.u1Long; + SelInfo.u.Raw.Gen.u1DescType = pSRegCS->Attr.n.u1DescType; + SelInfo.u.Raw.Gen.u4Type = pSRegCS->Attr.n.u4Type; + fRealModeAddress = !!(SelInfo.fFlags & DBGFSELINFO_FLAGS_REAL_MODE); + } + else if (Sel == DBGF_SEL_FLAT) + { + SelInfo.Sel = Sel; + SelInfo.SelGate = 0; + SelInfo.GCPtrBase = 0; + SelInfo.cbLimit = ~(RTGCUINTPTR)0; + SelInfo.fFlags = PGMMODE_IS_LONG_MODE(enmMode) + ? DBGFSELINFO_FLAGS_LONG_MODE + : enmMode != PGMMODE_REAL + ? DBGFSELINFO_FLAGS_PROT_MODE + : DBGFSELINFO_FLAGS_REAL_MODE; + SelInfo.u.Raw.au32[0] = 0; + SelInfo.u.Raw.au32[1] = 0; + SelInfo.u.Raw.Gen.u16LimitLow = 0xffff; + SelInfo.u.Raw.Gen.u4LimitHigh = 0xf; + + pSRegCS = &CPUMGetGuestCtxCore(pVCpu)->cs; + if (CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, pSRegCS)) + { + /* Assume the current CS defines the execution mode. 
*/ + SelInfo.u.Raw.Gen.u1Present = pSRegCS->Attr.n.u1Present; + SelInfo.u.Raw.Gen.u1Granularity = pSRegCS->Attr.n.u1Granularity;; + SelInfo.u.Raw.Gen.u1DefBig = pSRegCS->Attr.n.u1DefBig; + SelInfo.u.Raw.Gen.u1Long = pSRegCS->Attr.n.u1Long; + SelInfo.u.Raw.Gen.u1DescType = pSRegCS->Attr.n.u1DescType; + SelInfo.u.Raw.Gen.u4Type = pSRegCS->Attr.n.u4Type; + } + else + { + pSRegCS = NULL; + SelInfo.u.Raw.Gen.u1Present = 1; + SelInfo.u.Raw.Gen.u1Granularity = 1; + SelInfo.u.Raw.Gen.u1DefBig = 1; + SelInfo.u.Raw.Gen.u1DescType = 1; + SelInfo.u.Raw.Gen.u4Type = X86_SEL_TYPE_EO; + } + } + else if ( !(fFlags & DBGF_DISAS_FLAGS_HYPER) + && ( (pCtxCore && pCtxCore->eflags.Bits.u1VM) + || enmMode == PGMMODE_REAL + || (fFlags & DBGF_DISAS_FLAGS_MODE_MASK) == DBGF_DISAS_FLAGS_16BIT_REAL_MODE + ) + ) + { /* V86 mode or real mode - real mode addressing */ + SelInfo.Sel = Sel; + SelInfo.SelGate = 0; + SelInfo.GCPtrBase = Sel * 16; + SelInfo.cbLimit = ~(RTGCUINTPTR)0; + SelInfo.fFlags = DBGFSELINFO_FLAGS_REAL_MODE; + SelInfo.u.Raw.au32[0] = 0; + SelInfo.u.Raw.au32[1] = 0; + SelInfo.u.Raw.Gen.u16LimitLow = 0xffff; + SelInfo.u.Raw.Gen.u4LimitHigh = 0xf; + SelInfo.u.Raw.Gen.u1Present = 1; + SelInfo.u.Raw.Gen.u1Granularity = 1; + SelInfo.u.Raw.Gen.u1DefBig = 0; /* 16 bits */ + SelInfo.u.Raw.Gen.u1DescType = 1; + SelInfo.u.Raw.Gen.u4Type = X86_SEL_TYPE_EO; + fRealModeAddress = true; + } + else + { + if (!(fFlags & DBGF_DISAS_FLAGS_HYPER)) + rc = SELMR3GetSelectorInfo(pVM, pVCpu, Sel, &SelInfo); + else + rc = SELMR3GetShadowSelectorInfo(pVM, Sel, &SelInfo); + if (RT_FAILURE(rc)) + { + RTStrPrintf(pszOutput, cbOutput, "Sel=%04x -> %Rrc\n", Sel, rc); + return rc; + } + } + + /* + * Disassemble it. + */ + DBGFDISASSTATE State; + rc = dbgfR3DisasInstrFirst(pVM, pVCpu, &SelInfo, enmMode, GCPtr, fFlags, &State); + if (RT_FAILURE(rc)) + { + if (State.Cpu.cbCachedInstr) + RTStrPrintf(pszOutput, cbOutput, "Disas -> %Rrc; %.*Rhxs\n", rc, (size_t)State.Cpu.cbCachedInstr, State.Cpu.abInstr); + else + RTStrPrintf(pszOutput, cbOutput, "Disas -> %Rrc\n", rc); + return rc; + } + + /* + * Format it. + */ + char szBuf[512]; + DISFormatYasmEx(&State.Cpu, szBuf, sizeof(szBuf), + DIS_FMT_FLAGS_RELATIVE_BRANCH, + fFlags & DBGF_DISAS_FLAGS_NO_SYMBOLS ? NULL : dbgfR3DisasGetSymbol, + &SelInfo); + +#ifdef VBOX_WITH_RAW_MODE + /* + * Patched instruction annotations. + */ + char szPatchAnnotations[256]; + szPatchAnnotations[0] = '\0'; + if (fFlags & DBGF_DISAS_FLAGS_ANNOTATE_PATCHED) + PATMR3DbgAnnotatePatchedInstruction(pVM, GCPtr, State.Cpu.cbInstr, szPatchAnnotations, sizeof(szPatchAnnotations)); +#endif + + /* + * Print it to the user specified buffer. 
+ */ + size_t cch; + if (fFlags & DBGF_DISAS_FLAGS_NO_BYTES) + { + if (fFlags & DBGF_DISAS_FLAGS_NO_ADDRESS) + cch = RTStrPrintf(pszOutput, cbOutput, "%s", szBuf); + else if (fRealModeAddress) + cch = RTStrPrintf(pszOutput, cbOutput, "%04x:%04x %s", Sel, (unsigned)GCPtr, szBuf); + else if (Sel == DBGF_SEL_FLAT) + { + if (enmMode >= PGMMODE_AMD64) + cch = RTStrPrintf(pszOutput, cbOutput, "%RGv %s", GCPtr, szBuf); + else + cch = RTStrPrintf(pszOutput, cbOutput, "%08RX32 %s", (uint32_t)GCPtr, szBuf); + } + else + { + if (enmMode >= PGMMODE_AMD64) + cch = RTStrPrintf(pszOutput, cbOutput, "%04x:%RGv %s", Sel, GCPtr, szBuf); + else + cch = RTStrPrintf(pszOutput, cbOutput, "%04x:%08RX32 %s", Sel, (uint32_t)GCPtr, szBuf); + } + } + else + { + uint32_t cbInstr = State.Cpu.cbInstr; + uint8_t const *pabInstr = State.Cpu.abInstr; + if (fFlags & DBGF_DISAS_FLAGS_NO_ADDRESS) + cch = RTStrPrintf(pszOutput, cbOutput, "%.*Rhxs%*s %s", + cbInstr, pabInstr, cbInstr < 8 ? (8 - cbInstr) * 3 : 0, "", + szBuf); + else if (fRealModeAddress) + cch = RTStrPrintf(pszOutput, cbOutput, "%04x:%04x %.*Rhxs%*s %s", + Sel, (unsigned)GCPtr, + cbInstr, pabInstr, cbInstr < 8 ? (8 - cbInstr) * 3 : 0, "", + szBuf); + else if (Sel == DBGF_SEL_FLAT) + { + if (enmMode >= PGMMODE_AMD64) + cch = RTStrPrintf(pszOutput, cbOutput, "%RGv %.*Rhxs%*s %s", + GCPtr, + cbInstr, pabInstr, cbInstr < 8 ? (8 - cbInstr) * 3 : 0, "", + szBuf); + else + cch = RTStrPrintf(pszOutput, cbOutput, "%08RX32 %.*Rhxs%*s %s", + (uint32_t)GCPtr, + cbInstr, pabInstr, cbInstr < 8 ? (8 - cbInstr) * 3 : 0, "", + szBuf); + } + else + { + if (enmMode >= PGMMODE_AMD64) + cch = RTStrPrintf(pszOutput, cbOutput, "%04x:%RGv %.*Rhxs%*s %s", + Sel, GCPtr, + cbInstr, pabInstr, cbInstr < 8 ? (8 - cbInstr) * 3 : 0, "", + szBuf); + else + cch = RTStrPrintf(pszOutput, cbOutput, "%04x:%08RX32 %.*Rhxs%*s %s", + Sel, (uint32_t)GCPtr, + cbInstr, pabInstr, cbInstr < 8 ? (8 - cbInstr) * 3 : 0, "", + szBuf); + } + } + +#ifdef VBOX_WITH_RAW_MODE + if (szPatchAnnotations[0] && cch + 1 < cbOutput) + RTStrPrintf(pszOutput + cch, cbOutput - cch, " ; %s", szPatchAnnotations); +#endif + + if (pcbInstr) + *pcbInstr = State.Cpu.cbInstr; + + if (pDisState) + { + pDisState->pCurInstr = State.Cpu.pCurInstr; + pDisState->cbInstr = State.Cpu.cbInstr; + pDisState->Param1 = State.Cpu.Param1; + pDisState->Param2 = State.Cpu.Param2; + pDisState->Param3 = State.Cpu.Param3; + pDisState->Param4 = State.Cpu.Param4; + } + + dbgfR3DisasInstrDone(&State); + return VINF_SUCCESS; +} + + +/** + * Disassembles the one instruction according to the specified flags and address + * returning part of the disassembler state. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The ID of virtual CPU. + * @param pAddr The code address. + * @param fFlags Flags controlling where to start and how to format. + * A combination of the DBGF_DISAS_FLAGS_* \#defines. + * @param pszOutput Output buffer. This will always be properly + * terminated if @a cbOutput is greater than zero. + * @param cbOutput Size of the output buffer. + * @param pDisState The disassembler state to fill in. + * + * @remarks May have to switch to the EMT of the virtual CPU in order to do + * address conversion. 
+ */ +DECLHIDDEN(int) dbgfR3DisasInstrStateEx(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddr, uint32_t fFlags, + char *pszOutput, uint32_t cbOutput, PDBGFDISSTATE pDisState) +{ + AssertReturn(cbOutput > 0, VERR_INVALID_PARAMETER); + *pszOutput = '\0'; + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + AssertReturn(!(fFlags & ~DBGF_DISAS_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER); + AssertReturn((fFlags & DBGF_DISAS_FLAGS_MODE_MASK) <= DBGF_DISAS_FLAGS_64BIT_MODE, VERR_INVALID_PARAMETER); + + /* + * Optimize the common case where we're called on the EMT of idCpu since + * we're using this all the time when logging. + */ + int rc; + PVMCPU pVCpu = VMMGetCpu(pVM); + if ( pVCpu + && pVCpu->idCpu == idCpu) + rc = dbgfR3DisasInstrExOnVCpu(pVM, pVCpu, pAddr->Sel, &pAddr->off, fFlags, pszOutput, cbOutput, NULL, pDisState); + else + rc = VMR3ReqPriorityCallWait(pVM, idCpu, (PFNRT)dbgfR3DisasInstrExOnVCpu, 9, + pVM, VMMGetCpuById(pVM, idCpu), pAddr->Sel, &pAddr->off, fFlags, pszOutput, cbOutput, NULL, pDisState); + return rc; +} + +/** + * Disassembles the one instruction according to the specified flags and address. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The ID of virtual CPU. + * @param Sel The code selector. This used to determine the 32/16 bit ness and + * calculation of the actual instruction address. + * @param GCPtr The code address relative to the base of Sel. + * @param fFlags Flags controlling where to start and how to format. + * A combination of the DBGF_DISAS_FLAGS_* \#defines. + * @param pszOutput Output buffer. This will always be properly + * terminated if @a cbOutput is greater than zero. + * @param cbOutput Size of the output buffer. + * @param pcbInstr Where to return the size of the instruction. + * + * @remarks May have to switch to the EMT of the virtual CPU in order to do + * address conversion. + */ +VMMR3DECL(int) DBGFR3DisasInstrEx(PUVM pUVM, VMCPUID idCpu, RTSEL Sel, RTGCPTR GCPtr, uint32_t fFlags, + char *pszOutput, uint32_t cbOutput, uint32_t *pcbInstr) +{ + AssertReturn(cbOutput > 0, VERR_INVALID_PARAMETER); + *pszOutput = '\0'; + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + AssertReturn(!(fFlags & ~DBGF_DISAS_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER); + AssertReturn((fFlags & DBGF_DISAS_FLAGS_MODE_MASK) <= DBGF_DISAS_FLAGS_64BIT_MODE, VERR_INVALID_PARAMETER); + + /* + * Optimize the common case where we're called on the EMT of idCpu since + * we're using this all the time when logging. + */ + int rc; + PVMCPU pVCpu = VMMGetCpu(pVM); + if ( pVCpu + && pVCpu->idCpu == idCpu) + rc = dbgfR3DisasInstrExOnVCpu(pVM, pVCpu, Sel, &GCPtr, fFlags, pszOutput, cbOutput, pcbInstr, NULL); + else + rc = VMR3ReqPriorityCallWait(pVM, idCpu, (PFNRT)dbgfR3DisasInstrExOnVCpu, 9, + pVM, VMMGetCpuById(pVM, idCpu), Sel, &GCPtr, fFlags, pszOutput, cbOutput, pcbInstr, NULL); + return rc; +} + + +/** + * Disassembles the current guest context instruction. + * All registers and data will be displayed. Addresses will be attempted resolved to symbols. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param pszOutput Output buffer. 
This will always be properly + * terminated if @a cbOutput is greater than zero. + * @param cbOutput Size of the output buffer. + * @thread EMT(pVCpu) + */ +VMMR3_INT_DECL(int) DBGFR3DisasInstrCurrent(PVMCPU pVCpu, char *pszOutput, uint32_t cbOutput) +{ + AssertReturn(cbOutput > 0, VERR_INVALID_PARAMETER); + *pszOutput = '\0'; + Assert(VMCPU_IS_EMT(pVCpu)); + + RTGCPTR GCPtr = 0; + return dbgfR3DisasInstrExOnVCpu(pVCpu->pVMR3, pVCpu, 0, &GCPtr, + DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE + | DBGF_DISAS_FLAGS_ANNOTATE_PATCHED, + pszOutput, cbOutput, NULL, NULL); +} + + +/** + * Disassembles the current guest context instruction and writes it to the log. + * All registers and data will be displayed. Addresses will be attempted resolved to symbols. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param pszPrefix Short prefix string to the disassembly string. (optional) + * @thread EMT(pVCpu) + */ +VMMR3DECL(int) DBGFR3DisasInstrCurrentLogInternal(PVMCPU pVCpu, const char *pszPrefix) +{ + char szBuf[256]; + szBuf[0] = '\0'; + int rc = DBGFR3DisasInstrCurrent(pVCpu, &szBuf[0], sizeof(szBuf)); + if (RT_FAILURE(rc)) + RTStrPrintf(szBuf, sizeof(szBuf), "DBGFR3DisasInstrCurrentLog failed with rc=%Rrc\n", rc); + if (pszPrefix && *pszPrefix) + { + if (pVCpu->CTX_SUFF(pVM)->cCpus > 1) + RTLogPrintf("%s-CPU%u: %s\n", pszPrefix, pVCpu->idCpu, szBuf); + else + RTLogPrintf("%s: %s\n", pszPrefix, szBuf); + } + else + RTLogPrintf("%s\n", szBuf); + return rc; +} + + + +/** + * Disassembles the specified guest context instruction and writes it to the log. + * Addresses will be attempted resolved to symbols. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure of the calling + * EMT. + * @param Sel The code selector. This used to determine the 32/16 + * bit-ness and calculation of the actual instruction + * address. + * @param GCPtr The code address relative to the base of Sel. + * @param pszPrefix Short prefix string to the disassembly string. + * (optional) + * @thread EMT(pVCpu) + */ +VMMR3DECL(int) DBGFR3DisasInstrLogInternal(PVMCPU pVCpu, RTSEL Sel, RTGCPTR GCPtr, const char *pszPrefix) +{ + Assert(VMCPU_IS_EMT(pVCpu)); + + char szBuf[256]; + RTGCPTR GCPtrTmp = GCPtr; + int rc = dbgfR3DisasInstrExOnVCpu(pVCpu->pVMR3, pVCpu, Sel, &GCPtrTmp, DBGF_DISAS_FLAGS_DEFAULT_MODE, + &szBuf[0], sizeof(szBuf), NULL, NULL); + if (RT_FAILURE(rc)) + RTStrPrintf(szBuf, sizeof(szBuf), "DBGFR3DisasInstrLog(, %RTsel, %RGv) failed with rc=%Rrc\n", Sel, GCPtr, rc); + if (pszPrefix && *pszPrefix) + { + if (pVCpu->CTX_SUFF(pVM)->cCpus > 1) + RTLogPrintf("%s-CPU%u: %s\n", pszPrefix, pVCpu->idCpu, szBuf); + else + RTLogPrintf("%s: %s\n", pszPrefix, szBuf); + } + else + RTLogPrintf("%s\n", szBuf); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFInfo.cpp b/src/VBox/VMM/VMMR3/DBGFInfo.cpp new file mode 100644 index 00000000..f1645eb2 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFInfo.cpp @@ -0,0 +1,1052 @@ +/* $Id: DBGFInfo.cpp $ */ +/** @file + * DBGF - Debugger Facility, Info. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF_INFO +#include + +#include +#include "DBGFInternal.h" +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(void) dbgfR3InfoLog_Printf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...); +static DECLCALLBACK(void) dbgfR3InfoLog_PrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args); +static DECLCALLBACK(void) dbgfR3InfoLogRel_Printf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...); +static DECLCALLBACK(void) dbgfR3InfoLogRel_PrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args); +static DECLCALLBACK(void) dbgfR3InfoStdErr_Printf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...); +static DECLCALLBACK(void) dbgfR3InfoStdErr_PrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args); +static DECLCALLBACK(void) dbgfR3InfoHelp(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** Logger output. */ +static const DBGFINFOHLP g_dbgfR3InfoLogHlp = +{ + dbgfR3InfoLog_Printf, + dbgfR3InfoLog_PrintfV +}; + +/** Release logger output. */ +static const DBGFINFOHLP g_dbgfR3InfoLogRelHlp = +{ + dbgfR3InfoLogRel_Printf, + dbgfR3InfoLogRel_PrintfV +}; + +/** Standard error output. */ +static const DBGFINFOHLP g_dbgfR3InfoStdErrHlp = +{ + dbgfR3InfoStdErr_Printf, + dbgfR3InfoStdErr_PrintfV +}; + + +/** + * Initialize the info handlers. + * + * This is called first during the DBGF init process and thus does the shared + * critsect init. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +int dbgfR3InfoInit(PUVM pUVM) +{ + /* + * Make sure we already didn't initialized in the lazy manner. + */ + if (RTCritSectRwIsInitialized(&pUVM->dbgf.s.CritSect)) + return VINF_SUCCESS; + + /* + * Initialize the crit sect. + */ + int rc = RTCritSectRwInit(&pUVM->dbgf.s.CritSect); + AssertRCReturn(rc, rc); + + /* + * Register the 'info help' item. + */ + rc = DBGFR3InfoRegisterInternal(pUVM->pVM, "help", "List of info items.", dbgfR3InfoHelp); + AssertRCReturn(rc, rc); + + return VINF_SUCCESS; +} + + +/** + * Terminate the info handlers. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +int dbgfR3InfoTerm(PUVM pUVM) +{ + /* + * Delete the crit sect. + */ + int rc = RTCritSectRwDelete(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + return rc; +} + + +/** Logger output. + * @copydoc DBGFINFOHLP::pfnPrintf */ +static DECLCALLBACK(void) dbgfR3InfoLog_Printf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...) 
+{ + NOREF(pHlp); + va_list args; + va_start(args, pszFormat); + RTLogPrintfV(pszFormat, args); + va_end(args); +} + +/** Logger output. + * @copydoc DBGFINFOHLP::pfnPrintfV */ +static DECLCALLBACK(void) dbgfR3InfoLog_PrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args) +{ + NOREF(pHlp); + RTLogPrintfV(pszFormat, args); +} + + +/** + * Gets the logger info helper. + * The returned info helper will unconditionally write all output to the log. + * + * @returns Pointer to the logger info helper. + */ +VMMR3DECL(PCDBGFINFOHLP) DBGFR3InfoLogHlp(void) +{ + return &g_dbgfR3InfoLogHlp; +} + + +/** Release logger output. + * @copydoc DBGFINFOHLP::pfnPrintf */ +static DECLCALLBACK(void) dbgfR3InfoLogRel_Printf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...) +{ + NOREF(pHlp); + va_list args; + va_start(args, pszFormat); + RTLogRelPrintfV(pszFormat, args); + va_end(args); +} + +/** Release logger output. + * @copydoc DBGFINFOHLP::pfnPrintfV */ +static DECLCALLBACK(void) dbgfR3InfoLogRel_PrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args) +{ + NOREF(pHlp); + RTLogRelPrintfV(pszFormat, args); +} + + +/** Standard error output. + * @copydoc DBGFINFOHLP::pfnPrintf */ +static DECLCALLBACK(void) dbgfR3InfoStdErr_Printf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...) +{ + NOREF(pHlp); + va_list args; + va_start(args, pszFormat); + RTStrmPrintfV(g_pStdErr, pszFormat, args); + va_end(args); +} + +/** Standard error output. + * @copydoc DBGFINFOHLP::pfnPrintfV */ +static DECLCALLBACK(void) dbgfR3InfoStdErr_PrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args) +{ + NOREF(pHlp); + RTStrmPrintfV(g_pStdErr, pszFormat, args); +} + + +/** + * Gets the release logger info helper. + * The returned info helper will unconditionally write all output to the release log. + * + * @returns Pointer to the release logger info helper. + */ +VMMR3DECL(PCDBGFINFOHLP) DBGFR3InfoLogRelHlp(void) +{ + return &g_dbgfR3InfoLogRelHlp; +} + + +/** + * Handle registration worker. + * + * This allocates the structure, initializes the common fields and inserts into the list. + * Upon successful return the we're inside the crit sect and the caller must leave it. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszName The identifier of the info. + * @param pszDesc The description of the info and any arguments the handler may take. + * @param fFlags The flags. + * @param ppInfo Where to store the created + */ +static int dbgfR3InfoRegister(PUVM pUVM, const char *pszName, const char *pszDesc, uint32_t fFlags, PDBGFINFO *ppInfo) +{ + /* + * Validate. + */ + AssertPtrReturn(pszName, VERR_INVALID_POINTER); + AssertReturn(*pszName, VERR_INVALID_PARAMETER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertMsgReturn(!(fFlags & ~(DBGFINFO_FLAGS_RUN_ON_EMT | DBGFINFO_FLAGS_ALL_EMTS)), + ("fFlags=%#x\n", fFlags), VERR_INVALID_FLAGS); + + /* + * Allocate and initialize. + */ + int rc; + size_t cchName = strlen(pszName) + 1; + PDBGFINFO pInfo = (PDBGFINFO)MMR3HeapAllocU(pUVM, MM_TAG_DBGF_INFO, RT_UOFFSETOF_DYN(DBGFINFO, szName[cchName])); + if (pInfo) + { + pInfo->enmType = DBGFINFOTYPE_INVALID; + pInfo->fFlags = fFlags; + pInfo->pszDesc = pszDesc; + pInfo->cchName = cchName - 1; + memcpy(pInfo->szName, pszName, cchName); + + /* lazy init */ + rc = VINF_SUCCESS; + if (!RTCritSectRwIsInitialized(&pUVM->dbgf.s.CritSect)) + rc = dbgfR3InfoInit(pUVM); + if (RT_SUCCESS(rc)) + { + /* + * Insert in alphabetical order. 
+ */ + rc = RTCritSectRwEnterExcl(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + PDBGFINFO pPrev = NULL; + PDBGFINFO pCur; + for (pCur = pUVM->dbgf.s.pInfoFirst; pCur; pPrev = pCur, pCur = pCur->pNext) + if (strcmp(pszName, pCur->szName) < 0) + break; + pInfo->pNext = pCur; + if (pPrev) + pPrev->pNext = pInfo; + else + pUVM->dbgf.s.pInfoFirst = pInfo; + + *ppInfo = pInfo; + return VINF_SUCCESS; + } + MMR3HeapFree(pInfo); + } + else + rc = VERR_NO_MEMORY; + return rc; +} + + +/** + * Register a info handler owned by a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName The identifier of the info. + * @param pszDesc The description of the info and any arguments the handler may take. + * @param pfnHandler The handler function to be called to display the info. + * @param pDevIns The device instance owning the info. + */ +VMMR3_INT_DECL(int) DBGFR3InfoRegisterDevice(PVM pVM, const char *pszName, const char *pszDesc, + PFNDBGFHANDLERDEV pfnHandler, PPDMDEVINS pDevIns) +{ + LogFlow(("DBGFR3InfoRegisterDevice: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p pDevIns=%p\n", + pszName, pszName, pszDesc, pszDesc, pfnHandler, pDevIns)); + + /* + * Validate the specific stuff. + */ + AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER); + AssertPtrReturn(pDevIns, VERR_INVALID_POINTER); + + /* + * Register + */ + PDBGFINFO pInfo; + int rc = dbgfR3InfoRegister(pVM->pUVM, pszName, pszDesc, 0, &pInfo); + if (RT_SUCCESS(rc)) + { + pInfo->enmType = DBGFINFOTYPE_DEV; + pInfo->u.Dev.pfnHandler = pfnHandler; + pInfo->u.Dev.pDevIns = pDevIns; + RTCritSectRwLeaveExcl(&pVM->pUVM->dbgf.s.CritSect); + } + + return rc; +} + + +/** + * Register a info handler owned by a driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName The identifier of the info. + * @param pszDesc The description of the info and any arguments the handler may take. + * @param pfnHandler The handler function to be called to display the info. + * @param pDrvIns The driver instance owning the info. + */ +VMMR3_INT_DECL(int) DBGFR3InfoRegisterDriver(PVM pVM, const char *pszName, const char *pszDesc, PFNDBGFHANDLERDRV pfnHandler, PPDMDRVINS pDrvIns) +{ + LogFlow(("DBGFR3InfoRegisterDriver: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p pDrvIns=%p\n", + pszName, pszName, pszDesc, pszDesc, pfnHandler, pDrvIns)); + + /* + * Validate the specific stuff. + */ + AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER); + AssertPtrReturn(pDrvIns, VERR_INVALID_POINTER); + + /* + * Register + */ + PDBGFINFO pInfo; + int rc = dbgfR3InfoRegister(pVM->pUVM, pszName, pszDesc, 0, &pInfo); + if (RT_SUCCESS(rc)) + { + pInfo->enmType = DBGFINFOTYPE_DRV; + pInfo->u.Drv.pfnHandler = pfnHandler; + pInfo->u.Drv.pDrvIns = pDrvIns; + RTCritSectRwLeaveExcl(&pVM->pUVM->dbgf.s.CritSect); + } + + return rc; +} + + +/** + * Register a info handler owned by an internal component. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName The identifier of the info. + * @param pszDesc The description of the info and any arguments the handler may take. + * @param pfnHandler The handler function to be called to display the info. + */ +VMMR3_INT_DECL(int) DBGFR3InfoRegisterInternal(PVM pVM, const char *pszName, const char *pszDesc, PFNDBGFHANDLERINT pfnHandler) +{ + return DBGFR3InfoRegisterInternalEx(pVM, pszName, pszDesc, pfnHandler, 0); +} + + +/** + * Register a info handler owned by an internal component. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param pszName The identifier of the info. + * @param pszDesc The description of the info and any arguments the handler may take. + * @param pfnHandler The handler function to be called to display the info. + * @param fFlags Flags, see the DBGFINFO_FLAGS_*. + */ +VMMR3_INT_DECL(int) DBGFR3InfoRegisterInternalEx(PVM pVM, const char *pszName, const char *pszDesc, + PFNDBGFHANDLERINT pfnHandler, uint32_t fFlags) +{ + LogFlow(("DBGFR3InfoRegisterInternal: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p fFlags=%x\n", + pszName, pszName, pszDesc, pszDesc, pfnHandler, fFlags)); + + /* + * Validate the specific stuff. + */ + AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER); + + /* + * Register + */ + PDBGFINFO pInfo; + int rc = dbgfR3InfoRegister(pVM->pUVM, pszName, pszDesc, fFlags, &pInfo); + if (RT_SUCCESS(rc)) + { + pInfo->enmType = DBGFINFOTYPE_INT; + pInfo->u.Int.pfnHandler = pfnHandler; + RTCritSectRwLeaveExcl(&pVM->pUVM->dbgf.s.CritSect); + } + + return rc; +} + + +/** + * Register a info handler owned by an external component. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszName The identifier of the info. + * @param pszDesc The description of the info and any arguments the handler may take. + * @param pfnHandler The handler function to be called to display the info. + * @param pvUser User argument to be passed to the handler. + */ +VMMR3DECL(int) DBGFR3InfoRegisterExternal(PUVM pUVM, const char *pszName, const char *pszDesc, + PFNDBGFHANDLEREXT pfnHandler, void *pvUser) +{ + LogFlow(("DBGFR3InfoRegisterExternal: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p pvUser=%p\n", + pszName, pszName, pszDesc, pszDesc, pfnHandler, pvUser)); + + /* + * Validate the specific stuff. + */ + AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Register + */ + PDBGFINFO pInfo; + int rc = dbgfR3InfoRegister(pUVM, pszName, pszDesc, 0, &pInfo); + if (RT_SUCCESS(rc)) + { + pInfo->enmType = DBGFINFOTYPE_EXT; + pInfo->u.Ext.pfnHandler = pfnHandler; + pInfo->u.Ext.pvUser = pvUser; + RTCritSectRwLeaveExcl(&pUVM->dbgf.s.CritSect); + } + + return rc; +} + + +/** + * Deregister one(/all) info handler(s) owned by a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param pszName The identifier of the info. If NULL all owned by the device. + */ +VMMR3_INT_DECL(int) DBGFR3InfoDeregisterDevice(PVM pVM, PPDMDEVINS pDevIns, const char *pszName) +{ + LogFlow(("DBGFR3InfoDeregisterDevice: pDevIns=%p pszName=%p:{%s}\n", pDevIns, pszName, pszName)); + + /* + * Validate input. + */ + AssertPtrReturn(pDevIns, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszName, VERR_INVALID_POINTER); + size_t cchName = pszName ? strlen(pszName) : 0; + PUVM pUVM = pVM->pUVM; + + /* + * Enumerate the info handlers and free the requested entries. + */ + int rc = RTCritSectRwEnterExcl(&pUVM->dbgf.s.CritSect); AssertRC(rc); + rc = VERR_FILE_NOT_FOUND; + PDBGFINFO pPrev = NULL; + PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; + if (pszName) + { + /* + * Free a specific one. 
+ */ + for (; pInfo; pPrev = pInfo, pInfo = pInfo->pNext) + if ( pInfo->enmType == DBGFINFOTYPE_DEV + && pInfo->u.Dev.pDevIns == pDevIns + && pInfo->cchName == cchName + && !strcmp(pInfo->szName, pszName)) + { + if (pPrev) + pPrev->pNext = pInfo->pNext; + else + pUVM->dbgf.s.pInfoFirst = pInfo->pNext; + MMR3HeapFree(pInfo); + rc = VINF_SUCCESS; + break; + } + } + else + { + /* + * Free all owned by the device. + */ + while (pInfo != NULL) + if ( pInfo->enmType == DBGFINFOTYPE_DEV + && pInfo->u.Dev.pDevIns == pDevIns) + { + PDBGFINFO volatile pFree = pInfo; + pInfo = pInfo->pNext; + if (pPrev) + pPrev->pNext = pInfo; + else + pUVM->dbgf.s.pInfoFirst = pInfo; + MMR3HeapFree(pFree); + } + else + { + pPrev = pInfo; + pInfo = pInfo->pNext; + } + rc = VINF_SUCCESS; + } + int rc2 = RTCritSectRwLeaveExcl(&pUVM->dbgf.s.CritSect); + AssertRC(rc2); + AssertRC(rc); + LogFlow(("DBGFR3InfoDeregisterDevice: returns %Rrc\n", rc)); + return rc; +} + +/** + * Deregister one(/all) info handler(s) owned by a driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param pszName The identifier of the info. If NULL all owned by the driver. + */ +VMMR3_INT_DECL(int) DBGFR3InfoDeregisterDriver(PVM pVM, PPDMDRVINS pDrvIns, const char *pszName) +{ + LogFlow(("DBGFR3InfoDeregisterDriver: pDrvIns=%p pszName=%p:{%s}\n", pDrvIns, pszName, pszName)); + + /* + * Validate input. + */ + AssertPtrReturn(pDrvIns, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszName, VERR_INVALID_POINTER); + size_t cchName = pszName ? strlen(pszName) : 0; + PUVM pUVM = pVM->pUVM; + + /* + * Enumerate the info handlers and free the requested entries. + */ + int rc = RTCritSectRwEnterExcl(&pUVM->dbgf.s.CritSect); AssertRC(rc); + rc = VERR_FILE_NOT_FOUND; + PDBGFINFO pPrev = NULL; + PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; + if (pszName) + { + /* + * Free a specific one. + */ + for (; pInfo; pPrev = pInfo, pInfo = pInfo->pNext) + if ( pInfo->enmType == DBGFINFOTYPE_DRV + && pInfo->u.Drv.pDrvIns == pDrvIns + && pInfo->cchName == cchName + && !strcmp(pInfo->szName, pszName)) + { + if (pPrev) + pPrev->pNext = pInfo->pNext; + else + pUVM->dbgf.s.pInfoFirst = pInfo->pNext; + MMR3HeapFree(pInfo); + rc = VINF_SUCCESS; + break; + } + } + else + { + /* + * Free all owned by the driver. + */ + while (pInfo != NULL) + if ( pInfo->enmType == DBGFINFOTYPE_DRV + && pInfo->u.Drv.pDrvIns == pDrvIns) + { + PDBGFINFO volatile pFree = pInfo; + pInfo = pInfo->pNext; + if (pPrev) + pPrev->pNext = pInfo; + else + pUVM->dbgf.s.pInfoFirst = pInfo; + MMR3HeapFree(pFree); + } + else + { + pPrev = pInfo; + pInfo = pInfo->pNext; + } + rc = VINF_SUCCESS; + } + int rc2 = RTCritSectRwLeaveExcl(&pUVM->dbgf.s.CritSect); + AssertRC(rc2); + AssertRC(rc); + LogFlow(("DBGFR3InfoDeregisterDriver: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Internal deregistration helper. + * + * @returns VBox status code. + * @param pUVM Pointer to the VM. + * @param pszName The identifier of the info. + * @param enmType The info owner type. + */ +static int dbgfR3InfoDeregister(PUVM pUVM, const char *pszName, DBGFINFOTYPE enmType) +{ + /* + * Validate input. + */ + AssertPtrReturn(pszName, VERR_INVALID_POINTER); + + /* + * Find the info handler. 
+ */ + size_t cchName = strlen(pszName); + int rc = RTCritSectRwEnterExcl(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + rc = VERR_FILE_NOT_FOUND; + PDBGFINFO pPrev = NULL; + PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; + for (; pInfo; pPrev = pInfo, pInfo = pInfo->pNext) + if ( pInfo->cchName == cchName + && !strcmp(pInfo->szName, pszName) + && pInfo->enmType == enmType) + { + if (pPrev) + pPrev->pNext = pInfo->pNext; + else + pUVM->dbgf.s.pInfoFirst = pInfo->pNext; + MMR3HeapFree(pInfo); + rc = VINF_SUCCESS; + break; + } + int rc2 = RTCritSectRwLeaveExcl(&pUVM->dbgf.s.CritSect); + AssertRC(rc2); + AssertRC(rc); + LogFlow(("dbgfR3InfoDeregister: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Deregister a info handler owned by an internal component. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName The identifier of the info. If NULL all owned by the device. + */ +VMMR3_INT_DECL(int) DBGFR3InfoDeregisterInternal(PVM pVM, const char *pszName) +{ + LogFlow(("DBGFR3InfoDeregisterInternal: pszName=%p:{%s}\n", pszName, pszName)); + return dbgfR3InfoDeregister(pVM->pUVM, pszName, DBGFINFOTYPE_INT); +} + + +/** + * Deregister a info handler owned by an external component. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszName The identifier of the info. If NULL all owned by the device. + */ +VMMR3DECL(int) DBGFR3InfoDeregisterExternal(PUVM pUVM, const char *pszName) +{ + LogFlow(("DBGFR3InfoDeregisterExternal: pszName=%p:{%s}\n", pszName, pszName)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + return dbgfR3InfoDeregister(pUVM, pszName, DBGFINFOTYPE_EXT); +} + + +/** + * Worker for DBGFR3InfoEx. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu Which CPU to run EMT bound handlers on. VMCPUID_ANY or + * a valid CPU ID. + * @param pszName What to dump. + * @param pszArgs Arguments, optional. + * @param pHlp Output helper, optional. + */ +static DECLCALLBACK(int) dbgfR3Info(PUVM pUVM, VMCPUID idCpu, const char *pszName, const char *pszArgs, PCDBGFINFOHLP pHlp) +{ + /* + * Validate input. + */ + AssertPtrReturn(pszName, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszArgs, VERR_INVALID_POINTER); + if (pHlp) + { + AssertPtrReturn(pHlp, VERR_INVALID_PARAMETER); + AssertPtrReturn(pHlp->pfnPrintf, VERR_INVALID_PARAMETER); + AssertPtrReturn(pHlp->pfnPrintfV, VERR_INVALID_PARAMETER); + } + else + pHlp = &g_dbgfR3InfoLogHlp; + Assert(idCpu == NIL_VMCPUID || idCpu < pUVM->cCpus); /* if not nil, we're on that EMT already. */ + + /* + * Find the info handler. + */ + size_t cchName = strlen(pszName); + int rc = RTCritSectRwEnterShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; + for (; pInfo; pInfo = pInfo->pNext) + if ( pInfo->cchName == cchName + && !memcmp(pInfo->szName, pszName, cchName)) + break; + if (pInfo) + { + /* + * Found it. + */ + VMCPUID idDstCpu = NIL_VMCPUID; + if ((pInfo->fFlags & (DBGFINFO_FLAGS_RUN_ON_EMT | DBGFINFO_FLAGS_ALL_EMTS)) && idCpu == NIL_VMCPUID) + idDstCpu = pInfo->fFlags & DBGFINFO_FLAGS_ALL_EMTS ? 
VMCPUID_ALL : VMCPUID_ANY; + + rc = VINF_SUCCESS; + switch (pInfo->enmType) + { + case DBGFINFOTYPE_DEV: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Dev.pfnHandler, 3, + pInfo->u.Dev.pDevIns, pHlp, pszArgs); + else + pInfo->u.Dev.pfnHandler(pInfo->u.Dev.pDevIns, pHlp, pszArgs); + break; + + case DBGFINFOTYPE_DRV: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Drv.pfnHandler, 3, + pInfo->u.Drv.pDrvIns, pHlp, pszArgs); + else + pInfo->u.Drv.pfnHandler(pInfo->u.Drv.pDrvIns, pHlp, pszArgs); + break; + + case DBGFINFOTYPE_INT: + if (RT_VALID_PTR(pUVM->pVM)) + { + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Int.pfnHandler, 3, + pUVM->pVM, pHlp, pszArgs); + else + pInfo->u.Int.pfnHandler(pUVM->pVM, pHlp, pszArgs); + } + else + rc = VERR_INVALID_VM_HANDLE; + break; + + case DBGFINFOTYPE_EXT: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Ext.pfnHandler, 3, + pInfo->u.Ext.pvUser, pHlp, pszArgs); + else + pInfo->u.Ext.pfnHandler(pInfo->u.Ext.pvUser, pHlp, pszArgs); + break; + + default: + AssertMsgFailedReturn(("Invalid info type enmType=%d\n", pInfo->enmType), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + + int rc2 = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc2); + } + else + { + rc = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + rc = VERR_FILE_NOT_FOUND; + } + return rc; +} + + +/** + * Display a piece of info writing to the supplied handler. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszName The identifier of the info to display. + * @param pszArgs Arguments to the info handler. + * @param pHlp The output helper functions. If NULL the logger will be used. + */ +VMMR3DECL(int) DBGFR3Info(PUVM pUVM, const char *pszName, const char *pszArgs, PCDBGFINFOHLP pHlp) +{ + return DBGFR3InfoEx(pUVM, NIL_VMCPUID, pszName, pszArgs, pHlp); +} + + +/** + * Display a piece of info writing to the supplied handler. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The CPU to exectue the request on. Pass NIL_VMCPUID + * to not involve any EMT unless necessary. + * @param pszName The identifier of the info to display. + * @param pszArgs Arguments to the info handler. + * @param pHlp The output helper functions. If NULL the logger will be used. + */ +VMMR3DECL(int) DBGFR3InfoEx(PUVM pUVM, VMCPUID idCpu, const char *pszName, const char *pszArgs, PCDBGFINFOHLP pHlp) +{ + /* + * Some input validation. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn( idCpu != VMCPUID_ANY_QUEUE + && idCpu != VMCPUID_ALL + && idCpu != VMCPUID_ALL_REVERSE, VERR_INVALID_PARAMETER); + + /* + * Run on any specific EMT? + */ + if (idCpu == NIL_VMCPUID) + return dbgfR3Info(pUVM, NIL_VMCPUID, pszName, pszArgs, pHlp); + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, + (PFNRT)dbgfR3Info, 5, pUVM, idCpu, pszName, pszArgs, pHlp); +} + + +/** + * Wrapper for DBGFR3Info that outputs to the release log. + * + * @returns See DBGFR3Info. + * @param pUVM The user mode VM handle. + * @param pszName See DBGFR3Info. + * @param pszArgs See DBGFR3Info. + */ +VMMR3DECL(int) DBGFR3InfoLogRel(PUVM pUVM, const char *pszName, const char *pszArgs) +{ + return DBGFR3InfoEx(pUVM, NIL_VMCPUID, pszName, pszArgs, &g_dbgfR3InfoLogRelHlp); +} + + +/** + * Wrapper for DBGFR3Info that outputs to standard error. 
+ * + * @returns See DBGFR3Info. + * @param pUVM The user mode VM handle. + * @param pszName See DBGFR3Info. + * @param pszArgs See DBGFR3Info. + */ +VMMR3DECL(int) DBGFR3InfoStdErr(PUVM pUVM, const char *pszName, const char *pszArgs) +{ + return DBGFR3InfoEx(pUVM, NIL_VMCPUID, pszName, pszArgs, &g_dbgfR3InfoStdErrHlp); +} + + +/** + * Display several info items. + * + * This is intended used by the fatal error dump only. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszIncludePat Simple string pattern of info items to include. + * @param pszExcludePat Simple string pattern of info items to exclude. + * @param pszSepFmt Item separator format string. The item name will be + * given as parameter. + * @param pHlp The output helper functions. If NULL the logger + * will be used. + * + * @thread EMT + */ +VMMR3_INT_DECL(int) DBGFR3InfoMulti(PVM pVM, const char *pszIncludePat, const char *pszExcludePat, const char *pszSepFmt, + PCDBGFINFOHLP pHlp) +{ + /* + * Validate input. + */ + PUVM pUVM = pVM->pUVM; + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pszIncludePat, VERR_INVALID_POINTER); + AssertPtrReturn(pszExcludePat, VERR_INVALID_POINTER); + if (pHlp) + { + AssertPtrReturn(pHlp->pfnPrintf, VERR_INVALID_POINTER); + AssertPtrReturn(pHlp->pfnPrintfV, VERR_INVALID_POINTER); + } + else + pHlp = &g_dbgfR3InfoLogHlp; + + size_t const cchIncludePat = strlen(pszIncludePat); + size_t const cchExcludePat = strlen(pszExcludePat); + const char *pszArgs = ""; + + /* + * Enumerate the info handlers and call the ones matching. + * Note! We won't leave the critical section here... + */ + int rc = RTCritSectRwEnterShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + rc = VWRN_NOT_FOUND; + for (PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; pInfo; pInfo = pInfo->pNext) + { + if ( RTStrSimplePatternMultiMatch(pszIncludePat, cchIncludePat, pInfo->szName, pInfo->cchName, NULL) + && !RTStrSimplePatternMultiMatch(pszExcludePat, cchExcludePat, pInfo->szName, pInfo->cchName, NULL)) + { + pHlp->pfnPrintf(pHlp, pszSepFmt, pInfo->szName); + + VMCPUID idDstCpu = NIL_VMCPUID; + if (pInfo->fFlags & (DBGFINFO_FLAGS_RUN_ON_EMT | DBGFINFO_FLAGS_ALL_EMTS)) + idDstCpu = pInfo->fFlags & DBGFINFO_FLAGS_ALL_EMTS ? 
VMCPUID_ALL : VMCPUID_ANY; + + rc = VINF_SUCCESS; + switch (pInfo->enmType) + { + case DBGFINFOTYPE_DEV: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallVoidWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Dev.pfnHandler, 3, + pInfo->u.Dev.pDevIns, pHlp, pszArgs); + else + pInfo->u.Dev.pfnHandler(pInfo->u.Dev.pDevIns, pHlp, pszArgs); + break; + + case DBGFINFOTYPE_DRV: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallVoidWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Drv.pfnHandler, 3, + pInfo->u.Drv.pDrvIns, pHlp, pszArgs); + else + pInfo->u.Drv.pfnHandler(pInfo->u.Drv.pDrvIns, pHlp, pszArgs); + break; + + case DBGFINFOTYPE_INT: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallVoidWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Int.pfnHandler, 3, + pVM, pHlp, pszArgs); + else + pInfo->u.Int.pfnHandler(pVM, pHlp, pszArgs); + break; + + case DBGFINFOTYPE_EXT: + if (idDstCpu != NIL_VMCPUID) + rc = VMR3ReqPriorityCallVoidWaitU(pUVM, idDstCpu, (PFNRT)pInfo->u.Ext.pfnHandler, 3, + pInfo->u.Ext.pvUser, pHlp, pszArgs); + else + pInfo->u.Ext.pfnHandler(pInfo->u.Ext.pvUser, pHlp, pszArgs); + break; + + default: + AssertMsgFailedReturn(("Invalid info type enmType=%d\n", pInfo->enmType), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + } + } + int rc2 = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc2); + + return rc; +} + + +/** + * Enumerate all the register info handlers. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pfnCallback Pointer to callback function. + * @param pvUser User argument to pass to the callback. + */ +VMMR3DECL(int) DBGFR3InfoEnum(PUVM pUVM, PFNDBGFINFOENUM pfnCallback, void *pvUser) +{ + LogFlow(("DBGFR3InfoLog: pfnCallback=%p pvUser=%p\n", pfnCallback, pvUser)); + + /* + * Validate input. + */ + if (!pfnCallback) + { + AssertMsgFailed(("!pfnCallback\n")); + return VERR_INVALID_PARAMETER; + } + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Enter and enumerate. + */ + int rc = RTCritSectRwEnterShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + + rc = VINF_SUCCESS; + for (PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; RT_SUCCESS(rc) && pInfo; pInfo = pInfo->pNext) + rc = pfnCallback(pUVM, pInfo->szName, pInfo->pszDesc, pvUser); + + /* + * Leave and exit. + */ + int rc2 = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc2); + + LogFlow(("DBGFR3InfoLog: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Info handler, internal version. + * + * @param pVM The cross context VM structure. + * @param pHlp Callback functions for doing output. + * @param pszArgs Argument string. Optional and specific to the handler. + */ +static DECLCALLBACK(void) dbgfR3InfoHelp(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + LogFlow(("dbgfR3InfoHelp: pszArgs=%s\n", pszArgs)); + + /* + * Enter and enumerate. + */ + PUVM pUVM = pVM->pUVM; + int rc = RTCritSectRwEnterShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc); + + if (pszArgs && *pszArgs) + { + for (PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; pInfo; pInfo = pInfo->pNext) + { + const char *psz = strstr(pszArgs, pInfo->szName); + if ( psz + && ( psz == pszArgs + || RT_C_IS_SPACE(psz[-1])) + && ( !psz[pInfo->cchName] + || RT_C_IS_SPACE(psz[pInfo->cchName]))) + pHlp->pfnPrintf(pHlp, "%-16s %s\n", + pInfo->szName, pInfo->pszDesc); + } + } + else + { + for (PDBGFINFO pInfo = pUVM->dbgf.s.pInfoFirst; pInfo; pInfo = pInfo->pNext) + pHlp->pfnPrintf(pHlp, "%-16s %s\n", + pInfo->szName, pInfo->pszDesc); + } + + /* + * Leave and exit. 
+ */ + rc = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); + AssertRC(rc); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFLog.cpp b/src/VBox/VMM/VMMR3/DBGFLog.cpp new file mode 100644 index 00000000..6b84acf1 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFLog.cpp @@ -0,0 +1,186 @@ +/* $Id: DBGFLog.cpp $ */ +/** @file + * DBGF - Debugger Facility, Log Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/** + * Checkes for logger prefixes and selects the right logger. + * + * @returns Target logger. + * @param ppsz Pointer to the string pointer. + */ +static PRTLOGGER dbgfR3LogResolvedLogger(const char **ppsz) +{ + PRTLOGGER pLogger; + const char *psz = *ppsz; + if (!strncmp(psz, RT_STR_TUPLE("release:"))) + { + *ppsz += sizeof("release:") - 1; + pLogger = RTLogRelGetDefaultInstance(); + } + else + { + if (!strncmp(psz, RT_STR_TUPLE("debug:"))) + *ppsz += sizeof("debug:") - 1; + pLogger = RTLogDefaultInstance(); + } + return pLogger; +} + + +/** + * EMT worker for DBGFR3LogModifyGroups. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszGroupSettings The group settings string. (VBOX_LOG) + */ +static DECLCALLBACK(int) dbgfR3LogModifyGroups(PUVM pUVM, const char *pszGroupSettings) +{ + PRTLOGGER pLogger = dbgfR3LogResolvedLogger(&pszGroupSettings); + if (!pLogger) + return VINF_SUCCESS; + + int rc = RTLogGroupSettings(pLogger, pszGroupSettings); + if (RT_SUCCESS(rc) && pUVM->pVM) + { + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + rc = VMMR3UpdateLoggers(pUVM->pVM); + } + return rc; +} + + +/** + * Changes the logger group settings. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszGroupSettings The group settings string. (VBOX_LOG) + * By prefixing the string with \"release:\" the + * changes will be applied to the release log + * instead of the debug log. The prefix \"debug:\" + * is also recognized. + */ +VMMR3DECL(int) DBGFR3LogModifyGroups(PUVM pUVM, const char *pszGroupSettings) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszGroupSettings, VERR_INVALID_POINTER); + + return VMR3ReqPriorityCallWaitU(pUVM, VMCPUID_ANY, (PFNRT)dbgfR3LogModifyGroups, 2, pUVM, pszGroupSettings); +} + + +/** + * EMT worker for DBGFR3LogModifyFlags. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszFlagSettings The group settings string. 
(VBOX_LOG_FLAGS) + */ +static DECLCALLBACK(int) dbgfR3LogModifyFlags(PUVM pUVM, const char *pszFlagSettings) +{ + PRTLOGGER pLogger = dbgfR3LogResolvedLogger(&pszFlagSettings); + if (!pLogger) + return VINF_SUCCESS; + + int rc = RTLogFlags(pLogger, pszFlagSettings); + if (RT_SUCCESS(rc) && pUVM->pVM) + { + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + rc = VMMR3UpdateLoggers(pUVM->pVM); + } + return rc; +} + + +/** + * Changes the logger flag settings. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszFlagSettings The flag settings string. (VBOX_LOG_FLAGS) + * By prefixing the string with \"release:\" the + * changes will be applied to the release log + * instead of the debug log. The prefix \"debug:\" + * is also recognized. + */ +VMMR3DECL(int) DBGFR3LogModifyFlags(PUVM pUVM, const char *pszFlagSettings) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszFlagSettings, VERR_INVALID_POINTER); + + return VMR3ReqPriorityCallWaitU(pUVM, VMCPUID_ANY, (PFNRT)dbgfR3LogModifyFlags, 2, pUVM, pszFlagSettings); +} + + +/** + * EMT worker for DBGFR3LogModifyDestinations. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDestSettings The destination settings string. (VBOX_LOG_DEST) + */ +static DECLCALLBACK(int) dbgfR3LogModifyDestinations(PUVM pUVM, const char *pszDestSettings) +{ + PRTLOGGER pLogger = dbgfR3LogResolvedLogger(&pszDestSettings); + if (!pLogger) + return VINF_SUCCESS; + + int rc = RTLogDestinations(NULL, pszDestSettings); + if (RT_SUCCESS(rc) && pUVM->pVM) + { + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + rc = VMMR3UpdateLoggers(pUVM->pVM); + } + return rc; +} + + +/** + * Changes the logger destination settings. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDestSettings The destination settings string. (VBOX_LOG_DEST) + * By prefixing the string with \"release:\" the + * changes will be applied to the release log + * instead of the debug log. The prefix \"debug:\" + * is also recognized. + */ +VMMR3DECL(int) DBGFR3LogModifyDestinations(PUVM pUVM, const char *pszDestSettings) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszDestSettings, VERR_INVALID_POINTER); + + return VMR3ReqPriorityCallWaitU(pUVM, VMCPUID_ANY, (PFNRT)dbgfR3LogModifyDestinations, 2, pUVM, pszDestSettings); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFMem.cpp b/src/VBox/VMM/VMMR3/DBGFMem.cpp new file mode 100644 index 00000000..b92db18b --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFMem.cpp @@ -0,0 +1,682 @@ +/* $Id: DBGFMem.cpp $ */ +/** @file + * DBGF - Debugger Facility, Memory Methods. + */ + +/* + * Copyright (C) 2007-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include +#include "DBGFInternal.h" +#include +#include +#include +#include +#include + + + +/** + * Scan guest memory for an exact byte string. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU context to search in. + * @param pAddress Where to store the mixed address. + * @param puAlign The alignment restriction imposed on the search result. + * @param pcbRange The number of bytes to scan. Passed as a pointer because + * it may be 64-bit. + * @param pabNeedle What to search for - exact search. + * @param cbNeedle Size of the search byte string. + * @param pHitAddress Where to put the address of the first hit. + */ +static DECLCALLBACK(int) dbgfR3MemScan(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, PCRTGCUINTPTR pcbRange, + RTGCUINTPTR *puAlign, const uint8_t *pabNeedle, size_t cbNeedle, PDBGFADDRESS pHitAddress) +{ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + Assert(idCpu == VMMGetCpuId(pVM)); + + /* + * Validate the input we use, PGM does the rest. + */ + RTGCUINTPTR cbRange = *pcbRange; + if (!DBGFR3AddrIsValid(pUVM, pAddress)) + return VERR_INVALID_POINTER; + if (!VALID_PTR(pHitAddress)) + return VERR_INVALID_POINTER; + if (DBGFADDRESS_IS_HMA(pAddress)) + return VERR_INVALID_POINTER; + + /* + * Select DBGF worker by addressing mode. + */ + int rc; + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + PGMMODE enmMode = PGMGetGuestMode(pVCpu); + if ( enmMode == PGMMODE_REAL + || enmMode == PGMMODE_PROTECTED + || DBGFADDRESS_IS_PHYS(pAddress) + ) + { + RTGCPHYS GCPhysAlign = *puAlign; + if (GCPhysAlign != *puAlign) + return VERR_OUT_OF_RANGE; + RTGCPHYS PhysHit; + rc = PGMR3DbgScanPhysical(pVM, pAddress->FlatPtr, cbRange, GCPhysAlign, pabNeedle, cbNeedle, &PhysHit); + if (RT_SUCCESS(rc)) + DBGFR3AddrFromPhys(pUVM, pHitAddress, PhysHit); + } + else + { +#if GC_ARCH_BITS > 32 + if ( ( pAddress->FlatPtr >= _4G + || pAddress->FlatPtr + cbRange > _4G) + && enmMode != PGMMODE_AMD64 + && enmMode != PGMMODE_AMD64_NX) + return VERR_DBGF_MEM_NOT_FOUND; +#endif + RTGCUINTPTR GCPtrHit; + rc = PGMR3DbgScanVirtual(pVM, pVCpu, pAddress->FlatPtr, cbRange, *puAlign, pabNeedle, cbNeedle, &GCPtrHit); + if (RT_SUCCESS(rc)) + DBGFR3AddrFromFlat(pUVM, pHitAddress, GCPtrHit); + } + + return rc; +} + + +/** + * Scan guest memory for an exact byte string. + * + * @returns VBox status codes: + * @retval VINF_SUCCESS and *pGCPtrHit on success. + * @retval VERR_DBGF_MEM_NOT_FOUND if not found. + * @retval VERR_INVALID_POINTER if any of the pointer arguments are invalid. + * @retval VERR_INVALID_ARGUMENT if any other arguments are invalid. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU context to search in. + * @param pAddress Where to store the mixed address. + * @param cbRange The number of bytes to scan. + * @param uAlign The alignment restriction imposed on the result. + * Usually set to 1. + * @param pvNeedle What to search for - exact search. + * @param cbNeedle Size of the search byte string. + * @param pHitAddress Where to put the address of the first hit. + * + * @thread Any thread. 
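+ * + * Calling sketch (the address, range and needle values are purely illustrative): + * @code + * DBGFADDRESS Addr, HitAddr; + * static const uint8_t s_abNeedle[] = { 0x55, 0x48, 0x89, 0xe5 }; + * DBGFR3AddrFromFlat(pUVM, &Addr, 0x100000); + * int rc = DBGFR3MemScan(pUVM, 0 /*idCpu*/, &Addr, _1M /*cbRange*/, 1 /*uAlign*/, + * s_abNeedle, sizeof(s_abNeedle), &HitAddr); + * @endcode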
+ */ +VMMR3DECL(int) DBGFR3MemScan(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, RTGCUINTPTR cbRange, RTGCUINTPTR uAlign, + const void *pvNeedle, size_t cbNeedle, PDBGFADDRESS pHitAddress) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3MemScan, 8, + pUVM, idCpu, pAddress, &cbRange, &uAlign, pvNeedle, cbNeedle, pHitAddress); + +} + + +/** + * Read guest memory. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU context to read memory from. + * @param pAddress Where to start reading. + * @param pvBuf Where to store the data we've read. + * @param cbRead The number of bytes to read. + */ +static DECLCALLBACK(int) dbgfR3MemRead(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, void *pvBuf, size_t cbRead) +{ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + Assert(idCpu == VMMGetCpuId(pVM)); + + /* + * Validate the input we use, PGM does the rest. + */ + if (!DBGFR3AddrIsValid(pUVM, pAddress)) + return VERR_INVALID_POINTER; + if (!VALID_PTR(pvBuf)) + return VERR_INVALID_POINTER; + + /* + * HMA is special. + */ + int rc; + if (DBGFADDRESS_IS_HMA(pAddress)) + { + if (DBGFADDRESS_IS_PHYS(pAddress)) + rc = VERR_INVALID_POINTER; + else + rc = MMR3HyperReadGCVirt(pVM, pvBuf, pAddress->FlatPtr, cbRead); + } + else + { + /* + * Select PGM worker by addressing mode. + */ + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + PGMMODE enmMode = PGMGetGuestMode(pVCpu); + if ( enmMode == PGMMODE_REAL + || enmMode == PGMMODE_PROTECTED + || DBGFADDRESS_IS_PHYS(pAddress) ) + rc = PGMPhysSimpleReadGCPhys(pVM, pvBuf, pAddress->FlatPtr, cbRead); + else + { +#if GC_ARCH_BITS > 32 + if ( ( pAddress->FlatPtr >= _4G + || pAddress->FlatPtr + cbRead > _4G) + && enmMode != PGMMODE_AMD64 + && enmMode != PGMMODE_AMD64_NX) + return VERR_PAGE_TABLE_NOT_PRESENT; +#endif + rc = PGMPhysSimpleReadGCPtr(pVCpu, pvBuf, pAddress->FlatPtr, cbRead); + } + } + return rc; +} + + +/** + * Read guest memory. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the source CPU context (for the address). + * @param pAddress Where to start reading. + * @param pvBuf Where to store the data we've read. + * @param cbRead The number of bytes to read. + */ +VMMR3DECL(int) DBGFR3MemRead(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, void *pvBuf, size_t cbRead) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + + if ((pAddress->fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) == DBGFADDRESS_FLAGS_RING0) + { + AssertCompile(sizeof(RTHCUINTPTR) <= sizeof(pAddress->FlatPtr)); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + return VMMR3ReadR0Stack(pUVM->pVM, idCpu, (RTHCUINTPTR)pAddress->FlatPtr, pvBuf, cbRead); + } + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3MemRead, 5, pUVM, idCpu, pAddress, pvBuf, cbRead); +} + + +/** + * Read a zero terminated string from guest memory. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the source CPU context (for the address). + * @param pAddress Where to start reading. + * @param pszBuf Where to store the string. + * @param cchBuf The size of the buffer. 
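+ * + * @note On return the buffer is always zero terminated; VINF_BUFFER_OVERFLOW + * indicates that no terminator was found within the first cchBuf chars.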
+ */ +static DECLCALLBACK(int) dbgfR3MemReadString(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, char *pszBuf, size_t cchBuf) +{ + /* + * Validate the input we use, PGM does the rest. + */ + if (!DBGFR3AddrIsValid(pUVM, pAddress)) + return VERR_INVALID_POINTER; + if (!VALID_PTR(pszBuf)) + return VERR_INVALID_POINTER; + + /* + * Let dbgfR3MemRead do the job. + */ + int rc = dbgfR3MemRead(pUVM, idCpu, pAddress, pszBuf, cchBuf); + + /* + * Make sure the result is terminated and that overflow is signaled. + * This may look a bit reckless with the rc, but it should be fine. + */ + if (!RTStrEnd(pszBuf, cchBuf)) + { + pszBuf[cchBuf - 1] = '\0'; + rc = VINF_BUFFER_OVERFLOW; + } + /* + * Handle partial reads (not perfect). + */ + else if (RT_FAILURE(rc)) + { + if (pszBuf[0]) + rc = VINF_SUCCESS; + } + + return rc; +} + + +/** + * Read a zero terminated string from guest memory. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the source CPU context (for the address). + * @param pAddress Where to start reading. + * @param pszBuf Where to store the string. + * @param cchBuf The size of the buffer. + */ +VMMR3DECL(int) DBGFR3MemReadString(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, char *pszBuf, size_t cchBuf) +{ + /* + * Validate and zero output. + */ + if (!VALID_PTR(pszBuf)) + return VERR_INVALID_POINTER; + if (cchBuf <= 0) + return VERR_INVALID_PARAMETER; + memset(pszBuf, 0, cchBuf); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + + /* + * Pass it on to the EMT. + */ + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3MemReadString, 5, pUVM, idCpu, pAddress, pszBuf, cchBuf); +} + + +/** + * Writes guest memory. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the target CPU context (for the address). + * @param pAddress Where to start writing. + * @param pvBuf The data to write. + * @param cbWrite The number of bytes to write. + */ +static DECLCALLBACK(int) dbgfR3MemWrite(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, void const *pvBuf, size_t cbWrite) +{ + /* + * Validate the input we use, PGM does the rest. + */ + if (!DBGFR3AddrIsValid(pUVM, pAddress)) + return VERR_INVALID_POINTER; + if (!VALID_PTR(pvBuf)) + return VERR_INVALID_POINTER; + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * HMA is always special. + */ + int rc; + if (DBGFADDRESS_IS_HMA(pAddress)) + { + /** @todo write to HMA. */ + rc = VERR_ACCESS_DENIED; + } + else + { + /* + * Select PGM function by addressing mode. + */ + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + PGMMODE enmMode = PGMGetGuestMode(pVCpu); + if ( enmMode == PGMMODE_REAL + || enmMode == PGMMODE_PROTECTED + || DBGFADDRESS_IS_PHYS(pAddress) ) + rc = PGMPhysSimpleWriteGCPhys(pVM, pAddress->FlatPtr, pvBuf, cbWrite); + else + { +#if GC_ARCH_BITS > 32 + if ( ( pAddress->FlatPtr >= _4G + || pAddress->FlatPtr + cbWrite > _4G) + && enmMode != PGMMODE_AMD64 + && enmMode != PGMMODE_AMD64_NX) + return VERR_PAGE_TABLE_NOT_PRESENT; +#endif + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pAddress->FlatPtr, pvBuf, cbWrite); + } + } + return rc; +} + + +/** + * Writes guest memory. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the target CPU context (for the address). + * @param pAddress Where to start writing. + * @param pvBuf The data to write.
+ * @param cbWrite The number of bytes to write. + */ +VMMR3DECL(int) DBGFR3MemWrite(PUVM pUVM, VMCPUID idCpu, PCDBGFADDRESS pAddress, void const *pvBuf, size_t cbWrite) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3MemWrite, 5, pUVM, idCpu, pAddress, pvBuf, cbWrite); +} + + +/** + * Worker for DBGFR3SelQueryInfo that calls into SELM. + */ +static DECLCALLBACK(int) dbgfR3SelQueryInfo(PUVM pUVM, VMCPUID idCpu, RTSEL Sel, uint32_t fFlags, PDBGFSELINFO pSelInfo) +{ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Make the query. + */ + int rc; + if (!(fFlags & DBGFSELQI_FLAGS_DT_SHADOW)) + { + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + VMCPU_ASSERT_EMT(pVCpu); + rc = SELMR3GetSelectorInfo(pVM, pVCpu, Sel, pSelInfo); + + /* + * 64-bit mode HACKS for making data and stack selectors wide open when + * queried. This is voodoo magic. + */ + if (fFlags & DBGFSELQI_FLAGS_DT_ADJ_64BIT_MODE) + { + /* Expand 64-bit data and stack selectors. The check is a bit bogus... */ + if ( RT_SUCCESS(rc) + && (pSelInfo->fFlags & ( DBGFSELINFO_FLAGS_LONG_MODE | DBGFSELINFO_FLAGS_REAL_MODE | DBGFSELINFO_FLAGS_PROT_MODE + | DBGFSELINFO_FLAGS_GATE | DBGFSELINFO_FLAGS_HYPER + | DBGFSELINFO_FLAGS_INVALID | DBGFSELINFO_FLAGS_NOT_PRESENT)) + == DBGFSELINFO_FLAGS_LONG_MODE + && pSelInfo->cbLimit != ~(RTGCPTR)0 + && CPUMIsGuestIn64BitCode(pVCpu) ) + { + pSelInfo->GCPtrBase = 0; + pSelInfo->cbLimit = ~(RTGCPTR)0; + } + else if ( Sel == 0 + && CPUMIsGuestIn64BitCode(pVCpu)) + { + pSelInfo->GCPtrBase = 0; + pSelInfo->cbLimit = ~(RTGCPTR)0; + pSelInfo->Sel = 0; + pSelInfo->SelGate = 0; + pSelInfo->fFlags = DBGFSELINFO_FLAGS_LONG_MODE; + pSelInfo->u.Raw64.Gen.u1Present = 1; + pSelInfo->u.Raw64.Gen.u1Long = 1; + pSelInfo->u.Raw64.Gen.u1DescType = 1; + rc = VINF_SUCCESS; + } + } + } + else + { + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + rc = VERR_INVALID_STATE; + else + rc = SELMR3GetShadowSelectorInfo(pVM, Sel, pSelInfo); + } + return rc; +} + + +/** + * Gets information about a selector. + * + * Intended for the debugger mostly and will prefer the guest + * descriptor tables over the shadow ones. + * + * @returns VBox status code, the following are the common ones. + * @retval VINF_SUCCESS on success. + * @retval VERR_INVALID_SELECTOR if the selector isn't fully inside the + * descriptor table. + * @retval VERR_SELECTOR_NOT_PRESENT if the LDT is invalid or not present. This + * is not returned if the selector itself isn't present, you have to + * check that for yourself (see DBGFSELINFO::fFlags). + * @retval VERR_PAGE_TABLE_NOT_PRESENT or VERR_PAGE_NOT_PRESENT if the + * pagetable or page backing the selector table wasn't present. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the virtual CPU context. + * @param Sel The selector to get info about. + * @param fFlags Flags, see DBGFQSEL_FLAGS_*. + * @param pSelInfo Where to store the information. This will always be + * updated. + * + * @remarks This is a wrapper around SELMR3GetSelectorInfo and + * SELMR3GetShadowSelectorInfo. 
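+ * + * Calling sketch (uCs and uSs are hypothetical selector values fetched elsewhere, e.g. from the guest register state): + * @code + * DBGFSELINFO SelInfo; + * int rc = DBGFR3SelQueryInfo(pUVM, 0 /*idCpu*/, uCs, DBGFSELQI_FLAGS_DT_GUEST, &SelInfo); + * if (RT_SUCCESS(rc)) + * rc = DBGFR3SelInfoValidateCS(&SelInfo, uSs); + * @endcode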
+ */ +VMMR3DECL(int) DBGFR3SelQueryInfo(PUVM pUVM, VMCPUID idCpu, RTSEL Sel, uint32_t fFlags, PDBGFSELINFO pSelInfo) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + AssertReturn(!(fFlags & ~(DBGFSELQI_FLAGS_DT_GUEST | DBGFSELQI_FLAGS_DT_SHADOW | DBGFSELQI_FLAGS_DT_ADJ_64BIT_MODE)), VERR_INVALID_PARAMETER); + AssertReturn( (fFlags & (DBGFSELQI_FLAGS_DT_SHADOW | DBGFSELQI_FLAGS_DT_ADJ_64BIT_MODE)) + != (DBGFSELQI_FLAGS_DT_SHADOW | DBGFSELQI_FLAGS_DT_ADJ_64BIT_MODE), VERR_INVALID_PARAMETER); + + /* Clear the return data here on this thread. */ + memset(pSelInfo, 0, sizeof(*pSelInfo)); + + /* + * Dispatch the request to a worker running on the target CPU. + */ + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3SelQueryInfo, 5, pUVM, idCpu, Sel, fFlags, pSelInfo); +} + + +/** + * Validates a CS selector. + * + * @returns VBox status code. + * @param pSelInfo Pointer to the selector information for the CS selector. + * @param SelCPL The selector defining the CPL (SS). + */ +VMMDECL(int) DBGFR3SelInfoValidateCS(PCDBGFSELINFO pSelInfo, RTSEL SelCPL) +{ + /* + * Check if present. + */ + if (pSelInfo->u.Raw.Gen.u1Present) + { + /* + * Type check. + */ + if ( pSelInfo->u.Raw.Gen.u1DescType == 1 + && (pSelInfo->u.Raw.Gen.u4Type & X86_SEL_TYPE_CODE)) + { + /* + * Check level. + */ + unsigned uLevel = RT_MAX(SelCPL & X86_SEL_RPL, pSelInfo->Sel & X86_SEL_RPL); + if ( !(pSelInfo->u.Raw.Gen.u4Type & X86_SEL_TYPE_CONF) + ? uLevel <= pSelInfo->u.Raw.Gen.u2Dpl + : uLevel >= pSelInfo->u.Raw.Gen.u2Dpl /* hope I got this right now... */ + ) + return VINF_SUCCESS; + return VERR_INVALID_RPL; + } + return VERR_NOT_CODE_SELECTOR; + } + return VERR_SELECTOR_NOT_PRESENT; +} + + +/** + * Converts a PGM paging mode to a set of DBGFPGDMP_XXX flags. + * + * @returns Flags. UINT32_MAX if the mode is invalid (asserted). + * @param enmMode The mode. + */ +static uint32_t dbgfR3PagingDumpModeToFlags(PGMMODE enmMode) +{ + switch (enmMode) + { + case PGMMODE_32_BIT: + return DBGFPGDMP_FLAGS_PSE; + case PGMMODE_PAE: + return DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE; + case PGMMODE_PAE_NX: + return DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_NXE; + case PGMMODE_AMD64: + return DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_LME; + case PGMMODE_AMD64_NX: + return DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_LME | DBGFPGDMP_FLAGS_NXE; + case PGMMODE_NESTED_32BIT: + return DBGFPGDMP_FLAGS_NP | DBGFPGDMP_FLAGS_PSE; + case PGMMODE_NESTED_PAE: + return DBGFPGDMP_FLAGS_NP | DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_NXE; + case PGMMODE_NESTED_AMD64: + return DBGFPGDMP_FLAGS_NP | DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_LME | DBGFPGDMP_FLAGS_NXE; + case PGMMODE_EPT: + return DBGFPGDMP_FLAGS_EPT; + case PGMMODE_NONE: + return 0; + default: + AssertFailedReturn(UINT32_MAX); + } +} + + +/** + * EMT worker for DBGFR3PagingDumpEx. + * + * @returns VBox status code. + * @param pUVM The shared VM handle. + * @param idCpu The current CPU ID. + * @param fFlags The flags, DBGFPGDMP_FLAGS_XXX. Valid. + * @param pcr3 The CR3 to use (unless we're getting the current + * state, see @a fFlags). + * @param pu64FirstAddr The first address. + * @param pu64LastAddr The last address. + * @param cMaxDepth The depth. + * @param pHlp The output callbacks. 
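+ * + * @note When both DBGFPGDMP_FLAGS_GUEST and DBGFPGDMP_FLAGS_SHADOW are set, + * the worker calls itself recursively and dumps the two contexts one + * after the other.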
+ */ +static DECLCALLBACK(int) dbgfR3PagingDumpEx(PUVM pUVM, VMCPUID idCpu, uint32_t fFlags, uint64_t *pcr3, + uint64_t *pu64FirstAddr, uint64_t *pu64LastAddr, + uint32_t cMaxDepth, PCDBGFINFOHLP pHlp) +{ + /* + * Implement dumping both context by means of recursion. + */ + if ((fFlags & (DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_SHADOW)) == (DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_SHADOW)) + { + int rc1 = dbgfR3PagingDumpEx(pUVM, idCpu, fFlags & ~DBGFPGDMP_FLAGS_GUEST, + pcr3, pu64FirstAddr, pu64LastAddr, cMaxDepth, pHlp); + int rc2 = dbgfR3PagingDumpEx(pUVM, idCpu, fFlags & ~DBGFPGDMP_FLAGS_SHADOW, + pcr3, pu64FirstAddr, pu64LastAddr, cMaxDepth, pHlp); + return RT_FAILURE(rc1) ? rc1 : rc2; + } + + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Get the current CR3/mode if required. + */ + uint64_t cr3 = *pcr3; + if (fFlags & (DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE)) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + if (fFlags & DBGFPGDMP_FLAGS_SHADOW) + { + if (PGMGetShadowMode(pVCpu) == PGMMODE_NONE) + { + pHlp->pfnPrintf(pHlp, "Shadow paging mode is 'none' (NEM)\n"); + return VINF_SUCCESS; + } + + if (fFlags & DBGFPGDMP_FLAGS_CURRENT_CR3) + cr3 = PGMGetHyperCR3(pVCpu); + if (fFlags & DBGFPGDMP_FLAGS_CURRENT_MODE) + fFlags |= dbgfR3PagingDumpModeToFlags(PGMGetShadowMode(pVCpu)); + } + else + { + if (fFlags & DBGFPGDMP_FLAGS_CURRENT_CR3) + cr3 = CPUMGetGuestCR3(pVCpu); + if (fFlags & DBGFPGDMP_FLAGS_CURRENT_MODE) + { + AssertCompile(DBGFPGDMP_FLAGS_PSE == X86_CR4_PSE); AssertCompile(DBGFPGDMP_FLAGS_PAE == X86_CR4_PAE); + fFlags |= CPUMGetGuestCR4(pVCpu) & (X86_CR4_PSE | X86_CR4_PAE); + AssertCompile(DBGFPGDMP_FLAGS_LME == MSR_K6_EFER_LME); AssertCompile(DBGFPGDMP_FLAGS_NXE == MSR_K6_EFER_NXE); + fFlags |= CPUMGetGuestEFER(pVCpu) & (MSR_K6_EFER_LME | MSR_K6_EFER_NXE); + } + } + } + fFlags &= ~(DBGFPGDMP_FLAGS_CURRENT_MODE | DBGFPGDMP_FLAGS_CURRENT_CR3); + + /* + * Call PGM to do the real work. + */ + int rc; + if (fFlags & DBGFPGDMP_FLAGS_SHADOW) + rc = PGMR3DumpHierarchyShw(pVM, cr3, fFlags, *pu64FirstAddr, *pu64LastAddr, cMaxDepth, pHlp); + else + rc = PGMR3DumpHierarchyGst(pVM, cr3, fFlags, *pu64FirstAddr, *pu64LastAddr, cMaxDepth, pHlp); + return rc; +} + + +/** + * Dump paging structures. + * + * This API can be used to dump both guest and shadow structures. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The current CPU ID. + * @param fFlags The flags, DBGFPGDMP_FLAGS_XXX. + * @param cr3 The CR3 to use (unless we're getting the current + * state, see @a fFlags). + * @param u64FirstAddr The address to start dumping at. + * @param u64LastAddr The address to end dumping after. + * @param cMaxDepth The depth. + * @param pHlp The output callbacks. Defaults to the debug log if + * NULL. + */ +VMMDECL(int) DBGFR3PagingDumpEx(PUVM pUVM, VMCPUID idCpu, uint32_t fFlags, uint64_t cr3, uint64_t u64FirstAddr, + uint64_t u64LastAddr, uint32_t cMaxDepth, PCDBGFINFOHLP pHlp) +{ + /* + * Input validation. 
+ */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + AssertReturn(!(fFlags & ~DBGFPGDMP_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER); + AssertReturn(fFlags & (DBGFPGDMP_FLAGS_SHADOW | DBGFPGDMP_FLAGS_GUEST), VERR_INVALID_PARAMETER); + AssertReturn((fFlags & DBGFPGDMP_FLAGS_CURRENT_MODE) || !(fFlags & DBGFPGDMP_FLAGS_MODE_MASK), VERR_INVALID_PARAMETER); + AssertReturn( !(fFlags & DBGFPGDMP_FLAGS_EPT) + || !(fFlags & (DBGFPGDMP_FLAGS_LME | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_NXE)) + , VERR_INVALID_PARAMETER); + AssertPtrReturn(pHlp, VERR_INVALID_POINTER); + AssertReturn(cMaxDepth, VERR_INVALID_PARAMETER); + + /* + * Forward the request to the target CPU. + */ + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3PagingDumpEx, 8, + pUVM, idCpu, fFlags, &cr3, &u64FirstAddr, &u64LastAddr, cMaxDepth, pHlp); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFModule.cpp b/src/VBox/VMM/VMMR3/DBGFModule.cpp new file mode 100644 index 00000000..02746644 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFModule.cpp @@ -0,0 +1,290 @@ +/* $Id: DBGFModule.cpp $ */ +/** @file + * DBGF - Debugger Facility, Module & Segment Management. + */ + +/* + * Copyright (C) 2008-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_dbgf_module DBGFModule - Module & Segment Management + * + * A module is our representation of an executable binary. It's main purpose + * is to provide segments that can be mapped into address spaces and thereby + * provide debug info for those parts for the guest code or data. + * + * This module will not deal directly with debug info, it will only serve + * as an interface between the debugger / symbol lookup and the debug info + * readers. + * + * An executable binary doesn't need to have a file, or that is, we don't + * need the file to create a module for it. There will be interfaces for + * ROMs to register themselves so we can get to their symbols, and there + * will be interfaces for the guest OS plugins (@see pg_dbgf_os) to + * register kernel, drivers and other global modules. + */ + +#if 0 +#include + + +/** Special segment number that indicates that the offset is a relative + * virtual address (RVA). I.e. an offset from the start of the module. */ +#define DBGF_SEG_RVA UINT32_C(0xfffffff0) + +/** @defgroup grp_dbgf_dbginfo Debug Info Types + * @{ */ +/** Other format. */ +#define DBGF_DBGINFO_OTHER RT_BIT_32(0) +/** Stabs. */ +#define DBGF_DBGINFO_STABS RT_BIT_32(1) +/** Debug With Arbitrary Record Format (DWARF). */ +#define DBGF_DBGINFO_DWARF RT_BIT_32(2) +/** Microsoft Codeview debug info. */ +#define DBGF_DBGINFO_CODEVIEW RT_BIT_32(3) +/** Watcom debug info. */ +#define DBGF_DBGINFO_WATCOM RT_BIT_32(4) +/** IBM High Level Language debug info. */ +#define DBGF_DBGINFO_HLL RT_BIT_32(5) +/** Old OS/2 and Windows symbol file. */ +#define DBGF_DBGINFO_SYM RT_BIT_32(6) +/** Map file. 
*/ +#define DBGF_DBGINFO_MAP RT_BIT_32(7) +/** @} */ + +/** @defgroup grp_dbgf_exeimg Executable Image Types + * @{ */ +/** Some other format. */ +#define DBGF_EXEIMG_OTHER RT_BIT_32(0) +/** Portable Executable. */ +#define DBGF_EXEIMG_PE RT_BIT_32(1) +/** Linear eXecutable. */ +#define DBGF_EXEIMG_LX RT_BIT_32(2) +/** Linear Executable. */ +#define DBGF_EXEIMG_LE RT_BIT_32(3) +/** New Executable. */ +#define DBGF_EXEIMG_NE RT_BIT_32(4) +/** DOS Executable (Mark Zbikowski). */ +#define DBGF_EXEIMG_MZ RT_BIT_32(5) +/** COM Executable. */ +#define DBGF_EXEIMG_COM RT_BIT_32(6) +/** a.out Executable. */ +#define DBGF_EXEIMG_AOUT RT_BIT_32(7) +/** Executable and Linkable Format. */ +#define DBGF_EXEIMG_ELF RT_BIT_32(8) +/** Mach-O Executable (including FAT ones). */ +#define DBGF_EXEIMG_MACHO RT_BIT_32(9) +/** @} */ + +/** Pointer to a module. */ +typedef struct DBGFMOD *PDBGFMOD; + + +/** + * Virtual method table for executable image interpreters. + */ +typedef struct DBGFMODVTIMG +{ + /** Magic number (DBGFMODVTIMG_MAGIC). */ + uint32_t u32Magic; + /** Mask of supported debug info types, see grp_dbgf_exeimg. + * Used to speed up the search for a suitable interpreter. */ + uint32_t fSupports; + /** The name of the interpreter. */ + const char *pszName; + + /** + * Try open the image. + * + * This combines probing and opening. + * + * @returns VBox status code. No informational returns defined. + * + * @param pMod Pointer to the module that is being opened. + * + * The DBGFMOD::pszDbgFile member will point to + * the filename of any debug info we're aware of + * on input. Also, or alternatively, it is expected + * that the interpreter will look for debug info in + * the executable image file when present and that it + * may ask the image interpreter for this when it's + * around. + * + * Upon successful return the method is expected to + * initialize pDbgOps and pvDbgPriv. + */ + DECLCALLBACKMEMBER(int, pfnTryOpen)(PDBGFMOD pMod); + + /** + * Close the interpreter, freeing all associated resources. + * + * The caller sets the pDbgOps and pvDbgPriv DBGFMOD members + * to NULL upon return. + * + * @param pMod Pointer to the module structure. + */ + DECLCALLBACKMEMBER(int, pfnClose)(PDBGFMOD pMod); + +} DBGFMODVTIMG + +/** + * Virtual method table for debug info interpreters. + */ +typedef struct DBGFMODVTDBG +{ + /** Magic number (DBGFMODVTDBG_MAGIC). */ + uint32_t u32Magic; + /** Mask of supported debug info types, see grp_dbgf_dbginfo. + * Used to speed up the search for a suitable interpreter. */ + uint32_t fSupports; + /** The name of the interpreter. */ + const char *pszName; + + /** + * Try open the image. + * + * This combines probing and opening. + * + * @returns VBox status code. No informational returns defined. + * + * @param pMod Pointer to the module that is being opened. + * + * The DBGFMOD::pszDbgFile member will point to + * the filename of any debug info we're aware of + * on input. Also, or alternatively, it is expected + * that the interpreter will look for debug info in + * the executable image file when present and that it + * may ask the image interpreter for this when it's + * around. + * + * Upon successful return the method is expected to + * initialize pDbgOps and pvDbgPriv. + */ + DECLCALLBACKMEMBER(int, pfnTryOpen)(PDBGFMOD pMod); + + /** + * Close the interpreter, freeing all associated resources. + * + * The caller sets the pDbgOps and pvDbgPriv DBGFMOD members + * to NULL upon return. + * + * @param pMod Pointer to the module structure. 
+ */ + DECLCALLBACKMEMBER(int, pfnClose)(PDBGFMOD pMod); + + /** + * Queries symbol information by symbol name. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, no informational status code. + * @retval VERR_DBGF_NO_SYMBOLS if there aren't any symbols. + * @retval VERR_SYMBOL_NOT_FOUND if no suitable symbol was found. + * + * @param pMod Pointer to the module structure. + * @param pszSymbol The symbol name. + * @para pSymbol Where to store the symbol information. + */ + DECLCALLBACKMEMBER(int, pfnSymbolByName)(PDBGFMOD pMod, const char *pszSymbol, PDBGFSYMBOL pSymbol); + + /** + * Queries symbol information by address. + * + * The returned symbol is what the debug info interpreter considers the symbol + * most applicable to the specified address. This usually means a symbol with an + * address equal or lower than the requested. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, no informational status code. + * @retval VERR_DBGF_NO_SYMBOLS if there aren't any symbols. + * @retval VERR_SYMBOL_NOT_FOUND if no suitable symbol was found. + * + * @param pMod Pointer to the module structure. + * @param iSeg The segment number (0-based). DBGF_SEG_RVA can be used. + * @param off The offset into the segment. + * @param poffDisp Where to store the distance between the specified address + * and the returned symbol. Optional. + * @param pSymbol Where to store the symbol information. + */ + DECLCALLBACKMEMBER(int, pfnSymbolByAddr)(PDBGFMOD pMod, uint32_t iSeg, RTGCUINTPTR off, PRTGCINTPTR poffDisp, PDBGFSYMBOL pSymbol); + + /** + * Queries line number information by address. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, no informational status code. + * @retval VERR_DBGF_NO_LINE_NUMBERS if there aren't any line numbers. + * @retval VERR_DBGF_LINE_NOT_FOUND if no suitable line number was found. + * + * @param pMod Pointer to the module structure. + * @param iSeg The segment number (0-based). DBGF_SEG_RVA can be used. + * @param off The offset into the segment. + * @param poffDisp Where to store the distance between the specified address + * and the returned line number. Optional. + * @param pLine Where to store the information about the closest line number. + */ + DECLCALLBACKMEMBER(int, pfnLineByAddr)(PDBGFMOD pMod, uint32_t iSeg, RTGCUINTPTR off, PRTGCINTPTR poffDisp, PDBGFLINE pLine); + + /** + * Adds a symbol to the module (optional). + * + * This method is used to implement DBGFR3SymbolAdd. + * + * @returns VBox status code. + * @retval VERR_NOT_SUPPORTED if the interpreter doesn't support this feature. + * + * @param pMod Pointer to the module structure. + * @param pszSymbol The symbol name. + * @param iSeg The segment number (0-based). DBGF_SEG_RVA can be used. + * @param off The offset into the segment. + * @param cbSymbol The area covered by the symbol. 0 is fine. + */ + DECLCALLBACKMEMBER(int, pfnSymbolAdd)(PDBGFMOD pMod, const char *pszSymbol, uint32_t iSeg, RTGCUINTPTR off, RTUINT cbSymbol); + + /** For catching initialization errors (DBGFMODVTDBG_MAGIC). */ + uint32_t u32EndMagic; +} DBGFMODVTDBG; + +#define DBGFMODVTDBG_MAGIC 123 + +/** + * Module. + */ +typedef struct DBGFMOD +{ + /** Magic value (DBGFMOD_MAGIC). */ + uint32_t u32Magic; + /** The number of address spaces this module is currently linked into. + * This is used to perform automatic cleanup and sharing. */ + uint32_t cLinks; + /** The module name (short). */ + const char *pszName; + /** The module filename. Can be NULL. 
*/ + const char *pszImgFile; + /** The debug info file (if external). Can be NULL. */ + const char *pszDbgFile; + + /** The method table for the executable image interpreter. */ + PCDBGFMODVTIMG pImgVt; + /** Pointer to the private data of the executable image interpreter. */ + void *pvImgPriv; + + /** The method table for the debug info interpreter. */ + PCDBGFMODVTDBG pDbgVt; + /** Pointer to the private data of the debug info interpreter. */ + void *pvDbgPriv; + +} DBGFMOD; + +#define DBGFMOD_MAGIC 0x12345678 + +#endif + diff --git a/src/VBox/VMM/VMMR3/DBGFOS.cpp b/src/VBox/VMM/VMMR3/DBGFOS.cpp new file mode 100644 index 00000000..ac3f6b1f --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFOS.cpp @@ -0,0 +1,661 @@ +/* $Id: DBGFOS.cpp $ */ +/** @file + * DBGF - Debugger Facility, Guest OS Diggers. + */ + +/* + * Copyright (C) 2008-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include "DBGFInternal.h" +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + +#define DBGF_OS_READ_LOCK(pUVM) \ + do { int rcLock = RTCritSectRwEnterShared(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) +#define DBGF_OS_READ_UNLOCK(pUVM) \ + do { int rcLock = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) + +#define DBGF_OS_WRITE_LOCK(pUVM) \ + do { int rcLock = RTCritSectRwEnterExcl(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) +#define DBGF_OS_WRITE_UNLOCK(pUVM) \ + do { int rcLock = RTCritSectRwLeaveExcl(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * EMT interface wrappers. + * + * The diggers expects to be called on an EMT. To avoid the debugger+Main having + * + * Since the user (debugger/Main) shouldn't be calling directly into the digger code, but rather + */ +typedef struct DBGFOSEMTWRAPPER +{ + /** Pointer to the next list entry. */ + struct DBGFOSEMTWRAPPER *pNext; + /** The interface type. */ + DBGFOSINTERFACE enmIf; + /** The digger interface pointer. */ + union + { + /** Generic void pointer. */ + void *pv; + /** DBGFOSINTERFACE_DMESG.*/ + PDBGFOSIDMESG pDmesg; + } uDigger; + /** The user mode VM handle. 
*/ + PUVM pUVM; + /** The wrapper interface union (consult enmIf). */ + union + { + /** DBGFOSINTERFACE_DMESG.*/ + DBGFOSIDMESG Dmesg; + } uWrapper; +} DBGFOSEMTWRAPPER; +/** Pointer to an EMT interface wrapper. */ +typedef DBGFOSEMTWRAPPER *PDBGFOSEMTWRAPPER; + + +/** + * Internal init routine called by DBGFR3Init(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +int dbgfR3OSInit(PUVM pUVM) +{ + RT_NOREF_PV(pUVM); + return VINF_SUCCESS; +} + + +/** + * Internal cleanup routine called by DBGFR3Term(), part 1. + * + * @param pUVM The user mode VM handle. + */ +void dbgfR3OSTermPart1(PUVM pUVM) +{ + DBGF_OS_WRITE_LOCK(pUVM); + + /* + * Terminate the current one. + */ + if (pUVM->dbgf.s.pCurOS) + { + pUVM->dbgf.s.pCurOS->pReg->pfnTerm(pUVM, pUVM->dbgf.s.pCurOS->abData); + pUVM->dbgf.s.pCurOS = NULL; + } + + DBGF_OS_WRITE_UNLOCK(pUVM); +} + + +/** + * Internal cleanup routine called by DBGFR3Term(), part 2. + * + * @param pUVM The user mode VM handle. + */ +void dbgfR3OSTermPart2(PUVM pUVM) +{ + DBGF_OS_WRITE_LOCK(pUVM); + + /* This shouldn't happen. */ + AssertStmt(!pUVM->dbgf.s.pCurOS, dbgfR3OSTermPart1(pUVM)); + + /* + * Destroy all the instances. + */ + while (pUVM->dbgf.s.pOSHead) + { + PDBGFOS pOS = pUVM->dbgf.s.pOSHead; + pUVM->dbgf.s.pOSHead = pOS->pNext; + if (pOS->pReg->pfnDestruct) + pOS->pReg->pfnDestruct(pUVM, pOS->abData); + + PDBGFOSEMTWRAPPER pFree = pOS->pWrapperHead; + while ((pFree = pOS->pWrapperHead) != NULL) + { + pOS->pWrapperHead = pFree->pNext; + pFree->pNext = NULL; + MMR3HeapFree(pFree); + } + + MMR3HeapFree(pOS); + } + + DBGF_OS_WRITE_UNLOCK(pUVM); +} + + +/** + * EMT worker function for DBGFR3OSRegister. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pReg The registration structure. + */ +static DECLCALLBACK(int) dbgfR3OSRegister(PUVM pUVM, PDBGFOSREG pReg) +{ + /* more validations. */ + DBGF_OS_READ_LOCK(pUVM); + PDBGFOS pOS; + for (pOS = pUVM->dbgf.s.pOSHead; pOS; pOS = pOS->pNext) + if (!strcmp(pOS->pReg->szName, pReg->szName)) + { + DBGF_OS_READ_UNLOCK(pUVM); + Log(("dbgfR3OSRegister: %s -> VERR_ALREADY_LOADED\n", pReg->szName)); + return VERR_ALREADY_LOADED; + } + DBGF_OS_READ_UNLOCK(pUVM); + + /* + * Allocate a new structure, call the constructor and link it into the list. + */ + pOS = (PDBGFOS)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_OS, RT_UOFFSETOF_DYN(DBGFOS, abData[pReg->cbData])); + AssertReturn(pOS, VERR_NO_MEMORY); + pOS->pReg = pReg; + + int rc = pOS->pReg->pfnConstruct(pUVM, pOS->abData); + if (RT_SUCCESS(rc)) + { + DBGF_OS_WRITE_LOCK(pUVM); + pOS->pNext = pUVM->dbgf.s.pOSHead; + pUVM->dbgf.s.pOSHead = pOS; + DBGF_OS_WRITE_UNLOCK(pUVM); + } + else + { + if (pOS->pReg->pfnDestruct) + pOS->pReg->pfnDestruct(pUVM, pOS->abData); + MMR3HeapFree(pOS); + } + + return VINF_SUCCESS; +} + + +/** + * Registers a guest OS digger. + * + * This will instantiate an instance of the digger and add it + * to the list for us in the next call to DBGFR3OSDetect(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pReg The registration structure. + * @thread Any. + */ +VMMR3DECL(int) DBGFR3OSRegister(PUVM pUVM, PCDBGFOSREG pReg) +{ + /* + * Validate intput. 
+ */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + AssertPtrReturn(pReg, VERR_INVALID_POINTER); + AssertReturn(pReg->u32Magic == DBGFOSREG_MAGIC, VERR_INVALID_MAGIC); + AssertReturn(pReg->u32EndMagic == DBGFOSREG_MAGIC, VERR_INVALID_MAGIC); + AssertReturn(!pReg->fFlags, VERR_INVALID_PARAMETER); + AssertReturn(pReg->cbData < _2G, VERR_INVALID_PARAMETER); + AssertReturn(pReg->szName[0], VERR_INVALID_NAME); + AssertReturn(RTStrEnd(&pReg->szName[0], sizeof(pReg->szName)), VERR_INVALID_NAME); + AssertPtrReturn(pReg->pfnConstruct, VERR_INVALID_POINTER); + AssertPtrNullReturn(pReg->pfnDestruct, VERR_INVALID_POINTER); + AssertPtrReturn(pReg->pfnProbe, VERR_INVALID_POINTER); + AssertPtrReturn(pReg->pfnInit, VERR_INVALID_POINTER); + AssertPtrReturn(pReg->pfnRefresh, VERR_INVALID_POINTER); + AssertPtrReturn(pReg->pfnTerm, VERR_INVALID_POINTER); + AssertPtrReturn(pReg->pfnQueryVersion, VERR_INVALID_POINTER); + AssertPtrReturn(pReg->pfnQueryInterface, VERR_INVALID_POINTER); + + /* + * Pass it on to EMT(0). + */ + return VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3OSRegister, 2, pUVM, pReg); +} + + +/** + * EMT worker function for DBGFR3OSDeregister. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pReg The registration structure. + */ +static DECLCALLBACK(int) dbgfR3OSDeregister(PUVM pUVM, PDBGFOSREG pReg) +{ + /* + * Unlink it. + */ + bool fWasCurOS = false; + PDBGFOS pOSPrev = NULL; + PDBGFOS pOS; + DBGF_OS_WRITE_LOCK(pUVM); + for (pOS = pUVM->dbgf.s.pOSHead; pOS; pOSPrev = pOS, pOS = pOS->pNext) + if (pOS->pReg == pReg) + { + if (pOSPrev) + pOSPrev->pNext = pOS->pNext; + else + pUVM->dbgf.s.pOSHead = pOS->pNext; + if (pUVM->dbgf.s.pCurOS == pOS) + { + pUVM->dbgf.s.pCurOS = NULL; + fWasCurOS = true; + } + break; + } + DBGF_OS_WRITE_UNLOCK(pUVM); + if (!pOS) + { + Log(("DBGFR3OSDeregister: %s -> VERR_NOT_FOUND\n", pReg->szName)); + return VERR_NOT_FOUND; + } + + /* + * Terminate it if it was the current OS, then invoke the + * destructor and clean up. + */ + if (fWasCurOS) + pOS->pReg->pfnTerm(pUVM, pOS->abData); + if (pOS->pReg->pfnDestruct) + pOS->pReg->pfnDestruct(pUVM, pOS->abData); + + PDBGFOSEMTWRAPPER pFree = pOS->pWrapperHead; + while ((pFree = pOS->pWrapperHead) != NULL) + { + pOS->pWrapperHead = pFree->pNext; + pFree->pNext = NULL; + MMR3HeapFree(pFree); + } + + MMR3HeapFree(pOS); + + return VINF_SUCCESS; +} + + +/** + * Deregisters a guest OS digger previously registered by DBGFR3OSRegister. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param pReg The registration structure. + * @thread Any. + */ +VMMR3DECL(int) DBGFR3OSDeregister(PUVM pUVM, PCDBGFOSREG pReg) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pReg, VERR_INVALID_POINTER); + AssertReturn(pReg->u32Magic == DBGFOSREG_MAGIC, VERR_INVALID_MAGIC); + AssertReturn(pReg->u32EndMagic == DBGFOSREG_MAGIC, VERR_INVALID_MAGIC); + AssertReturn(RTStrEnd(&pReg->szName[0], sizeof(pReg->szName)), VERR_INVALID_NAME); + + DBGF_OS_READ_LOCK(pUVM); + PDBGFOS pOS; + for (pOS = pUVM->dbgf.s.pOSHead; pOS; pOS = pOS->pNext) + if (pOS->pReg == pReg) + break; + DBGF_OS_READ_UNLOCK(pUVM); + + if (!pOS) + { + Log(("DBGFR3OSDeregister: %s -> VERR_NOT_FOUND\n", pReg->szName)); + return VERR_NOT_FOUND; + } + + /* + * Pass it on to EMT(0). 
+ */ + return VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3OSDeregister, 2, pUVM, pReg); +} + + +/** + * EMT worker function for DBGFR3OSDetect. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if successfully detected. + * @retval VINF_DBGF_OS_NOT_DETCTED if we cannot figure it out. + * + * @param pUVM The user mode VM handle. + * @param pszName Where to store the OS name. Empty string if not detected. + * @param cchName Size of the buffer. + */ +static DECLCALLBACK(int) dbgfR3OSDetect(PUVM pUVM, char *pszName, size_t cchName) +{ + /* + * Cycle thru the detection routines. + */ + DBGF_OS_WRITE_LOCK(pUVM); + + PDBGFOS const pOldOS = pUVM->dbgf.s.pCurOS; + pUVM->dbgf.s.pCurOS = NULL; + + for (PDBGFOS pNewOS = pUVM->dbgf.s.pOSHead; pNewOS; pNewOS = pNewOS->pNext) + if (pNewOS->pReg->pfnProbe(pUVM, pNewOS->abData)) + { + int rc; + pUVM->dbgf.s.pCurOS = pNewOS; + if (pOldOS == pNewOS) + rc = pNewOS->pReg->pfnRefresh(pUVM, pNewOS->abData); + else + { + if (pOldOS) + pOldOS->pReg->pfnTerm(pUVM, pNewOS->abData); + rc = pNewOS->pReg->pfnInit(pUVM, pNewOS->abData); + } + if (pszName && cchName) + strncat(pszName, pNewOS->pReg->szName, cchName); + + DBGF_OS_WRITE_UNLOCK(pUVM); + return rc; + } + + /* not found */ + if (pOldOS) + pOldOS->pReg->pfnTerm(pUVM, pOldOS->abData); + + DBGF_OS_WRITE_UNLOCK(pUVM); + return VINF_DBGF_OS_NOT_DETCTED; +} + + +/** + * Detects the guest OS and tries to dig out symbols and other useful stuff. + * + * When called a second time, the symbols will just be updated if the OS + * is the same. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if successfully detected. + * @retval VINF_DBGF_OS_NOT_DETCTED if we cannot figure it out. + * + * @param pUVM The user mode VM handle. + * @param pszName Where to store the OS name. Empty string if not detected. + * @param cchName Size of the buffer. + * @thread Any. + */ +VMMR3DECL(int) DBGFR3OSDetect(PUVM pUVM, char *pszName, size_t cchName) +{ + AssertPtrNullReturn(pszName, VERR_INVALID_POINTER); + if (pszName && cchName) + *pszName = '\0'; + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Pass it on to EMT(0). + */ + return VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3OSDetect, 3, pUVM, pszName, cchName); +} + + +/** + * EMT worker function for DBGFR3OSQueryNameAndVersion. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszName Where to store the OS name. Optional. + * @param cchName The size of the name buffer. + * @param pszVersion Where to store the version string. Optional. + * @param cchVersion The size of the version buffer. + */ +static DECLCALLBACK(int) dbgfR3OSQueryNameAndVersion(PUVM pUVM, char *pszName, size_t cchName, char *pszVersion, size_t cchVersion) +{ + /* + * Any known OS?
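+ * If so, copy out its name and query the digger for the version string while holding the read lock.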
+ */ + DBGF_OS_READ_LOCK(pUVM); + + if (pUVM->dbgf.s.pCurOS) + { + int rc = VINF_SUCCESS; + if (pszName && cchName) + { + size_t cch = strlen(pUVM->dbgf.s.pCurOS->pReg->szName); + if (cchName > cch) + memcpy(pszName, pUVM->dbgf.s.pCurOS->pReg->szName, cch + 1); + else + { + memcpy(pszName, pUVM->dbgf.s.pCurOS->pReg->szName, cchName - 1); + pszName[cchName - 1] = '\0'; + rc = VINF_BUFFER_OVERFLOW; + } + } + + if (pszVersion && cchVersion) + { + int rc2 = pUVM->dbgf.s.pCurOS->pReg->pfnQueryVersion(pUVM, pUVM->dbgf.s.pCurOS->abData, pszVersion, cchVersion); + if (RT_FAILURE(rc2) || rc == VINF_SUCCESS) + rc = rc2; + } + + DBGF_OS_READ_UNLOCK(pUVM); + return rc; + } + + DBGF_OS_READ_UNLOCK(pUVM); + return VERR_DBGF_OS_NOT_DETCTED; +} + + +/** + * Queries the name and/or version string for the guest OS. + * + * It goes without saying that this querying is done using the current + * guest OS digger and not additions or user configuration. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszName Where to store the OS name. Optional. + * @param cchName The size of the name buffer. + * @param pszVersion Where to store the version string. Optional. + * @param cchVersion The size of the version buffer. + * @thread Any. + */ +VMMR3DECL(int) DBGFR3OSQueryNameAndVersion(PUVM pUVM, char *pszName, size_t cchName, char *pszVersion, size_t cchVersion) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrNullReturn(pszName, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszVersion, VERR_INVALID_POINTER); + + /* + * Initialize the output up front. + */ + if (pszName && cchName) + *pszName = '\0'; + if (pszVersion && cchVersion) + *pszVersion = '\0'; + + /* + * Pass it on to EMT(0). + */ + return VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, + (PFNRT)dbgfR3OSQueryNameAndVersion, 5, pUVM, pszName, cchName, pszVersion, cchVersion); +} + + +/** + * @interface_method_impl{DBGFOSIDMESG,pfnQueryKernelLog, Generic EMT wrapper.} + */ +static DECLCALLBACK(int) dbgfR3OSEmtIDmesg_QueryKernelLog(PDBGFOSIDMESG pThis, PUVM pUVM, uint32_t fFlags, uint32_t cMessages, + char *pszBuf, size_t cbBuf, size_t *pcbActual) +{ + PDBGFOSEMTWRAPPER pWrapper = RT_FROM_MEMBER(pThis, DBGFOSEMTWRAPPER, uWrapper.Dmesg); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pUVM == pWrapper->pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(!fFlags, VERR_INVALID_FLAGS); + AssertReturn(cMessages > 0, VERR_INVALID_PARAMETER); + if (cbBuf) + AssertPtrReturn(pszBuf, VERR_INVALID_POINTER); + AssertPtrNullReturn(pcbActual, VERR_INVALID_POINTER); + + return VMR3ReqPriorityCallWaitU(pWrapper->pUVM, 0 /*idDstCpu*/, + (PFNRT)pWrapper->uDigger.pDmesg->pfnQueryKernelLog, 7, + pWrapper->uDigger.pDmesg, pUVM, fFlags, cMessages, pszBuf, cbBuf, pcbActual); + +} + + +/** + * EMT worker for DBGFR3OSQueryInterface. + * + * @param pUVM The user mode VM handle. + * @param enmIf The interface identifier. + * @param ppvIf Where to store the interface pointer on success. + */ +static DECLCALLBACK(void) dbgfR3OSQueryInterface(PUVM pUVM, DBGFOSINTERFACE enmIf, void **ppvIf) +{ + AssertPtrReturnVoid(ppvIf); + *ppvIf = NULL; + AssertReturnVoid(enmIf > DBGFOSINTERFACE_INVALID && enmIf < DBGFOSINTERFACE_END); + UVM_ASSERT_VALID_EXT_RETURN_VOID(pUVM); + + /* + * Forward the query to the current OS. 
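+ * Any interface the digger returns is wrapped (see DBGFOSEMTWRAPPER) so that calls from the debugger end up on an EMT.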
+ */ + DBGF_OS_READ_LOCK(pUVM); + PDBGFOS pOS = pUVM->dbgf.s.pCurOS; + if (pOS) + { + void *pvDiggerIf; + pvDiggerIf = pOS->pReg->pfnQueryInterface(pUVM, pUVM->dbgf.s.pCurOS->abData, enmIf); + if (pvDiggerIf) + { + /* + * Do we have an EMT wrapper for this interface already? + * + * We ASSUME the interfaces are static and not dynamically allocated + * for each QueryInterface call. + */ + PDBGFOSEMTWRAPPER pWrapper = pOS->pWrapperHead; + while ( pWrapper != NULL + && ( pWrapper->uDigger.pv != pvDiggerIf + && pWrapper->enmIf != enmIf) ) + pWrapper = pWrapper->pNext; + if (pWrapper) + { + *ppvIf = &pWrapper->uWrapper; + DBGF_OS_READ_UNLOCK(pUVM); + return; + } + DBGF_OS_READ_UNLOCK(pUVM); + + /* + * Create a wrapper. + */ + int rc = MMR3HeapAllocExU(pUVM, MM_TAG_DBGF_OS, sizeof(*pWrapper), (void **)&pWrapper); + if (RT_FAILURE(rc)) + return; + pWrapper->uDigger.pv = pvDiggerIf; + pWrapper->pUVM = pUVM; + pWrapper->enmIf = enmIf; + switch (enmIf) + { + case DBGFOSINTERFACE_DMESG: + pWrapper->uWrapper.Dmesg.u32Magic = DBGFOSIDMESG_MAGIC; + pWrapper->uWrapper.Dmesg.pfnQueryKernelLog = dbgfR3OSEmtIDmesg_QueryKernelLog; + pWrapper->uWrapper.Dmesg.u32EndMagic = DBGFOSIDMESG_MAGIC; + break; + default: + AssertFailed(); + MMR3HeapFree(pWrapper); + return; + } + + DBGF_OS_WRITE_LOCK(pUVM); + if (pUVM->dbgf.s.pCurOS == pOS) + { + pWrapper->pNext = pOS->pWrapperHead; + pOS->pWrapperHead = pWrapper; + *ppvIf = &pWrapper->uWrapper; + DBGF_OS_WRITE_UNLOCK(pUVM); + } + else + { + DBGF_OS_WRITE_UNLOCK(pUVM); + MMR3HeapFree(pWrapper); + } + return; + } + } + DBGF_OS_READ_UNLOCK(pUVM); +} + + +/** + * Query an optional digger interface. + * + * @returns Pointer to the digger interface on success, NULL if the interfaces isn't + * available or no active guest OS digger. + * @param pUVM The user mode VM handle. + * @param enmIf The interface identifier. + * @thread Any. + */ +VMMR3DECL(void *) DBGFR3OSQueryInterface(PUVM pUVM, DBGFOSINTERFACE enmIf) +{ + AssertMsgReturn(enmIf > DBGFOSINTERFACE_INVALID && enmIf < DBGFOSINTERFACE_END, ("%d\n", enmIf), NULL); + + /* + * Pass it on to an EMT. + */ + void *pvIf = NULL; + VMR3ReqPriorityCallVoidWaitU(pUVM, VMCPUID_ANY, (PFNRT)dbgfR3OSQueryInterface, 3, pUVM, enmIf, &pvIf); + return pvIf; +} + + + +/** + * Internal wrapper for calling DBGFOSREG::pfnStackUnwindAssist. + */ +int dbgfR3OSStackUnwindAssist(PUVM pUVM, VMCPUID idCpu, PDBGFSTACKFRAME pFrame, PRTDBGUNWINDSTATE pState, + PCCPUMCTX pInitialCtx, RTDBGAS hAs, uint64_t *puScratch) +{ + int rc = VINF_SUCCESS; + if (pUVM->dbgf.s.pCurOS) + { + ASMCompilerBarrier(); + DBGF_OS_READ_LOCK(pUVM); + PDBGFOS pOS = pUVM->dbgf.s.pCurOS; + if (pOS) + rc = pOS->pReg->pfnStackUnwindAssist(pUVM, pUVM->dbgf.s.pCurOS->abData, idCpu, pFrame, + pState, pInitialCtx, hAs, puScratch); + DBGF_OS_READ_UNLOCK(pUVM); + } + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFR3BugCheck.cpp b/src/VBox/VMM/VMMR3/DBGFR3BugCheck.cpp new file mode 100644 index 00000000..0dd10ef3 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFR3BugCheck.cpp @@ -0,0 +1,920 @@ +/* $Id: DBGFR3BugCheck.cpp $ */ +/** @file + * DBGF - Debugger Facility, NT Bug Checks. + */ + +/* + * Copyright (C) 2018-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include "DBGFInternal.h" +#include +#include +#include + +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static FNDBGFHANDLERINT dbgfR3BugCheckInfo; + + +/** + * Initializes the bug check state and registers the info callback. + * + * No termination function needed. + * + * @returns VBox status code. + * @param pVM The VM handle. + */ +int dbgfR3BugCheckInit(PVM pVM) +{ + pVM->dbgf.s.BugCheck.idCpu = NIL_VMCPUID; + pVM->dbgf.s.BugCheck.enmEvent = DBGFEVENT_END; + + return DBGFR3InfoRegisterInternal(pVM, "bugcheck", + "Show bugcheck info. Can specify bug check code and parameters to lookup info.", + dbgfR3BugCheckInfo); +} + + +/** + * Names a few common NT status codes for DBGFR3FormatBugCheck. + */ +static const char *dbgfR3GetNtStatusName(uint32_t uNtStatus) +{ + switch (uNtStatus) + { + case 0x80000001: return " - STATUS_GUARD_PAGE_VIOLATION"; + case 0x80000002: return " - STATUS_DATATYPE_MISALIGNMENT"; + case 0x80000003: return " - STATUS_BREAKPOINT"; + case 0x80000004: return " - STATUS_SINGLE_STEP"; + case 0xc0000008: return " - STATUS_INVALID_HANDLE"; + case 0xc0000005: return " - STATUS_ACCESS_VIOLATION"; + case 0xc0000027: return " - STATUS_UNWIND"; + case 0xc0000028: return " - STATUS_BAD_STACK"; + case 0xc0000029: return " - STATUS_INVALID_UNWIND_TARGET"; + default: return ""; + } +} + + +/** + * Formats a symbol for DBGFR3FormatBugCheck. + */ +static const char *dbgfR3FormatSymbol(PUVM pUVM, char *pszSymbol, size_t cchSymbol, const char *pszPrefix, uint64_t uFlatAddr) +{ + DBGFADDRESS Addr; + RTGCINTPTR offDisp = 0; + PRTDBGSYMBOL pSym = DBGFR3AsSymbolByAddrA(pUVM, DBGF_AS_GLOBAL, DBGFR3AddrFromFlat(pUVM, &Addr, uFlatAddr), + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + &offDisp, NULL /*phMod*/); + if (pSym) + { + if (!offDisp) + RTStrPrintf(pszSymbol, cchSymbol, "%s%s", pszPrefix, pSym->szName); + else if (offDisp > 0) + RTStrPrintf(pszSymbol, cchSymbol, "%s%s + %#RX64", pszPrefix, pSym->szName, (uint64_t)offDisp); + else + RTStrPrintf(pszSymbol, cchSymbol, "%s%s - %#RX64", pszPrefix, pSym->szName, (uint64_t)-offDisp); + RTDbgSymbolFree(pSym); + } + else + *pszSymbol = '\0'; + return pszSymbol; +} + + +/** + * Formats a windows bug check (BSOD). + * + * @retval VINF_SUCCESS on success. + * @retval VINF_BUFFER_OVERFLOW if there is more data than the buffer can handle. + * + * @param pUVM The usermode VM handle. + * @param pszDetails The output buffer. + * @param cbDetails The size of the output buffer. + * @param uBugCheck The bugheck code. 
+ * @param uP1 Bug check parameter 1. + * @param uP2 Bug check parameter 2. + * @param uP3 Bug check parameter 3. + * @param uP4 Bug check parameter 4. + */ +VMMR3DECL(int) DBGFR3FormatBugCheck(PUVM pUVM, char *pszDetails, size_t cbDetails, + uint64_t uBugCheck, uint64_t uP1, uint64_t uP2, uint64_t uP3, uint64_t uP4) +{ + /* + * Start with bug check line typically seen in windbg. + */ + size_t cchUsed = RTStrPrintf(pszDetails, cbDetails, + "BugCheck %RX64 {%RX64, %RX64, %RX64, %RX64}\n", uBugCheck, uP1, uP2, uP3, uP4); + if (cchUsed >= cbDetails) + return VINF_BUFFER_OVERFLOW; + pszDetails += cchUsed; + cbDetails -= cchUsed; + + /* + * Try name the bugcheck and format parameters if we can/care. + */ + char szSym[512]; + switch (uBugCheck) + { + case 0x00000001: cchUsed = RTStrPrintf(pszDetails, cbDetails, "APC_INDEX_MISMATCH\n"); break; + case 0x00000002: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DEVICE_QUEUE_NOT_BUSY\n"); break; + case 0x00000003: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_AFFINITY_SET\n"); break; + case 0x00000004: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_DATA_ACCESS_TRAP\n"); break; + case 0x00000005: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_PROCESS_ATTACH_ATTEMPT\n"); break; + case 0x00000006: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_PROCESS_DETACH_ATTEMPT\n"); break; + case 0x00000007: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_SOFTWARE_INTERRUPT\n"); break; + case 0x00000008: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IRQL_NOT_DISPATCH_LEVEL\n"); break; + case 0x00000009: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IRQL_NOT_GREATER_OR_EQUAL\n"); break; + case 0x0000000a: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "IRQL_NOT_LESS_OR_EQUAL\n" + "P1: %016RX64 - memory referenced\n" + "P2: %016RX64 - IRQL\n" + "P3: %016RX64 - bitfield\n" + " b0: %u - %s operation\n" + " b3: %u - %sexecute operation\n" + "P4: %016RX64 - EIP/RIP%s\n", + uP1, uP2, uP3, + RT_BOOL(uP3 & RT_BIT_64(0)), uP3 & RT_BIT_64(0) ? "write" : "read", + RT_BOOL(uP3 & RT_BIT_64(3)), uP3 & RT_BIT_64(3) ? 
"not-" : "", + uP4, dbgfR3FormatSymbol(pUVM, szSym, sizeof(szSym), ": ", uP4)); + break; + case 0x0000000b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_EXCEPTION_HANDLING_SUPPORT\n"); break; + case 0x0000000c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MAXIMUM_WAIT_OBJECTS_EXCEEDED\n"); break; + case 0x0000000d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MUTEX_LEVEL_NUMBER_VIOLATION\n"); break; + case 0x0000000e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_USER_MODE_CONTEXT\n"); break; + case 0x0000000f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SPIN_LOCK_ALREADY_OWNED\n"); break; + case 0x00000010: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SPIN_LOCK_NOT_OWNED\n"); break; + case 0x00000011: cchUsed = RTStrPrintf(pszDetails, cbDetails, "THREAD_NOT_MUTEX_OWNER\n"); break; + case 0x00000012: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TRAP_CAUSE_UNKNOWN\n"); break; + case 0x00000013: cchUsed = RTStrPrintf(pszDetails, cbDetails, "EMPTY_THREAD_REAPER_LIST\n"); break; + case 0x00000014: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CREATE_DELETE_LOCK_NOT_LOCKED\n"); break; + case 0x00000015: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LAST_CHANCE_CALLED_FROM_KMODE\n"); break; + case 0x00000016: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CID_HANDLE_CREATION\n"); break; + case 0x00000017: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CID_HANDLE_DELETION\n"); break; + case 0x00000018: cchUsed = RTStrPrintf(pszDetails, cbDetails, "REFERENCE_BY_POINTER\n"); break; + case 0x00000019: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BAD_POOL_HEADER\n"); break; + case 0x0000001a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MEMORY_MANAGEMENT\n"); break; + case 0x0000001b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PFN_SHARE_COUNT\n"); break; + case 0x0000001c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PFN_REFERENCE_COUNT\n"); break; + case 0x0000001d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_SPIN_LOCK_AVAILABLE\n"); break; + case 0x0000001e: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "KMODE_EXCEPTION_NOT_HANDLED\n" + "P1: %016RX64 - exception code%s\n" + "P2: %016RX64 - EIP/RIP%s\n" + "P3: %016RX64 - Xcpt param #0\n" + "P4: %016RX64 - Xcpt param #1\n", + uP1, dbgfR3GetNtStatusName((uint32_t)uP1), + uP2, dbgfR3FormatSymbol(pUVM, szSym, sizeof(szSym), ": ", uP2), + uP3, uP4); + break; + case 0x0000001f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SHARED_RESOURCE_CONV_ERROR\n"); break; + case 0x00000020: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_APC_PENDING_DURING_EXIT\n"); break; + case 0x00000021: cchUsed = RTStrPrintf(pszDetails, cbDetails, "QUOTA_UNDERFLOW\n"); break; + case 0x00000022: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FILE_SYSTEM\n"); break; + case 0x00000023: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FAT_FILE_SYSTEM\n"); break; + case 0x00000024: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NTFS_FILE_SYSTEM\n"); break; + case 0x00000025: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NPFS_FILE_SYSTEM\n"); break; + case 0x00000026: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CDFS_FILE_SYSTEM\n"); break; + case 0x00000027: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RDR_FILE_SYSTEM\n"); break; + case 0x00000028: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CORRUPT_ACCESS_TOKEN\n"); break; + case 0x00000029: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SECURITY_SYSTEM\n"); break; + case 0x0000002a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INCONSISTENT_IRP\n"); break; + case 0x0000002b: cchUsed = 
RTStrPrintf(pszDetails, cbDetails, "PANIC_STACK_SWITCH\n"); break; + case 0x0000002c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PORT_DRIVER_INTERNAL\n"); break; + case 0x0000002d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SCSI_DISK_DRIVER_INTERNAL\n"); break; + case 0x0000002e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DATA_BUS_ERROR\n"); break; + case 0x0000002f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INSTRUCTION_BUS_ERROR\n"); break; + case 0x00000030: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SET_OF_INVALID_CONTEXT\n"); break; + case 0x00000031: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PHASE0_INITIALIZATION_FAILED\n"); break; + case 0x00000032: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PHASE1_INITIALIZATION_FAILED\n"); break; + case 0x00000033: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UNEXPECTED_INITIALIZATION_CALL\n"); break; + case 0x00000034: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CACHE_MANAGER\n"); break; + case 0x00000035: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_MORE_IRP_STACK_LOCATIONS\n"); break; + case 0x00000036: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DEVICE_REFERENCE_COUNT_NOT_ZERO\n"); break; + case 0x00000037: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FLOPPY_INTERNAL_ERROR\n"); break; + case 0x00000038: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SERIAL_DRIVER_INTERNAL\n"); break; + case 0x00000039: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_EXIT_OWNED_MUTEX\n"); break; + case 0x0000003a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_UNWIND_PREVIOUS_USER\n"); break; + case 0x0000003b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_SERVICE_EXCEPTION\n"); break; + case 0x0000003c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INTERRUPT_UNWIND_ATTEMPTED\n"); break; + case 0x0000003d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INTERRUPT_EXCEPTION_NOT_HANDLED\n"); break; + case 0x0000003e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MULTIPROCESSOR_CONFIGURATION_NOT_SUPPORTED\n"); break; + case 0x0000003f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_MORE_SYSTEM_PTES\n"); break; + case 0x00000040: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TARGET_MDL_TOO_SMALL\n"); break; + case 0x00000041: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MUST_SUCCEED_POOL_EMPTY\n"); break; + case 0x00000042: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ATDISK_DRIVER_INTERNAL\n"); break; + case 0x00000043: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_SUCH_PARTITION\n"); break; + case 0x00000044: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MULTIPLE_IRP_COMPLETE_REQUESTS\n"); break; + case 0x00000045: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INSUFFICIENT_SYSTEM_MAP_REGS\n"); break; + case 0x00000046: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DEREF_UNKNOWN_LOGON_SESSION\n"); break; + case 0x00000047: cchUsed = RTStrPrintf(pszDetails, cbDetails, "REF_UNKNOWN_LOGON_SESSION\n"); break; + case 0x00000048: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CANCEL_STATE_IN_COMPLETED_IRP\n"); break; + case 0x00000049: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PAGE_FAULT_WITH_INTERRUPTS_OFF\n"); break; + case 0x0000004a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IRQL_GT_ZERO_AT_SYSTEM_SERVICE\n"); break; + case 0x0000004b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "STREAMS_INTERNAL_ERROR\n"); break; + case 0x0000004c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FATAL_UNHANDLED_HARD_ERROR\n"); break; + case 0x0000004d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_PAGES_AVAILABLE\n"); 
break; + case 0x0000004e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PFN_LIST_CORRUPT\n"); break; + case 0x0000004f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NDIS_INTERNAL_ERROR\n"); break; + case 0x00000050: /* PAGE_FAULT_IN_NONPAGED_AREA */ + case 0x10000050: /* PAGE_FAULT_IN_NONPAGED_AREA_M */ + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "PAGE_FAULT_IN_NONPAGED_AREA%s\n" + "P1: %016RX64 - memory referenced\n" + "P2: %016RX64 - IRQL\n" + "P3: %016RX64 - %s\n" + "P4: %016RX64 - reserved\n", + uBugCheck & 0x10000000 ? "_M" : "", uP1, uP2, uP3, uP3 & RT_BIT_64(0) ? "write" : "read", uP4); + break; + case 0x00000051: cchUsed = RTStrPrintf(pszDetails, cbDetails, "REGISTRY_ERROR\n"); break; + case 0x00000052: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MAILSLOT_FILE_SYSTEM\n"); break; + case 0x00000053: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NO_BOOT_DEVICE\n"); break; + case 0x00000054: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LM_SERVER_INTERNAL_ERROR\n"); break; + case 0x00000055: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DATA_COHERENCY_EXCEPTION\n"); break; + case 0x00000056: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INSTRUCTION_COHERENCY_EXCEPTION\n"); break; + case 0x00000057: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XNS_INTERNAL_ERROR\n"); break; + case 0x00000058: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VOLMGRX_INTERNAL_ERROR\n"); break; + case 0x00000059: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PINBALL_FILE_SYSTEM\n"); break; + case 0x0000005a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRITICAL_SERVICE_FAILED\n"); break; + case 0x0000005b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SET_ENV_VAR_FAILED\n"); break; + case 0x0000005c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HAL_INITIALIZATION_FAILED\n"); break; + case 0x0000005d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UNSUPPORTED_PROCESSOR\n"); break; + case 0x0000005e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "OBJECT_INITIALIZATION_FAILED\n"); break; + case 0x0000005f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SECURITY_INITIALIZATION_FAILED\n"); break; + case 0x00000060: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PROCESS_INITIALIZATION_FAILED\n"); break; + case 0x00000061: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HAL1_INITIALIZATION_FAILED\n"); break; + case 0x00000062: cchUsed = RTStrPrintf(pszDetails, cbDetails, "OBJECT1_INITIALIZATION_FAILED\n"); break; + case 0x00000063: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SECURITY1_INITIALIZATION_FAILED\n"); break; + case 0x00000064: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYMBOLIC_INITIALIZATION_FAILED\n"); break; + case 0x00000065: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MEMORY1_INITIALIZATION_FAILED\n"); break; + case 0x00000066: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CACHE_INITIALIZATION_FAILED\n"); break; + case 0x00000067: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CONFIG_INITIALIZATION_FAILED\n"); break; + case 0x00000068: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FILE_INITIALIZATION_FAILED\n"); break; + case 0x00000069: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IO1_INITIALIZATION_FAILED\n"); break; + case 0x0000006a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LPC_INITIALIZATION_FAILED\n"); break; + case 0x0000006b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PROCESS1_INITIALIZATION_FAILED\n"); break; + case 0x0000006c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "REFMON_INITIALIZATION_FAILED\n"); break; + case 0x0000006d: cchUsed = RTStrPrintf(pszDetails, 
cbDetails, "SESSION1_INITIALIZATION_FAILED\n"); break; + case 0x0000006e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTPROC_INITIALIZATION_FAILED\n"); break; + case 0x0000006f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VSL_INITIALIZATION_FAILED\n"); break; + case 0x00000070: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SOFT_RESTART_FATAL_ERROR\n"); break; + case 0x00000072: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ASSIGN_DRIVE_LETTERS_FAILED\n"); break; + case 0x00000073: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CONFIG_LIST_FAILED\n"); break; + case 0x00000074: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BAD_SYSTEM_CONFIG_INFO\n"); break; + case 0x00000075: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CANNOT_WRITE_CONFIGURATION\n"); break; + case 0x00000076: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PROCESS_HAS_LOCKED_PAGES\n"); break; + case 0x00000077: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_STACK_INPAGE_ERROR\n"); break; + case 0x00000078: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PHASE0_EXCEPTION\n"); break; + case 0x00000079: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MISMATCHED_HAL\n"); break; + case 0x0000007a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_DATA_INPAGE_ERROR\n"); break; + case 0x0000007b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INACCESSIBLE_BOOT_DEVICE\n"); break; + case 0x0000007c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BUGCODE_NDIS_DRIVER\n"); break; + case 0x0000007d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INSTALL_MORE_MEMORY\n"); break; + case 0x0000007e: /* SYSTEM_THREAD_EXCEPTION_NOT_HANDLED */ + case 0x1000007e: /* SYSTEM_THREAD_EXCEPTION_NOT_HANDLED_M */ + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "SYSTEM_THREAD_EXCEPTION_NOT_HANDLED%s\n" + "P1: %016RX64 - exception code%s\n" + "P2: %016RX64 - EIP/RIP%s\n" + "P3: %016RX64 - Xcpt address\n" + "P4: %016RX64 - Context address\n", + uBugCheck & 0x10000000 ? "_M" : "", uP1, dbgfR3GetNtStatusName((uint32_t)uP1), + uP2, dbgfR3FormatSymbol(pUVM, szSym, sizeof(szSym), ": ", uP2), + uP3, uP4); + break; + case 0x0000007f: /* UNEXPECTED_KERNEL_MODE_TRAP */ + case 0x1000007f: /* UNEXPECTED_KERNEL_MODE_TRAP_M */ + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "UNEXPECTED_KERNEL_MODE_TRAP%s\n" + "P1: %016RX64 - x86 trap number\n" + "P2: %016RX64 - reserved/errorcode?\n" + "P3: %016RX64 - reserved\n" + "P4: %016RX64 - reserved\n", + uBugCheck & 0x10000000 ? 
"_M" : "", uP1, uP2, uP3, uP4); + break; + case 0x00000080: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NMI_HARDWARE_FAILURE\n"); break; + case 0x00000081: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SPIN_LOCK_INIT_FAILURE\n"); break; + case 0x00000082: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DFS_FILE_SYSTEM\n"); break; + case 0x00000083: cchUsed = RTStrPrintf(pszDetails, cbDetails, "OFS_FILE_SYSTEM\n"); break; + case 0x00000084: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RECOM_DRIVER\n"); break; + case 0x00000085: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SETUP_FAILURE\n"); break; + case 0x00000086: cchUsed = RTStrPrintf(pszDetails, cbDetails, "AUDIT_FAILURE\n"); break; + case 0x0000008b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MBR_CHECKSUM_MISMATCH\n"); break; + case 0x0000008e: /* KERNEL_MODE_EXCEPTION_NOT_HANDLED */ + case 0x1000008e: /* KERNEL_MODE_EXCEPTION_NOT_HANDLED_M */ + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "KERNEL_MODE_EXCEPTION_NOT_HANDLED%s\n" + "P1: %016RX64 - exception code%s\n" + "P2: %016RX64 - EIP/RIP%s\n" + "P3: %016RX64 - Trap frame address\n" + "P4: %016RX64 - reserved\n", + uBugCheck & 0x10000000 ? "_M" : "", uP1, dbgfR3GetNtStatusName((uint32_t)uP1), + uP2, dbgfR3FormatSymbol(pUVM, szSym, sizeof(szSym), ": ", uP2), + uP3, uP4); + break; + case 0x0000008f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PP0_INITIALIZATION_FAILED\n"); break; + case 0x00000090: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PP1_INITIALIZATION_FAILED\n"); break; + case 0x00000091: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_INIT_OR_RIT_FAILURE\n"); break; + case 0x00000092: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UP_DRIVER_ON_MP_SYSTEM\n"); break; + case 0x00000093: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_KERNEL_HANDLE\n"); break; + case 0x00000094: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_STACK_LOCKED_AT_EXIT\n"); break; + case 0x00000095: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PNP_INTERNAL_ERROR\n"); break; + case 0x00000096: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_WORK_QUEUE_ITEM\n"); break; + case 0x00000097: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOUND_IMAGE_UNSUPPORTED\n"); break; + case 0x00000098: cchUsed = RTStrPrintf(pszDetails, cbDetails, "END_OF_NT_EVALUATION_PERIOD\n"); break; + case 0x00000099: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_REGION_OR_SEGMENT\n"); break; + case 0x0000009a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_LICENSE_VIOLATION\n"); break; + case 0x0000009b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UDFS_FILE_SYSTEM\n"); break; + case 0x0000009c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MACHINE_CHECK_EXCEPTION\n"); break; + case 0x0000009e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "USER_MODE_HEALTH_MONITOR\n"); break; + case 0x0000009f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_POWER_STATE_FAILURE\n"); break; + case 0x000000a0: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INTERNAL_POWER_ERROR\n"); break; + case 0x000000a1: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PCI_BUS_DRIVER_INTERNAL\n"); break; + case 0x000000a2: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MEMORY_IMAGE_CORRUPT\n"); break; + case 0x000000a3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ACPI_DRIVER_INTERNAL\n"); break; + case 0x000000a4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CNSS_FILE_SYSTEM_FILTER\n"); break; + case 0x000000a5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ACPI_BIOS_ERROR\n"); break; + case 0x000000a6: cchUsed = 
RTStrPrintf(pszDetails, cbDetails, "FP_EMULATION_ERROR\n"); break; + case 0x000000a7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BAD_EXHANDLE\n"); break; + case 0x000000a8: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTING_IN_SAFEMODE_MINIMAL\n"); break; + case 0x000000a9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTING_IN_SAFEMODE_NETWORK\n"); break; + case 0x000000aa: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTING_IN_SAFEMODE_DSREPAIR\n"); break; + case 0x000000ab: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SESSION_HAS_VALID_POOL_ON_EXIT\n"); break; + case 0x000000ac: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HAL_MEMORY_ALLOCATION\n"); break; + case 0x000000b1: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BGI_DETECTED_VIOLATION\n"); break; + case 0x000000b4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_DRIVER_INIT_FAILURE\n"); break; + case 0x000000b5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTLOG_LOADED\n"); break; + case 0x000000b6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTLOG_NOT_LOADED\n"); break; + case 0x000000b7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BOOTLOG_ENABLED\n"); break; + case 0x000000b8: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ATTEMPTED_SWITCH_FROM_DPC\n"); break; + case 0x000000b9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CHIPSET_DETECTED_ERROR\n"); break; + case 0x000000ba: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SESSION_HAS_VALID_VIEWS_ON_EXIT\n"); break; + case 0x000000bb: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NETWORK_BOOT_INITIALIZATION_FAILED\n"); break; + case 0x000000bc: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NETWORK_BOOT_DUPLICATE_ADDRESS\n"); break; + case 0x000000bd: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_HIBERNATED_STATE\n"); break; + case 0x000000be: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ATTEMPTED_WRITE_TO_READONLY_MEMORY\n"); break; + case 0x000000bf: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MUTEX_ALREADY_OWNED\n"); break; + case 0x000000c0: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PCI_CONFIG_SPACE_ACCESS_FAILURE\n"); break; + case 0x000000c1: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SPECIAL_POOL_DETECTED_MEMORY_CORRUPTION\n"); break; + + case 0x000000c2: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "BAD_POOL_CALLER\n" + "P1: %016RX64 - ", uP1); + if (cchUsed >= cbDetails) + return VINF_BUFFER_OVERFLOW; + cbDetails -= cchUsed; + pszDetails += cchUsed; + switch (uP1) + { + case 1: + case 2: + case 4: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "Pool header corrupted!\n" + "P2: %016RX64 - Pool header address\n" + "P3: %016RX64 - Pool header contents\n" + "P4: %016RX64 - reserved\n", uP2, uP3, uP4); + break; + case 6: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "Double free w/o tag!\n" + "P2: %016RX64 - reserved\n" + "P3: %016RX64 - Pool header address\n" + "P4: %016RX64 - Pool header contents\n", uP2, uP3, uP4); + break; + case 7: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "Double free w/ tag!\n" + "P2: %016RX64 - tag %c%c%c%c\n" + "P3: %016RX64 - Pool header contents\n" + "P4: %016RX64 - Free address\n", + uP2, + RT_C_IS_PRINT(RT_BYTE1(uP2)) ? RT_BYTE1(uP2) : '.', + RT_C_IS_PRINT(RT_BYTE2(uP2)) ? RT_BYTE2(uP2) : '.', + RT_C_IS_PRINT(RT_BYTE3(uP2)) ? RT_BYTE3(uP2) : '.', + RT_C_IS_PRINT(RT_BYTE4(uP2)) ? 
RT_BYTE4(uP2) : '.', + uP3, uP4); + break; + case 8: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "Wrong IRQL for allocation!\n" + "P2: %016RX64 - IRQL\n" + "P3: %016RX64 - Pool type\n" + "P4: %016RX64 - Allocation size\n", + uP2, uP3, uP4); + break; + case 9: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "Wrong IRQL for free!\n" + "P2: %016RX64 - IRQL\n" + "P3: %016RX64 - Pool type\n" + "P4: %016RX64 - Pool address\n", + uP2, uP3, uP4); + break; + /** @todo fill in more BAD_POOL_CALLER types here as needed.*/ + default: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "Unknown pool violation type\n" + "P2: %016RX64 - type specific\n" + "P3: %016RX64 - type specific\n" + "P4: %016RX64 - type specific\n", + uP2, uP3, uP4); + break; + } + break; + + case 0x000000c3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_IMAGE_BAD_SIGNATURE\n"); break; + case 0x000000c4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_VERIFIER_DETECTED_VIOLATION\n"); break; + case 0x000000c5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_CORRUPTED_EXPOOL\n"); break; + case 0x000000c6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_CAUGHT_MODIFYING_FREED_POOL\n"); break; + case 0x000000c7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TIMER_OR_DPC_INVALID\n"); break; + case 0x000000c8: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IRQL_UNEXPECTED_VALUE\n"); break; + case 0x000000c9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_VERIFIER_IOMANAGER_VIOLATION\n"); break; + case 0x000000ca: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PNP_DETECTED_FATAL_ERROR\n"); break; + case 0x000000cb: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_LEFT_LOCKED_PAGES_IN_PROCESS\n"); break; + case 0x000000cc: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PAGE_FAULT_IN_FREED_SPECIAL_POOL\n"); break; + case 0x000000cd: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PAGE_FAULT_BEYOND_END_OF_ALLOCATION\n"); break; + case 0x000000ce: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_UNLOADED_WITHOUT_CANCELLING_PENDING_OPERATIONS\n"); break; + case 0x000000cf: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TERMINAL_SERVER_DRIVER_MADE_INCORRECT_MEMORY_REFERENCE\n"); break; + case 0x000000d0: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_CORRUPTED_MMPOOL\n"); break; + case 0x000000d1: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "DRIVER_IRQL_NOT_LESS_OR_EQUAL\n" + "P1: %016RX64 - memory referenced\n" + "P2: %016RX64 - IRQL\n" + "P3: %016RX64 - %s\n" + "P4: %016RX64 - EIP/RIP%s\n", + uP1, uP2, uP3, uP3 & RT_BIT_64(0) ? 
"write" : "read", + uP4, dbgfR3FormatSymbol(pUVM, szSym, sizeof(szSym), ": ", uP4)); + break; + case 0x000000d2: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BUGCODE_ID_DRIVER\n"); break; + case 0x000000d3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_PORTION_MUST_BE_NONPAGED\n"); break; + case 0x000000d4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_SCAN_AT_RAISED_IRQL_CAUGHT_IMPROPER_DRIVER_UNLOAD\n"); break; + case 0x000000d5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_PAGE_FAULT_IN_FREED_SPECIAL_POOL\n"); break; + case 0x000000d6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_PAGE_FAULT_BEYOND_END_OF_ALLOCATION\n"); break; + case 0x100000d6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_PAGE_FAULT_BEYOND_END_OF_ALLOCATION_M\n"); break; + case 0x000000d7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_UNMAPPING_INVALID_VIEW\n"); break; + case 0x000000d8: + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "DRIVER_USED_EXCESSIVE_PTES\n" + "P1: %016RX64 - Driver name pointer\n" + "P2: %016RX64 - Number of PTEs\n" + "P3: %016RX64 - Free system PTEs\n" + "P4: %016RX64 - System PTEs\n", + uP1, uP2, uP3, uP4); + break; + case 0x000000d9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LOCKED_PAGES_TRACKER_CORRUPTION\n"); break; + case 0x000000da: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SYSTEM_PTE_MISUSE\n"); break; + case 0x000000db: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_CORRUPTED_SYSPTES\n"); break; + case 0x000000dc: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_INVALID_STACK_ACCESS\n"); break; + case 0x000000de: cchUsed = RTStrPrintf(pszDetails, cbDetails, "POOL_CORRUPTION_IN_FILE_AREA\n"); break; + case 0x000000df: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IMPERSONATING_WORKER_THREAD\n"); break; + case 0x000000e0: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ACPI_BIOS_FATAL_ERROR\n"); break; + case 0x000000e1: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_RETURNED_AT_BAD_IRQL\n"); break; + case 0x000000e2: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MANUALLY_INITIATED_CRASH\n"); break; + case 0x000000e3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RESOURCE_NOT_OWNED\n"); break; + case 0x000000e4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_INVALID\n"); break; + case 0x000000e5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "POWER_FAILURE_SIMULATE\n"); break; + case 0x000000e6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_VERIFIER_DMA_VIOLATION\n"); break; + case 0x000000e7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_FLOATING_POINT_STATE\n"); break; + case 0x000000e8: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_CANCEL_OF_FILE_OPEN\n"); break; + case 0x000000e9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ACTIVE_EX_WORKER_THREAD_TERMINATION\n"); break; + case 0x000000ea: cchUsed = RTStrPrintf(pszDetails, cbDetails, "THREAD_STUCK_IN_DEVICE_DRIVER\n"); break; + case 0x100000ea: cchUsed = RTStrPrintf(pszDetails, cbDetails, "THREAD_STUCK_IN_DEVICE_DRIVER_M\n"); break; + case 0x000000eb: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DIRTY_MAPPED_PAGES_CONGESTION\n"); break; + case 0x000000ec: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SESSION_HAS_VALID_SPECIAL_POOL_ON_EXIT\n"); break; + case 0x000000ed: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UNMOUNTABLE_BOOT_VOLUME\n"); break; + case 0x000000ef: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRITICAL_PROCESS_DIED\n"); break; + case 0x000000f0: cchUsed = RTStrPrintf(pszDetails, cbDetails, 
"STORAGE_MINIPORT_ERROR\n"); break; + case 0x000000f1: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SCSI_VERIFIER_DETECTED_VIOLATION\n"); break; + case 0x000000f2: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HARDWARE_INTERRUPT_STORM\n"); break; + case 0x000000f3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DISORDERLY_SHUTDOWN\n"); break; + case 0x000000f4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRITICAL_OBJECT_TERMINATION\n"); break; + case 0x000000f5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FLTMGR_FILE_SYSTEM\n"); break; + case 0x000000f6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PCI_VERIFIER_DETECTED_VIOLATION\n"); break; + case 0x000000f7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_OVERRAN_STACK_BUFFER\n"); break; + case 0x000000f8: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RAMDISK_BOOT_INITIALIZATION_FAILED\n"); break; + case 0x000000f9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_RETURNED_STATUS_REPARSE_FOR_VOLUME_OPEN\n"); break; + case 0x000000fa: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HTTP_DRIVER_CORRUPTED\n"); break; + case 0x000000fb: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RECURSIVE_MACHINE_CHECK\n"); break; + case 0x000000fc: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ATTEMPTED_EXECUTE_OF_NOEXECUTE_MEMORY\n"); break; + case 0x000000fd: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DIRTY_NOWRITE_PAGES_CONGESTION\n"); break; + case 0x000000fe: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BUGCODE_USB_DRIVER\n"); break; + case 0x000000ff: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RESERVE_QUEUE_OVERFLOW\n"); break; + case 0x00000100: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LOADER_BLOCK_MISMATCH\n"); break; + case 0x00000101: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLOCK_WATCHDOG_TIMEOUT\n"); break; + case 0x00000102: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DPC_WATCHDOG_TIMEOUT\n"); break; + case 0x00000103: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MUP_FILE_SYSTEM\n"); break; + case 0x00000104: cchUsed = RTStrPrintf(pszDetails, cbDetails, "AGP_INVALID_ACCESS\n"); break; + case 0x00000105: cchUsed = RTStrPrintf(pszDetails, cbDetails, "AGP_GART_CORRUPTION\n"); break; + case 0x00000106: cchUsed = RTStrPrintf(pszDetails, cbDetails, "AGP_ILLEGALLY_REPROGRAMMED\n"); break; + case 0x00000107: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_EXPAND_STACK_ACTIVE\n"); break; + case 0x00000108: cchUsed = RTStrPrintf(pszDetails, cbDetails, "THIRD_PARTY_FILE_SYSTEM_FAILURE\n"); break; + case 0x00000109: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRITICAL_STRUCTURE_CORRUPTION\n"); break; + case 0x0000010a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "APP_TAGGING_INITIALIZATION_FAILED\n"); break; + case 0x0000010b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DFSC_FILE_SYSTEM\n"); break; + case 0x0000010c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FSRTL_EXTRA_CREATE_PARAMETER_VIOLATION\n"); break; + case 0x0000010d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WDF_VIOLATION\n"); break; + case 0x0000010e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_MEMORY_MANAGEMENT_INTERNAL\n"); break; + case 0x00000110: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_INVALID_CRUNTIME_PARAMETER\n"); break; + case 0x00000111: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RECURSIVE_NMI\n"); break; + case 0x00000112: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MSRPC_STATE_VIOLATION\n"); break; + case 0x00000113: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_DXGKRNL_FATAL_ERROR\n"); break; + case 
0x00000114: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_SHADOW_DRIVER_FATAL_ERROR\n"); break; + case 0x00000115: cchUsed = RTStrPrintf(pszDetails, cbDetails, "AGP_INTERNAL\n"); break; + case 0x00000116: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_TDR_FAILURE\n"); break; + case 0x00000117: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_TDR_TIMEOUT_DETECTED\n"); break; + case 0x00000118: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NTHV_GUEST_ERROR\n"); break; + case 0x00000119: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_SCHEDULER_INTERNAL_ERROR\n"); break; + case 0x0000011a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "EM_INITIALIZATION_ERROR\n"); break; + case 0x0000011b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_RETURNED_HOLDING_CANCEL_LOCK\n"); break; + case 0x0000011c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ATTEMPTED_WRITE_TO_CM_PROTECTED_STORAGE\n"); break; + case 0x0000011d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "EVENT_TRACING_FATAL_ERROR\n"); break; + case 0x0000011e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TOO_MANY_RECURSIVE_FAULTS\n"); break; + case 0x0000011f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_DRIVER_HANDLE\n"); break; + case 0x00000120: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BITLOCKER_FATAL_ERROR\n"); break; + case 0x00000121: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_VIOLATION\n"); break; + case 0x00000122: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WHEA_INTERNAL_ERROR\n"); break; + case 0x00000123: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRYPTO_SELF_TEST_FAILURE\n"); break; + case 0x00000124: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WHEA_UNCORRECTABLE_ERROR\n"); break; + case 0x00000125: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NMR_INVALID_STATE\n"); break; + case 0x00000126: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NETIO_INVALID_POOL_CALLER\n"); break; + case 0x00000127: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PAGE_NOT_ZERO\n"); break; + case 0x00000128: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_RETURNED_WITH_BAD_IO_PRIORITY\n"); break; + case 0x00000129: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_RETURNED_WITH_BAD_PAGING_IO_PRIORITY\n"); break; + case 0x0000012a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MUI_NO_VALID_SYSTEM_LANGUAGE\n"); break; + case 0x0000012b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FAULTY_HARDWARE_CORRUPTED_PAGE\n"); break; + case 0x0000012c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "EXFAT_FILE_SYSTEM\n"); break; + case 0x0000012d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VOLSNAP_OVERLAPPED_TABLE_ACCESS\n"); break; + case 0x0000012e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_MDL_RANGE\n"); break; + case 0x0000012f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VHD_BOOT_INITIALIZATION_FAILED\n"); break; + case 0x00000130: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DYNAMIC_ADD_PROCESSOR_MISMATCH\n"); break; + case 0x00000131: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_EXTENDED_PROCESSOR_STATE\n"); break; + case 0x00000132: cchUsed = RTStrPrintf(pszDetails, cbDetails, "RESOURCE_OWNER_POINTER_INVALID\n"); break; + case 0x00000133: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DPC_WATCHDOG_VIOLATION\n"); break; + case 0x00000134: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVE_EXTENDER\n"); break; + case 0x00000135: cchUsed = RTStrPrintf(pszDetails, cbDetails, "REGISTRY_FILTER_DRIVER_EXCEPTION\n"); break; + case 0x00000136: cchUsed = 
RTStrPrintf(pszDetails, cbDetails, "VHD_BOOT_HOST_VOLUME_NOT_ENOUGH_SPACE\n"); break; + case 0x00000137: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_HANDLE_MANAGER\n"); break; + case 0x00000138: cchUsed = RTStrPrintf(pszDetails, cbDetails, "GPIO_CONTROLLER_DRIVER_ERROR\n"); break; + + case 0x00000139: + { + const char *pszCheck; + switch (uP1) + { + case 0x00: pszCheck = "Stack buffer overrun (/GS)"; break; + case 0x01: pszCheck = "Illegal virtual function table use (VTGuard)"; break; + case 0x02: pszCheck = "Stack buffer overrun (via cookie)"; break; + case 0x03: pszCheck = "Correupt LIST_ENTRY"; break; + case 0x04: pszCheck = "Out of bounds stack pointer"; break; + case 0x05: pszCheck = "Invalid parameter (fatal)"; break; + case 0x06: pszCheck = "Uninitialized stack cookie (by loader prior to Win8)"; break; + case 0x07: pszCheck = "Fatal program exit request"; break; + case 0x08: pszCheck = "Compiler bounds check violation"; break; + case 0x09: pszCheck = "Direct RtlQueryRegistryValues w/o typechecking on untrusted hive"; break; + /* https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/bug-check---bug-check-0x139-kernel-security-check-failure + and !analyze -show differs on the following: */ + case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: + case 0x0f: pszCheck = "Memory safety violation [?]"; break; + case 0x10: pszCheck = "Invalid indirect call (indirect call guard) [?]"; break; + case 0x11: pszCheck = "Invalid memory write (write guard) [?]"; break; + case 0x12: pszCheck = "Invalid target context for fiber switch [?]"; break; + /** @todo there are lots more... */ + default: pszCheck = "Todo/Unknown"; break; + } + cchUsed = RTStrPrintf(pszDetails, cbDetails, + "KERNEL_SECURITY_CHECK_FAILURE\n" + "P1: %016RX64 - %s!\n" + "P2: %016RX64 - Trap frame address\n" + "P3: %016RX64 - Exception record\n" + "P4: %016RX64 - reserved\n", uP1, pszCheck, uP2, uP3, uP4); + break; + } + + case 0x0000013a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_MODE_HEAP_CORRUPTION\n"); break; + case 0x0000013b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PASSIVE_INTERRUPT_ERROR\n"); break; + case 0x0000013c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_IO_BOOST_STATE\n"); break; + case 0x0000013d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRITICAL_INITIALIZATION_FAILURE\n"); break; + case 0x0000013e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ERRATA_WORKAROUND_UNSUCCESSFUL\n"); break; + case 0x00000140: cchUsed = RTStrPrintf(pszDetails, cbDetails, "STORAGE_DEVICE_ABNORMALITY_DETECTED\n"); break; + case 0x00000141: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_ENGINE_TIMEOUT_DETECTED\n"); break; + case 0x00000142: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_TDR_APPLICATION_BLOCKED\n"); break; + case 0x00000143: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PROCESSOR_DRIVER_INTERNAL\n"); break; + case 0x00000144: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BUGCODE_USB3_DRIVER\n"); break; + case 0x00000145: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SECURE_BOOT_VIOLATION\n"); break; + case 0x00000146: cchUsed = RTStrPrintf(pszDetails, cbDetails, "NDIS_NET_BUFFER_LIST_INFO_ILLEGALLY_TRANSFERRED\n"); break; + case 0x00000147: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ABNORMAL_RESET_DETECTED\n"); break; + case 0x00000148: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IO_OBJECT_INVALID\n"); break; + case 0x00000149: cchUsed = RTStrPrintf(pszDetails, cbDetails, "REFS_FILE_SYSTEM\n"); break; + case 0x0000014a: cchUsed = RTStrPrintf(pszDetails, 
cbDetails, "KERNEL_WMI_INTERNAL\n"); break; + case 0x0000014b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SOC_SUBSYSTEM_FAILURE\n"); break; + case 0x0000014c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FATAL_ABNORMAL_RESET_ERROR\n"); break; + case 0x0000014d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "EXCEPTION_SCOPE_INVALID\n"); break; + case 0x0000014e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SOC_CRITICAL_DEVICE_REMOVED\n"); break; + case 0x0000014f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PDC_WATCHDOG_TIMEOUT\n"); break; + case 0x00000150: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TCPIP_AOAC_NIC_ACTIVE_REFERENCE_LEAK\n"); break; + case 0x00000151: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UNSUPPORTED_INSTRUCTION_MODE\n"); break; + case 0x00000152: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_PUSH_LOCK_FLAGS\n"); break; + case 0x00000153: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_LOCK_ENTRY_LEAKED_ON_THREAD_TERMINATION\n"); break; + case 0x00000154: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UNEXPECTED_STORE_EXCEPTION\n"); break; + case 0x00000155: cchUsed = RTStrPrintf(pszDetails, cbDetails, "OS_DATA_TAMPERING\n"); break; + case 0x00000156: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WINSOCK_DETECTED_HUNG_CLOSESOCKET_LIVEDUMP\n"); break; + case 0x00000157: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_THREAD_PRIORITY_FLOOR_VIOLATION\n"); break; + case 0x00000158: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ILLEGAL_IOMMU_PAGE_FAULT\n"); break; + case 0x00000159: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HAL_ILLEGAL_IOMMU_PAGE_FAULT\n"); break; + case 0x0000015a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SDBUS_INTERNAL_ERROR\n"); break; + case 0x0000015b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_RETURNED_WITH_SYSTEM_PAGE_PRIORITY_ACTIVE\n"); break; + case 0x0000015c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PDC_WATCHDOG_TIMEOUT_LIVEDUMP\n"); break; + case 0x0000015d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SOC_SUBSYSTEM_FAILURE_LIVEDUMP\n"); break; + case 0x0000015e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BUGCODE_NDIS_DRIVER_LIVE_DUMP\n"); break; + case 0x0000015f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CONNECTED_STANDBY_WATCHDOG_TIMEOUT_LIVEDUMP\n"); break; + case 0x00000160: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_ATOMIC_CHECK_FAILURE\n"); break; + case 0x00000161: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LIVE_SYSTEM_DUMP\n"); break; + case 0x00000162: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_AUTO_BOOST_INVALID_LOCK_RELEASE\n"); break; + case 0x00000163: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_TEST_CONDITION\n"); break; + case 0x00000164: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_CRITICAL_FAILURE\n"); break; + case 0x00000165: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_STATUS_IO_TIMEOUT_LIVEDUMP\n"); break; + case 0x00000166: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_RESOURCE_CALL_TIMEOUT_LIVEDUMP\n"); break; + case 0x00000167: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_SNAPSHOT_DEVICE_INFO_TIMEOUT_LIVEDUMP\n"); break; + case 0x00000168: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_STATE_TRANSITION_TIMEOUT_LIVEDUMP\n"); break; + case 0x00000169: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_VOLUME_ARRIVAL_LIVEDUMP\n"); break; + case 0x0000016a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_VOLUME_REMOVAL_LIVEDUMP\n"); break; + case 
0x0000016b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_CLUSTER_WATCHDOG_LIVEDUMP\n"); break; + case 0x0000016c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_RUNDOWN_PROTECTION_FLAGS\n"); break; + case 0x0000016d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_SLOT_ALLOCATOR_FLAGS\n"); break; + case 0x0000016e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ERESOURCE_INVALID_RELEASE\n"); break; + case 0x0000016f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_STATE_TRANSITION_INTERVAL_TIMEOUT_LIVEDUMP\n"); break; + case 0x00000170: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSV_CLUSSVC_DISCONNECT_WATCHDOG\n"); break; + case 0x00000171: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CRYPTO_LIBRARY_INTERNAL_ERROR\n"); break; + case 0x00000173: cchUsed = RTStrPrintf(pszDetails, cbDetails, "COREMSGCALL_INTERNAL_ERROR\n"); break; + case 0x00000174: cchUsed = RTStrPrintf(pszDetails, cbDetails, "COREMSG_INTERNAL_ERROR\n"); break; + case 0x00000175: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PREVIOUS_FATAL_ABNORMAL_RESET_ERROR\n"); break; + case 0x00000178: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ELAM_DRIVER_DETECTED_FATAL_ERROR\n"); break; + case 0x00000179: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CLUSPORT_STATUS_IO_TIMEOUT_LIVEDUMP\n"); break; + case 0x0000017b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PROFILER_CONFIGURATION_ILLEGAL\n"); break; + case 0x0000017c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PDC_LOCK_WATCHDOG_LIVEDUMP\n"); break; + case 0x0000017d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PDC_UNEXPECTED_REVOCATION_LIVEDUMP\n"); break; + case 0x00000180: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_REPLICATION_IOCONTEXT_TIMEOUT\n"); break; + case 0x00000181: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_STATE_TRANSITION_TIMEOUT\n"); break; + case 0x00000182: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_RECOVERY_IOCONTEXT_TIMEOUT\n"); break; + case 0x00000183: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_APP_IO_TIMEOUT\n"); break; + case 0x00000184: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_MANUALLY_INITIATED\n"); break; + case 0x00000185: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_STATE_FAILURE\n"); break; + case 0x00000186: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WVR_LIVEDUMP_CRITICAL_ERROR\n"); break; + case 0x00000187: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_DWMINIT_TIMEOUT_FALLBACK_BDD\n"); break; + case 0x00000188: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_CSVFS_LIVEDUMP\n"); break; + case 0x00000189: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BAD_OBJECT_HEADER\n"); break; + case 0x0000018a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SILO_CORRUPT\n"); break; + case 0x0000018b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SECURE_KERNEL_ERROR\n"); break; + case 0x0000018c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HYPERGUARD_VIOLATION\n"); break; + case 0x0000018d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SECURE_FAULT_UNHANDLED\n"); break; + case 0x0000018e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_PARTITION_REFERENCE_VIOLATION\n"); break; + case 0x00000190: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_CRITICAL_FAILURE_LIVEDUMP\n"); break; + case 0x00000191: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PF_DETECTED_CORRUPTION\n"); break; + case 0x00000192: cchUsed = RTStrPrintf(pszDetails, cbDetails, 
"KERNEL_AUTO_BOOST_LOCK_ACQUISITION_WITH_RAISED_IRQL\n"); break; + case 0x00000193: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_DXGKRNL_LIVEDUMP\n"); break; + case 0x00000194: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_NONRESPONSIVEPROCESS\n"); break; + case 0x00000195: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SMB_SERVER_LIVEDUMP\n"); break; + case 0x00000196: cchUsed = RTStrPrintf(pszDetails, cbDetails, "LOADER_ROLLBACK_DETECTED\n"); break; + case 0x00000197: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_SECURITY_FAILURE\n"); break; + case 0x00000198: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UFX_LIVEDUMP\n"); break; + case 0x00000199: cchUsed = RTStrPrintf(pszDetails, cbDetails, "KERNEL_STORAGE_SLOT_IN_USE\n"); break; + case 0x0000019a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_RETURNED_WHILE_ATTACHED_TO_SILO\n"); break; + case 0x0000019b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TTM_FATAL_ERROR\n"); break; + case 0x0000019c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_POWER_WATCHDOG_TIMEOUT\n"); break; + case 0x0000019d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CLUSTER_SVHDX_LIVEDUMP\n"); break; + case 0x0000019e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BUGCODE_NETADAPTER_DRIVER\n"); break; + case 0x0000019f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "PDC_PRIVILEGE_CHECK_LIVEDUMP\n"); break; + case 0x000001a0: cchUsed = RTStrPrintf(pszDetails, cbDetails, "TTM_WATCHDOG_TIMEOUT\n"); break; + case 0x000001a1: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_CALLOUT_WATCHDOG_LIVEDUMP\n"); break; + case 0x000001a2: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WIN32K_CALLOUT_WATCHDOG_BUGCHECK\n"); break; + case 0x000001a3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "CALL_HAS_NOT_RETURNED_WATCHDOG_TIMEOUT_LIVEDUMP\n"); break; + case 0x000001a4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIPS_SW_HW_DIVERGENCE_LIVEDUMP\n"); break; + case 0x000001a5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "USB_DRIPS_BLOCKER_SURPRISE_REMOVAL_LIVEDUMP\n"); break; + case 0x000001c4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_VERIFIER_DETECTED_VIOLATION_LIVEDUMP\n"); break; + case 0x000001c5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "IO_THREADPOOL_DEADLOCK_LIVEDUMP\n"); break; + case 0x000001c6: cchUsed = RTStrPrintf(pszDetails, cbDetails, "FAST_ERESOURCE_PRECONDITION_VIOLATION\n"); break; + case 0x000001c7: cchUsed = RTStrPrintf(pszDetails, cbDetails, "STORE_DATA_STRUCTURE_CORRUPTION\n"); break; + case 0x000001c8: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MANUALLY_INITIATED_POWER_BUTTON_HOLD\n"); break; + case 0x000001c9: cchUsed = RTStrPrintf(pszDetails, cbDetails, "USER_MODE_HEALTH_MONITOR_LIVEDUMP\n"); break; + case 0x000001ca: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HYPERVISOR_WATCHDOG_TIMEOUT\n"); break; + case 0x000001cb: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_SILO_DETACH\n"); break; + case 0x000001cc: cchUsed = RTStrPrintf(pszDetails, cbDetails, "EXRESOURCE_TIMEOUT_LIVEDUMP\n"); break; + case 0x000001cd: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_CALLBACK_STACK_ADDRESS\n"); break; + case 0x000001ce: cchUsed = RTStrPrintf(pszDetails, cbDetails, "INVALID_KERNEL_STACK_ADDRESS\n"); break; + case 0x000001cf: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HARDWARE_WATCHDOG_TIMEOUT\n"); break; + case 0x000001d0: cchUsed = RTStrPrintf(pszDetails, cbDetails, "ACPI_FIRMWARE_WATCHDOG_TIMEOUT\n"); break; + case 0x000001d1: cchUsed = RTStrPrintf(pszDetails, cbDetails, 
"TELEMETRY_ASSERTS_LIVEDUMP\n"); break; + case 0x000001d2: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WORKER_THREAD_INVALID_STATE\n"); break; + case 0x000001d3: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WFP_INVALID_OPERATION\n"); break; + case 0x000001d4: cchUsed = RTStrPrintf(pszDetails, cbDetails, "UCMUCSI_LIVEDUMP\n"); break; + case 0x000001d5: cchUsed = RTStrPrintf(pszDetails, cbDetails, "DRIVER_PNP_WATCHDOG\n"); break; + case 0x00000315: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_MTBFCOMMANDTIMEOUT\n"); break; + case 0x00000356: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XBOX_ERACTRL_CS_TIMEOUT\n"); break; + case 0x00000357: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XBOX_CORRUPTED_IMAGE\n"); break; + case 0x00000358: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XBOX_INVERTED_FUNCTION_TABLE_OVERFLOW\n"); break; + case 0x00000359: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XBOX_CORRUPTED_IMAGE_BASE\n"); break; + case 0x00000360: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XBOX_360_SYSTEM_CRASH\n"); break; + case 0x00000420: cchUsed = RTStrPrintf(pszDetails, cbDetails, "XBOX_360_SYSTEM_CRASH_RESERVED\n"); break; + case 0x00000bfe: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BC_BLUETOOTH_VERIFIER_FAULT\n"); break; + case 0x00000bff: cchUsed = RTStrPrintf(pszDetails, cbDetails, "BC_BTHMINI_VERIFIER_FAULT\n"); break; + case 0x00008866: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_SICKAPPLICATION\n"); break; + case 0x0000f000: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_UNSPECIFIED\n"); break; + case 0x0000f002: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_BLANKSCREEN\n"); break; + case 0x0000f003: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_INPUT\n"); break; + case 0x0000f004: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_WATCHDOG\n"); break; + case 0x0000f005: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_STARTNOTVISIBLE\n"); break; + case 0x0000f006: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_NAVIGATIONMODEL\n"); break; + case 0x0000f007: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_OUTOFMEMORY\n"); break; + case 0x0000f008: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_GRAPHICS\n"); break; + case 0x0000f009: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_NAVSERVERTIMEOUT\n"); break; + case 0x0000f00a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_CHROMEPROCESSCRASH\n"); break; + case 0x0000f00b: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_NOTIFICATIONDISMISSAL\n"); break; + case 0x0000f00c: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_SPEECHDISMISSAL\n"); break; + case 0x0000f00d: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_CALLDISMISSAL\n"); break; + case 0x0000f00e: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_APPBARDISMISSAL\n"); break; + case 0x0000f00f: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_RILADAPTATIONCRASH\n"); break; + case 0x0000f010: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_APPLISTUNREACHABLE\n"); break; + case 0x0000f011: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_REPORTNOTIFICATIONFAILURE\n"); break; + case 0x0000f012: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_UNEXPECTEDSHUTDOWN\n"); break; + case 0x0000f013: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_RPCFAILURE\n"); break; + case 0x0000f014: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_AUXILIARYFULLDUMP\n"); break; + case 0x0000f015: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_ACCOUNTPROVSVCINITFAILURE\n"); break; + case 
0x0000f101: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_MTBFCOMMANDHANG\n"); break; + case 0x0000f102: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_MTBFPASSBUGCHECK\n"); break; + case 0x0000f103: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_MTBFIOERROR\n"); break; + case 0x0000f200: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_RENDERTHREADHANG\n"); break; + case 0x0000f201: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_RENDERMOBILEUIOOM\n"); break; + case 0x0000f300: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_DEVICEUPDATEUNSPECIFIED\n"); break; + case 0x0000f400: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_AUDIODRIVERHANG\n"); break; + case 0x0000f500: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_BATTERYPULLOUT\n"); break; + case 0x0000f600: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_MEDIACORETESTHANG\n"); break; + case 0x0000f700: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_RESOURCEMANAGEMENT\n"); break; + case 0x0000f800: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_CAPTURESERVICE\n"); break; + case 0x0000f900: cchUsed = RTStrPrintf(pszDetails, cbDetails, "SAVER_WAITFORSHELLREADY\n"); break; + case 0x00020001: cchUsed = RTStrPrintf(pszDetails, cbDetails, "HYPERVISOR_ERROR\n"); break; + case 0x4000008a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "THREAD_TERMINATE_HELD_MUTEX\n"); break; + case 0x400000ad: cchUsed = RTStrPrintf(pszDetails, cbDetails, "VIDEO_DRIVER_DEBUG_REPORT_REQUEST\n"); break; + case 0xc000021a: cchUsed = RTStrPrintf(pszDetails, cbDetails, "WINLOGON_FATAL_ERROR\n"); break; + case 0xdeaddead: cchUsed = RTStrPrintf(pszDetails, cbDetails, "MANUALLY_INITIATED_CRASH1\n"); break; + default: cchUsed = 0; break; + } + if (cchUsed < cbDetails) + return VINF_SUCCESS; + return VINF_BUFFER_OVERFLOW; +} + + +/** + * Report a bug check. + * + * @returns + * @param pVM The cross context VM structure. + * @param pVCpu The cross context per virtual CPU structure. + * @param enmEvent The kind of BSOD event this is. + * @param uBugCheck The bug check number. + * @param uP1 The bug check parameter \#1. + * @param uP2 The bug check parameter \#2. + * @param uP3 The bug check parameter \#3. + * @param uP4 The bug check parameter \#4. + */ +VMMR3DECL(VBOXSTRICTRC) DBGFR3ReportBugCheck(PVM pVM, PVMCPU pVCpu, DBGFEVENTTYPE enmEvent, uint64_t uBugCheck, + uint64_t uP1, uint64_t uP2, uint64_t uP3, uint64_t uP4) +{ + /* + * Be careful. + */ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VMCPU_ASSERT_EMT_RETURN(pVCpu, VERR_INVALID_VMCPU_HANDLE); + const char *pszSource; + switch (enmEvent) + { + case DBGFEVENT_BSOD_MSR: pszSource = "GIMHv"; break; + case DBGFEVENT_BSOD_EFI: pszSource = "EFI"; break; + case DBGFEVENT_BSOD_VMMDEV: pszSource = "VMMDev"; break; + default: + AssertMsgFailedReturn(("enmEvent=%d\n", enmEvent), VERR_INVALID_PARAMETER); + } + + /* + * Note it down. + */ + pVM->dbgf.s.BugCheck.enmEvent = enmEvent; + pVM->dbgf.s.BugCheck.uBugCheck = uBugCheck; + pVM->dbgf.s.BugCheck.auParameters[0] = uP1; + pVM->dbgf.s.BugCheck.auParameters[1] = uP2; + pVM->dbgf.s.BugCheck.auParameters[2] = uP3; + pVM->dbgf.s.BugCheck.auParameters[3] = uP4; + pVM->dbgf.s.BugCheck.idCpu = pVCpu->idCpu; + pVM->dbgf.s.BugCheck.uTimestamp = TMVirtualGet(pVM); + pVM->dbgf.s.BugCheck.uResetNo = VMGetResetCount(pVM); + + /* + * Log the details. 
+ */ + char szDetails[2048]; + DBGFR3FormatBugCheck(pVM->pUVM, szDetails, sizeof(szDetails), uBugCheck, uP1, uP2, uP3, uP4); + LogRel(("%s: %s", pszSource, szDetails)); + + /* + * Raise debugger event. + */ + VBOXSTRICTRC rc = VINF_SUCCESS; + if (DBGF_IS_EVENT_ENABLED(pVM, enmEvent)) + rc = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent, DBGFEVENTCTX_OTHER, 5 /*cArgs*/, uBugCheck, uP1, uP2, uP3, uP4); + + /* + * Take actions. + */ + /** @todo Take actions on BSOD, like notifying main or stopping the VM... + * For testing it makes little sense to continue after a BSOD. */ + return rc; +} + + +/** + * @callback_method_impl{FNDBGFHANDLERINT, bugcheck} + */ +static DECLCALLBACK(void) dbgfR3BugCheckInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + char szDetails[2048]; + + /* + * Any arguments for bug check formatting? + */ + if (pszArgs && *pszArgs) + pszArgs = RTStrStripL(pszArgs); + if (pszArgs && *pszArgs) + { + uint64_t auData[5] = { 0, 0, 0, 0, 0 }; + unsigned iData = 0; + do + { + /* Find the next hex digit */ + char ch; + while ((ch = *pszArgs) != '\0' && !RT_C_IS_XDIGIT(ch)) + pszArgs++; + if (ch == '\0') + break; + + /* Extract the number. */ + char *pszNext = (char *)pszArgs + 1; + RTStrToUInt64Ex(pszArgs, &pszNext, 16, &auData[iData]); + + /* Advance. */ + pszArgs = pszNext; + iData++; + } while (iData < RT_ELEMENTS(auData) && *pszArgs); + + /* Format it. */ + DBGFR3FormatBugCheck(pVM->pUVM, szDetails, sizeof(szDetails), auData[0], auData[1], auData[2], auData[3], auData[4]); + pHlp->pfnPrintf(pHlp, "%s", szDetails); + } + /* + * Format what's been reported (if any). + */ + else if (pVM->dbgf.s.BugCheck.enmEvent != DBGFEVENT_END) + { + DBGFR3FormatBugCheck(pVM->pUVM, szDetails, sizeof(szDetails), pVM->dbgf.s.BugCheck.uBugCheck, + pVM->dbgf.s.BugCheck.auParameters[0], pVM->dbgf.s.BugCheck.auParameters[1], + pVM->dbgf.s.BugCheck.auParameters[2], pVM->dbgf.s.BugCheck.auParameters[3]); + const char *pszSource = pVM->dbgf.s.BugCheck.enmEvent == DBGFEVENT_BSOD_MSR ? "GIMHv" + : pVM->dbgf.s.BugCheck.enmEvent == DBGFEVENT_BSOD_EFI ? "EFI" + : pVM->dbgf.s.BugCheck.enmEvent == DBGFEVENT_BSOD_VMMDEV ? "VMMDev" : ""; + uint32_t const uFreq = TMVirtualGetFreq(pVM); + uint64_t const cSecs = pVM->dbgf.s.BugCheck.uTimestamp / uFreq; + uint32_t const cMillis = (pVM->dbgf.s.BugCheck.uTimestamp - cSecs * uFreq) * 1000 / uFreq; + pHlp->pfnPrintf(pHlp, "BugCheck on CPU #%u after %RU64.%03u s VM uptime, %u resets ago (src: %s)\n%s", + pVM->dbgf.s.BugCheck.idCpu, cSecs, cMillis, VMGetResetCount(pVM) - pVM->dbgf.s.BugCheck.uResetNo, + pszSource, szDetails); + } + else + pHlp->pfnPrintf(pHlp, "No bug check reported.\n"); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFR3Flow.cpp b/src/VBox/VMM/VMMR3/DBGFR3Flow.cpp new file mode 100644 index 00000000..d53ead79 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFR3Flow.cpp @@ -0,0 +1,2266 @@ +/* $Id: DBGFR3Flow.cpp $ */ +/** @file + * DBGF - Debugger Facility, Control Flow Graph Interface (CFG). + */ + +/* + * Copyright (C) 2016-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/** @page pg_dbgf_cfg DBGFR3Flow - Control Flow Graph Interface + * + * The control flow graph interface provides an API to disassemble + * guest code providing the result in a control flow graph. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include "DBGFInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ + +/** + * Internal control flow graph state. + */ +typedef struct DBGFFLOWINT +{ + /** Reference counter. */ + uint32_t volatile cRefs; + /** Internal reference counter for basic blocks. */ + uint32_t volatile cRefsBb; + /** Flags during creation. */ + uint32_t fFlags; + /** List of all basic blocks. */ + RTLISTANCHOR LstFlowBb; + /** List of identified branch tables. */ + RTLISTANCHOR LstBranchTbl; + /** Number of basic blocks in this control flow graph. */ + uint32_t cBbs; + /** Number of branch tables in this control flow graph. */ + uint32_t cBranchTbls; + /** The lowest addres of a basic block. */ + DBGFADDRESS AddrLowest; + /** The highest address of a basic block. */ + DBGFADDRESS AddrHighest; + /** String cache for disassembled instructions. */ + RTSTRCACHE hStrCacheInstr; +} DBGFFLOWINT; +/** Pointer to an internal control flow graph state. */ +typedef DBGFFLOWINT *PDBGFFLOWINT; + +/** + * Instruction record + */ +typedef struct DBGFFLOWBBINSTR +{ + /** Instruction address. */ + DBGFADDRESS AddrInstr; + /** Size of instruction. */ + uint32_t cbInstr; + /** Disassembled instruction string. */ + const char *pszInstr; +} DBGFFLOWBBINSTR; +/** Pointer to an instruction record. */ +typedef DBGFFLOWBBINSTR *PDBGFFLOWBBINSTR; + + +/** + * A branch table identified by the graph processor. + */ +typedef struct DBGFFLOWBRANCHTBLINT +{ + /** Node for the list of branch tables. */ + RTLISTNODE NdBranchTbl; + /** The owning control flow graph. */ + PDBGFFLOWINT pFlow; + /** Reference counter. */ + uint32_t volatile cRefs; + /** The general register index holding the bracnh table base. */ + uint8_t idxGenRegBase; + /** Start address of the branch table. */ + DBGFADDRESS AddrStart; + /** Number of valid entries in the branch table. */ + uint32_t cSlots; + /** The addresses contained in the branch table - variable in size. */ + DBGFADDRESS aAddresses[1]; +} DBGFFLOWBRANCHTBLINT; +/** Pointer to a branch table structure. */ +typedef DBGFFLOWBRANCHTBLINT *PDBGFFLOWBRANCHTBLINT; + + +/** + * Internal control flow graph basic block state. + */ +typedef struct DBGFFLOWBBINT +{ + /** Node for the list of all basic blocks. */ + RTLISTNODE NdFlowBb; + /** The control flow graph the basic block belongs to. 
*/ + PDBGFFLOWINT pFlow; + /** Reference counter. */ + uint32_t volatile cRefs; + /** Basic block end type. */ + DBGFFLOWBBENDTYPE enmEndType; + /** Start address of this basic block. */ + DBGFADDRESS AddrStart; + /** End address of this basic block. */ + DBGFADDRESS AddrEnd; + /** Address of the block succeeding. + * This is valid for conditional jumps + * (the other target is referenced by AddrEnd+1) and + * unconditional jumps (not ret, iret, etc.) except + * if we can't infer the jump target (jmp *eax for example). */ + DBGFADDRESS AddrTarget; + /** The indirect branch table identified for indirect branches. */ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl; + /** Last status error code if DBGF_FLOW_BB_F_INCOMPLETE_ERR is set. */ + int rcError; + /** Error message if DBGF_FLOW_BB_F_INCOMPLETE_ERR is set. */ + char *pszErr; + /** Flags for this basic block. */ + uint32_t fFlags; + /** Number of instructions in this basic block. */ + uint32_t cInstr; + /** Maximum number of instruction records for this basic block. */ + uint32_t cInstrMax; + /** Instruction records, variable in size. */ + DBGFFLOWBBINSTR aInstr[1]; +} DBGFFLOWBBINT; +/** Pointer to an internal control flow graph basic block state. */ +typedef DBGFFLOWBBINT *PDBGFFLOWBBINT; + + +/** + * Control flow graph iterator state. + */ +typedef struct DBGFFLOWITINT +{ + /** Pointer to the control flow graph (holding a reference). */ + PDBGFFLOWINT pFlow; + /** Next basic block to return. */ + uint32_t idxBbNext; + /** Array of basic blocks sorted by the specified order - variable in size. */ + PDBGFFLOWBBINT apBb[1]; +} DBGFFLOWITINT; +/** Pointer to the internal control flow graph iterator state. */ +typedef DBGFFLOWITINT *PDBGFFLOWITINT; + + +/** + * Control flow graph branch table iterator state. + */ +typedef struct DBGFFLOWBRANCHTBLITINT +{ + /** Pointer to the control flow graph (holding a reference). */ + PDBGFFLOWINT pFlow; + /** Next branch table to return. */ + uint32_t idxTblNext; + /** Array of branch table pointers sorted by the specified order - variable in size. */ + PDBGFFLOWBRANCHTBLINT apBranchTbl[1]; +} DBGFFLOWBRANCHTBLITINT; +/** Pointer to the internal control flow graph branch table iterator state. */ +typedef DBGFFLOWBRANCHTBLITINT *PDBGFFLOWBRANCHTBLITINT; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ + +static uint32_t dbgfR3FlowBbReleaseInt(PDBGFFLOWBBINT pFlowBb, bool fMayDestroyFlow); +static void dbgfR3FlowBranchTblDestroy(PDBGFFLOWBRANCHTBLINT pFlowBranchTbl); + + +/** + * Checks whether both addresses are equal. + * + * @returns true if both addresses point to the same location, false otherwise. + * @param pAddr1 First address. + * @param pAddr2 Second address. + */ +static bool dbgfR3FlowAddrEqual(PDBGFADDRESS pAddr1, PDBGFADDRESS pAddr2) +{ + return pAddr1->Sel == pAddr2->Sel + && pAddr1->off == pAddr2->off; +} + + +/** + * Checks whether the first given address is lower than the second one. + * + * @returns true if both addresses point to the same location, false otherwise. + * @param pAddr1 First address. + * @param pAddr2 Second address. + */ +static bool dbgfR3FlowAddrLower(PDBGFADDRESS pAddr1, PDBGFADDRESS pAddr2) +{ + return pAddr1->Sel == pAddr2->Sel + && pAddr1->off < pAddr2->off; +} + + +/** + * Checks whether the given basic block and address intersect. 
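 * An address intersects when it uses the same selector and falls inside the
 * inclusive range [AddrStart, AddrEnd] of the block. Illustrative example
 * (made-up addresses): a block covering 0010:00401000..0010:0040100f intersects
 * 0010:00401008, but neither 0010:00401010 nor 0020:00401008.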
+ * + * @returns true if they intersect, false otherwise. + * @param pFlowBb The basic block to check. + * @param pAddr The address to check for. + */ +static bool dbgfR3FlowAddrIntersect(PDBGFFLOWBBINT pFlowBb, PDBGFADDRESS pAddr) +{ + return (pFlowBb->AddrStart.Sel == pAddr->Sel) + && (pFlowBb->AddrStart.off <= pAddr->off) + && (pFlowBb->AddrEnd.off >= pAddr->off); +} + + +/** + * Returns the distance of the two given addresses. + * + * @returns Distance of the addresses. + * @param pAddr1 The first address. + * @param pAddr2 The second address. + */ +static RTGCUINTPTR dbgfR3FlowAddrGetDistance(PDBGFADDRESS pAddr1, PDBGFADDRESS pAddr2) +{ + if (pAddr1->Sel == pAddr2->Sel) + { + if (pAddr1->off >= pAddr2->off) + return pAddr1->off - pAddr2->off; + else + return pAddr2->off - pAddr1->off; + } + else + AssertFailed(); + + return 0; +} + + +/** + * Creates a new basic block. + * + * @returns Pointer to the basic block on success or NULL if out of memory. + * @param pThis The control flow graph. + * @param pAddrStart The start of the basic block. + * @param fFlowBbFlags Additional flags for this bascic block. + * @param cInstrMax Maximum number of instructions this block can hold initially. + */ +static PDBGFFLOWBBINT dbgfR3FlowBbCreate(PDBGFFLOWINT pThis, PDBGFADDRESS pAddrStart, uint32_t fFlowBbFlags, + uint32_t cInstrMax) +{ + PDBGFFLOWBBINT pFlowBb = (PDBGFFLOWBBINT)RTMemAllocZ(RT_UOFFSETOF_DYN(DBGFFLOWBBINT, aInstr[cInstrMax])); + if (RT_LIKELY(pFlowBb)) + { + RTListInit(&pFlowBb->NdFlowBb); + pFlowBb->cRefs = 1; + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_INVALID; + pFlowBb->pFlow = pThis; + pFlowBb->fFlags = DBGF_FLOW_BB_F_EMPTY | fFlowBbFlags; + pFlowBb->AddrStart = *pAddrStart; + pFlowBb->AddrEnd = *pAddrStart; + pFlowBb->rcError = VINF_SUCCESS; + pFlowBb->pszErr = NULL; + pFlowBb->cInstr = 0; + pFlowBb->cInstrMax = cInstrMax; + pFlowBb->pFlowBranchTbl = NULL; + ASMAtomicIncU32(&pThis->cRefsBb); + } + + return pFlowBb; +} + + +/** + * Creates an empty branch table with the given size. + * + * @returns Pointer to the empty branch table on success or NULL if out of memory. + * @param pThis The control flow graph. + * @param pAddrStart The start of the branch table. + * @param idxGenRegBase The general register index holding the base address. + * @param cSlots Number of slots the table has. + */ +static PDBGFFLOWBRANCHTBLINT +dbgfR3FlowBranchTblCreate(PDBGFFLOWINT pThis, PDBGFADDRESS pAddrStart, uint8_t idxGenRegBase, uint32_t cSlots) +{ + PDBGFFLOWBRANCHTBLINT pBranchTbl = (PDBGFFLOWBRANCHTBLINT)RTMemAllocZ(RT_UOFFSETOF_DYN(DBGFFLOWBRANCHTBLINT, + aAddresses[cSlots])); + if (RT_LIKELY(pBranchTbl)) + { + RTListInit(&pBranchTbl->NdBranchTbl); + pBranchTbl->pFlow = pThis; + pBranchTbl->idxGenRegBase = idxGenRegBase; + pBranchTbl->AddrStart = *pAddrStart; + pBranchTbl->cSlots = cSlots; + pBranchTbl->cRefs = 1; + } + + return pBranchTbl; +} + + +/** + * Destroys a control flow graph. + * + * @returns nothing. + * @param pThis The control flow graph to destroy. + */ +static void dbgfR3FlowDestroy(PDBGFFLOWINT pThis) +{ + /* Defer destruction if there are still basic blocks referencing us. */ + PDBGFFLOWBBINT pFlowBb; + PDBGFFLOWBBINT pFlowBbNext; + RTListForEachSafe(&pThis->LstFlowBb, pFlowBb, pFlowBbNext, DBGFFLOWBBINT, NdFlowBb) + { + dbgfR3FlowBbReleaseInt(pFlowBb, false /*fMayDestroyFlow*/); + } + + Assert(!pThis->cRefs); + if (!pThis->cRefsBb) + { + /* Destroy the branch tables. 
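 * (Only reached once the last basic block referencing this graph is gone,
 * see the cRefsBb check right above.)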
*/ + PDBGFFLOWBRANCHTBLINT pTbl = NULL; + PDBGFFLOWBRANCHTBLINT pTblNext = NULL; + RTListForEachSafe(&pThis->LstBranchTbl, pTbl, pTblNext, DBGFFLOWBRANCHTBLINT, NdBranchTbl) + { + dbgfR3FlowBranchTblDestroy(pTbl); + } + + RTStrCacheDestroy(pThis->hStrCacheInstr); + RTMemFree(pThis); + } +} + + +/** + * Destroys a basic block. + * + * @returns nothing. + * @param pFlowBb The basic block to destroy. + * @param fMayDestroyFlow Flag whether the control flow graph container + * should be destroyed when there is nothing referencing it. + */ +static void dbgfR3FlowBbDestroy(PDBGFFLOWBBINT pFlowBb, bool fMayDestroyFlow) +{ + PDBGFFLOWINT pThis = pFlowBb->pFlow; + + RTListNodeRemove(&pFlowBb->NdFlowBb); + pThis->cBbs--; + for (uint32_t idxInstr = 0; idxInstr < pFlowBb->cInstr; idxInstr++) + RTStrCacheRelease(pThis->hStrCacheInstr, pFlowBb->aInstr[idxInstr].pszInstr); + uint32_t cRefsBb = ASMAtomicDecU32(&pThis->cRefsBb); + RTMemFree(pFlowBb); + + if (!cRefsBb && !pThis->cRefs && fMayDestroyFlow) + dbgfR3FlowDestroy(pThis); +} + + +/** + * Destroys a given branch table. + * + * @returns nothing. + * @param pFlowBranchTbl The flow branch table to destroy. + */ +static void dbgfR3FlowBranchTblDestroy(PDBGFFLOWBRANCHTBLINT pFlowBranchTbl) +{ + RTListNodeRemove(&pFlowBranchTbl->NdBranchTbl); + RTMemFree(pFlowBranchTbl); +} + + +/** + * Internal basic block release worker. + * + * @returns New reference count of the released basic block, on 0 + * it is destroyed. + * @param pFlowBb The basic block to release. + * @param fMayDestroyFlow Flag whether the control flow graph container + * should be destroyed when there is nothing referencing it. + */ +static uint32_t dbgfR3FlowBbReleaseInt(PDBGFFLOWBBINT pFlowBb, bool fMayDestroyFlow) +{ + uint32_t cRefs = ASMAtomicDecU32(&pFlowBb->cRefs); + AssertMsg(cRefs < _1M, ("%#x %p %d\n", cRefs, pFlowBb, pFlowBb->enmEndType)); + if (cRefs == 0) + dbgfR3FlowBbDestroy(pFlowBb, fMayDestroyFlow); + return cRefs; +} + + +/** + * Links the given basic block into the control flow graph. + * + * @returns nothing. + * @param pThis The control flow graph to link into. + * @param pFlowBb The basic block to link. + */ +DECLINLINE(void) dbgfR3FlowLink(PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb) +{ + RTListAppend(&pThis->LstFlowBb, &pFlowBb->NdFlowBb); + pThis->cBbs++; +} + + +/** + * Links the given branch table into the control flow graph. + * + * @returns nothing. + * @param pThis The control flow graph to link into. + * @param pBranchTbl The branch table to link. + */ +DECLINLINE(void) dbgfR3FlowBranchTblLink(PDBGFFLOWINT pThis, PDBGFFLOWBRANCHTBLINT pBranchTbl) +{ + RTListAppend(&pThis->LstBranchTbl, &pBranchTbl->NdBranchTbl); + pThis->cBranchTbls++; +} + + +/** + * Returns the first unpopulated basic block of the given control flow graph. + * + * @returns The first unpopulated control flow graph or NULL if not found. + * @param pThis The control flow graph. + */ +DECLINLINE(PDBGFFLOWBBINT) dbgfR3FlowGetUnpopulatedBb(PDBGFFLOWINT pThis) +{ + PDBGFFLOWBBINT pFlowBb; + RTListForEach(&pThis->LstFlowBb, pFlowBb, DBGFFLOWBBINT, NdFlowBb) + { + if (pFlowBb->fFlags & DBGF_FLOW_BB_F_EMPTY) + return pFlowBb; + } + + return NULL; +} + + +/** + * Returns the branch table with the given address if it exists. + * + * @returns Pointer to the branch table record or NULL if not found. + * @param pThis The control flow graph. + * @param pAddrTbl The branch table address. 
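 * @note Performs a linear search over the list of discovered branch tables,
 *       comparing start addresses.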
+ */ +DECLINLINE(PDBGFFLOWBRANCHTBLINT) dbgfR3FlowBranchTblFindByAddr(PDBGFFLOWINT pThis, PDBGFADDRESS pAddrTbl) +{ + PDBGFFLOWBRANCHTBLINT pTbl; + RTListForEach(&pThis->LstBranchTbl, pTbl, DBGFFLOWBRANCHTBLINT, NdBranchTbl) + { + if (dbgfR3FlowAddrEqual(&pTbl->AddrStart, pAddrTbl)) + return pTbl; + } + + return NULL; +} + + +/** + * Sets the given error status for the basic block. + * + * @returns nothing. + * @param pFlowBb The basic block causing the error. + * @param rcError The error to set. + * @param pszFmt Format string of the error description. + * @param ... Arguments for the format string. + */ +static void dbgfR3FlowBbSetError(PDBGFFLOWBBINT pFlowBb, int rcError, const char *pszFmt, ...) +{ + va_list va; + va_start(va, pszFmt); + + Assert(!(pFlowBb->fFlags & DBGF_FLOW_BB_F_INCOMPLETE_ERR)); + pFlowBb->fFlags |= DBGF_FLOW_BB_F_INCOMPLETE_ERR; + pFlowBb->fFlags &= ~DBGF_FLOW_BB_F_EMPTY; + pFlowBb->rcError = rcError; + pFlowBb->pszErr = RTStrAPrintf2V(pszFmt, va); + va_end(va); +} + + +/** + * Checks whether the given control flow graph contains a basic block + * with the given start address. + * + * @returns true if there is a basic block with the start address, false otherwise. + * @param pThis The control flow graph. + * @param pAddr The address to check for. + */ +static bool dbgfR3FlowHasBbWithStartAddr(PDBGFFLOWINT pThis, PDBGFADDRESS pAddr) +{ + PDBGFFLOWBBINT pFlowBb; + RTListForEach(&pThis->LstFlowBb, pFlowBb, DBGFFLOWBBINT, NdFlowBb) + { + if (dbgfR3FlowAddrEqual(&pFlowBb->AddrStart, pAddr)) + return true; + } + return false; +} + + +/** + * Splits a given basic block into two at the given address. + * + * @returns VBox status code. + * @param pThis The control flow graph. + * @param pFlowBb The basic block to split. + * @param pAddr The address to split at. + */ +static int dbgfR3FlowBbSplit(PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb, PDBGFADDRESS pAddr) +{ + int rc = VINF_SUCCESS; + uint32_t idxInstrSplit; + + /* If the block is empty it will get populated later so there is nothing to split, + * same if the start address equals. */ + if ( pFlowBb->fFlags & DBGF_FLOW_BB_F_EMPTY + || dbgfR3FlowAddrEqual(&pFlowBb->AddrStart, pAddr)) + return VINF_SUCCESS; + + /* Find the instruction to split at. */ + for (idxInstrSplit = 1; idxInstrSplit < pFlowBb->cInstr; idxInstrSplit++) + if (dbgfR3FlowAddrEqual(&pFlowBb->aInstr[idxInstrSplit].AddrInstr, pAddr)) + break; + + Assert(idxInstrSplit > 0); + + /* + * Given address might not be on instruction boundary, this is not supported + * so far and results in an error. + */ + if (idxInstrSplit < pFlowBb->cInstr) + { + /* Create new basic block. */ + uint32_t cInstrNew = pFlowBb->cInstr - idxInstrSplit; + PDBGFFLOWBBINT pFlowBbNew = dbgfR3FlowBbCreate(pThis, &pFlowBb->aInstr[idxInstrSplit].AddrInstr, + 0 /*fFlowBbFlags*/, cInstrNew); + if (pFlowBbNew) + { + /* Move instructions over. */ + pFlowBbNew->cInstr = cInstrNew; + pFlowBbNew->AddrEnd = pFlowBb->AddrEnd; + pFlowBbNew->enmEndType = pFlowBb->enmEndType; + pFlowBbNew->AddrTarget = pFlowBb->AddrTarget; + pFlowBbNew->fFlags = pFlowBb->fFlags & ~DBGF_FLOW_BB_F_ENTRY; + pFlowBbNew->pFlowBranchTbl = pFlowBb->pFlowBranchTbl; + pFlowBb->pFlowBranchTbl = NULL; + + /* Move any error to the new basic block and clear them in the old basic block. 
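 * (Presumably because the error was raised while disassembling the tail
 * instructions, which after the split belong to the new block.)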
*/ + pFlowBbNew->rcError = pFlowBb->rcError; + pFlowBbNew->pszErr = pFlowBb->pszErr; + pFlowBb->rcError = VINF_SUCCESS; + pFlowBb->pszErr = NULL; + pFlowBb->fFlags &= ~DBGF_FLOW_BB_F_INCOMPLETE_ERR; + + memcpy(&pFlowBbNew->aInstr[0], &pFlowBb->aInstr[idxInstrSplit], cInstrNew * sizeof(DBGFFLOWBBINSTR)); + pFlowBb->cInstr = idxInstrSplit; + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_UNCOND; + pFlowBb->AddrEnd = pFlowBb->aInstr[idxInstrSplit-1].AddrInstr; + pFlowBb->AddrTarget = pFlowBbNew->AddrStart; + DBGFR3AddrAdd(&pFlowBb->AddrEnd, pFlowBb->aInstr[idxInstrSplit-1].cbInstr - 1); + RT_BZERO(&pFlowBb->aInstr[idxInstrSplit], cInstrNew * sizeof(DBGFFLOWBBINSTR)); + + dbgfR3FlowLink(pThis, pFlowBbNew); + } + else + rc = VERR_NO_MEMORY; + } + else + AssertFailedStmt(rc = VERR_INVALID_STATE); /** @todo Proper status code. */ + + return rc; +} + + +/** + * Makes sure there is an successor at the given address splitting already existing + * basic blocks if they intersect. + * + * @returns VBox status code. + * @param pThis The control flow graph. + * @param pAddrSucc The guest address the new successor should start at. + * @param fNewBbFlags Flags for the new basic block. + * @param pBranchTbl Branch table candidate for this basic block. + */ +static int dbgfR3FlowBbSuccessorAdd(PDBGFFLOWINT pThis, PDBGFADDRESS pAddrSucc, + uint32_t fNewBbFlags, PDBGFFLOWBRANCHTBLINT pBranchTbl) +{ + PDBGFFLOWBBINT pFlowBb; + RTListForEach(&pThis->LstFlowBb, pFlowBb, DBGFFLOWBBINT, NdFlowBb) + { + /* + * The basic block must be split if it intersects with the given address + * and the start address does not equal the given one. + */ + if (dbgfR3FlowAddrIntersect(pFlowBb, pAddrSucc)) + return dbgfR3FlowBbSplit(pThis, pFlowBb, pAddrSucc); + } + + int rc = VINF_SUCCESS; + pFlowBb = dbgfR3FlowBbCreate(pThis, pAddrSucc, fNewBbFlags, 10); + if (pFlowBb) + { + pFlowBb->pFlowBranchTbl = pBranchTbl; + dbgfR3FlowLink(pThis, pFlowBb); + } + else + rc = VERR_NO_MEMORY; + + return rc; +} + + +/** + * Returns whether the parameter indicates an indirect branch. + * + * @returns Flag whether this is an indirect branch. + * @param pDisParam The parameter from the disassembler. + */ +DECLINLINE(bool) dbgfR3FlowBranchTargetIsIndirect(PDISOPPARAM pDisParam) +{ + bool fIndirect = true; + + if ( pDisParam->fUse & (DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64) + || pDisParam->fUse & (DISUSE_IMMEDIATE8_REL | DISUSE_IMMEDIATE16_REL | DISUSE_IMMEDIATE32_REL | DISUSE_IMMEDIATE64_REL)) + fIndirect = false; + + return fIndirect; +} + + +/** + * Resolves the direct branch target address if possible from the given instruction address + * and instruction parameter. + * + * @returns VBox status code. + * @param pUVM The usermode VM handle. + * @param idCpu CPU id for resolving the address. + * @param pDisParam The parameter from the disassembler. + * @param pAddrInstr The instruction address. + * @param cbInstr Size of instruction in bytes. + * @param fRelJmp Flag whether this is a reltive jump. + * @param pAddrJmpTarget Where to store the address to the jump target on success. + */ +static int dbgfR3FlowQueryDirectBranchTarget(PUVM pUVM, VMCPUID idCpu, PDISOPPARAM pDisParam, PDBGFADDRESS pAddrInstr, + uint32_t cbInstr, bool fRelJmp, PDBGFADDRESS pAddrJmpTarget) +{ + int rc = VINF_SUCCESS; + + Assert(!dbgfR3FlowBranchTargetIsIndirect(pDisParam)); + + /* Relative jumps are always from the beginning of the next instruction. 
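 * Worked example with illustrative numbers: a 2-byte 'jmp short' at 0x1000
 * with an 8-bit displacement of 0x10 targets 0x1000 + 2 + 0x10 = 0x1012,
 * while a displacement of -0x20 would target 0x1002 - 0x20 = 0x0fe2.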
*/ + *pAddrJmpTarget = *pAddrInstr; + DBGFR3AddrAdd(pAddrJmpTarget, cbInstr); + + if (fRelJmp) + { + RTGCINTPTR iRel = 0; + if (pDisParam->fUse & DISUSE_IMMEDIATE8_REL) + iRel = (int8_t)pDisParam->uValue; + else if (pDisParam->fUse & DISUSE_IMMEDIATE16_REL) + iRel = (int16_t)pDisParam->uValue; + else if (pDisParam->fUse & DISUSE_IMMEDIATE32_REL) + iRel = (int32_t)pDisParam->uValue; + else if (pDisParam->fUse & DISUSE_IMMEDIATE64_REL) + iRel = (int64_t)pDisParam->uValue; + else + AssertFailedStmt(rc = VERR_NOT_SUPPORTED); + + if (iRel < 0) + DBGFR3AddrSub(pAddrJmpTarget, -iRel); + else + DBGFR3AddrAdd(pAddrJmpTarget, iRel); + } + else + { + if (pDisParam->fUse & (DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64)) + { + if (DBGFADDRESS_IS_FLAT(pAddrInstr)) + DBGFR3AddrFromFlat(pUVM, pAddrJmpTarget, pDisParam->uValue); + else + DBGFR3AddrFromSelOff(pUVM, idCpu, pAddrJmpTarget, pAddrInstr->Sel, pDisParam->uValue); + } + else + AssertFailedStmt(rc = VERR_INVALID_STATE); + } + + return rc; +} + + +/** + * Returns the CPU mode based on the given assembler flags. + * + * @returns CPU mode. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param fFlagsDisasm The flags used for disassembling. + */ +static CPUMMODE dbgfR3FlowGetDisasCpuMode(PUVM pUVM, VMCPUID idCpu, uint32_t fFlagsDisasm) +{ + CPUMMODE enmMode = CPUMMODE_INVALID; + uint32_t fDisasMode = fFlagsDisasm & DBGF_DISAS_FLAGS_MODE_MASK; + if (fDisasMode == DBGF_DISAS_FLAGS_DEFAULT_MODE) + enmMode = DBGFR3CpuGetMode(pUVM, idCpu); + else if ( fDisasMode == DBGF_DISAS_FLAGS_16BIT_MODE + || fDisasMode == DBGF_DISAS_FLAGS_16BIT_REAL_MODE) + enmMode = CPUMMODE_REAL; + else if (fDisasMode == DBGF_DISAS_FLAGS_32BIT_MODE) + enmMode = CPUMMODE_PROTECTED; + else if (fDisasMode == DBGF_DISAS_FLAGS_64BIT_MODE) + enmMode = CPUMMODE_LONG; + else + AssertFailed(); + + return enmMode; +} + + +/** + * Searches backwards in the given basic block starting the given instruction index for + * a mov instruction with the given register as the target where the constant looks like + * a pointer. + * + * @returns Flag whether a candidate was found. + * @param pFlowBb The basic block containing the indirect branch. + * @param idxRegTgt The general register the mov targets. + * @param cbPtr The pointer size to look for. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param fFlagsDisasm The flags to use for disassembling. + * @param pidxInstrStart The instruction index to start searching for on input, + * The last instruction evaluated on output. + * @param pAddrDest Where to store the candidate address on success. + */ +static bool dbgfR3FlowSearchMovWithConstantPtrSizeBackwards(PDBGFFLOWBBINT pFlowBb, uint8_t idxRegTgt, uint32_t cbPtr, + PUVM pUVM, VMCPUID idCpu, uint32_t fFlagsDisasm, + uint32_t *pidxInstrStart, PDBGFADDRESS pAddrDest) +{ + bool fFound = false; + uint32_t idxInstrCur = *pidxInstrStart; + uint32_t cInstrCheck = idxInstrCur + 1; + + for (;;) + { + /** @todo Avoid to disassemble again. 
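 * The block only caches the formatted instruction string (DBGFFLOWBBINSTR keeps
 * no decoder state), so this backward search currently has to invoke the
 * disassembler again for every instruction it inspects.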
*/ + PDBGFFLOWBBINSTR pInstr = &pFlowBb->aInstr[idxInstrCur]; + DBGFDISSTATE DisState; + char szOutput[_4K]; + + int rc = dbgfR3DisasInstrStateEx(pUVM, idCpu, &pInstr->AddrInstr, fFlagsDisasm, + &szOutput[0], sizeof(szOutput), &DisState); + if (RT_SUCCESS(rc)) + { + if ( DisState.pCurInstr->uOpcode == OP_MOV + && (DisState.Param1.fUse & (DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64)) + && DisState.Param1.Base.idxGenReg == idxRegTgt + /*&& DisState.Param1.cb == cbPtr*/ + && DisState.Param2.cb == cbPtr + && (DisState.Param2.fUse & (DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64))) + { + /* Found possible candidate. */ + fFound = true; + if (DBGFADDRESS_IS_FLAT(&pInstr->AddrInstr)) + DBGFR3AddrFromFlat(pUVM, pAddrDest, DisState.Param2.uValue); + else + DBGFR3AddrFromSelOff(pUVM, idCpu, pAddrDest, pInstr->AddrInstr.Sel, DisState.Param2.uValue); + break; + } + } + else + break; + + cInstrCheck--; + if (!cInstrCheck) + break; + + idxInstrCur--; + } + + *pidxInstrStart = idxInstrCur; + return fFound; +} + + +/** + * Verifies the given branch table candidate and adds it to the control flow graph on success. + * + * @returns VBox status code. + * @param pThis The flow control graph. + * @param pFlowBb The basic block causing the indirect branch. + * @param pAddrBranchTbl Address of the branch table location. + * @param idxGenRegBase The general register holding the base address. + * @param cbPtr Guest pointer size. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * + * @todo Handle branch tables greater than 4KB (lazy coder). + */ +static int dbgfR3FlowBranchTblVerifyAdd(PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb, PDBGFADDRESS pAddrBranchTbl, + uint8_t idxGenRegBase, uint32_t cbPtr, PUVM pUVM, VMCPUID idCpu) +{ + int rc = VINF_SUCCESS; + PDBGFFLOWBRANCHTBLINT pBranchTbl = dbgfR3FlowBranchTblFindByAddr(pThis, pAddrBranchTbl); + + if (!pBranchTbl) + { + uint32_t cSlots = 0; + uint8_t abBuf[_4K]; + + rc = DBGFR3MemRead(pUVM, idCpu, pAddrBranchTbl, &abBuf[0], sizeof(abBuf)); + if (RT_SUCCESS(rc)) + { + uint8_t *pbBuf = &abBuf[0]; + while (pbBuf < &abBuf[0] + sizeof(abBuf)) + { + DBGFADDRESS AddrDest; + RTGCUINTPTR GCPtr = cbPtr == sizeof(uint64_t) + ? *(uint64_t *)pbBuf + : cbPtr == sizeof(uint32_t) + ? *(uint32_t *)pbBuf + : *(uint16_t *)pbBuf; + pbBuf += cbPtr; + + if (DBGFADDRESS_IS_FLAT(pAddrBranchTbl)) + DBGFR3AddrFromFlat(pUVM, &AddrDest, GCPtr); + else + DBGFR3AddrFromSelOff(pUVM, idCpu, &AddrDest, pAddrBranchTbl->Sel, GCPtr); + + if (dbgfR3FlowAddrGetDistance(&AddrDest, &pFlowBb->AddrEnd) > _512K) + break; + + cSlots++; + } + + /* If there are any slots use it. */ + if (cSlots) + { + pBranchTbl = dbgfR3FlowBranchTblCreate(pThis, pAddrBranchTbl, idxGenRegBase, cSlots); + if (pBranchTbl) + { + /* Get the addresses. */ + for (unsigned i = 0; i < cSlots && RT_SUCCESS(rc); i++) + { + RTGCUINTPTR GCPtr = cbPtr == sizeof(uint64_t) + ? *(uint64_t *)&abBuf[i * cbPtr] + : cbPtr == sizeof(uint32_t) + ? 
*(uint32_t *)&abBuf[i * cbPtr] + : *(uint16_t *)&abBuf[i * cbPtr]; + + if (DBGFADDRESS_IS_FLAT(pAddrBranchTbl)) + DBGFR3AddrFromFlat(pUVM, &pBranchTbl->aAddresses[i], GCPtr); + else + DBGFR3AddrFromSelOff(pUVM, idCpu, &pBranchTbl->aAddresses[i], + pAddrBranchTbl->Sel, GCPtr); + rc = dbgfR3FlowBbSuccessorAdd(pThis, &pBranchTbl->aAddresses[i], DBGF_FLOW_BB_F_BRANCH_TABLE, + pBranchTbl); + } + dbgfR3FlowBranchTblLink(pThis, pBranchTbl); + } + else + rc = VERR_NO_MEMORY; + } + } + } + + if (pBranchTbl) + pFlowBb->pFlowBranchTbl = pBranchTbl; + + return rc; +} + + +/** + * Checks whether the location for the branch target candidate contains a valid code address. + * + * @returns VBox status code. + * @param pThis The flow control graph. + * @param pFlowBb The basic block causing the indirect branch. + * @param pAddrBranchTgt Address of the branch target location. + * @param idxGenRegBase The general register holding the address of the location. + * @param cbPtr Guest pointer size. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param fBranchTbl Flag whether this is a possible branch table containing multiple + * targets. + */ +static int dbgfR3FlowCheckBranchTargetLocation(PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb, PDBGFADDRESS pAddrBranchTgt, + uint8_t idxGenRegBase, uint32_t cbPtr, PUVM pUVM, VMCPUID idCpu, bool fBranchTbl) +{ + int rc = VINF_SUCCESS; + + if (!fBranchTbl) + { + union { uint16_t u16Val; uint32_t u32Val; uint64_t u64Val; } uVal; + rc = DBGFR3MemRead(pUVM, idCpu, pAddrBranchTgt, &uVal, cbPtr); + if (RT_SUCCESS(rc)) + { + DBGFADDRESS AddrTgt; + RTGCUINTPTR GCPtr = cbPtr == sizeof(uint64_t) + ? uVal.u64Val + : cbPtr == sizeof(uint32_t) + ? uVal.u32Val + : uVal.u16Val; + if (DBGFADDRESS_IS_FLAT(pAddrBranchTgt)) + DBGFR3AddrFromFlat(pUVM, &AddrTgt, GCPtr); + else + DBGFR3AddrFromSelOff(pUVM, idCpu, &AddrTgt, pAddrBranchTgt->Sel, GCPtr); + + if (dbgfR3FlowAddrGetDistance(&AddrTgt, &pFlowBb->AddrEnd) <= _128K) + { + /* Finish the basic block. */ + pFlowBb->AddrTarget = AddrTgt; + rc = dbgfR3FlowBbSuccessorAdd(pThis, &AddrTgt, + (pFlowBb->fFlags & DBGF_FLOW_BB_F_BRANCH_TABLE), + pFlowBb->pFlowBranchTbl); + } + else + rc = VERR_NOT_FOUND; + } + } + else + rc = dbgfR3FlowBranchTblVerifyAdd(pThis, pFlowBb, pAddrBranchTgt, + idxGenRegBase, cbPtr, pUVM, idCpu); + + return rc; +} + + +/** + * Tries to resolve the indirect branch. + * + * @returns VBox status code. + * @param pThis The flow control graph. + * @param pFlowBb The basic block causing the indirect branch. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param pDisParam The parameter from the disassembler. + * @param fFlagsDisasm Flags for the disassembler. + */ +static int dbgfR3FlowTryResolveIndirectBranch(PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb, PUVM pUVM, + VMCPUID idCpu, PDISOPPARAM pDisParam, uint32_t fFlagsDisasm) +{ + Assert(dbgfR3FlowBranchTargetIsIndirect(pDisParam)); + + uint32_t cbPtr = 0; + CPUMMODE enmMode = dbgfR3FlowGetDisasCpuMode(pUVM, idCpu, fFlagsDisasm); + + switch (enmMode) + { + case CPUMMODE_REAL: + cbPtr = sizeof(uint16_t); + break; + case CPUMMODE_PROTECTED: + cbPtr = sizeof(uint32_t); + break; + case CPUMMODE_LONG: + cbPtr = sizeof(uint64_t); + break; + default: + AssertMsgFailed(("Invalid CPU mode %u\n", enmMode)); + } + + if (pDisParam->fUse & DISUSE_BASE) + { + uint8_t idxRegBase = pDisParam->Base.idxGenReg; + + /* Check that the used register size and the pointer size match. 
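 * E.g. (illustrative): in long mode an indirect 'jmp qword [rax + rdx*8]' pairs
 * a 64-bit base register with 8-byte slots, while 'jmp dword [eax + ebx*4]' in
 * protected mode pairs a 32-bit register with 4-byte slots.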
*/ + if ( ((pDisParam->fUse & DISUSE_REG_GEN16) && cbPtr == sizeof(uint16_t)) + || ((pDisParam->fUse & DISUSE_REG_GEN32) && cbPtr == sizeof(uint32_t)) + || ((pDisParam->fUse & DISUSE_REG_GEN64) && cbPtr == sizeof(uint64_t))) + { + /* + * Search all instructions backwards until a move to the used general register + * is detected with a constant using the pointer size. + */ + uint32_t idxInstrStart = pFlowBb->cInstr - 1 - 1; /* Don't look at the branch. */ + bool fCandidateFound = false; + bool fBranchTbl = RT_BOOL(pDisParam->fUse & DISUSE_INDEX); + DBGFADDRESS AddrBranchTgt; + do + { + fCandidateFound = dbgfR3FlowSearchMovWithConstantPtrSizeBackwards(pFlowBb, idxRegBase, cbPtr, + pUVM, idCpu, fFlagsDisasm, + &idxInstrStart, &AddrBranchTgt); + if (fCandidateFound) + { + /* Check that the address is not too far away from the instruction address. */ + RTGCUINTPTR offPtr = dbgfR3FlowAddrGetDistance(&AddrBranchTgt, &pFlowBb->AddrEnd); + if (offPtr <= 20 * _1M) + { + /* Read the content at the address and check that it is near this basic block too. */ + int rc = dbgfR3FlowCheckBranchTargetLocation(pThis, pFlowBb, &AddrBranchTgt, idxRegBase, + cbPtr, pUVM, idCpu, fBranchTbl); + if (RT_SUCCESS(rc)) + break; + fCandidateFound = false; + } + + if (idxInstrStart > 0) + idxInstrStart--; + } + } while (idxInstrStart > 0 && !fCandidateFound); + } + else + dbgfR3FlowBbSetError(pFlowBb, VERR_INVALID_STATE, + "The base register size and selected pointer size do not match (fUse=%#x cbPtr=%u)", + pDisParam->fUse, cbPtr); + } + + return VINF_SUCCESS; +} + + +/** + * Tries to resolve the indirect branch. + * + * @returns VBox status code. + * @param pThis The flow control graph. + * @param pFlowBb The basic block causing the indirect branch. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param pDisParam The parameter from the disassembler. + * @param fFlagsDisasm Flags for the disassembler. + */ +static int dbgfR3FlowBbCheckBranchTblCandidate(PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb, PUVM pUVM, + VMCPUID idCpu, PDISOPPARAM pDisParam, uint32_t fFlagsDisasm) +{ + int rc = VINF_SUCCESS; + + Assert(pFlowBb->fFlags & DBGF_FLOW_BB_F_BRANCH_TABLE && pFlowBb->pFlowBranchTbl); + + uint32_t cbPtr = 0; + CPUMMODE enmMode = dbgfR3FlowGetDisasCpuMode(pUVM, idCpu, fFlagsDisasm); + + switch (enmMode) + { + case CPUMMODE_REAL: + cbPtr = sizeof(uint16_t); + break; + case CPUMMODE_PROTECTED: + cbPtr = sizeof(uint32_t); + break; + case CPUMMODE_LONG: + cbPtr = sizeof(uint64_t); + break; + default: + AssertMsgFailed(("Invalid CPU mode %u\n", enmMode)); + } + + if (pDisParam->fUse & DISUSE_BASE) + { + uint8_t idxRegBase = pDisParam->Base.idxGenReg; + + /* Check that the used register size and the pointer size match. */ + if ( ((pDisParam->fUse & DISUSE_REG_GEN16) && cbPtr == sizeof(uint16_t)) + || ((pDisParam->fUse & DISUSE_REG_GEN32) && cbPtr == sizeof(uint32_t)) + || ((pDisParam->fUse & DISUSE_REG_GEN64) && cbPtr == sizeof(uint64_t))) + { + if (idxRegBase != pFlowBb->pFlowBranchTbl->idxGenRegBase) + { + /* Try to find the new branch table. */ + pFlowBb->pFlowBranchTbl = NULL; + rc = dbgfR3FlowTryResolveIndirectBranch(pThis, pFlowBb, pUVM, idCpu, pDisParam, fFlagsDisasm); + } + /** @todo else check that the base register is not modified in this basic block. 
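 * Without that check a reload of the base register between the table load and
 * the indirect branch would leave this code reusing a stale branch table.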
*/ + } + else + dbgfR3FlowBbSetError(pFlowBb, VERR_INVALID_STATE, + "The base register size and selected pointer size do not match (fUse=%#x cbPtr=%u)", + pDisParam->fUse, cbPtr); + } + else + dbgfR3FlowBbSetError(pFlowBb, VERR_INVALID_STATE, + "The instruction does not use a register"); + + return rc; +} + + +/** + * Processes and fills one basic block. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param pThis The control flow graph to populate. + * @param pFlowBb The basic block to fill. + * @param cbDisasmMax The maximum amount to disassemble. + * @param fFlags Combination of DBGF_DISAS_FLAGS_*. + */ +static int dbgfR3FlowBbProcess(PUVM pUVM, VMCPUID idCpu, PDBGFFLOWINT pThis, PDBGFFLOWBBINT pFlowBb, + uint32_t cbDisasmMax, uint32_t fFlags) +{ + int rc = VINF_SUCCESS; + uint32_t cbDisasmLeft = cbDisasmMax ? cbDisasmMax : UINT32_MAX; + DBGFADDRESS AddrDisasm = pFlowBb->AddrEnd; + + Assert(pFlowBb->fFlags & DBGF_FLOW_BB_F_EMPTY); + + /* + * Disassemble instruction by instruction until we get a conditional or + * unconditional jump or some sort of return. + */ + while ( cbDisasmLeft + && RT_SUCCESS(rc)) + { + DBGFDISSTATE DisState; + char szOutput[_4K]; + + /* + * Before disassembling we have to check whether the address belongs + * to another basic block and stop here. + */ + if ( !(pFlowBb->fFlags & DBGF_FLOW_BB_F_EMPTY) + && dbgfR3FlowHasBbWithStartAddr(pThis, &AddrDisasm)) + { + pFlowBb->AddrTarget = AddrDisasm; + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_UNCOND; + break; + } + + pFlowBb->fFlags &= ~DBGF_FLOW_BB_F_EMPTY; + + rc = dbgfR3DisasInstrStateEx(pUVM, idCpu, &AddrDisasm, fFlags, + &szOutput[0], sizeof(szOutput), &DisState); + if (RT_SUCCESS(rc)) + { + cbDisasmLeft -= DisState.cbInstr; + + if (pFlowBb->cInstr == pFlowBb->cInstrMax) + { + /* Reallocate. */ + RTListNodeRemove(&pFlowBb->NdFlowBb); + PDBGFFLOWBBINT pFlowBbNew = (PDBGFFLOWBBINT)RTMemRealloc(pFlowBb, + RT_UOFFSETOF_DYN(DBGFFLOWBBINT, aInstr[pFlowBb->cInstrMax + 10])); + if (pFlowBbNew) + { + pFlowBbNew->cInstrMax += 10; + pFlowBb = pFlowBbNew; + } + else + rc = VERR_NO_MEMORY; + RTListAppend(&pThis->LstFlowBb, &pFlowBb->NdFlowBb); + } + + if (RT_SUCCESS(rc)) + { + PDBGFFLOWBBINSTR pInstr = &pFlowBb->aInstr[pFlowBb->cInstr]; + + pInstr->AddrInstr = AddrDisasm; + pInstr->cbInstr = DisState.cbInstr; + pInstr->pszInstr = RTStrCacheEnter(pThis->hStrCacheInstr, &szOutput[0]); + pFlowBb->cInstr++; + + pFlowBb->AddrEnd = AddrDisasm; + DBGFR3AddrAdd(&pFlowBb->AddrEnd, pInstr->cbInstr - 1); + DBGFR3AddrAdd(&AddrDisasm, pInstr->cbInstr); + + /* + * Check control flow instructions and create new basic blocks + * marking the current one as complete. + */ + if (DisState.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + { + uint16_t uOpc = DisState.pCurInstr->uOpcode; + + if ( uOpc == OP_RETN || uOpc == OP_RETF || uOpc == OP_IRET + || uOpc == OP_SYSEXIT || uOpc == OP_SYSRET) + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_EXIT; + else if (uOpc == OP_JMP) + { + Assert(DisState.pCurInstr->fOpType & DISOPTYPE_UNCOND_CONTROLFLOW); + + if (dbgfR3FlowBranchTargetIsIndirect(&DisState.Param1)) + { + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_UNCOND_INDIRECT_JMP; + + if (pFlowBb->fFlags & DBGF_FLOW_BB_F_BRANCH_TABLE) + { + Assert(pThis->fFlags & DBGF_FLOW_CREATE_F_TRY_RESOLVE_INDIRECT_BRANCHES); + + /* + * This basic block was already discovered by parsing a jump table and + * there should be a candidate for the branch table. Check whether it uses the + * same branch table. 
+ */ + rc = dbgfR3FlowBbCheckBranchTblCandidate(pThis, pFlowBb, pUVM, idCpu, + &DisState.Param1, fFlags); + } + else + { + if (pThis->fFlags & DBGF_FLOW_CREATE_F_TRY_RESOLVE_INDIRECT_BRANCHES) + rc = dbgfR3FlowTryResolveIndirectBranch(pThis, pFlowBb, pUVM, idCpu, + &DisState.Param1, fFlags); + else + dbgfR3FlowBbSetError(pFlowBb, VERR_NOT_SUPPORTED, + "Detected indirect branch and resolving it not being enabled"); + } + } + else + { + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_UNCOND_JMP; + + /* Create one new basic block with the jump target address. */ + rc = dbgfR3FlowQueryDirectBranchTarget(pUVM, idCpu, &DisState.Param1, &pInstr->AddrInstr, pInstr->cbInstr, + RT_BOOL(DisState.pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW), + &pFlowBb->AddrTarget); + if (RT_SUCCESS(rc)) + rc = dbgfR3FlowBbSuccessorAdd(pThis, &pFlowBb->AddrTarget, + (pFlowBb->fFlags & DBGF_FLOW_BB_F_BRANCH_TABLE), + pFlowBb->pFlowBranchTbl); + } + } + else if (uOpc != OP_CALL) + { + Assert(DisState.pCurInstr->fOpType & DISOPTYPE_COND_CONTROLFLOW); + pFlowBb->enmEndType = DBGFFLOWBBENDTYPE_COND; + + /* + * Create two new basic blocks, one with the jump target address + * and one starting after the current instruction. + */ + rc = dbgfR3FlowBbSuccessorAdd(pThis, &AddrDisasm, + (pFlowBb->fFlags & DBGF_FLOW_BB_F_BRANCH_TABLE), + pFlowBb->pFlowBranchTbl); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3FlowQueryDirectBranchTarget(pUVM, idCpu, &DisState.Param1, &pInstr->AddrInstr, pInstr->cbInstr, + RT_BOOL(DisState.pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW), + &pFlowBb->AddrTarget); + if (RT_SUCCESS(rc)) + rc = dbgfR3FlowBbSuccessorAdd(pThis, &pFlowBb->AddrTarget, + (pFlowBb->fFlags & DBGF_FLOW_BB_F_BRANCH_TABLE), + pFlowBb->pFlowBranchTbl); + } + } + + if (RT_FAILURE(rc)) + dbgfR3FlowBbSetError(pFlowBb, rc, "Adding successor blocks failed with %Rrc", rc); + + /* Quit disassembling. */ + if ( uOpc != OP_CALL + || RT_FAILURE(rc)) + break; + } + } + else + dbgfR3FlowBbSetError(pFlowBb, rc, "Increasing basic block failed with %Rrc", rc); + } + else + dbgfR3FlowBbSetError(pFlowBb, rc, "Disassembling the instruction failed with %Rrc", rc); + } + + return VINF_SUCCESS; +} + +/** + * Populate all empty basic blocks. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param pThis The control flow graph to populate. + * @param pAddrStart The start address to disassemble at. + * @param cbDisasmMax The maximum amount to disassemble. + * @param fFlags Combination of DBGF_DISAS_FLAGS_*. + */ +static int dbgfR3FlowPopulate(PUVM pUVM, VMCPUID idCpu, PDBGFFLOWINT pThis, PDBGFADDRESS pAddrStart, + uint32_t cbDisasmMax, uint32_t fFlags) +{ + int rc = VINF_SUCCESS; + PDBGFFLOWBBINT pFlowBb = dbgfR3FlowGetUnpopulatedBb(pThis); + DBGFADDRESS AddrEnd = *pAddrStart; + DBGFR3AddrAdd(&AddrEnd, cbDisasmMax); + + while (VALID_PTR(pFlowBb)) + { + rc = dbgfR3FlowBbProcess(pUVM, idCpu, pThis, pFlowBb, cbDisasmMax, fFlags); + if (RT_FAILURE(rc)) + break; + + pFlowBb = dbgfR3FlowGetUnpopulatedBb(pThis); + } + + return rc; +} + +/** + * Creates a new control flow graph from the given start address. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu CPU id for disassembling. + * @param pAddressStart Where to start creating the control flow graph. + * @param cbDisasmMax Limit the amount of bytes to disassemble, 0 for no limit. + * @param fFlagsFlow Combination of DBGF_FLOW_CREATE_F_* to control the creation of the flow graph. 
+ * @param fFlagsDisasm Combination of DBGF_DISAS_FLAGS_* controlling the style of the disassembled + * instructions. + * @param phFlow Where to store the handle to the control flow graph on success. + */ +VMMR3DECL(int) DBGFR3FlowCreate(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddressStart, uint32_t cbDisasmMax, + uint32_t fFlagsFlow, uint32_t fFlagsDisasm, PDBGFFLOW phFlow) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + AssertPtrReturn(pAddressStart, VERR_INVALID_POINTER); + AssertReturn(!(fFlagsDisasm & ~DBGF_DISAS_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER); + AssertReturn((fFlagsDisasm & DBGF_DISAS_FLAGS_MODE_MASK) <= DBGF_DISAS_FLAGS_64BIT_MODE, VERR_INVALID_PARAMETER); + + /* Create the control flow graph container. */ + int rc = VINF_SUCCESS; + PDBGFFLOWINT pThis = (PDBGFFLOWINT)RTMemAllocZ(sizeof(DBGFFLOWINT)); + if (RT_LIKELY(pThis)) + { + rc = RTStrCacheCreate(&pThis->hStrCacheInstr, "DBGFFLOW"); + if (RT_SUCCESS(rc)) + { + pThis->cRefs = 1; + pThis->cRefsBb = 0; + pThis->cBbs = 0; + pThis->cBranchTbls = 0; + pThis->fFlags = fFlagsFlow; + RTListInit(&pThis->LstFlowBb); + RTListInit(&pThis->LstBranchTbl); + /* Create the entry basic block and start the work. */ + + PDBGFFLOWBBINT pFlowBb = dbgfR3FlowBbCreate(pThis, pAddressStart, DBGF_FLOW_BB_F_ENTRY, 10); + if (RT_LIKELY(pFlowBb)) + { + dbgfR3FlowLink(pThis, pFlowBb); + rc = dbgfR3FlowPopulate(pUVM, idCpu, pThis, pAddressStart, cbDisasmMax, fFlagsDisasm); + if (RT_SUCCESS(rc)) + { + *phFlow = pThis; + return VINF_SUCCESS; + } + } + else + rc = VERR_NO_MEMORY; + } + + ASMAtomicDecU32(&pThis->cRefs); + dbgfR3FlowDestroy(pThis); + } + else + rc = VERR_NO_MEMORY; + + return rc; +} + + +/** + * Retains the control flow graph handle. + * + * @returns Current reference count. + * @param hFlow The control flow graph handle to retain. + */ +VMMR3DECL(uint32_t) DBGFR3FlowRetain(DBGFFLOW hFlow) +{ + PDBGFFLOWINT pThis = hFlow; + AssertPtrReturn(pThis, UINT32_MAX); + + uint32_t cRefs = ASMAtomicIncU32(&pThis->cRefs); + AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pThis)); + return cRefs; +} + + +/** + * Releases the control flow graph handle. + * + * @returns Current reference count, on 0 the control flow graph will be destroyed. + * @param hFlow The control flow graph handle to release. + */ +VMMR3DECL(uint32_t) DBGFR3FlowRelease(DBGFFLOW hFlow) +{ + PDBGFFLOWINT pThis = hFlow; + if (!pThis) + return 0; + AssertPtrReturn(pThis, UINT32_MAX); + + uint32_t cRefs = ASMAtomicDecU32(&pThis->cRefs); + AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pThis)); + if (cRefs == 0) + dbgfR3FlowDestroy(pThis); + return cRefs; +} + + +/** + * Queries the basic block denoting the entry point into the control flow graph. + * + * @returns VBox status code. + * @param hFlow The control flow graph handle. + * @param phFlowBb Where to store the basic block handle on success. + */ +VMMR3DECL(int) DBGFR3FlowQueryStartBb(DBGFFLOW hFlow, PDBGFFLOWBB phFlowBb) +{ + PDBGFFLOWINT pThis = hFlow; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + + PDBGFFLOWBBINT pFlowBb; + RTListForEach(&pThis->LstFlowBb, pFlowBb, DBGFFLOWBBINT, NdFlowBb) + { + if (pFlowBb->fFlags & DBGF_FLOW_BB_F_ENTRY) + { + *phFlowBb = pFlowBb; + return VINF_SUCCESS; + } + } + + AssertFailed(); /* Should never get here. 
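 * DBGFR3FlowCreate always links exactly one basic block flagged with
 * DBGF_FLOW_BB_F_ENTRY (and dbgfR3FlowBbSplit never copies that flag), so the
 * loop above is expected to find it.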
*/ + return VERR_INTERNAL_ERROR; +} + + +/** + * Queries a basic block in the given control flow graph which covers the given + * address. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if there is no basic block intersecting with the address. + * @param hFlow The control flow graph handle. + * @param pAddr The address to look for. + * @param phFlowBb Where to store the basic block handle on success. + */ +VMMR3DECL(int) DBGFR3FlowQueryBbByAddress(DBGFFLOW hFlow, PDBGFADDRESS pAddr, PDBGFFLOWBB phFlowBb) +{ + PDBGFFLOWINT pThis = hFlow; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + AssertPtrReturn(pAddr, VERR_INVALID_POINTER); + AssertPtrReturn(phFlowBb, VERR_INVALID_POINTER); + + PDBGFFLOWBBINT pFlowBb; + RTListForEach(&pThis->LstFlowBb, pFlowBb, DBGFFLOWBBINT, NdFlowBb) + { + if (dbgfR3FlowAddrIntersect(pFlowBb, pAddr)) + { + DBGFR3FlowBbRetain(pFlowBb); + *phFlowBb = pFlowBb; + return VINF_SUCCESS; + } + } + + return VERR_NOT_FOUND; +} + + +/** + * Queries a branch table in the given control flow graph by the given address. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if there is no branch table with the given address. + * @param hFlow The control flow graph handle. + * @param pAddr The address of the branch table. + * @param phFlowBranchTbl Where to store the handle to branch table on success. + * + * @note Call DBGFR3FlowBranchTblRelease() when the handle is not required anymore. + */ +VMMR3DECL(int) DBGFR3FlowQueryBranchTblByAddress(DBGFFLOW hFlow, PDBGFADDRESS pAddr, PDBGFFLOWBRANCHTBL phFlowBranchTbl) +{ + PDBGFFLOWINT pThis = hFlow; + AssertPtrReturn(pThis, VERR_INVALID_HANDLE); + AssertPtrReturn(pAddr, VERR_INVALID_POINTER); + AssertPtrReturn(phFlowBranchTbl, VERR_INVALID_POINTER); + + PDBGFFLOWBRANCHTBLINT pBranchTbl = dbgfR3FlowBranchTblFindByAddr(pThis, pAddr); + if (pBranchTbl) + { + DBGFR3FlowBranchTblRetain(pBranchTbl); + *phFlowBranchTbl = pBranchTbl; + return VINF_SUCCESS; + } + + return VERR_NOT_FOUND; +} + + +/** + * Returns the number of basic blcoks inside the control flow graph. + * + * @returns Number of basic blocks. + * @param hFlow The control flow graph handle. + */ +VMMR3DECL(uint32_t) DBGFR3FlowGetBbCount(DBGFFLOW hFlow) +{ + PDBGFFLOWINT pThis = hFlow; + AssertPtrReturn(pThis, 0); + + return pThis->cBbs; +} + + +/** + * Returns the number of branch tables inside the control flow graph. + * + * @returns Number of basic blocks. + * @param hFlow The control flow graph handle. + */ +VMMR3DECL(uint32_t) DBGFR3FlowGetBranchTblCount(DBGFFLOW hFlow) +{ + PDBGFFLOWINT pThis = hFlow; + AssertPtrReturn(pThis, 0); + + return pThis->cBranchTbls; +} + + +/** + * Retains the basic block handle. + * + * @returns Current reference count. + * @param hFlowBb The basic block handle to retain. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBbRetain(DBGFFLOWBB hFlowBb) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, UINT32_MAX); + + uint32_t cRefs = ASMAtomicIncU32(&pFlowBb->cRefs); + AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p %d\n", cRefs, pFlowBb, pFlowBb->enmEndType)); + return cRefs; +} + + +/** + * Releases the basic block handle. + * + * @returns Current reference count, on 0 the basic block will be destroyed. + * @param hFlowBb The basic block handle to release. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBbRelease(DBGFFLOWBB hFlowBb) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + if (!pFlowBb) + return 0; + + return dbgfR3FlowBbReleaseInt(pFlowBb, true /* fMayDestroyFlow */); +} + + +/** + * Returns the start address of the basic block. 
+ * + * @returns Pointer to DBGF adress containing the start address of the basic block. + * @param hFlowBb The basic block handle. + * @param pAddrStart Where to store the start address of the basic block. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3FlowBbGetStartAddress(DBGFFLOWBB hFlowBb, PDBGFADDRESS pAddrStart) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, NULL); + AssertPtrReturn(pAddrStart, NULL); + + *pAddrStart = pFlowBb->AddrStart; + return pAddrStart; +} + + +/** + * Returns the end address of the basic block (inclusive). + * + * @returns Pointer to DBGF adress containing the end address of the basic block. + * @param hFlowBb The basic block handle. + * @param pAddrEnd Where to store the end address of the basic block. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3FlowBbGetEndAddress(DBGFFLOWBB hFlowBb, PDBGFADDRESS pAddrEnd) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, NULL); + AssertPtrReturn(pAddrEnd, NULL); + + *pAddrEnd = pFlowBb->AddrEnd; + return pAddrEnd; +} + + +/** + * Returns the address the last instruction in the basic block branches to. + * + * @returns Pointer to DBGF adress containing the branch address of the basic block. + * @param hFlowBb The basic block handle. + * @param pAddrTarget Where to store the branch address of the basic block. + * + * @note This is only valid for unconditional or conditional branches and will assert + * for every other basic block type. + * @note For indirect unconditional branches using a branch table this will return the start address + * of the branch table. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3FlowBbGetBranchAddress(DBGFFLOWBB hFlowBb, PDBGFADDRESS pAddrTarget) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, NULL); + AssertPtrReturn(pAddrTarget, NULL); + AssertReturn( pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND_JMP + || pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_COND + || pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND_INDIRECT_JMP, + NULL); + + if ( pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND_INDIRECT_JMP + && pFlowBb->pFlowBranchTbl) + *pAddrTarget = pFlowBb->pFlowBranchTbl->AddrStart; + else + *pAddrTarget = pFlowBb->AddrTarget; + return pAddrTarget; +} + + +/** + * Returns the address of the next block following this one in the instruction stream. + * (usually end address + 1). + * + * @returns Pointer to DBGF adress containing the following address of the basic block. + * @param hFlowBb The basic block handle. + * @param pAddrFollow Where to store the following address of the basic block. + * + * @note This is only valid for conditional branches and if the last instruction in the + * given basic block doesn't change the control flow but the blocks were split + * because the successor is referenced by multiple other blocks as an entry point. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3FlowBbGetFollowingAddress(DBGFFLOWBB hFlowBb, PDBGFADDRESS pAddrFollow) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, NULL); + AssertPtrReturn(pAddrFollow, NULL); + AssertReturn( pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND + || pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_COND, + NULL); + + *pAddrFollow = pFlowBb->AddrEnd; + DBGFR3AddrAdd(pAddrFollow, 1); + return pAddrFollow; +} + + +/** + * Returns the type of the last instruction in the basic block. + * + * @returns Last instruction type. + * @param hFlowBb The basic block handle. 
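 * @note The value is one of DBGFFLOWBBENDTYPE_EXIT (ret/iret/sysexit/sysret),
 *       DBGFFLOWBBENDTYPE_UNCOND_JMP, DBGFFLOWBBENDTYPE_UNCOND_INDIRECT_JMP,
 *       DBGFFLOWBBENDTYPE_COND, DBGFFLOWBBENDTYPE_UNCOND (the block simply runs
 *       into another block) or DBGFFLOWBBENDTYPE_INVALID for a block that has
 *       not been populated yet.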
+ */ +VMMR3DECL(DBGFFLOWBBENDTYPE) DBGFR3FlowBbGetType(DBGFFLOWBB hFlowBb) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, DBGFFLOWBBENDTYPE_INVALID); + + return pFlowBb->enmEndType; +} + + +/** + * Get the number of instructions contained in the basic block. + * + * @returns Number of instructions in the basic block. + * @param hFlowBb The basic block handle. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBbGetInstrCount(DBGFFLOWBB hFlowBb) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, 0); + + return pFlowBb->cInstr; +} + + +/** + * Get flags for the given basic block. + * + * @returns Combination of DBGF_FLOW_BB_F_* + * @param hFlowBb The basic block handle. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBbGetFlags(DBGFFLOWBB hFlowBb) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, 0); + + return pFlowBb->fFlags; +} + + +/** + * Queries the branch table used if the given basic block ends with an indirect branch + * and has a branch table referenced. + * + * @returns VBox status code. + * @param hFlowBb The basic block handle. + * @param phBranchTbl Where to store the branch table handle on success. + * + * @note Release the branch table reference with DBGFR3FlowBranchTblRelease() when not required + * anymore. + */ +VMMR3DECL(int) DBGFR3FlowBbQueryBranchTbl(DBGFFLOWBB hFlowBb, PDBGFFLOWBRANCHTBL phBranchTbl) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, VERR_INVALID_HANDLE); + AssertReturn(pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND_INDIRECT_JMP, VERR_INVALID_STATE); + AssertPtrReturn(pFlowBb->pFlowBranchTbl, VERR_INVALID_STATE); + AssertPtrReturn(phBranchTbl, VERR_INVALID_POINTER); + + DBGFR3FlowBranchTblRetain(pFlowBb->pFlowBranchTbl); + *phBranchTbl = pFlowBb->pFlowBranchTbl; + return VINF_SUCCESS; +} + + +/** + * Returns the error status and message if the given basic block has an error. + * + * @returns VBox status code of the error for the basic block. + * @param hFlowBb The basic block handle. + * @param ppszErr Where to store the pointer to the error message - optional. + */ +VMMR3DECL(int) DBGFR3FlowBbQueryError(DBGFFLOWBB hFlowBb, const char **ppszErr) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, VERR_INVALID_HANDLE); + + if (ppszErr) + *ppszErr = pFlowBb->pszErr; + + return pFlowBb->rcError; +} + + +/** + * Store the disassembled instruction as a string in the given output buffer. + * + * @returns VBox status code. + * @param hFlowBb The basic block handle. + * @param idxInstr The instruction to query. + * @param pAddrInstr Where to store the guest instruction address on success, optional. + * @param pcbInstr Where to store the instruction size on success, optional. + * @param ppszInstr Where to store the pointer to the disassembled instruction string, optional. + */ +VMMR3DECL(int) DBGFR3FlowBbQueryInstr(DBGFFLOWBB hFlowBb, uint32_t idxInstr, PDBGFADDRESS pAddrInstr, + uint32_t *pcbInstr, const char **ppszInstr) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, VERR_INVALID_POINTER); + AssertReturn(idxInstr < pFlowBb->cInstr, VERR_INVALID_PARAMETER); + + if (pAddrInstr) + *pAddrInstr = pFlowBb->aInstr[idxInstr].AddrInstr; + if (pcbInstr) + *pcbInstr = pFlowBb->aInstr[idxInstr].cbInstr; + if (ppszInstr) + *ppszInstr = pFlowBb->aInstr[idxInstr].pszInstr; + + return VINF_SUCCESS; +} + + +/** + * Queries the successors of the basic block. + * + * @returns VBox status code. + * @param hFlowBb The basic block handle. 
+ * @param phFlowBbFollow Where to store the handle to the basic block following + * this one (optional). + * @param phFlowBbTarget Where to store the handle to the basic block being the + * branch target for this one (optional). + */ +VMMR3DECL(int) DBGFR3FlowBbQuerySuccessors(DBGFFLOWBB hFlowBb, PDBGFFLOWBB phFlowBbFollow, PDBGFFLOWBB phFlowBbTarget) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, VERR_INVALID_POINTER); + + if ( phFlowBbFollow + && ( pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND + || pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_COND)) + { + DBGFADDRESS AddrStart = pFlowBb->AddrEnd; + DBGFR3AddrAdd(&AddrStart, 1); + int rc = DBGFR3FlowQueryBbByAddress(pFlowBb->pFlow, &AddrStart, phFlowBbFollow); + AssertRC(rc); + } + + if ( phFlowBbTarget + && ( pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_UNCOND_JMP + || pFlowBb->enmEndType == DBGFFLOWBBENDTYPE_COND)) + { + int rc = DBGFR3FlowQueryBbByAddress(pFlowBb->pFlow, &pFlowBb->AddrTarget, phFlowBbTarget); + AssertRC(rc); + } + + return VINF_SUCCESS; +} + + +/** + * Returns the number of basic blocks referencing this basic block as a target. + * + * @returns Number of other basic blocks referencing this one. + * @param hFlowBb The basic block handle. + * + * @note If the given basic block references itself (loop, etc.) this will be counted as well. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBbGetRefBbCount(DBGFFLOWBB hFlowBb) +{ + PDBGFFLOWBBINT pFlowBb = hFlowBb; + AssertPtrReturn(pFlowBb, 0); + + uint32_t cRefsBb = 0; + PDBGFFLOWBBINT pFlowBbCur; + RTListForEach(&pFlowBb->pFlow->LstFlowBb, pFlowBbCur, DBGFFLOWBBINT, NdFlowBb) + { + if (pFlowBbCur->fFlags & DBGF_FLOW_BB_F_INCOMPLETE_ERR) + continue; + + if ( pFlowBbCur->enmEndType == DBGFFLOWBBENDTYPE_UNCOND + || pFlowBbCur->enmEndType == DBGFFLOWBBENDTYPE_COND) + { + DBGFADDRESS AddrStart = pFlowBb->AddrEnd; + DBGFR3AddrAdd(&AddrStart, 1); + if (dbgfR3FlowAddrEqual(&pFlowBbCur->AddrStart, &AddrStart)) + cRefsBb++; + } + + if ( ( pFlowBbCur->enmEndType == DBGFFLOWBBENDTYPE_UNCOND_JMP + || pFlowBbCur->enmEndType == DBGFFLOWBBENDTYPE_COND) + && dbgfR3FlowAddrEqual(&pFlowBbCur->AddrStart, &pFlowBb->AddrTarget)) + cRefsBb++; + } + return cRefsBb; +} + + +/** + * Returns the basic block handles referencing the given basic block. + * + * @returns VBox status code. + * @retval VERR_BUFFER_OVERFLOW if the array can't hold all the basic blocks. + * @param hFlowBb The basic block handle. + * @param paFlowBbRef Pointer to the array containing the referencing basic block handles on success. + * @param cRef Number of entries in the given array. + */ +VMMR3DECL(int) DBGFR3FlowBbGetRefBb(DBGFFLOWBB hFlowBb, PDBGFFLOWBB paFlowBbRef, uint32_t cRef) +{ + RT_NOREF3(hFlowBb, paFlowBbRef, cRef); + return VERR_NOT_IMPLEMENTED; +} + + +/** + * Retains a reference for the given control flow graph branch table. + * + * @returns new reference count. + * @param hFlowBranchTbl The branch table handle. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBranchTblRetain(DBGFFLOWBRANCHTBL hFlowBranchTbl) +{ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl = hFlowBranchTbl; + AssertPtrReturn(pFlowBranchTbl, UINT32_MAX); + + uint32_t cRefs = ASMAtomicIncU32(&pFlowBranchTbl->cRefs); + AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pFlowBranchTbl)); + return cRefs; +} + + +/** + * Releases a given branch table handle. + * + * @returns the new reference count of the given branch table, on 0 it is destroyed. + * @param hFlowBranchTbl The branch table handle. 
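+ *
+ * @par Example
+ *      A minimal usage sketch, assuming hFlowBb is a basic block ending with an
+ *      indirect jump that references a branch table:
+ * @code
+ *      DBGFFLOWBRANCHTBL hBranchTbl;
+ *      int rc = DBGFR3FlowBbQueryBranchTbl(hFlowBb, &hBranchTbl);
+ *      if (RT_SUCCESS(rc))
+ *      {
+ *          uint32_t cSlots = DBGFR3FlowBranchTblGetSlots(hBranchTbl);
+ *          // ... inspect the cSlots possible branch targets ...
+ *          DBGFR3FlowBranchTblRelease(hBranchTbl);
+ *      }
+ * @endcode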
+ */ +VMMR3DECL(uint32_t) DBGFR3FlowBranchTblRelease(DBGFFLOWBRANCHTBL hFlowBranchTbl) +{ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl = hFlowBranchTbl; + if (!pFlowBranchTbl) + return 0; + AssertPtrReturn(pFlowBranchTbl, UINT32_MAX); + + uint32_t cRefs = ASMAtomicDecU32(&pFlowBranchTbl->cRefs); + AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pFlowBranchTbl)); + if (cRefs == 0) + dbgfR3FlowBranchTblDestroy(pFlowBranchTbl); + return cRefs; +} + + +/** + * Return the number of slots the branch table has. + * + * @returns Number of slots in the branch table. + * @param hFlowBranchTbl The branch table handle. + */ +VMMR3DECL(uint32_t) DBGFR3FlowBranchTblGetSlots(DBGFFLOWBRANCHTBL hFlowBranchTbl) +{ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl = hFlowBranchTbl; + AssertPtrReturn(pFlowBranchTbl, 0); + + return pFlowBranchTbl->cSlots; +} + + +/** + * Returns the start address of the branch table in the guest. + * + * @returns Pointer to start address of the branch table (pAddrStart). + * @param hFlowBranchTbl The branch table handle. + * @param pAddrStart Where to store the branch table address. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3FlowBranchTblGetStartAddress(DBGFFLOWBRANCHTBL hFlowBranchTbl, PDBGFADDRESS pAddrStart) +{ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl = hFlowBranchTbl; + AssertPtrReturn(pFlowBranchTbl, NULL); + AssertPtrReturn(pAddrStart, NULL); + + *pAddrStart = pFlowBranchTbl->AddrStart; + return pAddrStart; +} + + +/** + * Returns one address in the branch table at the given slot index. + * + * @return Pointer to the address at the given slot in the given branch table. + * @param hFlowBranchTbl The branch table handle. + * @param idxSlot The slot the address should be returned from. + * @param pAddrSlot Where to store the address. + */ +VMMR3DECL(PDBGFADDRESS) DBGFR3FlowBranchTblGetAddrAtSlot(DBGFFLOWBRANCHTBL hFlowBranchTbl, uint32_t idxSlot, PDBGFADDRESS pAddrSlot) +{ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl = hFlowBranchTbl; + AssertPtrReturn(pFlowBranchTbl, NULL); + AssertPtrReturn(pAddrSlot, NULL); + AssertReturn(idxSlot < pFlowBranchTbl->cSlots, NULL); + + *pAddrSlot = pFlowBranchTbl->aAddresses[idxSlot]; + return pAddrSlot; +} + + +/** + * Query all addresses contained in the given branch table. + * + * @returns VBox status code. + * @retval VERR_BUFFER_OVERFLOW if there is not enough space in the array to hold all addresses. + * @param hFlowBranchTbl The branch table handle. + * @param paAddrs Where to store the addresses on success. + * @param cAddrs Number of entries the array can hold. 
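+ *
+ * @par Example
+ *      A minimal usage sketch with error handling trimmed:
+ * @code
+ *      uint32_t     cSlots  = DBGFR3FlowBranchTblGetSlots(hFlowBranchTbl);
+ *      PDBGFADDRESS paAddrs = (PDBGFADDRESS)RTMemAllocZ(cSlots * sizeof(DBGFADDRESS));
+ *      if (paAddrs)
+ *      {
+ *          int rc = DBGFR3FlowBranchTblQueryAddresses(hFlowBranchTbl, paAddrs, cSlots);
+ *          // ... use the addresses on success ...
+ *          RTMemFree(paAddrs);
+ *      }
+ * @endcode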
+ */ +VMMR3DECL(int) DBGFR3FlowBranchTblQueryAddresses(DBGFFLOWBRANCHTBL hFlowBranchTbl, PDBGFADDRESS paAddrs, uint32_t cAddrs) +{ + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl = hFlowBranchTbl; + AssertPtrReturn(pFlowBranchTbl, VERR_INVALID_HANDLE); + AssertPtrReturn(paAddrs, VERR_INVALID_POINTER); + AssertReturn(cAddrs > 0, VERR_INVALID_PARAMETER); + + if (cAddrs < pFlowBranchTbl->cSlots) + return VERR_BUFFER_OVERFLOW; + + memcpy(paAddrs, &pFlowBranchTbl->aAddresses[0], pFlowBranchTbl->cSlots * sizeof(DBGFADDRESS)); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNRTSORTCMP} + */ +static DECLCALLBACK(int) dbgfR3FlowItSortCmp(void const *pvElement1, void const *pvElement2, void *pvUser) +{ + PDBGFFLOWITORDER penmOrder = (PDBGFFLOWITORDER)pvUser; + PDBGFFLOWBBINT pFlowBb1 = *(PDBGFFLOWBBINT *)pvElement1; + PDBGFFLOWBBINT pFlowBb2 = *(PDBGFFLOWBBINT *)pvElement2; + + if (dbgfR3FlowAddrEqual(&pFlowBb1->AddrStart, &pFlowBb2->AddrStart)) + return 0; + + if (*penmOrder == DBGFFLOWITORDER_BY_ADDR_LOWEST_FIRST) + { + if (dbgfR3FlowAddrLower(&pFlowBb1->AddrStart, &pFlowBb2->AddrStart)) + return -1; + else + return 1; + } + else + { + if (dbgfR3FlowAddrLower(&pFlowBb1->AddrStart, &pFlowBb2->AddrStart)) + return 1; + else + return -1; + } +} + + +/** + * Creates a new iterator for the given control flow graph. + * + * @returns VBox status code. + * @param hFlow The control flow graph handle. + * @param enmOrder The order in which the basic blocks are enumerated. + * @param phFlowIt Where to store the handle to the iterator on success. + */ +VMMR3DECL(int) DBGFR3FlowItCreate(DBGFFLOW hFlow, DBGFFLOWITORDER enmOrder, PDBGFFLOWIT phFlowIt) +{ + int rc = VINF_SUCCESS; + PDBGFFLOWINT pFlow = hFlow; + AssertPtrReturn(pFlow, VERR_INVALID_POINTER); + AssertPtrReturn(phFlowIt, VERR_INVALID_POINTER); + AssertReturn(enmOrder > DBGFFLOWITORDER_INVALID && enmOrder < DBGFFLOWITORDER_BREADTH_FIRST, + VERR_INVALID_PARAMETER); + AssertReturn(enmOrder < DBGFFLOWITORDER_DEPTH_FRIST, VERR_NOT_IMPLEMENTED); /** @todo */ + + PDBGFFLOWITINT pIt = (PDBGFFLOWITINT)RTMemAllocZ(RT_UOFFSETOF_DYN(DBGFFLOWITINT, apBb[pFlow->cBbs])); + if (RT_LIKELY(pIt)) + { + DBGFR3FlowRetain(hFlow); + pIt->pFlow = pFlow; + pIt->idxBbNext = 0; + /* Fill the list and then sort. */ + uint32_t idxBb = 0; + PDBGFFLOWBBINT pFlowBb; + RTListForEach(&pFlow->LstFlowBb, pFlowBb, DBGFFLOWBBINT, NdFlowBb) + { + DBGFR3FlowBbRetain(pFlowBb); + pIt->apBb[idxBb++] = pFlowBb; + } + + /* Sort the blocks by address. */ + RTSortShell(&pIt->apBb[0], pFlow->cBbs, sizeof(PDBGFFLOWBBINT), dbgfR3FlowItSortCmp, &enmOrder); + + *phFlowIt = pIt; + } + else + rc = VERR_NO_MEMORY; + + return rc; +} + + +/** + * Destroys a given control flow graph iterator. + * + * @returns nothing. + * @param hFlowIt The control flow graph iterator handle. + */ +VMMR3DECL(void) DBGFR3FlowItDestroy(DBGFFLOWIT hFlowIt) +{ + PDBGFFLOWITINT pIt = hFlowIt; + AssertPtrReturnVoid(pIt); + + for (unsigned i = 0; i < pIt->pFlow->cBbs; i++) + DBGFR3FlowBbRelease(pIt->apBb[i]); + + DBGFR3FlowRelease(pIt->pFlow); + RTMemFree(pIt); +} + + +/** + * Returns the next basic block in the iterator or NULL if there is no + * basic block left. + * + * @returns Handle to the next basic block in the iterator or NULL if the end + * was reached. + * @param hFlowIt The iterator handle. + * + * @note If a valid handle is returned it must be release with DBGFR3FlowBbRelease() + * when not required anymore. 
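+ *
+ * @par Example
+ *      A minimal usage sketch, assuming hFlow is a valid flow graph handle;
+ *      it enumerates all basic blocks in ascending address order:
+ * @code
+ *      DBGFFLOWIT hFlowIt;
+ *      int rc = DBGFR3FlowItCreate(hFlow, DBGFFLOWITORDER_BY_ADDR_LOWEST_FIRST, &hFlowIt);
+ *      if (RT_SUCCESS(rc))
+ *      {
+ *          DBGFFLOWBB hFlowBb;
+ *          while ((hFlowBb = DBGFR3FlowItNext(hFlowIt)) != NULL)
+ *          {
+ *              // ... inspect the basic block ...
+ *              DBGFR3FlowBbRelease(hFlowBb);
+ *          }
+ *          DBGFR3FlowItDestroy(hFlowIt);
+ *      }
+ * @endcode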
+ */ +VMMR3DECL(DBGFFLOWBB) DBGFR3FlowItNext(DBGFFLOWIT hFlowIt) +{ + PDBGFFLOWITINT pIt = hFlowIt; + AssertPtrReturn(pIt, NULL); + + PDBGFFLOWBBINT pFlowBb = NULL; + if (pIt->idxBbNext < pIt->pFlow->cBbs) + { + pFlowBb = pIt->apBb[pIt->idxBbNext++]; + DBGFR3FlowBbRetain(pFlowBb); + } + + return pFlowBb; +} + + +/** + * Resets the given iterator to the beginning. + * + * @returns VBox status code. + * @param hFlowIt The iterator handle. + */ +VMMR3DECL(int) DBGFR3FlowItReset(DBGFFLOWIT hFlowIt) +{ + PDBGFFLOWITINT pIt = hFlowIt; + AssertPtrReturn(pIt, VERR_INVALID_HANDLE); + + pIt->idxBbNext = 0; + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNRTSORTCMP} + */ +static DECLCALLBACK(int) dbgfR3FlowBranchTblItSortCmp(void const *pvElement1, void const *pvElement2, void *pvUser) +{ + PDBGFFLOWITORDER penmOrder = (PDBGFFLOWITORDER)pvUser; + PDBGFFLOWBRANCHTBLINT pTbl1 = *(PDBGFFLOWBRANCHTBLINT *)pvElement1; + PDBGFFLOWBRANCHTBLINT pTbl2 = *(PDBGFFLOWBRANCHTBLINT *)pvElement2; + + if (dbgfR3FlowAddrEqual(&pTbl1->AddrStart, &pTbl2->AddrStart)) + return 0; + + if (*penmOrder == DBGFFLOWITORDER_BY_ADDR_LOWEST_FIRST) + { + if (dbgfR3FlowAddrLower(&pTbl1->AddrStart, &pTbl2->AddrStart)) + return -1; + else + return 1; + } + else + { + if (dbgfR3FlowAddrLower(&pTbl1->AddrStart, &pTbl2->AddrStart)) + return 1; + else + return -1; + } +} + + +/** + * Creates a new branch table iterator for the given control flow graph. + * + * @returns VBox status code. + * @param hFlow The control flow graph handle. + * @param enmOrder The order in which the basic blocks are enumerated. + * @param phFlowBranchTblIt Where to store the handle to the iterator on success. + */ +VMMR3DECL(int) DBGFR3FlowBranchTblItCreate(DBGFFLOW hFlow, DBGFFLOWITORDER enmOrder, + PDBGFFLOWBRANCHTBLIT phFlowBranchTblIt) +{ + int rc = VINF_SUCCESS; + PDBGFFLOWINT pFlow = hFlow; + AssertPtrReturn(pFlow, VERR_INVALID_POINTER); + AssertPtrReturn(phFlowBranchTblIt, VERR_INVALID_POINTER); + AssertReturn(enmOrder > DBGFFLOWITORDER_INVALID && enmOrder < DBGFFLOWITORDER_BREADTH_FIRST, + VERR_INVALID_PARAMETER); + AssertReturn(enmOrder < DBGFFLOWITORDER_DEPTH_FRIST, VERR_NOT_SUPPORTED); + + PDBGFFLOWBRANCHTBLITINT pIt = (PDBGFFLOWBRANCHTBLITINT)RTMemAllocZ(RT_UOFFSETOF_DYN(DBGFFLOWBRANCHTBLITINT, + apBranchTbl[pFlow->cBranchTbls])); + if (RT_LIKELY(pIt)) + { + DBGFR3FlowRetain(hFlow); + pIt->pFlow = pFlow; + pIt->idxTblNext = 0; + /* Fill the list and then sort. */ + uint32_t idxTbl = 0; + PDBGFFLOWBRANCHTBLINT pFlowBranchTbl; + RTListForEach(&pFlow->LstBranchTbl, pFlowBranchTbl, DBGFFLOWBRANCHTBLINT, NdBranchTbl) + { + DBGFR3FlowBranchTblRetain(pFlowBranchTbl); + pIt->apBranchTbl[idxTbl++] = pFlowBranchTbl; + } + + /* Sort the blocks by address. */ + RTSortShell(&pIt->apBranchTbl[0], pFlow->cBranchTbls, sizeof(PDBGFFLOWBRANCHTBLINT), dbgfR3FlowBranchTblItSortCmp, &enmOrder); + + *phFlowBranchTblIt = pIt; + } + else + rc = VERR_NO_MEMORY; + + return rc; +} + + +/** + * Destroys a given control flow graph branch table iterator. + * + * @returns nothing. + * @param hFlowBranchTblIt The control flow graph branch table iterator handle. 
+ */ +VMMR3DECL(void) DBGFR3FlowBranchTblItDestroy(DBGFFLOWBRANCHTBLIT hFlowBranchTblIt) +{ + PDBGFFLOWBRANCHTBLITINT pIt = hFlowBranchTblIt; + AssertPtrReturnVoid(pIt); + + for (unsigned i = 0; i < pIt->pFlow->cBranchTbls; i++) + DBGFR3FlowBranchTblRelease(pIt->apBranchTbl[i]); + + DBGFR3FlowRelease(pIt->pFlow); + RTMemFree(pIt); +} + + +/** + * Returns the next branch table in the iterator or NULL if there is no + * branch table left. + * + * @returns Handle to the next basic block in the iterator or NULL if the end + * was reached. + * @param hFlowBranchTblIt The iterator handle. + * + * @note If a valid handle is returned it must be release with DBGFR3FlowBranchTblRelease() + * when not required anymore. + */ +VMMR3DECL(DBGFFLOWBRANCHTBL) DBGFR3FlowBranchTblItNext(DBGFFLOWBRANCHTBLIT hFlowBranchTblIt) +{ + PDBGFFLOWBRANCHTBLITINT pIt = hFlowBranchTblIt; + AssertPtrReturn(pIt, NULL); + + PDBGFFLOWBRANCHTBLINT pTbl = NULL; + if (pIt->idxTblNext < pIt->pFlow->cBranchTbls) + { + pTbl = pIt->apBranchTbl[pIt->idxTblNext++]; + DBGFR3FlowBranchTblRetain(pTbl); + } + + return pTbl; +} + + +/** + * Resets the given iterator to the beginning. + * + * @returns VBox status code. + * @param hFlowBranchTblIt The iterator handle. + */ +VMMR3DECL(int) DBGFR3FlowBranchTblItReset(DBGFFLOWBRANCHTBLIT hFlowBranchTblIt) +{ + PDBGFFLOWBRANCHTBLITINT pIt = hFlowBranchTblIt; + AssertPtrReturn(pIt, VERR_INVALID_HANDLE); + + pIt->idxTblNext = 0; + return VINF_SUCCESS; +} diff --git a/src/VBox/VMM/VMMR3/DBGFR3ModInMem.cpp b/src/VBox/VMM/VMMR3/DBGFR3ModInMem.cpp new file mode 100644 index 00000000..dba0c2dd --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFR3ModInMem.cpp @@ -0,0 +1,707 @@ +/* $Id: DBGFR3ModInMem.cpp $ */ +/** @file + * DBGFR3ModInMemPe - In memory PE module 'loader'. + */ + +/* + * Copyright (C) 2009-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * The WinNT digger's loader reader instance data. + */ +typedef struct DBGFMODPERDR +{ + /** The VM handle (referenced). */ + PUVM pUVM; + /** The image base. */ + DBGFADDRESS ImageAddr; + /** The image size. */ + uint32_t cbImage; + /** The file offset of the SizeOfImage field in the optional header if it + * needs patching, otherwise set to UINT32_MAX. */ + uint32_t offSizeOfImage; + /** The correct image size. 
*/ + uint32_t cbCorrectImageSize; + /** Number of entries in the aMappings table. */ + uint32_t cMappings; + /** Mapping hint. */ + uint32_t iHint; + /** Mapping file offset to memory offsets, ordered by file offset. */ + struct + { + /** The file offset. */ + uint32_t offFile; + /** The size of this mapping. */ + uint32_t cbMem; + /** The offset to the memory from the start of the image. */ + uint32_t offMem; + } aMappings[1]; +} DBGFMODPERDR; +/** Pointer a WinNT loader reader instance data. */ +typedef DBGFMODPERDR *PDBGFMODPERDR; + +/** + * Stack buffer. + */ +typedef union DBGFMODINMEMBUF +{ + uint8_t ab[0x2000]; + IMAGE_DOS_HEADER DosHdr; + IMAGE_NT_HEADERS32 Nt32; + IMAGE_NT_HEADERS64 Nt64; +} DBGFMODINMEMBUF; +/** Pointer to stack buffer. */ +typedef DBGFMODINMEMBUF *PDBGFMODINMEMBUF; + + + +/** + * Normalizes a debug module name. + * + * @returns Normalized debug module name. + * @param pszName The name. + * @param pszBuf Buffer to use if work is needed. + * @param cbBuf Size of buffer. + */ +const char *dbgfR3ModNormalizeName(const char *pszName, char *pszBuf, size_t cbBuf) +{ + /* + * Skip to the filename in case someone gave us a full filename path. + */ + pszName = RTPathFilenameEx(pszName, RTPATH_STR_F_STYLE_DOS); + + /* + * Is it okay? + */ + size_t cchName = strlen(pszName); + size_t off = 0; + for (;; off++) + { + char ch = pszName[off]; + if (ch == '\0') + return pszName; + if (!RT_C_IS_ALNUM(ch) && ch != '_') + break; + } + + /* + * It's no okay, so morph it. + */ + if (cchName >= cbBuf) + cchName = cbBuf - 1; + for (off = 0; off < cchName; off++) + { + char ch = pszName[off]; + if (!RT_C_IS_ALNUM(ch)) + ch = '_'; + pszBuf[off] = ch; + } + pszBuf[off] = '\0'; + + return pszBuf; +} + + +/** + * Handles in-memory ELF images. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pImageAddr The image address. + * @param fFlags Flags, DBGFMODINMEM_F_XXX. + * @param pszName The module name, optional. + * @param pszFilename The image filename, optional. + * @param enmArch The image arch if we force it, pass + * RTLDRARCH_WHATEVER if you don't care. + * @param cbImage Image size. Pass 0 if not known. + * @param puBuf The header buffer. + * @param phDbgMod Where to return the resulting debug module on success. + * @param pErrInfo Where to return extended error info on failure. + */ +static int dbgfR3ModInMemElf(PUVM pUVM, PCDBGFADDRESS pImageAddr, uint32_t fFlags, const char *pszName, const char *pszFilename, + RTLDRARCH enmArch, uint32_t cbImage, PDBGFMODINMEMBUF puBuf, + PRTDBGMOD phDbgMod, PRTERRINFO pErrInfo) +{ + RT_NOREF(pUVM, fFlags, pszName, pszFilename, enmArch, cbImage, puBuf, phDbgMod); + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_INVALID_EXE_SIGNATURE, "Found ELF magic at %RGv", pImageAddr->FlatPtr); +} + + +/** + * @callback_method_impl{PFNRTLDRRDRMEMREAD} + */ +static DECLCALLBACK(int) dbgfModInMemPeRdr_Read(void *pvBuf, size_t cb, size_t off, void *pvUser) +{ + PDBGFMODPERDR pThis = (PDBGFMODPERDR)pvUser; + uint32_t offFile = (uint32_t)off; + AssertReturn(offFile == off, VERR_INVALID_PARAMETER); + + uint32_t i = pThis->iHint; + if (pThis->aMappings[i].offFile > offFile) + { + i = pThis->cMappings; + while (i-- > 0) + if (offFile >= pThis->aMappings[i].offFile) + break; + pThis->iHint = i; + } + + while (cb > 0) + { + uint32_t offNextMap = i + 1 < pThis->cMappings ? pThis->aMappings[i + 1].offFile : pThis->cbImage; + uint32_t offMap = offFile - pThis->aMappings[i].offFile; + + /* Read file bits backed by memory. 
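+             * The portion backed by guest memory is read via DBGFR3MemRead (patching
+             * the SizeOfImage field on the fly when requested); the gap up to the next
+             * file mapping, if any, is zero-filled further down.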
*/ + if (offMap < pThis->aMappings[i].cbMem) + { + uint32_t cbToRead = pThis->aMappings[i].cbMem - offMap; + if (cbToRead > cb) + cbToRead = (uint32_t)cb; + + DBGFADDRESS Addr = pThis->ImageAddr; + DBGFR3AddrAdd(&Addr, pThis->aMappings[i].offMem + offMap); + + int rc = DBGFR3MemRead(pThis->pUVM, 0 /*idCpu*/, &Addr, pvBuf, cbToRead); + if (RT_FAILURE(rc)) + return rc; + + /* Apply SizeOfImage patch? */ + if ( pThis->offSizeOfImage != UINT32_MAX + && offFile < pThis->offSizeOfImage + 4 + && offFile + cbToRead > pThis->offSizeOfImage) + { + uint32_t SizeOfImage = pThis->cbCorrectImageSize; + uint32_t cbPatch = sizeof(SizeOfImage); + int32_t offPatch = pThis->offSizeOfImage - offFile; + uint8_t *pbPatch = (uint8_t *)pvBuf + offPatch; + if (offFile + cbToRead < pThis->offSizeOfImage + cbPatch) + cbPatch = offFile + cbToRead - pThis->offSizeOfImage; + while (cbPatch-- > 0) + { + if (offPatch >= 0) + *pbPatch = (uint8_t)SizeOfImage; + offPatch++; + pbPatch++; + SizeOfImage >>= 8; + } + } + + /* Done? */ + if (cbToRead == cb) + break; + + offFile += cbToRead; + cb -= cbToRead; + pvBuf = (char *)pvBuf + cbToRead; + } + + /* Mind the gap. */ + if (offNextMap > offFile) + { + uint32_t cbZero = offNextMap - offFile; + if (cbZero > cb) + { + RT_BZERO(pvBuf, cb); + break; + } + + RT_BZERO(pvBuf, cbZero); + offFile += cbZero; + cb -= cbZero; + pvBuf = (char *)pvBuf + cbZero; + } + + pThis->iHint = ++i; + } + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{PFNRTLDRRDRMEMDTOR} + */ +static DECLCALLBACK(void) dbgfModInMemPeRdr_Dtor(void *pvUser, size_t cbImage) +{ + PDBGFMODPERDR pThis = (PDBGFMODPERDR)pvUser; + RT_NOREF(cbImage); + + VMR3ReleaseUVM(pThis->pUVM); + pThis->pUVM = NULL; + RTMemFree(pvUser); +} + + +/** + * Checks if the section headers look okay. + * + * @returns VBox status code. + * @param paShdrs Pointer to the section headers. + * @param cShdrs Number of headers. + * @param cbImage The image size reported by NT. + * @param cbImageFromHdr The image size by the linker in the header. + * @param uRvaRsrc The RVA of the resource directory. UINT32_MAX if + * no resource directory. + * @param cbSectAlign The section alignment specified in the header. + * @param fNt31 Set if NT 3.1. Needed for chopped off HAL. + * @param pcbImageCorrect The corrected image size. This is derived from + * cbImage and virtual range of the section tables. + * + * The problem is that NT may choose to drop the + * last pages in images it loads early, starting at + * the resource directory. These images will have + * a page aligned cbImage. + * + * @param pErrInfo Where to return more error details. + */ +static int dbgfR3ModPeCheckSectHdrsAndImgSize(PCIMAGE_SECTION_HEADER paShdrs, uint32_t cShdrs, uint32_t cbImage, + uint32_t cbImageFromHdr, uint32_t uRvaRsrc, uint32_t cbSectAlign, + bool fNt31, uint32_t *pcbImageCorrect, PRTERRINFO pErrInfo) +{ + *pcbImageCorrect = cbImage; + + for (uint32_t i = 0; i < cShdrs; i++) + { + if (!paShdrs[i].Name[0]) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, "Section header #%u has no name", i); + + if (paShdrs[i].Characteristics & IMAGE_SCN_TYPE_NOLOAD) + continue; + + /* Tweak to determine the virtual size if the linker didn't set it (NT 3.1). */ + /** @todo this isn't really perfect. cbImage is kind of wrong... 
*/ + uint32_t cbVirtual = paShdrs[i].Misc.VirtualSize; + if (cbVirtual == 0) + { + for (uint32_t j = i + 1; j < cShdrs; j++) + if ( !(paShdrs[j].Characteristics & IMAGE_SCN_TYPE_NOLOAD) + && paShdrs[j].VirtualAddress > paShdrs[i].VirtualAddress) + { + cbVirtual = paShdrs[j].VirtualAddress - paShdrs[i].VirtualAddress; + break; + } + if (!cbVirtual) + { + if (paShdrs[i].VirtualAddress < cbImageFromHdr) + cbVirtual = cbImageFromHdr - paShdrs[i].VirtualAddress; + else if (paShdrs[i].SizeOfRawData > 0) + cbVirtual = RT_ALIGN(paShdrs[i].SizeOfRawData, _4K); + } + } + + /* Check that sizes are within the same range and that both sizes and + addresses are within reasonable limits. */ + if ( RT_ALIGN(cbVirtual, _64K) < RT_ALIGN(paShdrs[i].SizeOfRawData, _64K) + || cbVirtual >= _1G + || paShdrs[i].SizeOfRawData >= _1G) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, + "Section header #%u (%.8s) has a VirtualSize=%#x (%#x) and SizeOfRawData=%#x, that's too much data!", + i, paShdrs[i].Name, cbVirtual, paShdrs[i].Misc.VirtualSize, paShdrs[i].SizeOfRawData); + uint32_t uRvaEnd = paShdrs[i].VirtualAddress + cbVirtual; + if (uRvaEnd >= _1G || uRvaEnd < paShdrs[i].VirtualAddress) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, + "Section header #%u (%.8s) has a VirtualSize=%#x (%#x) and VirtualAddr=%#x, %#x in total, that's too much!", + i, paShdrs[i].Name, cbVirtual, paShdrs[i].Misc.VirtualSize, paShdrs[i].VirtualAddress, uRvaEnd); + + /* Check for images chopped off around '.rsrc'. */ + if ( cbImage < uRvaEnd + && uRvaEnd >= uRvaRsrc) + cbImage = RT_ALIGN(uRvaEnd, cbSectAlign); + + /* Check that the section is within the image. */ + if (uRvaEnd > cbImage && fNt31) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, + "Section header #%u has a virtual address range beyond the image: %#x TO %#x cbImage=%#x", + i, paShdrs[i].VirtualAddress, uRvaEnd, cbImage); + } + + Assert(*pcbImageCorrect == cbImage || !(*pcbImageCorrect & 0xfff)); + *pcbImageCorrect = cbImage; + return VINF_SUCCESS; +} + + +/** + * Create a loader module for the in-guest-memory PE module. + */ +static int dbgfR3ModInMemPeCreateLdrMod(PUVM pUVM, uint32_t fFlags, const char *pszName, PCDBGFADDRESS pImageAddr, + uint32_t cbImage, uint32_t cbImageFromHdr, bool f32Bit, + uint32_t cShdrs, PCIMAGE_SECTION_HEADER paShdrs, uint32_t cbSectAlign, + uint32_t cDataDir, PCIMAGE_DATA_DIRECTORY paDataDir, uint32_t offHdrs, + PRTLDRMOD phLdrMod, PRTERRINFO pErrInfo) +{ + /* + * Allocate and create a reader instance. + */ + PDBGFMODPERDR pRdr = (PDBGFMODPERDR)RTMemAlloc(RT_UOFFSETOF_DYN(DBGFMODPERDR, aMappings[cShdrs + 2])); + if (!pRdr) + return VERR_NO_MEMORY; + + VMR3RetainUVM(pUVM); + pRdr->pUVM = pUVM; + pRdr->ImageAddr = *pImageAddr; + pRdr->cbImage = cbImage; + pRdr->cbCorrectImageSize = cbImage; + pRdr->offSizeOfImage = UINT32_MAX; + pRdr->iHint = 0; + + /* + * Use the section table to construct a more accurate view of the file/image. 
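+     * Each section with raw file data contributes one file-offset to RVA mapping,
+     * kept sorted by file offset; mapping entry 0 is reserved for the headers,
+     * which are not covered by the section table.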
+ */ + uint32_t uRvaRsrc = UINT32_MAX; + if ( cDataDir > IMAGE_DIRECTORY_ENTRY_RESOURCE + && paDataDir[IMAGE_DIRECTORY_ENTRY_RESOURCE].Size > 0) + uRvaRsrc = paDataDir[IMAGE_DIRECTORY_ENTRY_RESOURCE].VirtualAddress; + + int rc = dbgfR3ModPeCheckSectHdrsAndImgSize(paShdrs, cShdrs, cbImage, cbImageFromHdr, uRvaRsrc, cbSectAlign, + RT_BOOL(fFlags & DBGFMODINMEM_F_PE_NT31), &pRdr->cbCorrectImageSize, pErrInfo); + if (RT_SUCCESS(rc)) + { + pRdr->cMappings = 0; + + for (uint32_t i = 0; i < cShdrs; i++) + if ( paShdrs[i].SizeOfRawData > 0 + && paShdrs[i].PointerToRawData > 0) + { + uint32_t j = 1; + if (!pRdr->cMappings) + pRdr->cMappings++; + else + { + while (j < pRdr->cMappings && pRdr->aMappings[j].offFile < paShdrs[i].PointerToRawData) + j++; + if (j < pRdr->cMappings) + memmove(&pRdr->aMappings[j + 1], &pRdr->aMappings[j], (pRdr->cMappings - j) * sizeof(pRdr->aMappings)); + } + pRdr->aMappings[j].offFile = paShdrs[i].PointerToRawData; + pRdr->aMappings[j].offMem = paShdrs[i].VirtualAddress; + pRdr->aMappings[j].cbMem = i + 1 < cShdrs + ? paShdrs[i + 1].VirtualAddress - paShdrs[i].VirtualAddress + : paShdrs[i].Misc.VirtualSize; + if (j == pRdr->cMappings) + pRdr->cbImage = paShdrs[i].PointerToRawData + paShdrs[i].SizeOfRawData; + pRdr->cMappings++; + } + + /* Insert the mapping of the headers that isn't covered by the section table. */ + pRdr->aMappings[0].offFile = 0; + pRdr->aMappings[0].offMem = 0; + pRdr->aMappings[0].cbMem = pRdr->cMappings ? pRdr->aMappings[1].offFile : pRdr->cbImage; + + int j = pRdr->cMappings - 1; + while (j-- > 0) + { + uint32_t cbFile = pRdr->aMappings[j + 1].offFile - pRdr->aMappings[j].offFile; + if (pRdr->aMappings[j].cbMem > cbFile) + pRdr->aMappings[j].cbMem = cbFile; + } + } + else if (fFlags & DBGFMODINMEM_F_NO_READER_FALLBACK) + return rc; + else + { + /* + * Fallback, fake identity mapped file data. + */ + pRdr->cMappings = 1; + pRdr->aMappings[0].offFile = 0; + pRdr->aMappings[0].offMem = 0; + pRdr->aMappings[0].cbMem = pRdr->cbImage; + } + + /* Enable the SizeOfImage patching if necessary. */ + if (pRdr->cbCorrectImageSize != cbImage) + { + Log(("dbgfR3ModInMemPeCreateLdrMod: The image is really %#x bytes long, not %#x as mapped by NT!\n", + pRdr->cbCorrectImageSize, cbImage)); + pRdr->offSizeOfImage = f32Bit + ? offHdrs + RT_OFFSETOF(IMAGE_NT_HEADERS32, OptionalHeader.SizeOfImage) + : offHdrs + RT_OFFSETOF(IMAGE_NT_HEADERS64, OptionalHeader.SizeOfImage); + } + + /* + * Call the loader to open the PE image for debugging. + * Note! It always calls pfnDtor. + */ + RTLDRMOD hLdrMod; + rc = RTLdrOpenInMemory(pszName, RTLDR_O_FOR_DEBUG, RTLDRARCH_WHATEVER, pRdr->cbImage, + dbgfModInMemPeRdr_Read, dbgfModInMemPeRdr_Dtor, pRdr, + &hLdrMod, pErrInfo); + if (RT_SUCCESS(rc)) + *phLdrMod = hLdrMod; + else + *phLdrMod = NIL_RTLDRMOD; + return rc; +} + + +/** + * Handles in-memory PE images. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pImageAddr The image address. + * @param fFlags Flags, DBGFMODINMEM_F_XXX. + * @param pszName The module name, optional. + * @param pszFilename The image filename, optional. + * @param enmArch The image arch if we force it, pass + * RTLDRARCH_WHATEVER if you don't care. + * @param cbImage Image size. Pass 0 if not known. + * @param offPeHdrs Offset of the PE header. + * @param cbPeHdrsPart1 How read into uBuf at @a offPeHdrs. + * @param puBuf The header buffer. + * @param phDbgMod Where to return the resulting debug module on success. 
+ * @param pErrInfo Where to return extended error info on failure. + */ +static int dbgfR3ModInMemPe(PUVM pUVM, PCDBGFADDRESS pImageAddr, uint32_t fFlags, const char *pszName, const char *pszFilename, + RTLDRARCH enmArch, uint32_t cbImage, uint32_t offPeHdrs, uint32_t cbPeHdrsPart1, + PDBGFMODINMEMBUF puBuf, PRTDBGMOD phDbgMod, PRTERRINFO pErrInfo) +{ + /* + * Read the optional header and the section table after validating the + * info we need from the file header. + */ + /* Check the opt hdr size and number of sections as these are used to determine how much to read next. */ + if ( puBuf->Nt32.FileHeader.SizeOfOptionalHeader < sizeof(IMAGE_OPTIONAL_HEADER32) + || puBuf->Nt32.FileHeader.SizeOfOptionalHeader > sizeof(IMAGE_OPTIONAL_HEADER64) + 128) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, "Invalid SizeOfOptionalHeader value: %#RX32", + puBuf->Nt32.FileHeader.SizeOfOptionalHeader); + + if ( puBuf->Nt32.FileHeader.NumberOfSections < 1 + || puBuf->Nt32.FileHeader.NumberOfSections > 190 /* what fits in our 8K buffer */) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, "NumberOfSections is out of range: %#RX32 (1..190)", + puBuf->Nt32.FileHeader.NumberOfSections); + + /* Read the optional header and section table. */ + uint32_t const cbHdrs = RT_UOFFSETOF(IMAGE_NT_HEADERS32, OptionalHeader) + + puBuf->Nt32.FileHeader.SizeOfOptionalHeader + + puBuf->Nt32.FileHeader.NumberOfSections * sizeof(IMAGE_SECTION_HEADER); + AssertReturn(cbHdrs <= sizeof(*puBuf), RTERRINFO_LOG_SET_F(pErrInfo, VERR_INTERNAL_ERROR_2, "cbHdrs=%#x", cbHdrs)); + + DBGFADDRESS PeHdrPart2Addr = *pImageAddr; + DBGFR3AddrAdd(&PeHdrPart2Addr, offPeHdrs + cbPeHdrsPart1); + int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &PeHdrPart2Addr, &puBuf->ab[cbPeHdrsPart1], cbHdrs - cbPeHdrsPart1); + if (RT_FAILURE(rc)) + return RTERRINFO_LOG_SET_F(pErrInfo, rc, + "Failed to read the second part of the PE headers at %RGv (off=%#RX32 + %#RX32): %Rrc", + PeHdrPart2Addr.FlatPtr, offPeHdrs, cbPeHdrsPart1, rc); + + /* + * Check the image architecture and determine the bitness. + */ + RTLDRARCH enmArchActual; + bool f32Bit; + switch (puBuf->Nt32.FileHeader.Machine) + { + case IMAGE_FILE_MACHINE_I386: + enmArchActual = RTLDRARCH_X86_32; + f32Bit = true; + break; + case IMAGE_FILE_MACHINE_AMD64: + enmArchActual = RTLDRARCH_AMD64; + f32Bit = false; + break; + case IMAGE_FILE_MACHINE_ARM: + case IMAGE_FILE_MACHINE_THUMB: + case IMAGE_FILE_MACHINE_ARMNT: + enmArchActual = RTLDRARCH_ARM32; + f32Bit = true; + break; + case IMAGE_FILE_MACHINE_ARM64: + enmArchActual = RTLDRARCH_ARM64; + f32Bit = false; + break; + default: + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_LDR_ARCH_MISMATCH, "Unknown machine: %#x", puBuf->Nt32.FileHeader.Machine); + } + if ( enmArch != RTLDRARCH_WHATEVER + && enmArch != enmArchActual) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_LDR_ARCH_MISMATCH, "Found %s expected %s", + RTLdrArchName(enmArchActual), RTLdrArchName(enmArch)); + + /* + * Check optional header magic and size. + */ + uint16_t const uOptMagic = f32Bit ? IMAGE_NT_OPTIONAL_HDR32_MAGIC : IMAGE_NT_OPTIONAL_HDR64_MAGIC; + if (puBuf->Nt32.OptionalHeader.Magic != uOptMagic) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, "Unexpected optional header magic: %#x (expected %#x)", + puBuf->Nt32.OptionalHeader.Magic, uOptMagic); + + uint32_t const cDataDir = f32Bit ? 
puBuf->Nt32.OptionalHeader.NumberOfRvaAndSizes : puBuf->Nt64.OptionalHeader.NumberOfRvaAndSizes; + if ( cDataDir <= IMAGE_DIRECTORY_ENTRY_BASERELOC /* a bit random */ + || cDataDir > 32 /* also random */) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, "Unexpected data directory size: %#x", cDataDir); + + uint32_t cbOptHdr = f32Bit ? sizeof(IMAGE_OPTIONAL_HEADER32) : sizeof(IMAGE_OPTIONAL_HEADER64); + cbOptHdr -= sizeof(IMAGE_DATA_DIRECTORY) * IMAGE_NUMBEROF_DIRECTORY_ENTRIES; + cbOptHdr += sizeof(IMAGE_DATA_DIRECTORY) * cDataDir; + if (puBuf->Nt32.FileHeader.SizeOfOptionalHeader != cbOptHdr) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_BAD_EXE_FORMAT, "Unexpected optional header size: %#x (expected %#x)", + puBuf->Nt32.FileHeader.SizeOfOptionalHeader, cbOptHdr); + + uint32_t const cbSectAlign = f32Bit ? puBuf->Nt32.OptionalHeader.SectionAlignment : puBuf->Nt64.OptionalHeader.SectionAlignment; + PCIMAGE_SECTION_HEADER pSHdrs = (PCIMAGE_SECTION_HEADER)((uintptr_t)&puBuf->Nt32.OptionalHeader + cbOptHdr); + PCIMAGE_DATA_DIRECTORY paDataDir = (PCIMAGE_DATA_DIRECTORY)((uintptr_t)pSHdrs - cDataDir * sizeof(IMAGE_DATA_DIRECTORY)); + + /* + * Establish the image size. + */ + uint32_t cbImageFromHdr = f32Bit ? puBuf->Nt32.OptionalHeader.SizeOfImage : puBuf->Nt64.OptionalHeader.SizeOfImage; + if ( !cbImage + || (fFlags & DBGFMODINMEM_F_PE_NT31)) + cbImage = RT_ALIGN(cbImageFromHdr, _4K); + else if (RT_ALIGN(cbImageFromHdr, _4K) != RT_ALIGN(cbImage, _4K)) + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_MISMATCH, "Image size mismatch: input=%#x header=%#x", cbImage, cbImageFromHdr); + + /* + * Guess the module name if not specified and make sure it conforms to DBGC expectations. + */ + if (!pszName) + { + if (pszFilename) + pszName = RTPathFilenameEx(pszFilename, RTPATH_STR_F_STYLE_DOS); + /** @todo */ + } + + char szNormalized[128]; + pszName = dbgfR3ModNormalizeName(pszName, szNormalized, sizeof(szNormalized)); + + /* + * Create the module using the in memory image first, falling back on cached image. + */ + RTLDRMOD hLdrMod; + rc = dbgfR3ModInMemPeCreateLdrMod(pUVM, fFlags, pszName, pImageAddr, cbImage, cbImageFromHdr, f32Bit, + puBuf->Nt32.FileHeader.NumberOfSections, pSHdrs, cbSectAlign, cDataDir, paDataDir, + offPeHdrs, &hLdrMod, pErrInfo); + if (RT_FAILURE(rc)) + hLdrMod = NIL_RTLDRMOD; + + RTDBGMOD hMod; + rc = RTDbgModCreateFromPeImage(&hMod, pszFilename, pszName, &hLdrMod, cbImageFromHdr, + puBuf->Nt32.FileHeader.TimeDateStamp, DBGFR3AsGetConfig(pUVM)); + if (RT_SUCCESS(rc)) + *phDbgMod = hMod; + else if (!(fFlags & DBGFMODINMEM_F_NO_CONTAINER_FALLBACK)) + { + /* + * Fallback is a container module. + */ + rc = RTDbgModCreate(&hMod, pszName, cbImage, 0); + if (RT_SUCCESS(rc)) + { + rc = RTDbgModSymbolAdd(hMod, "Headers", 0 /*iSeg*/, 0, cbImage, 0 /*fFlags*/, NULL); + AssertRC(rc); + } + } + return rc; +} + + + +/** + * Process a PE image found in guest memory. + * + * @param pUVM The user mode VM handle. + * @param pImageAddr The image address. + * @param fFlags Flags, DBGFMODINMEM_F_XXX. + * @param pszName The module name, optional. + * @param pszFilename The image filename, optional. + * @param enmArch The image arch if we force it, pass + * RTLDRARCH_WHATEVER if you don't care. + * @param cbImage Image size. Pass 0 if not known. + * @param phDbgMod Where to return the resulting debug module on success. + * @param pErrInfo Where to return extended error info on failure. 
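+ *
+ * @par Example
+ *      A minimal usage sketch, assuming ImageAddr holds the guest base address of
+ *      a mapped PE image and using the standard IPRT error-info and debug-module
+ *      helpers:
+ * @code
+ *      RTERRINFOSTATIC ErrInfo;
+ *      RTDBGMOD        hDbgMod;
+ *      // fFlags = 0, pszName/pszFilename unknown (NULL), cbImage unknown (0).
+ *      int rc = DBGFR3ModInMem(pUVM, &ImageAddr, 0, NULL, NULL, RTLDRARCH_WHATEVER, 0,
+ *                              &hDbgMod, RTErrInfoInitStatic(&ErrInfo));
+ *      if (RT_SUCCESS(rc))
+ *      {
+ *          // ... link hDbgMod into an address space or query its symbols ...
+ *          RTDbgModRelease(hDbgMod);
+ *      }
+ * @endcode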
+ */ +VMMR3DECL(int) DBGFR3ModInMem(PUVM pUVM, PCDBGFADDRESS pImageAddr, uint32_t fFlags, const char *pszName, const char *pszFilename, + RTLDRARCH enmArch, uint32_t cbImage, PRTDBGMOD phDbgMod, PRTERRINFO pErrInfo) +{ + /* + * Validate and adjust. + */ + AssertPtrReturn(phDbgMod, VERR_INVALID_POINTER); + *phDbgMod = NIL_RTDBGMOD; + AssertPtrReturn(pImageAddr, VERR_INVALID_POINTER); + AssertMsgReturn(cbImage == 0 || cbImage >= sizeof(IMAGE_NT_HEADERS32) + sizeof(IMAGE_DOS_HEADER), + ("cbImage=%#x\n", cbImage), VERR_INVALID_PARAMETER); + AssertMsgReturn(!(fFlags & ~DBGFMODINMEM_F_VALID_MASK), ("%#x\n", fFlags), VERR_INVALID_FLAGS); + if (enmArch == RTLDRARCH_HOST) + enmArch = RTLdrGetHostArch(); + + /* + * Look for an image header we can work with. + */ + DBGFMODINMEMBUF uBuf; + RT_ZERO(uBuf); + + int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, pImageAddr, &uBuf, sizeof(uBuf.DosHdr)); + if (RT_FAILURE(rc)) + return RTERRINFO_LOG_SET_F(pErrInfo, rc, "Failed to read DOS header at %RGv: %Rrc", pImageAddr->FlatPtr, rc); + + if (uBuf.ab[0] == ELFMAG0 && uBuf.ab[1] == ELFMAG1 && uBuf.ab[2] == ELFMAG2 && uBuf.ab[3] == ELFMAG3) + return dbgfR3ModInMemElf(pUVM, pImageAddr, fFlags, pszName, pszFilename, enmArch, cbImage, &uBuf, phDbgMod, pErrInfo); + + uint32_t offNewHdrs; + if (uBuf.DosHdr.e_magic == IMAGE_DOS_SIGNATURE) + { + offNewHdrs = uBuf.DosHdr.e_lfanew; + if ( offNewHdrs < 16 + || offNewHdrs > (cbImage ? _2M : cbImage - sizeof(IMAGE_NT_HEADERS32))) + return RTERRINFO_LOG_SET_F(pErrInfo, rc, "e_lfanew value is out of range: %RX32 (16..%u)", + offNewHdrs, (cbImage ? _2M : cbImage - sizeof(IMAGE_NT_HEADERS32))); + } + else if (uBuf.Nt32.Signature == IMAGE_NT_SIGNATURE) + offNewHdrs = 0; + else + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_INVALID_EXE_SIGNATURE, "Unknown image magic at %RGv: %.8Rhxs", + pImageAddr->FlatPtr, uBuf.ab); + + /* + * Read the next bit of header, assuming PE so stop at the end of + * the COFF file header. + */ + DBGFADDRESS PeHdrAddr = *pImageAddr; + DBGFR3AddrAdd(&PeHdrAddr, offNewHdrs); + uint32_t const cbPeHdrsPart1 = RT_UOFFSETOF(IMAGE_NT_HEADERS32, OptionalHeader); + rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &PeHdrAddr, &uBuf, cbPeHdrsPart1); + if (RT_FAILURE(rc)) + return RTERRINFO_LOG_SET_F(pErrInfo, rc, "Failed to read PE/LX/NE headers at %RGv (off=%#RX32): %Rrc", + PeHdrAddr.FlatPtr, offNewHdrs, rc); + + if (uBuf.Nt32.Signature == IMAGE_NT_SIGNATURE) + return dbgfR3ModInMemPe(pUVM, pImageAddr, fFlags, pszName, pszFilename, enmArch, cbImage, offNewHdrs, cbPeHdrsPart1, + &uBuf, phDbgMod, pErrInfo); + + return RTERRINFO_LOG_SET_F(pErrInfo, VERR_INVALID_EXE_SIGNATURE, "No PE/LX/NE header at %RGv (off=%#RX32): %.8Rhxs", + PeHdrAddr.FlatPtr, offNewHdrs, uBuf.ab); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFR3PlugIn.cpp b/src/VBox/VMM/VMMR3/DBGFR3PlugIn.cpp new file mode 100644 index 00000000..5ba71be9 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFR3PlugIn.cpp @@ -0,0 +1,616 @@ +/* $Id: DBGFR3PlugIn.cpp $ */ +/** @file + * DBGF - Debugger Facility, Plug-In Support. + */ + +/* + * Copyright (C) 2008-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include "DBGFInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + +#define DBGF_PLUG_IN_READ_LOCK(pUVM) \ + do { int rcLock = RTCritSectRwEnterShared(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) +#define DBGF_PLUG_IN_READ_UNLOCK(pUVM) \ + do { int rcLock = RTCritSectRwLeaveShared(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) + +#define DBGF_PLUG_IN_WRITE_LOCK(pUVM) \ + do { int rcLock = RTCritSectRwEnterExcl(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) +#define DBGF_PLUG_IN_WRITE_UNLOCK(pUVM) \ + do { int rcLock = RTCritSectRwLeaveExcl(&pUVM->dbgf.s.CritSect); AssertRC(rcLock); } while (0) + +/** Max allowed length of a plug-in name (excludes the path and suffix). */ +#define DBGFPLUGIN_MAX_NAME 64 + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Plug-in tracking record. + */ +typedef struct DBGFPLUGIN +{ + /** Pointer to the next plug-in. */ + struct DBGFPLUGIN *pNext; + /** The loader handle. */ + RTLDRMOD hLdrMod; + /** The plug-in entry point. */ + PFNDBGFPLUGIN pfnEntry; + /** The name length. */ + uint8_t cchName; + /** The plug-in name (variable length). */ + char szName[1]; +} DBGFPLUGIN; +/** Pointer to plug-in tracking record. */ +typedef DBGFPLUGIN *PDBGFPLUGIN; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(void) dbgfPlugInUnloadAll(PUVM pUVM); +static FNDBGFHANDLERINT dbgfR3PlugInInfoList; + + +/** + * Internal init routine called by DBGFR3Init(). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +int dbgfR3PlugInInit(PUVM pUVM) +{ + return DBGFR3InfoRegisterInternal(pUVM->pVM, "plugins", "Lists the debugger plug-ins.", dbgfR3PlugInInfoList); +} + + +/** + * Internal cleanup routine called by DBGFR3Term(). + * + * @param pUVM The user mode VM handle. + */ +void dbgfR3PlugInTerm(PUVM pUVM) +{ + dbgfPlugInUnloadAll(pUVM); +} + + +/** + * Extracts the plug-in name from a plug-in specifier that may or may not + * include path and/or suffix. + * + * @returns VBox status code. + * + * @param pszDst Where to return the name. At least DBGFPLUGIN_MAX_NAME + * worth of buffer space. + * @param pszPlugIn The plug-in module specifier to parse. 
+ * @param pErrInfo Optional error information structure. + */ +static int dbgfPlugInExtractName(char *pszDst, const char *pszPlugIn, PRTERRINFO pErrInfo) +{ + /* + * Parse out the name stopping at the extension. + */ + const char *pszName = RTPathFilename(pszPlugIn); + if (!pszName || !*pszName) + return VERR_INVALID_NAME; + if (!RTStrNICmp(pszName, RT_STR_TUPLE(DBGF_PLUG_IN_PREFIX))) + { + pszName += sizeof(DBGF_PLUG_IN_PREFIX) - 1; + if (!*pszName) + return RTErrInfoSetF(pErrInfo, VERR_INVALID_NAME, "Invalid plug-in name: nothing after the prefix"); + } + + int ch; + size_t cchName = 0; + while ( (ch = pszName[cchName]) != '\0' + && ch != '.') + { + if ( RT_C_IS_ALPHA(ch) + || (RT_C_IS_DIGIT(ch) && cchName != 0)) + cchName++; + else + { + if (!RT_C_IS_DIGIT(ch)) + return RTErrInfoSetF(pErrInfo, VERR_INVALID_NAME, "Invalid plug-in name: '%c' is not alphanumeric", ch); + return RTErrInfoSetF(pErrInfo, VERR_INVALID_NAME, + "Invalid plug-in name: Cannot start with a digit (after the prefix)"); + } + } + + if (cchName >= DBGFPLUGIN_MAX_NAME) + return RTErrInfoSetF(pErrInfo, VERR_INVALID_NAME, "Invalid plug-in name: too long (max %u)", DBGFPLUGIN_MAX_NAME); + + /* + * We're very picky about the extension when present. + */ + if ( ch == '.' + && RTStrICmp(&pszName[cchName], RTLdrGetSuff())) + return RTErrInfoSetF(pErrInfo, VERR_INVALID_NAME, + "Invalid plug-in name: Suffix isn't the default dll/so/dylib one (%s): '%s'", + RTLdrGetSuff(), &pszName[cchName]); + + /* + * Copy it. + */ + memcpy(pszDst, pszName, cchName); + pszDst[cchName] = '\0'; + return VINF_SUCCESS; +} + + +/** + * Locate a loaded plug-in. + * + * @returns Pointer to the plug-in tracking structure. + * @param pUVM Pointer to the user-mode VM structure. + * @param pszName The name of the plug-in we're looking for. + * @param ppPrev Where to optionally return the pointer to the + * previous list member. + */ +static PDBGFPLUGIN dbgfR3PlugInLocate(PUVM pUVM, const char *pszName, PDBGFPLUGIN *ppPrev) +{ + PDBGFPLUGIN pPrev = NULL; + PDBGFPLUGIN pCur = pUVM->dbgf.s.pPlugInHead; + while (pCur) + { + if (!RTStrICmp(pCur->szName, pszName)) + { + if (ppPrev) + *ppPrev = pPrev; + return pCur; + } + + /* advance */ + pPrev = pCur; + pCur = pCur->pNext; + } + return NULL; +} + + +/** + * Try load the specified plug-in module. + * + * @returns VINF_SUCCESS on success, path error or loader error on failure. + * + * @param pPlugIn The plug-in tracing record. + * @param pszModule Module name. + * @param pErrInfo Optional error information structure. + */ +static int dbgfR3PlugInTryLoad(PDBGFPLUGIN pPlugIn, const char *pszModule, PRTERRINFO pErrInfo) +{ + /* + * Load it and try resolve the entry point. + */ + int rc = SUPR3HardenedVerifyPlugIn(pszModule, pErrInfo); + if (RT_SUCCESS(rc)) + rc = RTLdrLoadEx(pszModule, &pPlugIn->hLdrMod, RTLDRLOAD_FLAGS_LOCAL, pErrInfo); + if (RT_SUCCESS(rc)) + { + rc = RTLdrGetSymbol(pPlugIn->hLdrMod, DBGF_PLUG_IN_ENTRYPOINT, (void **)&pPlugIn->pfnEntry); + if (RT_SUCCESS(rc)) + { + LogRel(("DBGF: Loaded Plug-In '%s' (%s)\n", pPlugIn->szName, pszModule)); + return VINF_SUCCESS; + } + + RTErrInfoSet(pErrInfo, rc, "Failed to locate plug-in entrypoint (" DBGF_PLUG_IN_ENTRYPOINT ")" ); + LogRel(("DBGF: RTLdrGetSymbol('%s', '%s',) -> %Rrc\n", pszModule, DBGF_PLUG_IN_ENTRYPOINT, rc)); + + RTLdrClose(pPlugIn->hLdrMod); + pPlugIn->hLdrMod = NIL_RTLDRMOD; + } + return rc; +} + + +/** + * RTPathTraverseList callback. + * + * @returns See FNRTPATHTRAVERSER. + * + * @param pchPath See FNRTPATHTRAVERSER. 
+ * @param cchPath See FNRTPATHTRAVERSER. + * @param pvUser1 The plug-in specifier. + * @param pvUser2 The plug-in tracking record. + */ +static DECLCALLBACK(int) dbgfR3PlugInLoadCallback(const char *pchPath, size_t cchPath, void *pvUser1, void *pvUser2) +{ + PDBGFPLUGIN pPlugIn = (PDBGFPLUGIN)pvUser1; + PRTERRINFO pErrInfo = (PRTERRINFO)pvUser2; + + /* + * Join the path and the specified plug-in name, adding prefix and suffix. + */ + const char *pszSuff = RTLdrGetSuff(); + size_t const cchSuff = strlen(pszSuff); + size_t const cchModule = cchPath + sizeof(RTPATH_SLASH_STR) + sizeof(DBGF_PLUG_IN_PREFIX) + pPlugIn->cchName + cchSuff + 4; + char *pszModule = (char *)alloca(cchModule); + AssertReturn(pszModule, VERR_TRY_AGAIN); + memcpy(pszModule, pchPath, cchPath); + pszModule[cchPath] = '\0'; + + int rc = RTPathAppend(pszModule, cchModule, DBGF_PLUG_IN_PREFIX); + AssertRCReturn(rc, VERR_TRY_AGAIN); + strcat(&pszModule[cchPath], pPlugIn->szName); + strcat(&pszModule[cchPath + sizeof(DBGF_PLUG_IN_PREFIX) - 1 + pPlugIn->cchName], pszSuff); + Assert(strlen(pszModule) < cchModule - 4); + + if (RTPathExists(pszModule)) + { + rc = dbgfR3PlugInTryLoad(pPlugIn, pszModule, pErrInfo); + if (RT_SUCCESS(rc)) + return VINF_SUCCESS; + } + + return VERR_TRY_AGAIN; +} + + +/** + * Loads a plug-in. + * + * @returns VBox status code. + * @param pUVM Pointer to the user-mode VM structure. + * @param pszName The plug-in name. + * @param pszMaybeModule Path to the plug-in, or just the + * plug-in name as specified by the user. Ignored + * if no path. + * @param pErrInfo Optional error information structure. + */ +static DECLCALLBACK(int) dbgfR3PlugInLoad(PUVM pUVM, const char *pszName, const char *pszMaybeModule, PRTERRINFO pErrInfo) +{ + DBGF_PLUG_IN_WRITE_LOCK(pUVM); + + /* + * Check if a plug-in by the given name already exists. + */ + PDBGFPLUGIN pPlugIn = dbgfR3PlugInLocate(pUVM, pszName, NULL); + if (pPlugIn) + { + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); + return RTErrInfoSetF(pErrInfo, VERR_ALREADY_EXISTS, "A plug-in by the name '%s' already exists", pszName); + } + + /* + * Create a module structure and we can pass around via RTPathTraverseList if needed. + */ + size_t cbName = strlen(pszName) + 1; + pPlugIn = (PDBGFPLUGIN)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF, RT_UOFFSETOF_DYN(DBGFPLUGIN, szName[cbName])); + if (RT_UNLIKELY(!pPlugIn)) + { + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); + return VERR_NO_MEMORY; + } + memcpy(pPlugIn->szName, pszName, cbName); + pPlugIn->cchName = (uint8_t)cbName - 1; + Assert(pPlugIn->cchName == cbName - 1); + + /* + * If the caller specified a path, try load exactly what was specified. + */ + int rc; + if (RTPathHavePath(pszMaybeModule)) + rc = dbgfR3PlugInTryLoad(pPlugIn, pszMaybeModule, pErrInfo); + else + { + /* + * No path specified, search for the plug-in using the canonical + * module name for it. + */ + RTErrInfoClear(pErrInfo); + + /* 1. The private architecture directory. */ + char szPath[_4K]; + rc = RTPathAppPrivateArch(szPath, sizeof(szPath)); + if (RT_SUCCESS(rc)) + rc = RTPathTraverseList(szPath, '\0', dbgfR3PlugInLoadCallback, pPlugIn, pErrInfo); + if (RT_FAILURE_NP(rc)) + { + /* 2. The config value 'PlugInPath' */ + int rc2 = CFGMR3QueryString(CFGMR3GetChild(CFGMR3GetRootU(pUVM), "/DBGF"), "PlugInPath", szPath, sizeof(szPath)); + if (RT_SUCCESS(rc2)) + rc = RTPathTraverseList(szPath, ';', dbgfR3PlugInLoadCallback, pPlugIn, pErrInfo); + if (RT_FAILURE_NP(rc)) + { + /* 3. The VBOXDBG_PLUG_IN_PATH environment variable. 
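+                 *    It is a ';' separated list of directories, searched the
+                 *    same way as the PlugInPath configuration value above.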
*/ + rc2 = RTEnvGetEx(RTENV_DEFAULT, "VBOXDBG_PLUG_IN_PATH", szPath, sizeof(szPath), NULL); + if (RT_SUCCESS(rc2)) + rc = RTPathTraverseList(szPath, ';', dbgfR3PlugInLoadCallback, pPlugIn, pErrInfo); + } + } + + if (rc == VERR_END_OF_STRING) + rc = VERR_FILE_NOT_FOUND; + if (pErrInfo && !RTErrInfoIsSet(pErrInfo)) + RTErrInfoSetF(pErrInfo, rc, "Failed to locate '%s'", pPlugIn->szName); + } + if (RT_SUCCESS(rc)) + { + /* + * Try initialize it. + */ + rc = pPlugIn->pfnEntry(DBGFPLUGINOP_INIT, pUVM, VBOX_VERSION); + if (RT_SUCCESS(rc)) + { + /* + * Link it and we're good. + */ + pPlugIn->pNext = pUVM->dbgf.s.pPlugInHead; + pUVM->dbgf.s.pPlugInHead = pPlugIn; + + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); + return VINF_SUCCESS; + } + + RTErrInfoSet(pErrInfo, rc, "Plug-in init failed"); + LogRel(("DBGF: Plug-in '%s' failed during init: %Rrc\n", pPlugIn->szName, rc)); + RTLdrClose(pPlugIn->hLdrMod); + } + MMR3HeapFree(pPlugIn); + + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); + return rc; +} + + +/** + * Load a debugging plug-in. + * + * @returns VBox status code. + * @retval VERR_ALREADY_EXISTS if the module was already loaded. + * @retval VINF_BUFFER_OVERFLOW if the actual plug-in name buffer was too small + * (the plug-in was still successfully loaded). + * @param pUVM Pointer to the user-mode VM structure. + * @param pszPlugIn The plug-in name. This may specify the exact path to + * the plug-in module, or it may just specify the core name + * of the plug-in without prefix, suffix and path. + * @param pszActual Buffer to return the actual plug-in name in. Optional. + * This will be returned on VERR_ALREADY_EXSIST too. + * @param cbActual The size of @a pszActual. + * @param pErrInfo Optional error information structure. + */ +VMMR3DECL(int) DBGFR3PlugInLoad(PUVM pUVM, const char *pszPlugIn, char *pszActual, size_t cbActual, PRTERRINFO pErrInfo) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszPlugIn, VERR_INVALID_PARAMETER); + + /* + * Extract the plug-in name. Copy it to the return buffer as we'll want to + * return it in the VERR_ALREADY_EXISTS case too. + */ + char szName[DBGFPLUGIN_MAX_NAME]; + int rc = dbgfPlugInExtractName(szName, pszPlugIn, pErrInfo); + if (RT_SUCCESS(rc)) + { + int rc2 = VINF_SUCCESS; + if (pszActual) + rc2 = RTStrCopy(pszActual, cbActual, szName); + + /* + * Write lock releated DBGF bits and try load it. + */ + rc = VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfR3PlugInLoad, 4, pUVM, szName, pszPlugIn, pErrInfo); + if (rc2 != VINF_SUCCESS && RT_SUCCESS(rc)) + rc = VINF_BUFFER_OVERFLOW; + } + + return rc; +} + + +/** + * Load all plug-ins from the architechture private directory of VBox. + * + * @param pUVM Pointer to the user-mode VM structure. + */ +VMMR3DECL(void) DBGFR3PlugInLoadAll(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN_VOID(pUVM); + + /* + * Pass it on to EMT(0) if necessary (thanks to DBGFR3Os*). + */ + if (VMR3GetVMCPUId(pUVM->pVM) != 0) + { + VMR3ReqPriorityCallVoidWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)DBGFR3PlugInLoadAll, 1, pUVM); + return; + } + + + /* + * Open the architecture specific directory with a filter on our prefix + * and names including a dot. 
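+     * (That is, DBGF_PLUG_IN_PREFIX followed by '*' plus the default shared
+     * library suffix returned by RTLdrGetSuff().)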
+ */ + const char *pszSuff = RTLdrGetSuff(); + size_t cchSuff = strlen(pszSuff); + + char szPath[RTPATH_MAX]; + int rc = RTPathAppPrivateArch(szPath, sizeof(szPath) - cchSuff); + AssertRCReturnVoid(rc); + size_t offDir = strlen(szPath); + + rc = RTPathAppend(szPath, sizeof(szPath) - cchSuff, DBGF_PLUG_IN_PREFIX "*"); + AssertRCReturnVoid(rc); + strcat(szPath, pszSuff); + + RTDIR hDir; + rc = RTDirOpenFiltered(&hDir, szPath, RTDIRFILTER_WINNT, 0 /*fFlags*/); + if (RT_SUCCESS(rc)) + { + /* + * Now read it and try load each of the plug-in modules. + */ + RTDIRENTRY DirEntry; + while (RT_SUCCESS(RTDirRead(hDir, &DirEntry, NULL))) + { + szPath[offDir] = '\0'; + rc = RTPathAppend(szPath, sizeof(szPath), DirEntry.szName); + if (RT_SUCCESS(rc)) + { + char szName[DBGFPLUGIN_MAX_NAME]; + rc = dbgfPlugInExtractName(szName, DirEntry.szName, NULL); + if (RT_SUCCESS(rc)) + { + DBGF_PLUG_IN_WRITE_LOCK(pUVM); + dbgfR3PlugInLoad(pUVM, szName, szPath, NULL); + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); + } + } + } + + RTDirClose(hDir); + } +} + + +/** + * Unloads a plug-in by name (no path, prefix or suffix). + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if the specified plug-in wasn't found. + * @param pUVM Pointer to the user-mode VM structure. + * @param pszName The name of the plug-in to unload. + */ +VMMR3DECL(int) DBGFR3PlugInUnload(PUVM pUVM, const char *pszName) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Pass it on to EMT(0) if necessary (thanks to DBGFR3Os*). + */ + if (VMR3GetVMCPUId(pUVM->pVM) != 0) + return VMR3ReqPriorityCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)DBGFR3PlugInUnload, 2, pUVM, pszName); + + + /* + * Find the plug-in. + */ + DBGF_PLUG_IN_WRITE_LOCK(pUVM); + + int rc; + PDBGFPLUGIN pPrevPlugIn; + PDBGFPLUGIN pPlugIn = dbgfR3PlugInLocate(pUVM, pszName, &pPrevPlugIn); + if (pPlugIn) + { + /* + * Unlink, terminate, unload and free the plug-in. + */ + if (pPrevPlugIn) + pPrevPlugIn->pNext = pPlugIn->pNext; + else + pUVM->dbgf.s.pPlugInHead = pPlugIn->pNext; + + pPlugIn->pfnEntry(DBGFPLUGINOP_TERM, pUVM, 0); + RTLdrClose(pPlugIn->hLdrMod); + + pPlugIn->pfnEntry = NULL; + pPlugIn->hLdrMod = NIL_RTLDRMOD; + MMR3HeapFree(pPlugIn->pNext); + rc = VINF_SUCCESS; + } + else + rc = VERR_NOT_FOUND; + + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); + return rc; +} + + +/** + * Unload all plug-ins. + * + * @param pUVM Pointer to the user-mode VM structure. + */ +static DECLCALLBACK(void) dbgfPlugInUnloadAll(PUVM pUVM) +{ + DBGF_PLUG_IN_WRITE_LOCK(pUVM); + + while (pUVM->dbgf.s.pPlugInHead) + { + PDBGFPLUGIN pPlugin = pUVM->dbgf.s.pPlugInHead; + pUVM->dbgf.s.pPlugInHead = pPlugin->pNext; + + pPlugin->pfnEntry(DBGFPLUGINOP_TERM, pUVM, 0); + + int rc2 = RTLdrClose(pPlugin->hLdrMod); + AssertRC(rc2); + + pPlugin->pfnEntry = NULL; + pPlugin->hLdrMod = NIL_RTLDRMOD; + MMR3HeapFree(pPlugin); + } + + DBGF_PLUG_IN_WRITE_UNLOCK(pUVM); +} + + +/** + * Unloads all plug-ins. + * + * @param pUVM Pointer to the user-mode VM structure. + */ +VMMR3DECL(void) DBGFR3PlugInUnloadAll(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN_VOID(pUVM); + /* Thanks to DBGFR3Os, this must be done on EMT(0). 
*/ + VMR3ReqPriorityCallVoidWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)dbgfPlugInUnloadAll, 1, pUVM); +} + + + +/** + * @callback_method_impl{FNDBGFHANDLERINT, The 'plugins' info item.} + */ +static DECLCALLBACK(void) dbgfR3PlugInInfoList(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PDBGFPLUGIN pPlugIn = pVM->pUVM->dbgf.s.pPlugInHead; + RT_NOREF_PV(pszArgs); + if (pPlugIn) + { + pHlp->pfnPrintf(pHlp, "Debugging plug-in%s: %s", pPlugIn->pNext ? "s" : "", pPlugIn->szName); + while ((pPlugIn = pPlugIn->pNext) != NULL) + pHlp->pfnPrintf(pHlp, ", %s", pPlugIn->szName); + pHlp->pfnPrintf(pHlp, "\n"); + + } + else + pHlp->pfnPrintf(pHlp, "No plug-ins loaded\n"); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFR3Trace.cpp b/src/VBox/VMM/VMMR3/DBGFR3Trace.cpp new file mode 100644 index 00000000..9255ead5 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFR3Trace.cpp @@ -0,0 +1,450 @@ +/* $Id: DBGFR3Trace.cpp $ */ +/** @file + * DBGF - Debugger Facility, Tracing. + */ + +/* + * Copyright (C) 2011-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include +#include "DBGFInternal.h" +#include +#include "VMMTracing.h" + +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(void) dbgfR3TraceInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** + * VMM trace point group translation table. + */ +static const struct +{ + /** The group name. */ + const char *pszName; + /** The name length. */ + uint32_t cchName; + /** The mask. */ + uint32_t fMask; +} g_aVmmTpGroups[] = +{ + { RT_STR_TUPLE("em"), VMMTPGROUP_EM }, + { RT_STR_TUPLE("hm"), VMMTPGROUP_HM }, + { RT_STR_TUPLE("tm"), VMMTPGROUP_TM }, +}; + + +/** + * Initializes the tracing. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param cbEntry The trace entry size. + * @param cEntries The number of entries. + */ +static int dbgfR3TraceEnable(PVM pVM, uint32_t cbEntry, uint32_t cEntries) +{ + /* + * Don't enable it twice. + */ + if (pVM->hTraceBufR3 != NIL_RTTRACEBUF) + return VERR_ALREADY_EXISTS; + + /* + * Resolve default parameter values. 
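+ * (A zero cbEntry/cEntries means "use the CFGM values": DBGF/TraceBufEntrySize,
+ * defaulting to 128 bytes, and DBGF/TraceBufEntries, defaulting to 4096 entries.)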
+ */ + int rc; + if (!cbEntry) + { + rc = CFGMR3QueryU32Def(CFGMR3GetChild(CFGMR3GetRoot(pVM), "DBGF"), "TraceBufEntrySize", &cbEntry, 128); + AssertRCReturn(rc, rc); + } + if (!cEntries) + { + rc = CFGMR3QueryU32Def(CFGMR3GetChild(CFGMR3GetRoot(pVM), "DBGF"), "TraceBufEntries", &cEntries, 4096); + AssertRCReturn(rc, rc); + } + + /* + * Figure the required size. + */ + RTTRACEBUF hTraceBuf; + size_t cbBlock = 0; + rc = RTTraceBufCarve(&hTraceBuf, cEntries, cbEntry, 0 /*fFlags*/, NULL, &cbBlock); + if (rc != VERR_BUFFER_OVERFLOW) + { + AssertReturn(!RT_SUCCESS_NP(rc), VERR_IPE_UNEXPECTED_INFO_STATUS); + return rc; + } + + /* + * Allocate a hyper heap block and carve a trace buffer out of it. + * + * Note! We ASSUME that the returned trace buffer handle has the same value + * as the heap block. + */ + cbBlock = RT_ALIGN_Z(cbBlock, PAGE_SIZE); + void *pvBlock; + rc = MMR3HyperAllocOnceNoRel(pVM, cbBlock, PAGE_SIZE, MM_TAG_DBGF, &pvBlock); + if (RT_FAILURE(rc)) + return rc; + + rc = RTTraceBufCarve(&hTraceBuf, cEntries, cbEntry, 0 /*fFlags*/, pvBlock, &cbBlock); + AssertRCReturn(rc, rc); + AssertRelease(hTraceBuf == (RTTRACEBUF)pvBlock); + AssertRelease((void *)hTraceBuf == pvBlock); + + pVM->hTraceBufR3 = hTraceBuf; + pVM->hTraceBufR0 = MMHyperCCToR0(pVM, hTraceBuf); + pVM->hTraceBufRC = MMHyperCCToRC(pVM, hTraceBuf); + return VINF_SUCCESS; +} + + +/** + * Initializes the tracing. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + */ +int dbgfR3TraceInit(PVM pVM) +{ + /* + * Initialize the trace buffer handles. + */ + Assert(NIL_RTTRACEBUF == (RTTRACEBUF)NULL); + pVM->hTraceBufR3 = NIL_RTTRACEBUF; + pVM->hTraceBufRC = NIL_RTRCPTR; + pVM->hTraceBufR0 = NIL_RTR0PTR; + + /* + * Check the config and enable tracing if requested. + */ + PCFGMNODE pDbgfNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "DBGF"); +#if defined(DEBUG) || defined(RTTRACE_ENABLED) + bool const fDefault = false; + const char * const pszConfigDefault = ""; +#else + bool const fDefault = false; + const char * const pszConfigDefault = ""; +#endif + bool fTracingEnabled; + int rc = CFGMR3QueryBoolDef(pDbgfNode, "TracingEnabled", &fTracingEnabled, fDefault); + AssertRCReturn(rc, rc); + if (fTracingEnabled) + { + rc = dbgfR3TraceEnable(pVM, 0, 0); + if (RT_SUCCESS(rc)) + { + if (pDbgfNode) + { + char *pszTracingConfig; + rc = CFGMR3QueryStringAllocDef(pDbgfNode, "TracingConfig", &pszTracingConfig, pszConfigDefault); + if (RT_SUCCESS(rc)) + { + rc = DBGFR3TraceConfig(pVM, pszTracingConfig); + if (RT_FAILURE(rc)) + rc = VMSetError(pVM, rc, RT_SRC_POS, "TracingConfig=\"%s\" -> %Rrc", pszTracingConfig, rc); + MMR3HeapFree(pszTracingConfig); + } + } + else + { + rc = DBGFR3TraceConfig(pVM, pszConfigDefault); + if (RT_FAILURE(rc)) + rc = VMSetError(pVM, rc, RT_SRC_POS, "TracingConfig=\"%s\" (default) -> %Rrc", pszConfigDefault, rc); + } + } + } + + /* + * Register a debug info item that will dump the trace buffer content. + */ + if (RT_SUCCESS(rc)) + rc = DBGFR3InfoRegisterInternal(pVM, "tracebuf", "Display the trace buffer content. No arguments.", dbgfR3TraceInfo); + + return rc; +} + + +/** + * Terminates the tracing. + * + * @param pVM The cross context VM structure. + */ +void dbgfR3TraceTerm(PVM pVM) +{ + /* nothing to do */ + NOREF(pVM); +} + + +/** + * Relocates the trace buffer handle in RC. + * + * @param pVM The cross context VM structure. 
+ */ +void dbgfR3TraceRelocate(PVM pVM) +{ + if (pVM->hTraceBufR3 != NIL_RTTRACEBUF) + pVM->hTraceBufRC = MMHyperCCToRC(pVM, pVM->hTraceBufR3); +} + + +/** + * Change the traceing configuration of the VM. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_NOT_FOUND if any of the trace point groups mentioned in the + * config string cannot be found. (Or if the string cannot be made + * sense of.) No change made. + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_POINTER + * + * @param pVM The cross context VM structure. + * @param pszConfig The configuration change specification. + * + * Trace point group names, optionally prefixed by a '-' to + * indicate that the group is being disabled. A special + * group 'all' can be used to enable or disable all trace + * points. + * + * Drivers, devices and USB devices each have their own + * trace point group which can be accessed by prefixing + * their official PDM name by 'drv', 'dev' or 'usb' + * respectively. + */ +VMMDECL(int) DBGFR3TraceConfig(PVM pVM, const char *pszConfig) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszConfig, VERR_INVALID_POINTER); + if (pVM->hTraceBufR3 == NIL_RTTRACEBUF) + return VERR_DBGF_NO_TRACE_BUFFER; + + /* + * We do this in two passes, the first pass just validates the input string + * and the second applies the changes. + */ + for (uint32_t uPass = 0; uPass < 1; uPass++) + { + char ch; + while ((ch = *pszConfig) != '\0') + { + if (RT_C_IS_SPACE(ch)) + continue; + + /* + * Operation prefix. + */ + bool fNo = false; + do + { + if (ch == 'n' && pszConfig[1] == 'o') + { + fNo = !fNo; + pszConfig++; + } + else if (ch == '+') + fNo = false; + else if (ch == '-' || ch == '!' || ch == '~') + fNo = !fNo; + else + break; + } while ((ch = *++pszConfig) != '\0'); + if (ch == '\0') + break; + + /* + * Extract the name. + */ + const char *pszName = pszConfig; + while ( ch != '\0' + && !RT_C_IS_SPACE(ch) + && !RT_C_IS_PUNCT(ch)) + ch = *++pszConfig; + size_t const cchName = pszConfig - pszName; + + /* + * 'all' - special group that enables or disables all trace points. + */ + if (cchName == 3 && !strncmp(pszName, "all", 3)) + { + if (uPass != 0) + { + uint32_t iCpu = pVM->cCpus; + if (!fNo) + while (iCpu-- > 0) + pVM->aCpus[iCpu].fTraceGroups = UINT32_MAX; + else + while (iCpu-- > 0) + pVM->aCpus[iCpu].fTraceGroups = 0; + PDMR3TracingConfig(pVM, NULL, 0, !fNo, uPass > 0); + } + } + else + { + /* + * A specific group, try the VMM first then PDM. + */ + uint32_t i = RT_ELEMENTS(g_aVmmTpGroups); + while (i-- > 0) + if ( g_aVmmTpGroups[i].cchName == cchName + && !strncmp(g_aVmmTpGroups[i].pszName, pszName, cchName)) + { + if (uPass != 0) + { + uint32_t iCpu = pVM->cCpus; + if (!fNo) + while (iCpu-- > 0) + pVM->aCpus[iCpu].fTraceGroups |= g_aVmmTpGroups[i].fMask; + else + while (iCpu-- > 0) + pVM->aCpus[iCpu].fTraceGroups &= ~g_aVmmTpGroups[i].fMask; + } + break; + } + + if (i == UINT32_MAX) + { + int rc = PDMR3TracingConfig(pVM, pszName, cchName, !fNo, uPass > 0); + if (RT_FAILURE(rc)) + return rc; + } + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Query the trace configuration specification string. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_POINTER + * @retval VERR_BUFFER_OVERFLOW if the buffer is too small. Buffer will be + * empty. + + * @param pVM The cross context VM structure. + * @param pszConfig Pointer to the output buffer. 
+ * @param cbConfig The size of the output buffer. + */ +VMMDECL(int) DBGFR3TraceQueryConfig(PVM pVM, char *pszConfig, size_t cbConfig) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszConfig, VERR_INVALID_POINTER); + if (cbConfig < 1) + return VERR_BUFFER_OVERFLOW; + *pszConfig = '\0'; + + if (pVM->hTraceBufR3 == NIL_RTTRACEBUF) + return VERR_DBGF_NO_TRACE_BUFFER; + + int rc = VINF_SUCCESS; + uint32_t const fTraceGroups = pVM->aCpus[0].fTraceGroups; + if ( fTraceGroups == UINT32_MAX + && PDMR3TracingAreAll(pVM, true /*fEnabled*/)) + rc = RTStrCopy(pszConfig, cbConfig, "all"); + else if ( fTraceGroups == 0 + && PDMR3TracingAreAll(pVM, false /*fEnabled*/)) + rc = RTStrCopy(pszConfig, cbConfig, "-all"); + else + { + char *pszDst = pszConfig; + size_t cbDst = cbConfig; + uint32_t i = RT_ELEMENTS(g_aVmmTpGroups); + while (i-- > 0) + if (g_aVmmTpGroups[i].fMask & fTraceGroups) + { + size_t cchThis = g_aVmmTpGroups[i].cchName + (pszDst != pszConfig); + if (cchThis >= cbDst) + { + rc = VERR_BUFFER_OVERFLOW; + break; + } + if (pszDst != pszConfig) + { + *pszDst = ' '; + memcpy(pszDst + 1, g_aVmmTpGroups[i].pszName, g_aVmmTpGroups[i].cchName + 1); + } + else + memcpy(pszDst, g_aVmmTpGroups[i].pszName, g_aVmmTpGroups[i].cchName + 1); + pszDst += cchThis; + cbDst -= cchThis; + } + + if (RT_SUCCESS(rc)) + rc = PDMR3TracingQueryConfig(pVM, pszDst, cbDst); + } + + if (RT_FAILURE(rc)) + *pszConfig = '\0'; + return rc; +} + + +/** + * @callback_method_impl{FNRTTRACEBUFCALLBACK} + */ +static DECLCALLBACK(int) +dbgfR3TraceInfoDumpEntry(RTTRACEBUF hTraceBuf, uint32_t iEntry, uint64_t NanoTS, RTCPUID idCpu, const char *pszMsg, void *pvUser) +{ + PCDBGFINFOHLP pHlp = (PCDBGFINFOHLP)pvUser; + pHlp->pfnPrintf(pHlp, "#%04u/%'llu/%02x: %s\n", iEntry, NanoTS, idCpu, pszMsg); + NOREF(hTraceBuf); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNDBGFHANDLERINT, Info handler for displaying the trace buffer content.} + */ +static DECLCALLBACK(void) dbgfR3TraceInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + RTTRACEBUF hTraceBuf = pVM->hTraceBufR3; + if (hTraceBuf == NIL_RTTRACEBUF) + pHlp->pfnPrintf(pHlp, "Tracing is disabled\n"); + else + { + pHlp->pfnPrintf(pHlp, "Trace buffer %p - %u entries of %u bytes\n", + hTraceBuf, RTTraceBufGetEntryCount(hTraceBuf), RTTraceBufGetEntrySize(hTraceBuf)); + RTTraceBufEnumEntries(hTraceBuf, dbgfR3TraceInfoDumpEntry, (void *)pHlp); + } + NOREF(pszArgs); +} + diff --git a/src/VBox/VMM/VMMR3/DBGFR3Type.cpp b/src/VBox/VMM/VMMR3/DBGFR3Type.cpp new file mode 100644 index 00000000..60036cb4 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFR3Type.cpp @@ -0,0 +1,1278 @@ +/* $Id: DBGFR3Type.cpp $ */ +/** @file + * DBGF - Debugger Facility, Type Management. + */ + +/* + * Copyright (C) 2016-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_dbgf_type DBGFType - Type Management + * + * The type management system is intended to ease retrieval of values from + * structures in the guest OS without having to take care of the size of pointers. 
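+ *
+ * A minimal usage sketch (illustrative only: the structure layout, the type
+ * name 'mydbg_node_t' and the address variable pAddr are made-up examples,
+ * and only the DBGFTYPEREG / DBGFTYPEREGMEMBER fields actually referenced by
+ * this file are filled in - everything else is left zeroed):
+ *
+ * @code
+ *  DBGFTYPEREGMEMBER aMembers[2];
+ *  RT_ZERO(aMembers);
+ *  aMembers[0].pszName = "cRefs";  aMembers[0].pszType = "uint32_t";
+ *  aMembers[1].pszName = "pNext";  aMembers[1].pszType = "ptr_t";
+ *  aMembers[1].fFlags  = DBGFTYPEREGMEMBER_F_POINTER;
+ *
+ *  DBGFTYPEREG TypeReg;
+ *  RT_ZERO(TypeReg);
+ *  TypeReg.pszType    = "mydbg_node_t";
+ *  TypeReg.enmVariant = DBGFTYPEVARIANT_STRUCT;
+ *  TypeReg.cMembers   = RT_ELEMENTS(aMembers);
+ *  TypeReg.paMembers  = aMembers;
+ *
+ *  int rc = DBGFR3TypeRegister(pUVM, 1, &TypeReg);
+ *  if (RT_SUCCESS(rc))
+ *  {
+ *      PDBGFTYPEVAL pVal = NULL;   /* parsed copy of the guest structure */
+ *      rc = DBGFR3TypeQueryValByType(pUVM, pAddr, "mydbg_node_t", &pVal);
+ *      if (RT_SUCCESS(rc))
+ *          DBGFR3TypeValFree(pVal); /* the caller owns the returned value */
+ *  }
+ * @endcode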
+ * + * @todo r=bird: We need to join this up with modules and address spaces. It + * cannot be standalone like this. Also, it must be comming from IPRT as + * there is no point in duplicating code (been there, done that with + * symbols and debug info already). This unfortunately means we need to + * find some common way of abstracting DWARF and Codeview type info so we + * can extend those debug info parsers to make type information available. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include "DBGFInternal.h" +#include +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + +/** Locks the type database for writing. */ +#define DBGF_TYPE_DB_LOCK_WRITE(pUVM) \ + do { \ + int rcSem = RTSemRWRequestWrite((pUVM)->dbgf.s.hTypeDbLock, RT_INDEFINITE_WAIT); \ + AssertRC(rcSem); \ + } while (0) + +/** Unlocks the type database after writing. */ +#define DBGF_TYPE_DB_UNLOCK_WRITE(pUVM) \ + do { \ + int rcSem = RTSemRWReleaseWrite((pUVM)->dbgf.s.hTypeDbLock); \ + AssertRC(rcSem); \ + } while (0) + +/** Locks the type database for reading. */ +#define DBGF_TYPE_DB_LOCK_READ(pUVM) \ + do { \ + int rcSem = RTSemRWRequestRead((pUVM)->dbgf.s.hTypeDbLock, RT_INDEFINITE_WAIT); \ + AssertRC(rcSem); \ + } while (0) + +/** Unlocks the type database after reading. */ +#define DBGF_TYPE_DB_UNLOCK_READ(pUVM) \ + do { \ + int rcSem = RTSemRWReleaseRead((pUVM)->dbgf.s.hTypeDbLock); \ + AssertRC(rcSem); \ + } while (0) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * DBGF registered type. + */ +typedef struct DBGFTYPE +{ + /** String space core. */ + RTSTRSPACECORE Core; + /** Pointer to the registration structure, NULL means builtin type. */ + PCDBGFTYPEREG pReg; + /** How often the type is referenced by other types. */ + volatile uint32_t cRefs; + /** Size of the type. */ + size_t cbType; + /** Builtin type if pReg is NULL (otherwise it is invalid). */ + DBGFTYPEBUILTIN enmTypeBuiltin; +} DBGFTYPE; +/** Pointer to a DBGF type. */ +typedef DBGFTYPE *PDBGFTYPE; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int dbgfR3TypeParseBufferByType(PUVM pUVM, PDBGFTYPE pType, uint8_t *pbBuf, size_t cbBuf, + PDBGFTYPEVAL *ppVal, size_t *pcbParsed); + + +/** + * Looks up a type by the identifier. + * + * @returns Pointer to the type structure on success, NULL otherwise. + * @param pUVM The user mode VM handle. + * @param pszType The type identifier. 
+ */ +static PDBGFTYPE dbgfR3TypeLookup(PUVM pUVM, const char *pszType) +{ + PRTSTRSPACE pTypeSpace = &pUVM->dbgf.s.TypeSpace; + return (PDBGFTYPE)RTStrSpaceGet(pTypeSpace, pszType); +} + + +/** + * Calculate the size of the given type. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pType The type to calculate the size for. + * @param fCalcNested Flag whether to calculate the size for nested + * structs if the sizes are 0. + */ +static int dbgfR3TypeCalcSize(PUVM pUVM, PDBGFTYPE pType, bool fCalcNested) +{ + int rc = VINF_SUCCESS; + + /* Builtin types are never recalculated. */ + if (pType->pReg) + { + switch (pType->pReg->enmVariant) + { + case DBGFTYPEVARIANT_STRUCT: + { + size_t cbType = 0; + + /* Go through the members and update size. */ + for (uint32_t i = 0; i < pType->pReg->cMembers && RT_SUCCESS(rc); i++) + { + PCDBGFTYPEREGMEMBER pMember = &pType->pReg->paMembers[i]; + + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_POINTER) + { + /* Use the current pointer size. */ + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, "ptr_t"); + if (RT_LIKELY(pTypeMember)) + { + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_ARRAY) + cbType += pMember->cElements * pTypeMember->cbType; + else + cbType += pTypeMember->cbType; + } + } + else + { + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, pMember->pszType); + if (RT_LIKELY(pTypeMember)) + { + if ( pTypeMember->cbType == 0 + && fCalcNested) + rc = dbgfR3TypeCalcSize(pUVM, pTypeMember, fCalcNested); + + if (RT_SUCCESS(rc)) + { + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_ARRAY) + cbType += pMember->cElements * pTypeMember->cbType; + else + cbType += pTypeMember->cbType; + } + } + else + rc = VERR_INVALID_STATE; + } + } + + if (RT_SUCCESS(rc)) + pType->cbType = cbType; + break; + } + + case DBGFTYPEVARIANT_UNION: + { + /* Get size of the biggest member and use that one. */ + size_t cbType = 0; + + for (uint32_t i = 0; i < pType->pReg->cMembers && RT_SUCCESS(rc); i++) + { + PCDBGFTYPEREGMEMBER pMember = &pType->pReg->paMembers[i]; + + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_POINTER) + { + /* Use the current pointer size. */ + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, "ptr_t"); + if (RT_LIKELY(pTypeMember)) + { + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_ARRAY) + cbType = RT_MAX(cbType, pMember->cElements * pTypeMember->cbType); + else + cbType = RT_MAX(cbType, pTypeMember->cbType); + } + } + else + { + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, pMember->pszType); + if (RT_LIKELY(pTypeMember)) + { + if ( pTypeMember->cbType == 0 + && fCalcNested) + rc = dbgfR3TypeCalcSize(pUVM, pTypeMember, fCalcNested); + + if (RT_SUCCESS(rc)) + { + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_ARRAY) + cbType = RT_MAX(cbType, pMember->cElements * pTypeMember->cbType); + else + cbType = RT_MAX(cbType, pTypeMember->cbType); + } + } + else + rc = VERR_INVALID_STATE; + } + } + + if (RT_SUCCESS(rc)) + pType->cbType = cbType; + break; + } + + case DBGFTYPEVARIANT_ALIAS: + { + /* Get the size of the alias. */ + PDBGFTYPE pAliased = dbgfR3TypeLookup(pUVM, pType->pReg->pszAliasedType); + if (RT_LIKELY(pAliased)) + { + if ( pAliased->cbType == 0 + && fCalcNested) + rc = dbgfR3TypeCalcSize(pUVM, pAliased, fCalcNested); + + if (RT_SUCCESS(rc)) + pType->cbType = pAliased->cbType; + } + else + rc = VERR_INVALID_STATE; + break; + } + + default: + AssertMsgFailedReturn(("Invalid type variant: %d", pType->pReg->enmVariant), VERR_INVALID_STATE); + } + } + + return rc; +} + + +/** + * Callback for clearing the size of all non built-in types. 
+ * + * @returns VBox status code. + * @param pStr The type structure. + * @param pvUser The user mode VM handle. + */ +static DECLCALLBACK(int) dbgfR3TypeTraverseClearSize(PRTSTRSPACECORE pStr, void *pvUser) +{ + PDBGFTYPE pType = (PDBGFTYPE)pStr; + + if (pType->pReg) + pType->cbType = 0; + + NOREF(pvUser); + return VINF_SUCCESS; +} + + +/** + * Callback for calculating the size of all non built-in types. + * + * @returns VBox status code. + * @param pStr The type structure. + * @param pvUser The user mode VM handle. + */ +static DECLCALLBACK(int) dbgfR3TypeTraverseCalcSize(PRTSTRSPACECORE pStr, void *pvUser) +{ + PDBGFTYPE pType = (PDBGFTYPE)pStr; + + if ( pType->pReg + && !pType->cbType) + dbgfR3TypeCalcSize((PUVM)pvUser, pType, true /* fCalcNested */); + + return VINF_SUCCESS; +} + + +/** + * Recalculate the sizes of all registered non builtin types. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +static int dbgfR3TypeRecalculateAllSizes(PUVM pUVM) +{ + int rc = VINF_SUCCESS; + + /* + * Clear the sizes of all non builtin types to 0 first so we know which type we + * visited later on. + */ + rc = RTStrSpaceEnumerate(&pUVM->dbgf.s.TypeSpace, dbgfR3TypeTraverseClearSize, pUVM); + if (RT_SUCCESS(rc)) + { + /* Now recalculate the size. */ + rc = RTStrSpaceEnumerate(&pUVM->dbgf.s.TypeSpace, dbgfR3TypeTraverseCalcSize, pUVM); + } + + return rc; +} + +/** + * Validates a given type registration. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pReg The type registration structure. + */ +static int dbgfR3TypeValidate(PUVM pUVM, PCDBGFTYPEREG pReg) +{ + int rc = VINF_SUCCESS; + + switch (pReg->enmVariant) + { + case DBGFTYPEVARIANT_ALIAS: + if ( pReg->cMembers > 0 + || pReg->paMembers + || !pReg->pszAliasedType) + rc = VERR_INVALID_PARAMETER; + else + { + PDBGFTYPE pAlias = dbgfR3TypeLookup(pUVM, pReg->pszAliasedType); + if (RT_UNLIKELY(!pAlias)) + rc = VERR_NOT_FOUND; + } + break; + case DBGFTYPEVARIANT_STRUCT: + case DBGFTYPEVARIANT_UNION: + if (!pReg->pszAliasedType) + { + for (uint32_t i = 0; i < pReg->cMembers; i++) + { + PCDBGFTYPEREGMEMBER pMember = &pReg->paMembers[i]; + + /* Use the current pointer size. */ + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, pMember->pszType); + if (RT_UNLIKELY(!pTypeMember)) + { + rc = VERR_NOT_FOUND; + break; + } + + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_ARRAY) + { + if (pMember->cElements == 0) + rc = VERR_INVALID_PARAMETER; + } + else if (pMember->cElements != 0) + rc = VERR_INVALID_PARAMETER; + } + } + else + rc = VERR_INVALID_PARAMETER; + break; + default: + AssertMsgFailedBreakStmt(("Invalid type variant: %d", pReg->enmVariant), + rc = VERR_INVALID_PARAMETER); + } + + return rc; +} + +/** + * Retains or releases the reference counters to referenced types for the given + * type registration structure. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pReg The type registration structure. + * @param fRetain Flag whether to retain or release references. 
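+ *
+ * @note The caller is expected to hold the type database write lock and to
+ * have validated @a pReg already; the lookups of referenced types are
+ * only asserted here.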
+ */ +static int dbgfR3TypeUpdateRefCnts(PUVM pUVM, PCDBGFTYPEREG pReg, bool fRetain) +{ + int rc = VINF_SUCCESS; + + switch (pReg->enmVariant) + { + case DBGFTYPEVARIANT_ALIAS: + { + AssertPtr(pReg->pszAliasedType); + + PDBGFTYPE pAlias = dbgfR3TypeLookup(pUVM, pReg->pszAliasedType); + AssertPtr(pAlias); + + if (fRetain) + pAlias->cRefs++; + else + pAlias->cRefs--; + break; + } + case DBGFTYPEVARIANT_STRUCT: + case DBGFTYPEVARIANT_UNION: + { + for (uint32_t i = 0; i < pReg->cMembers; i++) + { + PCDBGFTYPEREGMEMBER pMember = &pReg->paMembers[i]; + + /* Use the current pointer size. */ + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, pMember->pszType); + AssertPtr(pTypeMember); + + if (fRetain) + pTypeMember->cRefs++; + else + pTypeMember->cRefs--; + } + break; + } + default: + AssertMsgFailedBreakStmt(("Invalid type variant: %d", pReg->enmVariant), + rc = VERR_INVALID_PARAMETER); + } + + return rc; +} + + +/** + * Registers a single type in the database. + * + * @returns VBox status code. + * @retval VERR_ALREADY_EXISTS if the type exists already. + * @param pUVM The user mode VM handle. + * @param pReg The type registration structure. + */ +static int dbgfR3TypeRegister(PUVM pUVM, PCDBGFTYPEREG pReg) +{ + int rc = VINF_SUCCESS; + + LogFlowFunc(("pUVM=%#p pReg=%#p{%s}\n", pUVM, pReg, pReg->pszType)); + + if (dbgfR3TypeLookup(pUVM, pReg->pszType) == NULL) + { + rc = dbgfR3TypeValidate(pUVM, pReg); + if (RT_SUCCESS(rc)) + { + PDBGFTYPE pType = (PDBGFTYPE)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_TYPE, sizeof(DBGFTYPE)); + if (RT_LIKELY(pType)) + { + pType->Core.pszString = pReg->pszType; + pType->pReg = pReg; + pType->cRefs = 0; + pType->enmTypeBuiltin = DBGFTYPEBUILTIN_INVALID; + rc = dbgfR3TypeCalcSize(pUVM, pType, false /* fCalcNested */); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3TypeUpdateRefCnts(pUVM, pReg, true /* fRetain */); + if (RT_SUCCESS(rc)) + { + bool fSucc = RTStrSpaceInsert(&pUVM->dbgf.s.TypeSpace, &pType->Core); + Assert(fSucc); + if (!fSucc) + { + dbgfR3TypeUpdateRefCnts(pUVM, pReg, false /* fRetain */); + rc = VERR_ALREADY_EXISTS; + } + } + } + + if (RT_FAILURE(rc)) + MMR3HeapFree(pType); + } + else + rc = VERR_NO_MEMORY; + } + } + else + rc = VERR_ALREADY_EXISTS; + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Registers a new built-in type + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param enmTypeBuiltin The builtin type enum. + * @param cbType Size of the type in bytes. + * @param pszType The type identifier for the builtin type. + */ +static int dbgfR3TypeRegisterBuiltin(PUVM pUVM, DBGFTYPEBUILTIN enmTypeBuiltin, + size_t cbType, const char *pszType) +{ + LogFlowFunc(("pUVM=%#p enmBuiltin=%d pszType=%s\n", pUVM, enmTypeBuiltin, pszType)); + + AssertReturn(!dbgfR3TypeLookup(pUVM, pszType), VERR_INVALID_STATE); + + int rc = VINF_SUCCESS; + PDBGFTYPE pType = (PDBGFTYPE)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_TYPE, sizeof(DBGFTYPE)); + if (RT_LIKELY(pType)) + { + pType->Core.pszString = pszType; + pType->pReg = NULL; + pType->cRefs = 0; + pType->cbType = cbType; + pType->enmTypeBuiltin = enmTypeBuiltin; + bool fSucc = RTStrSpaceInsert(&pUVM->dbgf.s.TypeSpace, &pType->Core); + Assert(fSucc); + if (!fSucc) + rc = VERR_ALREADY_EXISTS; + + if (RT_FAILURE(rc)) + MMR3HeapFree(pType); + } + else + rc = VERR_NO_MEMORY; + + return rc; +} + + +/** + * Registers builtin types. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. 
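+ *
+ * @note ptr_t and size_t are registered with a size of 0 here; they are
+ * expected to be given their real width later via DBGFR3TypeSetSize(),
+ * the only types for which that API is currently supported.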
+ */ +static int dbgfTypeRegisterBuiltinTypes(PUVM pUVM) +{ + int rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_UINT8, sizeof(uint8_t), "uint8_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_INT8, sizeof(int8_t), "int8_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_UINT16, sizeof(uint16_t), "uint16_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_INT16, sizeof(int16_t), "int16_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_UINT32, sizeof(uint32_t), "uint32_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_INT32, sizeof(int32_t), "int32_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_UINT64, sizeof(uint64_t), "uint64_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_INT64, sizeof(int64_t), "int64_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_PTR32, sizeof(uint32_t), "ptr32_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_PTR64, sizeof(uint64_t), "ptr64_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_PTR, 0, "ptr_t"); + if (RT_SUCCESS(rc)) + rc = dbgfR3TypeRegisterBuiltin(pUVM, DBGFTYPEBUILTIN_SIZE, 0, "size_t"); + + return rc; +} + + +/** + * Parses a single entry for a given type and assigns the value from the byte buffer + * to the value entry. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pMember The type member. + * @param pValEntry The value entry holding the value on success. + * @param pbBuf The raw byte buffer. + * @param cbBuf Size of the byte buffer. + * @param pcbParsed Where to store the amount of consumed bytes on success. 
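+ *
+ * @note For array members (DBGFTYPEREGMEMBER_F_ARRAY) a separate value buffer
+ * array is allocated and hung off the entry; it is released again by
+ * DBGFR3TypeValFree().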
+ */ +static int dbgfR3TypeParseEntry(PUVM pUVM, PCDBGFTYPEREGMEMBER pMember, PDBGFTYPEVALENTRY pValEntry, + uint8_t *pbBuf, size_t cbBuf, size_t *pcbParsed) +{ + int rc = VINF_SUCCESS; + PDBGFTYPE pTypeMember = dbgfR3TypeLookup(pUVM, pMember->pszType); + uint32_t cValBufs = 1; + size_t cbParsed = 0; + PDBGFTYPEVALBUF pValBuf = &pValEntry->Buf.Val; + + AssertPtrReturn(pTypeMember, VERR_INVALID_STATE); + + if (pMember->fFlags & DBGFTYPEREGMEMBER_F_ARRAY) + { + cValBufs = pMember->cElements; + pValBuf = (PDBGFTYPEVALBUF)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_TYPE, cValBufs * sizeof(DBGFTYPEVALBUF)); + if (RT_UNLIKELY(!pValBuf)) + rc = VERR_NO_MEMORY; + + pValEntry->Buf.pVal = pValBuf; + pValEntry->cEntries = cValBufs; + pValEntry->cbType = pTypeMember->cbType; + } + + if (RT_SUCCESS(rc)) + { + for (uint32_t iValBuf = 0; iValBuf < cValBufs && RT_SUCCESS(rc); iValBuf++) + { + size_t cbThisParsed = 0; + + if (pTypeMember->pReg) + { + /* Compound or aliased type */ + rc = dbgfR3TypeParseBufferByType(pUVM, pTypeMember, pbBuf, cbBuf, + &pValBuf->pVal, &cbThisParsed); + if (RT_SUCCESS(rc)) + pValEntry->enmType = DBGFTYPEBUILTIN_COMPOUND; + } + else + { + void *pvVal = NULL; + + switch (pTypeMember->enmTypeBuiltin) + { + case DBGFTYPEBUILTIN_UINT8: + pvVal = &pValBuf->u8; + cbThisParsed = 1; + break; + case DBGFTYPEBUILTIN_INT8: + pvVal = &pValBuf->i8; + cbThisParsed = 1; + break; + case DBGFTYPEBUILTIN_UINT16: + pvVal = &pValBuf->u16; + cbThisParsed = 2; + break; + case DBGFTYPEBUILTIN_INT16: + pvVal = &pValBuf->i16; + cbThisParsed = 2; + break; + case DBGFTYPEBUILTIN_UINT32: + pvVal = &pValBuf->u32; + cbThisParsed = 4; + break; + case DBGFTYPEBUILTIN_INT32: + pvVal = &pValBuf->i32; + cbThisParsed = 4; + break; + case DBGFTYPEBUILTIN_UINT64: + pvVal = &pValBuf->u64; + cbThisParsed = 8; + break; + case DBGFTYPEBUILTIN_INT64: + pvVal = &pValBuf->i64; + cbThisParsed = 8; + break; + case DBGFTYPEBUILTIN_PTR32: + pvVal = &pValBuf->GCPtr; + cbThisParsed = 4; + break; + case DBGFTYPEBUILTIN_PTR64: + pvVal = &pValBuf->GCPtr; + cbThisParsed = 8; + break; + case DBGFTYPEBUILTIN_PTR: + pvVal = &pValBuf->GCPtr; + cbThisParsed = pTypeMember->cbType; + break; + case DBGFTYPEBUILTIN_SIZE: + pvVal = &pValBuf->size; + cbThisParsed = pTypeMember->cbType; + break; + case DBGFTYPEBUILTIN_FLOAT32: + case DBGFTYPEBUILTIN_FLOAT64: + case DBGFTYPEBUILTIN_COMPOUND: + default: + AssertMsgFailedBreakStmt(("Invalid built-in type specified: %d\n", pTypeMember->enmTypeBuiltin), + rc = VERR_INVALID_STATE); + } + + if (RT_SUCCESS(rc)) + { + pValEntry->enmType = pTypeMember->enmTypeBuiltin; + if (cbBuf >= cbThisParsed) + memcpy(pvVal, pbBuf, cbThisParsed); + else + rc = VERR_BUFFER_OVERFLOW; + } + } + + pValBuf++; + + cbParsed += cbThisParsed; + pbBuf += cbThisParsed; + cbBuf -= cbThisParsed; + } + } + + if ( RT_FAILURE(rc) + && cValBufs > 1) + MMR3HeapFree(pValBuf); + + if (RT_SUCCESS(rc)) + { + pValEntry->cEntries = cValBufs; + *pcbParsed = cbParsed; + } + + return rc; +} + + +/** + * Parses the given byte buffer and returns the value based no the type information. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pType The type information. + * @param pbBuf The byte buffer to parse. + * @param cbBuf Size of the buffer. + * @param ppVal Where to store the pointer to the value on success. + * @param pcbParsed How many bytes of the buffer we consumed. 
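+ *
+ * @note On success the caller owns *ppVal and must release it with
+ * DBGFR3TypeValFree().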
+ */ +static int dbgfR3TypeParseBufferByType(PUVM pUVM, PDBGFTYPE pType, uint8_t *pbBuf, size_t cbBuf, + PDBGFTYPEVAL *ppVal, size_t *pcbParsed) +{ + int rc = VINF_SUCCESS; + uint32_t cEntries = pType->pReg ? pType->pReg->cMembers : 1; + PDBGFTYPEVAL pVal = (PDBGFTYPEVAL)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_TYPE, + RT_UOFFSETOF_DYN(DBGFTYPEVAL, aEntries[cEntries])); + if (RT_LIKELY(pVal)) + { + size_t cbParsed = 0; + + pVal->pTypeReg = pType->pReg; + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + { + PCDBGFTYPEREGMEMBER pMember = &pType->pReg->paMembers[i]; + PDBGFTYPEVALENTRY pValEntry = &pVal->aEntries[i]; + rc = dbgfR3TypeParseEntry(pUVM, pMember, pValEntry, pbBuf, cbBuf, &cbParsed); + if (RT_SUCCESS(rc)) + { + pbBuf += cbParsed; + cbBuf -= cbParsed; + } + } + + if (RT_SUCCESS(rc)) + { + pVal->cEntries = cEntries; + *pcbParsed = cbParsed; + *ppVal = pVal; + } + else + MMR3HeapFree(pVal); /** @todo Leak for embedded structs. */ + } + else + rc = VERR_NO_MEMORY; + + return rc; +} + + +/** + * Dumps one level of a typed value. + * + * @returns VBox status code. + * @param pVal The value to dump. + * @param iLvl The current level. + * @param cLvlMax The maximum level. + * @param pfnDump The dumper callback. + * @param pvUser The opaque user data to pass to the dumper callback. + */ +static int dbgfR3TypeValDump(PDBGFTYPEVAL pVal, uint32_t iLvl, uint32_t cLvlMax, + PFNDBGFR3TYPEVALDUMP pfnDump, void *pvUser) +{ + int rc = VINF_SUCCESS; + PCDBGFTYPEREG pType = pVal->pTypeReg; + + for (uint32_t i = 0; i < pVal->cEntries && rc == VINF_SUCCESS; i++) + { + PCDBGFTYPEREGMEMBER pTypeMember = &pType->paMembers[i]; + PDBGFTYPEVALENTRY pValEntry = &pVal->aEntries[i]; + PDBGFTYPEVALBUF pValBuf = pValEntry->cEntries > 1 ? pValEntry->Buf.pVal : &pValEntry->Buf.Val; + + rc = pfnDump(0 /* off */, pTypeMember->pszName, iLvl, pValEntry->enmType, pValEntry->cbType, + pValBuf, pValEntry->cEntries, pvUser); + if ( rc == VINF_SUCCESS + && pValEntry->enmType == DBGFTYPEBUILTIN_COMPOUND + && iLvl < cLvlMax) + { + /* Print embedded structs. */ + for (uint32_t iValBuf = 0; iValBuf < pValEntry->cEntries && rc == VINF_SUCCESS; iValBuf++) + rc = dbgfR3TypeValDump(pValBuf[iValBuf].pVal, iLvl + 1, cLvlMax, pfnDump, pvUser); + } + } + + return rc; +} + + +/** + * Dumps one level of a type. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pType The type to dump. + * @param iLvl The current level. + * @param cLvlMax The maximum level. + * @param pfnDump The dumper callback. + * @param pvUser The opaque user data to pass to the dumper callback. + */ +static int dbgfR3TypeDump(PUVM pUVM, PDBGFTYPE pType, uint32_t iLvl, uint32_t cLvlMax, + PFNDBGFR3TYPEDUMP pfnDump, void *pvUser) +{ + int rc = VINF_SUCCESS; + PCDBGFTYPEREG pTypeReg = pType->pReg; + + switch (pTypeReg->enmVariant) + { + case DBGFTYPEVARIANT_ALIAS: + rc = VERR_NOT_IMPLEMENTED; + break; + case DBGFTYPEVARIANT_STRUCT: + case DBGFTYPEVARIANT_UNION: + for (uint32_t i = 0; i < pTypeReg->cMembers && rc == VINF_SUCCESS; i++) + { + PCDBGFTYPEREGMEMBER pTypeMember = &pTypeReg->paMembers[i]; + PDBGFTYPE pTypeResolved = dbgfR3TypeLookup(pUVM, pTypeMember->pszType); + + rc = pfnDump(0 /* off */, pTypeMember->pszName, iLvl, pTypeMember->pszType, + pTypeMember->fFlags, pTypeMember->cElements, pvUser); + if ( rc == VINF_SUCCESS + && pTypeResolved->pReg + && iLvl < cLvlMax) + { + /* Print embedded structs. 
*/ + rc = dbgfR3TypeDump(pUVM, pTypeResolved, iLvl + 1, cLvlMax, pfnDump, pvUser); + } + } + break; + default: + AssertMsgFailed(("Invalid type variant: %u\n", pTypeReg->enmVariant)); + rc = VERR_INVALID_STATE; + } + + return rc; +} + + +/** + * Initializes the type database. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +DECLHIDDEN(int) dbgfR3TypeInit(PUVM pUVM) +{ + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = RTSemRWCreate(&pUVM->dbgf.s.hTypeDbLock); + if (RT_SUCCESS(rc)) + { + rc = dbgfTypeRegisterBuiltinTypes(pUVM); + if (RT_FAILURE(rc)) + { + RTSemRWDestroy(pUVM->dbgf.s.hTypeDbLock); + pUVM->dbgf.s.hTypeDbLock = NIL_RTSEMRW; + } + } + pUVM->dbgf.s.fTypeDbInitialized = RT_SUCCESS(rc); + } + return rc; +} + + +/** + * Terminates the type database. + * + * @param pUVM The user mode VM handle. + */ +DECLHIDDEN(void) dbgfR3TypeTerm(PUVM pUVM) +{ + RTSemRWDestroy(pUVM->dbgf.s.hTypeDbLock); + pUVM->dbgf.s.hTypeDbLock = NIL_RTSEMRW; + pUVM->dbgf.s.fTypeDbInitialized = false; +} + + +/** + * Registers a new type for lookup. + * + * @returns VBox status code. + * @retval VERR_ALREADY_EXISTS if the type exists already. + * @param pUVM The user mode VM handle. + * @param cTypes Number of types to register. + * @param paTypes The array of type registration structures to register. + */ +VMMR3DECL(int) DBGFR3TypeRegister(PUVM pUVM, uint32_t cTypes, PCDBGFTYPEREG paTypes) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(cTypes > 0, VERR_INVALID_PARAMETER); + AssertPtrReturn(paTypes, VERR_INVALID_POINTER); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_WRITE(pUVM); + for (uint32_t i = 0; i < cTypes && RT_SUCCESS(rc); i++) + { + rc = dbgfR3TypeRegister(pUVM, &paTypes[i]); + if ( RT_FAILURE(rc) + && i > 0) + { + /* Deregister types in reverse order. */ + do + { + int rc2 = DBGFR3TypeDeregister(pUVM, paTypes[i].pszType); + AssertRC(rc2); + i--; + } while (i > 0); + + break; + } + } + DBGF_TYPE_DB_UNLOCK_WRITE(pUVM); + + return rc; +} + + +/** + * Deregisters a previously registered type. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if the type is not known. + * @retval VERR_RESOURCE_IN_USE if the type is used by another type. + * @param pUVM The user mode VM handle. + * @param pszType The type identifier to deregister. + */ +VMMR3DECL(int) DBGFR3TypeDeregister(PUVM pUVM, const char *pszType) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_WRITE(pUVM); + PDBGFTYPE pType = dbgfR3TypeLookup(pUVM, pszType); + if (pType) + { + if (!pType->cRefs) + { + + } + else + rc = VERR_RESOURCE_IN_USE; + } + else + rc = VERR_NOT_FOUND; + DBGF_TYPE_DB_UNLOCK_WRITE(pUVM); + + return rc; +} + + +/** + * Return the type registration structure for the given type identifier. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if the type is not known. + * @param pUVM The user mode VM handle. + * @param pszType The type identifier to get the registration structure from. + * @param ppTypeReg Where to store the type registration structure on success. 
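+ *
+ * @note Built-in types have no registration structure, so *ppTypeReg is
+ * returned as NULL for them even though the call succeeds.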
+ */ +VMMR3DECL(int) DBGFR3TypeQueryReg(PUVM pUVM, const char *pszType, PCDBGFTYPEREG *ppTypeReg) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + AssertPtrReturn(ppTypeReg, VERR_INVALID_POINTER); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_READ(pUVM); + PDBGFTYPE pType = dbgfR3TypeLookup(pUVM, pszType); + if (pType) + *ppTypeReg = pType->pReg; + else + rc = VERR_NOT_FOUND; + DBGF_TYPE_DB_UNLOCK_READ(pUVM); + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Queries the size a given type would occupy in memory. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if the type is not known. + * @param pUVM The user mode VM handle. + * @param pszType The type identifier. + * @param pcbType Where to store the amount of memory occupied in bytes. + */ +VMMR3DECL(int) DBGFR3TypeQuerySize(PUVM pUVM, const char *pszType, size_t *pcbType) +{ + LogFlowFunc(("pUVM=%#p pszType=%s pcbType=%#p\n", pUVM, pszType, pcbType)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + AssertPtrReturn(pcbType, VERR_INVALID_POINTER); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_READ(pUVM); + PDBGFTYPE pType = dbgfR3TypeLookup(pUVM, pszType); + if (pType) + *pcbType = pType->cbType; + else + rc = VERR_NOT_FOUND; + DBGF_TYPE_DB_UNLOCK_READ(pUVM); + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Sets the size of the given type in bytes. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if the type is not known. + * @retval VERR_NOT_SUPPORTED if changing the size of this type is not supported. + * @param pUVM The user mode VM handle. + * @param pszType The type identifier. + * @param cbType The size of the type in bytes. + * + * @note: This currently works only for the builtin pointer type without the explicit + * size (ptr_t or DBGFTYPEBUILTIN_PTR). + */ +VMMR3DECL(int) DBGFR3TypeSetSize(PUVM pUVM, const char *pszType, size_t cbType) +{ + LogFlowFunc(("pUVM=%#p pszType=%s cbType=%zu\n", pUVM, pszType, cbType)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + AssertReturn(cbType > 0, VERR_INVALID_PARAMETER); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_WRITE(pUVM); + PDBGFTYPE pType = dbgfR3TypeLookup(pUVM, pszType); + if (pType) + { + if ( !pType->pReg + && ( pType->enmTypeBuiltin == DBGFTYPEBUILTIN_PTR + || pType->enmTypeBuiltin == DBGFTYPEBUILTIN_SIZE)) + { + if (pType->cbType != cbType) + { + pType->cbType = cbType; + rc = dbgfR3TypeRecalculateAllSizes(pUVM); + } + } + else + rc = VERR_NOT_SUPPORTED; + } + else + rc = VERR_NOT_FOUND; + DBGF_TYPE_DB_UNLOCK_WRITE(pUVM); + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Dumps the type information of the given type. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszType The type identifier. + * @param fFlags Flags to control the dumping (reserved, MBZ). + * @param cLvlMax Maximum levels to nest. + * @param pfnDump The dumper callback. + * @param pvUser Opaque user data. 
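+ *
+ * @note The callback is invoked once per member with its name, nesting level,
+ * type name, flags and element count; returning anything other than
+ * VINF_SUCCESS stops the dump. Embedded structs are only expanded up to
+ * @a cLvlMax levels.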
+ */ +VMMR3DECL(int) DBGFR3TypeDumpEx(PUVM pUVM, const char *pszType, uint32_t fFlags, + uint32_t cLvlMax, PFNDBGFR3TYPEDUMP pfnDump, void *pvUser) +{ + LogFlowFunc(("pUVM=%#p pszType=%s fFlags=%#x cLvlMax=%u pfnDump=%#p pvUser=%#p\n", + pUVM, pszType, fFlags, cLvlMax, pfnDump, pvUser)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + AssertPtrReturn(pfnDump, VERR_INVALID_POINTER); + RT_NOREF_PV(fFlags); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_READ(pUVM); + PDBGFTYPE pType = dbgfR3TypeLookup(pUVM, pszType); + if (pType) + rc = dbgfR3TypeDump(pUVM, pType, 0 /* iLvl */, cLvlMax, pfnDump, pvUser); + else + rc = VERR_NOT_FOUND; + DBGF_TYPE_DB_UNLOCK_READ(pUVM); + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Returns the value of a memory buffer at the given address formatted for the given + * type. + * + * @returns VBox status code. + * @retval VERR_NOT_FOUND if the type is not known. + * @param pUVM The user mode VM handle. + * @param pAddress The address to start reading from. + * @param pszType The type identifier. + * @param ppVal Where to store the pointer to the value structure + * on success. + */ +VMMR3DECL(int) DBGFR3TypeQueryValByType(PUVM pUVM, PCDBGFADDRESS pAddress, const char *pszType, + PDBGFTYPEVAL *ppVal) +{ + LogFlowFunc(("pUVM=%#p pAddress=%#p pszType=%s ppVal=%#p\n", pUVM, pAddress, pszType, ppVal)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pAddress, VERR_INVALID_POINTER); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + AssertPtrReturn(ppVal, VERR_INVALID_POINTER); + + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fTypeDbInitialized) + { + rc = dbgfR3TypeInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + DBGF_TYPE_DB_LOCK_READ(pUVM); + PDBGFTYPE pType = dbgfR3TypeLookup(pUVM, pszType); + if (pType) + { + uint8_t *pbBuf = (uint8_t *)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_TYPE, pType->cbType); + if (RT_LIKELY(pbBuf)) + { + rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, pAddress, pbBuf, pType->cbType); + if (RT_SUCCESS(rc)) + { + /* Parse the buffer based on the type. */ + size_t cbParsed = 0; + rc = dbgfR3TypeParseBufferByType(pUVM, pType, pbBuf, pType->cbType, + ppVal, &cbParsed); + } + + MMR3HeapFree(pbBuf); + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_NOT_FOUND; + DBGF_TYPE_DB_UNLOCK_READ(pUVM); + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Frees all acquired resources of a value previously obtained with + * DBGFR3TypeQueryValByType(). + * + * @returns nothing. + * @param pVal The value to free. + */ +VMMR3DECL(void) DBGFR3TypeValFree(PDBGFTYPEVAL pVal) +{ + AssertPtrReturnVoid(pVal); + + for (uint32_t i = 0; i < pVal->cEntries; i++) + { + PDBGFTYPEVALENTRY pValEntry = &pVal->aEntries[i]; + PDBGFTYPEVALBUF pValBuf = pValEntry->cEntries > 1 ? pValEntry->Buf.pVal : &pValEntry->Buf.Val; + + if (pValEntry->enmType == DBGFTYPEBUILTIN_COMPOUND) + for (uint32_t iBuf = 0; iBuf < pValEntry->cEntries; iBuf++) + DBGFR3TypeValFree(pValBuf->pVal); + + if (pValEntry->cEntries > 1) + MMR3HeapFree(pValEntry->Buf.pVal); + } + + MMR3HeapFree(pVal); +} + + +/** + * Reads the guest memory with the given type and dumps the content of the type. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pAddress The address to start reading from. + * @param pszType The type identifier. 
+ * @param fFlags Flags for tweaking (reserved, must be zero). + * @param cLvlMax Maximum number of levels to expand embedded structs. + * @param pfnDump The dumper callback. + * @param pvUser The opaque user data to pass to the callback. + */ +VMMR3DECL(int) DBGFR3TypeValDumpEx(PUVM pUVM, PCDBGFADDRESS pAddress, const char *pszType, uint32_t fFlags, + uint32_t cLvlMax, FNDBGFR3TYPEVALDUMP pfnDump, void *pvUser) +{ + LogFlowFunc(("pUVM=%#p pAddress=%#p pszType=%s fFlags=%#x pfnDump=%#p pvUser=%#p\n", + pUVM, pAddress, pszType, fFlags,pfnDump, pvUser)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pAddress, VERR_INVALID_POINTER); + AssertPtrReturn(pszType, VERR_INVALID_POINTER); + AssertPtrReturn(pfnDump, VERR_INVALID_POINTER); + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + AssertReturn(cLvlMax >= 1, VERR_INVALID_PARAMETER); + + PDBGFTYPEVAL pVal = NULL; + int rc = DBGFR3TypeQueryValByType(pUVM, pAddress, pszType, &pVal); + if (RT_SUCCESS(rc)) + { + rc = dbgfR3TypeValDump(pVal, 0 /* iLvl */, cLvlMax, pfnDump, pvUser); + DBGFR3TypeValFree(pVal); + } + + LogFlowFunc(("-> rc=%Rrc\n", rc)); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFReg.cpp b/src/VBox/VMM/VMMR3/DBGFReg.cpp new file mode 100644 index 00000000..6afdd189 --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFReg.cpp @@ -0,0 +1,2719 @@ +/* $Id: DBGFReg.cpp $ */ +/** @file + * DBGF - Debugger Facility, Register Methods. + */ + +/* + * Copyright (C) 2010-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include "DBGFInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Locks the register database for writing. */ +#define DBGF_REG_DB_LOCK_WRITE(pUVM) \ + do { \ + int rcSem = RTSemRWRequestWrite((pUVM)->dbgf.s.hRegDbLock, RT_INDEFINITE_WAIT); \ + AssertRC(rcSem); \ + } while (0) + +/** Unlocks the register database after writing. */ +#define DBGF_REG_DB_UNLOCK_WRITE(pUVM) \ + do { \ + int rcSem = RTSemRWReleaseWrite((pUVM)->dbgf.s.hRegDbLock); \ + AssertRC(rcSem); \ + } while (0) + +/** Locks the register database for reading. */ +#define DBGF_REG_DB_LOCK_READ(pUVM) \ + do { \ + int rcSem = RTSemRWRequestRead((pUVM)->dbgf.s.hRegDbLock, RT_INDEFINITE_WAIT); \ + AssertRC(rcSem); \ + } while (0) + +/** Unlocks the register database after reading. 
*/ +#define DBGF_REG_DB_UNLOCK_READ(pUVM) \ + do { \ + int rcSem = RTSemRWReleaseRead((pUVM)->dbgf.s.hRegDbLock); \ + AssertRC(rcSem); \ + } while (0) + + +/** The max length of a set, register or sub-field name. */ +#define DBGF_REG_MAX_NAME 40 + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Register set registration record type. + */ +typedef enum DBGFREGSETTYPE +{ + /** Invalid zero value. */ + DBGFREGSETTYPE_INVALID = 0, + /** CPU record. */ + DBGFREGSETTYPE_CPU, + /** Device record. */ + DBGFREGSETTYPE_DEVICE, + /** End of valid record types. */ + DBGFREGSETTYPE_END +} DBGFREGSETTYPE; + + +/** + * Register set registration record. + */ +typedef struct DBGFREGSET +{ + /** String space core. */ + RTSTRSPACECORE Core; + /** The registration record type. */ + DBGFREGSETTYPE enmType; + /** The user argument for the callbacks. */ + union + { + /** The CPU view. */ + PVMCPU pVCpu; + /** The device view. */ + PPDMDEVINS pDevIns; + /** The general view. */ + void *pv; + } uUserArg; + + /** The register descriptors. */ + PCDBGFREGDESC paDescs; + /** The number of register descriptors. */ + uint32_t cDescs; + + /** Array of lookup records. + * The first part of the array runs parallel to paDescs, the rest are + * covering for aliases and bitfield variations. It's done this way to + * simplify the query all operations. */ + struct DBGFREGLOOKUP *paLookupRecs; + /** The number of lookup records. */ + uint32_t cLookupRecs; + + /** The register name prefix. */ + char szPrefix[1]; +} DBGFREGSET; +/** Pointer to a register registration record. */ +typedef DBGFREGSET *PDBGFREGSET; +/** Pointer to a const register registration record. */ +typedef DBGFREGSET const *PCDBGFREGSET; + + +/** + * Register lookup record. + */ +typedef struct DBGFREGLOOKUP +{ + /** The string space core. */ + RTSTRSPACECORE Core; + /** Pointer to the set. */ + PCDBGFREGSET pSet; + /** Pointer to the register descriptor. */ + PCDBGFREGDESC pDesc; + /** If an alias this points to the alias descriptor, NULL if not. */ + PCDBGFREGALIAS pAlias; + /** If a sub-field this points to the sub-field descriptor, NULL if not. */ + PCDBGFREGSUBFIELD pSubField; +} DBGFREGLOOKUP; +/** Pointer to a register lookup record. */ +typedef DBGFREGLOOKUP *PDBGFREGLOOKUP; +/** Pointer to a const register lookup record. */ +typedef DBGFREGLOOKUP const *PCDBGFREGLOOKUP; + + +/** + * Argument packet from DBGFR3RegNmQueryAll to dbgfR3RegNmQueryAllWorker. + */ +typedef struct DBGFR3REGNMQUERYALLARGS +{ + /** The output register array. */ + PDBGFREGENTRYNM paRegs; + /** The number of entries in the output array. */ + size_t cRegs; + /** The current register number when enumerating the string space. + * @remarks Only used by EMT(0). */ + size_t iReg; +} DBGFR3REGNMQUERYALLARGS; +/** Pointer to a dbgfR3RegNmQueryAllWorker argument packet. */ +typedef DBGFR3REGNMQUERYALLARGS *PDBGFR3REGNMQUERYALLARGS; + + +/** + * Argument packet passed by DBGFR3RegPrintfV to dbgfR3RegPrintfCbOutput and + * dbgfR3RegPrintfCbFormat. + */ +typedef struct DBGFR3REGPRINTFARGS +{ + /** The user mode VM handle. */ + PUVM pUVM; + /** The target CPU. */ + VMCPUID idCpu; + /** Set if we're looking at guest registers. */ + bool fGuestRegs; + /** The output buffer. */ + char *pszBuf; + /** The format string. 
*/ + const char *pszFormat; + /** The va list with format arguments. */ + va_list va; + + /** The current buffer offset. */ + size_t offBuf; + /** The amount of buffer space left, not counting the terminator char. */ + size_t cchLeftBuf; + /** The status code of the whole operation. First error is return, + * subsequent ones are suppressed. */ + int rc; +} DBGFR3REGPRINTFARGS; +/** Pointer to a DBGFR3RegPrintfV argument packet. */ +typedef DBGFR3REGPRINTFARGS *PDBGFR3REGPRINTFARGS; + + + +/** + * Initializes the register database. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + */ +int dbgfR3RegInit(PUVM pUVM) +{ + int rc = VINF_SUCCESS; + if (!pUVM->dbgf.s.fRegDbInitialized) + { + rc = RTSemRWCreate(&pUVM->dbgf.s.hRegDbLock); + pUVM->dbgf.s.fRegDbInitialized = RT_SUCCESS(rc); + } + return rc; +} + + +/** + * Terminates the register database. + * + * @param pUVM The user mode VM handle. + */ +void dbgfR3RegTerm(PUVM pUVM) +{ + RTSemRWDestroy(pUVM->dbgf.s.hRegDbLock); + pUVM->dbgf.s.hRegDbLock = NIL_RTSEMRW; + pUVM->dbgf.s.fRegDbInitialized = false; +} + + +/** + * Validates a register name. + * + * This is used for prefixes, aliases and field names. + * + * @returns true if valid, false if not. + * @param pszName The register name to validate. + * @param chDot Set to '.' if accepted, otherwise 0. + */ +static bool dbgfR3RegIsNameValid(const char *pszName, char chDot) +{ + const char *psz = pszName; + if (!RT_C_IS_ALPHA(*psz)) + return false; + char ch; + while ((ch = *++psz)) + if ( !RT_C_IS_LOWER(ch) + && !RT_C_IS_DIGIT(ch) + && ch != '_' + && ch != chDot) + return false; + if (psz - pszName > DBGF_REG_MAX_NAME) + return false; + return true; +} + + +/** + * Common worker for registering a register set. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param paRegisters The register descriptors. + * @param enmType The set type. + * @param pvUserArg The user argument for the callbacks. + * @param pszPrefix The name prefix. + * @param iInstance The instance number to be appended to @a + * pszPrefix when creating the set name. + */ +static int dbgfR3RegRegisterCommon(PUVM pUVM, PCDBGFREGDESC paRegisters, DBGFREGSETTYPE enmType, void *pvUserArg, + const char *pszPrefix, uint32_t iInstance) +{ + /* + * Validate input. + */ + /* The name components. */ + AssertMsgReturn(dbgfR3RegIsNameValid(pszPrefix, 0), ("%s\n", pszPrefix), VERR_INVALID_NAME); + const char *psz = RTStrEnd(pszPrefix, RTSTR_MAX); + bool const fNeedUnderscore = RT_C_IS_DIGIT(psz[-1]); + size_t const cchPrefix = psz - pszPrefix + fNeedUnderscore; + AssertMsgReturn(cchPrefix < RT_SIZEOFMEMB(DBGFREGSET, szPrefix) - 4 - 1, ("%s\n", pszPrefix), VERR_INVALID_NAME); + + AssertMsgReturn(iInstance <= 9999, ("%d\n", iInstance), VERR_INVALID_NAME); + + /* The descriptors. 
*/ + uint32_t cLookupRecs = 0; + uint32_t iDesc; + for (iDesc = 0; paRegisters[iDesc].pszName != NULL; iDesc++) + { + AssertMsgReturn(dbgfR3RegIsNameValid(paRegisters[iDesc].pszName, 0), ("%s (#%u)\n", paRegisters[iDesc].pszName, iDesc), VERR_INVALID_NAME); + + if (enmType == DBGFREGSETTYPE_CPU) + AssertMsgReturn(iDesc < (unsigned)DBGFREG_END && (unsigned)paRegisters[iDesc].enmReg == iDesc, + ("%d iDesc=%d\n", paRegisters[iDesc].enmReg, iDesc), + VERR_INVALID_PARAMETER); + else + AssertReturn(paRegisters[iDesc].enmReg == DBGFREG_END, VERR_INVALID_PARAMETER); + AssertReturn( paRegisters[iDesc].enmType > DBGFREGVALTYPE_INVALID + && paRegisters[iDesc].enmType < DBGFREGVALTYPE_END, VERR_INVALID_PARAMETER); + AssertMsgReturn(!(paRegisters[iDesc].fFlags & ~DBGFREG_FLAGS_READ_ONLY), + ("%#x (#%u)\n", paRegisters[iDesc].fFlags, iDesc), + VERR_INVALID_PARAMETER); + AssertPtrReturn(paRegisters[iDesc].pfnGet, VERR_INVALID_PARAMETER); + AssertReturn(RT_VALID_PTR(paRegisters[iDesc].pfnSet) || (paRegisters[iDesc].fFlags & DBGFREG_FLAGS_READ_ONLY), + VERR_INVALID_PARAMETER); + + uint32_t iAlias = 0; + PCDBGFREGALIAS paAliases = paRegisters[iDesc].paAliases; + if (paAliases) + { + AssertPtrReturn(paAliases, VERR_INVALID_PARAMETER); + for (; paAliases[iAlias].pszName; iAlias++) + { + AssertMsgReturn(dbgfR3RegIsNameValid(paAliases[iAlias].pszName, 0), ("%s (%s)\n", paAliases[iAlias].pszName, paRegisters[iDesc].pszName), VERR_INVALID_NAME); + AssertReturn( paAliases[iAlias].enmType > DBGFREGVALTYPE_INVALID + && paAliases[iAlias].enmType < DBGFREGVALTYPE_END, VERR_INVALID_PARAMETER); + } + } + + uint32_t iSubField = 0; + PCDBGFREGSUBFIELD paSubFields = paRegisters[iDesc].paSubFields; + if (paSubFields) + { + AssertPtrReturn(paSubFields, VERR_INVALID_PARAMETER); + for (; paSubFields[iSubField].pszName; iSubField++) + { + AssertMsgReturn(dbgfR3RegIsNameValid(paSubFields[iSubField].pszName, '.'), ("%s (%s)\n", paSubFields[iSubField].pszName, paRegisters[iDesc].pszName), VERR_INVALID_NAME); + AssertReturn(paSubFields[iSubField].iFirstBit + paSubFields[iSubField].cBits <= 128, VERR_INVALID_PARAMETER); + AssertReturn(paSubFields[iSubField].cBits + paSubFields[iSubField].cShift <= 128, VERR_INVALID_PARAMETER); + AssertPtrNullReturn(paSubFields[iSubField].pfnGet, VERR_INVALID_POINTER); + AssertPtrNullReturn(paSubFields[iSubField].pfnSet, VERR_INVALID_POINTER); + } + } + + cLookupRecs += (1 + iAlias) * (1 + iSubField); + } + + /* Check the instance number of the CPUs. */ + AssertReturn(enmType != DBGFREGSETTYPE_CPU || iInstance < pUVM->cCpus, VERR_INVALID_CPU_ID); + + /* + * Allocate a new record and all associated lookup records. + */ + size_t cbRegSet = RT_UOFFSETOF_DYN(DBGFREGSET, szPrefix[cchPrefix + 4 + 1]); + cbRegSet = RT_ALIGN_Z(cbRegSet, 32); + size_t const offLookupRecArray = cbRegSet; + cbRegSet += cLookupRecs * sizeof(DBGFREGLOOKUP); + + PDBGFREGSET pRegSet = (PDBGFREGSET)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_REG, cbRegSet); + if (!pRegSet) + return VERR_NO_MEMORY; + + /* + * Initialize the new record. 
+ */ + pRegSet->Core.pszString = pRegSet->szPrefix; + pRegSet->enmType = enmType; + pRegSet->uUserArg.pv = pvUserArg; + pRegSet->paDescs = paRegisters; + pRegSet->cDescs = iDesc; + pRegSet->cLookupRecs = cLookupRecs; + pRegSet->paLookupRecs = (PDBGFREGLOOKUP)((uintptr_t)pRegSet + offLookupRecArray); + if (fNeedUnderscore) + RTStrPrintf(pRegSet->szPrefix, cchPrefix + 4 + 1, "%s_%u", pszPrefix, iInstance); + else + RTStrPrintf(pRegSet->szPrefix, cchPrefix + 4 + 1, "%s%u", pszPrefix, iInstance); + + + /* + * Initialize the lookup records. See DBGFREGSET::paLookupRecs. + */ + char szName[DBGF_REG_MAX_NAME * 3 + 16]; + strcpy(szName, pRegSet->szPrefix); + char *pszReg = strchr(szName, '\0'); + *pszReg++ = '.'; + + /* Array parallel to the descriptors. */ + int rc = VINF_SUCCESS; + PDBGFREGLOOKUP pLookupRec = &pRegSet->paLookupRecs[0]; + for (iDesc = 0; paRegisters[iDesc].pszName != NULL && RT_SUCCESS(rc); iDesc++) + { + strcpy(pszReg, paRegisters[iDesc].pszName); + pLookupRec->Core.pszString = MMR3HeapStrDupU(pUVM, MM_TAG_DBGF_REG, szName); + if (!pLookupRec->Core.pszString) + rc = VERR_NO_STR_MEMORY; + pLookupRec->pSet = pRegSet; + pLookupRec->pDesc = &paRegisters[iDesc]; + pLookupRec->pAlias = NULL; + pLookupRec->pSubField = NULL; + pLookupRec++; + } + + /* Aliases and sub-fields. */ + for (iDesc = 0; paRegisters[iDesc].pszName != NULL && RT_SUCCESS(rc); iDesc++) + { + PCDBGFREGALIAS pCurAlias = NULL; /* first time we add sub-fields for the real name. */ + PCDBGFREGALIAS pNextAlias = paRegisters[iDesc].paAliases; + const char *pszRegName = paRegisters[iDesc].pszName; + while (RT_SUCCESS(rc)) + { + /* Add sub-field records. */ + PCDBGFREGSUBFIELD paSubFields = paRegisters[iDesc].paSubFields; + if (paSubFields) + { + size_t cchReg = strlen(pszRegName); + memcpy(pszReg, pszRegName, cchReg); + char *pszSub = &pszReg[cchReg]; + *pszSub++ = '.'; + for (uint32_t iSubField = 0; paSubFields[iSubField].pszName && RT_SUCCESS(rc); iSubField++) + { + strcpy(pszSub, paSubFields[iSubField].pszName); + pLookupRec->Core.pszString = MMR3HeapStrDupU(pUVM, MM_TAG_DBGF_REG, szName); + if (!pLookupRec->Core.pszString) + rc = VERR_NO_STR_MEMORY; + pLookupRec->pSet = pRegSet; + pLookupRec->pDesc = &paRegisters[iDesc]; + pLookupRec->pAlias = pCurAlias; + pLookupRec->pSubField = &paSubFields[iSubField]; + pLookupRec++; + } + } + + /* Advance to the next alias. */ + pCurAlias = pNextAlias++; + if (!pCurAlias) + break; + pszRegName = pCurAlias->pszName; + if (!pszRegName) + break; + + /* The alias record. */ + strcpy(pszReg, pszRegName); + pLookupRec->Core.pszString = MMR3HeapStrDupU(pUVM, MM_TAG_DBGF_REG, szName); + if (!pLookupRec->Core.pszString) + rc = VERR_NO_STR_MEMORY; + pLookupRec->pSet = pRegSet; + pLookupRec->pDesc = &paRegisters[iDesc]; + pLookupRec->pAlias = pCurAlias; + pLookupRec->pSubField = NULL; + pLookupRec++; + } + } + Assert(pLookupRec == &pRegSet->paLookupRecs[pRegSet->cLookupRecs]); + + if (RT_SUCCESS(rc)) + { + /* + * Insert the record into the register set string space and optionally into + * the CPU register set cache. 
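 *
 * Illustrative sketch (not from the original sources): once the insertion
 * succeeds the names become resolvable through the public API, e.g. for the
 * hypothetical "mydev" instance 0 set one would expect
 *
 *     DBGFR3RegNmValidate(pUVM, VMCPUID_ANY, "mydev0.somereg");
 *
 * to return VINF_SUCCESS for any register name in the descriptor table.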
+ */ + DBGF_REG_DB_LOCK_WRITE(pUVM); + + bool fInserted = RTStrSpaceInsert(&pUVM->dbgf.s.RegSetSpace, &pRegSet->Core); + if (fInserted) + { + pUVM->dbgf.s.cRegs += pRegSet->cDescs; + if (enmType == DBGFREGSETTYPE_CPU) + { + if (pRegSet->cDescs > DBGFREG_ALL_COUNT) + pUVM->dbgf.s.cRegs -= pRegSet->cDescs - DBGFREG_ALL_COUNT; + if (!strcmp(pszPrefix, "cpu")) + pUVM->aCpus[iInstance].dbgf.s.pGuestRegSet = pRegSet; + else + pUVM->aCpus[iInstance].dbgf.s.pHyperRegSet = pRegSet; + } + + PDBGFREGLOOKUP paLookupRecs = pRegSet->paLookupRecs; + uint32_t iLookupRec = pRegSet->cLookupRecs; + while (iLookupRec-- > 0) + { + bool fInserted2 = RTStrSpaceInsert(&pUVM->dbgf.s.RegSpace, &paLookupRecs[iLookupRec].Core); + AssertMsg(fInserted2, ("'%s'", paLookupRecs[iLookupRec].Core.pszString)); NOREF(fInserted2); + } + + DBGF_REG_DB_UNLOCK_WRITE(pUVM); + return VINF_SUCCESS; + } + + DBGF_REG_DB_UNLOCK_WRITE(pUVM); + rc = VERR_DUPLICATE; + } + + /* + * Bail out. + */ + for (uint32_t i = 0; i < pRegSet->cLookupRecs; i++) + MMR3HeapFree((char *)pRegSet->paLookupRecs[i].Core.pszString); + MMR3HeapFree(pRegSet); + + return rc; +} + + +/** + * Registers a set of registers for a CPU. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param paRegisters The register descriptors. + * @param fGuestRegs Set if it's the guest registers, clear if + * hypervisor registers. + */ +VMMR3_INT_DECL(int) DBGFR3RegRegisterCpu(PVM pVM, PVMCPU pVCpu, PCDBGFREGDESC paRegisters, bool fGuestRegs) +{ + PUVM pUVM = pVM->pUVM; + if (!pUVM->dbgf.s.fRegDbInitialized) + { + int rc = dbgfR3RegInit(pUVM); + if (RT_FAILURE(rc)) + return rc; + } + + return dbgfR3RegRegisterCommon(pUVM, paRegisters, DBGFREGSETTYPE_CPU, pVCpu, + fGuestRegs ? "cpu" : "hypercpu", pVCpu->idCpu); +} + + +/** + * Registers a set of registers for a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param paRegisters The register descriptors. + * @param pDevIns The device instance. This will be the callback user + * argument. + * @param pszPrefix The device name. + * @param iInstance The device instance. + */ +VMMR3_INT_DECL(int) DBGFR3RegRegisterDevice(PVM pVM, PCDBGFREGDESC paRegisters, PPDMDEVINS pDevIns, + const char *pszPrefix, uint32_t iInstance) +{ + AssertPtrReturn(paRegisters, VERR_INVALID_POINTER); + AssertPtrReturn(pDevIns, VERR_INVALID_POINTER); + AssertPtrReturn(pszPrefix, VERR_INVALID_POINTER); + + return dbgfR3RegRegisterCommon(pVM->pUVM, paRegisters, DBGFREGSETTYPE_DEVICE, pDevIns, pszPrefix, iInstance); +} + + +/** + * Clears the register value variable. + * + * @param pValue The variable to clear. + */ +DECLINLINE(void) dbgfR3RegValClear(PDBGFREGVAL pValue) +{ + pValue->au64[0] = 0; + pValue->au64[1] = 0; + pValue->au64[2] = 0; + pValue->au64[3] = 0; + pValue->au64[4] = 0; + pValue->au64[5] = 0; + pValue->au64[6] = 0; + pValue->au64[7] = 0; +} + + +/** + * Sets a 80-bit floating point variable to a 64-bit unsigned interger value. + * + * @param pValue The value. + * @param u64 The integer value. + */ +DECLINLINE(void) dbgfR3RegValR80SetU64(PDBGFREGVAL pValue, uint64_t u64) +{ + /** @todo fixme */ + pValue->r80.s.fSign = 0; + pValue->r80.s.uExponent = 16383; + pValue->r80.s.u64Mantissa = u64; +} + + +/** + * Sets a 80-bit floating point variable to a 64-bit unsigned interger value. + * + * @param pValue The value. + * @param u128 The integer value. 
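 *
 * @note  Illustrative sketch (not from the original sources): pending the
 *        @todo in the function body, only the low 64 bits of @a u128 reach
 *        the mantissa and the biased exponent is fixed at 16383, so the
 *        result is merely an approximation of the integer value:
 * @code
 *     DBGFREGVAL Val;
 *     RTUINT128U uInt;
 *     uInt.s.Lo = UINT64_C(0x1234);
 *     uInt.s.Hi = UINT64_C(0xffff);   // the high half is dropped
 *     dbgfR3RegValR80SetU128(&Val, uInt);
 * @endcode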
+ */ +DECLINLINE(void) dbgfR3RegValR80SetU128(PDBGFREGVAL pValue, RTUINT128U u128) +{ + /** @todo fixme */ + pValue->r80.s.fSign = 0; + pValue->r80.s.uExponent = 16383; + pValue->r80.s.u64Mantissa = u128.s.Lo; +} + + +/** + * Get a 80-bit floating point variable as a 64-bit unsigned integer. + * + * @returns 64-bit unsigned integer. + * @param pValue The value. + */ +DECLINLINE(uint64_t) dbgfR3RegValR80GetU64(PCDBGFREGVAL pValue) +{ + /** @todo stupid, stupid MSC. */ + return pValue->r80.s.u64Mantissa; +} + + +/** + * Get a 80-bit floating point variable as a 128-bit unsigned integer. + * + * @returns 128-bit unsigned integer. + * @param pValue The value. + */ +DECLINLINE(RTUINT128U) dbgfR3RegValR80GetU128(PCDBGFREGVAL pValue) +{ + /** @todo stupid, stupid MSC. */ + RTUINT128U uRet; +#if 0 + uRet.s.Lo = (uint64_t)InVal.lrd; + uRet.s.Hi = (uint64_t)InVal.lrd / _4G / _4G; +#else + uRet.s.Lo = pValue->r80.s.u64Mantissa; + uRet.s.Hi = 0; +#endif + return uRet; +} + + +/** + * Performs a cast between register value types. + * + * @retval VINF_SUCCESS + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VERR_DBGF_UNSUPPORTED_CAST + * + * @param pValue The value to cast (input + output). + * @param enmFromType The input value. + * @param enmToType The desired output value. + */ +static int dbgfR3RegValCast(PDBGFREGVAL pValue, DBGFREGVALTYPE enmFromType, DBGFREGVALTYPE enmToType) +{ + DBGFREGVAL const InVal = *pValue; + dbgfR3RegValClear(pValue); + + /* Note! No default cases here as gcc warnings about missing enum values + are desired. */ + switch (enmFromType) + { + case DBGFREGVALTYPE_U8: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u8; return VINF_SUCCESS; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u8; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u8; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u8; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128.s.Lo = InVal.u8; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256.Words.w0 = InVal.u8; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512.Words.w0 = InVal.u8; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU64(pValue, InVal.u8); return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_U16: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u16; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u16; return VINF_SUCCESS; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u16; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u16; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128.s.Lo = InVal.u16; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256.Words.w0 = InVal.u16; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512.Words.w0 = InVal.u16; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU64(pValue, InVal.u16); return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case 
DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_U32: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u32; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u32; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u32; return VINF_SUCCESS; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u32; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128.s.Lo = InVal.u32; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256.DWords.dw0 = InVal.u32; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512.DWords.dw0 = InVal.u32; return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU64(pValue, InVal.u32); return VINF_DBGF_ZERO_EXTENDED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_U64: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u64; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u64; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u64; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u64; return VINF_SUCCESS; + case DBGFREGVALTYPE_U128: pValue->u128.s.Lo = InVal.u64; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256.QWords.qw0 = InVal.u64; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512.QWords.qw0 = InVal.u64; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU64(pValue, InVal.u64); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_U128: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u128.s.Lo; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u128.s.Lo; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u128.s.Lo; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u128.s.Lo; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128 = InVal.u128; return VINF_SUCCESS; + case DBGFREGVALTYPE_U256: pValue->u256.DQWords.dqw0 = InVal.u128; return VINF_SUCCESS; + case DBGFREGVALTYPE_U512: pValue->u512.DQWords.dqw0 = InVal.u128; return VINF_SUCCESS; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU128(pValue, InVal.u128); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_U256: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u256.Words.w0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u256.Words.w0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u256.DWords.dw0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u256.QWords.qw0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128 = InVal.u256.DQWords.dqw0; return 
VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256 = InVal.u256; return VINF_SUCCESS; + case DBGFREGVALTYPE_U512: pValue->u512.OWords.ow0 = InVal.u256; return VINF_SUCCESS; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU128(pValue, InVal.u256.DQWords.dqw0); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_U512: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.u512.Words.w0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.u512.Words.w0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.u512.DWords.dw0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.u512.QWords.qw0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128 = InVal.u512.DQWords.dqw0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256 = InVal.u512.OWords.ow0; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512 = InVal.u512; return VINF_SUCCESS; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU128(pValue, InVal.u512.DQWords.dqw0); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_R80: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = (uint8_t )dbgfR3RegValR80GetU64(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = (uint16_t)dbgfR3RegValR80GetU64(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = (uint32_t)dbgfR3RegValR80GetU64(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = (uint64_t)dbgfR3RegValR80GetU64(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128 = dbgfR3RegValR80GetU128(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256.DQWords.dqw0 = dbgfR3RegValR80GetU128(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512.DQWords.dqw0 = dbgfR3RegValR80GetU128(&InVal); return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_R80: pValue->r80 = InVal.r80; return VINF_SUCCESS; + case DBGFREGVALTYPE_DTR: return VERR_DBGF_UNSUPPORTED_CAST; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_DTR: + switch (enmToType) + { + case DBGFREGVALTYPE_U8: pValue->u8 = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U16: pValue->u16 = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U32: pValue->u32 = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U64: pValue->u64 = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U128: pValue->u128.s.Lo = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U256: pValue->u256.QWords.qw0 = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_U512: pValue->u512.QWords.qw0 = InVal.dtr.u64Base; return VINF_DBGF_TRUNCATED_REGISTER; + case DBGFREGVALTYPE_R80: dbgfR3RegValR80SetU64(pValue, InVal.dtr.u64Base); return VINF_DBGF_TRUNCATED_REGISTER; + case 
DBGFREGVALTYPE_DTR: pValue->dtr = InVal.dtr; return VINF_SUCCESS; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + } + break; + + case DBGFREGVALTYPE_INVALID: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_32BIT_HACK: + break; + } + + AssertMsgFailed(("%d / %d\n", enmFromType, enmToType)); + return VERR_DBGF_UNSUPPORTED_CAST; +} + + +/** + * Worker for the CPU register queries. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The virtual CPU ID. + * @param enmReg The register to query. + * @param enmType The desired return type. + * @param fGuestRegs Query guest CPU registers if set (true), + * hypervisor CPU registers if clear (false). + * @param pValue Where to return the register value. + */ +static DECLCALLBACK(int) dbgfR3RegCpuQueryWorkerOnCpu(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, DBGFREGVALTYPE enmType, + bool fGuestRegs, PDBGFREGVAL pValue) +{ + int rc = VINF_SUCCESS; + DBGF_REG_DB_LOCK_READ(pUVM); + + /* + * Look up the register set of the specified CPU. + */ + PDBGFREGSET pSet = fGuestRegs + ? pUVM->aCpus[idCpu].dbgf.s.pGuestRegSet + : pUVM->aCpus[idCpu].dbgf.s.pHyperRegSet; + if (RT_LIKELY(pSet)) + { + /* + * Look up the register and get the register value. + */ + if (RT_LIKELY(pSet->cDescs > (size_t)enmReg)) + { + PCDBGFREGDESC pDesc = &pSet->paDescs[enmReg]; + + pValue->au64[0] = pValue->au64[1] = 0; + rc = pDesc->pfnGet(pSet->uUserArg.pv, pDesc, pValue); + if (RT_SUCCESS(rc)) + { + /* + * Do the cast if the desired return type doesn't match what + * the getter returned. + */ + if (pDesc->enmType == enmType) + rc = VINF_SUCCESS; + else + rc = dbgfR3RegValCast(pValue, pDesc->enmType, enmType); + } + } + else + rc = VERR_DBGF_REGISTER_NOT_FOUND; + } + else + rc = VERR_INVALID_CPU_ID; + + DBGF_REG_DB_UNLOCK_READ(pUVM); + return rc; +} + + +/** + * Internal worker for the CPU register query functions. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The virtual CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param enmReg The register to query. + * @param enmType The desired return type. + * @param pValue Where to return the register value. + */ +static int dbgfR3RegCpuQueryWorker(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, DBGFREGVALTYPE enmType, PDBGFREGVAL pValue) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + AssertMsgReturn(enmReg >= DBGFREG_AL && enmReg <= DBGFREG_END, ("%d\n", enmReg), VERR_INVALID_PARAMETER); + + bool const fGuestRegs = !(idCpu & DBGFREG_HYPER_VMCPUID); + idCpu &= ~DBGFREG_HYPER_VMCPUID; + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3RegCpuQueryWorkerOnCpu, 6, + pUVM, idCpu, enmReg, enmType, fGuestRegs, pValue); +} + + +/** + * Queries a 8-bit CPU register value. 
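 *
 * Illustrative usage sketch (not from the original sources; assumes pUVM is a
 * valid user mode VM handle and virtual CPU 0 exists):
 * @code
 *     uint8_t u8Al = 0;
 *     int rc = DBGFR3RegCpuQueryU8(pUVM, 0, DBGFREG_AL, &u8Al);
 *     if (RT_SUCCESS(rc))
 *         LogRel(("al=%#x\n", u8Al));
 * @endcode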
+ * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param enmReg The register that's being queried. + * @param pu8 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryU8(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, uint8_t *pu8) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegCpuQueryWorker(pUVM, idCpu, enmReg, DBGFREGVALTYPE_U8, &Value); + if (RT_SUCCESS(rc)) + *pu8 = Value.u8; + else + *pu8 = 0; + return rc; +} + + +/** + * Queries a 16-bit CPU register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param enmReg The register that's being queried. + * @param pu16 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryU16(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, uint16_t *pu16) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegCpuQueryWorker(pUVM, idCpu, enmReg, DBGFREGVALTYPE_U16, &Value); + if (RT_SUCCESS(rc)) + *pu16 = Value.u16; + else + *pu16 = 0; + return rc; +} + + +/** + * Queries a 32-bit CPU register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param enmReg The register that's being queried. + * @param pu32 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryU32(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, uint32_t *pu32) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegCpuQueryWorker(pUVM, idCpu, enmReg, DBGFREGVALTYPE_U32, &Value); + if (RT_SUCCESS(rc)) + *pu32 = Value.u32; + else + *pu32 = 0; + return rc; +} + + +/** + * Queries a 64-bit CPU register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param enmReg The register that's being queried. + * @param pu64 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryU64(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, uint64_t *pu64) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegCpuQueryWorker(pUVM, idCpu, enmReg, DBGFREGVALTYPE_U64, &Value); + if (RT_SUCCESS(rc)) + *pu64 = Value.u64; + else + *pu64 = 0; + return rc; +} + + +/** + * Queries a descriptor table register value. 
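 *
 * Illustrative usage sketch (not from the original sources; assumes pUVM is a
 * valid user mode VM handle and that DBGFREG_GDTR is the GDTR member of the
 * DBGFREG enumeration):
 * @code
 *     uint64_t u64GdtrBase  = 0;
 *     uint16_t u16GdtrLimit = 0;
 *     int rc = DBGFR3RegCpuQueryXdtr(pUVM, 0, DBGFREG_GDTR, &u64GdtrBase, &u16GdtrLimit);
 * @endcode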
+ * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param enmReg The register that's being queried. + * @param pu64Base Where to store the register base value. + * @param pu16Limit Where to store the register limit value. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryXdtr(PUVM pUVM, VMCPUID idCpu, DBGFREG enmReg, uint64_t *pu64Base, uint16_t *pu16Limit) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegCpuQueryWorker(pUVM, idCpu, enmReg, DBGFREGVALTYPE_DTR, &Value); + if (RT_SUCCESS(rc)) + { + *pu64Base = Value.dtr.u64Base; + *pu16Limit = Value.dtr.u32Limit; + } + else + { + *pu64Base = 0; + *pu16Limit = 0; + } + return rc; +} + + +#if 0 /* rewrite / remove */ + +/** + * Wrapper around CPUMQueryGuestMsr for dbgfR3RegCpuQueryBatchWorker. + * + * @retval VINF_SUCCESS + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pReg The where to store the register value and + * size. + * @param idMsr The MSR to get. + */ +static void dbgfR3RegGetMsrBatch(PVMCPU pVCpu, PDBGFREGENTRY pReg, uint32_t idMsr) +{ + pReg->enmType = DBGFREGVALTYPE_U64; + int rc = CPUMQueryGuestMsr(pVCpu, idMsr, &pReg->Val.u64); + if (RT_FAILURE(rc)) + { + AssertMsg(rc == VERR_CPUM_RAISE_GP_0, ("%Rrc\n", rc)); + pReg->Val.u64 = 0; + } +} + + +static DECLCALLBACK(int) dbgfR3RegCpuQueryBatchWorker(PUVM pUVM, VMCPUID idCpu, PDBGFREGENTRY paRegs, size_t cRegs) +{ +#if 0 + PVMCPU pVCpu = &pUVM->pVM->aCpus[idCpu]; + PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + + PDBGFREGENTRY pReg = paRegs - 1; + while (cRegs-- > 0) + { + pReg++; + pReg->Val.au64[0] = 0; + pReg->Val.au64[1] = 0; + + DBGFREG const enmReg = pReg->enmReg; + AssertMsgReturn(enmReg >= 0 && enmReg <= DBGFREG_END, ("%d (%#x)\n", enmReg, enmReg), VERR_DBGF_REGISTER_NOT_FOUND); + if (enmReg != DBGFREG_END) + { + PCDBGFREGDESC pDesc = &g_aDbgfRegDescs[enmReg]; + if (!pDesc->pfnGet) + { + PCRTUINT128U pu = (PCRTUINT128U)((uintptr_t)pCtx + pDesc->offCtx); + pReg->enmType = pDesc->enmType; + switch (pDesc->enmType) + { + case DBGFREGVALTYPE_U8: pReg->Val.u8 = pu->au8[0]; break; + case DBGFREGVALTYPE_U16: pReg->Val.u16 = pu->au16[0]; break; + case DBGFREGVALTYPE_U32: pReg->Val.u32 = pu->au32[0]; break; + case DBGFREGVALTYPE_U64: pReg->Val.u64 = pu->au64[0]; break; + case DBGFREGVALTYPE_U128: + pReg->Val.au64[0] = pu->au64[0]; + pReg->Val.au64[1] = pu->au64[1]; + break; + case DBGFREGVALTYPE_R80: + pReg->Val.au64[0] = pu->au64[0]; + pReg->Val.au16[5] = pu->au16[5]; + break; + default: + AssertMsgFailedReturn(("%s %d\n", pDesc->pszName, pDesc->enmType), VERR_IPE_NOT_REACHED_DEFAULT_CASE); + } + } + else + { + int rc = pDesc->pfnGet(pVCpu, pDesc, pCtx, &pReg->Val.u); + if (RT_FAILURE(rc)) + return rc; + } + } + } + return VINF_SUCCESS; +#else + return VERR_NOT_IMPLEMENTED; +#endif +} + + +/** + * Query a batch of registers. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param paRegs Pointer to an array of @a cRegs elements. 
On + * input the enmReg members indicates which + * registers to query. On successful return the + * other members are set. DBGFREG_END can be used + * as a filler. + * @param cRegs The number of entries in @a paRegs. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryBatch(PUVM pUVM, VMCPUID idCpu, PDBGFREGENTRY paRegs, size_t cRegs) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, NULL); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + if (!cRegs) + return VINF_SUCCESS; + AssertReturn(cRegs < _1M, VERR_OUT_OF_RANGE); + AssertPtrReturn(paRegs, VERR_INVALID_POINTER); + size_t iReg = cRegs; + while (iReg-- > 0) + { + DBGFREG enmReg = paRegs[iReg].enmReg; + AssertMsgReturn(enmReg < DBGFREG_END && enmReg >= DBGFREG_AL, ("%d (%#x)", enmReg, enmReg), VERR_DBGF_REGISTER_NOT_FOUND); + } + + return VMR3ReqCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3RegCpuQueryBatchWorker, 4, pUVM, idCpu, paRegs, cRegs); +} + + +/** + * Query all registers for a Virtual CPU. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * + * @param pUVM The user mode VM handle. + * @param idCpu The target CPU ID. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param paRegs Pointer to an array of @a cRegs elements. + * These will be filled with the CPU register + * values. Overflowing entries will be set to + * DBGFREG_END. The returned registers can be + * accessed by using the DBGFREG values as index. + * @param cRegs The number of entries in @a paRegs. The + * recommended value is DBGFREG_ALL_COUNT. + */ +VMMR3DECL(int) DBGFR3RegCpuQueryAll(PUVM pUVM, VMCPUID idCpu, PDBGFREGENTRY paRegs, size_t cRegs) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, NULL); + AssertReturn(idCpu < pUVM->cCpus, VERR_INVALID_CPU_ID); + if (!cRegs) + return VINF_SUCCESS; + AssertReturn(cRegs < _1M, VERR_OUT_OF_RANGE); + AssertPtrReturn(paRegs, VERR_INVALID_POINTER); + + /* + * Convert it into a batch query (lazy bird). + */ + unsigned iReg = 0; + while (iReg < cRegs && iReg < DBGFREG_ALL_COUNT) + { + paRegs[iReg].enmReg = (DBGFREG)iReg; + iReg++; + } + while (iReg < cRegs) + paRegs[iReg++].enmReg = DBGFREG_END; + + return VMR3ReqCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3RegCpuQueryBatchWorker, 4, pUVM, idCpu, paRegs, cRegs); +} + +#endif /* rewrite or remove? */ + +/** + * Gets the name of a register. + * + * @returns Pointer to read-only register name (lower case). NULL if the + * parameters are invalid. + * + * @param pUVM The user mode VM handle. + * @param enmReg The register identifier. + * @param enmType The register type. This is for sort out + * aliases. Pass DBGFREGVALTYPE_INVALID to get + * the standard name. 
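 *
 * Illustrative usage sketch (not from the original sources): pass
 * DBGFREGVALTYPE_INVALID to get the standard lower-case name:
 * @code
 *     const char *pszName = DBGFR3RegCpuName(pUVM, DBGFREG_AL, DBGFREGVALTYPE_INVALID);
 *     // NULL if the parameters are bad or no guest register set is registered yet
 * @endcode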
+ */ +VMMR3DECL(const char *) DBGFR3RegCpuName(PUVM pUVM, DBGFREG enmReg, DBGFREGVALTYPE enmType) +{ + AssertReturn(enmReg >= DBGFREG_AL && enmReg < DBGFREG_END, NULL); + AssertReturn(enmType >= DBGFREGVALTYPE_INVALID && enmType < DBGFREGVALTYPE_END, NULL); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, NULL); + + PCDBGFREGSET pSet = pUVM->aCpus[0].dbgf.s.pGuestRegSet; + if (RT_UNLIKELY(!pSet)) + return NULL; + + PCDBGFREGDESC pDesc = &pSet->paDescs[enmReg]; + PCDBGFREGALIAS pAlias = pDesc->paAliases; + if ( pAlias + && pDesc->enmType != enmType + && enmType != DBGFREGVALTYPE_INVALID) + { + while (pAlias->pszName) + { + if (pAlias->enmType == enmType) + return pAlias->pszName; + pAlias++; + } + } + + return pDesc->pszName; +} + + +/** + * Fold the string to lower case and copy it into the destination buffer. + * + * @returns Number of folder characters, -1 on overflow. + * @param pszSrc The source string. + * @param cchSrc How much to fold and copy. + * @param pszDst The output buffer. + * @param cbDst The size of the output buffer. + */ +static ssize_t dbgfR3RegCopyToLower(const char *pszSrc, size_t cchSrc, char *pszDst, size_t cbDst) +{ + ssize_t cchFolded = 0; + char ch; + while (cchSrc-- > 0 && (ch = *pszSrc++)) + { + if (RT_UNLIKELY(cbDst <= 1)) + return -1; + cbDst--; + + char chLower = RT_C_TO_LOWER(ch); + cchFolded += chLower != ch; + *pszDst++ = chLower; + } + if (RT_UNLIKELY(!cbDst)) + return -1; + *pszDst = '\0'; + return cchFolded; +} + + +/** + * Resolves the register name. + * + * @returns Lookup record. + * @param pUVM The user mode VM handle. + * @param idDefCpu The default CPU ID set. + * @param pszReg The register name. + * @param fGuestRegs Default to guest CPU registers if set, the + * hypervisor CPU registers if clear. + */ +static PCDBGFREGLOOKUP dbgfR3RegResolve(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, bool fGuestRegs) +{ + DBGF_REG_DB_LOCK_READ(pUVM); + + /* Try looking up the name without any case folding or cpu prefixing. */ + PRTSTRSPACE pRegSpace = &pUVM->dbgf.s.RegSpace; + PCDBGFREGLOOKUP pLookupRec = (PCDBGFREGLOOKUP)RTStrSpaceGet(pRegSpace, pszReg); + if (!pLookupRec) + { + char szName[DBGF_REG_MAX_NAME * 4 + 16]; + + /* Lower case it and try again. */ + ssize_t cchFolded = dbgfR3RegCopyToLower(pszReg, RTSTR_MAX, szName, sizeof(szName) - DBGF_REG_MAX_NAME); + if (cchFolded > 0) + pLookupRec = (PCDBGFREGLOOKUP)RTStrSpaceGet(pRegSpace, szName); + if ( !pLookupRec + && cchFolded >= 0 + && idDefCpu != VMCPUID_ANY) + { + /* Prefix it with the specified CPU set. */ + size_t cchCpuSet = RTStrPrintf(szName, sizeof(szName), fGuestRegs ? "cpu%u." : "hypercpu%u.", idDefCpu); + dbgfR3RegCopyToLower(pszReg, RTSTR_MAX, &szName[cchCpuSet], sizeof(szName) - cchCpuSet); + pLookupRec = (PCDBGFREGLOOKUP)RTStrSpaceGet(pRegSpace, szName); + } + } + + DBGF_REG_DB_UNLOCK_READ(pUVM); + return pLookupRec; +} + + +/** + * Validates the register name. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if the register was found. + * @retval VERR_DBGF_REGISTER_NOT_FOUND if not found. + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default CPU. + * @param pszReg The registe name. + */ +VMMR3DECL(int) DBGFR3RegNmValidate(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg) +{ + /* + * Validate input. 
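 *
 * Illustrative note (not from the original sources): assuming "rip" is one of
 * the registered guest CPU register names, both of these forms are expected
 * to validate, the first resolving against the given default CPU, the second
 * using an explicit set prefix:
 *
 *     DBGFR3RegNmValidate(pUVM, 0, "rip");
 *     DBGFR3RegNmValidate(pUVM, VMCPUID_ANY, "cpu0.rip");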
+ */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + AssertReturn((idDefCpu & ~DBGFREG_HYPER_VMCPUID) < pUVM->cCpus || idDefCpu == VMCPUID_ANY, VERR_INVALID_CPU_ID); + AssertPtrReturn(pszReg, VERR_INVALID_POINTER); + + /* + * Resolve the register. + */ + bool fGuestRegs = true; + if ((idDefCpu & DBGFREG_HYPER_VMCPUID) && idDefCpu != VMCPUID_ANY) + { + fGuestRegs = false; + idDefCpu &= ~DBGFREG_HYPER_VMCPUID; + } + + PCDBGFREGLOOKUP pLookupRec = dbgfR3RegResolve(pUVM, idDefCpu, pszReg, fGuestRegs); + if (!pLookupRec) + return VERR_DBGF_REGISTER_NOT_FOUND; + return VINF_SUCCESS; +} + + +/** + * On CPU worker for the register queries, used by dbgfR3RegNmQueryWorker and + * dbgfR3RegPrintfCbFormatNormal. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param pLookupRec The register lookup record. + * @param enmType The desired return type. + * @param pValue Where to return the register value. + * @param penmType Where to store the register value type. + * Optional. + */ +static DECLCALLBACK(int) dbgfR3RegNmQueryWorkerOnCpu(PUVM pUVM, PCDBGFREGLOOKUP pLookupRec, DBGFREGVALTYPE enmType, + PDBGFREGVAL pValue, PDBGFREGVALTYPE penmType) +{ + PCDBGFREGDESC pDesc = pLookupRec->pDesc; + PCDBGFREGSET pSet = pLookupRec->pSet; + PCDBGFREGSUBFIELD pSubField = pLookupRec->pSubField; + DBGFREGVALTYPE enmValueType = pDesc->enmType; + int rc; + + NOREF(pUVM); + + /* + * Get the register or sub-field value. + */ + dbgfR3RegValClear(pValue); + if (!pSubField) + { + rc = pDesc->pfnGet(pSet->uUserArg.pv, pDesc, pValue); + if ( pLookupRec->pAlias + && pLookupRec->pAlias->enmType != enmValueType + && RT_SUCCESS(rc)) + { + rc = dbgfR3RegValCast(pValue, enmValueType, pLookupRec->pAlias->enmType); + enmValueType = pLookupRec->pAlias->enmType; + } + } + else + { + if (pSubField->pfnGet) + { + rc = pSubField->pfnGet(pSet->uUserArg.pv, pSubField, &pValue->u128); + enmValueType = DBGFREGVALTYPE_U128; + } + else + { + rc = pDesc->pfnGet(pSet->uUserArg.pv, pDesc, pValue); + if ( pLookupRec->pAlias + && pLookupRec->pAlias->enmType != enmValueType + && RT_SUCCESS(rc)) + { + rc = dbgfR3RegValCast(pValue, enmValueType, pLookupRec->pAlias->enmType); + enmValueType = pLookupRec->pAlias->enmType; + } + if (RT_SUCCESS(rc)) + { + rc = dbgfR3RegValCast(pValue, enmValueType, DBGFREGVALTYPE_U128); + if (RT_SUCCESS(rc)) + { + RTUInt128AssignShiftLeft(&pValue->u128, -pSubField->iFirstBit); + RTUInt128AssignAndNFirstBits(&pValue->u128, pSubField->cBits); + if (pSubField->cShift) + RTUInt128AssignShiftLeft(&pValue->u128, pSubField->cShift); + } + } + } + if (RT_SUCCESS(rc)) + { + unsigned const cBits = pSubField->cBits + pSubField->cShift; + if (cBits <= 8) + enmValueType = DBGFREGVALTYPE_U8; + else if (cBits <= 16) + enmValueType = DBGFREGVALTYPE_U16; + else if (cBits <= 32) + enmValueType = DBGFREGVALTYPE_U32; + else if (cBits <= 64) + enmValueType = DBGFREGVALTYPE_U64; + else + enmValueType = DBGFREGVALTYPE_U128; + rc = dbgfR3RegValCast(pValue, DBGFREGVALTYPE_U128, enmValueType); + } + } + if (RT_SUCCESS(rc)) + { + /* + * Do the cast if the desired return type doesn't match what + * the getter returned. + */ + if ( enmValueType == enmType + || enmType == DBGFREGVALTYPE_END) + { + rc = VINF_SUCCESS; + if (penmType) + *penmType = enmValueType; + } + else + { + rc = dbgfR3RegValCast(pValue, enmValueType, enmType); + if (penmType) + *penmType = RT_SUCCESS(rc) ? 
enmType : enmValueType; + } + } + + return rc; +} + + +/** + * Worker for the register queries. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The virtual CPU ID for the default CPU register + * set. Can be OR'ed with DBGFREG_HYPER_VMCPUID. + * @param pszReg The register to query. + * @param enmType The desired return type. + * @param pValue Where to return the register value. + * @param penmType Where to store the register value type. + * Optional. + */ +static int dbgfR3RegNmQueryWorker(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, DBGFREGVALTYPE enmType, + PDBGFREGVAL pValue, PDBGFREGVALTYPE penmType) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + AssertReturn((idDefCpu & ~DBGFREG_HYPER_VMCPUID) < pUVM->cCpus || idDefCpu == VMCPUID_ANY, VERR_INVALID_CPU_ID); + AssertPtrReturn(pszReg, VERR_INVALID_POINTER); + + Assert(enmType > DBGFREGVALTYPE_INVALID && enmType <= DBGFREGVALTYPE_END); + AssertPtr(pValue); + + /* + * Resolve the register and call the getter on the relevant CPU. + */ + bool fGuestRegs = true; + if ((idDefCpu & DBGFREG_HYPER_VMCPUID) && idDefCpu != VMCPUID_ANY) + { + fGuestRegs = false; + idDefCpu &= ~DBGFREG_HYPER_VMCPUID; + } + PCDBGFREGLOOKUP pLookupRec = dbgfR3RegResolve(pUVM, idDefCpu, pszReg, fGuestRegs); + if (pLookupRec) + { + if (pLookupRec->pSet->enmType == DBGFREGSETTYPE_CPU) + idDefCpu = pLookupRec->pSet->uUserArg.pVCpu->idCpu; + else if (idDefCpu != VMCPUID_ANY) + idDefCpu &= ~DBGFREG_HYPER_VMCPUID; + return VMR3ReqPriorityCallWaitU(pUVM, idDefCpu, (PFNRT)dbgfR3RegNmQueryWorkerOnCpu, 5, + pUVM, pLookupRec, enmType, pValue, penmType); + } + return VERR_DBGF_REGISTER_NOT_FOUND; +} + + +/** + * Queries a descriptor table register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pValue Where to store the register value. + * @param penmType Where to store the register value type. + */ +VMMR3DECL(int) DBGFR3RegNmQuery(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, PDBGFREGVAL pValue, PDBGFREGVALTYPE penmType) +{ + return dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_END, pValue, penmType); +} + + +/** + * Queries a 8-bit register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pu8 Where to store the register value. 
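 *
 * Illustrative usage sketch (not from the original sources; assumes "al" is a
 * registered guest CPU register name):
 * @code
 *     uint8_t u8;
 *     int rc = DBGFR3RegNmQueryU8(pUVM, idCpu, "al", &u8);
 * @endcode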
+ */ +VMMR3DECL(int) DBGFR3RegNmQueryU8(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, uint8_t *pu8) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_U8, &Value, NULL); + if (RT_SUCCESS(rc)) + *pu8 = Value.u8; + else + *pu8 = 0; + return rc; +} + + +/** + * Queries a 16-bit register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pu16 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegNmQueryU16(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, uint16_t *pu16) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_U16, &Value, NULL); + if (RT_SUCCESS(rc)) + *pu16 = Value.u16; + else + *pu16 = 0; + return rc; +} + + +/** + * Queries a 32-bit register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pu32 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegNmQueryU32(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, uint32_t *pu32) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_U32, &Value, NULL); + if (RT_SUCCESS(rc)) + *pu32 = Value.u32; + else + *pu32 = 0; + return rc; +} + + +/** + * Queries a 64-bit register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pu64 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegNmQueryU64(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, uint64_t *pu64) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_U64, &Value, NULL); + if (RT_SUCCESS(rc)) + *pu64 = Value.u64; + else + *pu64 = 0; + return rc; +} + + +/** + * Queries a 128-bit register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. 
+ * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pu128 Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegNmQueryU128(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, PRTUINT128U pu128) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_U128, &Value, NULL); + if (RT_SUCCESS(rc)) + *pu128 = Value.u128; + else + pu128->s.Hi = pu128->s.Lo = 0; + return rc; +} + + +#if 0 +/** + * Queries a long double register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param plrd Where to store the register value. + */ +VMMR3DECL(int) DBGFR3RegNmQueryLrd(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, long double *plrd) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_R80, &Value, NULL); + if (RT_SUCCESS(rc)) + *plrd = Value.lrd; + else + *plrd = 0; + return rc; +} +#endif + + +/** + * Queries a descriptor table register value. + * + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The default target CPU ID, VMCPUID_ANY if not + * applicable. Can be OR'ed with + * DBGFREG_HYPER_VMCPUID. + * @param pszReg The register that's being queried. Except for + * CPU registers, this must be on the form + * "set.reg[.sub]". + * @param pu64Base Where to store the register base value. + * @param pu16Limit Where to store the register limit value. + */ +VMMR3DECL(int) DBGFR3RegNmQueryXdtr(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, uint64_t *pu64Base, uint16_t *pu16Limit) +{ + DBGFREGVAL Value; + int rc = dbgfR3RegNmQueryWorker(pUVM, idDefCpu, pszReg, DBGFREGVALTYPE_DTR, &Value, NULL); + if (RT_SUCCESS(rc)) + { + *pu64Base = Value.dtr.u64Base; + *pu16Limit = Value.dtr.u32Limit; + } + else + { + *pu64Base = 0; + *pu16Limit = 0; + } + return rc; +} + + +/// @todo VMMR3DECL(int) DBGFR3RegNmQueryBatch(PUVM pUVM,VMCPUID idDefCpu, DBGFREGENTRYNM paRegs, size_t cRegs); + + +/** + * Gets the number of registers returned by DBGFR3RegNmQueryAll. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pcRegs Where to return the register count. + */ +VMMR3DECL(int) DBGFR3RegNmQueryAllCount(PUVM pUVM, size_t *pcRegs) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + *pcRegs = pUVM->dbgf.s.cRegs; + return VINF_SUCCESS; +} + + +/** + * Pad register entries. + * + * @param paRegs The output array. + * @param cRegs The size of the output array. + * @param iReg The first register to pad. + * @param cRegsToPad The number of registers to pad. 
+ */ +static void dbgfR3RegNmQueryAllPadEntries(PDBGFREGENTRYNM paRegs, size_t cRegs, size_t iReg, size_t cRegsToPad) +{ + if (iReg < cRegs) + { + size_t iEndReg = iReg + cRegsToPad; + if (iEndReg > cRegs) + iEndReg = cRegs; + while (iReg < iEndReg) + { + paRegs[iReg].pszName = NULL; + paRegs[iReg].enmType = DBGFREGVALTYPE_END; + dbgfR3RegValClear(&paRegs[iReg].Val); + iReg++; + } + } +} + + +/** + * Query all registers in a set. + * + * @param pSet The set. + * @param cRegsToQuery The number of registers to query. + * @param paRegs The output array. + * @param cRegs The size of the output array. + */ +static void dbgfR3RegNmQueryAllInSet(PCDBGFREGSET pSet, size_t cRegsToQuery, PDBGFREGENTRYNM paRegs, size_t cRegs) +{ + if (cRegsToQuery > pSet->cDescs) + cRegsToQuery = pSet->cDescs; + if (cRegsToQuery > cRegs) + cRegsToQuery = cRegs; + + for (size_t iReg = 0; iReg < cRegsToQuery; iReg++) + { + paRegs[iReg].enmType = pSet->paDescs[iReg].enmType; + paRegs[iReg].pszName = pSet->paLookupRecs[iReg].Core.pszString; + dbgfR3RegValClear(&paRegs[iReg].Val); + int rc2 = pSet->paDescs[iReg].pfnGet(pSet->uUserArg.pv, &pSet->paDescs[iReg], &paRegs[iReg].Val); + AssertRCSuccess(rc2); + if (RT_FAILURE(rc2)) + dbgfR3RegValClear(&paRegs[iReg].Val); + } +} + + +/** + * @callback_method_impl{FNRTSTRSPACECALLBACK, Worker used by + * dbgfR3RegNmQueryAllWorker} + */ +static DECLCALLBACK(int) dbgfR3RegNmQueryAllEnum(PRTSTRSPACECORE pStr, void *pvUser) +{ + PCDBGFREGSET pSet = (PCDBGFREGSET)pStr; + if (pSet->enmType != DBGFREGSETTYPE_CPU) + { + PDBGFR3REGNMQUERYALLARGS pArgs = (PDBGFR3REGNMQUERYALLARGS)pvUser; + if (pArgs->iReg < pArgs->cRegs) + dbgfR3RegNmQueryAllInSet(pSet, pSet->cDescs, &pArgs->paRegs[pArgs->iReg], pArgs->cRegs - pArgs->iReg); + pArgs->iReg += pSet->cDescs; + } + + return 0; +} + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS, Worker used by DBGFR3RegNmQueryAll} + */ +static DECLCALLBACK(VBOXSTRICTRC) dbgfR3RegNmQueryAllWorker(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + PDBGFR3REGNMQUERYALLARGS pArgs = (PDBGFR3REGNMQUERYALLARGS)pvUser; + PDBGFREGENTRYNM paRegs = pArgs->paRegs; + size_t const cRegs = pArgs->cRegs; + PUVM pUVM = pVM->pUVM; + PUVMCPU pUVCpu = pVCpu->pUVCpu; + + DBGF_REG_DB_LOCK_READ(pUVM); + + /* + * My guest CPU registers. + */ + size_t iCpuReg = pVCpu->idCpu * DBGFREG_ALL_COUNT; + if (pUVCpu->dbgf.s.pGuestRegSet) + { + if (iCpuReg < cRegs) + dbgfR3RegNmQueryAllInSet(pUVCpu->dbgf.s.pGuestRegSet, DBGFREG_ALL_COUNT, &paRegs[iCpuReg], cRegs - iCpuReg); + } + else + dbgfR3RegNmQueryAllPadEntries(paRegs, cRegs, iCpuReg, DBGFREG_ALL_COUNT); + + /* + * My hypervisor CPU registers. + */ + iCpuReg = pUVM->cCpus * DBGFREG_ALL_COUNT + pUVCpu->idCpu * DBGFREG_ALL_COUNT; + if (pUVCpu->dbgf.s.pHyperRegSet) + { + if (iCpuReg < cRegs) + dbgfR3RegNmQueryAllInSet(pUVCpu->dbgf.s.pHyperRegSet, DBGFREG_ALL_COUNT, &paRegs[iCpuReg], cRegs - iCpuReg); + } + else + dbgfR3RegNmQueryAllPadEntries(paRegs, cRegs, iCpuReg, DBGFREG_ALL_COUNT); + + /* + * The primary CPU does all the other registers. + */ + if (pUVCpu->idCpu == 0) + { + pArgs->iReg = pUVM->cCpus * DBGFREG_ALL_COUNT * 2; + RTStrSpaceEnumerate(&pUVM->dbgf.s.RegSetSpace, dbgfR3RegNmQueryAllEnum, pArgs); + dbgfR3RegNmQueryAllPadEntries(paRegs, cRegs, pArgs->iReg, cRegs); + } + + DBGF_REG_DB_UNLOCK_READ(pUVM); + return VINF_SUCCESS; /* Ignore errors. */ +} + + +/** + * Queries all register. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param paRegs The output register value array. 
The register + * name string is read only and shall not be freed + * or modified. + * @param cRegs The number of entries in @a paRegs. The + * correct size can be obtained by calling + * DBGFR3RegNmQueryAllCount. + */ +VMMR3DECL(int) DBGFR3RegNmQueryAll(PUVM pUVM, PDBGFREGENTRYNM paRegs, size_t cRegs) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(paRegs, VERR_INVALID_POINTER); + AssertReturn(cRegs > 0, VERR_OUT_OF_RANGE); + + DBGFR3REGNMQUERYALLARGS Args; + Args.paRegs = paRegs; + Args.cRegs = cRegs; + + return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE, dbgfR3RegNmQueryAllWorker, &Args); +} + + +/** + * On CPU worker for the register modifications, used by DBGFR3RegNmSet. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param pLookupRec The register lookup record. Maybe be modified, + * so please pass a copy of the user's one. + * @param pValue The new register value. + * @param pMask Indicate which bits to modify. + */ +static DECLCALLBACK(int) dbgfR3RegNmSetWorkerOnCpu(PUVM pUVM, PDBGFREGLOOKUP pLookupRec, + PCDBGFREGVAL pValue, PCDBGFREGVAL pMask) +{ + RT_NOREF_PV(pUVM); + PCDBGFREGSUBFIELD pSubField = pLookupRec->pSubField; + if (pSubField && pSubField->pfnSet) + return pSubField->pfnSet(pLookupRec->pSet->uUserArg.pv, pSubField, pValue->u128, pMask->u128); + return pLookupRec->pDesc->pfnSet(pLookupRec->pSet->uUserArg.pv, pLookupRec->pDesc, pValue, pMask); +} + + +/** + * Worker for the register setting. + * + * @returns VBox status code. + * @retval VINF_SUCCESS + * @retval VERR_INVALID_VM_HANDLE + * @retval VERR_INVALID_CPU_ID + * @retval VERR_DBGF_REGISTER_NOT_FOUND + * @retval VERR_DBGF_UNSUPPORTED_CAST + * @retval VINF_DBGF_TRUNCATED_REGISTER + * @retval VINF_DBGF_ZERO_EXTENDED_REGISTER + * + * @param pUVM The user mode VM handle. + * @param idDefCpu The virtual CPU ID for the default CPU register + * set. Can be OR'ed with DBGFREG_HYPER_VMCPUID. + * @param pszReg The register to query. + * @param pValue The value to set + * @param enmType How to interpret the value in @a pValue. + */ +VMMR3DECL(int) DBGFR3RegNmSet(PUVM pUVM, VMCPUID idDefCpu, const char *pszReg, PCDBGFREGVAL pValue, DBGFREGVALTYPE enmType) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + AssertReturn((idDefCpu & ~DBGFREG_HYPER_VMCPUID) < pUVM->cCpus || idDefCpu == VMCPUID_ANY, VERR_INVALID_CPU_ID); + AssertPtrReturn(pszReg, VERR_INVALID_POINTER); + AssertReturn(enmType > DBGFREGVALTYPE_INVALID && enmType < DBGFREGVALTYPE_END, VERR_INVALID_PARAMETER); + AssertPtrReturn(pValue, VERR_INVALID_PARAMETER); + + /* + * Resolve the register and check that it is writable. + */ + bool fGuestRegs = true; + if ((idDefCpu & DBGFREG_HYPER_VMCPUID) && idDefCpu != VMCPUID_ANY) + { + fGuestRegs = false; + idDefCpu &= ~DBGFREG_HYPER_VMCPUID; + } + PCDBGFREGLOOKUP pLookupRec = dbgfR3RegResolve(pUVM, idDefCpu, pszReg, fGuestRegs); + if (pLookupRec) + { + PCDBGFREGDESC pDesc = pLookupRec->pDesc; + PCDBGFREGSET pSet = pLookupRec->pSet; + PCDBGFREGSUBFIELD pSubField = pLookupRec->pSubField; + + if ( !(pDesc->fFlags & DBGFREG_FLAGS_READ_ONLY) + && (pSubField + ? 
!(pSubField->fFlags & DBGFREGSUBFIELD_FLAGS_READ_ONLY) + && (pSubField->pfnSet != NULL || pDesc->pfnSet != NULL) + : pDesc->pfnSet != NULL) ) + { + /* + * Calculate the modification mask and cast the input value to the + * type of the target register. + */ + DBGFREGVAL Mask = DBGFREGVAL_INITIALIZE_ZERO; + DBGFREGVAL Value = DBGFREGVAL_INITIALIZE_ZERO; + switch (enmType) + { + case DBGFREGVALTYPE_U8: + Value.u8 = pValue->u8; + Mask.u8 = UINT8_MAX; + break; + case DBGFREGVALTYPE_U16: + Value.u16 = pValue->u16; + Mask.u16 = UINT16_MAX; + break; + case DBGFREGVALTYPE_U32: + Value.u32 = pValue->u32; + Mask.u32 = UINT32_MAX; + break; + case DBGFREGVALTYPE_U64: + Value.u64 = pValue->u64; + Mask.u64 = UINT64_MAX; + break; + case DBGFREGVALTYPE_U128: + Value.u128 = pValue->u128; + Mask.u128.s.Lo = UINT64_MAX; + Mask.u128.s.Hi = UINT64_MAX; + break; + case DBGFREGVALTYPE_U256: + Value.u256 = pValue->u256; + Mask.u256.QWords.qw0 = UINT64_MAX; + Mask.u256.QWords.qw1 = UINT64_MAX; + Mask.u256.QWords.qw2 = UINT64_MAX; + Mask.u256.QWords.qw3 = UINT64_MAX; + break; + case DBGFREGVALTYPE_U512: + Value.u512 = pValue->u512; + Mask.u512.QWords.qw0 = UINT64_MAX; + Mask.u512.QWords.qw1 = UINT64_MAX; + Mask.u512.QWords.qw2 = UINT64_MAX; + Mask.u512.QWords.qw3 = UINT64_MAX; + Mask.u512.QWords.qw4 = UINT64_MAX; + Mask.u512.QWords.qw5 = UINT64_MAX; + Mask.u512.QWords.qw6 = UINT64_MAX; + Mask.u512.QWords.qw7 = UINT64_MAX; + break; + case DBGFREGVALTYPE_R80: +#ifdef RT_COMPILER_WITH_80BIT_LONG_DOUBLE + Value.r80Ex.lrd = pValue->r80Ex.lrd; +#else + Value.r80Ex.au64[0] = pValue->r80Ex.au64[0]; + Value.r80Ex.au16[4] = pValue->r80Ex.au16[4]; +#endif + Value.r80Ex.au64[0] = UINT64_MAX; + Value.r80Ex.au16[4] = UINT16_MAX; + break; + case DBGFREGVALTYPE_DTR: + Value.dtr.u32Limit = pValue->dtr.u32Limit; + Value.dtr.u64Base = pValue->dtr.u64Base; + Mask.dtr.u32Limit = UINT32_MAX; + Mask.dtr.u64Base = UINT64_MAX; + break; + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + AssertFailedReturn(VERR_INTERNAL_ERROR_3); + } + + int rc = VINF_SUCCESS; + DBGFREGVALTYPE enmRegType = pDesc->enmType; + if (pSubField) + { + unsigned const cBits = pSubField->cBits + pSubField->cShift; + if (cBits <= 8) + enmRegType = DBGFREGVALTYPE_U8; + else if (cBits <= 16) + enmRegType = DBGFREGVALTYPE_U16; + else if (cBits <= 32) + enmRegType = DBGFREGVALTYPE_U32; + else if (cBits <= 64) + enmRegType = DBGFREGVALTYPE_U64; + else if (cBits <= 128) + enmRegType = DBGFREGVALTYPE_U128; + else if (cBits <= 256) + enmRegType = DBGFREGVALTYPE_U256; + else + enmRegType = DBGFREGVALTYPE_U512; + } + else if (pLookupRec->pAlias) + { + /* Restrict the input to the size of the alias register. */ + DBGFREGVALTYPE enmAliasType = pLookupRec->pAlias->enmType; + if (enmAliasType != enmType) + { + rc = dbgfR3RegValCast(&Value, enmType, enmAliasType); + if (RT_FAILURE(rc)) + return rc; + dbgfR3RegValCast(&Mask, enmType, enmAliasType); + enmType = enmAliasType; + } + } + + if (enmType != enmRegType) + { + int rc2 = dbgfR3RegValCast(&Value, enmType, enmRegType); + if (RT_FAILURE(rc2)) + return rc2; + if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS) + rc2 = VINF_SUCCESS; + dbgfR3RegValCast(&Mask, enmType, enmRegType); + } + + /* + * Subfields needs some extra processing if there is no subfield + * setter, since we'll be feeding it to the normal register setter + * instead. The mask and value must be shifted and truncated to the + * subfield position. 
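 *
 * Illustrative worked example (not from the original sources): for a
 * hypothetical 4-bit sub-field at iFirstBit=8 with cShift=0, writing the
 * value 0xa leaves Value.u128 = 0xa00 and Mask.u128 = 0xf00 for the register
 * setter below, so only bits 8 through 11 of the full register are changed.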
+ */ + if (pSubField && !pSubField->pfnSet) + { + /* The shift factor is for displaying a subfield value + 2**cShift times larger than the stored value. We have + to undo this before adjusting value and mask. */ + if (pSubField->cShift) + { + /* Warn about trunction of the lower bits that get + shifted out below. */ + if (rc == VINF_SUCCESS) + { + DBGFREGVAL Value2 = Value; + RTUInt128AssignAndNFirstBits(&Value2.u128, -pSubField->cShift); + if (!RTUInt128BitAreAllClear(&Value2.u128)) + rc = VINF_DBGF_TRUNCATED_REGISTER; + } + RTUInt128AssignShiftRight(&Value.u128, pSubField->cShift); + } + + RTUInt128AssignAndNFirstBits(&Value.u128, pSubField->cBits); + if (rc == VINF_SUCCESS && RTUInt128IsNotEqual(&Value.u128, &Value.u128)) + rc = VINF_DBGF_TRUNCATED_REGISTER; + RTUInt128AssignAndNFirstBits(&Mask.u128, pSubField->cBits); + + RTUInt128AssignShiftLeft(&Value.u128, pSubField->iFirstBit); + RTUInt128AssignShiftLeft(&Mask.u128, pSubField->iFirstBit); + } + + /* + * Do the actual work on an EMT. + */ + if (pSet->enmType == DBGFREGSETTYPE_CPU) + idDefCpu = pSet->uUserArg.pVCpu->idCpu; + else if (idDefCpu != VMCPUID_ANY) + idDefCpu &= ~DBGFREG_HYPER_VMCPUID; + + int rc2 = VMR3ReqPriorityCallWaitU(pUVM, idDefCpu, (PFNRT)dbgfR3RegNmSetWorkerOnCpu, 4, + pUVM, pLookupRec, &Value, &Mask); + + if (rc == VINF_SUCCESS || RT_FAILURE(rc2)) + rc = rc2; + return rc; + } + return VERR_DBGF_READ_ONLY_REGISTER; + } + return VERR_DBGF_REGISTER_NOT_FOUND; +} + + +/** + * Internal worker for DBGFR3RegFormatValue, cbBuf is sufficent. + * + * @copydoc DBGFR3RegFormatValueEx + */ +DECLINLINE(ssize_t) dbgfR3RegFormatValueInt(char *pszBuf, size_t cbBuf, PCDBGFREGVAL pValue, DBGFREGVALTYPE enmType, + unsigned uBase, signed int cchWidth, signed int cchPrecision, uint32_t fFlags) +{ + switch (enmType) + { + case DBGFREGVALTYPE_U8: + return RTStrFormatU8(pszBuf, cbBuf, pValue->u8, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_U16: + return RTStrFormatU16(pszBuf, cbBuf, pValue->u16, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_U32: + return RTStrFormatU32(pszBuf, cbBuf, pValue->u32, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_U64: + return RTStrFormatU64(pszBuf, cbBuf, pValue->u64, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_U128: + return RTStrFormatU128(pszBuf, cbBuf, &pValue->u128, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_U256: + return RTStrFormatU256(pszBuf, cbBuf, &pValue->u256, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_U512: + return RTStrFormatU512(pszBuf, cbBuf, &pValue->u512, uBase, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_R80: + return RTStrFormatR80u2(pszBuf, cbBuf, &pValue->r80Ex, cchWidth, cchPrecision, fFlags); + case DBGFREGVALTYPE_DTR: + { + ssize_t cch = RTStrFormatU64(pszBuf, cbBuf, pValue->dtr.u64Base, + 16, 2+16, 0, RTSTR_F_SPECIAL | RTSTR_F_ZEROPAD); + AssertReturn(cch > 0, VERR_DBGF_REG_IPE_1); + pszBuf[cch++] = ':'; + cch += RTStrFormatU64(&pszBuf[cch], cbBuf - cch, pValue->dtr.u32Limit, + 16, 4, 0, RTSTR_F_ZEROPAD | RTSTR_F_32BIT); + return cch; + } + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + /* no default, want gcc warnings */ + } + + RTStrPrintf(pszBuf, cbBuf, "!enmType=%d!", enmType); + return VERR_DBGF_REG_IPE_2; +} + + +/** + * Format a register value, extended version. + * + * @returns The number of bytes returned, VERR_BUFFER_OVERFLOW on failure. + * @param pszBuf The output buffer. 
+ * @param cbBuf The size of the output buffer. + * @param pValue The value to format. + * @param enmType The value type. + * @param uBase The base (ignored if not applicable). + * @param cchWidth The width if RTSTR_F_WIDTH is set, otherwise + * ignored. + * @param cchPrecision The width if RTSTR_F_PRECISION is set, otherwise + * ignored. + * @param fFlags String formatting flags, RTSTR_F_XXX. + */ +VMMR3DECL(ssize_t) DBGFR3RegFormatValueEx(char *pszBuf, size_t cbBuf, PCDBGFREGVAL pValue, DBGFREGVALTYPE enmType, + unsigned uBase, signed int cchWidth, signed int cchPrecision, uint32_t fFlags) +{ + /* + * Format to temporary buffer using worker shared with dbgfR3RegPrintfCbFormatNormal. + */ + char szTmp[160]; + ssize_t cchOutput = dbgfR3RegFormatValueInt(szTmp, sizeof(szTmp), pValue, enmType, uBase, cchWidth, cchPrecision, fFlags); + if (cchOutput > 0) + { + if ((size_t)cchOutput < cbBuf) + memcpy(pszBuf, szTmp, cchOutput + 1); + else + { + if (cbBuf) + { + memcpy(pszBuf, szTmp, cbBuf - 1); + pszBuf[cbBuf - 1] = '\0'; + } + cchOutput = VERR_BUFFER_OVERFLOW; + } + } + return cchOutput; +} + + +/** + * Format a register value as hexadecimal and with default width according to + * the type. + * + * @returns The number of bytes returned, VERR_BUFFER_OVERFLOW on failure. + * @param pszBuf The output buffer. + * @param cbBuf The size of the output buffer. + * @param pValue The value to format. + * @param enmType The value type. + * @param fSpecial Same as RTSTR_F_SPECIAL. + */ +VMMR3DECL(ssize_t) DBGFR3RegFormatValue(char *pszBuf, size_t cbBuf, PCDBGFREGVAL pValue, DBGFREGVALTYPE enmType, bool fSpecial) +{ + int cchWidth = 0; + switch (enmType) + { + case DBGFREGVALTYPE_U8: cchWidth = 2 + fSpecial*2; break; + case DBGFREGVALTYPE_U16: cchWidth = 4 + fSpecial*2; break; + case DBGFREGVALTYPE_U32: cchWidth = 8 + fSpecial*2; break; + case DBGFREGVALTYPE_U64: cchWidth = 16 + fSpecial*2; break; + case DBGFREGVALTYPE_U128: cchWidth = 32 + fSpecial*2; break; + case DBGFREGVALTYPE_U256: cchWidth = 64 + fSpecial*2; break; + case DBGFREGVALTYPE_U512: cchWidth = 128 + fSpecial*2; break; + case DBGFREGVALTYPE_R80: cchWidth = 0; break; + case DBGFREGVALTYPE_DTR: cchWidth = 16+1+4 + fSpecial*2; break; + + case DBGFREGVALTYPE_32BIT_HACK: + case DBGFREGVALTYPE_END: + case DBGFREGVALTYPE_INVALID: + break; + /* no default, want gcc warnings */ + } + uint32_t fFlags = RTSTR_F_ZEROPAD; + if (fSpecial) + fFlags |= RTSTR_F_SPECIAL; + if (cchWidth != 0) + fFlags |= RTSTR_F_WIDTH; + return DBGFR3RegFormatValueEx(pszBuf, cbBuf, pValue, enmType, 16, cchWidth, 0, fFlags); +} + + +/** + * Format a register using special hacks as well as sub-field specifications + * (the latter isn't implemented yet). + */ +static size_t +dbgfR3RegPrintfCbFormatField(PDBGFR3REGPRINTFARGS pThis, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + PCDBGFREGLOOKUP pLookupRec, int cchWidth, int cchPrecision, unsigned fFlags) +{ + char szTmp[160]; + + NOREF(cchWidth); NOREF(cchPrecision); NOREF(fFlags); + + /* + * Retrieve the register value. + */ + DBGFREGVAL Value; + DBGFREGVALTYPE enmType; + int rc = dbgfR3RegNmQueryWorkerOnCpu(pThis->pUVM, pLookupRec, DBGFREGVALTYPE_END, &Value, &enmType); + if (RT_FAILURE(rc)) + { + PCRTSTATUSMSG pErr = RTErrGet(rc); + if (pErr) + return pfnOutput(pvArgOutput, pErr->pszDefine, strlen(pErr->pszDefine)); + return pfnOutput(pvArgOutput, szTmp, RTStrPrintf(szTmp, sizeof(szTmp), "rc=%d", rc)); + } + + char *psz = szTmp; + + /* + * Special case: Format eflags. 
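+ *
+ * The output is the iopl value followed by two/three-letter mnemonics for
+ * the individual flags (see the aFlags table below); e.g. an eflags value
+ * of 0x246 would come out as "iopl=0 nv up ei pl zr na po nc".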
+ */ + if ( pLookupRec->pSet->enmType == DBGFREGSETTYPE_CPU + && pLookupRec->pDesc->enmReg == DBGFREG_RFLAGS + && pLookupRec->pSubField == NULL) + { + rc = dbgfR3RegValCast(&Value, enmType, DBGFREGVALTYPE_U32); + AssertRC(rc); + uint32_t const efl = Value.u32; + + /* the iopl */ + psz += RTStrPrintf(psz, sizeof(szTmp) / 2, "iopl=%u ", X86_EFL_GET_IOPL(efl)); + + /* add flags */ + static const struct + { + const char *pszSet; + const char *pszClear; + uint32_t fFlag; + } aFlags[] = + { + { "vip",NULL, X86_EFL_VIP }, + { "vif",NULL, X86_EFL_VIF }, + { "ac", NULL, X86_EFL_AC }, + { "vm", NULL, X86_EFL_VM }, + { "rf", NULL, X86_EFL_RF }, + { "nt", NULL, X86_EFL_NT }, + { "ov", "nv", X86_EFL_OF }, + { "dn", "up", X86_EFL_DF }, + { "ei", "di", X86_EFL_IF }, + { "tf", NULL, X86_EFL_TF }, + { "ng", "pl", X86_EFL_SF }, + { "zr", "nz", X86_EFL_ZF }, + { "ac", "na", X86_EFL_AF }, + { "po", "pe", X86_EFL_PF }, + { "cy", "nc", X86_EFL_CF }, + }; + for (unsigned i = 0; i < RT_ELEMENTS(aFlags); i++) + { + const char *pszAdd = aFlags[i].fFlag & efl ? aFlags[i].pszSet : aFlags[i].pszClear; + if (pszAdd) + { + *psz++ = *pszAdd++; + *psz++ = *pszAdd++; + if (*pszAdd) + *psz++ = *pszAdd++; + *psz++ = ' '; + } + } + + /* drop trailing space */ + psz--; + } + else + { + /* + * General case. + */ + AssertMsgFailed(("Not implemented: %s\n", pLookupRec->Core.pszString)); + return pfnOutput(pvArgOutput, pLookupRec->Core.pszString, pLookupRec->Core.cchString); + } + + /* Output the string. */ + return pfnOutput(pvArgOutput, szTmp, psz - &szTmp[0]); +} + + +/** + * Formats a register having parsed up to the register name. + */ +static size_t +dbgfR3RegPrintfCbFormatNormal(PDBGFR3REGPRINTFARGS pThis, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + PCDBGFREGLOOKUP pLookupRec, unsigned uBase, int cchWidth, int cchPrecision, unsigned fFlags) +{ + char szTmp[160]; + + /* + * Get the register value. + */ + DBGFREGVAL Value; + DBGFREGVALTYPE enmType; + int rc = dbgfR3RegNmQueryWorkerOnCpu(pThis->pUVM, pLookupRec, DBGFREGVALTYPE_END, &Value, &enmType); + if (RT_FAILURE(rc)) + { + PCRTSTATUSMSG pErr = RTErrGet(rc); + if (pErr) + return pfnOutput(pvArgOutput, pErr->pszDefine, strlen(pErr->pszDefine)); + return pfnOutput(pvArgOutput, szTmp, RTStrPrintf(szTmp, sizeof(szTmp), "rc=%d", rc)); + } + + /* + * Format the value. + */ + ssize_t cchOutput = dbgfR3RegFormatValueInt(szTmp, sizeof(szTmp), &Value, enmType, uBase, cchWidth, cchPrecision, fFlags); + if (RT_UNLIKELY(cchOutput <= 0)) + { + AssertFailed(); + return pfnOutput(pvArgOutput, "internal-error", sizeof("internal-error") - 1); + } + return pfnOutput(pvArgOutput, szTmp, cchOutput); +} + + +/** + * @callback_method_impl{FNSTRFORMAT} + */ +static DECLCALLBACK(size_t) +dbgfR3RegPrintfCbFormat(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, + const char **ppszFormat, va_list *pArgs, int cchWidth, + int cchPrecision, unsigned fFlags, char chArgSize) +{ + NOREF(pArgs); NOREF(chArgSize); + + /* + * Parse the format type and hand the job to the appropriate worker. 
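+ *
+ * The callback is handed the format string just past the '%', so the
+ * accepted shapes are "VR{name}" and "VRt{name}" where 't' is one of the
+ * type letters dispatched on further down (X, U, O, B or F); e.g.
+ * "%VR{rax}" or "%VRU{rip}" in the caller's format string.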
+ */ + PDBGFR3REGPRINTFARGS pThis = (PDBGFR3REGPRINTFARGS)pvArg; + const char *pszFormat = *ppszFormat; + if ( pszFormat[0] != 'V' + || pszFormat[1] != 'R') + { + AssertMsgFailed(("'%s'\n", pszFormat)); + return 0; + } + unsigned offCurly = 2; + if (pszFormat[offCurly] != '{') + { + AssertMsgReturn(pszFormat[offCurly], ("'%s'\n", pszFormat), 0); + offCurly++; + AssertMsgReturn(pszFormat[offCurly] == '{', ("'%s'\n", pszFormat), 0); + } + const char *pachReg = &pszFormat[offCurly + 1]; + + /* + * The end and length of the register. + */ + const char *pszEnd = strchr(pachReg, '}'); + AssertMsgReturn(pszEnd, ("Missing closing curly bracket: '%s'\n", pszFormat), 0); + size_t const cchReg = pszEnd - pachReg; + + /* + * Look up the register - same as dbgfR3RegResolve, except for locking and + * input string termination. + */ + PRTSTRSPACE pRegSpace = &pThis->pUVM->dbgf.s.RegSpace; + /* Try looking up the name without any case folding or cpu prefixing. */ + PCDBGFREGLOOKUP pLookupRec = (PCDBGFREGLOOKUP)RTStrSpaceGetN(pRegSpace, pachReg, cchReg); + if (!pLookupRec) + { + /* Lower case it and try again. */ + char szName[DBGF_REG_MAX_NAME * 4 + 16]; + ssize_t cchFolded = dbgfR3RegCopyToLower(pachReg, cchReg, szName, sizeof(szName) - DBGF_REG_MAX_NAME); + if (cchFolded > 0) + pLookupRec = (PCDBGFREGLOOKUP)RTStrSpaceGet(pRegSpace, szName); + if ( !pLookupRec + && cchFolded >= 0 + && pThis->idCpu != VMCPUID_ANY) + { + /* Prefix it with the specified CPU set. */ + size_t cchCpuSet = RTStrPrintf(szName, sizeof(szName), pThis->fGuestRegs ? "cpu%u." : "hypercpu%u.", pThis->idCpu); + dbgfR3RegCopyToLower(pachReg, cchReg, &szName[cchCpuSet], sizeof(szName) - cchCpuSet); + pLookupRec = (PCDBGFREGLOOKUP)RTStrSpaceGet(pRegSpace, szName); + } + } + AssertMsgReturn(pLookupRec, ("'%s'\n", pszFormat), 0); + AssertMsgReturn( pLookupRec->pSet->enmType != DBGFREGSETTYPE_CPU + || pLookupRec->pSet->uUserArg.pVCpu->idCpu == pThis->idCpu, + ("'%s' idCpu=%u, pSet/cpu=%u\n", pszFormat, pThis->idCpu, pLookupRec->pSet->uUserArg.pVCpu->idCpu), + 0); + + /* + * Commit the parsed format string. Up to this point it is nice to know + * what register lookup failed and such, so we've delayed comitting. + */ + *ppszFormat = pszEnd + 1; + + /* + * Call the responsible worker. 
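+ *
+ * pszFormat[offCurly - 1] is the character just before the '{', i.e. the
+ * 'R' of a plain %VR{} or the extra type letter of the longer forms,
+ * which is what the switch below dispatches on.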
+ */ + switch (pszFormat[offCurly - 1]) + { + case 'R': /* %VR{} */ + case 'X': /* %VRX{} */ + return dbgfR3RegPrintfCbFormatNormal(pThis, pfnOutput, pvArgOutput, pLookupRec, + 16, cchWidth, cchPrecision, fFlags); + case 'U': + return dbgfR3RegPrintfCbFormatNormal(pThis, pfnOutput, pvArgOutput, pLookupRec, + 10, cchWidth, cchPrecision, fFlags); + case 'O': + return dbgfR3RegPrintfCbFormatNormal(pThis, pfnOutput, pvArgOutput, pLookupRec, + 8, cchWidth, cchPrecision, fFlags); + case 'B': + return dbgfR3RegPrintfCbFormatNormal(pThis, pfnOutput, pvArgOutput, pLookupRec, + 2, cchWidth, cchPrecision, fFlags); + case 'F': + return dbgfR3RegPrintfCbFormatField(pThis, pfnOutput, pvArgOutput, pLookupRec, cchWidth, cchPrecision, fFlags); + default: + AssertFailed(); + return 0; + } +} + + + +/** + * @callback_method_impl{FNRTSTROUTPUT} + */ +static DECLCALLBACK(size_t) +dbgfR3RegPrintfCbOutput(void *pvArg, const char *pachChars, size_t cbChars) +{ + PDBGFR3REGPRINTFARGS pArgs = (PDBGFR3REGPRINTFARGS)pvArg; + size_t cbToCopy = cbChars; + if (cbToCopy >= pArgs->cchLeftBuf) + { + if (RT_SUCCESS(pArgs->rc)) + pArgs->rc = VERR_BUFFER_OVERFLOW; + cbToCopy = pArgs->cchLeftBuf; + } + if (cbToCopy > 0) + { + memcpy(&pArgs->pszBuf[pArgs->offBuf], pachChars, cbToCopy); + pArgs->offBuf += cbToCopy; + pArgs->cchLeftBuf -= cbToCopy; + pArgs->pszBuf[pArgs->offBuf] = '\0'; + } + return cbToCopy; +} + + +/** + * On CPU worker for the register formatting, used by DBGFR3RegPrintfV. + * + * @returns VBox status code. + * + * @param pArgs The argument package and state. + */ +static DECLCALLBACK(int) dbgfR3RegPrintfWorkerOnCpu(PDBGFR3REGPRINTFARGS pArgs) +{ + DBGF_REG_DB_LOCK_READ(pArgs->pUVM); + RTStrFormatV(dbgfR3RegPrintfCbOutput, pArgs, dbgfR3RegPrintfCbFormat, pArgs, pArgs->pszFormat, pArgs->va); + DBGF_REG_DB_UNLOCK_READ(pArgs->pUVM); + return pArgs->rc; +} + + +/** + * Format a registers. + * + * This is restricted to registers from one CPU, that specified by @a idCpu. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The CPU ID of any CPU registers that may be + * printed, pass VMCPUID_ANY if not applicable. + * @param pszBuf The output buffer. + * @param cbBuf The size of the output buffer. + * @param pszFormat The format string. Register names are given by + * %VR{name}, they take no arguments. + * @param va Other format arguments. + */ +VMMR3DECL(int) DBGFR3RegPrintfV(PUVM pUVM, VMCPUID idCpu, char *pszBuf, size_t cbBuf, const char *pszFormat, va_list va) +{ + AssertPtrReturn(pszBuf, VERR_INVALID_POINTER); + AssertReturn(cbBuf > 0, VERR_BUFFER_OVERFLOW); + *pszBuf = '\0'; + + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn((idCpu & ~DBGFREG_HYPER_VMCPUID) < pUVM->cCpus || idCpu == VMCPUID_ANY, VERR_INVALID_CPU_ID); + AssertPtrReturn(pszFormat, VERR_INVALID_POINTER); + + /* + * Set up an argument package and execute the formatting on the + * specified CPU. + */ + DBGFR3REGPRINTFARGS Args; + Args.pUVM = pUVM; + Args.idCpu = idCpu != VMCPUID_ANY ? idCpu & ~DBGFREG_HYPER_VMCPUID : idCpu; + Args.fGuestRegs = idCpu != VMCPUID_ANY && !(idCpu & DBGFREG_HYPER_VMCPUID); + Args.pszBuf = pszBuf; + Args.pszFormat = pszFormat; + va_copy(Args.va, va); + Args.offBuf = 0; + Args.cchLeftBuf = cbBuf - 1; + Args.rc = VINF_SUCCESS; + int rc = VMR3ReqPriorityCallWaitU(pUVM, Args.idCpu, (PFNRT)dbgfR3RegPrintfWorkerOnCpu, 1, &Args); + va_end(Args.va); + return rc; +} + + +/** + * Format a registers. 
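+ *
+ * Illustrative use (buffer size and CPU id picked for the example only):
+ *     char szRegs[256];
+ *     int rc = DBGFR3RegPrintf(pUVM, 0 /*idCpu*/, szRegs, sizeof(szRegs),
+ *                              "cs:rip=%VR{cs}:%VR{rip} rflags=%VR{rflags}");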
+ * + * This is restricted to registers from one CPU, that specified by @a idCpu. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu The CPU ID of any CPU registers that may be + * printed, pass VMCPUID_ANY if not applicable. + * @param pszBuf The output buffer. + * @param cbBuf The size of the output buffer. + * @param pszFormat The format string. Register names are given by + * %VR{name}, %VRU{name}, %VRO{name} and + * %VRB{name}, which are hexadecimal, (unsigned) + * decimal, octal and binary representation. None + * of these types takes any arguments. + * @param ... Other format arguments. + */ +VMMR3DECL(int) DBGFR3RegPrintf(PUVM pUVM, VMCPUID idCpu, char *pszBuf, size_t cbBuf, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + int rc = DBGFR3RegPrintfV(pUVM, idCpu, pszBuf, cbBuf, pszFormat, va); + va_end(va); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/DBGFStack.cpp b/src/VBox/VMM/VMMR3/DBGFStack.cpp new file mode 100644 index 00000000..9728ee0f --- /dev/null +++ b/src/VBox/VMM/VMMR3/DBGFStack.cpp @@ -0,0 +1,1153 @@ +/* $Id: DBGFStack.cpp $ */ +/** @file + * DBGF - Debugger Facility, Call Stack Analyser. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DBGF +#include +#include +#include +#include "DBGFInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) dbgfR3StackReadCallback(PRTDBGUNWINDSTATE pThis, RTUINTPTR uSp, size_t cbToRead, void *pvDst); + +/** + * Unwind context. + * + * @note Using a constructor and destructor here for simple+safe cleanup. + */ +typedef struct DBGFUNWINDCTX +{ + PUVM m_pUVM; + VMCPUID m_idCpu; + RTDBGAS m_hAs; + PCCPUMCTX m_pInitialCtx; + bool m_fIsHostRing0; + uint64_t m_uOsScratch; /**< For passing to DBGFOSREG::pfnStackUnwindAssist. 
*/ + + RTDBGMOD m_hCached; + RTUINTPTR m_uCachedMapping; + RTUINTPTR m_cbCachedMapping; + RTDBGSEGIDX m_idxCachedSegMapping; + + RTDBGUNWINDSTATE m_State; + + DBGFUNWINDCTX(PUVM pUVM, VMCPUID idCpu, PCCPUMCTX pInitialCtx, RTDBGAS hAs) + { + m_State.u32Magic = RTDBGUNWINDSTATE_MAGIC; + m_State.enmArch = RTLDRARCH_AMD64; + m_State.pfnReadStack = dbgfR3StackReadCallback; + m_State.pvUser = this; + RT_ZERO(m_State.u); + if (pInitialCtx) + { + m_State.u.x86.auRegs[X86_GREG_xAX] = pInitialCtx->rax; + m_State.u.x86.auRegs[X86_GREG_xCX] = pInitialCtx->rcx; + m_State.u.x86.auRegs[X86_GREG_xDX] = pInitialCtx->rdx; + m_State.u.x86.auRegs[X86_GREG_xBX] = pInitialCtx->rbx; + m_State.u.x86.auRegs[X86_GREG_xSP] = pInitialCtx->rsp; + m_State.u.x86.auRegs[X86_GREG_xBP] = pInitialCtx->rbp; + m_State.u.x86.auRegs[X86_GREG_xSI] = pInitialCtx->rsi; + m_State.u.x86.auRegs[X86_GREG_xDI] = pInitialCtx->rdi; + m_State.u.x86.auRegs[X86_GREG_x8 ] = pInitialCtx->r8; + m_State.u.x86.auRegs[X86_GREG_x9 ] = pInitialCtx->r9; + m_State.u.x86.auRegs[X86_GREG_x10] = pInitialCtx->r10; + m_State.u.x86.auRegs[X86_GREG_x11] = pInitialCtx->r11; + m_State.u.x86.auRegs[X86_GREG_x12] = pInitialCtx->r12; + m_State.u.x86.auRegs[X86_GREG_x13] = pInitialCtx->r13; + m_State.u.x86.auRegs[X86_GREG_x14] = pInitialCtx->r14; + m_State.u.x86.auRegs[X86_GREG_x15] = pInitialCtx->r15; + m_State.uPc = pInitialCtx->rip; + m_State.u.x86.uRFlags = pInitialCtx->rflags.u; + m_State.u.x86.auSegs[X86_SREG_ES] = pInitialCtx->es.Sel; + m_State.u.x86.auSegs[X86_SREG_CS] = pInitialCtx->cs.Sel; + m_State.u.x86.auSegs[X86_SREG_SS] = pInitialCtx->ss.Sel; + m_State.u.x86.auSegs[X86_SREG_DS] = pInitialCtx->ds.Sel; + m_State.u.x86.auSegs[X86_SREG_GS] = pInitialCtx->gs.Sel; + m_State.u.x86.auSegs[X86_SREG_FS] = pInitialCtx->fs.Sel; + m_State.u.x86.fRealOrV86 = CPUMIsGuestInRealOrV86ModeEx(pInitialCtx); + } + else if (hAs == DBGF_AS_R0) + VMMR3InitR0StackUnwindState(pUVM, idCpu, &m_State); + + m_pUVM = pUVM; + m_idCpu = idCpu; + m_hAs = DBGFR3AsResolveAndRetain(pUVM, hAs); + m_pInitialCtx = pInitialCtx; + m_fIsHostRing0 = hAs == DBGF_AS_R0; + m_uOsScratch = 0; + + m_hCached = NIL_RTDBGMOD; + m_uCachedMapping = 0; + m_cbCachedMapping = 0; + m_idxCachedSegMapping = NIL_RTDBGSEGIDX; + } + + ~DBGFUNWINDCTX(); + +} DBGFUNWINDCTX; +/** Pointer to unwind context. 
*/ +typedef DBGFUNWINDCTX *PDBGFUNWINDCTX; + + +static void dbgfR3UnwindCtxFlushCache(PDBGFUNWINDCTX pUnwindCtx) +{ + if (pUnwindCtx->m_hCached != NIL_RTDBGMOD) + { + RTDbgModRelease(pUnwindCtx->m_hCached); + pUnwindCtx->m_hCached = NIL_RTDBGMOD; + } + pUnwindCtx->m_cbCachedMapping = 0; + pUnwindCtx->m_idxCachedSegMapping = NIL_RTDBGSEGIDX; +} + + +DBGFUNWINDCTX::~DBGFUNWINDCTX() +{ + dbgfR3UnwindCtxFlushCache(this); + if (m_hAs != NIL_RTDBGAS) + { + RTDbgAsRelease(m_hAs); + m_hAs = NIL_RTDBGAS; + } +} + + +/** + * @interface_method_impl{RTDBGUNWINDSTATE,pfnReadStack} + */ +static DECLCALLBACK(int) dbgfR3StackReadCallback(PRTDBGUNWINDSTATE pThis, RTUINTPTR uSp, size_t cbToRead, void *pvDst) +{ + Assert( pThis->enmArch == RTLDRARCH_AMD64 + || pThis->enmArch == RTLDRARCH_X86_32); + + PDBGFUNWINDCTX pUnwindCtx = (PDBGFUNWINDCTX)pThis->pvUser; + DBGFADDRESS SrcAddr; + int rc = VINF_SUCCESS; + if (pUnwindCtx->m_fIsHostRing0) + DBGFR3AddrFromHostR0(&SrcAddr, uSp); + else + { + if ( pThis->enmArch == RTLDRARCH_X86_32 + || pThis->enmArch == RTLDRARCH_X86_16) + { + if (!pThis->u.x86.fRealOrV86) + rc = DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &SrcAddr, pThis->u.x86.auSegs[X86_SREG_SS], uSp); + else + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &SrcAddr, uSp + ((uint32_t)pThis->u.x86.auSegs[X86_SREG_SS] << 4)); + } + else + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &SrcAddr, uSp); + } + if (RT_SUCCESS(rc)) + rc = DBGFR3MemRead(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &SrcAddr, pvDst, cbToRead); + if (RT_SUCCESS(rc)) + return rc; + return -rc; /* Ignore read errors. */ +} + + +/** + * Sets PC and SP. + * + * @returns true. + * @param pUnwindCtx The unwind context. + * @param pAddrPC The program counter (PC) value to set. + * @param pAddrStack The stack pointer (SP) value to set. + */ +static bool dbgfR3UnwindCtxSetPcAndSp(PDBGFUNWINDCTX pUnwindCtx, PCDBGFADDRESS pAddrPC, PCDBGFADDRESS pAddrStack) +{ + Assert( pUnwindCtx->m_State.enmArch == RTLDRARCH_AMD64 + || pUnwindCtx->m_State.enmArch == RTLDRARCH_X86_32); + + if (!DBGFADDRESS_IS_FAR(pAddrPC)) + pUnwindCtx->m_State.uPc = pAddrPC->FlatPtr; + else + { + pUnwindCtx->m_State.uPc = pAddrPC->off; + pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_CS] = pAddrPC->Sel; + } + if (!DBGFADDRESS_IS_FAR(pAddrStack)) + pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xSP] = pAddrStack->FlatPtr; + else + { + pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xSP] = pAddrStack->off; + pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_SS] = pAddrStack->Sel; + } + return true; +} + + +/** + * Tries to unwind one frame using unwind info. + * + * @returns true on success, false on failure. + * @param pUnwindCtx The unwind context. + */ +static bool dbgfR3UnwindCtxDoOneFrame(PDBGFUNWINDCTX pUnwindCtx) +{ + /* + * Need to load it into the cache? + */ + RTUINTPTR offCache = pUnwindCtx->m_State.uPc - pUnwindCtx->m_uCachedMapping; + if (offCache >= pUnwindCtx->m_cbCachedMapping) + { + RTDBGMOD hDbgMod = NIL_RTDBGMOD; + RTUINTPTR uBase = 0; + RTDBGSEGIDX idxSeg = NIL_RTDBGSEGIDX; + int rc = RTDbgAsModuleByAddr(pUnwindCtx->m_hAs, pUnwindCtx->m_State.uPc, &hDbgMod, &uBase, &idxSeg); + if (RT_SUCCESS(rc)) + { + dbgfR3UnwindCtxFlushCache(pUnwindCtx); + pUnwindCtx->m_hCached = hDbgMod; + pUnwindCtx->m_uCachedMapping = uBase; + pUnwindCtx->m_idxCachedSegMapping = idxSeg; + pUnwindCtx->m_cbCachedMapping = idxSeg == NIL_RTDBGSEGIDX ? 
RTDbgModImageSize(hDbgMod) + : RTDbgModSegmentSize(hDbgMod, idxSeg); + offCache = pUnwindCtx->m_State.uPc - uBase; + } + else + return false; + } + + /* + * Do the lookup. + */ + AssertCompile(UINT32_MAX == NIL_RTDBGSEGIDX); + int rc = RTDbgModUnwindFrame(pUnwindCtx->m_hCached, pUnwindCtx->m_idxCachedSegMapping, offCache, &pUnwindCtx->m_State); + if (RT_SUCCESS(rc)) + return true; + return false; +} + + +/** + * Read stack memory, will init entire buffer. + */ +DECLINLINE(int) dbgfR3StackRead(PUVM pUVM, VMCPUID idCpu, void *pvBuf, PCDBGFADDRESS pSrcAddr, size_t cb, size_t *pcbRead) +{ + int rc = DBGFR3MemRead(pUVM, idCpu, pSrcAddr, pvBuf, cb); + if (RT_FAILURE(rc)) + { + /* fallback: byte by byte and zero the ones we fail to read. */ + size_t cbRead; + for (cbRead = 0; cbRead < cb; cbRead++) + { + DBGFADDRESS Addr = *pSrcAddr; + rc = DBGFR3MemRead(pUVM, idCpu, DBGFR3AddrAdd(&Addr, cbRead), (uint8_t *)pvBuf + cbRead, 1); + if (RT_FAILURE(rc)) + break; + } + if (cbRead) + rc = VINF_SUCCESS; + memset((char *)pvBuf + cbRead, 0, cb - cbRead); + *pcbRead = cbRead; + } + else + *pcbRead = cb; + return rc; +} + +/** + * Collects sure registers on frame exit. + * + * @returns VINF_SUCCESS or VERR_NO_MEMORY. + * @param pUVM The user mode VM handle for the allocation. + * @param pFrame The frame in question. + * @param pState The unwind state. + */ +static int dbgfR3StackWalkCollectRegisterChanges(PUVM pUVM, PDBGFSTACKFRAME pFrame, PRTDBGUNWINDSTATE pState) +{ + pFrame->cSureRegs = 0; + pFrame->paSureRegs = NULL; + + if ( pState->enmArch == RTLDRARCH_AMD64 + || pState->enmArch == RTLDRARCH_X86_32 + || pState->enmArch == RTLDRARCH_X86_16) + { + if (pState->u.x86.Loaded.fAll) + { + /* + * Count relevant registers. + */ + uint32_t cRegs = 0; + if (pState->u.x86.Loaded.s.fRegs) + for (uint32_t f = 1; f < RT_BIT_32(RT_ELEMENTS(pState->u.x86.auRegs)); f <<= 1) + if (pState->u.x86.Loaded.s.fRegs & f) + cRegs++; + if (pState->u.x86.Loaded.s.fSegs) + for (uint32_t f = 1; f < RT_BIT_32(RT_ELEMENTS(pState->u.x86.auSegs)); f <<= 1) + if (pState->u.x86.Loaded.s.fSegs & f) + cRegs++; + if (pState->u.x86.Loaded.s.fRFlags) + cRegs++; + if (pState->u.x86.Loaded.s.fErrCd) + cRegs++; + if (cRegs > 0) + { + /* + * Allocate the arrays. + */ + PDBGFREGVALEX paSureRegs = (PDBGFREGVALEX)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_STACK, sizeof(DBGFREGVALEX) * cRegs); + AssertReturn(paSureRegs, VERR_NO_MEMORY); + pFrame->paSureRegs = paSureRegs; + pFrame->cSureRegs = cRegs; + + /* + * Popuplate the arrays. 
+ */ + uint32_t iReg = 0; + if (pState->u.x86.Loaded.s.fRegs) + for (uint32_t i = 0; i < RT_ELEMENTS(pState->u.x86.auRegs); i++) + if (pState->u.x86.Loaded.s.fRegs & RT_BIT(i)) + { + paSureRegs[iReg].Value.u64 = pState->u.x86.auRegs[i]; + paSureRegs[iReg].enmType = DBGFREGVALTYPE_U64; + paSureRegs[iReg].enmReg = (DBGFREG)(DBGFREG_RAX + i); + iReg++; + } + + if (pState->u.x86.Loaded.s.fSegs) + for (uint32_t i = 0; i < RT_ELEMENTS(pState->u.x86.auSegs); i++) + if (pState->u.x86.Loaded.s.fSegs & RT_BIT(i)) + { + paSureRegs[iReg].Value.u16 = pState->u.x86.auSegs[i]; + paSureRegs[iReg].enmType = DBGFREGVALTYPE_U16; + switch (i) + { + case X86_SREG_ES: paSureRegs[iReg].enmReg = DBGFREG_ES; break; + case X86_SREG_CS: paSureRegs[iReg].enmReg = DBGFREG_CS; break; + case X86_SREG_SS: paSureRegs[iReg].enmReg = DBGFREG_SS; break; + case X86_SREG_DS: paSureRegs[iReg].enmReg = DBGFREG_DS; break; + case X86_SREG_FS: paSureRegs[iReg].enmReg = DBGFREG_FS; break; + case X86_SREG_GS: paSureRegs[iReg].enmReg = DBGFREG_GS; break; + default: AssertFailedBreak(); + } + iReg++; + } + + if (iReg < cRegs) + { + if (pState->u.x86.Loaded.s.fRFlags) + { + paSureRegs[iReg].Value.u64 = pState->u.x86.uRFlags; + paSureRegs[iReg].enmType = DBGFREGVALTYPE_U64; + paSureRegs[iReg].enmReg = DBGFREG_RFLAGS; + iReg++; + } + if (pState->u.x86.Loaded.s.fErrCd) + { + paSureRegs[iReg].Value.u64 = pState->u.x86.uErrCd; + paSureRegs[iReg].enmType = DBGFREGVALTYPE_U64; + paSureRegs[iReg].enmReg = DBGFREG_END; + paSureRegs[iReg].pszName = "trap-errcd"; + iReg++; + } + } + Assert(iReg == cRegs); + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Internal worker routine. + * + * On x86 the typical stack frame layout is like this: + * .. .. + * 16 parameter 2 + * 12 parameter 1 + * 8 parameter 0 + * 4 return address + * 0 old ebp; current ebp points here + */ +DECL_NO_INLINE(static, int) dbgfR3StackWalk(PDBGFUNWINDCTX pUnwindCtx, PDBGFSTACKFRAME pFrame, bool fFirst) +{ + /* + * Stop if we got a read error in the previous run. + */ + if (pFrame->fFlags & DBGFSTACKFRAME_FLAGS_LAST) + return VERR_NO_MORE_FILES; + + /* + * Advance the frame (except for the first). + */ + if (!fFirst) /** @todo we can probably eliminate this fFirst business... */ + { + /* frame, pc and stack is taken from the existing frames return members. */ + pFrame->AddrFrame = pFrame->AddrReturnFrame; + pFrame->AddrPC = pFrame->AddrReturnPC; + pFrame->pSymPC = pFrame->pSymReturnPC; + pFrame->pLinePC = pFrame->pLineReturnPC; + + /* increment the frame number. */ + pFrame->iFrame++; + + /* UNWIND_INFO_RET -> USED_UNWIND; return type */ + if (!(pFrame->fFlags & DBGFSTACKFRAME_FLAGS_UNWIND_INFO_RET)) + pFrame->fFlags &= ~DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO; + else + { + pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO; + pFrame->fFlags &= ~DBGFSTACKFRAME_FLAGS_UNWIND_INFO_RET; + if (pFrame->enmReturnFrameReturnType != RTDBGRETURNTYPE_INVALID) + { + pFrame->enmReturnType = pFrame->enmReturnFrameReturnType; + pFrame->enmReturnFrameReturnType = RTDBGRETURNTYPE_INVALID; + } + } + pFrame->fFlags &= ~DBGFSTACKFRAME_FLAGS_TRAP_FRAME; + } + + /* + * Figure the return address size and use the old PC to guess stack item size. + */ + /** @todo this is bogus... 
*/ + unsigned cbRetAddr = RTDbgReturnTypeSize(pFrame->enmReturnType); + unsigned cbStackItem; + switch (pFrame->AddrPC.fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) + { + case DBGFADDRESS_FLAGS_FAR16: cbStackItem = 2; break; + case DBGFADDRESS_FLAGS_FAR32: cbStackItem = 4; break; + case DBGFADDRESS_FLAGS_FAR64: cbStackItem = 8; break; + case DBGFADDRESS_FLAGS_RING0: cbStackItem = sizeof(RTHCUINTPTR); break; + default: + switch (pFrame->enmReturnType) + { + case RTDBGRETURNTYPE_FAR16: + case RTDBGRETURNTYPE_IRET16: + case RTDBGRETURNTYPE_IRET32_V86: + case RTDBGRETURNTYPE_NEAR16: cbStackItem = 2; break; + + case RTDBGRETURNTYPE_FAR32: + case RTDBGRETURNTYPE_IRET32: + case RTDBGRETURNTYPE_IRET32_PRIV: + case RTDBGRETURNTYPE_NEAR32: cbStackItem = 4; break; + + case RTDBGRETURNTYPE_FAR64: + case RTDBGRETURNTYPE_IRET64: + case RTDBGRETURNTYPE_NEAR64: cbStackItem = 8; break; + + default: + AssertMsgFailed(("%d\n", pFrame->enmReturnType)); + cbStackItem = 4; + break; + } + } + + /* + * Read the raw frame data. + * We double cbRetAddr in case we have a far return. + */ + union + { + uint64_t *pu64; + uint32_t *pu32; + uint16_t *pu16; + uint8_t *pb; + void *pv; + } u, uRet, uArgs, uBp; + size_t cbRead = cbRetAddr*2 + cbStackItem + sizeof(pFrame->Args); + u.pv = alloca(cbRead); + uBp = u; + uRet.pb = u.pb + cbStackItem; + uArgs.pb = u.pb + cbStackItem + cbRetAddr; + + Assert(DBGFADDRESS_IS_VALID(&pFrame->AddrFrame)); + int rc = dbgfR3StackRead(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, u.pv, &pFrame->AddrFrame, cbRead, &cbRead); + if ( RT_FAILURE(rc) + || cbRead < cbRetAddr + cbStackItem) + pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_LAST; + + /* + * Return Frame address. + * + * If we used unwind info to get here, the unwind register context will be + * positioned after the return instruction has been executed. We start by + * picking up the rBP register here for return frame and will try improve + * on it further down by using unwind info. + */ + pFrame->AddrReturnFrame = pFrame->AddrFrame; + if (pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO) + { + if ( pFrame->enmReturnType == RTDBGRETURNTYPE_IRET32_PRIV + || pFrame->enmReturnType == RTDBGRETURNTYPE_IRET64) + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnFrame, + pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_SS], pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xBP]); + else if (pFrame->enmReturnType == RTDBGRETURNTYPE_IRET32_V86) + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &pFrame->AddrReturnFrame, + ((uint32_t)pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_SS] << 4) + + pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xBP]); + else + { + pFrame->AddrReturnFrame.off = pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xBP]; + pFrame->AddrReturnFrame.FlatPtr += pFrame->AddrReturnFrame.off - pFrame->AddrFrame.off; + } + } + else + { + switch (cbStackItem) + { + case 2: pFrame->AddrReturnFrame.off = *uBp.pu16; break; + case 4: pFrame->AddrReturnFrame.off = *uBp.pu32; break; + case 8: pFrame->AddrReturnFrame.off = *uBp.pu64; break; + default: AssertMsgFailedReturn(("cbStackItem=%d\n", cbStackItem), VERR_DBGF_STACK_IPE_1); + } + + /* Watcom tries to keep the frame pointer odd for far returns. 
*/ + if ( cbStackItem <= 4 + && !(pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO)) + { + if (pFrame->AddrReturnFrame.off & 1) + { + pFrame->AddrReturnFrame.off &= ~(RTGCUINTPTR)1; + if (pFrame->enmReturnType == RTDBGRETURNTYPE_NEAR16) + { + pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_USED_ODD_EVEN; + pFrame->enmReturnType = RTDBGRETURNTYPE_FAR16; + cbRetAddr = 4; + } + else if (pFrame->enmReturnType == RTDBGRETURNTYPE_NEAR32) + { +#if 1 + /* Assumes returning 32-bit code. */ + pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_USED_ODD_EVEN; + pFrame->enmReturnType = RTDBGRETURNTYPE_FAR32; + cbRetAddr = 8; +#else + /* Assumes returning 16-bit code. */ + pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_USED_ODD_EVEN; + pFrame->enmReturnType = RTDBGRETURNTYPE_FAR16; + cbRetAddr = 4; +#endif + } + } + else if (pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_ODD_EVEN) + { + if (pFrame->enmReturnType == RTDBGRETURNTYPE_FAR16) + { + pFrame->enmReturnType = RTDBGRETURNTYPE_NEAR16; + cbRetAddr = 2; + } + else if (pFrame->enmReturnType == RTDBGRETURNTYPE_NEAR32) + { + pFrame->enmReturnType = RTDBGRETURNTYPE_FAR32; + cbRetAddr = 4; + } + pFrame->fFlags &= ~DBGFSTACKFRAME_FLAGS_USED_ODD_EVEN; + } + uArgs.pb = u.pb + cbStackItem + cbRetAddr; + } + + pFrame->AddrReturnFrame.FlatPtr += pFrame->AddrReturnFrame.off - pFrame->AddrFrame.off; + } + + /* + * Return Stack Address. + */ + pFrame->AddrReturnStack = pFrame->AddrReturnFrame; + if (pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO) + { + if ( pFrame->enmReturnType == RTDBGRETURNTYPE_IRET32_PRIV + || pFrame->enmReturnType == RTDBGRETURNTYPE_IRET64) + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnStack, + pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_SS], pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xSP]); + else if (pFrame->enmReturnType == RTDBGRETURNTYPE_IRET32_V86) + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &pFrame->AddrReturnStack, + ((uint32_t)pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_SS] << 4) + + pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xSP]); + else + { + pFrame->AddrReturnStack.off = pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xSP]; + pFrame->AddrReturnStack.FlatPtr += pFrame->AddrReturnStack.off - pFrame->AddrStack.off; + } + } + else + { + pFrame->AddrReturnStack.off += cbStackItem + cbRetAddr; + pFrame->AddrReturnStack.FlatPtr += cbStackItem + cbRetAddr; + } + + /* + * Return PC. 
+ */ + pFrame->AddrReturnPC = pFrame->AddrPC; + if (pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO) + { + if (RTDbgReturnTypeIsNear(pFrame->enmReturnType)) + { + pFrame->AddrReturnPC.off = pUnwindCtx->m_State.uPc; + pFrame->AddrReturnPC.FlatPtr += pFrame->AddrReturnPC.off - pFrame->AddrPC.off; + } + else + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, + pUnwindCtx->m_State.u.x86.auSegs[X86_SREG_CS], pUnwindCtx->m_State.uPc); + } + else + { + int rc2; + switch (pFrame->enmReturnType) + { + case RTDBGRETURNTYPE_NEAR16: + if (DBGFADDRESS_IS_VALID(&pFrame->AddrReturnPC)) + { + pFrame->AddrReturnPC.FlatPtr += *uRet.pu16 - pFrame->AddrReturnPC.off; + pFrame->AddrReturnPC.off = *uRet.pu16; + } + else + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &pFrame->AddrReturnPC, *uRet.pu16); + break; + case RTDBGRETURNTYPE_NEAR32: + if (DBGFADDRESS_IS_VALID(&pFrame->AddrReturnPC)) + { + pFrame->AddrReturnPC.FlatPtr += *uRet.pu32 - pFrame->AddrReturnPC.off; + pFrame->AddrReturnPC.off = *uRet.pu32; + } + else + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &pFrame->AddrReturnPC, *uRet.pu32); + break; + case RTDBGRETURNTYPE_NEAR64: + if (DBGFADDRESS_IS_VALID(&pFrame->AddrReturnPC)) + { + pFrame->AddrReturnPC.FlatPtr += *uRet.pu64 - pFrame->AddrReturnPC.off; + pFrame->AddrReturnPC.off = *uRet.pu64; + } + else + DBGFR3AddrFromFlat(pUnwindCtx->m_pUVM, &pFrame->AddrReturnPC, *uRet.pu64); + break; + case RTDBGRETURNTYPE_FAR16: + rc2 = DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[1], uRet.pu16[0]); + if (RT_SUCCESS(rc2)) + break; + rc2 = DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, pFrame->AddrPC.Sel, uRet.pu16[0]); + if (RT_SUCCESS(rc2)) + pFrame->enmReturnType = RTDBGRETURNTYPE_NEAR16; + else + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[1], uRet.pu16[0]); + break; + case RTDBGRETURNTYPE_FAR32: + rc2 = DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[2], uRet.pu32[0]); + if (RT_SUCCESS(rc2)) + break; + rc2 = DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, pFrame->AddrPC.Sel, uRet.pu32[0]); + if (RT_SUCCESS(rc2)) + pFrame->enmReturnType = RTDBGRETURNTYPE_NEAR32; + else + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[2], uRet.pu32[0]); + break; + case RTDBGRETURNTYPE_FAR64: + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[4], uRet.pu64[0]); + break; + case RTDBGRETURNTYPE_IRET16: + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[1], uRet.pu16[0]); + break; + case RTDBGRETURNTYPE_IRET32: + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[2], uRet.pu32[0]); + break; + case RTDBGRETURNTYPE_IRET32_PRIV: + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[2], uRet.pu32[0]); + break; + case RTDBGRETURNTYPE_IRET32_V86: + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[2], uRet.pu32[0]); + break; + case RTDBGRETURNTYPE_IRET64: + DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &pFrame->AddrReturnPC, uRet.pu16[4], uRet.pu64[0]); + break; + default: + AssertMsgFailed(("enmReturnType=%d\n", pFrame->enmReturnType)); + return VERR_INVALID_PARAMETER; + } + } + + + pFrame->pSymReturnPC = 
DBGFR3AsSymbolByAddrA(pUnwindCtx->m_pUVM, pUnwindCtx->m_hAs, &pFrame->AddrReturnPC, + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + NULL /*poffDisp*/, NULL /*phMod*/); + pFrame->pLineReturnPC = DBGFR3AsLineByAddrA(pUnwindCtx->m_pUVM, pUnwindCtx->m_hAs, &pFrame->AddrReturnPC, + NULL /*poffDisp*/, NULL /*phMod*/); + + /* + * Frame bitness flag. + */ + /** @todo use previous return type for this? */ + pFrame->fFlags &= ~(DBGFSTACKFRAME_FLAGS_16BIT | DBGFSTACKFRAME_FLAGS_32BIT | DBGFSTACKFRAME_FLAGS_64BIT); + switch (cbStackItem) + { + case 2: pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_16BIT; break; + case 4: pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_32BIT; break; + case 8: pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_64BIT; break; + default: AssertMsgFailedReturn(("cbStackItem=%d\n", cbStackItem), VERR_DBGF_STACK_IPE_2); + } + + /* + * The arguments. + */ + memcpy(&pFrame->Args, uArgs.pv, sizeof(pFrame->Args)); + + /* + * Collect register changes. + * Then call the OS layer to assist us (e.g. NT trap frames). + */ + if (pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO) + { + rc = dbgfR3StackWalkCollectRegisterChanges(pUnwindCtx->m_pUVM, pFrame, &pUnwindCtx->m_State); + if (RT_FAILURE(rc)) + return rc; + + if ( pUnwindCtx->m_pInitialCtx + && pUnwindCtx->m_hAs != NIL_RTDBGAS) + { + rc = dbgfR3OSStackUnwindAssist(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, pFrame, &pUnwindCtx->m_State, + pUnwindCtx->m_pInitialCtx, pUnwindCtx->m_hAs, &pUnwindCtx->m_uOsScratch); + if (RT_FAILURE(rc)) + return rc; + } + } + + /* + * Try use unwind information to locate the return frame pointer (for the + * next loop iteration). + */ + Assert(!(pFrame->fFlags & DBGFSTACKFRAME_FLAGS_UNWIND_INFO_RET)); + pFrame->enmReturnFrameReturnType = RTDBGRETURNTYPE_INVALID; + if (!(pFrame->fFlags & DBGFSTACKFRAME_FLAGS_LAST)) + { + /* Set PC and SP if we didn't unwind our way here (context will then point + and the return PC and SP already). */ + if (!(pFrame->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO)) + { + dbgfR3UnwindCtxSetPcAndSp(pUnwindCtx, &pFrame->AddrReturnPC, &pFrame->AddrReturnStack); + pUnwindCtx->m_State.u.x86.auRegs[X86_GREG_xBP] = pFrame->AddrReturnFrame.off; + } + /** @todo Reevaluate CS if the previous frame return type isn't near. */ + if ( pUnwindCtx->m_State.enmArch == RTLDRARCH_AMD64 + || pUnwindCtx->m_State.enmArch == RTLDRARCH_X86_32 + || pUnwindCtx->m_State.enmArch == RTLDRARCH_X86_16) + pUnwindCtx->m_State.u.x86.Loaded.fAll = 0; + else + AssertFailed(); + if (dbgfR3UnwindCtxDoOneFrame(pUnwindCtx)) + { + if (pUnwindCtx->m_fIsHostRing0) + DBGFR3AddrFromHostR0(&pFrame->AddrReturnFrame, pUnwindCtx->m_State.u.x86.FrameAddr.off); + else + { + DBGFADDRESS AddrReturnFrame = pFrame->AddrReturnFrame; + rc = DBGFR3AddrFromSelOff(pUnwindCtx->m_pUVM, pUnwindCtx->m_idCpu, &AddrReturnFrame, + pUnwindCtx->m_State.u.x86.FrameAddr.sel, pUnwindCtx->m_State.u.x86.FrameAddr.off); + if (RT_SUCCESS(rc)) + pFrame->AddrReturnFrame = AddrReturnFrame; + } + pFrame->enmReturnFrameReturnType = pUnwindCtx->m_State.enmRetType; + pFrame->fFlags |= DBGFSTACKFRAME_FLAGS_UNWIND_INFO_RET; + } + } + + return VINF_SUCCESS; +} + + +/** + * Walks the entire stack allocating memory as we walk. 
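+ *
+ * In outline: the first DBGFSTACKFRAME is allocated and seeded with
+ * PC/SP/BP from the caller or the CPU context, then dbgfR3StackWalk is
+ * called repeatedly, each further frame being allocated and linked onto
+ * the previous one, until a frame gets flagged LAST, LOOP or MAX_DEPTH
+ * (2048 frames).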
+ */ +static DECLCALLBACK(int) dbgfR3StackWalkCtxFull(PUVM pUVM, VMCPUID idCpu, PCCPUMCTX pCtx, RTDBGAS hAs, + DBGFCODETYPE enmCodeType, + PCDBGFADDRESS pAddrFrame, + PCDBGFADDRESS pAddrStack, + PCDBGFADDRESS pAddrPC, + RTDBGRETURNTYPE enmReturnType, + PCDBGFSTACKFRAME *ppFirstFrame) +{ + DBGFUNWINDCTX UnwindCtx(pUVM, idCpu, pCtx, hAs); + + /* alloc first frame. */ + PDBGFSTACKFRAME pCur = (PDBGFSTACKFRAME)MMR3HeapAllocZU(pUVM, MM_TAG_DBGF_STACK, sizeof(*pCur)); + if (!pCur) + return VERR_NO_MEMORY; + + /* + * Initialize the frame. + */ + pCur->pNextInternal = NULL; + pCur->pFirstInternal = pCur; + + int rc = VINF_SUCCESS; + if (pAddrPC) + pCur->AddrPC = *pAddrPC; + else if (enmCodeType != DBGFCODETYPE_GUEST) + DBGFR3AddrFromFlat(pUVM, &pCur->AddrPC, pCtx->rip); + else + rc = DBGFR3AddrFromSelOff(pUVM, idCpu, &pCur->AddrPC, pCtx->cs.Sel, pCtx->rip); + if (RT_SUCCESS(rc)) + { + uint64_t fAddrMask; + if (enmCodeType == DBGFCODETYPE_RING0) + fAddrMask = HC_ARCH_BITS == 64 ? UINT64_MAX : UINT32_MAX; + else if (enmCodeType == DBGFCODETYPE_HYPER) + fAddrMask = UINT32_MAX; + else if (DBGFADDRESS_IS_FAR16(&pCur->AddrPC)) + fAddrMask = UINT16_MAX; + else if (DBGFADDRESS_IS_FAR32(&pCur->AddrPC)) + fAddrMask = UINT32_MAX; + else if (DBGFADDRESS_IS_FAR64(&pCur->AddrPC)) + fAddrMask = UINT64_MAX; + else + { + PVMCPU pVCpu = VMMGetCpuById(pUVM->pVM, idCpu); + CPUMMODE enmCpuMode = CPUMGetGuestMode(pVCpu); + if (enmCpuMode == CPUMMODE_REAL) + { + fAddrMask = UINT16_MAX; + if (enmReturnType == RTDBGRETURNTYPE_INVALID) + pCur->enmReturnType = RTDBGRETURNTYPE_NEAR16; + } + else if ( enmCpuMode == CPUMMODE_PROTECTED + || !CPUMIsGuestIn64BitCode(pVCpu)) + { + fAddrMask = UINT32_MAX; + if (enmReturnType == RTDBGRETURNTYPE_INVALID) + pCur->enmReturnType = RTDBGRETURNTYPE_NEAR32; + } + else + { + fAddrMask = UINT64_MAX; + if (enmReturnType == RTDBGRETURNTYPE_INVALID) + pCur->enmReturnType = RTDBGRETURNTYPE_NEAR64; + } + } + + if (enmReturnType == RTDBGRETURNTYPE_INVALID) + switch (pCur->AddrPC.fFlags & DBGFADDRESS_FLAGS_TYPE_MASK) + { + case DBGFADDRESS_FLAGS_FAR16: pCur->enmReturnType = RTDBGRETURNTYPE_NEAR16; break; + case DBGFADDRESS_FLAGS_FAR32: pCur->enmReturnType = RTDBGRETURNTYPE_NEAR32; break; + case DBGFADDRESS_FLAGS_FAR64: pCur->enmReturnType = RTDBGRETURNTYPE_NEAR64; break; + case DBGFADDRESS_FLAGS_RING0: + pCur->enmReturnType = HC_ARCH_BITS == 64 ? RTDBGRETURNTYPE_NEAR64 : RTDBGRETURNTYPE_NEAR32; + break; + default: + pCur->enmReturnType = RTDBGRETURNTYPE_NEAR32; + break; + } + + + if (pAddrStack) + pCur->AddrStack = *pAddrStack; + else if (enmCodeType != DBGFCODETYPE_GUEST) + DBGFR3AddrFromFlat(pUVM, &pCur->AddrStack, pCtx->rsp & fAddrMask); + else + rc = DBGFR3AddrFromSelOff(pUVM, idCpu, &pCur->AddrStack, pCtx->ss.Sel, pCtx->rsp & fAddrMask); + + Assert(!(pCur->fFlags & DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO)); + if (pAddrFrame) + pCur->AddrFrame = *pAddrFrame; + else if (enmCodeType != DBGFCODETYPE_GUEST) + DBGFR3AddrFromFlat(pUVM, &pCur->AddrFrame, pCtx->rbp & fAddrMask); + else if (RT_SUCCESS(rc)) + rc = DBGFR3AddrFromSelOff(pUVM, idCpu, &pCur->AddrFrame, pCtx->ss.Sel, pCtx->rbp & fAddrMask); + + /* + * Try unwind and get a better frame pointer and state. 
+ */ + if ( RT_SUCCESS(rc) + && dbgfR3UnwindCtxSetPcAndSp(&UnwindCtx, &pCur->AddrPC, &pCur->AddrStack) + && dbgfR3UnwindCtxDoOneFrame(&UnwindCtx)) + { + pCur->enmReturnType = UnwindCtx.m_State.enmRetType; + pCur->fFlags |= DBGFSTACKFRAME_FLAGS_USED_UNWIND_INFO; + if (!UnwindCtx.m_fIsHostRing0) + rc = DBGFR3AddrFromSelOff(UnwindCtx.m_pUVM, UnwindCtx.m_idCpu, &pCur->AddrFrame, + UnwindCtx.m_State.u.x86.FrameAddr.sel, UnwindCtx.m_State.u.x86.FrameAddr.off); + else + DBGFR3AddrFromHostR0(&pCur->AddrFrame, UnwindCtx.m_State.u.x86.FrameAddr.off); + } + /* + * The first frame. + */ + if (RT_SUCCESS(rc)) + { + if (DBGFADDRESS_IS_VALID(&pCur->AddrPC)) + { + pCur->pSymPC = DBGFR3AsSymbolByAddrA(pUVM, hAs, &pCur->AddrPC, + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + NULL /*poffDisp*/, NULL /*phMod*/); + pCur->pLinePC = DBGFR3AsLineByAddrA(pUVM, hAs, &pCur->AddrPC, NULL /*poffDisp*/, NULL /*phMod*/); + } + + rc = dbgfR3StackWalk(&UnwindCtx, pCur, true /*fFirst*/); + } + } + else + pCur->enmReturnType = enmReturnType; + if (RT_FAILURE(rc)) + { + DBGFR3StackWalkEnd(pCur); + return rc; + } + + /* + * The other frames. + */ + DBGFSTACKFRAME Next = *pCur; + while (!(pCur->fFlags & (DBGFSTACKFRAME_FLAGS_LAST | DBGFSTACKFRAME_FLAGS_MAX_DEPTH | DBGFSTACKFRAME_FLAGS_LOOP))) + { + Next.cSureRegs = 0; + Next.paSureRegs = NULL; + + /* try walk. */ + rc = dbgfR3StackWalk(&UnwindCtx, &Next, false /*fFirst*/); + if (RT_FAILURE(rc)) + break; + + /* add the next frame to the chain. */ + PDBGFSTACKFRAME pNext = (PDBGFSTACKFRAME)MMR3HeapAllocU(pUVM, MM_TAG_DBGF_STACK, sizeof(*pNext)); + if (!pNext) + { + DBGFR3StackWalkEnd(pCur); + return VERR_NO_MEMORY; + } + *pNext = Next; + pCur->pNextInternal = pNext; + pCur = pNext; + Assert(pCur->pNextInternal == NULL); + + /* check for loop */ + for (PCDBGFSTACKFRAME pLoop = pCur->pFirstInternal; + pLoop && pLoop != pCur; + pLoop = pLoop->pNextInternal) + if (pLoop->AddrFrame.FlatPtr == pCur->AddrFrame.FlatPtr) + { + pCur->fFlags |= DBGFSTACKFRAME_FLAGS_LOOP; + break; + } + + /* check for insane recursion */ + if (pCur->iFrame >= 2048) + pCur->fFlags |= DBGFSTACKFRAME_FLAGS_MAX_DEPTH; + } + + *ppFirstFrame = pCur->pFirstInternal; + return rc; +} + + +/** + * Common worker for DBGFR3StackWalkBeginGuestEx, DBGFR3StackWalkBeginHyperEx, + * DBGFR3StackWalkBeginGuest and DBGFR3StackWalkBeginHyper. + */ +static int dbgfR3StackWalkBeginCommon(PUVM pUVM, + VMCPUID idCpu, + DBGFCODETYPE enmCodeType, + PCDBGFADDRESS pAddrFrame, + PCDBGFADDRESS pAddrStack, + PCDBGFADDRESS pAddrPC, + RTDBGRETURNTYPE enmReturnType, + PCDBGFSTACKFRAME *ppFirstFrame) +{ + /* + * Validate parameters. + */ + *ppFirstFrame = NULL; + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + if (pAddrFrame) + AssertReturn(DBGFR3AddrIsValid(pUVM, pAddrFrame), VERR_INVALID_PARAMETER); + if (pAddrStack) + AssertReturn(DBGFR3AddrIsValid(pUVM, pAddrStack), VERR_INVALID_PARAMETER); + if (pAddrPC) + AssertReturn(DBGFR3AddrIsValid(pUVM, pAddrPC), VERR_INVALID_PARAMETER); + AssertReturn(enmReturnType >= RTDBGRETURNTYPE_INVALID && enmReturnType < RTDBGRETURNTYPE_END, VERR_INVALID_PARAMETER); + + /* + * Get the CPUM context pointer and pass it on the specified EMT. 
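+ *
+ * The walking itself is done by dbgfR3StackWalkCtxFull via a priority
+ * call on the target EMT, so the register context can be read
+ * consistently while the frames are being built.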
+ */ + RTDBGAS hAs; + PCCPUMCTX pCtx; + switch (enmCodeType) + { + case DBGFCODETYPE_GUEST: + pCtx = CPUMQueryGuestCtxPtr(VMMGetCpuById(pVM, idCpu)); + hAs = DBGF_AS_GLOBAL; + break; + case DBGFCODETYPE_HYPER: + pCtx = CPUMQueryGuestCtxPtr(VMMGetCpuById(pVM, idCpu)); + hAs = DBGF_AS_RC_AND_GC_GLOBAL; + break; + case DBGFCODETYPE_RING0: + pCtx = NULL; /* No valid context present. */ + hAs = DBGF_AS_R0; + break; + default: + AssertFailedReturn(VERR_INVALID_PARAMETER); + } + return VMR3ReqPriorityCallWaitU(pUVM, idCpu, (PFNRT)dbgfR3StackWalkCtxFull, 10, + pUVM, idCpu, pCtx, hAs, enmCodeType, + pAddrFrame, pAddrStack, pAddrPC, enmReturnType, ppFirstFrame); +} + + +/** + * Begins a guest stack walk, extended version. + * + * This will walk the current stack, constructing a list of info frames which is + * returned to the caller. The caller uses DBGFR3StackWalkNext to traverse the + * list and DBGFR3StackWalkEnd to release it. + * + * @returns VINF_SUCCESS on success. + * @returns VERR_NO_MEMORY if we're out of memory. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the virtual CPU which stack we want to walk. + * @param enmCodeType Code type + * @param pAddrFrame Frame address to start at. (Optional) + * @param pAddrStack Stack address to start at. (Optional) + * @param pAddrPC Program counter to start at. (Optional) + * @param enmReturnType The return address type. (Optional) + * @param ppFirstFrame Where to return the pointer to the first info frame. + */ +VMMR3DECL(int) DBGFR3StackWalkBeginEx(PUVM pUVM, + VMCPUID idCpu, + DBGFCODETYPE enmCodeType, + PCDBGFADDRESS pAddrFrame, + PCDBGFADDRESS pAddrStack, + PCDBGFADDRESS pAddrPC, + RTDBGRETURNTYPE enmReturnType, + PCDBGFSTACKFRAME *ppFirstFrame) +{ + return dbgfR3StackWalkBeginCommon(pUVM, idCpu, enmCodeType, pAddrFrame, pAddrStack, pAddrPC, enmReturnType, ppFirstFrame); +} + + +/** + * Begins a guest stack walk. + * + * This will walk the current stack, constructing a list of info frames which is + * returned to the caller. The caller uses DBGFR3StackWalkNext to traverse the + * list and DBGFR3StackWalkEnd to release it. + * + * @returns VINF_SUCCESS on success. + * @returns VERR_NO_MEMORY if we're out of memory. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the virtual CPU which stack we want to walk. + * @param enmCodeType Code type + * @param ppFirstFrame Where to return the pointer to the first info frame. + */ +VMMR3DECL(int) DBGFR3StackWalkBegin(PUVM pUVM, VMCPUID idCpu, DBGFCODETYPE enmCodeType, PCDBGFSTACKFRAME *ppFirstFrame) +{ + return dbgfR3StackWalkBeginCommon(pUVM, idCpu, enmCodeType, NULL, NULL, NULL, RTDBGRETURNTYPE_INVALID, ppFirstFrame); +} + +/** + * Gets the next stack frame. + * + * @returns Pointer to the info for the next stack frame. + * NULL if no more frames. + * + * @param pCurrent Pointer to the current stack frame. + * + */ +VMMR3DECL(PCDBGFSTACKFRAME) DBGFR3StackWalkNext(PCDBGFSTACKFRAME pCurrent) +{ + return pCurrent + ? pCurrent->pNextInternal + : NULL; +} + + +/** + * Ends a stack walk process. + * + * This *must* be called after a successful first call to any of the stack + * walker functions. If not called we will leak memory or other resources. + * + * @param pFirstFrame The frame returned by one of the begin functions. 
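+ *
+ * Illustrative call sequence (guest CPU 0, error handling trimmed):
+ *     PCDBGFSTACKFRAME pFirstFrame = NULL;
+ *     int rc = DBGFR3StackWalkBegin(pUVM, 0, DBGFCODETYPE_GUEST, &pFirstFrame);
+ *     if (RT_SUCCESS(rc))
+ *     {
+ *         for (PCDBGFSTACKFRAME pFrame = pFirstFrame; pFrame; pFrame = DBGFR3StackWalkNext(pFrame))
+ *             RTPrintf("#%02u %RGv\n", pFrame->iFrame, pFrame->AddrPC.FlatPtr);
+ *         DBGFR3StackWalkEnd(pFirstFrame);
+ *     }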
+ */ +VMMR3DECL(void) DBGFR3StackWalkEnd(PCDBGFSTACKFRAME pFirstFrame) +{ + if ( !pFirstFrame + || !pFirstFrame->pFirstInternal) + return; + + PDBGFSTACKFRAME pFrame = (PDBGFSTACKFRAME)pFirstFrame->pFirstInternal; + while (pFrame) + { + PDBGFSTACKFRAME pCur = pFrame; + pFrame = (PDBGFSTACKFRAME)pCur->pNextInternal; + if (pFrame) + { + if (pCur->pSymReturnPC == pFrame->pSymPC) + pFrame->pSymPC = NULL; + if (pCur->pSymReturnPC == pFrame->pSymReturnPC) + pFrame->pSymReturnPC = NULL; + + if (pCur->pSymPC == pFrame->pSymPC) + pFrame->pSymPC = NULL; + if (pCur->pSymPC == pFrame->pSymReturnPC) + pFrame->pSymReturnPC = NULL; + + if (pCur->pLineReturnPC == pFrame->pLinePC) + pFrame->pLinePC = NULL; + if (pCur->pLineReturnPC == pFrame->pLineReturnPC) + pFrame->pLineReturnPC = NULL; + + if (pCur->pLinePC == pFrame->pLinePC) + pFrame->pLinePC = NULL; + if (pCur->pLinePC == pFrame->pLineReturnPC) + pFrame->pLineReturnPC = NULL; + } + + RTDbgSymbolFree(pCur->pSymPC); + RTDbgSymbolFree(pCur->pSymReturnPC); + RTDbgLineFree(pCur->pLinePC); + RTDbgLineFree(pCur->pLineReturnPC); + + if (pCur->paSureRegs) + { + MMR3HeapFree(pCur->paSureRegs); + pCur->paSureRegs = NULL; + pCur->cSureRegs = 0; + } + + pCur->pNextInternal = NULL; + pCur->pFirstInternal = NULL; + pCur->fFlags = 0; + MMR3HeapFree(pCur); + } +} + diff --git a/src/VBox/VMM/VMMR3/EM.cpp b/src/VBox/VMM/VMMR3/EM.cpp new file mode 100644 index 00000000..640d11cc --- /dev/null +++ b/src/VBox/VMM/VMMR3/EM.cpp @@ -0,0 +1,3089 @@ +/* $Id: EM.cpp $ */ +/** @file + * EM - Execution Monitor / Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_em EM - The Execution Monitor / Manager + * + * The Execution Monitor/Manager is responsible for running the VM, scheduling + * the right kind of execution (Raw-mode, Hardware Assisted, Recompiled or + * Interpreted), and keeping the CPU states in sync. The function + * EMR3ExecuteVM() is the 'main-loop' of the VM, while each of the execution + * modes has different inner loops (emR3RawExecute, emR3HmExecute, and + * emR3RemExecute). + * + * The interpreted execution is only used to avoid switching between + * raw-mode/hm and the recompiler when fielding virtualization traps/faults. + * The interpretation is thus implemented as part of EM. 
+ * + * @see grp_em + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_EM +#define VMCPU_INCL_CPUM_GST_CTX /* for CPUM_IMPORT_GUEST_STATE_RET */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "EMInternal.h" +#include +#include +#include +#include +#include +#include +#include "VMMTracing.h" + +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) emR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) emR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +#if defined(LOG_ENABLED) || defined(VBOX_STRICT) +static const char *emR3GetStateName(EMSTATE enmState); +#endif +static VBOXSTRICTRC emR3Debug(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc); +#if defined(VBOX_WITH_REM) || defined(DEBUG) +static int emR3RemStep(PVM pVM, PVMCPU pVCpu); +#endif +static int emR3RemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone); + + +/** + * Initializes the EM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) EMR3Init(PVM pVM) +{ + LogFlow(("EMR3Init\n")); + /* + * Assert alignment and sizes. + */ + AssertCompileMemberAlignment(VM, em.s, 32); + AssertCompile(sizeof(pVM->em.s) <= sizeof(pVM->em.padding)); + AssertCompile(sizeof(pVM->aCpus[0].em.s.u.FatalLongJump) <= sizeof(pVM->aCpus[0].em.s.u.achPaddingFatalLongJump)); + + /* + * Init the structure. + */ + pVM->em.s.offVM = RT_UOFFSETOF(VM, em.s); + PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM); + PCFGMNODE pCfgEM = CFGMR3GetChild(pCfgRoot, "EM"); + + bool fEnabled; + int rc = CFGMR3QueryBoolDef(pCfgRoot, "RawR3Enabled", &fEnabled, true); + AssertLogRelRCReturn(rc, rc); + pVM->fRecompileUser = !fEnabled; + + rc = CFGMR3QueryBoolDef(pCfgRoot, "RawR0Enabled", &fEnabled, true); + AssertLogRelRCReturn(rc, rc); + pVM->fRecompileSupervisor = !fEnabled; + +#ifdef VBOX_WITH_RAW_RING1 + rc = CFGMR3QueryBoolDef(pCfgRoot, "RawR1Enabled", &pVM->fRawRing1Enabled, false); + AssertLogRelRCReturn(rc, rc); +#else + pVM->fRawRing1Enabled = false; /* Disabled by default. 
 */ +#endif + + rc = CFGMR3QueryBoolDef(pCfgEM, "IemExecutesAll", &pVM->em.s.fIemExecutesAll, false); + AssertLogRelRCReturn(rc, rc); + + rc = CFGMR3QueryBoolDef(pCfgEM, "TripleFaultReset", &fEnabled, false); + AssertLogRelRCReturn(rc, rc); + pVM->em.s.fGuruOnTripleFault = !fEnabled; + if (!pVM->em.s.fGuruOnTripleFault && pVM->cCpus > 1) + { + LogRel(("EM: Overriding /EM/TripleFaultReset, must be false on SMP.\n")); + pVM->em.s.fGuruOnTripleFault = true; + } + + LogRel(("EMR3Init: fRecompileUser=%RTbool fRecompileSupervisor=%RTbool fRawRing1Enabled=%RTbool fIemExecutesAll=%RTbool fGuruOnTripleFault=%RTbool\n", + pVM->fRecompileUser, pVM->fRecompileSupervisor, pVM->fRawRing1Enabled, pVM->em.s.fIemExecutesAll, pVM->em.s.fGuruOnTripleFault)); + + /** @cfgm{/EM/ExitOptimizationEnabled, bool, true} + * Whether to try to correlate exit history in any context, detect hot spots and + * try to optimize these using IEM if there are other exits close by. This + * overrides the context specific settings. */ + bool fExitOptimizationEnabled = true; + rc = CFGMR3QueryBoolDef(pCfgEM, "ExitOptimizationEnabled", &fExitOptimizationEnabled, true); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/EM/ExitOptimizationEnabledR0, bool, true} + * Whether to optimize exits in ring-0. Setting this to false will also disable + * the /EM/ExitOptimizationEnabledR0PreemptDisabled setting. Depending on preemption + * capabilities of the host kernel, this optimization may be unavailable. */ + bool fExitOptimizationEnabledR0 = true; + rc = CFGMR3QueryBoolDef(pCfgEM, "ExitOptimizationEnabledR0", &fExitOptimizationEnabledR0, true); + AssertLogRelRCReturn(rc, rc); + fExitOptimizationEnabledR0 &= fExitOptimizationEnabled; + + /** @cfgm{/EM/ExitOptimizationEnabledR0PreemptDisabled, bool, false} + * Whether to optimize exits in ring-0 when preemption is disabled (or preemption + * hooks are in effect). */ + /** @todo change the default to true here */ + bool fExitOptimizationEnabledR0PreemptDisabled = true; + rc = CFGMR3QueryBoolDef(pCfgEM, "ExitOptimizationEnabledR0PreemptDisabled", &fExitOptimizationEnabledR0PreemptDisabled, false); + AssertLogRelRCReturn(rc, rc); + fExitOptimizationEnabledR0PreemptDisabled &= fExitOptimizationEnabledR0; + + /** @cfgm{/EM/HistoryExecMaxInstructions, integer, 16, 65535, 8192} + * Maximum number of instructions to let EMHistoryExec execute in one go. */ + uint16_t cHistoryExecMaxInstructions = 8192; + rc = CFGMR3QueryU16Def(pCfgEM, "HistoryExecMaxInstructions", &cHistoryExecMaxInstructions, cHistoryExecMaxInstructions); + AssertLogRelRCReturn(rc, rc); + if (cHistoryExecMaxInstructions < 16) + return VMSetError(pVM, VERR_OUT_OF_RANGE, RT_SRC_POS, "/EM/HistoryExecMaxInstructions value is too small, min 16"); + + /** @cfgm{/EM/HistoryProbeMaxInstructionsWithoutExit, integer, 2, 65535, 24 for HM, 32 for NEM} + * Maximum number of instructions between exits during probing.
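+ *
+ * A minimal, hypothetical sketch of seeding one of the EM keys documented above
+ * through the CFGM API before the VM is powered up (where such a call lives is
+ * an assumption; in practice the values normally arrive via the extradata to
+ * CFGM mapping):
+ * @code
+ *      PCFGMNODE pCfgEM = CFGMR3GetChild(CFGMR3GetRoot(pVM), "EM");
+ *      int rcCfg = CFGMR3InsertInteger(pCfgEM, "HistoryExecMaxInstructions", 4096);
+ * @endcode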
*/ + uint16_t cHistoryProbeMaxInstructionsWithoutExit = 24; +#ifdef RT_OS_WINDOWS + if (VM_IS_NEM_ENABLED(pVM)) + cHistoryProbeMaxInstructionsWithoutExit = 32; +#endif + rc = CFGMR3QueryU16Def(pCfgEM, "HistoryProbeMaxInstructionsWithoutExit", &cHistoryProbeMaxInstructionsWithoutExit, + cHistoryProbeMaxInstructionsWithoutExit); + AssertLogRelRCReturn(rc, rc); + if (cHistoryProbeMaxInstructionsWithoutExit < 2) + return VMSetError(pVM, VERR_OUT_OF_RANGE, RT_SRC_POS, + "/EM/HistoryProbeMaxInstructionsWithoutExit value is too small, min 16"); + + /** @cfgm{/EM/HistoryProbMinInstructions, integer, 0, 65535, depends} + * The default is (/EM/HistoryProbeMaxInstructionsWithoutExit + 1) * 3. */ + uint16_t cHistoryProbeMinInstructions = cHistoryProbeMaxInstructionsWithoutExit < 0x5554 + ? (cHistoryProbeMaxInstructionsWithoutExit + 1) * 3 : 0xffff; + rc = CFGMR3QueryU16Def(pCfgEM, "HistoryProbMinInstructions", &cHistoryProbeMinInstructions, + cHistoryProbeMinInstructions); + AssertLogRelRCReturn(rc, rc); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + pVM->aCpus[i].em.s.fExitOptimizationEnabled = fExitOptimizationEnabled; + pVM->aCpus[i].em.s.fExitOptimizationEnabledR0 = fExitOptimizationEnabledR0; + pVM->aCpus[i].em.s.fExitOptimizationEnabledR0PreemptDisabled = fExitOptimizationEnabledR0PreemptDisabled; + + pVM->aCpus[i].em.s.cHistoryExecMaxInstructions = cHistoryExecMaxInstructions; + pVM->aCpus[i].em.s.cHistoryProbeMinInstructions = cHistoryProbeMinInstructions; + pVM->aCpus[i].em.s.cHistoryProbeMaxInstructionsWithoutExit = cHistoryProbeMaxInstructionsWithoutExit; + } + +#ifdef VBOX_WITH_REM + /* + * Initialize the REM critical section. + */ + AssertCompileMemberAlignment(EM, CritSectREM, sizeof(uintptr_t)); + rc = PDMR3CritSectInit(pVM, &pVM->em.s.CritSectREM, RT_SRC_POS, "EM-REM"); + AssertRCReturn(rc, rc); +#endif + + /* + * Saved state. + */ + rc = SSMR3RegisterInternal(pVM, "em", 0, EM_SAVED_STATE_VERSION, 16, + NULL, NULL, NULL, + NULL, emR3Save, NULL, + NULL, emR3Load, NULL); + if (RT_FAILURE(rc)) + return rc; + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->em.s.enmState = i == 0 ? EMSTATE_NONE : EMSTATE_WAIT_SIPI; + pVCpu->em.s.enmPrevState = EMSTATE_NONE; + pVCpu->em.s.fForceRAW = false; + pVCpu->em.s.u64TimeSliceStart = 0; /* paranoia */ + pVCpu->em.s.idxContinueExitRec = UINT16_MAX; + +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + pVCpu->em.s.pPatmGCState = PATMR3QueryGCStateHC(pVM); + AssertMsg(pVCpu->em.s.pPatmGCState, ("PATMR3QueryGCStateHC failed!\n")); + } +#endif + +# define EM_REG_COUNTER(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, c, b, i); \ + AssertRC(rc); + +# define EM_REG_COUNTER_USED(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, c, b, i); \ + AssertRC(rc); + +# define EM_REG_PROFILE(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL, c, b, i); \ + AssertRC(rc); + +# define EM_REG_PROFILE_ADV(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL, c, b, i); \ + AssertRC(rc); + + /* + * Statistics. 
+ */ +#ifdef VBOX_WITH_STATISTICS + PEMSTATS pStats; + rc = MMHyperAlloc(pVM, sizeof(*pStats), 0, MM_TAG_EM, (void **)&pStats); + if (RT_FAILURE(rc)) + return rc; + + pVCpu->em.s.pStatsR3 = pStats; + pVCpu->em.s.pStatsR0 = MMHyperR3ToR0(pVM, pStats); + pVCpu->em.s.pStatsRC = MMHyperR3ToRC(pVM, pStats); + +# if 1 /* rawmode only? */ + EM_REG_COUNTER_USED(&pStats->StatIoRestarted, "/EM/CPU%d/R3/PrivInst/IoRestarted", "I/O instructions restarted in ring-3."); + EM_REG_COUNTER_USED(&pStats->StatIoIem, "/EM/CPU%d/R3/PrivInst/IoIem", "I/O instructions end to IEM in ring-3."); + EM_REG_COUNTER_USED(&pStats->StatCli, "/EM/CPU%d/R3/PrivInst/Cli", "Number of cli instructions."); + EM_REG_COUNTER_USED(&pStats->StatSti, "/EM/CPU%d/R3/PrivInst/Sti", "Number of sli instructions."); + EM_REG_COUNTER_USED(&pStats->StatHlt, "/EM/CPU%d/R3/PrivInst/Hlt", "Number of hlt instructions not handled in GC because of PATM."); + EM_REG_COUNTER_USED(&pStats->StatInvlpg, "/EM/CPU%d/R3/PrivInst/Invlpg", "Number of invlpg instructions."); + EM_REG_COUNTER_USED(&pStats->StatMisc, "/EM/CPU%d/R3/PrivInst/Misc", "Number of misc. instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovWriteCR[0], "/EM/CPU%d/R3/PrivInst/Mov CR0, X", "Number of mov CR0 write instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovWriteCR[1], "/EM/CPU%d/R3/PrivInst/Mov CR1, X", "Number of mov CR1 write instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovWriteCR[2], "/EM/CPU%d/R3/PrivInst/Mov CR2, X", "Number of mov CR2 write instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovWriteCR[3], "/EM/CPU%d/R3/PrivInst/Mov CR3, X", "Number of mov CR3 write instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovWriteCR[4], "/EM/CPU%d/R3/PrivInst/Mov CR4, X", "Number of mov CR4 write instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovReadCR[0], "/EM/CPU%d/R3/PrivInst/Mov X, CR0", "Number of mov CR0 read instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovReadCR[1], "/EM/CPU%d/R3/PrivInst/Mov X, CR1", "Number of mov CR1 read instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovReadCR[2], "/EM/CPU%d/R3/PrivInst/Mov X, CR2", "Number of mov CR2 read instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovReadCR[3], "/EM/CPU%d/R3/PrivInst/Mov X, CR3", "Number of mov CR3 read instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovReadCR[4], "/EM/CPU%d/R3/PrivInst/Mov X, CR4", "Number of mov CR4 read instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovDRx, "/EM/CPU%d/R3/PrivInst/MovDRx", "Number of mov DRx instructions."); + EM_REG_COUNTER_USED(&pStats->StatIret, "/EM/CPU%d/R3/PrivInst/Iret", "Number of iret instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovLgdt, "/EM/CPU%d/R3/PrivInst/Lgdt", "Number of lgdt instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovLidt, "/EM/CPU%d/R3/PrivInst/Lidt", "Number of lidt instructions."); + EM_REG_COUNTER_USED(&pStats->StatMovLldt, "/EM/CPU%d/R3/PrivInst/Lldt", "Number of lldt instructions."); + EM_REG_COUNTER_USED(&pStats->StatSysEnter, "/EM/CPU%d/R3/PrivInst/Sysenter", "Number of sysenter instructions."); + EM_REG_COUNTER_USED(&pStats->StatSysExit, "/EM/CPU%d/R3/PrivInst/Sysexit", "Number of sysexit instructions."); + EM_REG_COUNTER_USED(&pStats->StatSysCall, "/EM/CPU%d/R3/PrivInst/Syscall", "Number of syscall instructions."); + EM_REG_COUNTER_USED(&pStats->StatSysRet, "/EM/CPU%d/R3/PrivInst/Sysret", "Number of sysret instructions."); + EM_REG_COUNTER(&pVCpu->em.s.StatTotalClis, "/EM/CPU%d/Cli/Total", "Total number of cli instructions executed."); +#endif + pVCpu->em.s.pCliStatTree = 0; + + /* these 
should be considered for release statistics. */ + EM_REG_COUNTER(&pVCpu->em.s.StatIOEmu, "/PROF/CPU%d/EM/Emulation/IO", "Profiling of emR3RawExecuteIOInstruction."); + EM_REG_COUNTER(&pVCpu->em.s.StatPrivEmu, "/PROF/CPU%d/EM/Emulation/Priv", "Profiling of emR3RawPrivileged."); + EM_REG_PROFILE(&pVCpu->em.s.StatHMEntry, "/PROF/CPU%d/EM/HMEnter", "Profiling Hardware Accelerated Mode entry overhead."); + EM_REG_PROFILE(&pVCpu->em.s.StatHMExec, "/PROF/CPU%d/EM/HMExec", "Profiling Hardware Accelerated Mode execution."); + EM_REG_COUNTER(&pVCpu->em.s.StatHMExecuteCalled, "/PROF/CPU%d/EM/HMExecuteCalled", "Number of times enmR3HMExecute is called."); + EM_REG_PROFILE(&pVCpu->em.s.StatIEMEmu, "/PROF/CPU%d/EM/IEMEmuSingle", "Profiling single instruction IEM execution."); + EM_REG_PROFILE(&pVCpu->em.s.StatIEMThenREM, "/PROF/CPU%d/EM/IEMThenRem", "Profiling IEM-then-REM instruction execution (by IEM)."); + EM_REG_PROFILE(&pVCpu->em.s.StatNEMEntry, "/PROF/CPU%d/EM/NEMEnter", "Profiling NEM entry overhead."); +#endif /* VBOX_WITH_STATISTICS */ + EM_REG_PROFILE(&pVCpu->em.s.StatNEMExec, "/PROF/CPU%d/EM/NEMExec", "Profiling NEM execution."); + EM_REG_COUNTER(&pVCpu->em.s.StatNEMExecuteCalled, "/PROF/CPU%d/EM/NEMExecuteCalled", "Number of times enmR3NEMExecute is called."); +#ifdef VBOX_WITH_STATISTICS + EM_REG_PROFILE(&pVCpu->em.s.StatREMEmu, "/PROF/CPU%d/EM/REMEmuSingle", "Profiling single instruction REM execution."); + EM_REG_PROFILE(&pVCpu->em.s.StatREMExec, "/PROF/CPU%d/EM/REMExec", "Profiling REM execution."); + EM_REG_PROFILE(&pVCpu->em.s.StatREMSync, "/PROF/CPU%d/EM/REMSync", "Profiling REM context syncing."); + EM_REG_PROFILE(&pVCpu->em.s.StatRAWEntry, "/PROF/CPU%d/EM/RAWEnter", "Profiling Raw Mode entry overhead."); + EM_REG_PROFILE(&pVCpu->em.s.StatRAWExec, "/PROF/CPU%d/EM/RAWExec", "Profiling Raw Mode execution."); + EM_REG_PROFILE(&pVCpu->em.s.StatRAWTail, "/PROF/CPU%d/EM/RAWTail", "Profiling Raw Mode tail overhead."); +#endif /* VBOX_WITH_STATISTICS */ + + EM_REG_COUNTER(&pVCpu->em.s.StatForcedActions, "/PROF/CPU%d/EM/ForcedActions", "Profiling forced action execution."); + EM_REG_COUNTER(&pVCpu->em.s.StatHalted, "/PROF/CPU%d/EM/Halted", "Profiling halted state (VMR3WaitHalted)."); + EM_REG_PROFILE_ADV(&pVCpu->em.s.StatCapped, "/PROF/CPU%d/EM/Capped", "Profiling capped state (sleep)."); + EM_REG_COUNTER(&pVCpu->em.s.StatREMTotal, "/PROF/CPU%d/EM/REMTotal", "Profiling emR3RemExecute (excluding FFs)."); + EM_REG_COUNTER(&pVCpu->em.s.StatRAWTotal, "/PROF/CPU%d/EM/RAWTotal", "Profiling emR3RawExecute (excluding FFs)."); + + EM_REG_PROFILE_ADV(&pVCpu->em.s.StatTotal, "/PROF/CPU%d/EM/Total", "Profiling EMR3ExecuteVM."); + + rc = STAMR3RegisterF(pVM, &pVCpu->em.s.iNextExit, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of recorded exits.", "/PROF/CPU%u/EM/RecordedExits", i); + AssertRC(rc); + + /* History record statistics */ + rc = STAMR3RegisterF(pVM, &pVCpu->em.s.cExitRecordUsed, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of used hash table entries.", "/EM/CPU%u/ExitHashing/Used", i); + AssertRC(rc); + + for (uint32_t iStep = 0; iStep < RT_ELEMENTS(pVCpu->em.s.aStatHistoryRecHits); iStep++) + { + rc = STAMR3RegisterF(pVM, &pVCpu->em.s.aStatHistoryRecHits[iStep], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Number of hits at this step.", "/EM/CPU%u/ExitHashing/Step%02u-Hits", i, iStep); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->em.s.aStatHistoryRecTypeChanged[iStep], STAMTYPE_COUNTER, STAMVISIBILITY_USED, 
STAMUNIT_OCCURENCES, + "Number of type changes at this step.", "/EM/CPU%u/ExitHashing/Step%02u-TypeChanges", i, iStep); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->em.s.aStatHistoryRecReplaced[iStep], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Number of replacements at this step.", "/EM/CPU%u/ExitHashing/Step%02u-Replacements", i, iStep); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->em.s.aStatHistoryRecNew[iStep], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Number of new inserts at this step.", "/EM/CPU%u/ExitHashing/Step%02u-NewInserts", i, iStep); + AssertRC(rc); + } + + EM_REG_PROFILE(&pVCpu->em.s.StatHistoryExec, "/EM/CPU%d/ExitOpt/Exec", "Profiling normal EMHistoryExec operation."); + EM_REG_COUNTER(&pVCpu->em.s.StatHistoryExecSavedExits, "/EM/CPU%d/ExitOpt/ExecSavedExit", "Net number of saved exits."); + EM_REG_COUNTER(&pVCpu->em.s.StatHistoryExecInstructions, "/EM/CPU%d/ExitOpt/ExecInstructions", "Number of instructions executed during normal operation."); + EM_REG_PROFILE(&pVCpu->em.s.StatHistoryProbe, "/EM/CPU%d/ExitOpt/Probe", "Profiling EMHistoryExec when probing."); + EM_REG_COUNTER(&pVCpu->em.s.StatHistoryProbeInstructions, "/EM/CPU%d/ExitOpt/ProbeInstructions", "Number of instructions executed during probing."); + EM_REG_COUNTER(&pVCpu->em.s.StatHistoryProbedNormal, "/EM/CPU%d/ExitOpt/ProbedNormal", "Number of EMEXITACTION_NORMAL_PROBED results."); + EM_REG_COUNTER(&pVCpu->em.s.StatHistoryProbedExecWithMax, "/EM/CPU%d/ExitOpt/ProbedExecWithMax", "Number of EMEXITACTION_EXEC_WITH_MAX results."); + EM_REG_COUNTER(&pVCpu->em.s.StatHistoryProbedToRing3, "/EM/CPU%d/ExitOpt/ProbedToRing3", "Number of ring-3 probe continuations."); + } + + emR3InitDbg(pVM); + return VINF_SUCCESS; +} + + +/** + * Called when a VM initialization stage is completed. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat The initialization state that was completed. + */ +VMMR3_INT_DECL(int) EMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + if (enmWhat == VMINITCOMPLETED_RING0) + LogRel(("EM: Exit history optimizations: enabled=%RTbool enabled-r0=%RTbool enabled-r0-no-preemption=%RTbool\n", + pVM->aCpus[0].em.s.fExitOptimizationEnabled, pVM->aCpus[0].em.s.fExitOptimizationEnabledR0, + pVM->aCpus[0].em.s.fExitOptimizationEnabledR0PreemptDisabled)); + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM needs to relocate itself inside the GC. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) EMR3Relocate(PVM pVM) +{ + LogFlow(("EMR3Relocate\n")); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + if (pVCpu->em.s.pStatsR3) + pVCpu->em.s.pStatsRC = MMHyperR3ToRC(pVM, pVCpu->em.s.pStatsR3); + } +} + + +/** + * Reset the EM state for a CPU. + * + * Called by EMR3Reset and hot plugging. + * + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(void) EMR3ResetCpu(PVMCPU pVCpu) +{ + /* Reset scheduling state. */ + pVCpu->em.s.fForceRAW = false; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_UNHALT); + + /* VMR3ResetFF may return VINF_EM_RESET or VINF_EM_SUSPEND, so transition + out of the HALTED state here so that enmPrevState doesn't end up as + HALTED when EMR3Execute returns.
*/ + if (pVCpu->em.s.enmState == EMSTATE_HALTED) + { + Log(("EMR3ResetCpu: Cpu#%u %s -> %s\n", pVCpu->idCpu, emR3GetStateName(pVCpu->em.s.enmState), pVCpu->idCpu == 0 ? "EMSTATE_NONE" : "EMSTATE_WAIT_SIPI")); + pVCpu->em.s.enmState = pVCpu->idCpu == 0 ? EMSTATE_NONE : EMSTATE_WAIT_SIPI; + } +} + + +/** + * Reset notification. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) EMR3Reset(PVM pVM) +{ + Log(("EMR3Reset: \n")); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + EMR3ResetCpu(&pVM->aCpus[i]); +} + + +/** + * Terminates the EM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) EMR3Term(PVM pVM) +{ + AssertMsg(pVM->em.s.offVM, ("bad init order!\n")); + +#ifdef VBOX_WITH_REM + PDMR3CritSectDelete(&pVM->em.s.CritSectREM); +#else + RT_NOREF(pVM); +#endif + return VINF_SUCCESS; +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) emR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + SSMR3PutBool(pSSM, pVCpu->em.s.fForceRAW); + + Assert(pVCpu->em.s.enmState == EMSTATE_SUSPENDED); + Assert(pVCpu->em.s.enmPrevState != EMSTATE_SUSPENDED); + SSMR3PutU32(pSSM, pVCpu->em.s.enmPrevState); + + /* Save mwait state. */ + SSMR3PutU32(pSSM, pVCpu->em.s.MWait.fWait); + SSMR3PutGCPtr(pSSM, pVCpu->em.s.MWait.uMWaitRAX); + SSMR3PutGCPtr(pSSM, pVCpu->em.s.MWait.uMWaitRCX); + SSMR3PutGCPtr(pSSM, pVCpu->em.s.MWait.uMonitorRAX); + SSMR3PutGCPtr(pSSM, pVCpu->em.s.MWait.uMonitorRCX); + int rc = SSMR3PutGCPtr(pSSM, pVCpu->em.s.MWait.uMonitorRDX); + AssertRCReturn(rc, rc); + } + return VINF_SUCCESS; +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) emR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + /* + * Validate version. + */ + if ( uVersion > EM_SAVED_STATE_VERSION + || uVersion < EM_SAVED_STATE_VERSION_PRE_SMP) + { + AssertMsgFailed(("emR3Load: Invalid version uVersion=%d (current %d)!\n", uVersion, EM_SAVED_STATE_VERSION)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Load the saved state. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + int rc = SSMR3GetBool(pSSM, &pVCpu->em.s.fForceRAW); + if (RT_FAILURE(rc)) + pVCpu->em.s.fForceRAW = false; + AssertRCReturn(rc, rc); + + if (uVersion > EM_SAVED_STATE_VERSION_PRE_SMP) + { + AssertCompile(sizeof(pVCpu->em.s.enmPrevState) == sizeof(uint32_t)); + rc = SSMR3GetU32(pSSM, (uint32_t *)&pVCpu->em.s.enmPrevState); + AssertRCReturn(rc, rc); + Assert(pVCpu->em.s.enmPrevState != EMSTATE_SUSPENDED); + + pVCpu->em.s.enmState = EMSTATE_SUSPENDED; + } + if (uVersion > EM_SAVED_STATE_VERSION_PRE_MWAIT) + { + /* Load mwait state. 
 */ + rc = SSMR3GetU32(pSSM, &pVCpu->em.s.MWait.fWait); + AssertRCReturn(rc, rc); + rc = SSMR3GetGCPtr(pSSM, &pVCpu->em.s.MWait.uMWaitRAX); + AssertRCReturn(rc, rc); + rc = SSMR3GetGCPtr(pSSM, &pVCpu->em.s.MWait.uMWaitRCX); + AssertRCReturn(rc, rc); + rc = SSMR3GetGCPtr(pSSM, &pVCpu->em.s.MWait.uMonitorRAX); + AssertRCReturn(rc, rc); + rc = SSMR3GetGCPtr(pSSM, &pVCpu->em.s.MWait.uMonitorRCX); + AssertRCReturn(rc, rc); + rc = SSMR3GetGCPtr(pSSM, &pVCpu->em.s.MWait.uMonitorRDX); + AssertRCReturn(rc, rc); + } + + Assert(!pVCpu->em.s.pCliStatTree); + } + return VINF_SUCCESS; +} + + +/** + * Argument packet for emR3SetExecutionPolicy. + */ +struct EMR3SETEXECPOLICYARGS +{ + EMEXECPOLICY enmPolicy; + bool fEnforce; +}; + + +/** + * @callback_method_impl{FNVMMEMTRENDEZVOUS, Rendezvous callback for EMR3SetExecutionPolicy.} + */ +static DECLCALLBACK(VBOXSTRICTRC) emR3SetExecutionPolicy(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + /* + * Only the first CPU changes the variables. + */ + if (pVCpu->idCpu == 0) + { + struct EMR3SETEXECPOLICYARGS *pArgs = (struct EMR3SETEXECPOLICYARGS *)pvUser; + switch (pArgs->enmPolicy) + { + case EMEXECPOLICY_RECOMPILE_RING0: + pVM->fRecompileSupervisor = pArgs->fEnforce; + break; + case EMEXECPOLICY_RECOMPILE_RING3: + pVM->fRecompileUser = pArgs->fEnforce; + break; + case EMEXECPOLICY_IEM_ALL: + pVM->em.s.fIemExecutesAll = pArgs->fEnforce; + break; + default: + AssertFailedReturn(VERR_INVALID_PARAMETER); + } + Log(("EM: Set execution policy (fRecompileUser=%RTbool fRecompileSupervisor=%RTbool fIemExecutesAll=%RTbool)\n", + pVM->fRecompileUser, pVM->fRecompileSupervisor, pVM->em.s.fIemExecutesAll)); + } + + /* + * Force rescheduling if in RAW, HM, NEM, IEM, or REM. + */ + return pVCpu->em.s.enmState == EMSTATE_RAW + || pVCpu->em.s.enmState == EMSTATE_HM + || pVCpu->em.s.enmState == EMSTATE_NEM + || pVCpu->em.s.enmState == EMSTATE_IEM + || pVCpu->em.s.enmState == EMSTATE_REM + || pVCpu->em.s.enmState == EMSTATE_IEM_THEN_REM + ? VINF_EM_RESCHEDULE + : VINF_SUCCESS; +} + + +/** + * Changes an execution scheduling policy parameter. + * + * This is used to enable or disable raw-mode / hardware-virtualization + * execution of user and supervisor code. + * + * @returns VINF_SUCCESS on success. + * @returns VINF_EM_RESCHEDULE if a rescheduling might be required. + * @returns VERR_INVALID_PARAMETER on an invalid enmPolicy value. + * + * @param pUVM The user mode VM handle. + * @param enmPolicy The scheduling policy to change. + * @param fEnforce Whether to enforce the policy or not. + */ +VMMR3DECL(int) EMR3SetExecutionPolicy(PUVM pUVM, EMEXECPOLICY enmPolicy, bool fEnforce) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(enmPolicy > EMEXECPOLICY_INVALID && enmPolicy < EMEXECPOLICY_END, VERR_INVALID_PARAMETER); + + struct EMR3SETEXECPOLICYARGS Args = { enmPolicy, fEnforce }; + return VMMR3EmtRendezvous(pUVM->pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING, emR3SetExecutionPolicy, &Args); +} + + +/** + * Queries an execution scheduling policy parameter. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param enmPolicy The scheduling policy to query. + * @param pfEnforced Where to return the current value.
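+ *
+ * A minimal, hypothetical caller-side sketch combining this query with
+ * EMR3SetExecutionPolicy() (both functions are defined in this file):
+ * @code
+ *      bool fIemAll = false;
+ *      int rc = EMR3QueryExecutionPolicy(pUVM, EMEXECPOLICY_IEM_ALL, &fIemAll);
+ *      if (RT_SUCCESS(rc) && !fIemAll)
+ *          rc = EMR3SetExecutionPolicy(pUVM, EMEXECPOLICY_IEM_ALL, true); // fEnforce
+ * @endcode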
+ */ +VMMR3DECL(int) EMR3QueryExecutionPolicy(PUVM pUVM, EMEXECPOLICY enmPolicy, bool *pfEnforced) +{ + AssertReturn(enmPolicy > EMEXECPOLICY_INVALID && enmPolicy < EMEXECPOLICY_END, VERR_INVALID_PARAMETER); + AssertPtrReturn(pfEnforced, VERR_INVALID_POINTER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* No need to bother EMTs with a query. */ + switch (enmPolicy) + { + case EMEXECPOLICY_RECOMPILE_RING0: + *pfEnforced = pVM->fRecompileSupervisor; + break; + case EMEXECPOLICY_RECOMPILE_RING3: + *pfEnforced = pVM->fRecompileUser; + break; + case EMEXECPOLICY_IEM_ALL: + *pfEnforced = pVM->em.s.fIemExecutesAll; + break; + default: + AssertFailedReturn(VERR_INTERNAL_ERROR_2); + } + + return VINF_SUCCESS; +} + + +/** + * Queries the main execution engine of the VM. + * + * @returns VBox status code + * @param pUVM The user mode VM handle. + * @param pbMainExecutionEngine Where to return the result, VM_EXEC_ENGINE_XXX. + */ +VMMR3DECL(int) EMR3QueryMainExecutionEngine(PUVM pUVM, uint8_t *pbMainExecutionEngine) +{ + AssertPtrReturn(pbMainExecutionEngine, VERR_INVALID_POINTER); + *pbMainExecutionEngine = VM_EXEC_ENGINE_NOT_SET; + + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + *pbMainExecutionEngine = pVM->bMainExecutionEngine; + return VINF_SUCCESS; +} + + +/** + * Raise a fatal error. + * + * Safely terminate the VM with full state report and stuff. This function + * will naturally never return. + * + * @param pVCpu The cross context virtual CPU structure. + * @param rc VBox status code. + */ +VMMR3DECL(void) EMR3FatalError(PVMCPU pVCpu, int rc) +{ + pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION; + longjmp(pVCpu->em.s.u.FatalLongJump, rc); +} + + +#if defined(LOG_ENABLED) || defined(VBOX_STRICT) +/** + * Gets the EM state name. + * + * @returns pointer to read only state name, + * @param enmState The state. + */ +static const char *emR3GetStateName(EMSTATE enmState) +{ + switch (enmState) + { + case EMSTATE_NONE: return "EMSTATE_NONE"; + case EMSTATE_RAW: return "EMSTATE_RAW"; + case EMSTATE_HM: return "EMSTATE_HM"; + case EMSTATE_IEM: return "EMSTATE_IEM"; + case EMSTATE_REM: return "EMSTATE_REM"; + case EMSTATE_HALTED: return "EMSTATE_HALTED"; + case EMSTATE_WAIT_SIPI: return "EMSTATE_WAIT_SIPI"; + case EMSTATE_SUSPENDED: return "EMSTATE_SUSPENDED"; + case EMSTATE_TERMINATING: return "EMSTATE_TERMINATING"; + case EMSTATE_DEBUG_GUEST_RAW: return "EMSTATE_DEBUG_GUEST_RAW"; + case EMSTATE_DEBUG_GUEST_HM: return "EMSTATE_DEBUG_GUEST_HM"; + case EMSTATE_DEBUG_GUEST_IEM: return "EMSTATE_DEBUG_GUEST_IEM"; + case EMSTATE_DEBUG_GUEST_REM: return "EMSTATE_DEBUG_GUEST_REM"; + case EMSTATE_DEBUG_HYPER: return "EMSTATE_DEBUG_HYPER"; + case EMSTATE_GURU_MEDITATION: return "EMSTATE_GURU_MEDITATION"; + case EMSTATE_IEM_THEN_REM: return "EMSTATE_IEM_THEN_REM"; + case EMSTATE_NEM: return "EMSTATE_NEM"; + case EMSTATE_DEBUG_GUEST_NEM: return "EMSTATE_DEBUG_GUEST_NEM"; + default: return "Unknown!"; + } +} +#endif /* LOG_ENABLED || VBOX_STRICT */ + + +/** + * Handle pending ring-3 I/O port write. + * + * This is in response to a VINF_EM_PENDING_R3_IOPORT_WRITE status code returned + * by EMRZSetPendingIoPortWrite() in ring-0 or raw-mode context. + * + * @returns Strict VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. 
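+ *
+ * Hedged illustration of the handshake; the field names are the ones consumed
+ * below, but the ring-0 side shown here is only a sketch (the real helper is
+ * EMRZSetPendingIoPortWrite() in a different source file):
+ * @code
+ *      // Ring-0/raw-mode context: record the access and defer it to ring-3.
+ *      pVCpu->em.s.PendingIoPortAccess.uPort   = uPort;
+ *      pVCpu->em.s.PendingIoPortAccess.uValue  = uValue;
+ *      pVCpu->em.s.PendingIoPortAccess.cbValue = cbValue;
+ *      pVCpu->em.s.PendingIoPortAccess.cbInstr = cbInstr;
+ *      return VINF_EM_PENDING_R3_IOPORT_WRITE;
+ *      // ...the EM loop later calls emR3ExecutePendingIoPortWrite() on the EMT.
+ * @endcode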
 */ +VBOXSTRICTRC emR3ExecutePendingIoPortWrite(PVM pVM, PVMCPU pVCpu) +{ + CPUM_ASSERT_NOT_EXTRN(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS); + + /* Get and clear the pending data. */ + RTIOPORT const uPort = pVCpu->em.s.PendingIoPortAccess.uPort; + uint32_t const uValue = pVCpu->em.s.PendingIoPortAccess.uValue; + uint8_t const cbValue = pVCpu->em.s.PendingIoPortAccess.cbValue; + uint8_t const cbInstr = pVCpu->em.s.PendingIoPortAccess.cbInstr; + pVCpu->em.s.PendingIoPortAccess.cbValue = 0; + + /* Assert sanity: the value must fit in the access size. */ + switch (cbValue) + { + case 1: Assert(!(uValue & UINT32_C(0xffffff00))); break; + case 2: Assert(!(uValue & UINT32_C(0xffff0000))); break; + case 4: break; + default: AssertMsgFailedReturn(("cbValue=%#x\n", cbValue), VERR_EM_INTERNAL_ERROR); + } + AssertReturn(cbInstr <= 15 && cbInstr >= 1, VERR_EM_INTERNAL_ERROR); + + /* Do the work. */ + VBOXSTRICTRC rcStrict = IOMIOPortWrite(pVM, pVCpu, uPort, uValue, cbValue); + LogFlow(("EM/OUT: %#x, %#x LB %u -> %Rrc\n", uPort, uValue, cbValue, VBOXSTRICTRC_VAL(rcStrict) )); + if (IOM_SUCCESS(rcStrict)) + { + pVCpu->cpum.GstCtx.rip += cbInstr; + pVCpu->cpum.GstCtx.rflags.Bits.u1RF = 0; + } + return rcStrict; +} + + +/** + * Handle pending ring-3 I/O port read. + * + * This is in response to a VINF_EM_PENDING_R3_IOPORT_READ status code returned + * by EMRZSetPendingIoPortRead() in ring-0 or raw-mode context. + * + * @returns Strict VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VBOXSTRICTRC emR3ExecutePendingIoPortRead(PVM pVM, PVMCPU pVCpu) +{ + CPUM_ASSERT_NOT_EXTRN(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_RAX); + + /* Get and clear the pending data. */ + RTIOPORT const uPort = pVCpu->em.s.PendingIoPortAccess.uPort; + uint8_t const cbValue = pVCpu->em.s.PendingIoPortAccess.cbValue; + uint8_t const cbInstr = pVCpu->em.s.PendingIoPortAccess.cbInstr; + pVCpu->em.s.PendingIoPortAccess.cbValue = 0; + + /* Assert sanity. */ + switch (cbValue) + { + case 1: break; + case 2: break; + case 4: break; + default: AssertMsgFailedReturn(("cbValue=%#x\n", cbValue), VERR_EM_INTERNAL_ERROR); + } + AssertReturn(pVCpu->em.s.PendingIoPortAccess.uValue == UINT32_C(0x52454144) /* READ*/, VERR_EM_INTERNAL_ERROR); + AssertReturn(cbInstr <= 15 && cbInstr >= 1, VERR_EM_INTERNAL_ERROR); + + /* Do the work. */ + uint32_t uValue = 0; + VBOXSTRICTRC rcStrict = IOMIOPortRead(pVM, pVCpu, uPort, &uValue, cbValue); + LogFlow(("EM/IN: %#x LB %u -> %Rrc, %#x\n", uPort, cbValue, VBOXSTRICTRC_VAL(rcStrict), uValue )); + if (IOM_SUCCESS(rcStrict)) + { + if (cbValue == 4) + pVCpu->cpum.GstCtx.rax = uValue; + else if (cbValue == 2) + pVCpu->cpum.GstCtx.ax = (uint16_t)uValue; + else + pVCpu->cpum.GstCtx.al = (uint8_t)uValue; + pVCpu->cpum.GstCtx.rip += cbInstr; + pVCpu->cpum.GstCtx.rflags.Bits.u1RF = 0; + } + return rcStrict; +} + + +/** + * Debug loop. + * + * @returns VBox status code for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rc Current EM VBox status code. + */ +static VBOXSTRICTRC emR3Debug(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc) +{ + for (;;) + { + Log(("emR3Debug: rc=%Rrc\n", VBOXSTRICTRC_VAL(rc))); + const VBOXSTRICTRC rcLast = rc; + + /* + * Debug related RC. + */ + switch (VBOXSTRICTRC_VAL(rc)) + { + /* + * Single step an instruction.
+ */ + case VINF_EM_DBG_STEP: + if ( pVCpu->em.s.enmState == EMSTATE_DEBUG_GUEST_RAW + || pVCpu->em.s.enmState == EMSTATE_DEBUG_HYPER + || pVCpu->em.s.fForceRAW /* paranoia */) +#ifdef VBOX_WITH_RAW_MODE + rc = emR3RawStep(pVM, pVCpu); +#else + AssertLogRelMsgFailedStmt(("Bad EM state."), VERR_EM_INTERNAL_ERROR); +#endif + else if (pVCpu->em.s.enmState == EMSTATE_DEBUG_GUEST_HM) + rc = EMR3HmSingleInstruction(pVM, pVCpu, 0 /*fFlags*/); + else if (pVCpu->em.s.enmState == EMSTATE_DEBUG_GUEST_NEM) + rc = VBOXSTRICTRC_TODO(emR3NemSingleInstruction(pVM, pVCpu, 0 /*fFlags*/)); +#ifdef VBOX_WITH_REM + else if (pVCpu->em.s.enmState == EMSTATE_DEBUG_GUEST_REM) + rc = emR3RemStep(pVM, pVCpu); +#endif + else + { + rc = IEMExecOne(pVCpu); /** @todo add dedicated interface... */ + if (rc == VINF_SUCCESS || rc == VINF_EM_RESCHEDULE) + rc = VINF_EM_DBG_STEPPED; + } + break; + + /* + * Simple events: stepped, breakpoint, stop/assertion. + */ + case VINF_EM_DBG_STEPPED: + rc = DBGFR3Event(pVM, DBGFEVENT_STEPPED); + break; + + case VINF_EM_DBG_BREAKPOINT: + rc = DBGFR3EventBreakpoint(pVM, DBGFEVENT_BREAKPOINT); + break; + + case VINF_EM_DBG_STOP: + rc = DBGFR3EventSrc(pVM, DBGFEVENT_DEV_STOP, NULL, 0, NULL, NULL); + break; + + case VINF_EM_DBG_EVENT: + rc = DBGFR3EventHandlePending(pVM, pVCpu); + break; + + case VINF_EM_DBG_HYPER_STEPPED: + rc = DBGFR3Event(pVM, DBGFEVENT_STEPPED_HYPER); + break; + + case VINF_EM_DBG_HYPER_BREAKPOINT: + rc = DBGFR3EventBreakpoint(pVM, DBGFEVENT_BREAKPOINT_HYPER); + break; + + case VINF_EM_DBG_HYPER_ASSERTION: + RTPrintf("\nVINF_EM_DBG_HYPER_ASSERTION:\n%s%s\n", VMMR3GetRZAssertMsg1(pVM), VMMR3GetRZAssertMsg2(pVM)); + RTLogFlush(NULL); + rc = DBGFR3EventAssertion(pVM, DBGFEVENT_ASSERTION_HYPER, VMMR3GetRZAssertMsg1(pVM), VMMR3GetRZAssertMsg2(pVM)); + break; + + /* + * Guru meditation. + */ + case VERR_VMM_RING0_ASSERTION: /** @todo Make a guru meditation event! */ + rc = DBGFR3EventSrc(pVM, DBGFEVENT_FATAL_ERROR, "VERR_VMM_RING0_ASSERTION", 0, NULL, NULL); + break; + case VERR_REM_TOO_MANY_TRAPS: /** @todo Make a guru meditation event! */ + rc = DBGFR3EventSrc(pVM, DBGFEVENT_DEV_STOP, "VERR_REM_TOO_MANY_TRAPS", 0, NULL, NULL); + break; + case VINF_EM_TRIPLE_FAULT: /** @todo Make a guru meditation event! */ + rc = DBGFR3EventSrc(pVM, DBGFEVENT_DEV_STOP, "VINF_EM_TRIPLE_FAULT", 0, NULL, NULL); + break; + + default: /** @todo don't use default for guru, but make special errors code! */ + { + LogRel(("emR3Debug: rc=%Rrc\n", VBOXSTRICTRC_VAL(rc))); + rc = DBGFR3Event(pVM, DBGFEVENT_FATAL_ERROR); + break; + } + } + + /* + * Process the result. + */ + switch (VBOXSTRICTRC_VAL(rc)) + { + /* + * Continue the debugging loop. + */ + case VINF_EM_DBG_STEP: + case VINF_EM_DBG_STOP: + case VINF_EM_DBG_EVENT: + case VINF_EM_DBG_STEPPED: + case VINF_EM_DBG_BREAKPOINT: + case VINF_EM_DBG_HYPER_STEPPED: + case VINF_EM_DBG_HYPER_BREAKPOINT: + case VINF_EM_DBG_HYPER_ASSERTION: + break; + + /* + * Resuming execution (in some form) has to be done here if we got + * a hypervisor debug event. 
+ */ + case VINF_SUCCESS: + case VINF_EM_RESUME: + case VINF_EM_SUSPEND: + case VINF_EM_RESCHEDULE: + case VINF_EM_RESCHEDULE_RAW: + case VINF_EM_RESCHEDULE_REM: + case VINF_EM_HALT: + if (pVCpu->em.s.enmState == EMSTATE_DEBUG_HYPER) + { +#ifdef VBOX_WITH_RAW_MODE + rc = emR3RawResumeHyper(pVM, pVCpu); + if (rc != VINF_SUCCESS && RT_SUCCESS(rc)) + continue; +#else + AssertLogRelMsgFailedReturn(("Not implemented\n"), VERR_EM_INTERNAL_ERROR); +#endif + } + if (rc == VINF_SUCCESS) + rc = VINF_EM_RESCHEDULE; + return rc; + + /* + * The debugger isn't attached. + * We'll simply turn the thing off since that's the easiest thing to do. + */ + case VERR_DBGF_NOT_ATTACHED: + switch (VBOXSTRICTRC_VAL(rcLast)) + { + case VINF_EM_DBG_HYPER_STEPPED: + case VINF_EM_DBG_HYPER_BREAKPOINT: + case VINF_EM_DBG_HYPER_ASSERTION: + case VERR_TRPM_PANIC: + case VERR_TRPM_DONT_PANIC: + case VERR_VMM_RING0_ASSERTION: + case VERR_VMM_HYPER_CR3_MISMATCH: + case VERR_VMM_RING3_CALL_DISABLED: + return rcLast; + } + return VINF_EM_OFF; + + /* + * Status codes terminating the VM in one or another sense. + */ + case VINF_EM_TERMINATE: + case VINF_EM_OFF: + case VINF_EM_RESET: + case VINF_EM_NO_MEMORY: + case VINF_EM_RAW_STALE_SELECTOR: + case VINF_EM_RAW_IRET_TRAP: + case VERR_TRPM_PANIC: + case VERR_TRPM_DONT_PANIC: + case VERR_IEM_INSTR_NOT_IMPLEMENTED: + case VERR_IEM_ASPECT_NOT_IMPLEMENTED: + case VERR_VMM_RING0_ASSERTION: + case VERR_VMM_HYPER_CR3_MISMATCH: + case VERR_VMM_RING3_CALL_DISABLED: + case VERR_INTERNAL_ERROR: + case VERR_INTERNAL_ERROR_2: + case VERR_INTERNAL_ERROR_3: + case VERR_INTERNAL_ERROR_4: + case VERR_INTERNAL_ERROR_5: + case VERR_IPE_UNEXPECTED_STATUS: + case VERR_IPE_UNEXPECTED_INFO_STATUS: + case VERR_IPE_UNEXPECTED_ERROR_STATUS: + return rc; + + /* + * The rest is unexpected, and will keep us here. + */ + default: + AssertMsgFailed(("Unexpected rc %Rrc!\n", VBOXSTRICTRC_VAL(rc))); + break; + } + } /* debug for ever */ +} + + +#if defined(VBOX_WITH_REM) || defined(DEBUG) +/** + * Steps recompiled code. + * + * @returns VBox status code. The most important ones are: VINF_EM_STEP_EVENT, + * VINF_EM_RESCHEDULE, VINF_EM_SUSPEND, VINF_EM_RESET and VINF_EM_TERMINATE. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3RemStep(PVM pVM, PVMCPU pVCpu) +{ + Log3(("emR3RemStep: cs:eip=%04x:%08x\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); + +# ifdef VBOX_WITH_REM + EMRemLock(pVM); + + /* + * Switch to REM, step instruction, switch back. + */ + int rc = REMR3State(pVM, pVCpu); + if (RT_SUCCESS(rc)) + { + rc = REMR3Step(pVM, pVCpu); + REMR3StateBack(pVM, pVCpu); + } + EMRemUnlock(pVM); + +# else + int rc = VBOXSTRICTRC_TODO(IEMExecOne(pVCpu)); NOREF(pVM); +# endif + + Log3(("emR3RemStep: returns %Rrc cs:eip=%04x:%08x\n", rc, CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); + return rc; +} +#endif /* VBOX_WITH_REM || DEBUG */ + + +#ifdef VBOX_WITH_REM +/** + * emR3RemExecute helper that syncs the state back from REM and leave the REM + * critical section. + * + * @returns false - new fInREMState value. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(bool) emR3RemExecuteSyncBack(PVM pVM, PVMCPU pVCpu) +{ + STAM_PROFILE_START(&pVCpu->em.s.StatREMSync, a); + REMR3StateBack(pVM, pVCpu); + STAM_PROFILE_STOP(&pVCpu->em.s.StatREMSync, a); + + EMRemUnlock(pVM); + return false; +} +#endif + + +/** + * Executes recompiled code. 
+ * + * This function contains the recompiler version of the inner + * execution loop (the outer loop being in EMR3ExecuteVM()). + * + * @returns VBox status code. The most important ones are: VINF_EM_RESCHEDULE, + * VINF_EM_SUSPEND, VINF_EM_RESET and VINF_EM_TERMINATE. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pfFFDone Where to store an indicator telling whether or not + * FFs were done before returning. + * + */ +static int emR3RemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone) +{ +#ifdef LOG_ENABLED + uint32_t cpl = CPUMGetGuestCPL(pVCpu); + + if (pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + Log(("EMV86: %04X:%08X IF=%d\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF)); + else + Log(("EMR%d: %04X:%08X ESP=%08X IF=%d CR0=%x eflags=%x\n", cpl, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, (uint32_t)pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.eflags.u)); +#endif + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatREMTotal, a); + +#if defined(VBOX_STRICT) && defined(DEBUG_bird) + AssertMsg( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) + || !MMHyperIsInsideArea(pVM, CPUMGetGuestEIP(pVCpu)), /** @todo @bugref{1419} - get flat address. */ + ("cs:eip=%RX16:%RX32\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); +#endif + + /* + * Spin till we get a forced action which returns anything but VINF_SUCCESS + * or the REM suggests raw-mode execution. + */ + *pfFFDone = false; +#ifdef VBOX_WITH_REM + bool fInREMState = false; +#else + uint32_t cLoops = 0; +#endif + int rc = VINF_SUCCESS; + for (;;) + { +#ifdef VBOX_WITH_REM + /* + * Lock REM and update the state if not already in sync. + * + * Note! Big lock, but you are not supposed to own any lock when + * coming in here. + */ + if (!fInREMState) + { + EMRemLock(pVM); + STAM_PROFILE_START(&pVCpu->em.s.StatREMSync, b); + + /* Flush the recompiler translation blocks if the VCPU has changed, + also force a full CPU state resync. */ + if (pVM->em.s.idLastRemCpu != pVCpu->idCpu) + { + REMFlushTBs(pVM); + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_ALL); + } + pVM->em.s.idLastRemCpu = pVCpu->idCpu; + + rc = REMR3State(pVM, pVCpu); + + STAM_PROFILE_STOP(&pVCpu->em.s.StatREMSync, b); + if (RT_FAILURE(rc)) + break; + fInREMState = true; + + /* + * We might have missed the raising of VMREQ, TIMER and some other + * important FFs while we were busy switching the state. So, check again. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_REQUEST | VM_FF_PDM_QUEUES | VM_FF_DBGF | VM_FF_CHECK_VM_STATE | VM_FF_RESET) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_TIMER | VMCPU_FF_REQUEST)) + { + LogFlow(("emR3RemExecute: Skipping run, because FF is set. %#x\n", pVM->fGlobalForcedActions)); + goto l_REMDoForcedActions; + } + } +#endif + + /* + * Execute REM. + */ + if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu))) + { + STAM_PROFILE_START(&pVCpu->em.s.StatREMExec, c); +#ifdef VBOX_WITH_REM + rc = REMR3Run(pVM, pVCpu); +#else + rc = VBOXSTRICTRC_TODO(IEMExecLots(pVCpu, NULL /*pcInstructions*/)); +#endif + STAM_PROFILE_STOP(&pVCpu->em.s.StatREMExec, c); + } + else + { + /* Give up this time slice; virtual time continues */ + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatCapped, u); + RTThreadSleep(5); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatCapped, u); + rc = VINF_SUCCESS; + } + + /* + * Deal with high priority post execution FFs before doing anything + * else. 
Sync back the state and leave the lock to be on the safe side. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK)) + { +#ifdef VBOX_WITH_REM + fInREMState = emR3RemExecuteSyncBack(pVM, pVCpu); +#endif + rc = VBOXSTRICTRC_TODO(emR3HighPriorityPostForcedActions(pVM, pVCpu, rc)); + } + + /* + * Process the returned status code. + */ + if (rc != VINF_SUCCESS) + { + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + break; + if (rc != VINF_REM_INTERRUPED_FF) + { +#ifndef VBOX_WITH_REM + /* Try dodge unimplemented IEM trouble by reschduling. */ + if ( rc == VERR_IEM_ASPECT_NOT_IMPLEMENTED + || rc == VERR_IEM_INSTR_NOT_IMPLEMENTED) + { + EMSTATE enmNewState = emR3Reschedule(pVM, pVCpu); + if (enmNewState != EMSTATE_REM && enmNewState != EMSTATE_IEM_THEN_REM) + { + rc = VINF_EM_RESCHEDULE; + break; + } + } +#endif + + /* + * Anything which is not known to us means an internal error + * and the termination of the VM! + */ + AssertMsg(rc == VERR_REM_TOO_MANY_TRAPS, ("Unknown GC return code: %Rra\n", rc)); + break; + } + } + + + /* + * Check and execute forced actions. + * + * Sync back the VM state and leave the lock before calling any of + * these, you never know what's going to happen here. + */ +#ifdef VBOX_HIGH_RES_TIMERS_HACK + TMTimerPollVoid(pVM, pVCpu); +#endif + AssertCompile(VMCPU_FF_ALL_REM_MASK & VMCPU_FF_TIMER); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, + VMCPU_FF_ALL_REM_MASK + & VM_WHEN_RAW_MODE(~(VMCPU_FF_CSAM_PENDING_ACTION | VMCPU_FF_CSAM_SCAN_PAGE), UINT32_MAX)) ) + { +#ifdef VBOX_WITH_REM +l_REMDoForcedActions: + if (fInREMState) + fInREMState = emR3RemExecuteSyncBack(pVM, pVCpu); +#endif + STAM_REL_PROFILE_ADV_SUSPEND(&pVCpu->em.s.StatREMTotal, a); + rc = emR3ForcedActions(pVM, pVCpu, rc); + VBOXVMM_EM_FF_ALL_RET(pVCpu, rc); + STAM_REL_PROFILE_ADV_RESUME(&pVCpu->em.s.StatREMTotal, a); + if ( rc != VINF_SUCCESS + && rc != VINF_EM_RESCHEDULE_REM) + { + *pfFFDone = true; + break; + } + } + +#ifndef VBOX_WITH_REM + /* + * Have to check if we can get back to fast execution mode every so often. + */ + if (!(++cLoops & 7)) + { + EMSTATE enmCheck = emR3Reschedule(pVM, pVCpu); + if ( enmCheck != EMSTATE_REM + && enmCheck != EMSTATE_IEM_THEN_REM) + return VINF_EM_RESCHEDULE; + } +#endif + + } /* The Inner Loop, recompiled execution mode version. */ + + +#ifdef VBOX_WITH_REM + /* + * Returning. Sync back the VM state if required. + */ + if (fInREMState) + fInREMState = emR3RemExecuteSyncBack(pVM, pVCpu); +#endif + + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatREMTotal, a); + return rc; +} + + +#ifdef DEBUG + +int emR3SingleStepExecRem(PVM pVM, PVMCPU pVCpu, uint32_t cIterations) +{ + EMSTATE enmOldState = pVCpu->em.s.enmState; + + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_REM; + + Log(("Single step BEGIN:\n")); + for (uint32_t i = 0; i < cIterations; i++) + { + DBGFR3PrgStep(pVCpu); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "RSS"); + emR3RemStep(pVM, pVCpu); + if (emR3Reschedule(pVM, pVCpu) != EMSTATE_REM) + break; + } + Log(("Single step END:\n")); + CPUMSetGuestEFlags(pVCpu, CPUMGetGuestEFlags(pVCpu) & ~X86_EFL_TF); + pVCpu->em.s.enmState = enmOldState; + return VINF_EM_RESCHEDULE; +} + +#endif /* DEBUG */ + + +/** + * Try execute the problematic code in IEM first, then fall back on REM if there + * is too much of it or if IEM doesn't implement something. + * + * @returns Strict VBox status code from IEMExecLots. + * @param pVM The cross context VM structure. 
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pfFFDone Force flags done indicator. + * + * @thread EMT(pVCpu) + */ +static VBOXSTRICTRC emR3ExecuteIemThenRem(PVM pVM, PVMCPU pVCpu, bool *pfFFDone) +{ + LogFlow(("emR3ExecuteIemThenRem: %04x:%RGv\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestRIP(pVCpu))); + *pfFFDone = false; + + /* + * Execute in IEM for a while. + */ + while (pVCpu->em.s.cIemThenRemInstructions < 1024) + { + uint32_t cInstructions; + VBOXSTRICTRC rcStrict = IEMExecLots(pVCpu, &cInstructions); + pVCpu->em.s.cIemThenRemInstructions += cInstructions; + if (rcStrict != VINF_SUCCESS) + { + if ( rcStrict == VERR_IEM_ASPECT_NOT_IMPLEMENTED + || rcStrict == VERR_IEM_INSTR_NOT_IMPLEMENTED) + break; + + Log(("emR3ExecuteIemThenRem: returns %Rrc after %u instructions\n", + VBOXSTRICTRC_VAL(rcStrict), pVCpu->em.s.cIemThenRemInstructions)); + return rcStrict; + } + + EMSTATE enmNewState = emR3Reschedule(pVM, pVCpu); + if (enmNewState != EMSTATE_REM && enmNewState != EMSTATE_IEM_THEN_REM) + { + LogFlow(("emR3ExecuteIemThenRem: -> %d (%s) after %u instructions\n", + enmNewState, emR3GetStateName(enmNewState), pVCpu->em.s.cIemThenRemInstructions)); + pVCpu->em.s.enmPrevState = pVCpu->em.s.enmState; + pVCpu->em.s.enmState = enmNewState; + return VINF_SUCCESS; + } + + /* + * Check for pending actions. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK & ~VMCPU_FF_UNHALT)) + return VINF_SUCCESS; + } + + /* + * Switch to REM. + */ + Log(("emR3ExecuteIemThenRem: -> EMSTATE_REM (after %u instructions)\n", pVCpu->em.s.cIemThenRemInstructions)); + pVCpu->em.s.enmState = EMSTATE_REM; + return VINF_SUCCESS; +} + + +/** + * Decides whether to execute RAW, HWACC or REM. + * + * @returns new EM state + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +EMSTATE emR3Reschedule(PVM pVM, PVMCPU pVCpu) +{ + /* + * When forcing raw-mode execution, things are simple. + */ + if (pVCpu->em.s.fForceRAW) + return EMSTATE_RAW; + + /* + * We stay in the wait for SIPI state unless explicitly told otherwise. + */ + if (pVCpu->em.s.enmState == EMSTATE_WAIT_SIPI) + return EMSTATE_WAIT_SIPI; + + /* + * Execute everything in IEM? + */ + if (pVM->em.s.fIemExecutesAll) + return EMSTATE_IEM; + + /* !!! THIS MUST BE IN SYNC WITH remR3CanExecuteRaw !!! */ + /* !!! THIS MUST BE IN SYNC WITH remR3CanExecuteRaw !!! */ + /* !!! THIS MUST BE IN SYNC WITH remR3CanExecuteRaw !!! */ + + X86EFLAGS EFlags = pVCpu->cpum.GstCtx.eflags; + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + if (EMIsHwVirtExecutionEnabled(pVM)) + { + if (VM_IS_HM_ENABLED(pVM)) + { + if (HMCanExecuteGuest(pVCpu, &pVCpu->cpum.GstCtx)) + return EMSTATE_HM; + } + else if (NEMR3CanExecuteGuest(pVM, pVCpu)) + return EMSTATE_NEM; + + /* + * Note! Raw mode and hw accelerated mode are incompatible. The latter + * turns off monitoring features essential for raw mode! + */ + return EMSTATE_IEM_THEN_REM; + } + } + + /* + * Standard raw-mode: + * + * Here we only support 16 & 32 bits protected mode ring 3 code that has no IO privileges + * or 32 bits protected mode ring 0 code + * + * The tests are ordered by the likelihood of being true during normal execution. 
+ */ + if (EFlags.u32 & (X86_EFL_TF /* | HF_INHIBIT_IRQ_MASK*/)) + { + Log2(("raw mode refused: EFlags=%#x\n", EFlags.u32)); + return EMSTATE_REM; + } + +# ifndef VBOX_RAW_V86 + if (EFlags.u32 & X86_EFL_VM) { + Log2(("raw mode refused: VM_MASK\n")); + return EMSTATE_REM; + } +# endif + + /** @todo check up the X86_CR0_AM flag in respect to raw mode!!! We're probably not emulating it right! */ + uint32_t u32CR0 = pVCpu->cpum.GstCtx.cr0; + if ((u32CR0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE)) + { + //Log2(("raw mode refused: %s%s%s\n", (u32CR0 & X86_CR0_PG) ? "" : " !PG", (u32CR0 & X86_CR0_PE) ? "" : " !PE", (u32CR0 & X86_CR0_AM) ? "" : " !AM")); + return EMSTATE_REM; + } + + if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_PAE) + { + uint32_t u32Dummy, u32Features; + + CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &u32Dummy, &u32Features); + if (!(u32Features & X86_CPUID_FEATURE_EDX_PAE)) + return EMSTATE_REM; + } + + unsigned uSS = pVCpu->cpum.GstCtx.ss.Sel; + if ( pVCpu->cpum.GstCtx.eflags.Bits.u1VM + || (uSS & X86_SEL_RPL) == 3) + { + if (!EMIsRawRing3Enabled(pVM)) + return EMSTATE_REM; + + if (!(EFlags.u32 & X86_EFL_IF)) + { + Log2(("raw mode refused: IF (RawR3)\n")); + return EMSTATE_REM; + } + + if (!(u32CR0 & X86_CR0_WP) && EMIsRawRing0Enabled(pVM)) + { + Log2(("raw mode refused: CR0.WP + RawR0\n")); + return EMSTATE_REM; + } + } + else + { + if (!EMIsRawRing0Enabled(pVM)) + return EMSTATE_REM; + + if (EMIsRawRing1Enabled(pVM)) + { + /* Only ring 0 and 1 supervisor code. */ + if ((uSS & X86_SEL_RPL) == 2) /* ring 1 code is moved into ring 2, so we can't support ring-2 in that case. */ + { + Log2(("raw r0 mode refused: CPL %d\n", uSS & X86_SEL_RPL)); + return EMSTATE_REM; + } + } + /* Only ring 0 supervisor code. */ + else if ((uSS & X86_SEL_RPL) != 0) + { + Log2(("raw r0 mode refused: CPL %d\n", uSS & X86_SEL_RPL)); + return EMSTATE_REM; + } + + // Let's start with pure 32 bits ring 0 code first + /** @todo What's pure 32-bit mode? flat? */ + if ( !(pVCpu->cpum.GstCtx.ss.Attr.n.u1DefBig) + || !(pVCpu->cpum.GstCtx.cs.Attr.n.u1DefBig)) + { + Log2(("raw r0 mode refused: SS/CS not 32bit\n")); + return EMSTATE_REM; + } + + /* Write protection must be turned on, or else the guest can overwrite our hypervisor code and data. */ + if (!(u32CR0 & X86_CR0_WP)) + { + Log2(("raw r0 mode refused: CR0.WP=0!\n")); + return EMSTATE_REM; + } + +# ifdef VBOX_WITH_RAW_MODE + if (PATMShouldUseRawMode(pVM, (RTGCPTR)pVCpu->cpum.GstCtx.eip)) + { + Log2(("raw r0 mode forced: patch code\n")); +# ifdef VBOX_WITH_SAFE_STR + Assert(pVCpu->cpum.GstCtx.tr.Sel); +# endif + return EMSTATE_RAW; + } +# endif /* VBOX_WITH_RAW_MODE */ + +# if !defined(VBOX_ALLOW_IF0) && !defined(VBOX_RUN_INTERRUPT_GATE_HANDLERS) + if (!(EFlags.u32 & X86_EFL_IF)) + { + ////Log2(("R0: IF=0 VIF=%d %08X\n", eip, pVMeflags)); + //Log2(("RR0: Interrupts turned off; fall back to emulation\n")); + return EMSTATE_REM; + } +# endif + +# ifndef VBOX_WITH_RAW_RING1 + /** @todo still necessary??? */ + if (EFlags.Bits.u2IOPL != 0) + { + Log2(("raw r0 mode refused: IOPL %d\n", EFlags.Bits.u2IOPL)); + return EMSTATE_REM; + } +# endif + } + + /* + * Stale hidden selectors means raw-mode is unsafe (being very careful). 
+ */ + if (pVCpu->cpum.GstCtx.cs.fFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale CS\n")); + return EMSTATE_REM; + } + if (pVCpu->cpum.GstCtx.ss.fFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale SS\n")); + return EMSTATE_REM; + } + if (pVCpu->cpum.GstCtx.ds.fFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale DS\n")); + return EMSTATE_REM; + } + if (pVCpu->cpum.GstCtx.es.fFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale ES\n")); + return EMSTATE_REM; + } + if (pVCpu->cpum.GstCtx.fs.fFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale FS\n")); + return EMSTATE_REM; + } + if (pVCpu->cpum.GstCtx.gs.fFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale GS\n")); + return EMSTATE_REM; + } + +# ifdef VBOX_WITH_SAFE_STR + if (pVCpu->cpum.GstCtx.tr.Sel == 0) + { + Log(("Raw mode refused -> TR=0\n")); + return EMSTATE_REM; + } +# endif + + /*Assert(PGMPhysIsA20Enabled(pVCpu));*/ + return EMSTATE_RAW; +} + + +/** + * Executes all high priority post execution force actions. + * + * @returns Strict VBox status code. Typically @a rc, but may be upgraded to + * fatal error status code. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rc The current strict VBox status code rc. + */ +VBOXSTRICTRC emR3HighPriorityPostForcedActions(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc) +{ + VBOXVMM_EM_FF_HIGH(pVCpu, pVM->fGlobalForcedActions, pVCpu->fLocalForcedActions, VBOXSTRICTRC_VAL(rc)); + + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PDM_CRITSECT)) + PDMCritSectBothFF(pVCpu); + + /* Update CR3 (Nested Paging case for HM). */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_EFER, rc); + int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu)); + if (RT_FAILURE(rc2)) + return rc2; + Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); + } + + /* Update PAE PDPEs. This must be done *after* PGMUpdateCR3() and used only by the Nested Paging case for HM. */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_EFER, rc); + if (CPUMIsGuestInPAEMode(pVCpu)) + { + PX86PDPE pPdpes = HMGetPaePdpes(pVCpu); + AssertPtr(pPdpes); + + PGMGstUpdatePaePdpes(pVCpu, pPdpes); + Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); + } + else + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES); + } + + /* IEM has pending work (typically memory write after INS instruction). */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IEM)) + rc = IEMR3ProcessForceFlag(pVM, pVCpu, rc); + + /* IOM has pending work (comitting an I/O or MMIO write). */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IOM)) + { + rc = IOMR3ProcessForceFlag(pVM, pVCpu, rc); + if (pVCpu->em.s.idxContinueExitRec >= RT_ELEMENTS(pVCpu->em.s.aExitRecords)) + { /* half likely, or at least it's a line shorter. */ } + else if (rc == VINF_SUCCESS) + rc = VINF_EM_RESUME_R3_HISTORY_EXEC; + else + pVCpu->em.s.idxContinueExitRec = UINT16_MAX; + } + +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + /* + * VMX Nested-guest APIC-write pending (can cause VM-exits). + * Takes priority over even SMI and INIT signals. + * See Intel spec. 29.4.3.2 "APIC-Write Emulation". 
+ */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_APIC_WRITE)) + { + rc = VBOXSTRICTRC_VAL(IEMExecVmxVmexitApicWrite(pVCpu)); + Assert(rc != VINF_VMX_INTERCEPT_NOT_ACTIVE); + } +#endif + +#ifdef VBOX_WITH_RAW_MODE + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_CSAM_PENDING_ACTION)) + CSAMR3DoPendingAction(pVM, pVCpu); +#endif + + if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + { + if ( rc > VINF_EM_NO_MEMORY + && rc <= VINF_EM_LAST) + rc = VINF_EM_NO_MEMORY; + } + + return rc; +} + + +/** + * Helper for emR3ForcedActions() for VMX interrupt-window VM-exit and VMX external + * interrupt VM-exit. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3VmxNstGstIntrIntercept(PVMCPU pVCpu) +{ +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)); + if (CPUMIsGuestVmxProcCtlsSet(pVCpu, &pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INT_WINDOW_EXIT)) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_VMX_VMEXIT_MASK); + VBOXSTRICTRC rcStrict = IEMExecVmxVmexitIntWindow(pVCpu); + if (RT_SUCCESS(rcStrict)) + { + Assert(rcStrict != VINF_PGM_CHANGE_MODE); + Assert(rcStrict != VINF_VMX_VMEXIT); + return VBOXSTRICTRC_VAL(rcStrict); + } + AssertMsgFailed(("Interrupt-window Vm-exit failed! rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + return VINF_EM_TRIPLE_FAULT; + } + /* Handle the "external interrupt" VM-exit intercept. */ + else if (CPUMIsGuestVmxPinCtlsSet(pVCpu, &pVCpu->cpum.GstCtx, VMX_PIN_CTLS_EXT_INT_EXIT)) + { + VBOXSTRICTRC rcStrict = IEMExecVmxVmexitExtInt(pVCpu, 0 /* uVector */, true /* fIntPending */); + Assert(rcStrict != VINF_PGM_CHANGE_MODE); + Assert(rcStrict != VINF_VMX_VMEXIT); + if (rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE) + return VBOXSTRICTRC_TODO(rcStrict); + } +#else + RT_NOREF(pVCpu); +#endif + return VINF_NO_CHANGE; +} + + +/** + * Helper for emR3ForcedActions() for SVM interrupt intercept. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3SvmNstGstIntrIntercept(PVMCPU pVCpu) +{ +#ifdef VBOX_WITH_NESTED_HWVIRT_SVM + Assert(CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx)); + if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, &pVCpu->cpum.GstCtx, SVM_CTRL_INTERCEPT_INTR)) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_SVM_VMEXIT_MASK); + VBOXSTRICTRC rcStrict = IEMExecSvmVmexit(pVCpu, SVM_EXIT_INTR, 0, 0); + if (RT_SUCCESS(rcStrict)) + { + Assert(rcStrict != VINF_PGM_CHANGE_MODE); + Assert(rcStrict != VINF_SVM_VMEXIT); + return VBOXSTRICTRC_VAL(rcStrict); + } + AssertMsgFailed(("INTR #VMEXIT failed! rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + return VINF_EM_TRIPLE_FAULT; + } +#else + NOREF(pVCpu); +#endif + return VINF_NO_CHANGE; +} + + +/** + * Helper for emR3ForcedActions() for SVM virtual interrupt intercept. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3SvmNstGstVirtIntrIntercept(PVMCPU pVCpu) +{ +#ifdef VBOX_WITH_NESTED_HWVIRT_SVM + if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, &pVCpu->cpum.GstCtx, SVM_CTRL_INTERCEPT_VINTR)) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_SVM_VMEXIT_MASK); + VBOXSTRICTRC rcStrict = IEMExecSvmVmexit(pVCpu, SVM_EXIT_VINTR, 0, 0); + if (RT_SUCCESS(rcStrict)) + { + Assert(rcStrict != VINF_PGM_CHANGE_MODE); + Assert(rcStrict != VINF_SVM_VMEXIT); + return VBOXSTRICTRC_VAL(rcStrict); + } + AssertMsgFailed(("VINTR #VMEXIT failed! 
rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + return VINF_EM_TRIPLE_FAULT; + } +#else + NOREF(pVCpu); +#endif + return VINF_NO_CHANGE; +} + + +/** + * Executes all pending forced actions. + * + * Forced actions can cause execution delays and execution + * rescheduling. The first we deal with using action priority, so + * that for instance pending timers aren't scheduled and ran until + * right before execution. The rescheduling we deal with using + * return codes. The same goes for VM termination, only in that case + * we exit everything. + * + * @returns VBox status code of equal or greater importance/severity than rc. + * The most important ones are: VINF_EM_RESCHEDULE, + * VINF_EM_SUSPEND, VINF_EM_RESET and VINF_EM_TERMINATE. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rc The current rc. + * + */ +int emR3ForcedActions(PVM pVM, PVMCPU pVCpu, int rc) +{ + STAM_REL_PROFILE_START(&pVCpu->em.s.StatForcedActions, a); +#ifdef VBOX_STRICT + int rcIrq = VINF_SUCCESS; +#endif + int rc2; +#define UPDATE_RC() \ + do { \ + AssertMsg(rc2 <= 0 || (rc2 >= VINF_EM_FIRST && rc2 <= VINF_EM_LAST), ("Invalid FF return code: %Rra\n", rc2)); \ + if (rc2 == VINF_SUCCESS || rc < VINF_SUCCESS) \ + break; \ + if (!rc || rc2 < rc) \ + rc = rc2; \ + } while (0) + VBOXVMM_EM_FF_ALL(pVCpu, pVM->fGlobalForcedActions, pVCpu->fLocalForcedActions, rc); + + /* + * Post execution chunk first. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_NORMAL_PRIORITY_POST_MASK) + || (VMCPU_FF_NORMAL_PRIORITY_POST_MASK && VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_NORMAL_PRIORITY_POST_MASK)) ) + { + /* + * EMT Rendezvous (must be serviced before termination). + */ + if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = VMMR3EmtRendezvousFF(pVM, pVCpu); + UPDATE_RC(); + /** @todo HACK ALERT! The following test is to make sure EM+TM + * thinks the VM is stopped/reset before the next VM state change + * is made. We need a better solution for this, or at least make it + * possible to do: (rc >= VINF_EM_FIRST && rc <= + * VINF_EM_SUSPEND). */ + if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF)) + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc; + } + } + + /* + * State change request (cleared by vmR3SetStateLocked). + */ + if (VM_FF_IS_SET(pVM, VM_FF_CHECK_VM_STATE)) + { + VMSTATE enmState = VMR3GetState(pVM); + switch (enmState) + { + case VMSTATE_FATAL_ERROR: + case VMSTATE_FATAL_ERROR_LS: + case VMSTATE_GURU_MEDITATION: + case VMSTATE_GURU_MEDITATION_LS: + Log2(("emR3ForcedActions: %s -> VINF_EM_SUSPEND\n", VMGetStateName(enmState) )); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return VINF_EM_SUSPEND; + + case VMSTATE_DESTROYING: + Log2(("emR3ForcedActions: %s -> VINF_EM_TERMINATE\n", VMGetStateName(enmState) )); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return VINF_EM_TERMINATE; + + default: + AssertMsgFailed(("%s\n", VMGetStateName(enmState))); + } + } + + /* + * Debugger Facility polling. + */ + if ( VM_FF_IS_SET(pVM, VM_FF_DBGF) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_DBGF) ) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = DBGFR3VMMForcedAction(pVM, pVCpu); + UPDATE_RC(); + } + + /* + * Postponed reset request. 
+ */ + if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_RESET)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = VBOXSTRICTRC_TODO(VMR3ResetFF(pVM)); + UPDATE_RC(); + } + +#ifdef VBOX_WITH_RAW_MODE + /* + * CSAM page scanning. + */ + if ( !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY) + && VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_CSAM_SCAN_PAGE)) + { + /** @todo check for 16 or 32 bits code! (D bit in the code selector) */ + Log(("Forced action VMCPU_FF_CSAM_SCAN_PAGE\n")); + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + CSAMR3CheckCodeEx(pVM, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.eip); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_CSAM_SCAN_PAGE); + } +#endif + + /* + * Out of memory? Putting this after CSAM as it may in theory cause us to run out of memory. + */ + if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + { + rc2 = PGMR3PhysAllocateHandyPages(pVM); + UPDATE_RC(); + if (rc == VINF_EM_NO_MEMORY) + return rc; + } + + /* check that we got them all */ + AssertCompile(VM_FF_NORMAL_PRIORITY_POST_MASK == (VM_FF_CHECK_VM_STATE | VM_FF_DBGF | VM_FF_RESET | VM_FF_PGM_NO_MEMORY | VM_FF_EMT_RENDEZVOUS)); + AssertCompile(VMCPU_FF_NORMAL_PRIORITY_POST_MASK == (VM_WHEN_RAW_MODE(VMCPU_FF_CSAM_SCAN_PAGE, 0) | VMCPU_FF_DBGF)); + } + + /* + * Normal priority then. + * (Executed in no particular order.) + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_NORMAL_PRIORITY_MASK, VM_FF_PGM_NO_MEMORY)) + { + /* + * PDM Queues are pending. + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PDM_QUEUES, VM_FF_PGM_NO_MEMORY)) + PDMR3QueueFlushAll(pVM); + + /* + * PDM DMA transfers are pending. + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PDM_DMA, VM_FF_PGM_NO_MEMORY)) + PDMR3DmaRun(pVM); + + /* + * EMT Rendezvous (make sure they are handled before the requests). + */ + if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = VMMR3EmtRendezvousFF(pVM, pVCpu); + UPDATE_RC(); + /** @todo HACK ALERT! The following test is to make sure EM+TM + * thinks the VM is stopped/reset before the next VM state change + * is made. We need a better solution for this, or at least make it + * possible to do: (rc >= VINF_EM_FIRST && rc <= + * VINF_EM_SUSPEND). */ + if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF)) + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc; + } + } + + /* + * Requests from other threads. + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_REQUEST, VM_FF_PGM_NO_MEMORY)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, false /*fPriorityOnly*/); + if (rc2 == VINF_EM_OFF || rc2 == VINF_EM_TERMINATE) /** @todo this shouldn't be necessary */ + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc2)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc2; + } + UPDATE_RC(); + /** @todo HACK ALERT! The following test is to make sure EM+TM + * thinks the VM is stopped/reset before the next VM state change + * is made. We need a better solution for this, or at least make it + * possible to do: (rc >= VINF_EM_FIRST && rc <= + * VINF_EM_SUSPEND). */ + if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF)) + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc; + } + } + +#ifdef VBOX_WITH_REM + /* Replay the handler notification changes. 
*/ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_REM_HANDLER_NOTIFY, VM_FF_PGM_NO_MEMORY)) + { + /* Try not to cause deadlocks. */ + if ( pVM->cCpus == 1 + || ( !PGMIsLockOwner(pVM) + && !IOMIsLockWriteOwner(pVM)) + ) + { + EMRemLock(pVM); + REMR3ReplayHandlerNotifications(pVM); + EMRemUnlock(pVM); + } + } +#endif + + /* check that we got them all */ + AssertCompile(VM_FF_NORMAL_PRIORITY_MASK == (VM_FF_REQUEST | VM_FF_PDM_QUEUES | VM_FF_PDM_DMA | VM_FF_REM_HANDLER_NOTIFY | VM_FF_EMT_RENDEZVOUS)); + } + + /* + * Normal priority then. (per-VCPU) + * (Executed in no particular order.) + */ + if ( !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY) + && VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_NORMAL_PRIORITY_MASK)) + { + /* + * Requests from other threads. + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = VMR3ReqProcessU(pVM->pUVM, pVCpu->idCpu, false /*fPriorityOnly*/); + if (rc2 == VINF_EM_OFF || rc2 == VINF_EM_TERMINATE || rc2 == VINF_EM_RESET) + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc2)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc2; + } + UPDATE_RC(); + /** @todo HACK ALERT! The following test is to make sure EM+TM + * thinks the VM is stopped/reset before the next VM state change + * is made. We need a better solution for this, or at least make it + * possible to do: (rc >= VINF_EM_FIRST && rc <= + * VINF_EM_SUSPEND). */ + if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF)) + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc; + } + } + + /* check that we got them all */ + Assert(!(VMCPU_FF_NORMAL_PRIORITY_MASK & ~VMCPU_FF_REQUEST)); + } + + /* + * High priority pre execution chunk last. + * (Executed in ascending priority order.) + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_MASK)) + { + /* + * Timers before interrupts. + */ + if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TIMER) + && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + TMR3TimerQueuesDo(pVM); + + /* + * Pick up asynchronously posted interrupts into the APIC. + */ + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC)) + APICUpdatePendingInterrupts(pVCpu); + + /* + * The instruction following an emulated STI should *always* be executed! + * + * Note! We intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here if + * the eip is the same as the inhibited instr address. Before we + * are able to execute this instruction in raw mode (iret to + * guest code) an external interrupt might force a world switch + * again. Possibly allowing a guest interrupt to be dispatched + * in the process. This could break the guest. Sounds very + * unlikely, but such timing sensitive problem are not as rare as + * you might think. + */ + if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS) + && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + { + CPUM_ASSERT_NOT_EXTRN(pVCpu, CPUMCTX_EXTRN_RIP); + if (CPUMGetGuestRIP(pVCpu) != EMGetInhibitInterruptsPC(pVCpu)) + { + Log(("Clearing VMCPU_FF_INHIBIT_INTERRUPTS at %RGv - successor %RGv\n", (RTGCPTR)CPUMGetGuestRIP(pVCpu), EMGetInhibitInterruptsPC(pVCpu))); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + else + Log(("Leaving VMCPU_FF_INHIBIT_INTERRUPTS set at %RGv\n", (RTGCPTR)CPUMGetGuestRIP(pVCpu))); + } + + /** @todo SMIs. If we implement SMIs, this is where they will have to be + * delivered. 
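The interrupt-shadow handling above only honours VMCPU_FF_INHIBIT_INTERRUPTS while RIP still equals the recorded inhibit address; once the guest has moved past that instruction the flag is cleared. A tiny self-contained model of that check, using simplified names rather than the real CPUM/EM accessors:

#include <cstdint>
#include <cstdio>

/* Simplified stand-in for the STI/MOV SS interrupt shadow state tracked by EM. */
struct IntrShadow
{
    bool     fActive;
    uint64_t ripInhibited;
};

/* The shadow is only honoured while RIP still equals the recorded inhibit
   address; once execution has moved past it, the flag is dropped. */
static bool isShadowBlocking(IntrShadow &s, uint64_t ripNow)
{
    if (!s.fActive)
        return false;
    if (ripNow != s.ripInhibited)
    {
        s.fActive = false;  /* guarded instruction has executed, clear the shadow */
        return false;
    }
    return true;
}

int main()
{
    IntrShadow s = { true, 0x1000 };
    std::printf("%d\n", isShadowBlocking(s, 0x1000)); /* 1: still blocking  */
    std::printf("%d\n", isShadowBlocking(s, 0x1002)); /* 0: shadow cleared  */
    return 0;
}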
*/ + +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + /* + * VMX Nested-guest monitor-trap flag (MTF) VM-exit. + * Takes priority over "Traps on the previous instruction". + * See Intel spec. 6.9 "Priority Among Simultaneous Exceptions And Interrupts". + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_MTF)) + { + rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexitMtf(pVCpu)); + Assert(rc2 != VINF_VMX_INTERCEPT_NOT_ACTIVE); + UPDATE_RC(); + } + + /* + * VMX Nested-guest preemption timer VM-exit. + * Takes priority over non-maskable interrupts (NMIs). + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_PREEMPT_TIMER)) + { + rc2 = VBOXSTRICTRC_VAL(IEMExecVmxVmexitPreemptTimer(pVCpu)); + if (rc2 == VINF_VMX_INTERCEPT_NOT_ACTIVE) + rc2 = VINF_SUCCESS; + UPDATE_RC(); + } +#endif + + /* + * Guest event injection. + */ + bool fWakeupPending = false; + if ( !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY) + && (!rc || rc >= VINF_EM_RESCHEDULE_HM) + && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS) /* Interrupt shadows block both NMIs and interrupts. */ + && !TRPMHasTrap(pVCpu)) /* An event could already be scheduled for dispatching. */ + { + /* + * NMIs (take priority over external interrupts). + */ + Assert(!HMR3IsEventPending(pVCpu)); + if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI) + && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) + { + rc2 = TRPMAssertTrap(pVCpu, X86_XCPT_NMI, TRPM_TRAP); + if (rc2 == VINF_SUCCESS) + { + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); + fWakeupPending = true; + if (pVM->em.s.fIemExecutesAll) + rc2 = VINF_EM_RESCHEDULE; + else + { + rc2 = HMR3IsActive(pVCpu) ? VINF_EM_RESCHEDULE_HM + : VM_IS_NEM_ENABLED(pVM) ? VINF_EM_RESCHEDULE + : VINF_EM_RESCHEDULE_REM; + } + } + UPDATE_RC(); + } + else + { + /* + * External Interrupts. + */ + bool fGif = CPUMGetGuestGif(&pVCpu->cpum.GstCtx); +#ifdef VBOX_WITH_RAW_MODE + fGif &= !PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip); +#endif + if (fGif) + { + /* + * With VMX, virtual interrupts takes priority over physical interrupts. + * With SVM, physical interrupts takes priority over virtual interrupts. + */ + if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST) + && CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx) + && CPUMIsGuestVmxVirtIntrEnabled(pVCpu, &pVCpu->cpum.GstCtx)) + { + /** @todo NSTVMX: virtual-interrupt delivery. 
*/ + rc2 = VINF_NO_CHANGE; + } + else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC) + && CPUMIsGuestPhysIntrEnabled(pVCpu)) + { + bool fInjected = false; + Assert(pVCpu->em.s.enmState != EMSTATE_WAIT_SIPI); + + if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) + rc2 = emR3VmxNstGstIntrIntercept(pVCpu); + else if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx)) + rc2 = emR3SvmNstGstIntrIntercept(pVCpu); + else + rc2 = VINF_NO_CHANGE; + + if (rc2 == VINF_NO_CHANGE) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_XCPT_MASK); + /** @todo this really isn't nice, should properly handle this */ + rc2 = TRPMR3InjectEvent(pVM, pVCpu, TRPM_HARDWARE_INT, &fInjected); + fWakeupPending = true; + if ( pVM->em.s.fIemExecutesAll + && ( rc2 == VINF_EM_RESCHEDULE_REM + || rc2 == VINF_EM_RESCHEDULE_HM + || rc2 == VINF_EM_RESCHEDULE_RAW)) + { + rc2 = VINF_EM_RESCHEDULE; + } + } +#ifdef VBOX_STRICT + if (fInjected) + rcIrq = rc2; +#endif + UPDATE_RC(); + } + else if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST) + && CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx) + && CPUMIsGuestSvmVirtIntrEnabled(pVCpu, &pVCpu->cpum.GstCtx)) + { + rc2 = emR3SvmNstGstVirtIntrIntercept(pVCpu); + if (rc2 == VINF_NO_CHANGE) + { + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST); + uint8_t const uNstGstVector = CPUMGetGuestSvmVirtIntrVector(&pVCpu->cpum.GstCtx); + AssertMsg(uNstGstVector > 0 && uNstGstVector <= X86_XCPT_LAST, ("Invalid VINTR %#x\n", uNstGstVector)); + TRPMAssertTrap(pVCpu, uNstGstVector, TRPM_HARDWARE_INT); + Log(("EM: Asserting nested-guest virt. hardware intr: %#x\n", uNstGstVector)); + rc2 = VINF_EM_RESCHEDULE; +#ifdef VBOX_STRICT + rcIrq = rc2; +#endif + } + UPDATE_RC(); + } + } + } + } + + /* + * Allocate handy pages. + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY)) + { + rc2 = PGMR3PhysAllocateHandyPages(pVM); + UPDATE_RC(); + } + + /* + * Debugger Facility request. + */ + if ( ( VM_FF_IS_SET(pVM, VM_FF_DBGF) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_DBGF) ) + && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY) ) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = DBGFR3VMMForcedAction(pVM, pVCpu); + UPDATE_RC(); + } + + /* + * EMT Rendezvous (must be serviced before termination). + */ + if ( !fWakeupPending /* don't miss the wakeup from EMSTATE_HALTED! */ + && VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + CPUM_IMPORT_EXTRN_RCSTRICT(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK, rc); + rc2 = VMMR3EmtRendezvousFF(pVM, pVCpu); + UPDATE_RC(); + /** @todo HACK ALERT! The following test is to make sure EM+TM thinks the VM is + * stopped/reset before the next VM state change is made. We need a better + * solution for this, or at least make it possible to do: (rc >= VINF_EM_FIRST + * && rc >= VINF_EM_SUSPEND). */ + if (RT_UNLIKELY(rc == VINF_EM_SUSPEND || rc == VINF_EM_RESET || rc == VINF_EM_OFF)) + { + Log2(("emR3ForcedActions: returns %Rrc\n", rc)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return rc; + } + } + + /* + * State change request (cleared by vmR3SetStateLocked). + */ + if ( !fWakeupPending /* don't miss the wakeup from EMSTATE_HALTED! 
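The injection block above encodes a fixed delivery order: NMIs first, then, gated by the global interrupt flag, VMX nested-guest virtual interrupts ahead of physical APIC/PIC interrupts, and SVM nested-guest virtual interrupts only after physical ones. A compact model of that decision chain, with simplified flags standing in for the VMCPU_FF_* checks and CPUM predicates:

#include <cstdio>

/* Hypothetical, simplified model of the pending-event sources checked above. */
struct PendingEvents
{
    bool fNmi;           /* NMI pending and not blocked                         */
    bool fGif;           /* global interrupt flag (always set without SVM)      */
    bool fVmxNestedVirt; /* VMX non-root mode with a deliverable virtual intr   */
    bool fPhysIntr;      /* APIC/PIC interrupt with physical interrupts enabled */
    bool fSvmNestedVirt; /* SVM nested hw-virt with virtual interrupts enabled  */
};

enum class Inject { None, Nmi, VmxVirtIntr, PhysIntr, SvmVirtIntr };

/* Mirrors the ordering of the if/else chain in emR3ForcedActions(). */
static Inject pickEvent(const PendingEvents &e)
{
    if (e.fNmi)
        return Inject::Nmi;
    if (!e.fGif)
        return Inject::None;
    if (e.fVmxNestedVirt)
        return Inject::VmxVirtIntr;
    if (e.fPhysIntr)
        return Inject::PhysIntr;
    if (e.fSvmNestedVirt)
        return Inject::SvmVirtIntr;
    return Inject::None;
}

int main()
{
    PendingEvents e = { false /*fNmi*/, true /*fGif*/, false /*fVmxNestedVirt*/, true /*fPhysIntr*/, false /*fSvmNestedVirt*/ };
    std::printf("%d\n", static_cast<int>(pickEvent(e))); /* 3 = PhysIntr */
    return 0;
}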
*/ + && VM_FF_IS_SET(pVM, VM_FF_CHECK_VM_STATE)) + { + VMSTATE enmState = VMR3GetState(pVM); + switch (enmState) + { + case VMSTATE_FATAL_ERROR: + case VMSTATE_FATAL_ERROR_LS: + case VMSTATE_GURU_MEDITATION: + case VMSTATE_GURU_MEDITATION_LS: + Log2(("emR3ForcedActions: %s -> VINF_EM_SUSPEND\n", VMGetStateName(enmState) )); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return VINF_EM_SUSPEND; + + case VMSTATE_DESTROYING: + Log2(("emR3ForcedActions: %s -> VINF_EM_TERMINATE\n", VMGetStateName(enmState) )); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + return VINF_EM_TERMINATE; + + default: + AssertMsgFailed(("%s\n", VMGetStateName(enmState))); + } + } + + /* + * Out of memory? Since most of our fellow high priority actions may cause us + * to run out of memory, we're employing VM_FF_IS_PENDING_EXCEPT and putting this + * at the end rather than the start. Also, VM_FF_TERMINATE has higher priority + * than us since we can terminate without allocating more memory. + */ + if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + { + rc2 = PGMR3PhysAllocateHandyPages(pVM); + UPDATE_RC(); + if (rc == VINF_EM_NO_MEMORY) + return rc; + } + + /* + * If the virtual sync clock is still stopped, make TM restart it. + */ + if (VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC)) + TMR3VirtualSyncFF(pVM, pVCpu); + +#ifdef DEBUG + /* + * Debug, pause the VM. + */ + if (VM_FF_IS_SET(pVM, VM_FF_DEBUG_SUSPEND)) + { + VM_FF_CLEAR(pVM, VM_FF_DEBUG_SUSPEND); + Log(("emR3ForcedActions: returns VINF_EM_SUSPEND\n")); + return VINF_EM_SUSPEND; + } +#endif + + /* check that we got them all */ + AssertCompile(VM_FF_HIGH_PRIORITY_PRE_MASK == (VM_FF_TM_VIRTUAL_SYNC | VM_FF_DBGF | VM_FF_CHECK_VM_STATE | VM_FF_DEBUG_SUSPEND | VM_FF_PGM_NEED_HANDY_PAGES | VM_FF_PGM_NO_MEMORY | VM_FF_EMT_RENDEZVOUS)); + AssertCompile(VMCPU_FF_HIGH_PRIORITY_PRE_MASK == (VMCPU_FF_TIMER | VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_INHIBIT_INTERRUPTS | VMCPU_FF_DBGF | VMCPU_FF_INTERRUPT_NESTED_GUEST | VMCPU_FF_VMX_MTF | VM_WHEN_RAW_MODE(VMCPU_FF_SELM_SYNC_TSS | VMCPU_FF_TRPM_SYNC_IDT | VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT, 0))); + } + +#undef UPDATE_RC + Log2(("emR3ForcedActions: returns %Rrc\n", rc)); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatForcedActions, a); + Assert(rcIrq == VINF_SUCCESS || rcIrq == rc); + return rc; +} + + +/** + * Check if the preset execution time cap restricts guest execution scheduling. + * + * @returns true if allowed, false otherwise + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +bool emR3IsExecutionAllowed(PVM pVM, PVMCPU pVCpu) +{ + uint64_t u64UserTime, u64KernelTime; + + if ( pVM->uCpuExecutionCap != 100 + && RT_SUCCESS(RTThreadGetExecutionTimeMilli(&u64KernelTime, &u64UserTime))) + { + uint64_t u64TimeNow = RTTimeMilliTS(); + if (pVCpu->em.s.u64TimeSliceStart + EM_TIME_SLICE < u64TimeNow) + { + /* New time slice. 
*/ + pVCpu->em.s.u64TimeSliceStart = u64TimeNow; + pVCpu->em.s.u64TimeSliceStartExec = u64KernelTime + u64UserTime; + pVCpu->em.s.u64TimeSliceExec = 0; + } + pVCpu->em.s.u64TimeSliceExec = u64KernelTime + u64UserTime - pVCpu->em.s.u64TimeSliceStartExec; + + Log2(("emR3IsExecutionAllowed: start=%RX64 startexec=%RX64 exec=%RX64 (cap=%x)\n", pVCpu->em.s.u64TimeSliceStart, pVCpu->em.s.u64TimeSliceStartExec, pVCpu->em.s.u64TimeSliceExec, (EM_TIME_SLICE * pVM->uCpuExecutionCap) / 100)); + if (pVCpu->em.s.u64TimeSliceExec >= (EM_TIME_SLICE * pVM->uCpuExecutionCap) / 100) + return false; + } + return true; +} + + +/** + * Execute VM. + * + * This function is the main loop of the VM. The emulation thread + * calls this function when the VM has been successfully constructed + * and we're ready for executing the VM. + * + * Returning from this function means that the VM is turned off or + * suspended (state already saved) and deconstruction is next in line. + * + * All interaction from other thread are done using forced actions + * and signaling of the wait object. + * + * @returns VBox status code, informational status codes may indicate failure. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) EMR3ExecuteVM(PVM pVM, PVMCPU pVCpu) +{ + Log(("EMR3ExecuteVM: pVM=%p enmVMState=%d (%s) enmState=%d (%s) enmPrevState=%d (%s) fForceRAW=%RTbool\n", + pVM, + pVM->enmVMState, VMR3GetStateName(pVM->enmVMState), + pVCpu->em.s.enmState, emR3GetStateName(pVCpu->em.s.enmState), + pVCpu->em.s.enmPrevState, emR3GetStateName(pVCpu->em.s.enmPrevState), + pVCpu->em.s.fForceRAW)); + VM_ASSERT_EMT(pVM); + AssertMsg( pVCpu->em.s.enmState == EMSTATE_NONE + || pVCpu->em.s.enmState == EMSTATE_WAIT_SIPI + || pVCpu->em.s.enmState == EMSTATE_SUSPENDED, + ("%s\n", emR3GetStateName(pVCpu->em.s.enmState))); + + int rc = setjmp(pVCpu->em.s.u.FatalLongJump); + if (rc == 0) + { + /* + * Start the virtual time. + */ + TMR3NotifyResume(pVM, pVCpu); + + /* + * The Outer Main Loop. + */ + bool fFFDone = false; + + /* Reschedule right away to start in the right state. */ + rc = VINF_SUCCESS; + + /* If resuming after a pause or a state load, restore the previous + state or else we'll start executing code. Else, just reschedule. */ + if ( pVCpu->em.s.enmState == EMSTATE_SUSPENDED + && ( pVCpu->em.s.enmPrevState == EMSTATE_WAIT_SIPI + || pVCpu->em.s.enmPrevState == EMSTATE_HALTED)) + pVCpu->em.s.enmState = pVCpu->em.s.enmPrevState; + else + pVCpu->em.s.enmState = emR3Reschedule(pVM, pVCpu); + pVCpu->em.s.cIemThenRemInstructions = 0; + Log(("EMR3ExecuteVM: enmState=%s\n", emR3GetStateName(pVCpu->em.s.enmState))); + + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatTotal, x); + for (;;) + { + /* + * Before we can schedule anything (we're here because + * scheduling is required) we must service any pending + * forced actions to avoid any pending action causing + * immediate rescheduling upon entering an inner loop + * + * Do forced actions. + */ + if ( !fFFDone + && RT_SUCCESS(rc) + && rc != VINF_EM_TERMINATE + && rc != VINF_EM_OFF + && ( VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK & ~VMCPU_FF_UNHALT))) + { + rc = emR3ForcedActions(pVM, pVCpu, rc); + VBOXVMM_EM_FF_ALL_RET(pVCpu, rc); + if ( ( rc == VINF_EM_RESCHEDULE_REM + || rc == VINF_EM_RESCHEDULE_HM) + && pVCpu->em.s.fForceRAW) + rc = VINF_EM_RESCHEDULE_RAW; + } + else if (fFFDone) + fFFDone = false; + + /* + * Now what to do? 
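emR3IsExecutionAllowed() above budgets guest execution per time slice: within each EM_TIME_SLICE window the thread's accumulated kernel+user time must stay below EM_TIME_SLICE * uCpuExecutionCap / 100. A self-contained sketch of that bookkeeping; the 100 ms slice length and the millisecond clock are assumptions for illustration, not taken from EMInternal.h.

#include <cstdint>
#include <cstdio>

static const uint64_t kSliceMs = 100;  /* assumed slice length for illustration */

struct SliceState
{
    uint64_t msSliceStart     = 0;  /* wall-clock start of the current slice  */
    uint64_t msSliceStartExec = 0;  /* thread kernel+user time at slice start */
    uint64_t msSliceExec      = 0;  /* thread time consumed within the slice  */
};

/* Same decision as emR3IsExecutionAllowed(): open a new slice once the old one
   has elapsed, then compare the thread time spent executing against the cap. */
static bool isExecutionAllowed(SliceState &s, uint64_t msNow, uint64_t msThreadTime, uint32_t uCapPct)
{
    if (uCapPct == 100)
        return true;
    if (s.msSliceStart + kSliceMs < msNow)
    {
        s.msSliceStart     = msNow;
        s.msSliceStartExec = msThreadTime;
        s.msSliceExec      = 0;
    }
    s.msSliceExec = msThreadTime - s.msSliceStartExec;
    return s.msSliceExec < kSliceMs * uCapPct / 100;
}

int main()
{
    SliceState s;
    /* With a 40% cap, 50 ms of thread time inside one 100 ms slice is too much. */
    std::printf("%d\n", isExecutionAllowed(s, 10, 50, 40)); /* 0 */
    return 0;
}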
+ */ + Log2(("EMR3ExecuteVM: rc=%Rrc\n", rc)); + EMSTATE const enmOldState = pVCpu->em.s.enmState; + switch (rc) + { + /* + * Keep doing what we're currently doing. + */ + case VINF_SUCCESS: + break; + + /* + * Reschedule - to raw-mode execution. + */ +/** @todo r=bird: consider merging VINF_EM_RESCHEDULE_RAW with VINF_EM_RESCHEDULE_HM, they serve the same purpose here at least. */ + case VINF_EM_RESCHEDULE_RAW: + Assert(!pVM->em.s.fIemExecutesAll || pVCpu->em.s.enmState != EMSTATE_IEM); + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + Log2(("EMR3ExecuteVM: VINF_EM_RESCHEDULE_RAW: %d -> %d (EMSTATE_RAW)\n", enmOldState, EMSTATE_RAW)); + pVCpu->em.s.enmState = EMSTATE_RAW; + } + else + { + AssertLogRelFailed(); + pVCpu->em.s.enmState = EMSTATE_NONE; + } + break; + + /* + * Reschedule - to HM or NEM. + */ + case VINF_EM_RESCHEDULE_HM: + Assert(!pVM->em.s.fIemExecutesAll || pVCpu->em.s.enmState != EMSTATE_IEM); + Assert(!pVCpu->em.s.fForceRAW); + if (VM_IS_HM_ENABLED(pVM)) + { + Log2(("EMR3ExecuteVM: VINF_EM_RESCHEDULE_HM: %d -> %d (EMSTATE_HM)\n", enmOldState, EMSTATE_HM)); + pVCpu->em.s.enmState = EMSTATE_HM; + } + else if (VM_IS_NEM_ENABLED(pVM)) + { + Log2(("EMR3ExecuteVM: VINF_EM_RESCHEDULE_HM: %d -> %d (EMSTATE_NEM)\n", enmOldState, EMSTATE_NEM)); + pVCpu->em.s.enmState = EMSTATE_NEM; + } + else + { + AssertLogRelFailed(); + pVCpu->em.s.enmState = EMSTATE_NONE; + } + break; + + /* + * Reschedule - to recompiled execution. + */ + case VINF_EM_RESCHEDULE_REM: + Assert(!pVM->em.s.fIemExecutesAll || pVCpu->em.s.enmState != EMSTATE_IEM); + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + Log2(("EMR3ExecuteVM: VINF_EM_RESCHEDULE_REM: %d -> %d (EMSTATE_IEM_THEN_REM)\n", + enmOldState, EMSTATE_IEM_THEN_REM)); + if (pVCpu->em.s.enmState != EMSTATE_IEM_THEN_REM) + { + pVCpu->em.s.enmState = EMSTATE_IEM_THEN_REM; + pVCpu->em.s.cIemThenRemInstructions = 0; + } + } + else + { + Log2(("EMR3ExecuteVM: VINF_EM_RESCHEDULE_REM: %d -> %d (EMSTATE_REM)\n", enmOldState, EMSTATE_REM)); + pVCpu->em.s.enmState = EMSTATE_REM; + } + break; + + /* + * Resume. + */ + case VINF_EM_RESUME: + Log2(("EMR3ExecuteVM: VINF_EM_RESUME: %d -> VINF_EM_RESCHEDULE\n", enmOldState)); + /* Don't reschedule in the halted or wait for SIPI case. */ + if ( pVCpu->em.s.enmPrevState == EMSTATE_WAIT_SIPI + || pVCpu->em.s.enmPrevState == EMSTATE_HALTED) + { + pVCpu->em.s.enmState = pVCpu->em.s.enmPrevState; + break; + } + /* fall through and get scheduled. */ + RT_FALL_THRU(); + + /* + * Reschedule. + */ + case VINF_EM_RESCHEDULE: + { + EMSTATE enmState = emR3Reschedule(pVM, pVCpu); + Log2(("EMR3ExecuteVM: VINF_EM_RESCHEDULE: %d -> %d (%s)\n", enmOldState, enmState, emR3GetStateName(enmState))); + if (pVCpu->em.s.enmState != enmState && enmState == EMSTATE_IEM_THEN_REM) + pVCpu->em.s.cIemThenRemInstructions = 0; + pVCpu->em.s.enmState = enmState; + break; + } + + /* + * Halted. + */ + case VINF_EM_HALT: + Log2(("EMR3ExecuteVM: VINF_EM_HALT: %d -> %d\n", enmOldState, EMSTATE_HALTED)); + pVCpu->em.s.enmState = EMSTATE_HALTED; + break; + + /* + * Switch to the wait for SIPI state (application processor only) + */ + case VINF_EM_WAIT_SIPI: + Assert(pVCpu->idCpu != 0); + Log2(("EMR3ExecuteVM: VINF_EM_WAIT_SIPI: %d -> %d\n", enmOldState, EMSTATE_WAIT_SIPI)); + pVCpu->em.s.enmState = EMSTATE_WAIT_SIPI; + break; + + + /* + * Suspend. 
+ */ + case VINF_EM_SUSPEND: + Log2(("EMR3ExecuteVM: VINF_EM_SUSPEND: %d -> %d\n", enmOldState, EMSTATE_SUSPENDED)); + Assert(enmOldState != EMSTATE_SUSPENDED); + pVCpu->em.s.enmPrevState = enmOldState; + pVCpu->em.s.enmState = EMSTATE_SUSPENDED; + break; + + /* + * Reset. + * We might end up doing a double reset for now, we'll have to clean up the mess later. + */ + case VINF_EM_RESET: + { + if (pVCpu->idCpu == 0) + { + EMSTATE enmState = emR3Reschedule(pVM, pVCpu); + Log2(("EMR3ExecuteVM: VINF_EM_RESET: %d -> %d (%s)\n", enmOldState, enmState, emR3GetStateName(enmState))); + if (pVCpu->em.s.enmState != enmState && enmState == EMSTATE_IEM_THEN_REM) + pVCpu->em.s.cIemThenRemInstructions = 0; + pVCpu->em.s.enmState = enmState; + } + else + { + /* All other VCPUs go into the wait for SIPI state. */ + pVCpu->em.s.enmState = EMSTATE_WAIT_SIPI; + } + break; + } + + /* + * Power Off. + */ + case VINF_EM_OFF: + pVCpu->em.s.enmState = EMSTATE_TERMINATING; + Log2(("EMR3ExecuteVM: returns VINF_EM_OFF (%d -> %d)\n", enmOldState, EMSTATE_TERMINATING)); + TMR3NotifySuspend(pVM, pVCpu); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + return rc; + + /* + * Terminate the VM. + */ + case VINF_EM_TERMINATE: + pVCpu->em.s.enmState = EMSTATE_TERMINATING; + Log(("EMR3ExecuteVM returns VINF_EM_TERMINATE (%d -> %d)\n", enmOldState, EMSTATE_TERMINATING)); + if (pVM->enmVMState < VMSTATE_DESTROYING) /* ugly */ + TMR3NotifySuspend(pVM, pVCpu); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + return rc; + + + /* + * Out of memory, suspend the VM and stuff. + */ + case VINF_EM_NO_MEMORY: + Log2(("EMR3ExecuteVM: VINF_EM_NO_MEMORY: %d -> %d\n", enmOldState, EMSTATE_SUSPENDED)); + Assert(enmOldState != EMSTATE_SUSPENDED); + pVCpu->em.s.enmPrevState = enmOldState; + pVCpu->em.s.enmState = EMSTATE_SUSPENDED; + TMR3NotifySuspend(pVM, pVCpu); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + + rc = VMSetRuntimeError(pVM, VMSETRTERR_FLAGS_SUSPEND, "HostMemoryLow", + N_("Unable to allocate and lock memory. The virtual machine will be paused. Please close applications to free up memory or close the VM")); + if (rc != VINF_EM_SUSPEND) + { + if (RT_SUCCESS_NP(rc)) + { + AssertLogRelMsgFailed(("%Rrc\n", rc)); + rc = VERR_EM_INTERNAL_ERROR; + } + pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION; + } + return rc; + + /* + * Guest debug events. + */ + case VINF_EM_DBG_STEPPED: + case VINF_EM_DBG_STOP: + case VINF_EM_DBG_EVENT: + case VINF_EM_DBG_BREAKPOINT: + case VINF_EM_DBG_STEP: + if (enmOldState == EMSTATE_RAW) + { + Log2(("EMR3ExecuteVM: %Rrc: %d -> %d\n", rc, enmOldState, EMSTATE_DEBUG_GUEST_RAW)); + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_RAW; + } + else if (enmOldState == EMSTATE_HM) + { + Log2(("EMR3ExecuteVM: %Rrc: %d -> %d\n", rc, enmOldState, EMSTATE_DEBUG_GUEST_HM)); + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_HM; + } + else if (enmOldState == EMSTATE_NEM) + { + Log2(("EMR3ExecuteVM: %Rrc: %d -> %d\n", rc, enmOldState, EMSTATE_DEBUG_GUEST_NEM)); + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_NEM; + } + else if (enmOldState == EMSTATE_REM) + { + Log2(("EMR3ExecuteVM: %Rrc: %d -> %d\n", rc, enmOldState, EMSTATE_DEBUG_GUEST_REM)); + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_REM; + } + else + { + Log2(("EMR3ExecuteVM: %Rrc: %d -> %d\n", rc, enmOldState, EMSTATE_DEBUG_GUEST_IEM)); + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_IEM; + } + break; + + /* + * Hypervisor debug events. 
+ */ + case VINF_EM_DBG_HYPER_STEPPED: + case VINF_EM_DBG_HYPER_BREAKPOINT: + case VINF_EM_DBG_HYPER_ASSERTION: + Log2(("EMR3ExecuteVM: %Rrc: %d -> %d\n", rc, enmOldState, EMSTATE_DEBUG_HYPER)); + pVCpu->em.s.enmState = EMSTATE_DEBUG_HYPER; + break; + + /* + * Triple fault. + */ + case VINF_EM_TRIPLE_FAULT: + if (!pVM->em.s.fGuruOnTripleFault) + { + Log(("EMR3ExecuteVM: VINF_EM_TRIPLE_FAULT: CPU reset...\n")); + rc = VBOXSTRICTRC_TODO(VMR3ResetTripleFault(pVM)); + Log2(("EMR3ExecuteVM: VINF_EM_TRIPLE_FAULT: %d -> %d (rc=%Rrc)\n", enmOldState, pVCpu->em.s.enmState, rc)); + continue; + } + /* Else fall through and trigger a guru. */ + RT_FALL_THRU(); + + case VERR_VMM_RING0_ASSERTION: + Log(("EMR3ExecuteVM: %Rrc: %d -> %d (EMSTATE_GURU_MEDITATION)\n", rc, enmOldState, EMSTATE_GURU_MEDITATION)); + pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION; + break; + + /* + * Any error code showing up here other than the ones we + * know and process above are considered to be FATAL. + * + * Unknown warnings and informational status codes are also + * included in this. + */ + default: + if (RT_SUCCESS_NP(rc)) + { + AssertMsgFailed(("Unexpected warning or informational status code %Rra!\n", rc)); + rc = VERR_EM_INTERNAL_ERROR; + } + Log(("EMR3ExecuteVM: %Rrc: %d -> %d (EMSTATE_GURU_MEDITATION)\n", rc, enmOldState, EMSTATE_GURU_MEDITATION)); + pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION; + break; + } + + /* + * Act on state transition. + */ + EMSTATE const enmNewState = pVCpu->em.s.enmState; + if (enmOldState != enmNewState) + { + VBOXVMM_EM_STATE_CHANGED(pVCpu, enmOldState, enmNewState, rc); + + /* Clear MWait flags and the unhalt FF. */ + if ( enmOldState == EMSTATE_HALTED + && ( (pVCpu->em.s.MWait.fWait & EMMWAIT_FLAG_ACTIVE) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_UNHALT)) + && ( enmNewState == EMSTATE_RAW + || enmNewState == EMSTATE_HM + || enmNewState == EMSTATE_NEM + || enmNewState == EMSTATE_REM + || enmNewState == EMSTATE_IEM_THEN_REM + || enmNewState == EMSTATE_DEBUG_GUEST_RAW + || enmNewState == EMSTATE_DEBUG_GUEST_HM + || enmNewState == EMSTATE_DEBUG_GUEST_NEM + || enmNewState == EMSTATE_DEBUG_GUEST_IEM + || enmNewState == EMSTATE_DEBUG_GUEST_REM) ) + { + if (pVCpu->em.s.MWait.fWait & EMMWAIT_FLAG_ACTIVE) + { + LogFlow(("EMR3ExecuteVM: Clearing MWAIT\n")); + pVCpu->em.s.MWait.fWait &= ~(EMMWAIT_FLAG_ACTIVE | EMMWAIT_FLAG_BREAKIRQIF0); + } + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_UNHALT)) + { + LogFlow(("EMR3ExecuteVM: Clearing UNHALT\n")); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_UNHALT); + } + } + } + else + VBOXVMM_EM_STATE_UNCHANGED(pVCpu, enmNewState, rc); + + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); /* (skip this in release) */ + STAM_PROFILE_ADV_START(&pVCpu->em.s.StatTotal, x); + + /* + * Act on the new state. + */ + switch (enmNewState) + { + /* + * Execute raw. + */ + case EMSTATE_RAW: +#ifdef VBOX_WITH_RAW_MODE + rc = emR3RawExecute(pVM, pVCpu, &fFFDone); +#else + AssertLogRelMsgFailed(("%Rrc\n", rc)); + rc = VERR_EM_INTERNAL_ERROR; +#endif + break; + + /* + * Execute hardware accelerated raw. + */ + case EMSTATE_HM: + rc = emR3HmExecute(pVM, pVCpu, &fFFDone); + break; + + /* + * Execute hardware accelerated raw. + */ + case EMSTATE_NEM: + rc = VBOXSTRICTRC_TODO(emR3NemExecute(pVM, pVCpu, &fFFDone)); + break; + + /* + * Execute recompiled. + */ + case EMSTATE_REM: + rc = emR3RemExecute(pVM, pVCpu, &fFFDone); + Log2(("EMR3ExecuteVM: emR3RemExecute -> %Rrc\n", rc)); + break; + + /* + * Execute in the interpreter. + */ + case EMSTATE_IEM: + { +#if 0 /* For testing purposes. 
*/ + STAM_PROFILE_START(&pVCpu->em.s.StatHmExec, x1); + rc = VBOXSTRICTRC_TODO(EMR3HmSingleInstruction(pVM, pVCpu, EM_ONE_INS_FLAGS_RIP_CHANGE)); + STAM_PROFILE_STOP(&pVCpu->em.s.StatHmExec, x1); + if (rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_RESCHEDULE_HM || rc == VINF_EM_RESCHEDULE_REM || rc == VINF_EM_RESCHEDULE_RAW) + rc = VINF_SUCCESS; + else if (rc == VERR_EM_CANNOT_EXEC_GUEST) +#endif + rc = VBOXSTRICTRC_TODO(IEMExecLots(pVCpu, NULL /*pcInstructions*/)); + if (pVM->em.s.fIemExecutesAll) + { + Assert(rc != VINF_EM_RESCHEDULE_REM); + Assert(rc != VINF_EM_RESCHEDULE_RAW); + Assert(rc != VINF_EM_RESCHEDULE_HM); + } + fFFDone = false; + break; + } + + /* + * Execute in IEM, hoping we can quickly switch aback to HM + * or RAW execution. If our hopes fail, we go to REM. + */ + case EMSTATE_IEM_THEN_REM: + { + STAM_PROFILE_START(&pVCpu->em.s.StatIEMThenREM, pIemThenRem); + rc = VBOXSTRICTRC_TODO(emR3ExecuteIemThenRem(pVM, pVCpu, &fFFDone)); + STAM_PROFILE_STOP(&pVCpu->em.s.StatIEMThenREM, pIemThenRem); + break; + } + + /* + * Application processor execution halted until SIPI. + */ + case EMSTATE_WAIT_SIPI: + /* no break */ + /* + * hlt - execution halted until interrupt. + */ + case EMSTATE_HALTED: + { + STAM_REL_PROFILE_START(&pVCpu->em.s.StatHalted, y); + /* If HM (or someone else) store a pending interrupt in + TRPM, it must be dispatched ASAP without any halting. + Anything pending in TRPM has been accepted and the CPU + should already be the right state to receive it. */ + if (TRPMHasTrap(pVCpu)) + rc = VINF_EM_RESCHEDULE; + /* MWAIT has a special extension where it's woken up when + an interrupt is pending even when IF=0. */ + else if ( (pVCpu->em.s.MWait.fWait & (EMMWAIT_FLAG_ACTIVE | EMMWAIT_FLAG_BREAKIRQIF0)) + == (EMMWAIT_FLAG_ACTIVE | EMMWAIT_FLAG_BREAKIRQIF0)) + { + rc = VMR3WaitHalted(pVM, pVCpu, false /*fIgnoreInterrupts*/); + if (rc == VINF_SUCCESS) + { + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC)) + APICUpdatePendingInterrupts(pVCpu); + + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC + | VMCPU_FF_INTERRUPT_NESTED_GUEST + | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT)) + { + Log(("EMR3ExecuteVM: Triggering reschedule on pending IRQ after MWAIT\n")); + rc = VINF_EM_RESCHEDULE; + } + } + } + else + { + rc = VMR3WaitHalted(pVM, pVCpu, !(CPUMGetGuestEFlags(pVCpu) & X86_EFL_IF)); + /* We're only interested in NMI/SMIs here which have their own FFs, so we don't need to + check VMCPU_FF_UPDATE_APIC here. */ + if ( rc == VINF_SUCCESS + && VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT)) + { + Log(("EMR3ExecuteVM: Triggering reschedule on pending NMI/SMI/UNHALT after HLT\n")); + rc = VINF_EM_RESCHEDULE; + } + } + + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatHalted, y); + break; + } + + /* + * Suspended - return to VM.cpp. + */ + case EMSTATE_SUSPENDED: + TMR3NotifySuspend(pVM, pVCpu); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + Log(("EMR3ExecuteVM: actually returns %Rrc (state %s / %s)\n", rc, emR3GetStateName(pVCpu->em.s.enmState), emR3GetStateName(enmOldState))); + return VINF_EM_SUSPEND; + + /* + * Debugging in the guest. 
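The EMSTATE_HALTED case above separates MWAIT with the break-on-IRQ-even-if-IF=0 extension, which must wake on any pending interrupt source, from a plain HLT, where only NMI/SMI/UNHALT force a reschedule after the halt wait returns. A compact sketch of that wake-up predicate with simplified stand-ins for the EMMWAIT_FLAG_* and VMCPU_FF_* bits:

#include <cstdint>
#include <cstdio>

/* Simplified stand-ins for the force flags and MWAIT state used above. */
enum : uint32_t
{
    kFfApic = 1u << 0, kFfPic = 1u << 1, kFfNmi = 1u << 2, kFfSmi = 1u << 3, kFfUnhalt = 1u << 4,
    kMWaitActive = 1u << 0, kMWaitBreakIrqIf0 = 1u << 1,
};

/* Should a halted vCPU be rescheduled?  With the MWAIT IF=0-break extension any
   pending interrupt wakes it; a plain HLT only reacts to NMI/SMI/UNHALT here,
   since maskable interrupts are already handled by the halt wait itself. */
static bool shouldWake(uint32_t fMWait, uint32_t fForceFlags)
{
    if ((fMWait & (kMWaitActive | kMWaitBreakIrqIf0)) == (kMWaitActive | kMWaitBreakIrqIf0))
        return (fForceFlags & (kFfApic | kFfPic | kFfNmi | kFfSmi | kFfUnhalt)) != 0;
    return (fForceFlags & (kFfNmi | kFfSmi | kFfUnhalt)) != 0;
}

int main()
{
    std::printf("%d\n", shouldWake(kMWaitActive | kMWaitBreakIrqIf0, kFfApic)); /* 1 */
    std::printf("%d\n", shouldWake(0, kFfApic));                                /* 0 */
    return 0;
}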
+ */ + case EMSTATE_DEBUG_GUEST_RAW: + case EMSTATE_DEBUG_GUEST_HM: + case EMSTATE_DEBUG_GUEST_NEM: + case EMSTATE_DEBUG_GUEST_IEM: + case EMSTATE_DEBUG_GUEST_REM: + TMR3NotifySuspend(pVM, pVCpu); + rc = VBOXSTRICTRC_TODO(emR3Debug(pVM, pVCpu, rc)); + TMR3NotifyResume(pVM, pVCpu); + Log2(("EMR3ExecuteVM: emR3Debug -> %Rrc (state %d)\n", rc, pVCpu->em.s.enmState)); + break; + + /* + * Debugging in the hypervisor. + */ + case EMSTATE_DEBUG_HYPER: + { + TMR3NotifySuspend(pVM, pVCpu); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + + rc = VBOXSTRICTRC_TODO(emR3Debug(pVM, pVCpu, rc)); + Log2(("EMR3ExecuteVM: emR3Debug -> %Rrc (state %d)\n", rc, pVCpu->em.s.enmState)); + if (rc != VINF_SUCCESS) + { + if (rc == VINF_EM_OFF || rc == VINF_EM_TERMINATE) + pVCpu->em.s.enmState = EMSTATE_TERMINATING; + else + { + /* switch to guru meditation mode */ + pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION; + VMR3SetGuruMeditation(pVM); /* This notifies the other EMTs. */ + VMMR3FatalDump(pVM, pVCpu, rc); + } + Log(("EMR3ExecuteVM: actually returns %Rrc (state %s / %s)\n", rc, emR3GetStateName(pVCpu->em.s.enmState), emR3GetStateName(enmOldState))); + return rc; + } + + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatTotal, x); + TMR3NotifyResume(pVM, pVCpu); + break; + } + + /* + * Guru meditation takes place in the debugger. + */ + case EMSTATE_GURU_MEDITATION: + { + TMR3NotifySuspend(pVM, pVCpu); + VMR3SetGuruMeditation(pVM); /* This notifies the other EMTs. */ + VMMR3FatalDump(pVM, pVCpu, rc); + emR3Debug(pVM, pVCpu, rc); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + Log(("EMR3ExecuteVM: actually returns %Rrc (state %s / %s)\n", rc, emR3GetStateName(pVCpu->em.s.enmState), emR3GetStateName(enmOldState))); + return rc; + } + + /* + * The states we don't expect here. + */ + case EMSTATE_NONE: + case EMSTATE_TERMINATING: + default: + AssertMsgFailed(("EMR3ExecuteVM: Invalid state %d!\n", pVCpu->em.s.enmState)); + pVCpu->em.s.enmState = EMSTATE_GURU_MEDITATION; + TMR3NotifySuspend(pVM, pVCpu); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + Log(("EMR3ExecuteVM: actually returns %Rrc (state %s / %s)\n", rc, emR3GetStateName(pVCpu->em.s.enmState), emR3GetStateName(enmOldState))); + return VERR_EM_INTERNAL_ERROR; + } + } /* The Outer Main Loop */ + } + else + { + /* + * Fatal error. + */ + Log(("EMR3ExecuteVM: returns %Rrc because of longjmp / fatal error; (state %s / %s)\n", rc, emR3GetStateName(pVCpu->em.s.enmState), emR3GetStateName(pVCpu->em.s.enmPrevState))); + TMR3NotifySuspend(pVM, pVCpu); + VMR3SetGuruMeditation(pVM); /* This notifies the other EMTs. */ + VMMR3FatalDump(pVM, pVCpu, rc); + emR3Debug(pVM, pVCpu, rc); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatTotal, x); + /** @todo change the VM state! */ + return rc; + } + + /* not reached */ +} + +/** + * Notify EM of a state change (used by FTM) + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) EMR3NotifySuspend(PVM pVM) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + + TMR3NotifySuspend(pVM, pVCpu); /* Stop the virtual time. */ + pVCpu->em.s.enmPrevState = pVCpu->em.s.enmState; + pVCpu->em.s.enmState = EMSTATE_SUSPENDED; + return VINF_SUCCESS; +} + +/** + * Notify EM of a state change (used by FTM) + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) EMR3NotifyResume(PVM pVM) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + EMSTATE enmCurState = pVCpu->em.s.enmState; + + TMR3NotifyResume(pVM, pVCpu); /* Resume the virtual time. 
*/ + pVCpu->em.s.enmState = pVCpu->em.s.enmPrevState; + pVCpu->em.s.enmPrevState = enmCurState; + return VINF_SUCCESS; +} diff --git a/src/VBox/VMM/VMMR3/EMHM.cpp b/src/VBox/VMM/VMMR3/EMHM.cpp new file mode 100644 index 00000000..2b03d20b --- /dev/null +++ b/src/VBox/VMM/VMMR3/EMHM.cpp @@ -0,0 +1,510 @@ +/* $Id: EMHM.cpp $ */ +/** @file + * EM - Execution Monitor / Manager - hardware virtualization + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_EM +#define VMCPU_INCL_CPUM_GST_CTX +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include "EMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include "VMMTracing.h" + +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int emR3HmHandleRC(PVM pVM, PVMCPU pVCpu, int rc); +DECLINLINE(int) emR3HmExecuteInstruction(PVM pVM, PVMCPU pVCpu, const char *pszPrefix, int rcGC = VINF_SUCCESS); +static int emR3HmExecuteIOInstruction(PVM pVM, PVMCPU pVCpu); +static int emR3HmForcedActions(PVM pVM, PVMCPU pVCpu); + +#define EMHANDLERC_WITH_HM +#define emR3ExecuteInstruction emR3HmExecuteInstruction +#define emR3ExecuteIOInstruction emR3HmExecuteIOInstruction +#include "EMHandleRCTmpl.h" + + +/** + * Executes instruction in HM mode if we can. + * + * This is somewhat comparable to REMR3EmulateInstruction. + * + * @returns VBox strict status code. + * @retval VINF_EM_DBG_STEPPED on success. + * @retval VERR_EM_CANNOT_EXEC_GUEST if we cannot execute guest instructions in + * HM right now. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure for the calling EMT. + * @param fFlags Combinations of EM_ONE_INS_FLAGS_XXX. + * @thread EMT. + */ +VMMR3_INT_DECL(VBOXSTRICTRC) EMR3HmSingleInstruction(PVM pVM, PVMCPU pVCpu, uint32_t fFlags) +{ + Assert(!(fFlags & ~EM_ONE_INS_FLAGS_MASK)); + + if (!HMCanExecuteGuest(pVCpu, &pVCpu->cpum.GstCtx)) + return VINF_EM_RESCHEDULE; + + uint64_t const uOldRip = pVCpu->cpum.GstCtx.rip; + for (;;) + { + /* + * Service necessary FFs before going into HM. 
+ */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + VBOXSTRICTRC rcStrict = emR3HmForcedActions(pVM, pVCpu); + if (rcStrict != VINF_SUCCESS) + { + Log(("EMR3HmSingleInstruction: FFs before -> %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; + } + } + + /* + * Go execute it. + */ + bool fOld = HMSetSingleInstruction(pVM, pVCpu, true); + VBOXSTRICTRC rcStrict = VMMR3HmRunGC(pVM, pVCpu); + HMSetSingleInstruction(pVM, pVCpu, fOld); + LogFlow(("EMR3HmSingleInstruction: %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + + /* + * Handle high priority FFs and informational status codes. We don't do + * normal FF processing the caller or the next call can deal with them. + */ + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK)) + { + rcStrict = emR3HighPriorityPostForcedActions(pVM, pVCpu, rcStrict); + LogFlow(("EMR3HmSingleInstruction: FFs after -> %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } + + if (rcStrict != VINF_SUCCESS && (rcStrict < VINF_EM_FIRST || rcStrict > VINF_EM_LAST)) + { + rcStrict = emR3HmHandleRC(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict)); + Log(("EMR3HmSingleInstruction: emR3HmHandleRC -> %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } + + /* + * Done? + */ + if ( (rcStrict != VINF_SUCCESS && rcStrict != VINF_EM_DBG_STEPPED) + || !(fFlags & EM_ONE_INS_FLAGS_RIP_CHANGE) + || pVCpu->cpum.GstCtx.rip != uOldRip) + { + if (rcStrict == VINF_SUCCESS && pVCpu->cpum.GstCtx.rip != uOldRip) + rcStrict = VINF_EM_DBG_STEPPED; + Log(("EMR3HmSingleInstruction: returns %Rrc (rip %llx -> %llx)\n", VBOXSTRICTRC_VAL(rcStrict), uOldRip, pVCpu->cpum.GstCtx.rip)); + CPUM_IMPORT_EXTRN_RET(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK); + return rcStrict; + } + } +} + + +/** + * Executes one (or perhaps a few more) instruction(s). + * + * @returns VBox status code suitable for EM. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rcRC Return code from RC. + * @param pszPrefix Disassembly prefix. If not NULL we'll disassemble the + * instruction and prefix the log output with this text. + */ +#if defined(LOG_ENABLED) || defined(DOXYGEN_RUNNING) +static int emR3HmExecuteInstructionWorker(PVM pVM, PVMCPU pVCpu, int rcRC, const char *pszPrefix) +#else +static int emR3HmExecuteInstructionWorker(PVM pVM, PVMCPU pVCpu, int rcRC) +#endif +{ + NOREF(rcRC); + +#ifdef LOG_ENABLED + /* + * Log it. + */ + Log(("EMINS: %04x:%RGv RSP=%RGv\n", pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, (RTGCPTR)pVCpu->cpum.GstCtx.rsp)); + if (pszPrefix) + { + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", pszPrefix); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, pszPrefix); + } +#endif + + /* + * Use IEM and fallback on REM if the functionality is missing. + * Once IEM gets mature enough, nothing should ever fall back. 
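EMR3HmSingleInstruction() above loops until either a status other than VINF_SUCCESS/VINF_EM_DBG_STEPPED comes back or, when EM_ONE_INS_FLAGS_RIP_CHANGE is given, RIP has actually moved, in which case a plain VINF_SUCCESS is promoted to VINF_EM_DBG_STEPPED. A stripped-down model of that loop; the runOneInstruction callback and the status values are placeholders, not VirtualBox APIs.

#include <cstdint>
#include <cstdio>
#include <functional>

enum Status { kOk = 0, kStepped = 1, kOther = 2 };

/* Keep executing until the status is interesting or the program counter has
   moved, then report "stepped" for a plain success with a moved RIP. */
static Status singleStep(uint64_t &rip, const std::function<Status(uint64_t &)> &runOneInstruction,
                         bool fRequireRipChange)
{
    const uint64_t ripOld = rip;
    for (;;)
    {
        Status rc = runOneInstruction(rip);
        if (   (rc != kOk && rc != kStepped)
            || !fRequireRipChange
            || rip != ripOld)
        {
            if (rc == kOk && rip != ripOld)
                rc = kStepped;
            return rc;
        }
        /* Nothing externally visible happened (e.g. a fully handled intercept);
           go around again until the guest makes progress. */
    }
}

int main()
{
    uint64_t rip = 0x1000;
    Status rc = singleStep(rip, [](uint64_t &r) { r += 2; return kOk; }, true /*fRequireRipChange*/);
    std::printf("rc=%d rip=%#llx\n", rc, (unsigned long long)rip); /* rc=1 rip=0x1002 */
    return 0;
}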
+ */ + STAM_PROFILE_START(&pVCpu->em.s.StatIEMEmu, a); + VBOXSTRICTRC rcStrict; + uint32_t idxContinueExitRec = pVCpu->em.s.idxContinueExitRec; + RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); + if (idxContinueExitRec >= RT_ELEMENTS(pVCpu->em.s.aExitRecords)) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); + rcStrict = VBOXSTRICTRC_TODO(IEMExecOne(pVCpu)); + } + else + { + RT_UNTRUSTED_VALIDATED_FENCE(); + rcStrict = EMHistoryExec(pVCpu, &pVCpu->em.s.aExitRecords[idxContinueExitRec], 0); + LogFlow(("emR3HmExecuteInstruction: %Rrc (EMHistoryExec)\n", VBOXSTRICTRC_VAL(rcStrict))); + } + STAM_PROFILE_STOP(&pVCpu->em.s.StatIEMEmu, a); + + if ( rcStrict == VERR_IEM_ASPECT_NOT_IMPLEMENTED + || rcStrict == VERR_IEM_INSTR_NOT_IMPLEMENTED) + { +#ifdef VBOX_WITH_REM + STAM_PROFILE_START(&pVCpu->em.s.StatREMEmu, b); + EMRemLock(pVM); + /* Flush the recompiler TLB if the VCPU has changed. */ + if (pVM->em.s.idLastRemCpu != pVCpu->idCpu) + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_ALL); + pVM->em.s.idLastRemCpu = pVCpu->idCpu; + + rcStrict = REMR3EmulateInstruction(pVM, pVCpu); + EMRemUnlock(pVM); + STAM_PROFILE_STOP(&pVCpu->em.s.StatREMEmu, b); +#else /* !VBOX_WITH_REM */ + NOREF(pVM); +#endif /* !VBOX_WITH_REM */ + } + + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Executes one (or perhaps a few more) instruction(s). + * This is just a wrapper for discarding pszPrefix in non-logging builds. + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pszPrefix Disassembly prefix. If not NULL we'll disassemble the + * instruction and prefix the log output with this text. + * @param rcGC GC return code + */ +DECLINLINE(int) emR3HmExecuteInstruction(PVM pVM, PVMCPU pVCpu, const char *pszPrefix, int rcGC) +{ +#ifdef LOG_ENABLED + return emR3HmExecuteInstructionWorker(pVM, pVCpu, rcGC, pszPrefix); +#else + RT_NOREF_PV(pszPrefix); + return emR3HmExecuteInstructionWorker(pVM, pVCpu, rcGC); +#endif +} + + +/** + * Executes one (or perhaps a few more) IO instruction(s). + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3HmExecuteIOInstruction(PVM pVM, PVMCPU pVCpu) +{ + RT_NOREF(pVM); + STAM_PROFILE_START(&pVCpu->em.s.StatIOEmu, a); + + VBOXSTRICTRC rcStrict; + uint32_t idxContinueExitRec = pVCpu->em.s.idxContinueExitRec; + RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); + if (idxContinueExitRec >= RT_ELEMENTS(pVCpu->em.s.aExitRecords)) + { + /* + * Hand it over to the interpreter. + */ + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); + rcStrict = IEMExecOne(pVCpu); + LogFlow(("emR3HmExecuteIOInstruction: %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } + else + { + RT_UNTRUSTED_VALIDATED_FENCE(); + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); + rcStrict = EMHistoryExec(pVCpu, &pVCpu->em.s.aExitRecords[idxContinueExitRec], 0); + LogFlow(("emR3HmExecuteIOInstruction: %Rrc (EMHistoryExec)\n", VBOXSTRICTRC_VAL(rcStrict))); + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatIoRestarted); + } + + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatIoIem); + STAM_PROFILE_STOP(&pVCpu->em.s.StatIOEmu, a); + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Process HM specific forced actions. + * + * This function is called when any FFs in the VM_FF_HIGH_PRIORITY_PRE_RAW_MASK + * or/and VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK are pending. 
+ * + * @returns VBox status code. May return VINF_EM_NO_MEMORY but none of the other + * EM statuses. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3HmForcedActions(PVM pVM, PVMCPU pVCpu) +{ + /* + * Sync page directory. + */ + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4); + Assert(pVCpu->em.s.enmState != EMSTATE_WAIT_SIPI); + int rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (RT_FAILURE(rc)) + return rc; + +#ifdef VBOX_WITH_RAW_MODE + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)); +#endif + + /* Prefetch pages for EIP and ESP. */ + /** @todo This is rather expensive. Should investigate if it really helps at all. */ + /** @todo this should be skipped! */ + CPUM_IMPORT_EXTRN_RET(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_SS); + rc = PGMPrefetchPage(pVCpu, SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), pVCpu->cpum.GstCtx.rip)); + if (rc == VINF_SUCCESS) + rc = PGMPrefetchPage(pVCpu, SELMToFlat(pVM, DISSELREG_SS, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), pVCpu->cpum.GstCtx.rsp)); + if (rc != VINF_SUCCESS) + { + if (rc != VINF_PGM_SYNC_CR3) + { + AssertLogRelMsgReturn(RT_FAILURE(rc), ("%Rrc\n", rc), VERR_IPE_UNEXPECTED_INFO_STATUS); + return rc; + } + rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (RT_FAILURE(rc)) + return rc; + } + /** @todo maybe prefetch the supervisor stack page as well */ +#ifdef VBOX_WITH_RAW_MODE + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)); +#endif + } + + /* + * Allocate handy pages (just in case the above actions have consumed some pages). + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY)) + { + int rc = PGMR3PhysAllocateHandyPages(pVM); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Check whether we're out of memory now. + * + * This may stem from some of the above actions or operations that has been executed + * since we ran FFs. The allocate handy pages must for instance always be followed by + * this check. + */ + if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + return VINF_EM_NO_MEMORY; + + return VINF_SUCCESS; +} + + +/** + * Executes hardware accelerated raw code. (Intel VT-x & AMD-V) + * + * This function contains the raw-mode version of the inner + * execution loop (the outer loop being in EMR3ExecuteVM()). + * + * @returns VBox status code. The most important ones are: VINF_EM_RESCHEDULE, VINF_EM_RESCHEDULE_RAW, + * VINF_EM_RESCHEDULE_REM, VINF_EM_SUSPEND, VINF_EM_RESET and VINF_EM_TERMINATE. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pfFFDone Where to store an indicator telling whether or not + * FFs were done before returning. + */ +int emR3HmExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone) +{ + int rc = VERR_IPE_UNINITIALIZED_STATUS; + + LogFlow(("emR3HmExecute%d: (cs:eip=%04x:%RGv)\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip)); + *pfFFDone = false; + + STAM_COUNTER_INC(&pVCpu->em.s.StatHMExecuteCalled); + + /* + * Spin till we get a forced action which returns anything but VINF_SUCCESS. 
+ */ + for (;;) + { + STAM_PROFILE_ADV_START(&pVCpu->em.s.StatHMEntry, a); + + /* Check if a forced reschedule is pending. */ + if (HMR3IsRescheduleRequired(pVM, &pVCpu->cpum.GstCtx)) + { + rc = VINF_EM_RESCHEDULE; + break; + } + + /* + * Process high priority pre-execution raw-mode FFs. + */ +#ifdef VBOX_WITH_RAW_MODE + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS | VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)); +#endif + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + rc = emR3HmForcedActions(pVM, pVCpu); + if (rc != VINF_SUCCESS) + break; + } + +#ifdef LOG_ENABLED + /* + * Log important stuff before entering GC. + */ + if (TRPMHasTrap(pVCpu)) + Log(("CPU%d: Pending hardware interrupt=0x%x cs:rip=%04X:%RGv\n", pVCpu->idCpu, TRPMGetTrapNo(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip)); + + uint32_t cpl = CPUMGetGuestCPL(pVCpu); + if (pVM->cCpus == 1) + { + if (pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + Log(("HWV86: %08X IF=%d\n", pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF)); + else if (CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx)) + Log(("HWR%d: %04X:%RGv ESP=%RGv IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + else + Log(("HWR%d: %04X:%08X ESP=%08X IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + } + else + { + if (pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + Log(("HWV86-CPU%d: %08X IF=%d\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF)); + else if (CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx)) + Log(("HWR%d-CPU%d: %04X:%RGv ESP=%RGv IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + else + Log(("HWR%d-CPU%d: %04X:%08X ESP=%08X IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + } +#endif /* LOG_ENABLED */ + + /* + * Execute the code. + */ + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatHMEntry, a); + + if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu))) + { + STAM_PROFILE_START(&pVCpu->em.s.StatHMExec, x); + rc = VMMR3HmRunGC(pVM, pVCpu); + STAM_PROFILE_STOP(&pVCpu->em.s.StatHMExec, x); + } + else + { + /* Give up this time slice; virtual time continues */ + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatCapped, u); + RTThreadSleep(5); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatCapped, u); + rc = VINF_SUCCESS; + } + + + /* + * Deal with high priority post execution FFs before doing anything else. 
+ */ + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK)) + rc = VBOXSTRICTRC_TODO(emR3HighPriorityPostForcedActions(pVM, pVCpu, rc)); + + /* + * Process the returned status code. + */ + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + break; + + rc = emR3HmHandleRC(pVM, pVCpu, rc); + if (rc != VINF_SUCCESS) + break; + + /* + * Check and execute forced actions. + */ +#ifdef VBOX_HIGH_RES_TIMERS_HACK + TMTimerPollVoid(pVM, pVCpu); +#endif + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_MASK)) + { + rc = emR3ForcedActions(pVM, pVCpu, rc); + VBOXVMM_EM_FF_ALL_RET(pVCpu, rc); + if ( rc != VINF_SUCCESS + && rc != VINF_EM_RESCHEDULE_HM) + { + *pfFFDone = true; + break; + } + } + } + + /* + * Return to outer loop. + */ +#if defined(LOG_ENABLED) && defined(DEBUG) + RTLogFlush(NULL); +#endif + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/EMR3Dbg.cpp b/src/VBox/VMM/VMMR3/EMR3Dbg.cpp new file mode 100644 index 00000000..370387bd --- /dev/null +++ b/src/VBox/VMM/VMMR3/EMR3Dbg.cpp @@ -0,0 +1,338 @@ +/* $Id: EMR3Dbg.cpp $ */ +/** @file + * EM - Execution Monitor / Manager, Debugger Related Bits. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_EM +#include +#include +#include +#include +#include "EMInternal.h" +#include +#include +#include + + +/** @callback_method_impl{FNDBGCCMD, + * Implements the '.alliem' command. } + */ +static DECLCALLBACK(int) enmR3DbgCmdAllIem(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + int rc; + bool f; + + if (cArgs == 0) + { + rc = EMR3QueryExecutionPolicy(pUVM, EMEXECPOLICY_IEM_ALL, &f); + if (RT_FAILURE(rc)) + return DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "EMR3QueryExecutionPolicy(,EMEXECPOLICY_IEM_ALL,"); + DBGCCmdHlpPrintf(pCmdHlp, f ? "alliem: enabled\n" : "alliem: disabled\n"); + } + else + { + rc = DBGCCmdHlpVarToBool(pCmdHlp, &paArgs[0], &f); + if (RT_FAILURE(rc)) + return DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "DBGCCmdHlpVarToBool"); + rc = EMR3SetExecutionPolicy(pUVM, EMEXECPOLICY_IEM_ALL, f); + if (RT_FAILURE(rc)) + return DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "EMR3SetExecutionPolicy(,EMEXECPOLICY_IEM_ALL,%RTbool)", f); + } + return VINF_SUCCESS; +} + + +/** Describes a optional boolean argument. */ +static DBGCVARDESC const g_BoolArg = { 0, 1, DBGCVAR_CAT_ANY, 0, "boolean", "Boolean value." }; + +/** Commands. */ +static DBGCCMD const g_aCmds[] = +{ + { + "alliem", 0, 1, &g_BoolArg, 1, 0, enmR3DbgCmdAllIem, "[boolean]", + "Enables or disabled executing ALL code in IEM, if no arguments are given it displays the current status." 
+ }, +}; + + +/** + * Translates EMEXITTYPE into a name. + * + * @returns Pointer to read-only name, NULL if unknown type. + * @param enmExitType The exit type to name. + */ +VMM_INT_DECL(const char *) EMR3GetExitTypeName(EMEXITTYPE enmExitType) +{ + switch (enmExitType) + { + case EMEXITTYPE_INVALID: return "invalid"; + case EMEXITTYPE_IO_PORT_READ: return "I/O port read"; + case EMEXITTYPE_IO_PORT_WRITE: return "I/O port write"; + case EMEXITTYPE_IO_PORT_STR_READ: return "I/O port string read"; + case EMEXITTYPE_IO_PORT_STR_WRITE: return "I/O port string write"; + case EMEXITTYPE_MMIO: return "MMIO access"; + case EMEXITTYPE_MMIO_READ: return "MMIO read"; + case EMEXITTYPE_MMIO_WRITE: return "MMIO write"; + case EMEXITTYPE_MSR_READ: return "MSR read"; + case EMEXITTYPE_MSR_WRITE: return "MSR write"; + case EMEXITTYPE_CPUID: return "CPUID"; + case EMEXITTYPE_RDTSC: return "RDTSC"; + case EMEXITTYPE_MOV_CRX: return "MOV CRx"; + case EMEXITTYPE_MOV_DRX: return "MOV DRx"; + + /* Raw-mode only: */ + case EMEXITTYPE_INVLPG: return "INVLPG"; + case EMEXITTYPE_LLDT: return "LLDT"; + case EMEXITTYPE_RDPMC: return "RDPMC"; + case EMEXITTYPE_CLTS: return "CLTS"; + case EMEXITTYPE_STI: return "STI"; + case EMEXITTYPE_INT: return "INT"; + case EMEXITTYPE_SYSCALL: return "SYSCALL"; + case EMEXITTYPE_SYSENTER: return "SYSENTER"; + case EMEXITTYPE_HLT: return "HLT"; + } + return NULL; +} + + +/** + * Translates flags+type into an exit name. + * + * @returns Exit name. + * @param uFlagsAndType The exit to name. + * @param pszFallback Buffer for formatting a numeric fallback. + * @param cbFallback Size of fallback buffer. + */ +static const char *emR3HistoryGetExitName(uint32_t uFlagsAndType, char *pszFallback, size_t cbFallback) +{ + const char *pszExitName; + switch (uFlagsAndType & EMEXIT_F_KIND_MASK) + { + case EMEXIT_F_KIND_EM: + pszExitName = EMR3GetExitTypeName((EMEXITTYPE)(uFlagsAndType & EMEXIT_F_TYPE_MASK)); + break; + + case EMEXIT_F_KIND_VMX: + pszExitName = HMGetVmxExitName( uFlagsAndType & EMEXIT_F_TYPE_MASK); + break; + + case EMEXIT_F_KIND_SVM: + pszExitName = HMGetSvmExitName( uFlagsAndType & EMEXIT_F_TYPE_MASK); + break; + + case EMEXIT_F_KIND_NEM: + pszExitName = NEMR3GetExitName( uFlagsAndType & EMEXIT_F_TYPE_MASK); + break; + + case EMEXIT_F_KIND_XCPT: + switch (uFlagsAndType & EMEXIT_F_TYPE_MASK) + { + case X86_XCPT_DE: return "Xcpt #DE"; + case X86_XCPT_DB: return "Xcpt #DB"; + case X86_XCPT_NMI: return "Xcpt #NMI"; + case X86_XCPT_BP: return "Xcpt #BP"; + case X86_XCPT_OF: return "Xcpt #OF"; + case X86_XCPT_BR: return "Xcpt #BR"; + case X86_XCPT_UD: return "Xcpt #UD"; + case X86_XCPT_NM: return "Xcpt #NM"; + case X86_XCPT_DF: return "Xcpt #DF"; + case X86_XCPT_CO_SEG_OVERRUN: return "Xcpt #CO_SEG_OVERRUN"; + case X86_XCPT_TS: return "Xcpt #TS"; + case X86_XCPT_NP: return "Xcpt #NP"; + case X86_XCPT_SS: return "Xcpt #SS"; + case X86_XCPT_GP: return "Xcpt #GP"; + case X86_XCPT_PF: return "Xcpt #PF"; + case X86_XCPT_MF: return "Xcpt #MF"; + case X86_XCPT_AC: return "Xcpt #AC"; + case X86_XCPT_MC: return "Xcpt #MC"; + case X86_XCPT_XF: return "Xcpt #XF"; + case X86_XCPT_VE: return "Xcpt #VE"; + case X86_XCPT_SX: return "Xcpt #SX"; + default: + pszExitName = NULL; + break; + } + break; + + default: + AssertFailed(); + pszExitName = NULL; + break; + } + if (pszExitName) + return pszExitName; + RTStrPrintf(pszFallback, cbFallback, "%#06x", uFlagsAndType & (EMEXIT_F_KIND_MASK | EMEXIT_F_TYPE_MASK)); + return pszFallback; +} + + +/** + * Displays the VM-exit history. 
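+ *
+ * Registered by emR3InitDbg() under the names 'exits' and 'exithistory'.
+ * Assuming the usual DBGFR3Info() helper (NULL output helper = default log),
+ * it can be triggered from ring-3 along these lines:
+ * @code
+ *      DBGFR3Info(pVM->pUVM, "exits", "16 asc", NULL);
+ * @endcode
+ * which dumps up to 16 of the most recently recorded exits in ascending
+ * order.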
+ * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) emR3InfoExitHistory(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + + /* + * Figure out target cpu and parse arguments. + */ + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + bool fReverse = true; + uint32_t cLeft = RT_ELEMENTS(pVCpu->em.s.aExitHistory); + + while (pszArgs && *pszArgs) + { + pszArgs = RTStrStripL(pszArgs); + if (!*pszArgs) + break; + if (RT_C_IS_DIGIT(*pszArgs)) + { + /* The number to dump. */ + uint32_t uValue = cLeft; + RTStrToUInt32Ex(pszArgs, (char **)&pszArgs, 0, &uValue); + if (uValue > 0) + cLeft = RT_MIN(uValue, RT_ELEMENTS(pVCpu->em.s.aExitHistory)); + } + else if (RTStrCmp(pszArgs, "reverse") == 0) + { + pszArgs += 7; + fReverse = true; + } + else if (RTStrCmp(pszArgs, "ascending") == 0) + { + pszArgs += 9; + fReverse = false; + } + else if (RTStrCmp(pszArgs, "asc") == 0) + { + pszArgs += 3; + fReverse = false; + } + else + { + const char *pszStart = pszArgs; + while (*pszArgs && !RT_C_IS_SPACE(*pszArgs)) + pszArgs++; + pHlp->pfnPrintf(pHlp, "Unknown option: %.*s\n", pszArgs - pszStart, pszArgs); + } + } + + /* + * Do the job. + */ + uint64_t idx = pVCpu->em.s.iNextExit; + if (idx == 0) + pHlp->pfnPrintf(pHlp, "CPU[%u]: VM-exit history: empty\n", pVCpu->idCpu); + else + { + /* + * Print header. + */ + pHlp->pfnPrintf(pHlp, + "CPU[%u]: VM-exit history:\n" + " Exit No.: TSC timestamp / delta RIP (Flat/*) Exit Name\n" + , pVCpu->idCpu); + + /* + * Adjust bounds if ascending order. + */ + if (!fReverse) + { + if (idx > cLeft) + idx -= cLeft; + else + { + cLeft = idx; + idx = 0; + } + } + + /* + * Print the entries. + */ + uint64_t uPrevTimestamp = 0; + do + { + if (fReverse) + idx -= 1; + PCEMEXITENTRY const pEntry = &pVCpu->em.s.aExitHistory[(uintptr_t)idx & 0xff]; + + /* Get the exit name. */ + char szExitName[16]; + const char *pszExitName = emR3HistoryGetExitName(pEntry->uFlagsAndType, szExitName, sizeof(szExitName)); + + /* Calc delta (negative if reverse order, positive ascending). */ + int64_t offDelta = uPrevTimestamp != 0 && pEntry->uTimestamp != 0 ? pEntry->uTimestamp - uPrevTimestamp : 0; + uPrevTimestamp = pEntry->uTimestamp; + + char szPC[32]; + if (!(pEntry->uFlagsAndType & (EMEXIT_F_CS_EIP | EMEXIT_F_UNFLATTENED_PC))) + RTStrPrintf(szPC, sizeof(szPC), "%016RX64 ", pEntry->uFlatPC); + else if (pEntry->uFlagsAndType & EMEXIT_F_UNFLATTENED_PC) + RTStrPrintf(szPC, sizeof(szPC), "%016RX64*", pEntry->uFlatPC); + else + RTStrPrintf(szPC, sizeof(szPC), "%04x:%08RX32* ", (uint32_t)(pEntry->uFlatPC >> 32), (uint32_t)pEntry->uFlatPC); + + /* Do the printing. */ + if (pEntry->idxSlot == UINT32_MAX) + pHlp->pfnPrintf(pHlp, " %10RU64: %#018RX64/%+-9RI64 %s %#07x %s\n", + idx, pEntry->uTimestamp, offDelta, szPC, pEntry->uFlagsAndType, pszExitName); + else + { + /** @todo more on this later */ + pHlp->pfnPrintf(pHlp, " %10RU64: %#018RX64/%+-9RI64 %s %#07x %s slot=%#x\n", + idx, pEntry->uTimestamp, offDelta, szPC, pEntry->uFlagsAndType, pszExitName, pEntry->idxSlot); + } + + /* Advance if ascending. */ + if (!fReverse) + idx += 1; + } while (--cLeft > 0 && idx > 0); + } +} + + +int emR3InitDbg(PVM pVM) +{ + /* + * Register info dumpers. + */ + const char *pszExitsDesc = "Dumps the VM-exit history. 
Arguments: Number of entries; 'asc', 'ascending' or 'reverse'."; + int rc = DBGFR3InfoRegisterInternalEx(pVM, "exits", pszExitsDesc, emR3InfoExitHistory, DBGFINFO_FLAGS_ALL_EMTS); + AssertLogRelRCReturn(rc, rc); + rc = DBGFR3InfoRegisterInternalEx(pVM, "exithistory", pszExitsDesc, emR3InfoExitHistory, DBGFINFO_FLAGS_ALL_EMTS); + AssertLogRelRCReturn(rc, rc); + +#ifdef VBOX_WITH_DEBUGGER + /* + * Register debugger commands. + */ + rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + AssertLogRelRCReturn(rc, rc); +#endif + + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/EMR3Nem.cpp b/src/VBox/VMM/VMMR3/EMR3Nem.cpp new file mode 100644 index 00000000..85817fdd --- /dev/null +++ b/src/VBox/VMM/VMMR3/EMR3Nem.cpp @@ -0,0 +1,501 @@ +/* $Id: EMR3Nem.cpp $ */ +/** @file + * EM - Execution Monitor / Manager - NEM interface. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_EM +#define VMCPU_INCL_CPUM_GST_CTX +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include +#include +#include "EMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include "VMMTracing.h" + +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int emR3NemHandleRC(PVM pVM, PVMCPU pVCpu, int rc); +DECLINLINE(int) emR3NemExecuteInstruction(PVM pVM, PVMCPU pVCpu, const char *pszPrefix, int rcGC = VINF_SUCCESS); +static int emR3NemExecuteIOInstruction(PVM pVM, PVMCPU pVCpu); +static int emR3NemForcedActions(PVM pVM, PVMCPU pVCpu); + +#define EMHANDLERC_WITH_NEM +#define emR3ExecuteInstruction emR3NemExecuteInstruction +#define emR3ExecuteIOInstruction emR3NemExecuteIOInstruction +#include "EMHandleRCTmpl.h" + + +/** + * Executes instruction in NEM mode if we can. + * + * This is somewhat comparable to REMR3EmulateInstruction. + * + * @returns VBox strict status code. + * @retval VINF_EM_DBG_STEPPED on success. + * @retval VERR_EM_CANNOT_EXEC_GUEST if we cannot execute guest instructions in + * HM right now. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure for the calling EMT. + * @param fFlags Combinations of EM_ONE_INS_FLAGS_XXX. + * @thread EMT. 
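+ *
+ * @remarks A minimal usage sketch for a caller already on the EMT that wants
+ *          to step exactly one instruction and detect the RIP change:
+ * @code
+ *      VBOXSTRICTRC rcStep = emR3NemSingleInstruction(pVM, pVCpu, EM_ONE_INS_FLAGS_RIP_CHANGE);
+ *      if (rcStep == VINF_EM_DBG_STEPPED)
+ *          Log(("stepped to %04x:%RGv\n", pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip));
+ *      else if (rcStep == VINF_EM_RESCHEDULE)
+ *          Log(("NEM declined to execute the current context\n"));
+ * @endcode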
+ */ +VBOXSTRICTRC emR3NemSingleInstruction(PVM pVM, PVMCPU pVCpu, uint32_t fFlags) +{ + Assert(!(fFlags & ~EM_ONE_INS_FLAGS_MASK)); + + if (!NEMR3CanExecuteGuest(pVM, pVCpu)) + return VINF_EM_RESCHEDULE; + + uint64_t const uOldRip = pVCpu->cpum.GstCtx.rip; + for (;;) + { + /* + * Service necessary FFs before going into HM. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + VBOXSTRICTRC rcStrict = emR3NemForcedActions(pVM, pVCpu); + if (rcStrict != VINF_SUCCESS) + { + Log(("emR3NemSingleInstruction: FFs before -> %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; + } + } + + /* + * Go execute it. + */ + bool fOld = NEMR3SetSingleInstruction(pVM, pVCpu, true); + VBOXSTRICTRC rcStrict = NEMR3RunGC(pVM, pVCpu); + NEMR3SetSingleInstruction(pVM, pVCpu, fOld); + LogFlow(("emR3NemSingleInstruction: %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + + /* + * Handle high priority FFs and informational status codes. We don't do + * normal FF processing the caller or the next call can deal with them. + */ + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK)) + { + rcStrict = emR3HighPriorityPostForcedActions(pVM, pVCpu, rcStrict); + LogFlow(("emR3NemSingleInstruction: FFs after -> %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } + + if (rcStrict != VINF_SUCCESS && (rcStrict < VINF_EM_FIRST || rcStrict > VINF_EM_LAST)) + { + rcStrict = emR3NemHandleRC(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict)); + Log(("emR3NemSingleInstruction: emR3NemHandleRC -> %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + } + + /* + * Done? + */ + CPUM_ASSERT_NOT_EXTRN(pVCpu, CPUMCTX_EXTRN_RIP); + if ( (rcStrict != VINF_SUCCESS && rcStrict != VINF_EM_DBG_STEPPED) + || !(fFlags & EM_ONE_INS_FLAGS_RIP_CHANGE) + || pVCpu->cpum.GstCtx.rip != uOldRip) + { + if (rcStrict == VINF_SUCCESS && pVCpu->cpum.GstCtx.rip != uOldRip) + rcStrict = VINF_EM_DBG_STEPPED; + Log(("emR3NemSingleInstruction: returns %Rrc (rip %llx -> %llx)\n", + VBOXSTRICTRC_VAL(rcStrict), uOldRip, pVCpu->cpum.GstCtx.rip)); + CPUM_IMPORT_EXTRN_RET(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK); + return rcStrict; + } + } +} + + +/** + * Executes one (or perhaps a few more) instruction(s). + * + * @returns VBox status code suitable for EM. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rcRC Return code from RC. + * @param pszPrefix Disassembly prefix. If not NULL we'll disassemble the + * instruction and prefix the log output with this text. + */ +#if defined(LOG_ENABLED) || defined(DOXYGEN_RUNNING) +static int emR3NemExecuteInstructionWorker(PVM pVM, PVMCPU pVCpu, int rcRC, const char *pszPrefix) +#else +static int emR3NemExecuteInstructionWorker(PVM pVM, PVMCPU pVCpu, int rcRC) +#endif +{ + NOREF(rcRC); + +#ifdef LOG_ENABLED + /* + * Log it. + */ + Log(("EMINS: %04x:%RGv RSP=%RGv\n", pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, (RTGCPTR)pVCpu->cpum.GstCtx.rsp)); + if (pszPrefix) + { + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", pszPrefix); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, pszPrefix); + } +#endif + + /* + * Use IEM and fallback on REM if the functionality is missing. + * Once IEM gets mature enough, nothing should ever fall back. 
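+ *
+ * The exit-record lookup below follows the usual speculation-hardening idiom
+ * for a possibly guest-influenced index: copy it, fence, bounds check it,
+ * then fence again before dereferencing.  Roughly (sketch only, idxSafe being
+ * a throwaway local):
+ *
+ *      uint32_t idxSafe = pVCpu->em.s.idxContinueExitRec;
+ *      RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ *      if (idxSafe < RT_ELEMENTS(pVCpu->em.s.aExitRecords))
+ *      {
+ *          RT_UNTRUSTED_VALIDATED_FENCE();
+ *          rcStrict = EMHistoryExec(pVCpu, &pVCpu->em.s.aExitRecords[idxSafe], 0);
+ *      }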
+ */ + STAM_PROFILE_START(&pVCpu->em.s.StatIEMEmu, a); + + VBOXSTRICTRC rcStrict; + uint32_t idxContinueExitRec = pVCpu->em.s.idxContinueExitRec; + RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); + if (idxContinueExitRec >= RT_ELEMENTS(pVCpu->em.s.aExitRecords)) + { + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); + rcStrict = IEMExecOne(pVCpu); + } + else + { + RT_UNTRUSTED_VALIDATED_FENCE(); + rcStrict = EMHistoryExec(pVCpu, &pVCpu->em.s.aExitRecords[idxContinueExitRec], 0); + LogFlow(("emR3NemExecuteInstruction: %Rrc (EMHistoryExec)\n", VBOXSTRICTRC_VAL(rcStrict))); + } + + STAM_PROFILE_STOP(&pVCpu->em.s.StatIEMEmu, a); + + if ( rcStrict == VERR_IEM_ASPECT_NOT_IMPLEMENTED + || rcStrict == VERR_IEM_INSTR_NOT_IMPLEMENTED) + { +#ifdef VBOX_WITH_REM + STAM_PROFILE_START(&pVCpu->em.s.StatREMEmu, b); + CPUM_IMPORT_EXTRN_RET(pVCpu, ~CPUMCTX_EXTRN_KEEPER_MASK); + EMRemLock(pVM); + /* Flush the recompiler TLB if the VCPU has changed. */ + if (pVM->em.s.idLastRemCpu != pVCpu->idCpu) + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_ALL); + pVM->em.s.idLastRemCpu = pVCpu->idCpu; + + rcStrict = REMR3EmulateInstruction(pVM, pVCpu); + EMRemUnlock(pVM); + STAM_PROFILE_STOP(&pVCpu->em.s.StatREMEmu, b); +#else /* !VBOX_WITH_REM */ + NOREF(pVM); +#endif /* !VBOX_WITH_REM */ + } + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Executes one (or perhaps a few more) instruction(s). + * This is just a wrapper for discarding pszPrefix in non-logging builds. + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pszPrefix Disassembly prefix. If not NULL we'll disassemble the + * instruction and prefix the log output with this text. + * @param rcGC GC return code + */ +DECLINLINE(int) emR3NemExecuteInstruction(PVM pVM, PVMCPU pVCpu, const char *pszPrefix, int rcGC) +{ +#ifdef LOG_ENABLED + return emR3NemExecuteInstructionWorker(pVM, pVCpu, rcGC, pszPrefix); +#else + RT_NOREF_PV(pszPrefix); + return emR3NemExecuteInstructionWorker(pVM, pVCpu, rcGC); +#endif +} + +/** + * Executes one (or perhaps a few more) IO instruction(s). + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3NemExecuteIOInstruction(PVM pVM, PVMCPU pVCpu) +{ + RT_NOREF_PV(pVM); + STAM_PROFILE_START(&pVCpu->em.s.StatIOEmu, a); + + /* + * Hand it over to the interpreter. + */ + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); + VBOXSTRICTRC rcStrict; + uint32_t idxContinueExitRec = pVCpu->em.s.idxContinueExitRec; + RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); + if (idxContinueExitRec >= RT_ELEMENTS(pVCpu->em.s.aExitRecords)) + { + rcStrict = IEMExecOne(pVCpu); + LogFlow(("emR3NemExecuteIOInstruction: %Rrc (IEMExecOne)\n", VBOXSTRICTRC_VAL(rcStrict))); + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatIoIem); + } + else + { + RT_UNTRUSTED_VALIDATED_FENCE(); + rcStrict = EMHistoryExec(pVCpu, &pVCpu->em.s.aExitRecords[idxContinueExitRec], 0); + LogFlow(("emR3NemExecuteIOInstruction: %Rrc (EMHistoryExec)\n", VBOXSTRICTRC_VAL(rcStrict))); + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatIoRestarted); + } + + STAM_PROFILE_STOP(&pVCpu->em.s.StatIOEmu, a); + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Process NEM specific forced actions. + * + * This function is called when any FFs in VM_FF_HIGH_PRIORITY_PRE_RAW_MASK + * or/and VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK are pending. 
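+ *
+ * Both the NEM inner loop and the single instruction path gate the call on
+ * exactly those masks, along the lines of:
+ * @code
+ *      if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK)
+ *          || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK))
+ *          rcStrict = emR3NemForcedActions(pVM, pVCpu);
+ * @endcode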
+ * + * @returns VBox status code. May return VINF_EM_NO_MEMORY but none of the other + * EM statuses. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3NemForcedActions(PVM pVM, PVMCPU pVCpu) +{ +#ifdef VBOX_WITH_RAW_MODE + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS | VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)); +#endif + + /* + * Sync page directory should not happen in NEM mode. + */ + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) + { + Log(("NEM: TODO: Make VMCPU_FF_PGM_SYNC_CR3 / VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL quiet! (%#RX64)\n", (uint64_t)pVCpu->fLocalForcedActions)); + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL); + } + + /* + * Allocate handy pages (just in case the above actions have consumed some pages). + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY)) + { + int rc = PGMR3PhysAllocateHandyPages(pVM); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Check whether we're out of memory now. + * + * This may stem from some of the above actions or operations that has been executed + * since we ran FFs. The allocate handy pages must for instance always be followed by + * this check. + */ + if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + return VINF_EM_NO_MEMORY; + + return VINF_SUCCESS; +} + + +/** + * Executes hardware accelerated raw code. (Intel VT-x & AMD-V) + * + * This function contains the raw-mode version of the inner + * execution loop (the outer loop being in EMR3ExecuteVM()). + * + * @returns VBox status code. The most important ones are: VINF_EM_RESCHEDULE, VINF_EM_RESCHEDULE_RAW, + * VINF_EM_RESCHEDULE_REM, VINF_EM_SUSPEND, VINF_EM_RESET and VINF_EM_TERMINATE. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pfFFDone Where to store an indicator telling whether or not + * FFs were done before returning. + */ +VBOXSTRICTRC emR3NemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone) +{ + VBOXSTRICTRC rcStrict = VERR_IPE_UNINITIALIZED_STATUS; + + LogFlow(("emR3NemExecute%d: (cs:eip=%04x:%RGv)\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip)); + *pfFFDone = false; + + STAM_REL_COUNTER_INC(&pVCpu->em.s.StatNEMExecuteCalled); + + /* + * Spin till we get a forced action which returns anything but VINF_SUCCESS. + */ + for (;;) + { + STAM_PROFILE_ADV_START(&pVCpu->em.s.StatNEMEntry, a); + +#if 0 + /* Check if a forced reschedule is pending. */ + if (NEMR3IsRescheduleRequired(pVCpu)) + { + rcStrict = VINF_EM_RESCHEDULE; + break; + } +#endif + + /* + * Process high priority pre-execution raw-mode FFs. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + rcStrict = emR3NemForcedActions(pVM, pVCpu); + if (rcStrict != VINF_SUCCESS) + break; + } + +#ifdef LOG_ENABLED + /* + * Log important stuff before entering GC. 
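+ * With NEM, parts of the guest context may still be held by the backend
+ * (indicated by CPUMCTX_EXTRN_XXX bits in fExtrn), so the detailed register
+ * dump is only done when the bits it needs have already been imported;
+ * otherwise we just note that we are heading into NEMR3RunGC.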
+ */ + if (TRPMHasTrap(pVCpu)) + Log(("CPU%d: Pending hardware interrupt=0x%x cs:rip=%04X:%RGv\n", pVCpu->idCpu, TRPMGetTrapNo(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip)); + + if (!(pVCpu->cpum.GstCtx.fExtrn & ( CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS + | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_EFER))) + { + uint32_t cpl = CPUMGetGuestCPL(pVCpu); + if (pVM->cCpus == 1) + { + if (pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + Log(("NEMV86: %08x IF=%d\n", pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF)); + else if (CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx)) + Log(("NEMR%d: %04x:%RGv ESP=%RGv IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + else + Log(("NEMR%d: %04x:%08x ESP=%08X IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + } + else + { + if (pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + Log(("NEMV86-CPU%d: %08x IF=%d\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF)); + else if (CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx)) + Log(("NEMR%d-CPU%d: %04x:%RGv ESP=%RGv IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + else + Log(("NEMR%d-CPU%d: %04x:%08x ESP=%08X IF=%d IOPL=%d CR0=%x CR4=%x EFER=%x\n", cpl, pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, (uint32_t)pVCpu->cpum.GstCtx.cr0, (uint32_t)pVCpu->cpum.GstCtx.cr4, (uint32_t)pVCpu->cpum.GstCtx.msrEFER)); + } + } + else if (pVM->cCpus == 1) + Log(("NEMRx: -> NEMR3RunGC\n")); + else + Log(("NEMRx-CPU%u: -> NEMR3RunGC\n", pVCpu->idCpu)); +#endif /* LOG_ENABLED */ + + /* + * Execute the code. + */ + if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu))) + { + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatNEMEntry, a); + STAM_REL_PROFILE_START(&pVCpu->em.s.StatNEMExec, x); + rcStrict = NEMR3RunGC(pVM, pVCpu); + STAM_REL_PROFILE_STOP(&pVCpu->em.s.StatNEMExec, x); + } + else + { + /* Give up this time slice; virtual time continues */ + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatNEMEntry, a); + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatCapped, u); + RTThreadSleep(5); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatCapped, u); + rcStrict = VINF_SUCCESS; + } + + + /* + * Deal with high priority post execution FFs before doing anything else. + */ + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK)) + rcStrict = emR3HighPriorityPostForcedActions(pVM, pVCpu, rcStrict); + + /* + * Process the returned status code. 
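+ * Anything in the VINF_EM_FIRST..VINF_EM_LAST range is a scheduling request,
+ * so we break out and hand it back to the outer EMR3ExecuteVM() loop as-is.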
+ */ + if (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST) + break; + + rcStrict = emR3NemHandleRC(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict)); + if (rcStrict != VINF_SUCCESS) + break; + + /* + * Check and execute forced actions. + */ +#ifdef VBOX_HIGH_RES_TIMERS_HACK + TMTimerPollVoid(pVM, pVCpu); +#endif + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_MASK)) + { + rcStrict = emR3ForcedActions(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict)); + VBOXVMM_EM_FF_ALL_RET(pVCpu, VBOXSTRICTRC_VAL(rcStrict)); + if ( rcStrict != VINF_SUCCESS + && rcStrict != VINF_EM_RESCHEDULE_HM) + { + *pfFFDone = true; + break; + } + } + } + + /* + * Return to outer loop, making sure the fetch all state as we leave. + * + * Note! Not using CPUM_IMPORT_EXTRN_RET here, to prioritize an rcStrict error + * status over import errors. + */ + if (pVCpu->cpum.GstCtx.fExtrn) + { + int rcImport = NEMImportStateOnDemand(pVCpu, pVCpu->cpum.GstCtx.fExtrn); + AssertReturn(RT_SUCCESS(rcImport) || RT_FAILURE_NP(rcStrict), rcImport); + } +#if defined(LOG_ENABLED) && defined(DEBUG) + RTLogFlush(NULL); +#endif + return rcStrict; +} + diff --git a/src/VBox/VMM/VMMR3/EMRaw.cpp b/src/VBox/VMM/VMMR3/EMRaw.cpp new file mode 100644 index 00000000..749ab20f --- /dev/null +++ b/src/VBox/VMM/VMMR3/EMRaw.cpp @@ -0,0 +1,1518 @@ +/* $Id: EMRaw.cpp $ */ +/** @file + * EM - Execution Monitor / Manager - software virtualization + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_EM +#define VMCPU_INCL_CPUM_GST_CTX +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include "EMInternal.h" +#include +#include +#include +#include +#include +#include +#include "VMMTracing.h" + +#include +#include +#include +#include +#include + + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int emR3RawHandleRC(PVM pVM, PVMCPU pVCpu, int rc); +static int emR3RawForcedActions(PVM pVM, PVMCPU pVCpu); +DECLINLINE(int) emR3RawExecuteInstruction(PVM pVM, PVMCPU pVCpu, const char *pszPrefix, int rcGC = VINF_SUCCESS); +static int emR3RawGuestTrap(PVM pVM, PVMCPU pVCpu); +static int emR3RawPatchTrap(PVM pVM, PVMCPU pVCpu, int gcret); +static int emR3RawPrivileged(PVM pVM, PVMCPU pVCpu); +static int emR3RawExecuteIOInstruction(PVM pVM, PVMCPU pVCpu); +static int emR3RawRingSwitch(PVM pVM, PVMCPU pVCpu); +static int emR3RawUpdateForceFlag(PVM pVM, PVMCPU pVCpu, int rc); + +#define EMHANDLERC_WITH_PATM +#define emR3ExecuteInstruction emR3RawExecuteInstruction +#define emR3ExecuteIOInstruction emR3RawExecuteIOInstruction +#include "EMHandleRCTmpl.h" + + + +#ifdef VBOX_WITH_STATISTICS +/** + * Just a braindead function to keep track of cli addresses. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtrInstr The EIP of the cli instruction. + */ +static void emR3RecordCli(PVM pVM, PVMCPU pVCpu, RTGCPTR GCPtrInstr) +{ + PCLISTAT pRec; + + pRec = (PCLISTAT)RTAvlGCPtrGet(&pVCpu->em.s.pCliStatTree, GCPtrInstr); + if (!pRec) + { + /* New cli instruction; insert into the tree. */ + pRec = (PCLISTAT)MMR3HeapAllocZ(pVM, MM_TAG_EM, sizeof(*pRec)); + Assert(pRec); + if (!pRec) + return; + pRec->Core.Key = GCPtrInstr; + + char szCliStatName[32]; + RTStrPrintf(szCliStatName, sizeof(szCliStatName), "/EM/Cli/0x%RGv", GCPtrInstr); + STAM_REG(pVM, &pRec->Counter, STAMTYPE_COUNTER, szCliStatName, STAMUNIT_OCCURENCES, "Number of times cli was executed."); + + bool fRc = RTAvlGCPtrInsert(&pVCpu->em.s.pCliStatTree, &pRec->Core); + Assert(fRc); NOREF(fRc); + } + STAM_COUNTER_INC(&pRec->Counter); + STAM_COUNTER_INC(&pVCpu->em.s.StatTotalClis); +} +#endif /* VBOX_WITH_STATISTICS */ + + + +/** + * Resumes executing hypervisor after a debug event. + * + * This is kind of special since our current guest state is + * potentially out of sync. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +int emR3RawResumeHyper(PVM pVM, PVMCPU pVCpu) +{ + int rc; + Assert(pVCpu->em.s.enmState == EMSTATE_DEBUG_HYPER); + Log(("emR3RawResumeHyper: cs:eip=%RTsel:%RGr efl=%RGr\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags)); + + /* + * Resume execution. 
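+ * Setting RF in the hypervisor EFLAGS keeps an instruction breakpoint on the
+ * instruction we are resuming at from refiring immediately.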
+ */ + CPUMRawEnter(pVCpu); + CPUMSetHyperEFlags(pVCpu, CPUMGetHyperEFlags(pVCpu) | X86_EFL_RF); + rc = VMMR3ResumeHyper(pVM, pVCpu); + Log(("emR3RawResumeHyper: cs:eip=%RTsel:%RGr efl=%RGr - returned from GC with rc=%Rrc\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags, rc)); + rc = CPUMRawLeave(pVCpu, rc); + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + + /* + * Deal with the return code. + */ + rc = VBOXSTRICTRC_TODO(emR3HighPriorityPostForcedActions(pVM, pVCpu, rc)); + rc = emR3RawHandleRC(pVM, pVCpu, rc); + rc = emR3RawUpdateForceFlag(pVM, pVCpu, rc); + return rc; +} + + +/** + * Steps rawmode. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +int emR3RawStep(PVM pVM, PVMCPU pVCpu) +{ + Assert( pVCpu->em.s.enmState == EMSTATE_DEBUG_HYPER + || pVCpu->em.s.enmState == EMSTATE_DEBUG_GUEST_RAW + || pVCpu->em.s.enmState == EMSTATE_DEBUG_GUEST_REM); + int rc; + bool fGuest = pVCpu->em.s.enmState != EMSTATE_DEBUG_HYPER; +#ifndef DEBUG_sander + Log(("emR3RawStep: cs:eip=%RTsel:%RGr efl=%RGr\n", fGuest ? CPUMGetGuestCS(pVCpu) : CPUMGetHyperCS(pVCpu), + fGuest ? CPUMGetGuestEIP(pVCpu) : CPUMGetHyperEIP(pVCpu), fGuest ? CPUMGetGuestEFlags(pVCpu) : CPUMGetHyperEFlags(pVCpu))); +#endif + if (fGuest) + { + /* + * Check vital forced actions, but ignore pending interrupts and timers. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + rc = emR3RawForcedActions(pVM, pVCpu); + VBOXVMM_EM_FF_RAW_RET(pVCpu, rc); + if (rc != VINF_SUCCESS) + return rc; + } + + /* + * Set flags for single stepping. + */ + CPUMSetGuestEFlags(pVCpu, CPUMGetGuestEFlags(pVCpu) | X86_EFL_TF | X86_EFL_RF); + } + else + CPUMSetHyperEFlags(pVCpu, CPUMGetHyperEFlags(pVCpu) | X86_EFL_TF | X86_EFL_RF); + + /* + * Single step. + * We do not start time or anything, if anything we should just do a few nanoseconds. + */ + CPUMRawEnter(pVCpu); + do + { + if (pVCpu->em.s.enmState == EMSTATE_DEBUG_HYPER) + rc = VMMR3ResumeHyper(pVM, pVCpu); + else + rc = VMMR3RawRunGC(pVM, pVCpu); +#ifndef DEBUG_sander + Log(("emR3RawStep: cs:eip=%RTsel:%RGr efl=%RGr - GC rc %Rrc\n", fGuest ? CPUMGetGuestCS(pVCpu) : CPUMGetHyperCS(pVCpu), + fGuest ? CPUMGetGuestEIP(pVCpu) : CPUMGetHyperEIP(pVCpu), fGuest ? CPUMGetGuestEFlags(pVCpu) : CPUMGetHyperEFlags(pVCpu), rc)); +#endif + } while ( rc == VINF_SUCCESS + || rc == VINF_EM_RAW_INTERRUPT); + rc = CPUMRawLeave(pVCpu, rc); + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + + /* + * Make sure the trap flag is cleared. + * (Too bad if the guest is trying to single step too.) + */ + if (fGuest) + CPUMSetGuestEFlags(pVCpu, CPUMGetGuestEFlags(pVCpu) & ~X86_EFL_TF); + else + CPUMSetHyperEFlags(pVCpu, CPUMGetHyperEFlags(pVCpu) & ~X86_EFL_TF); + + /* + * Deal with the return codes. 
+ */ + rc = VBOXSTRICTRC_TODO(emR3HighPriorityPostForcedActions(pVM, pVCpu, rc)); + rc = emR3RawHandleRC(pVM, pVCpu, rc); + rc = emR3RawUpdateForceFlag(pVM, pVCpu, rc); + return rc; +} + + +#ifdef DEBUG + + +int emR3SingleStepExecRaw(PVM pVM, PVMCPU pVCpu, uint32_t cIterations) +{ + int rc = VINF_SUCCESS; + EMSTATE enmOldState = pVCpu->em.s.enmState; + pVCpu->em.s.enmState = EMSTATE_DEBUG_GUEST_RAW; + + Log(("Single step BEGIN:\n")); + for (uint32_t i = 0; i < cIterations; i++) + { + DBGFR3PrgStep(pVCpu); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "RSS"); + rc = emR3RawStep(pVM, pVCpu); + if ( rc != VINF_SUCCESS + && rc != VINF_EM_DBG_STEPPED) + break; + } + Log(("Single step END: rc=%Rrc\n", rc)); + CPUMSetGuestEFlags(pVCpu, CPUMGetGuestEFlags(pVCpu) & ~X86_EFL_TF); + pVCpu->em.s.enmState = enmOldState; + return rc; +} + +#endif /* DEBUG */ + + +/** + * Executes one (or perhaps a few more) instruction(s). + * + * @returns VBox status code suitable for EM. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rcGC GC return code + * @param pszPrefix Disassembly prefix. If not NULL we'll disassemble the + * instruction and prefix the log output with this text. + */ +#if defined(LOG_ENABLED) || defined(DOXYGEN_RUNNING) +static int emR3RawExecuteInstructionWorker(PVM pVM, PVMCPU pVCpu, int rcGC, const char *pszPrefix) +#else +static int emR3RawExecuteInstructionWorker(PVM pVM, PVMCPU pVCpu, int rcGC) +#endif +{ + int rc; + +#ifdef LOG_ENABLED + /* + * Disassemble the instruction if requested. + */ + if (pszPrefix) + { + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", pszPrefix); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, pszPrefix); + } +#endif /* LOG_ENABLED */ + + /* + * PATM is making life more interesting. + * We cannot hand anything to REM which has an EIP inside patch code. So, we'll + * tell PATM there is a trap in this code and have it take the appropriate actions + * to allow us execute the code in REM. + */ + if (PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + Log(("emR3RawExecuteInstruction: In patch block. eip=%RRv\n", (RTRCPTR)pVCpu->cpum.GstCtx.eip)); + + RTGCPTR uNewEip; + rc = PATMR3HandleTrap(pVM, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.eip, &uNewEip); + switch (rc) + { + /* + * It's not very useful to emulate a single instruction and then go back to raw + * mode; just execute the whole block until IF is set again. + */ + case VINF_SUCCESS: + Log(("emR3RawExecuteInstruction: Executing instruction starting at new address %RGv IF=%d VMIF=%x\n", + uNewEip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->em.s.pPatmGCState->uVMFlags)); + pVCpu->cpum.GstCtx.eip = uNewEip; + Assert(pVCpu->cpum.GstCtx.eip); + + if (pVCpu->cpum.GstCtx.eflags.Bits.u1IF) + { + /* + * The last instruction in the patch block needs to be executed!! (sti/sysexit for example) + */ + Log(("PATCH: IF=1 -> emulate last instruction as it can't be interrupted!!\n")); + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHIR"); + } + else if (rcGC == VINF_PATM_PENDING_IRQ_AFTER_IRET) + { + /* special case: iret, that sets IF, detected a pending irq/event */ + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHIRET"); + } + return VINF_EM_RESCHEDULE_REM; + + /* + * One instruction. 
+ */ + case VINF_PATCH_EMULATE_INSTR: + Log(("emR3RawExecuteInstruction: Emulate patched instruction at %RGv IF=%d VMIF=%x\n", + uNewEip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->em.s.pPatmGCState->uVMFlags)); + pVCpu->cpum.GstCtx.eip = uNewEip; + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHIR"); + + /* + * The patch was disabled, hand it to the REM. + */ + case VERR_PATCH_DISABLED: + Log(("emR3RawExecuteInstruction: Disabled patch -> new eip %RGv IF=%d VMIF=%x\n", + uNewEip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->em.s.pPatmGCState->uVMFlags)); + pVCpu->cpum.GstCtx.eip = uNewEip; + if (pVCpu->cpum.GstCtx.eflags.Bits.u1IF) + { + /* + * The last instruction in the patch block needs to be executed!! (sti/sysexit for example) + */ + Log(("PATCH: IF=1 -> emulate last instruction as it can't be interrupted!!\n")); + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHIR"); + } + return VINF_EM_RESCHEDULE_REM; + + /* Force continued patch exection; usually due to write monitored stack. */ + case VINF_PATCH_CONTINUE: + return VINF_SUCCESS; + + default: + AssertReleaseMsgFailed(("Unknown return code %Rrc from PATMR3HandleTrap\n", rc)); + return VERR_IPE_UNEXPECTED_STATUS; + } + } + + + /* + * Use IEM and fallback on REM if the functionality is missing. + * Once IEM gets mature enough, nothing should ever fall back. + */ +#define VBOX_WITH_FIRST_IEM_STEP_B +#if defined(VBOX_WITH_FIRST_IEM_STEP_B) || !defined(VBOX_WITH_REM) + Log(("EMINS: %04x:%RGv RSP=%RGv\n", pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, (RTGCPTR)pVCpu->cpum.GstCtx.rsp)); + STAM_PROFILE_START(&pVCpu->em.s.StatIEMEmu, a); + rc = VBOXSTRICTRC_TODO(IEMExecOne(pVCpu)); + STAM_PROFILE_STOP(&pVCpu->em.s.StatIEMEmu, a); + if (RT_SUCCESS(rc)) + { + if (rc == VINF_SUCCESS || rc == VINF_EM_RESCHEDULE) + rc = VINF_EM_RESCHEDULE; + } + else if ( rc == VERR_IEM_ASPECT_NOT_IMPLEMENTED + || rc == VERR_IEM_INSTR_NOT_IMPLEMENTED) +#endif + { +#ifdef VBOX_WITH_REM + STAM_PROFILE_START(&pVCpu->em.s.StatREMEmu, b); +# ifndef VBOX_WITH_FIRST_IEM_STEP_B + Log(("EMINS[rem]: %04x:%RGv RSP=%RGv\n", pVCpu->cpum.GstCtx.cs.Sel, (RTGCPTR)pVCpu->cpum.GstCtx.rip, (RTGCPTR)pVCpu->cpum.GstCtx.rsp)); +//# elif defined(DEBUG_bird) +// AssertFailed(); +# endif + EMRemLock(pVM); + /* Flush the recompiler TLB if the VCPU has changed. */ + if (pVM->em.s.idLastRemCpu != pVCpu->idCpu) + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_ALL); + pVM->em.s.idLastRemCpu = pVCpu->idCpu; + + rc = REMR3EmulateInstruction(pVM, pVCpu); + EMRemUnlock(pVM); + STAM_PROFILE_STOP(&pVCpu->em.s.StatREMEmu, b); +#else /* !VBOX_WITH_REM */ + NOREF(pVM); +#endif /* !VBOX_WITH_REM */ + } + return rc; +} + + +/** + * Executes one (or perhaps a few more) instruction(s). + * This is just a wrapper for discarding pszPrefix in non-logging builds. + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pszPrefix Disassembly prefix. If not NULL we'll disassemble the + * instruction and prefix the log output with this text. + * @param rcGC GC return code + */ +DECLINLINE(int) emR3RawExecuteInstruction(PVM pVM, PVMCPU pVCpu, const char *pszPrefix, int rcGC) +{ +#ifdef LOG_ENABLED + return emR3RawExecuteInstructionWorker(pVM, pVCpu, rcGC, pszPrefix); +#else + RT_NOREF_PV(pszPrefix); + return emR3RawExecuteInstructionWorker(pVM, pVCpu, rcGC); +#endif +} + +/** + * Executes one (or perhaps a few more) IO instruction(s). + * + * @returns VBox status code suitable for EM. 
+ * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3RawExecuteIOInstruction(PVM pVM, PVMCPU pVCpu) +{ + STAM_PROFILE_START(&pVCpu->em.s.StatIOEmu, a); + RT_NOREF_PV(pVM); + + /* Hand it over to the interpreter. */ + VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu); + LogFlow(("emR3RawExecuteIOInstruction: %Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatIoIem); + STAM_PROFILE_STOP(&pVCpu->em.s.StatIOEmu, a); + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Handle a guest context trap. + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3RawGuestTrap(PVM pVM, PVMCPU pVCpu) +{ + /* + * Get the trap info. + */ + uint8_t u8TrapNo; + TRPMEVENT enmType; + RTGCUINT uErrorCode; + RTGCUINTPTR uCR2; + int rc = TRPMQueryTrapAll(pVCpu, &u8TrapNo, &enmType, &uErrorCode, &uCR2, NULL /* pu8InstrLen */); + if (RT_FAILURE(rc)) + { + AssertReleaseMsgFailed(("No trap! (rc=%Rrc)\n", rc)); + return rc; + } + + +#if 1 /* Experimental: Review, disable if it causes trouble. */ + /* + * Handle traps in patch code first. + * + * We catch a few of these cases in RC before returning to R3 (#PF, #GP, #BP) + * but several traps isn't handled specially by TRPM in RC and we end up here + * instead. One example is #DE. + */ + uint32_t uCpl = CPUMGetGuestCPL(pVCpu); + if ( uCpl == 0 + && PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + LogFlow(("emR3RawGuestTrap: trap %#x in patch code; eip=%08x\n", u8TrapNo, pVCpu->cpum.GstCtx.eip)); + return emR3RawPatchTrap(pVM, pVCpu, rc); + } +#endif + + /* + * If the guest gate is marked unpatched, then we will check again if we can patch it. + * (This assumes that we've already tried and failed to dispatch the trap in + * RC for the gates that already has been patched. Which is true for most high + * volume traps, because these are handled specially, but not for odd ones like #DE.) + */ + if (TRPMR3GetGuestTrapHandler(pVM, u8TrapNo) == TRPM_INVALID_HANDLER) + { + CSAMR3CheckGates(pVM, u8TrapNo, 1); + Log(("emR3RawHandleRC: recheck gate %x -> valid=%d\n", u8TrapNo, TRPMR3GetGuestTrapHandler(pVM, u8TrapNo) != TRPM_INVALID_HANDLER)); + + /* If it was successful, then we could go back to raw mode. */ + if (TRPMR3GetGuestTrapHandler(pVM, u8TrapNo) != TRPM_INVALID_HANDLER) + { + /* Must check pending forced actions as our IDT or GDT might be out of sync. */ + rc = EMR3CheckRawForcedActions(pVM, pVCpu); + AssertRCReturn(rc, rc); + + TRPMERRORCODE enmError = uErrorCode != ~0U + ? TRPM_TRAP_HAS_ERRORCODE + : TRPM_TRAP_NO_ERRORCODE; + rc = TRPMForwardTrap(pVCpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), u8TrapNo, uErrorCode, enmError, TRPM_TRAP, -1); + if (rc == VINF_SUCCESS /* Don't use RT_SUCCESS */) + { + TRPMResetTrap(pVCpu); + return VINF_EM_RESCHEDULE_RAW; + } + AssertMsg(rc == VINF_EM_RAW_GUEST_TRAP, ("%Rrc\n", rc)); + } + } + + /* + * Scan kernel code that traps; we might not get another chance. + */ + /** @todo move this up before the dispatching? */ + if ( (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) <= 1 + && !pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + { + Assert(!PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)); + CSAMR3CheckCodeEx(pVM, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.eip); + } + + /* + * Trap specific handling. + */ + if (u8TrapNo == 6) /* (#UD) Invalid opcode. */ + { + /* + * If MONITOR & MWAIT are supported, then interpret them here. 
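+ * (MONITOR/MWAIT fault with #UD when executed outside ring 0; since raw-mode
+ * runs the guest kernel at a lowered ring, they trap here even when the
+ * guest CPUID advertises the MONITOR feature, so we interpret them instead.)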
+ */ + DISCPUSTATE cpu; + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.rip, &cpu, "Guest Trap (#UD): "); + if ( RT_SUCCESS(rc) + && (cpu.pCurInstr->uOpcode == OP_MONITOR || cpu.pCurInstr->uOpcode == OP_MWAIT)) + { + uint32_t u32Dummy, u32Features, u32ExtFeatures; + CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &u32ExtFeatures, &u32Features); + if (u32ExtFeatures & X86_CPUID_FEATURE_ECX_MONITOR) + { + rc = TRPMResetTrap(pVCpu); + AssertRC(rc); + + rc = VBOXSTRICTRC_TODO(EMInterpretInstructionDisasState(pVCpu, &cpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), + 0, EMCODETYPE_SUPERVISOR)); + if (RT_SUCCESS(rc)) + return rc; + return emR3RawExecuteInstruction(pVM, pVCpu, "Monitor: "); + } + } + } + else if (u8TrapNo == 13) /* (#GP) Privileged exception */ + { + /* + * Handle I/O bitmap? + */ + /** @todo We're not supposed to be here with a false guest trap concerning + * I/O access. We can easily handle those in RC. */ + DISCPUSTATE cpu; + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.rip, &cpu, "Guest Trap: "); + if ( RT_SUCCESS(rc) + && (cpu.pCurInstr->fOpType & DISOPTYPE_PORTIO)) + { + /* + * We should really check the TSS for the IO bitmap, but it's not like this + * lazy approach really makes things worse. + */ + rc = TRPMResetTrap(pVCpu); + AssertRC(rc); + return emR3RawExecuteInstruction(pVM, pVCpu, "IO Guest Trap: "); + } + } + +#ifdef LOG_ENABLED + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "Guest trap"); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Guest trap"); + + /* Get guest page information. */ + uint64_t fFlags = 0; + RTGCPHYS GCPhys = 0; + int rc2 = PGMGstGetPage(pVCpu, uCR2, &fFlags, &GCPhys); + Log(("emR3RawGuestTrap: cs:eip=%04x:%08x: trap=%02x err=%08x cr2=%08x cr0=%08x%s: Phys=%RGp fFlags=%08llx %s %s %s%s rc2=%d\n", + pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, u8TrapNo, uErrorCode, uCR2, (uint32_t)pVCpu->cpum.GstCtx.cr0, + (enmType == TRPM_SOFTWARE_INT) ? " software" : "", GCPhys, fFlags, + fFlags & X86_PTE_P ? "P " : "NP", fFlags & X86_PTE_US ? "U" : "S", + fFlags & X86_PTE_RW ? "RW" : "R0", fFlags & X86_PTE_G ? " G" : "", rc2)); +#endif + + /* + * #PG has CR2. + * (Because of stuff like above we must set CR2 in a delayed fashion.) + */ + if (u8TrapNo == 14 /* #PG */) + pVCpu->cpum.GstCtx.cr2 = uCR2; + + return VINF_EM_RESCHEDULE_REM; +} + + +/** + * Handle a ring switch trap. + * Need to do statistics and to install patches. The result is going to REM. + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3RawRingSwitch(PVM pVM, PVMCPU pVCpu) +{ + int rc; + DISCPUSTATE Cpu; + + /* + * sysenter, syscall & callgate + */ + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.rip, &Cpu, "RSWITCH: "); + if (RT_SUCCESS(rc)) + { + if (Cpu.pCurInstr->uOpcode == OP_SYSENTER) + { + if (pVCpu->cpum.GstCtx.SysEnter.cs != 0) + { + rc = PATMR3InstallPatch(pVM, SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), pVCpu->cpum.GstCtx.eip), + CPUMGetGuestCodeBits(pVCpu) == 32 ? 
PATMFL_CODE32 : 0); + if (RT_SUCCESS(rc)) + { + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Patched sysenter instruction"); + return VINF_EM_RESCHEDULE_RAW; + } + } + } + +#ifdef VBOX_WITH_STATISTICS + switch (Cpu.pCurInstr->uOpcode) + { + case OP_SYSENTER: + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatSysEnter); + break; + case OP_SYSEXIT: + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatSysExit); + break; + case OP_SYSCALL: + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatSysCall); + break; + case OP_SYSRET: + STAM_COUNTER_INC(&pVCpu->em.s.CTX_SUFF(pStats)->StatSysRet); + break; + } +#endif + } + else + AssertRC(rc); + + /* go to the REM to emulate a single instruction */ + return emR3RawExecuteInstruction(pVM, pVCpu, "RSWITCH: "); +} + + +/** + * Handle a trap (\#PF or \#GP) in patch code + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param gcret GC return code. + */ +static int emR3RawPatchTrap(PVM pVM, PVMCPU pVCpu, int gcret) +{ + uint8_t u8TrapNo; + int rc; + TRPMEVENT enmType; + RTGCUINT uErrorCode; + RTGCUINTPTR uCR2; + + Assert(PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)); + + if (gcret == VINF_PATM_PATCH_INT3) + { + u8TrapNo = 3; + uCR2 = 0; + uErrorCode = 0; + } + else if (gcret == VINF_PATM_PATCH_TRAP_GP) + { + /* No active trap in this case. Kind of ugly. */ + u8TrapNo = X86_XCPT_GP; + uCR2 = 0; + uErrorCode = 0; + } + else + { + rc = TRPMQueryTrapAll(pVCpu, &u8TrapNo, &enmType, &uErrorCode, &uCR2, NULL /* pu8InstrLen */); + if (RT_FAILURE(rc)) + { + AssertReleaseMsgFailed(("emR3RawPatchTrap: no trap! (rc=%Rrc) gcret=%Rrc\n", rc, gcret)); + return rc; + } + /* Reset the trap as we'll execute the original instruction again. */ + TRPMResetTrap(pVCpu); + } + + /* + * Deal with traps inside patch code. + * (This code won't run outside GC.) 
+ */ + if (u8TrapNo != 1) + { +#ifdef LOG_ENABLED + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "Trap in patch code"); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Patch code"); + + DISCPUSTATE Cpu; + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.eip, &Cpu, "Patch code: "); + if ( RT_SUCCESS(rc) + && Cpu.pCurInstr->uOpcode == OP_IRET) + { + uint32_t eip, selCS, uEFlags; + + /* Iret crashes are bad as we have already changed the flags on the stack */ + rc = PGMPhysSimpleReadGCPtr(pVCpu, &eip, pVCpu->cpum.GstCtx.esp, 4); + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &selCS, pVCpu->cpum.GstCtx.esp+4, 4); + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &uEFlags, pVCpu->cpum.GstCtx.esp+8, 4); + if (rc == VINF_SUCCESS) + { + if ( (uEFlags & X86_EFL_VM) + || (selCS & X86_SEL_RPL) == 3) + { + uint32_t selSS, esp; + + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &esp, pVCpu->cpum.GstCtx.esp + 12, 4); + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &selSS, pVCpu->cpum.GstCtx.esp + 16, 4); + + if (uEFlags & X86_EFL_VM) + { + uint32_t selDS, selES, selFS, selGS; + rc = PGMPhysSimpleReadGCPtr(pVCpu, &selES, pVCpu->cpum.GstCtx.esp + 20, 4); + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &selDS, pVCpu->cpum.GstCtx.esp + 24, 4); + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &selFS, pVCpu->cpum.GstCtx.esp + 28, 4); + rc |= PGMPhysSimpleReadGCPtr(pVCpu, &selGS, pVCpu->cpum.GstCtx.esp + 32, 4); + if (rc == VINF_SUCCESS) + { + Log(("Patch code: IRET->VM stack frame: return address %04X:%08RX32 eflags=%08x ss:esp=%04X:%08RX32\n", selCS, eip, uEFlags, selSS, esp)); + Log(("Patch code: IRET->VM stack frame: DS=%04X ES=%04X FS=%04X GS=%04X\n", selDS, selES, selFS, selGS)); + } + } + else + Log(("Patch code: IRET stack frame: return address %04X:%08RX32 eflags=%08x ss:esp=%04X:%08RX32\n", selCS, eip, uEFlags, selSS, esp)); + } + else + Log(("Patch code: IRET stack frame: return address %04X:%08RX32 eflags=%08x\n", selCS, eip, uEFlags)); + } + } +#endif /* LOG_ENABLED */ + Log(("emR3RawPatchTrap: in patch: eip=%08x: trap=%02x err=%08x cr2=%08x cr0=%08x\n", + pVCpu->cpum.GstCtx.eip, u8TrapNo, uErrorCode, uCR2, (uint32_t)pVCpu->cpum.GstCtx.cr0)); + + RTGCPTR uNewEip; + rc = PATMR3HandleTrap(pVM, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.eip, &uNewEip); + switch (rc) + { + /* + * Execute the faulting instruction. + */ + case VINF_SUCCESS: + { + /** @todo execute a whole block */ + Log(("emR3RawPatchTrap: Executing faulting instruction at new address %RGv\n", uNewEip)); + if (!(pVCpu->em.s.pPatmGCState->uVMFlags & X86_EFL_IF)) + Log(("emR3RawPatchTrap: Virtual IF flag disabled!!\n")); + + pVCpu->cpum.GstCtx.eip = uNewEip; + AssertRelease(pVCpu->cpum.GstCtx.eip); + + if (pVCpu->cpum.GstCtx.eflags.Bits.u1IF) + { + /* Windows XP lets irets fault intentionally and then takes action based on the opcode; an + * int3 patch overwrites it and leads to blue screens. Remove the patch in this case. + */ + if ( u8TrapNo == X86_XCPT_GP + && PATMIsInt3Patch(pVM, pVCpu->cpum.GstCtx.eip, NULL, NULL)) + { + /** @todo move to PATMR3HandleTrap */ + Log(("Possible Windows XP iret fault at %08RX32\n", pVCpu->cpum.GstCtx.eip)); + PATMR3RemovePatch(pVM, pVCpu->cpum.GstCtx.eip); + } + + /** @todo Knoppix 5 regression when returning VINF_SUCCESS here and going back to raw mode. */ + /* Note: possibly because a reschedule is required (e.g. iret to V86 code) */ + + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHIR"); + /* Interrupts are enabled; just go back to the original instruction. 
+ return VINF_SUCCESS; */ + } + return VINF_EM_RESCHEDULE_REM; + } + + /* + * One instruction. + */ + case VINF_PATCH_EMULATE_INSTR: + Log(("emR3RawPatchTrap: Emulate patched instruction at %RGv IF=%d VMIF=%x\n", + uNewEip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pVCpu->em.s.pPatmGCState->uVMFlags)); + pVCpu->cpum.GstCtx.eip = uNewEip; + AssertRelease(pVCpu->cpum.GstCtx.eip); + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHEMUL: "); + + /* + * The patch was disabled, hand it to the REM. + */ + case VERR_PATCH_DISABLED: + if (!(pVCpu->em.s.pPatmGCState->uVMFlags & X86_EFL_IF)) + Log(("emR3RawPatchTrap: Virtual IF flag disabled!!\n")); + pVCpu->cpum.GstCtx.eip = uNewEip; + AssertRelease(pVCpu->cpum.GstCtx.eip); + + if (pVCpu->cpum.GstCtx.eflags.Bits.u1IF) + { + /* + * The last instruction in the patch block needs to be executed!! (sti/sysexit for example) + */ + Log(("PATCH: IF=1 -> emulate last instruction as it can't be interrupted!!\n")); + return emR3RawExecuteInstruction(pVM, pVCpu, "PATCHIR"); + } + return VINF_EM_RESCHEDULE_REM; + + /* Force continued patch exection; usually due to write monitored stack. */ + case VINF_PATCH_CONTINUE: + return VINF_SUCCESS; + + /* + * Anything else is *fatal*. + */ + default: + AssertReleaseMsgFailed(("Unknown return code %Rrc from PATMR3HandleTrap!\n", rc)); + return VERR_IPE_UNEXPECTED_STATUS; + } + } + return VINF_SUCCESS; +} + + +/** + * Handle a privileged instruction. + * + * @returns VBox status code suitable for EM. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3RawPrivileged(PVM pVM, PVMCPU pVCpu) +{ + Assert(!pVCpu->cpum.GstCtx.eflags.Bits.u1VM); + + if (PATMIsEnabled(pVM)) + { + /* + * Check if in patch code. + */ + if (PATMR3IsInsidePatchJump(pVM, pVCpu->cpum.GstCtx.eip, NULL)) + { +#ifdef LOG_ENABLED + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "PRIV"); +#endif + AssertMsgFailed(("FATAL ERROR: executing random instruction inside generated patch jump %08x\n", pVCpu->cpum.GstCtx.eip)); + return VERR_EM_RAW_PATCH_CONFLICT; + } + if ( (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 0 + && !pVCpu->cpum.GstCtx.eflags.Bits.u1VM + && !PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + int rc = PATMR3InstallPatch(pVM, SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), pVCpu->cpum.GstCtx.eip), + CPUMGetGuestCodeBits(pVCpu) == 32 ? PATMFL_CODE32 : 0); + if (RT_SUCCESS(rc)) + { +#ifdef LOG_ENABLED + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "PRIV"); +#endif + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Patched privileged instruction"); + return VINF_SUCCESS; + } + } + } + +#ifdef LOG_ENABLED + if (!PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "PRIV"); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Privileged instr"); + } +#endif + + /* + * Instruction statistics and logging. 
+ */ + DISCPUSTATE Cpu; + int rc; + + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.rip, &Cpu, "PRIV: "); + if (RT_SUCCESS(rc)) + { +#ifdef VBOX_WITH_STATISTICS + PEMSTATS pStats = pVCpu->em.s.CTX_SUFF(pStats); + switch (Cpu.pCurInstr->uOpcode) + { + case OP_INVLPG: + STAM_COUNTER_INC(&pStats->StatInvlpg); + break; + case OP_IRET: + STAM_COUNTER_INC(&pStats->StatIret); + break; + case OP_CLI: + STAM_COUNTER_INC(&pStats->StatCli); + emR3RecordCli(pVM, pVCpu, pVCpu->cpum.GstCtx.rip); + break; + case OP_STI: + STAM_COUNTER_INC(&pStats->StatSti); + break; + case OP_INSB: + case OP_INSWD: + case OP_IN: + case OP_OUTSB: + case OP_OUTSWD: + case OP_OUT: + AssertMsgFailed(("Unexpected privileged exception due to port IO\n")); + break; + + case OP_MOV_CR: + if (Cpu.Param1.fUse & DISUSE_REG_GEN32) + { + //read + Assert(Cpu.Param2.fUse & DISUSE_REG_CR); + Assert(Cpu.Param2.Base.idxCtrlReg <= DISCREG_CR4); + STAM_COUNTER_INC(&pStats->StatMovReadCR[Cpu.Param2.Base.idxCtrlReg]); + } + else + { + //write + Assert(Cpu.Param1.fUse & DISUSE_REG_CR); + Assert(Cpu.Param1.Base.idxCtrlReg <= DISCREG_CR4); + STAM_COUNTER_INC(&pStats->StatMovWriteCR[Cpu.Param1.Base.idxCtrlReg]); + } + break; + + case OP_MOV_DR: + STAM_COUNTER_INC(&pStats->StatMovDRx); + break; + case OP_LLDT: + STAM_COUNTER_INC(&pStats->StatMovLldt); + break; + case OP_LIDT: + STAM_COUNTER_INC(&pStats->StatMovLidt); + break; + case OP_LGDT: + STAM_COUNTER_INC(&pStats->StatMovLgdt); + break; + case OP_SYSENTER: + STAM_COUNTER_INC(&pStats->StatSysEnter); + break; + case OP_SYSEXIT: + STAM_COUNTER_INC(&pStats->StatSysExit); + break; + case OP_SYSCALL: + STAM_COUNTER_INC(&pStats->StatSysCall); + break; + case OP_SYSRET: + STAM_COUNTER_INC(&pStats->StatSysRet); + break; + case OP_HLT: + STAM_COUNTER_INC(&pStats->StatHlt); + break; + default: + STAM_COUNTER_INC(&pStats->StatMisc); + Log4(("emR3RawPrivileged: opcode=%d\n", Cpu.pCurInstr->uOpcode)); + break; + } +#endif /* VBOX_WITH_STATISTICS */ + if ( (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 0 + && !pVCpu->cpum.GstCtx.eflags.Bits.u1VM + && CPUMGetGuestCodeBits(pVCpu) == 32) + { + STAM_PROFILE_START(&pVCpu->em.s.StatPrivEmu, a); + switch (Cpu.pCurInstr->uOpcode) + { + case OP_CLI: + pVCpu->cpum.GstCtx.eflags.u32 &= ~X86_EFL_IF; + Assert(Cpu.cbInstr == 1); + pVCpu->cpum.GstCtx.rip += Cpu.cbInstr; + STAM_PROFILE_STOP(&pVCpu->em.s.StatPrivEmu, a); + return VINF_EM_RESCHEDULE_REM; /* must go to the recompiler now! */ + + case OP_STI: + pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_IF; + EMSetInhibitInterruptsPC(pVCpu, pVCpu->cpum.GstCtx.rip + Cpu.cbInstr); + Assert(Cpu.cbInstr == 1); + pVCpu->cpum.GstCtx.rip += Cpu.cbInstr; + STAM_PROFILE_STOP(&pVCpu->em.s.StatPrivEmu, a); + return VINF_SUCCESS; + + case OP_HLT: + if (PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + PATMTRANSSTATE enmState; + RTGCPTR pOrgInstrGC = PATMR3PatchToGCPtr(pVM, pVCpu->cpum.GstCtx.eip, &enmState); + + if (enmState == PATMTRANS_OVERWRITTEN) + { + rc = PATMR3DetectConflict(pVM, pOrgInstrGC, pOrgInstrGC); + Assert(rc == VERR_PATCH_DISABLED); + /* Conflict detected, patch disabled */ + Log(("emR3RawPrivileged: detected conflict -> disabled patch at %08RX32\n", pVCpu->cpum.GstCtx.eip)); + + enmState = PATMTRANS_SAFE; + } + + /* The translation had better be successful. Otherwise we can't recover. 
*/ + AssertReleaseMsg(pOrgInstrGC && enmState != PATMTRANS_OVERWRITTEN, ("Unable to translate instruction address at %08RX32\n", pVCpu->cpum.GstCtx.eip)); + if (enmState != PATMTRANS_OVERWRITTEN) + pVCpu->cpum.GstCtx.eip = pOrgInstrGC; + } + /* no break; we could just return VINF_EM_HALT here */ + RT_FALL_THRU(); + + case OP_MOV_CR: + case OP_MOV_DR: +#ifdef LOG_ENABLED + if (PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "PRIV"); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Privileged instr"); + } +#endif + + rc = VBOXSTRICTRC_TODO(EMInterpretInstructionDisasState(pVCpu, &Cpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), + 0, EMCODETYPE_SUPERVISOR)); + if (RT_SUCCESS(rc)) + { + STAM_PROFILE_STOP(&pVCpu->em.s.StatPrivEmu, a); + + if ( Cpu.pCurInstr->uOpcode == OP_MOV_CR + && Cpu.Param1.fUse == DISUSE_REG_CR /* write */ + ) + { + /* Deal with CR0 updates inside patch code that force + * us to go to the recompiler. + */ + if ( PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.rip) + && (pVCpu->cpum.GstCtx.cr0 & (X86_CR0_WP|X86_CR0_PG|X86_CR0_PE)) != (X86_CR0_WP|X86_CR0_PG|X86_CR0_PE)) + { + PATMTRANSSTATE enmState; + RTGCPTR pOrgInstrGC = PATMR3PatchToGCPtr(pVM, pVCpu->cpum.GstCtx.rip, &enmState); + + Log(("Force recompiler switch due to cr0 (%RGp) update rip=%RGv -> %RGv (enmState=%d)\n", pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.rip, pOrgInstrGC, enmState)); + if (enmState == PATMTRANS_OVERWRITTEN) + { + rc = PATMR3DetectConflict(pVM, pOrgInstrGC, pOrgInstrGC); + Assert(rc == VERR_PATCH_DISABLED); + /* Conflict detected, patch disabled */ + Log(("emR3RawPrivileged: detected conflict -> disabled patch at %RGv\n", (RTGCPTR)pVCpu->cpum.GstCtx.rip)); + enmState = PATMTRANS_SAFE; + } + /* The translation had better be successful. Otherwise we can't recover. */ + AssertReleaseMsg(pOrgInstrGC && enmState != PATMTRANS_OVERWRITTEN, ("Unable to translate instruction address at %RGv\n", (RTGCPTR)pVCpu->cpum.GstCtx.rip)); + if (enmState != PATMTRANS_OVERWRITTEN) + pVCpu->cpum.GstCtx.rip = pOrgInstrGC; + } + + /* Reschedule is necessary as the execution/paging mode might have changed. */ + return VINF_EM_RESCHEDULE; + } + return rc; /* can return VINF_EM_HALT as well. */ + } + AssertMsgReturn(rc == VERR_EM_INTERPRETER, ("%Rrc\n", rc), rc); + break; /* fall back to the recompiler */ + } + STAM_PROFILE_STOP(&pVCpu->em.s.StatPrivEmu, a); + } + } + + if (PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + return emR3RawPatchTrap(pVM, pVCpu, VINF_PATM_PATCH_TRAP_GP); + + return emR3RawExecuteInstruction(pVM, pVCpu, "PRIV"); +} + + +/** + * Update the forced rawmode execution modifier. + * + * This function is called when we're returning from the raw-mode loop(s). If we're + * in patch code, it will set a flag forcing execution to be resumed in raw-mode, + * if not in patch code, the flag will be cleared. + * + * We should never interrupt patch code while it's being executed. Cli patches can + * contain big code blocks, but they are always executed with IF=0. Other patches + * replace single instructions and should be atomic. + * + * @returns Updated rc. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rc The result code. + */ +static int emR3RawUpdateForceFlag(PVM pVM, PVMCPU pVCpu, int rc) +{ + if (PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) /** @todo check cs selector base/type */ + { + /* ignore reschedule attempts. 
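+ * Patch code must not be left half executed, so reschedule requests are
+ * downgraded to VINF_SUCCESS here and fForceRAW keeps us in raw-mode.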
*/ + switch (rc) + { + case VINF_EM_RESCHEDULE: + case VINF_EM_RESCHEDULE_REM: + LogFlow(("emR3RawUpdateForceFlag: patch address -> force raw reschedule\n")); + rc = VINF_SUCCESS; + break; + } + pVCpu->em.s.fForceRAW = true; + } + else + pVCpu->em.s.fForceRAW = false; + return rc; +} + + +/** + * Check for pending raw actions + * + * @returns VBox status code. May return VINF_EM_NO_MEMORY but none of the other + * EM statuses. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) EMR3CheckRawForcedActions(PVM pVM, PVMCPU pVCpu) +{ + int rc = emR3RawForcedActions(pVM, pVCpu); + VBOXVMM_EM_FF_RAW_RET(pVCpu, rc); + return rc; +} + + +/** + * Process raw-mode specific forced actions. + * + * This function is called when any FFs in the VM_FF_HIGH_PRIORITY_PRE_RAW_MASK is pending. + * + * @returns VBox status code. May return VINF_EM_NO_MEMORY but none of the other + * EM statuses. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +static int emR3RawForcedActions(PVM pVM, PVMCPU pVCpu) +{ + /* + * Note that the order is *vitally* important! + * Also note that SELMR3UpdateFromCPUM may trigger VM_FF_SELM_SYNC_TSS. + */ + VBOXVMM_EM_FF_RAW(pVCpu, pVM->fGlobalForcedActions, pVCpu->fLocalForcedActions); + + /* + * Sync selector tables. + */ + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)) + { + VBOXSTRICTRC rcStrict = SELMR3UpdateFromCPUM(pVM, pVCpu); + if (rcStrict != VINF_SUCCESS) + return VBOXSTRICTRC_TODO(rcStrict); + } + + /* + * Sync IDT. + * + * The CSAMR3CheckGates call in TRPMR3SyncIDT may call PGMPrefetchPage + * and PGMShwModifyPage, so we're in for trouble if for instance a + * PGMSyncCR3+pgmR3PoolClearAll is pending. + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT)) + { + if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3) + && EMIsRawRing0Enabled(pVM) + && CSAMIsEnabled(pVM)) + { + int rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (RT_FAILURE(rc)) + return rc; + } + + int rc = TRPMR3SyncIDT(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Sync TSS. + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS)) + { + int rc = SELMR3SyncTSS(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Sync page directory. + */ + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) + { + Assert(pVCpu->em.s.enmState != EMSTATE_WAIT_SIPI); + int rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (RT_FAILURE(rc)) + return rc == VERR_PGM_NO_HYPERVISOR_ADDRESS ? VINF_EM_RESCHEDULE_REM : rc; + + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)); + + /* Prefetch pages for EIP and ESP. */ + /** @todo This is rather expensive. Should investigate if it really helps at all. 
*/ + rc = PGMPrefetchPage(pVCpu, SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), pVCpu->cpum.GstCtx.rip)); + if (rc == VINF_SUCCESS) + rc = PGMPrefetchPage(pVCpu, SELMToFlat(pVM, DISSELREG_SS, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), pVCpu->cpum.GstCtx.rsp)); + if (rc != VINF_SUCCESS) + { + if (rc != VINF_PGM_SYNC_CR3) + { + AssertLogRelMsgReturn(RT_FAILURE(rc), ("%Rrc\n", rc), VERR_IPE_UNEXPECTED_INFO_STATUS); + return rc; + } + rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + if (RT_FAILURE(rc)) + return rc; + } + /** @todo maybe prefetch the supervisor stack page as well */ + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT)); + } + + /* + * Allocate handy pages (just in case the above actions have consumed some pages). + */ + if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY)) + { + int rc = PGMR3PhysAllocateHandyPages(pVM); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Check whether we're out of memory now. + * + * This may stem from some of the above actions or operations that has been executed + * since we ran FFs. The allocate handy pages must for instance always be followed by + * this check. + */ + if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) + return VINF_EM_NO_MEMORY; + + return VINF_SUCCESS; +} + + +/** + * Executes raw code. + * + * This function contains the raw-mode version of the inner + * execution loop (the outer loop being in EMR3ExecuteVM()). + * + * @returns VBox status code. The most important ones are: VINF_EM_RESCHEDULE, + * VINF_EM_RESCHEDULE_REM, VINF_EM_SUSPEND, VINF_EM_RESET and VINF_EM_TERMINATE. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pfFFDone Where to store an indicator telling whether or not + * FFs were done before returning. + */ +int emR3RawExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone) +{ + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatRAWTotal, a); + + int rc = VERR_IPE_UNINITIALIZED_STATUS; + LogFlow(("emR3RawExecute: (cs:eip=%04x:%08x)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip)); + pVCpu->em.s.fForceRAW = false; + *pfFFDone = false; + + + /* + * + * Spin till we get a forced action or raw mode status code resulting in + * in anything but VINF_SUCCESS or VINF_EM_RESCHEDULE_RAW. + * + */ + for (;;) + { + STAM_PROFILE_ADV_START(&pVCpu->em.s.StatRAWEntry, b); + + /* + * Check various preconditions. + */ +#ifdef VBOX_STRICT + Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1VM || (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 3 || (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 0 + || (EMIsRawRing1Enabled(pVM) && (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 1)); + AssertMsg( (pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_IF) + || PATMShouldUseRawMode(pVM, (RTGCPTR)pVCpu->cpum.GstCtx.eip), + ("Tried to execute code with IF at EIP=%08x!\n", pVCpu->cpum.GstCtx.eip)); + if ( !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) + && PGMMapHasConflicts(pVM)) + { + PGMMapCheck(pVM); + AssertMsgFailed(("We should not get conflicts any longer!!!\n")); + return VERR_EM_UNEXPECTED_MAPPING_CONFLICT; + } +#endif /* VBOX_STRICT */ + + /* + * Process high priority pre-execution raw-mode FFs. 
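+ * (Selector/IDT/TSS syncing, CR3 syncs and handy page allocation; see
+ * emR3RawForcedActions above.)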
+ */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + rc = emR3RawForcedActions(pVM, pVCpu); + VBOXVMM_EM_FF_RAW_RET(pVCpu, rc); + if (rc != VINF_SUCCESS) + break; + } + + /* + * If we're going to execute ring-0 code, the guest state needs to + * be modified a bit and some of the state components (IF, SS/CS RPL, + * and perhaps EIP) needs to be stored with PATM. + */ + rc = CPUMRawEnter(pVCpu); + if (rc != VINF_SUCCESS) + { + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatRAWEntry, b); + break; + } + + /* + * Scan code before executing it. Don't bother with user mode or V86 code + */ + if ( (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) <= 1 + && !pVCpu->cpum.GstCtx.eflags.Bits.u1VM + && !PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip)) + { + STAM_PROFILE_ADV_SUSPEND(&pVCpu->em.s.StatRAWEntry, b); + CSAMR3CheckCodeEx(pVM, &pVCpu->cpum.GstCtx, pVCpu->cpum.GstCtx.eip); + STAM_PROFILE_ADV_RESUME(&pVCpu->em.s.StatRAWEntry, b); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + rc = emR3RawForcedActions(pVM, pVCpu); + VBOXVMM_EM_FF_RAW_RET(pVCpu, rc); + if (rc != VINF_SUCCESS) + { + rc = CPUMRawLeave(pVCpu, rc); + break; + } + } + } + +#ifdef LOG_ENABLED + /* + * Log important stuff before entering GC. + */ + PPATMGCSTATE pGCState = PATMR3QueryGCStateHC(pVM); + if (pVCpu->cpum.GstCtx.eflags.Bits.u1VM) + Log(("RV86: %04x:%08x IF=%d VMFlags=%x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pGCState->uVMFlags)); + else if ((pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 1) + Log(("RR0: %x:%08x ESP=%x:%08x EFL=%x IF=%d/%d VMFlags=%x PIF=%d CPL=%d (Scanned=%d)\n", + pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.esp, CPUMRawGetEFlags(pVCpu), !!(pGCState->uVMFlags & X86_EFL_IF), pVCpu->cpum.GstCtx.eflags.Bits.u1IF, + pGCState->uVMFlags, pGCState->fPIF, (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL), CSAMIsPageScanned(pVM, (RTGCPTR)pVCpu->cpum.GstCtx.eip))); +# ifdef VBOX_WITH_RAW_RING1 + else if ((pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 2) + Log(("RR1: %x:%08x ESP=%x:%08x IF=%d VMFlags=%x CPL=%x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pGCState->uVMFlags, (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL))); +# endif + else if ((pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) == 3) + Log(("RR3: %x:%08x ESP=%x:%08x IF=%d VMFlags=%x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pGCState->uVMFlags)); +#endif /* LOG_ENABLED */ + + + + /* + * Execute the code. 
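+ * VMMR3RawRunGC does the actual world switch; when execution is capped
+ * (emR3IsExecutionAllowed returns false) we give up the time slice instead.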
+ */ + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatRAWEntry, b); + if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu))) + { + STAM_PROFILE_START(&pVCpu->em.s.StatRAWExec, c); + VBOXVMM_EM_RAW_RUN_PRE(pVCpu, &pVCpu->cpum.GstCtx); + rc = VMMR3RawRunGC(pVM, pVCpu); + VBOXVMM_EM_RAW_RUN_RET(pVCpu, &pVCpu->cpum.GstCtx, rc); + STAM_PROFILE_STOP(&pVCpu->em.s.StatRAWExec, c); + } + else + { + /* Give up this time slice; virtual time continues */ + STAM_REL_PROFILE_ADV_START(&pVCpu->em.s.StatCapped, u); + RTThreadSleep(5); + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatCapped, u); + rc = VINF_SUCCESS; + } + STAM_PROFILE_ADV_START(&pVCpu->em.s.StatRAWTail, d); + + LogFlow(("RR%u-E: %08x ESP=%08x EFL=%x IF=%d/%d VMFlags=%x PIF=%d\n", + (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL), pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, CPUMRawGetEFlags(pVCpu), + !!(pGCState->uVMFlags & X86_EFL_IF), pVCpu->cpum.GstCtx.eflags.Bits.u1IF, pGCState->uVMFlags, pGCState->fPIF)); + LogFlow(("VMMR3RawRunGC returned %Rrc\n", rc)); + + + + /* + * Restore the real CPU state and deal with high priority post + * execution FFs before doing anything else. + */ + rc = CPUMRawLeave(pVCpu, rc); + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK)) + rc = VBOXSTRICTRC_TODO(emR3HighPriorityPostForcedActions(pVM, pVCpu, rc)); + +#ifdef VBOX_STRICT + /* + * Assert TSS consistency & rc vs patch code. + */ + if ( !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS | VMCPU_FF_SELM_SYNC_GDT) /* GDT implies TSS at the moment. */ + && EMIsRawRing0Enabled(pVM)) + SELMR3CheckTSS(pVM); + switch (rc) + { + case VINF_SUCCESS: + case VINF_EM_RAW_INTERRUPT: + case VINF_PATM_PATCH_TRAP_PF: + case VINF_PATM_PATCH_TRAP_GP: + case VINF_PATM_PATCH_INT3: + case VINF_PATM_CHECK_PATCH_PAGE: + case VINF_EM_RAW_EXCEPTION_PRIVILEGED: + case VINF_EM_RAW_GUEST_TRAP: + case VINF_EM_RESCHEDULE_RAW: + break; + + default: + if (PATMIsPatchGCAddr(pVM, pVCpu->cpum.GstCtx.eip) && !(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_TF)) + LogIt(0, LOG_GROUP_PATM, ("Patch code interrupted at %RRv for reason %Rrc\n", (RTRCPTR)CPUMGetGuestEIP(pVCpu), rc)); + break; + } + /* + * Let's go paranoid! + */ + if ( !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) + && PGMMapHasConflicts(pVM)) + { + PGMMapCheck(pVM); + AssertMsgFailed(("We should not get conflicts any longer!!! rc=%Rrc\n", rc)); + return VERR_EM_UNEXPECTED_MAPPING_CONFLICT; + } +#endif /* VBOX_STRICT */ + + /* + * Process the returned status code. + */ + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatRAWTail, d); + break; + } + rc = emR3RawHandleRC(pVM, pVCpu, rc); + if (rc != VINF_SUCCESS) + { + rc = emR3RawUpdateForceFlag(pVM, pVCpu, rc); + if (rc != VINF_SUCCESS) + { + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatRAWTail, d); + break; + } + } + + /* + * Check and execute forced actions. + */ +#ifdef VBOX_HIGH_RES_TIMERS_HACK + TMTimerPollVoid(pVM, pVCpu); +#endif + STAM_PROFILE_ADV_STOP(&pVCpu->em.s.StatRAWTail, d); + if ( VM_FF_IS_ANY_SET(pVM, ~VM_FF_HIGH_PRIORITY_PRE_RAW_MASK | VM_FF_PGM_NO_MEMORY) + || VMCPU_FF_IS_ANY_SET(pVCpu, ~VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK)) + { + Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1VM || (pVCpu->cpum.GstCtx.ss.Sel & X86_SEL_RPL) != (EMIsRawRing1Enabled(pVM) ? 
2U : 1U)); + + STAM_REL_PROFILE_ADV_SUSPEND(&pVCpu->em.s.StatRAWTotal, a); + rc = emR3ForcedActions(pVM, pVCpu, rc); + VBOXVMM_EM_FF_ALL_RET(pVCpu, rc); + STAM_REL_PROFILE_ADV_RESUME(&pVCpu->em.s.StatRAWTotal, a); + if ( rc != VINF_SUCCESS + && rc != VINF_EM_RESCHEDULE_RAW) + { + rc = emR3RawUpdateForceFlag(pVM, pVCpu, rc); + if (rc != VINF_SUCCESS) + { + *pfFFDone = true; + break; + } + } + } + } + + /* + * Return to outer loop. + */ +#if defined(LOG_ENABLED) && defined(DEBUG) + RTLogFlush(NULL); +#endif + STAM_REL_PROFILE_ADV_STOP(&pVCpu->em.s.StatRAWTotal, a); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/FTM.cpp b/src/VBox/VMM/VMMR3/FTM.cpp new file mode 100644 index 00000000..d1acc040 --- /dev/null +++ b/src/VBox/VMM/VMMR3/FTM.cpp @@ -0,0 +1,1368 @@ +/* $Id: FTM.cpp $ */ +/** @file + * FTM - Fault Tolerance Manager + */ + +/* + * Copyright (C) 2010-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_FTM +#include +#include +#include +#include +#include +#include +#include "FTMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/******************************************************************************* + * Structures and Typedefs * + *******************************************************************************/ + +/** + * TCP stream header. + * + * This is an extra layer for fixing the problem with figuring out when the SSM + * stream ends. + */ +typedef struct FTMTCPHDR +{ + /** Magic value. */ + uint32_t u32Magic; + /** The size of the data block following this header. + * 0 indicates the end of the stream, while UINT32_MAX indicates + * cancelation. */ + uint32_t cb; +} FTMTCPHDR; +/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */ +#define FTMTCPHDR_MAGIC UINT32_C(0x19471205) +/** The max block size. */ +#define FTMTCPHDR_MAX_SIZE UINT32_C(0x00fffff8) + +/** + * TCP stream header. + * + * This is an extra layer for fixing the problem with figuring out when the SSM + * stream ends. + */ +typedef struct FTMTCPHDRMEM +{ + /** Magic value. */ + uint32_t u32Magic; + /** Size (Uncompressed) of the pages following the header. */ + uint32_t cbPageRange; + /** GC Physical address of the page(s) to sync. */ + RTGCPHYS GCPhys; + /** The size of the data block following this header. + * 0 indicates the end of the stream, while UINT32_MAX indicates + * cancelation. 
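+ *
+ * On the wire each memory block is this header followed directly by the page
+ * data; a header with cb set to 0 terminates the sequence (see
+ * ftmR3SyncDirtyPage and ftmR3SyncMem).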
*/ + uint32_t cb; +} FTMTCPHDRMEM; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n"; + +static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser); + +/** + * Initializes the FTM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) FTMR3Init(PVM pVM) +{ + /* + * Assert alignment and sizes. + */ + AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding)); + AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t)); + + /** @todo saved state for master nodes! */ + pVM->ftm.s.pszAddress = NULL; + pVM->ftm.s.pszPassword = NULL; + pVM->fFaultTolerantMaster = false; + pVM->ftm.s.fIsStandbyNode = false; + pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER; + pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT; + pVM->ftm.s.hSocket = NIL_RTSOCKET; + + /* + * Initialize the PGM critical section. + */ + int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM"); + AssertRCReturn(rc, rc); + + /* + * Register statistics. + */ + STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaVM, STAMTYPE_COUNTER, "/FT/Sync/DeltaVM", STAMUNIT_OCCURENCES, "Number of delta vm syncs."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatFullSync, STAMTYPE_COUNTER, "/FT/Sync/Full", STAMUNIT_OCCURENCES, "Number of full vm syncs."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaMem, STAMTYPE_COUNTER, "/FT/Sync/DeltaMem", STAMUNIT_OCCURENCES, "Number of delta mem syncs."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointStorage, STAMTYPE_COUNTER, "/FT/Checkpoint/Storage", STAMUNIT_OCCURENCES, "Number of storage checkpoints."); + STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointNetwork, STAMTYPE_COUNTER, "/FT/Checkpoint/Network", STAMUNIT_OCCURENCES, "Number of network checkpoints."); +#ifdef VBOX_WITH_STATISTICS + STAM_REG(pVM, &pVM->ftm.s.StatCheckpoint, STAMTYPE_PROFILE, "/FT/Checkpoint", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint."); + STAM_REG(pVM, &pVM->ftm.s.StatCheckpointPause, STAMTYPE_PROFILE, "/FT/Checkpoint/Pause", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint."); + STAM_REG(pVM, &pVM->ftm.s.StatCheckpointResume, STAMTYPE_PROFILE, "/FT/Checkpoint/Resume", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint."); + STAM_REG(pVM, &pVM->ftm.s.StatSentMemRAM, STAMTYPE_COUNTER, "/FT/Sent/Mem/RAM", STAMUNIT_BYTES, "The amount of memory pages that was sent."); + STAM_REG(pVM, &pVM->ftm.s.StatSentMemMMIO2, STAMTYPE_COUNTER, "/FT/Sent/Mem/MMIO2", STAMUNIT_BYTES, "The amount of memory pages that was sent."); + STAM_REG(pVM, &pVM->ftm.s.StatSentMemShwROM, STAMTYPE_COUNTER, "/FT/Sent/Mem/ShwROM", STAMUNIT_BYTES, "The amount of 
memory pages that was sent."); + STAM_REG(pVM, &pVM->ftm.s.StatSentStateWrite, STAMTYPE_COUNTER, "/FT/Sent/State/Writes", STAMUNIT_BYTES, "The nr of write calls."); +#endif + return VINF_SUCCESS; +} + +/** + * Terminates the FTM. + * + * Termination means cleaning up and freeing all resources, + * the VM itself is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) FTMR3Term(PVM pVM) +{ + if (pVM->ftm.s.hShutdownEvent != NIL_RTSEMEVENT) + { + RTSemEventDestroy(pVM->ftm.s.hShutdownEvent); + pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT; + } + if (pVM->ftm.s.hSocket != NIL_RTSOCKET) + { + RTTcpClientClose(pVM->ftm.s.hSocket); + pVM->ftm.s.hSocket = NIL_RTSOCKET; + } + if (pVM->ftm.s.standby.hServer) + { + RTTcpServerDestroy(pVM->ftm.s.standby.hServer); + pVM->ftm.s.standby.hServer = NULL; + } + if (pVM->ftm.s.pszAddress) + RTMemFree(pVM->ftm.s.pszAddress); + if (pVM->ftm.s.pszPassword) + RTMemFree(pVM->ftm.s.pszPassword); + + /* Remove all pending memory updates. */ + if (pVM->ftm.s.standby.pPhysPageTree) + { + RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, NULL); + pVM->ftm.s.standby.pPhysPageTree = NULL; + } + + pVM->ftm.s.pszAddress = NULL; + pVM->ftm.s.pszPassword = NULL; + + PDMR3CritSectDelete(&pVM->ftm.s.CritSect); + return VINF_SUCCESS; +} + + +static int ftmR3TcpWriteACK(PVM pVM) +{ + int rc = RTTcpWrite(pVM->ftm.s.hSocket, RT_STR_TUPLE("ACK\n")); + if (RT_FAILURE(rc)) + { + LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc)); + } + return rc; +} + + +static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL) +{ + char szMsg[256]; + size_t cch; + if (pszMsgText && *pszMsgText) + { + cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText); + for (size_t off = 6; off + 1 < cch; off++) + if (szMsg[off] == '\n') + szMsg[off] = '\r'; + } + else + cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2); + int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch); + if (RT_FAILURE(rc)) + LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc)); + return rc; +} + +/** + * Reads a string from the socket. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pszBuf The output buffer. + * @param cchBuf The size of the output buffer. + * + */ +static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf) +{ + char *pszStart = pszBuf; + RTSOCKET Sock = pVM->ftm.s.hSocket; + + AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR); + *pszBuf = '\0'; + + /* dead simple approach. */ + for (;;) + { + char ch; + int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL); + if (RT_FAILURE(rc)) + { + LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart)); + return rc; + } + if ( ch == '\n' + || ch == '\0') + return VINF_SUCCESS; + if (cchBuf <= 1) + { + LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart)); + return VERR_BUFFER_OVERFLOW; + } + *pszBuf++ = ch; + *pszBuf = '\0'; + cchBuf--; + } +} + +/** + * Reads an ACK or NACK. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszWhich Which ACK is this this? + * @param pszNAckMsg Optional NACK message. 
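+ *
+ * The peer answers with a single line: "ACK\n" on success, or "NACK=<rc>\n" /
+ * "NACK=<rc>;<message>\n" on failure (newlines in the message are transmitted
+ * as '\r', see ftmR3TcpWriteNACK).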
+ */
+static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
+{
+ char szMsg[256];
+ int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
+ if (RT_FAILURE(rc))
+ return rc;
+
+ if (!strcmp(szMsg, "ACK"))
+ return VINF_SUCCESS;
+
+ if (!strncmp(szMsg, RT_STR_TUPLE("NACK=")))
+ {
+ char *pszMsgText = strchr(szMsg, ';');
+ if (pszMsgText)
+ *pszMsgText++ = '\0';
+
+ int32_t vrc2;
+ rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
+ if (rc == VINF_SUCCESS)
+ {
+ /*
+ * Well formed NACK, transform it into an error.
+ */
+ if (pszNAckMsg)
+ {
+ LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
+ return VERR_INTERNAL_ERROR;
+ }
+
+ if (pszMsgText)
+ {
+ pszMsgText = RTStrStrip(pszMsgText);
+ for (size_t off = 0; pszMsgText[off]; off++)
+ if (pszMsgText[off] == '\r')
+ pszMsgText[off] = '\n';
+
+ LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
+ }
+ return VERR_INTERNAL_ERROR_2;
+ }
+
+ if (pszMsgText)
+ pszMsgText[-1] = ';';
+ }
+ return VERR_INTERNAL_ERROR_3;
+}
+
+/**
+ * Submits a command to the destination and waits for the ACK.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pszCommand The command.
+ * @param fWaitForAck Whether to wait for the ACK.
+ */
+static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
+{
+ int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), RT_STR_TUPLE("\n"));
+ if (RT_FAILURE(rc))
+ return rc;
+ if (!fWaitForAck)
+ return VINF_SUCCESS;
+ return ftmR3TcpReadACK(pVM, pszCommand);
+}
+
+/**
+ * @interface_method_impl{SSMSTRMOPS,pfnWrite}
+ */
+static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
+{
+ PVM pVM = (PVM)pvUser;
+ NOREF(offStream);
+
+ AssertReturn(cbToWrite > 0, VINF_SUCCESS);
+ AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
+ AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
+
+ STAM_COUNTER_INC(&pVM->ftm.s.StatSentStateWrite);
+ for (;;)
+ {
+ FTMTCPHDR Hdr;
+ Hdr.u32Magic = FTMTCPHDR_MAGIC;
+ Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
+ int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
+ return rc;
+ }
+ pVM->ftm.s.StatSentState.c += Hdr.cb + sizeof(Hdr);
+ pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
+ if (Hdr.cb == cbToWrite)
+ return VINF_SUCCESS;
+
+ /* advance */
+ cbToWrite -= Hdr.cb;
+ pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
+ }
+}
+
+
+/**
+ * Selects and polls for the close condition.
+ *
+ * We can use a relatively high poll timeout here since it's only used to get
+ * us out of error paths. In the normal course of events, we'll get an
+ * end-of-stream header.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The cross context VM structure.
+ */ +static int ftmR3TcpReadSelect(PVM pVM) +{ + int rc; + do + { + rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000); + if (RT_FAILURE(rc) && rc != VERR_TIMEOUT) + { + pVM->ftm.s.syncstate.fIOError = true; + LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc)); + break; + } + if (pVM->ftm.s.syncstate.fStopReading) + { + rc = VERR_EOF; + break; + } + } while (rc == VERR_TIMEOUT); + return rc; +} + + +/** + * @interface_method_impl{SSMSTRMOPS,pfnRead} + */ +static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead) +{ + PVM pVM = (PVM)pvUser; + AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE); + NOREF(offStream); + + for (;;) + { + int rc; + + /* + * Check for various conditions and may have been signalled. + */ + if (pVM->ftm.s.syncstate.fEndOfStream) + return VERR_EOF; + if (pVM->ftm.s.syncstate.fStopReading) + return VERR_EOF; + if (pVM->ftm.s.syncstate.fIOError) + return VERR_IO_GEN_FAILURE; + + /* + * If there is no more data in the current block, read the next + * block header. + */ + if (!pVM->ftm.s.syncstate.cbReadBlock) + { + rc = ftmR3TcpReadSelect(pVM); + if (RT_FAILURE(rc)) + return rc; + FTMTCPHDR Hdr; + rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL); + if (RT_FAILURE(rc)) + { + pVM->ftm.s.syncstate.fIOError = true; + LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc)); + return rc; + } + pVM->ftm.s.StatReceivedState.c += sizeof(Hdr); + + if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC + || Hdr.cb > FTMTCPHDR_MAX_SIZE + || Hdr.cb == 0)) + { + if ( Hdr.u32Magic == FTMTCPHDR_MAGIC + && ( Hdr.cb == 0 + || Hdr.cb == UINT32_MAX) + ) + { + pVM->ftm.s.syncstate.fEndOfStream = true; + pVM->ftm.s.syncstate.cbReadBlock = 0; + return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF; + } + pVM->ftm.s.syncstate.fIOError = true; + LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb)); + return VERR_IO_GEN_FAILURE; + } + + pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb; + if (pVM->ftm.s.syncstate.fStopReading) + return VERR_EOF; + } + + /* + * Read more data. + */ + rc = ftmR3TcpReadSelect(pVM); + if (RT_FAILURE(rc)) + return rc; + + uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead); + rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead); + if (RT_FAILURE(rc)) + { + pVM->ftm.s.syncstate.fIOError = true; + LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb)); + return rc; + } + if (pcbRead) + { + cb = (uint32_t)*pcbRead; + pVM->ftm.s.StatReceivedState.c += cb; + pVM->ftm.s.syncstate.uOffStream += cb; + pVM->ftm.s.syncstate.cbReadBlock -= cb; + return VINF_SUCCESS; + } + pVM->ftm.s.StatReceivedState.c += cb; + pVM->ftm.s.syncstate.uOffStream += cb; + pVM->ftm.s.syncstate.cbReadBlock -= cb; + if (cbToRead == cb) + return VINF_SUCCESS; + + /* Advance to the next block. 
*/ + cbToRead -= cb; + pvBuf = (uint8_t *)pvBuf + cb; + } +} + + +/** + * @interface_method_impl{SSMSTRMOPS,pfnSeek} + */ +static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual) +{ + NOREF(pvUser); NOREF(offSeek); NOREF(uMethod); NOREF(poffActual); + return VERR_NOT_SUPPORTED; +} + + +/** + * @interface_method_impl{SSMSTRMOPS,pfnTell} + */ +static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser) +{ + PVM pVM = (PVM)pvUser; + return pVM->ftm.s.syncstate.uOffStream; +} + + +/** + * @interface_method_impl{SSMSTRMOPS,pfnSize} + */ +static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb) +{ + NOREF(pvUser); NOREF(pcb); + return VERR_NOT_SUPPORTED; +} + + +/** + * @interface_method_impl{SSMSTRMOPS,pfnIsOk} + */ +static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser) +{ + PVM pVM = (PVM)pvUser; + + if (pVM->fFaultTolerantMaster) + { + /* Poll for incoming NACKs and errors from the other side */ + int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0); + if (rc != VERR_TIMEOUT) + { + if (RT_SUCCESS(rc)) + { + LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n")); + rc = VERR_SSM_CANCELLED; + } + else + LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc)); + return rc; + } + } + + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{SSMSTRMOPS,pfnClose} + */ +static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCancelled) +{ + PVM pVM = (PVM)pvUser; + + if (pVM->fFaultTolerantMaster) + { + FTMTCPHDR EofHdr; + EofHdr.u32Magic = FTMTCPHDR_MAGIC; + EofHdr.cb = fCancelled ? UINT32_MAX : 0; + int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr)); + if (RT_FAILURE(rc)) + { + LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc)); + return rc; + } + } + else + { + ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true); + } + + return VINF_SUCCESS; +} + + +/** + * Method table for a TCP based stream. + */ +static SSMSTRMOPS const g_ftmR3TcpOps = +{ + SSMSTRMOPS_VERSION, + ftmR3TcpOpWrite, + ftmR3TcpOpRead, + ftmR3TcpOpSeek, + ftmR3TcpOpTell, + ftmR3TcpOpSize, + ftmR3TcpOpIsOk, + ftmR3TcpOpClose, + SSMSTRMOPS_VERSION +}; + + +/** + * VMR3ReqCallWait callback + * + * @param pVM The cross context VM structure. + * + */ +static DECLCALLBACK(void) ftmR3WriteProtectMemory(PVM pVM) +{ + int rc = PGMR3PhysWriteProtectRAM(pVM); + AssertRC(rc); +} + + +/** + * Sync the VM state + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int ftmR3PerformFullSync(PVM pVM) +{ + bool fSuspended = false; + + int rc = VMR3Suspend(pVM->pUVM, VMSUSPENDREASON_FTM_SYNC); + AssertRCReturn(rc, rc); + + STAM_REL_COUNTER_INC(&pVM->ftm.s.StatFullSync); + + RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */ + + /* Reset the sync state. */ + pVM->ftm.s.syncstate.uOffStream = 0; + pVM->ftm.s.syncstate.cbReadBlock = 0; + pVM->ftm.s.syncstate.fStopReading = false; + pVM->ftm.s.syncstate.fIOError = false; + pVM->ftm.s.syncstate.fEndOfStream = false; + + rc = ftmR3TcpSubmitCommand(pVM, "full-sync"); + AssertRC(rc); + + pVM->ftm.s.fDeltaLoadSaveActive = false; + rc = VMR3SaveFT(pVM->pUVM, &g_ftmR3TcpOps, pVM, &fSuspended, false /* fSkipStateChanges */); + AssertRC(rc); + + rc = ftmR3TcpReadACK(pVM, "full-sync-complete"); + AssertRC(rc); + + RTSocketRelease(pVM->ftm.s.hSocket); + + /* Write protect all memory. 
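+ * (PGMR3PhysWriteProtectRAM on the EMT, so the following delta syncs only have
+ * to send pages dirtied after this point.)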
*/ + rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM); + AssertRCReturn(rc, rc); + + rc = VMR3Resume(pVM->pUVM, VMRESUMEREASON_FTM_SYNC); + AssertRC(rc); + + return rc; +} + + +/** + * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages + * + * @param pVM The cross context VM structure. + * @param GCPhys GC physical address + * @param pRange HC virtual address of the page(s) + * @param cbRange Size of the dirty range in bytes. + * @param pvUser User argument + */ +static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser) +{ + NOREF(pvUser); + FTMTCPHDRMEM Hdr; + Hdr.u32Magic = FTMTCPHDR_MAGIC; + Hdr.GCPhys = GCPhys; + Hdr.cbPageRange = cbRange; + Hdr.cb = cbRange; + /** @todo compress page(s). */ + int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb); + if (RT_FAILURE(rc)) + { + LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb)); + return rc; + } + pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr); + +#ifdef VBOX_WITH_STATISTICS + switch (PGMPhysGetPageType(pVM, GCPhys)) + { + case PGMPAGETYPE_RAM: + pVM->ftm.s.StatSentMemRAM.c += Hdr.cb + sizeof(Hdr); + break; + + case PGMPAGETYPE_MMIO2: + pVM->ftm.s.StatSentMemMMIO2.c += Hdr.cb + sizeof(Hdr); + break; + + case PGMPAGETYPE_ROM_SHADOW: + pVM->ftm.s.StatSentMemShwROM.c += Hdr.cb + sizeof(Hdr); + break; + + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: + AssertFailed(); + break; + + default: + AssertFailed(); + break; + } +#endif + + return (pVM->ftm.s.fCheckpointingActive) ? VERR_INTERRUPTED : VINF_SUCCESS; +} + +/** + * Thread function which starts syncing process for this master VM + * + * @param hThread The thread handle. + * @param pvUser Pointer to the VM. + * @return VINF_SUCCESS (ignored). + * + */ +static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD hThread, void *pvUser) +{ + int rc = VINF_SUCCESS; + PVM pVM = (PVM)pvUser; + NOREF(hThread); + + for (;;) + { + /* + * Try connect to the standby machine. + */ + Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort)); + rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket); + if (RT_SUCCESS(rc)) + { + Log(("ftmR3MasterThread: CONNECTED\n")); + + /* Disable Nagle. */ + rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/); + AssertRC(rc); + + /* Read and check the welcome message. */ + char szLine[RT_MAX(128, sizeof(g_szWelcome))]; + RT_ZERO(szLine); + rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL); + if ( RT_SUCCESS(rc) + && !strcmp(szLine, g_szWelcome)) + { + /* password */ + if (pVM->ftm.s.pszPassword) + rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword)); + + if (RT_SUCCESS(rc)) + { + /* ACK */ + rc = ftmR3TcpReadACK(pVM, "password", "Invalid password"); + if (RT_SUCCESS(rc)) + { + /** @todo verify VM config. */ + break; + } + } + } + /* Failed, so don't bother anymore. */ + return VINF_SUCCESS; + } + rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, 1000 /* 1 second */); + if (rc != VERR_TIMEOUT) + return VINF_SUCCESS; /* told to quit */ + } + + /* Successfully initialized the connection to the standby node. + * Start the sync process. + */ + + /* First sync all memory and write protect everything so + * we can send changed pages later on. 
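+ * After the full sync the loop below wakes up every uInterval ms, submits a
+ * "mem-sync" command, sends the dirty page ranges followed by a terminating
+ * zero header, and waits for the standby's ACK.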
+ */ + + rc = ftmR3PerformFullSync(pVM); + + for (;;) + { + rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval); + if (rc != VERR_TIMEOUT) + break; /* told to quit */ + + if (!pVM->ftm.s.fCheckpointingActive) + { + rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY); + AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc)); + + rc = ftmR3TcpSubmitCommand(pVM, "mem-sync"); + AssertRC(rc); + + /* sync the changed memory with the standby node. */ + /* Write protect all memory. */ + if (!pVM->ftm.s.fCheckpointingActive) + { + rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM); + AssertRC(rc); + } + + /* Enumerate all dirty pages and send them to the standby VM. */ + if (!pVM->ftm.s.fCheckpointingActive) + { + rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */); + Assert(rc == VINF_SUCCESS || rc == VERR_INTERRUPTED); + } + + /* Send last memory header to signal the end. */ + FTMTCPHDRMEM Hdr; + Hdr.u32Magic = FTMTCPHDR_MAGIC; + Hdr.GCPhys = 0; + Hdr.cbPageRange = 0; + Hdr.cb = 0; + rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr)); + if (RT_FAILURE(rc)) + LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb)); + + rc = ftmR3TcpReadACK(pVM, "mem-sync-complete"); + AssertRC(rc); + + PDMCritSectLeave(&pVM->ftm.s.CritSect); + } + } + return rc; +} + +/** + * Syncs memory from the master VM + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int ftmR3SyncMem(PVM pVM) +{ + while (true) + { + FTMTCPHDRMEM Hdr; + RTGCPHYS GCPhys; + + /* Read memory header. */ + int rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL); + if (RT_FAILURE(rc)) + { + Log(("RTTcpRead failed with %Rrc\n", rc)); + break; + } + pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr); + + if (Hdr.cb == 0) + break; /* end of sync. */ + + Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */ + GCPhys = Hdr.GCPhys; + + /* Must be a multiple of PAGE_SIZE. */ + Assert((Hdr.cbPageRange & 0xfff) == 0); + + while (Hdr.cbPageRange) + { + PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)RTAvlGCPhysGet(&pVM->ftm.s.standby.pPhysPageTree, GCPhys); + if (!pNode) + { + /* Allocate memory for the node and page. */ + pNode = (PFTMPHYSPAGETREENODE)RTMemAllocZ(sizeof(*pNode) + PAGE_SIZE); + AssertBreak(pNode); + + /* Insert the node into the tree. */ + pNode->Core.Key = GCPhys; + pNode->pPage = (void *)(pNode + 1); + bool fRet = RTAvlGCPhysInsert(&pVM->ftm.s.standby.pPhysPageTree, &pNode->Core); + Assert(fRet); NOREF(fRet); + } + + /* Fetch the page. */ + rc = RTTcpRead(pVM->ftm.s.hSocket, pNode->pPage, PAGE_SIZE, NULL); + if (RT_FAILURE(rc)) + { + Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc)); + break; + } + pVM->ftm.s.StatReceivedMem.c += PAGE_SIZE; + Hdr.cbPageRange -= PAGE_SIZE; + GCPhys += PAGE_SIZE; + } + } + return VINF_SUCCESS; +} + + +/** + * Callback handler for RTAvlGCPhysDestroy + * + * @returns 0 to continue, otherwise stop + * @param pBaseNode Node to destroy + * @param pvUser Pointer to the VM. + */ +static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser) +{ + PVM pVM = (PVM)pvUser; + PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)pBaseNode; + + if (pVM) /* NULL when the VM is destroyed. */ + { + /* Update the guest memory of the standby VM. 
*/ + int rc = PGMR3PhysWriteExternal(pVM, pNode->Core.Key, pNode->pPage, PAGE_SIZE, PGMACCESSORIGIN_FTM); + AssertRC(rc); + } + RTMemFree(pNode); + return 0; +} + +/** + * Thread function which monitors the health of the master VM + * + * @param hThread The thread handle. + * @param pvUser Pointer to the VM. + * @return VINF_SUCCESS (ignored). + * + */ +static DECLCALLBACK(int) ftmR3StandbyThread(RTTHREAD hThread, void *pvUser) +{ + PVM pVM = (PVM)pvUser; + NOREF(hThread); + + for (;;) + { + uint64_t u64TimeNow; + + int rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval); + if (rc != VERR_TIMEOUT) + break; /* told to quit */ + + if (pVM->ftm.s.standby.u64LastHeartbeat) + { + u64TimeNow = RTTimeMilliTS(); + + if (u64TimeNow > pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 4) + { + /* Timeout; prepare to fallover. */ + LogRel(("FTSync: TIMEOUT (%RX64 vs %RX64 ms): activate standby VM!\n", u64TimeNow, pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 2)); + + pVM->ftm.s.fActivateStandby = true; + /** @todo prevent split-brain. */ + break; + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Listen for incoming traffic destined for the standby VM. + * + * @copydoc FNRTTCPSERVE + * + * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP. + */ +static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET hSocket, void *pvUser) +{ + PVM pVM = (PVM)pvUser; + + pVM->ftm.s.hSocket = hSocket; + + /* + * Disable Nagle. + */ + int rc = RTTcpSetSendCoalescing(hSocket, false /*fEnable*/); + AssertRC(rc); + + /* Send the welcome message to the master node. */ + rc = RTTcpWrite(hSocket, g_szWelcome, sizeof(g_szWelcome) - 1); + if (RT_FAILURE(rc)) + { + LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc)); + return VINF_SUCCESS; + } + + /* + * Password. + */ + const char *pszPassword = pVM->ftm.s.pszPassword; + if (pszPassword) + { + unsigned off = 0; + while (pszPassword[off]) + { + char ch; + rc = RTTcpRead(hSocket, &ch, sizeof(ch), NULL); + if ( RT_FAILURE(rc) + || pszPassword[off] != ch) + { + if (RT_FAILURE(rc)) + LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc)); + else + LogRel(("FTSync: Invalid password (off=%u)\n", off)); + ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE); + return VINF_SUCCESS; + } + off++; + } + } + rc = ftmR3TcpWriteACK(pVM); + if (RT_FAILURE(rc)) + return VINF_SUCCESS; + + /** @todo verify VM config. */ + + /* + * Stop the server. + * + * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior + * to it we must not return that value! + */ + RTTcpServerShutdown(pVM->ftm.s.standby.hServer); + + /* + * Command processing loop. + */ + //bool fDone = false; + for (;;) + { + bool fFullSync = false; + char szCmd[128]; + + rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd)); + if (RT_FAILURE(rc)) + break; + + pVM->ftm.s.standby.u64LastHeartbeat = RTTimeMilliTS(); + if (!strcmp(szCmd, "mem-sync")) + { + rc = ftmR3TcpWriteACK(pVM); + AssertRC(rc); + if (RT_FAILURE(rc)) + continue; + + rc = ftmR3SyncMem(pVM); + AssertRC(rc); + + rc = ftmR3TcpWriteACK(pVM); + AssertRC(rc); + } + else + if ( !strcmp(szCmd, "checkpoint") + || !strcmp(szCmd, "full-sync") + || (fFullSync = true)) /* intended assignment */ + { + rc = ftmR3TcpWriteACK(pVM); + AssertRC(rc); + if (RT_FAILURE(rc)) + continue; + + /* Flush all pending memory updates. 
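+ * Destroying the tree with pvUser = pVM makes ftmR3PageTreeDestroyCallback
+ * write each buffered page into guest memory before freeing its node.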
*/ + if (pVM->ftm.s.standby.pPhysPageTree) + { + RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, pVM); + pVM->ftm.s.standby.pPhysPageTree = NULL; + } + + RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */ + + /* Reset the sync state. */ + pVM->ftm.s.syncstate.uOffStream = 0; + pVM->ftm.s.syncstate.cbReadBlock = 0; + pVM->ftm.s.syncstate.fStopReading = false; + pVM->ftm.s.syncstate.fIOError = false; + pVM->ftm.s.syncstate.fEndOfStream = false; + + pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false); + rc = VMR3LoadFromStreamFT(pVM->pUVM, &g_ftmR3TcpOps, pVM); + pVM->ftm.s.fDeltaLoadSaveActive = false; + RTSocketRelease(pVM->ftm.s.hSocket); + AssertRC(rc); + if (RT_FAILURE(rc)) + { + LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc)); + ftmR3TcpWriteNACK(pVM, rc); + continue; + } + + /* The EOS might not have been read, make sure it is. */ + pVM->ftm.s.syncstate.fStopReading = false; + size_t cbRead; + rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead); + if (rc != VERR_EOF) + { + LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc)); + ftmR3TcpWriteNACK(pVM, rc); + continue; + } + + rc = ftmR3TcpWriteACK(pVM); + AssertRC(rc); + } + } + LogFlowFunc(("returns mRc=%Rrc\n", rc)); + return VERR_TCP_SERVER_STOP; +} + +/** + * Powers on the fault tolerant virtual machine. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param fMaster FT master or standby + * @param uInterval FT sync interval + * @param pszAddress Standby VM address + * @param uPort Standby VM port + * @param pszPassword FT password (NULL for none) + * + * @thread Any thread. + * @vmstate Created + * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby) + */ +VMMR3DECL(int) FTMR3PowerOn(PUVM pUVM, bool fMaster, unsigned uInterval, + const char *pszAddress, unsigned uPort, const char *pszPassword) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + VMSTATE enmVMState = VMR3GetState(pVM); + AssertMsgReturn(enmVMState == VMSTATE_CREATED, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_INTERNAL_ERROR_4); + AssertReturn(pszAddress, VERR_INVALID_PARAMETER); + + if (pVM->ftm.s.uInterval) + pVM->ftm.s.uInterval = uInterval; + else + pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */ + + pVM->ftm.s.uPort = uPort; + pVM->ftm.s.pszAddress = RTStrDup(pszAddress); + if (pszPassword) + pVM->ftm.s.pszPassword = RTStrDup(pszPassword); + + int rc = RTSemEventCreate(&pVM->ftm.s.hShutdownEvent); + if (RT_FAILURE(rc)) + return rc; + + if (fMaster) + { + rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM, + 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster"); + if (RT_FAILURE(rc)) + return rc; + + pVM->fFaultTolerantMaster = true; + if (PGMIsUsingLargePages(pVM)) + { + /* Must disable large page usage as 2 MB pages are too big to write monitor. 
*/ + LogRel(("FTSync: disabling large page usage.\n")); + PGMSetLargePageUsage(pVM, false); + } + /** @todo might need to disable page fusion as well */ + + return VMR3PowerOn(pVM->pUVM); + } + + + /* standby */ + rc = RTThreadCreate(NULL, ftmR3StandbyThread, pVM, + 0, RTTHREADTYPE_DEFAULT, 0, "ftmStandby"); + if (RT_FAILURE(rc)) + return rc; + + rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer); + if (RT_FAILURE(rc)) + return rc; + pVM->ftm.s.fIsStandbyNode = true; + + rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM); + /** @todo deal with the exit code to check if we should activate this standby VM. */ + if (pVM->ftm.s.fActivateStandby) + { + /** @todo fallover. */ + } + + if (pVM->ftm.s.standby.hServer) + { + RTTcpServerDestroy(pVM->ftm.s.standby.hServer); + pVM->ftm.s.standby.hServer = NULL; + } + if (rc == VERR_TCP_SERVER_SHUTDOWN) + rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */ + return rc; +} + +/** + * Powers off the fault tolerant virtual machine (standby). + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(int) FTMR3CancelStandby(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED); + Assert(pVM->ftm.s.standby.hServer); + + return RTTcpServerShutdown(pVM->ftm.s.standby.hServer); +} + +/** + * Rendezvous callback used by FTMR3SetCheckpoint + * Sync state + changed memory with the standby node. + * + * This is only called on one of the EMTs while the other ones are waiting for + * it to complete this function. + * + * @returns VINF_SUCCESS (VBox strict status code). + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused. + * @param pvUser Not used. + */ +static DECLCALLBACK(VBOXSTRICTRC) ftmR3SetCheckpointRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + int rc = VINF_SUCCESS; + bool fSuspended = false; + NOREF(pVCpu); + NOREF(pvUser); + + /* We don't call VMR3Suspend here to avoid the overhead of state changes and notifications. This + * is only a short suspend. + */ + STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointPause, a); + PDMR3Suspend(pVM); + + /* Hack alert: as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/ + EMR3NotifySuspend(pVM); + STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointPause, a); + + STAM_REL_COUNTER_INC(&pVM->ftm.s.StatDeltaVM); + + RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */ + + /* Reset the sync state. */ + pVM->ftm.s.syncstate.uOffStream = 0; + pVM->ftm.s.syncstate.cbReadBlock = 0; + pVM->ftm.s.syncstate.fStopReading = false; + pVM->ftm.s.syncstate.fIOError = false; + pVM->ftm.s.syncstate.fEndOfStream = false; + + rc = ftmR3TcpSubmitCommand(pVM, "checkpoint"); + AssertRC(rc); + + pVM->ftm.s.fDeltaLoadSaveActive = true; + rc = VMR3SaveFT(pVM->pUVM, &g_ftmR3TcpOps, pVM, &fSuspended, true /* fSkipStateChanges */); + pVM->ftm.s.fDeltaLoadSaveActive = false; + AssertRC(rc); + + rc = ftmR3TcpReadACK(pVM, "checkpoint-complete"); + AssertRC(rc); + + RTSocketRelease(pVM->ftm.s.hSocket); + + /* Write protect all memory. */ + rc = PGMR3PhysWriteProtectRAM(pVM); + AssertRC(rc); + + /* We don't call VMR3Resume here to avoid the overhead of state changes and notifications. 
This + * is only a short suspend. + */ + STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointResume, b); + PGMR3ResetNoMorePhysWritesFlag(pVM); + PDMR3Resume(pVM); + + /* Hack alert as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/ + EMR3NotifyResume(pVM); + STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointResume, b); + + return rc; +} + +/** + * Performs a full sync to the standby node + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param enmCheckpoint Checkpoint type + */ +VMMR3_INT_DECL(int) FTMR3SetCheckpoint(PVM pVM, FTMCHECKPOINTTYPE enmCheckpoint) +{ + int rc; + + if (!pVM->fFaultTolerantMaster) + return VINF_SUCCESS; + + switch (enmCheckpoint) + { + case FTMCHECKPOINTTYPE_NETWORK: + STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointNetwork); + break; + + case FTMCHECKPOINTTYPE_STORAGE: + STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointStorage); + break; + + default: + AssertMsgFailedReturn(("%d\n", enmCheckpoint), VERR_INVALID_PARAMETER); + } + + pVM->ftm.s.fCheckpointingActive = true; + if (VM_IS_EMT(pVM)) + { + PVMCPU pVCpu = VMMGetCpu(pVM); + + /* We must take special care here as the memory sync is competing with us and requires a responsive EMT. */ + while ((rc = PDMCritSectTryEnter(&pVM->ftm.s.CritSect)) == VERR_SEM_BUSY) + { + if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + rc = VMMR3EmtRendezvousFF(pVM, pVCpu); + AssertRC(rc); + } + + if (VM_FF_IS_SET(pVM, VM_FF_REQUEST)) + { + rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/); + AssertRC(rc); + } + } + } + else + rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY); + + AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc)); + + STAM_PROFILE_START(&pVM->ftm.s.StatCheckpoint, a); + + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, ftmR3SetCheckpointRendezvous, NULL); + + STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpoint, a); + + PDMCritSectLeave(&pVM->ftm.s.CritSect); + pVM->ftm.s.fCheckpointingActive = false; + + return rc; +} diff --git a/src/VBox/VMM/VMMR3/GIM.cpp b/src/VBox/VMM/VMMR3/GIM.cpp new file mode 100644 index 00000000..8b13c26d --- /dev/null +++ b/src/VBox/VMM/VMMR3/GIM.cpp @@ -0,0 +1,724 @@ +/* $Id: GIM.cpp $ */ +/** @file + * GIM - Guest Interface Manager. + */ + +/* + * Copyright (C) 2014-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_gim GIM - The Guest Interface Manager + * + * The Guest Interface Manager abstracts an interface provider through which + * guests may interact with the hypervisor. + * + * @see grp_gim + * + * + * @section sec_gim_provider Providers + * + * A GIM provider implements a particular hypervisor interface such as Microsoft + * Hyper-V, Linux KVM and so on. It hooks into various components in the VMM to + * ease the guest in running under a recognized, virtualized environment. + * + * The GIM provider configured for the VM needs to be recognized by the guest OS + * in order to make use of features supported by the interface. 
Since it
+ * requires co-operation from the guest OS, a GIM provider may also be referred to
+ * as a paravirtualization interface.
+ *
+ * One of the goals of having a paravirtualized interface is to enable guests
+ * to be more accurate and efficient when operating in a virtualized
+ * environment. For instance, a guest OS which interfaces to VirtualBox through
+ * a GIM provider may rely on the provider for supplying the correct TSC
+ * frequency of the host processor. The guest can then avoid calibrating the
+ * TSC itself, resulting in higher accuracy and better performance.
+ *
+ * At most, only one GIM provider can be active for a running VM and cannot be
+ * changed during the lifetime of the VM.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GIM
+#include
+#include
+#include
+#include
+#include "GIMInternal.h"
+#include
+
+#include
+
+#include
+#include
+#include
+
+/* Include all GIM providers. */
+#include "GIMMinimalInternal.h"
+#include "GIMHvInternal.h"
+#include "GIMKvmInternal.h"
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static FNSSMINTSAVEEXEC gimR3Save;
+static FNSSMINTLOADEXEC gimR3Load;
+static FNSSMINTLOADDONE gimR3LoadDone;
+
+
+/**
+ * Initializes the GIM.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR3_INT_DECL(int) GIMR3Init(PVM pVM)
+{
+ LogFlow(("GIMR3Init\n"));
+
+ /*
+ * Assert alignment and sizes.
+ */
+ AssertCompile(sizeof(pVM->gim.s) <= sizeof(pVM->gim.padding));
+ AssertCompile(sizeof(pVM->aCpus[0].gim.s) <= sizeof(pVM->aCpus[0].gim.padding));
+
+ /*
+ * Initialize members.
+ */
+ pVM->gim.s.hSemiReadOnlyMmio2Handler = NIL_PGMPHYSHANDLERTYPE;
+
+ /*
+ * Register the saved state data unit.
+ */
+ int rc = SSMR3RegisterInternal(pVM, "GIM", 0 /* uInstance */, GIM_SAVED_STATE_VERSION, sizeof(GIM),
+ NULL /* pfnLivePrep */, NULL /* pfnLiveExec */, NULL /* pfnLiveVote*/,
+ NULL /* pfnSavePrep */, gimR3Save, NULL /* pfnSaveDone */,
+ NULL /* pfnLoadPrep */, gimR3Load, gimR3LoadDone);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Read configuration.
+ */
+ PCFGMNODE pCfgNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "GIM/");
+
+ /*
+ * Validate the GIM settings.
+ */
+ rc = CFGMR3ValidateConfig(pCfgNode, "/GIM/", /* pszNode */
+ "Provider" /* pszValidValues */
+ "|Version",
+ "HyperV", /* pszValidNodes */
+ "GIM", /* pszWho */
+ 0); /* uInstance */
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /** @cfgm{/GIM/Provider, string}
+ * The name of the GIM provider. The default is "None". */
+ char szProvider[64];
+ rc = CFGMR3QueryStringDef(pCfgNode, "Provider", szProvider, sizeof(szProvider), "None");
+ AssertLogRelRCReturn(rc, rc);
+
+ /** @cfgm{/GIM/Version, uint32_t}
+ * The interface version. The default is 0, which means "provide the most
+ * up-to-date implementation". */
+ uint32_t uVersion;
+ rc = CFGMR3QueryU32Def(pCfgNode, "Version", &uVersion, 0 /* default */);
+ AssertLogRelRCReturn(rc, rc);
+
+ /*
+ * Setup the GIM provider for this VM.
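+ * Recognized provider names are "None", "Minimal", "HyperV" and "KVM"; anything
+ * else fails with VERR_GIM_INVALID_PROVIDER.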
+ */ + LogRel(("GIM: Using provider '%s' (Implementation version: %u)\n", szProvider, uVersion)); + if (!RTStrCmp(szProvider, "None")) + pVM->gim.s.enmProviderId = GIMPROVIDERID_NONE; + else + { + pVM->gim.s.u32Version = uVersion; + /** @todo r=bird: Because u32Version is saved, it should be translated to the + * 'most up-to-date implementation' version number when 0. Otherwise, + * we'll have abiguities when loading the state of older VMs. */ + if (!RTStrCmp(szProvider, "Minimal")) + { + pVM->gim.s.enmProviderId = GIMPROVIDERID_MINIMAL; + rc = gimR3MinimalInit(pVM); + } + else if (!RTStrCmp(szProvider, "HyperV")) + { + pVM->gim.s.enmProviderId = GIMPROVIDERID_HYPERV; + rc = gimR3HvInit(pVM, pCfgNode); + } + else if (!RTStrCmp(szProvider, "KVM")) + { + pVM->gim.s.enmProviderId = GIMPROVIDERID_KVM; + rc = gimR3KvmInit(pVM); + } + else + rc = VMR3SetError(pVM->pUVM, VERR_GIM_INVALID_PROVIDER, RT_SRC_POS, "Provider '%s' unknown.", szProvider); + } + + /* + * Statistics. + */ + STAM_REL_REG_USED(pVM, &pVM->gim.s.StatDbgXmit, STAMTYPE_COUNTER, "/GIM/Debug/Transmit", STAMUNIT_OCCURENCES, "Debug packets sent."); + STAM_REL_REG_USED(pVM, &pVM->gim.s.StatDbgXmitBytes, STAMTYPE_COUNTER, "/GIM/Debug/TransmitBytes", STAMUNIT_OCCURENCES, "Debug bytes sent."); + STAM_REL_REG_USED(pVM, &pVM->gim.s.StatDbgRecv, STAMTYPE_COUNTER, "/GIM/Debug/Receive", STAMUNIT_OCCURENCES, "Debug packets received."); + STAM_REL_REG_USED(pVM, &pVM->gim.s.StatDbgRecvBytes, STAMTYPE_COUNTER, "/GIM/Debug/ReceiveBytes", STAMUNIT_OCCURENCES, "Debug bytes received."); + + STAM_REL_REG_USED(pVM, &pVM->gim.s.StatHypercalls, STAMTYPE_COUNTER, "/GIM/Hypercalls", STAMUNIT_OCCURENCES, "Number of hypercalls initiated."); + return rc; +} + + +/** + * Initializes the remaining bits of the GIM provider. + * + * This is called after initializing HM and most other VMM components. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @thread EMT(0) + */ +VMMR3_INT_DECL(int) GIMR3InitCompleted(PVM pVM) +{ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_MINIMAL: + return gimR3MinimalInitCompleted(pVM); + + case GIMPROVIDERID_HYPERV: + return gimR3HvInitCompleted(pVM); + + case GIMPROVIDERID_KVM: + return gimR3KvmInitCompleted(pVM); + + default: + break; + } + + if (!TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */)) + LogRel(("GIM: Warning!!! Host TSC is unstable. The guest may behave unpredictably with a paravirtualized clock.\n")); + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMINTSAVEEXEC} + */ +static DECLCALLBACK(int) gimR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pSSM, VERR_SSM_INVALID_STATE); + + int rc = VINF_SUCCESS; +#if 0 + /* Save per-CPU data. */ + SSMR3PutU32(pSSM, pVM->cCpus); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + rc = SSMR3PutXYZ(pSSM, pVM->aCpus[i].gim.s.XYZ); + } +#endif + + /* + * Save per-VM data. + */ + SSMR3PutU32(pSSM, pVM->gim.s.enmProviderId); + SSMR3PutU32(pSSM, pVM->gim.s.u32Version); + + /* + * Save provider-specific data. 
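+ * Currently only the Hyper-V and KVM providers have provider-specific
+ * state; for "None" and "Minimal" nothing is written beyond the common
+ * fields above.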
+ */ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + rc = gimR3HvSave(pVM, pSSM); + AssertRCReturn(rc, rc); + break; + + case GIMPROVIDERID_KVM: + rc = gimR3KvmSave(pVM, pSSM); + AssertRCReturn(rc, rc); + break; + + default: + break; + } + + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTLOADEXEC} + */ +static DECLCALLBACK(int) gimR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + if (uPass != SSM_PASS_FINAL) + return VINF_SUCCESS; + if (uVersion != GIM_SAVED_STATE_VERSION) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + + int rc; +#if 0 + /* Load per-CPU data. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + rc = SSMR3PutXYZ(pSSM, pVM->aCpus[i].gim.s.XYZ); + } +#endif + + /* + * Load per-VM data. + */ + uint32_t uProviderId; + uint32_t uProviderVersion; + + rc = SSMR3GetU32(pSSM, &uProviderId); AssertRCReturn(rc, rc); + rc = SSMR3GetU32(pSSM, &uProviderVersion); AssertRCReturn(rc, rc); + + if ((GIMPROVIDERID)uProviderId != pVM->gim.s.enmProviderId) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Saved GIM provider %u differs from the configured one (%u)."), + uProviderId, pVM->gim.s.enmProviderId); +#if 0 /** @todo r=bird: Figure out what you mean to do here with the version. */ + if (uProviderVersion != pVM->gim.s.u32Version) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Saved GIM provider version %u differs from the configured one (%u)."), + uProviderVersion, pVM->gim.s.u32Version); +#else + pVM->gim.s.u32Version = uProviderVersion; +#endif + + /* + * Load provider-specific data. + */ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + rc = gimR3HvLoad(pVM, pSSM); + AssertRCReturn(rc, rc); + break; + + case GIMPROVIDERID_KVM: + rc = gimR3KvmLoad(pVM, pSSM); + AssertRCReturn(rc, rc); + break; + + default: + break; + } + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMINTLOADDONE} + */ +static DECLCALLBACK(int) gimR3LoadDone(PVM pVM, PSSMHANDLE pSSM) +{ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + return gimR3HvLoadDone(pVM, pSSM); + + default: + return VINF_SUCCESS; + } +} + + +/** + * Terminates the GIM. + * + * Termination means cleaning up and freeing all resources, + * the VM itself is, at this point, powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) GIMR3Term(PVM pVM) +{ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + return gimR3HvTerm(pVM); + + case GIMPROVIDERID_KVM: + return gimR3KvmTerm(pVM); + + default: + break; + } + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + */ +VMMR3_INT_DECL(void) GIMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + gimR3HvRelocate(pVM, offDelta); + break; + + default: + break; + } +} + + +/** + * The VM is being reset. + * + * For the GIM component this means unmapping and unregistering MMIO2 regions + * and other provider-specific resets. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ */ +VMMR3_INT_DECL(void) GIMR3Reset(PVM pVM) +{ + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + return gimR3HvReset(pVM); + + case GIMPROVIDERID_KVM: + return gimR3KvmReset(pVM); + + default: + break; + } +} + + +/** + * Registers the GIM device with VMM. + * + * @param pVM The cross context VM structure. + * @param pDevIns Pointer to the GIM device instance. + * @param pDbg Pointer to the GIM device debug structure, can be + * NULL. + */ +VMMR3DECL(void) GIMR3GimDeviceRegister(PVM pVM, PPDMDEVINS pDevIns, PGIMDEBUG pDbg) +{ + pVM->gim.s.pDevInsR3 = pDevIns; + pVM->gim.s.pDbgR3 = pDbg; +} + + +/** + * Gets debug setup specified by the provider. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDbgSetup Where to store the debug setup details. + */ +VMMR3DECL(int) GIMR3GetDebugSetup(PVM pVM, PGIMDEBUGSETUP pDbgSetup) +{ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pDbgSetup, VERR_INVALID_PARAMETER); + + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + return gimR3HvGetDebugSetup(pVM, pDbgSetup); + default: + break; + } + return VERR_GIM_NO_DEBUG_CONNECTION; +} + + +/** + * Read data from a host debug session. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pvRead The read buffer. + * @param pcbRead The size of the read buffer as well as where to store + * the number of bytes read. + * @param pfnReadComplete Callback when the buffer has been read and + * before signaling reading of the next buffer. + * Optional, can be NULL. + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3DebugRead(PVM pVM, void *pvRead, size_t *pcbRead, PFNGIMDEBUGBUFREADCOMPLETED pfnReadComplete) +{ + PGIMDEBUG pDbg = pVM->gim.s.pDbgR3; + if (pDbg) + { + if (ASMAtomicReadBool(&pDbg->fDbgRecvBufRead) == true) + { + STAM_REL_COUNTER_INC(&pVM->gim.s.StatDbgRecv); + STAM_REL_COUNTER_ADD(&pVM->gim.s.StatDbgRecvBytes, pDbg->cbDbgRecvBufRead); + + memcpy(pvRead, pDbg->pvDbgRecvBuf, pDbg->cbDbgRecvBufRead); + *pcbRead = pDbg->cbDbgRecvBufRead; + if (pfnReadComplete) + pfnReadComplete(pVM); + RTSemEventMultiSignal(pDbg->hDbgRecvThreadSem); + ASMAtomicWriteBool(&pDbg->fDbgRecvBufRead, false); + return VINF_SUCCESS; + } + else + *pcbRead = 0; + return VERR_NO_DATA; + } + return VERR_GIM_NO_DEBUG_CONNECTION; +} + + +/** + * Write data to a host debug session. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pvWrite The write buffer. + * @param pcbWrite The size of the write buffer as well as where to store + * the number of bytes written. + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3DebugWrite(PVM pVM, void *pvWrite, size_t *pcbWrite) +{ + PGIMDEBUG pDbg = pVM->gim.s.pDbgR3; + if (pDbg) + { + PPDMISTREAM pDbgStream = pDbg->pDbgDrvStream; + if (pDbgStream) + { + size_t cbWrite = *pcbWrite; + int rc = pDbgStream->pfnWrite(pDbgStream, pvWrite, pcbWrite); + if ( RT_SUCCESS(rc) + && *pcbWrite == cbWrite) + { + STAM_REL_COUNTER_INC(&pVM->gim.s.StatDbgXmit); + STAM_REL_COUNTER_ADD(&pVM->gim.s.StatDbgXmitBytes, *pcbWrite); + } + return rc; + } + } + return VERR_GIM_NO_DEBUG_CONNECTION; +} + + +/** + * Returns the array of MMIO2 regions that are expected to be registered and + * later mapped into the guest-physical address space for the GIM provider + * configured for the VM. + * + * @returns Pointer to an array of GIM MMIO2 regions, may return NULL. + * @param pVM The cross context VM structure. 
+ * @param pcRegions Where to store the number of items in the array. + * + * @remarks The caller does not own and therefore must -NOT- try to free the + * returned pointer. + */ +VMMR3DECL(PGIMMMIO2REGION) GIMR3GetMmio2Regions(PVM pVM, uint32_t *pcRegions) +{ + Assert(pVM); + Assert(pcRegions); + + *pcRegions = 0; + switch (pVM->gim.s.enmProviderId) + { + case GIMPROVIDERID_HYPERV: + return gimR3HvGetMmio2Regions(pVM, pcRegions); + + default: + break; + } + + return NULL; +} + +#if 0 /* ??? */ + +/** + * @callback_method_impl{FNPGMPHYSHANDLER, + * Write access handler for mapped MMIO2 pages. Currently ignores writes.} + * + * @todo In the future we might want to let the GIM provider decide what the + * handler should do (like throwing \#GP faults). + */ +static DECLCALLBACK(VBOXSTRICTRC) gimR3Mmio2WriteHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, + size_t cbBuf, PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, + void *pvUser) +{ + RT_NOREF6(pVM, pVCpu, GCPhys, pvPhys, pvBuf, cbBuf); + RT_NOREF3(enmAccessType, enmOrigin, pvUser); + + /* + * Ignore writes to the mapped MMIO2 page. + */ + Assert(enmAccessType == PGMACCESSTYPE_WRITE); + return VINF_SUCCESS; /** @todo Hyper-V says we should \#GP(0) fault for writes to the Hypercall and TSC page. */ +} + + +/** + * Unmaps a registered MMIO2 region in the guest address space and removes any + * access handlers for it. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRegion Pointer to the GIM MMIO2 region. + */ +VMMR3_INT_DECL(int) gimR3Mmio2Unmap(PVM pVM, PGIMMMIO2REGION pRegion) +{ + AssertPtr(pVM); + AssertPtr(pRegion); + + PPDMDEVINS pDevIns = pVM->gim.s.pDevInsR3; + AssertPtr(pDevIns); + if (pRegion->fMapped) + { + int rc = PGMHandlerPhysicalDeregister(pVM, pRegion->GCPhysPage); + AssertRC(rc); + + rc = PDMDevHlpMMIO2Unmap(pDevIns, pRegion->iRegion, pRegion->GCPhysPage); + if (RT_SUCCESS(rc)) + { + pRegion->fMapped = false; + pRegion->GCPhysPage = NIL_RTGCPHYS; + } + } + return VINF_SUCCESS; +} + + +/** + * Maps a registered MMIO2 region in the guest address space. + * + * The region will be made read-only and writes from the guest will be ignored. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRegion Pointer to the GIM MMIO2 region. + * @param GCPhysRegion Where in the guest address space to map the region. + */ +VMMR3_INT_DECL(int) GIMR3Mmio2Map(PVM pVM, PGIMMMIO2REGION pRegion, RTGCPHYS GCPhysRegion) +{ + PPDMDEVINS pDevIns = pVM->gim.s.pDevInsR3; + AssertPtr(pDevIns); + + /* The guest-physical address must be page-aligned. */ + if (GCPhysRegion & PAGE_OFFSET_MASK) + { + LogFunc(("%s: %#RGp not paging aligned\n", pRegion->szDescription, GCPhysRegion)); + return VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS; + } + + /* Allow only normal pages to be overlaid using our MMIO2 pages (disallow MMIO, ROM, reserved pages). */ + /** @todo Hyper-V doesn't seem to be very strict about this, may be relax + * later if some guest really requires it. */ + if (!PGMPhysIsGCPhysNormal(pVM, GCPhysRegion)) + { + LogFunc(("%s: %#RGp is not normal memory\n", pRegion->szDescription, GCPhysRegion)); + return VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS; + } + + if (!pRegion->fRegistered) + { + LogFunc(("%s: Region has not been registered.\n", pRegion->szDescription)); + return VERR_GIM_IPE_1; + } + + /* + * Map the MMIO2 region over the specified guest-physical address. 
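+ * Note: this overlay-style mapping path (and the other helpers in the
+ * surrounding '#if 0' block) is currently compiled out; the Hyper-V code
+ * rewrites guest memory directly via PGMPhysSimpleWriteGCPhys instead,
+ * see the @bugref{7532} remarks in GIMHv.cpp.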
+ */ + int rc = PDMDevHlpMMIOExMap(pDevIns, NULL, pRegion->iRegion, GCPhysRegion); + if (RT_SUCCESS(rc)) + { + /* + * Install access-handlers for the mapped page to prevent (ignore) writes to it + * from the guest. + */ + if (pVM->gim.s.hSemiReadOnlyMmio2Handler == NIL_PGMPHYSHANDLERTYPE) + rc = PGMR3HandlerPhysicalTypeRegister(pVM, PGMPHYSHANDLERKIND_WRITE, + gimR3Mmio2WriteHandler, + NULL /* pszModR0 */, NULL /* pszHandlerR0 */, NULL /* pszPfHandlerR0 */, + NULL /* pszModRC */, NULL /* pszHandlerRC */, NULL /* pszPfHandlerRC */, + "GIM read-only MMIO2 handler", + &pVM->gim.s.hSemiReadOnlyMmio2Handler); + if (RT_SUCCESS(rc)) + { + rc = PGMHandlerPhysicalRegister(pVM, GCPhysRegion, GCPhysRegion + (pRegion->cbRegion - 1), + pVM->gim.s.hSemiReadOnlyMmio2Handler, + NULL /* pvUserR3 */, NIL_RTR0PTR /* pvUserR0 */, NIL_RTRCPTR /* pvUserRC */, + pRegion->szDescription); + if (RT_SUCCESS(rc)) + { + pRegion->fMapped = true; + pRegion->GCPhysPage = GCPhysRegion; + return rc; + } + } + + PDMDevHlpMMIO2Unmap(pDevIns, pRegion->iRegion, GCPhysRegion); + } + + return rc; +} + + +/** + * Registers the physical handler for the registered and mapped MMIO2 region. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRegion Pointer to the GIM MMIO2 region. + */ +VMMR3_INT_DECL(int) gimR3Mmio2HandlerPhysicalRegister(PVM pVM, PGIMMMIO2REGION pRegion) +{ + AssertPtr(pRegion); + AssertReturn(pRegion->fRegistered, VERR_GIM_IPE_2); + AssertReturn(pRegion->fMapped, VERR_GIM_IPE_3); + + return PGMR3HandlerPhysicalRegister(pVM, + PGMPHYSHANDLERKIND_WRITE, + pRegion->GCPhysPage, pRegion->GCPhysPage + (pRegion->cbRegion - 1), + gimR3Mmio2WriteHandler, NULL /* pvUserR3 */, + NULL /* pszModR0 */, NULL /* pszHandlerR0 */, NIL_RTR0PTR /* pvUserR0 */, + NULL /* pszModRC */, NULL /* pszHandlerRC */, NIL_RTRCPTR /* pvUserRC */, + pRegion->szDescription); +} + + +/** + * Deregisters the physical handler for the MMIO2 region. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRegion Pointer to the GIM MMIO2 region. + */ +VMMR3_INT_DECL(int) gimR3Mmio2HandlerPhysicalDeregister(PVM pVM, PGIMMMIO2REGION pRegion) +{ + return PGMHandlerPhysicalDeregister(pVM, pRegion->GCPhysPage); +} + +#endif + diff --git a/src/VBox/VMM/VMMR3/GIMHv.cpp b/src/VBox/VMM/VMMR3/GIMHv.cpp new file mode 100644 index 00000000..955deaad --- /dev/null +++ b/src/VBox/VMM/VMMR3/GIMHv.cpp @@ -0,0 +1,2318 @@ +/* $Id: GIMHv.cpp $ */ +/** @file + * GIM - Guest Interface Manager, Hyper-V implementation. + */ + +/* + * Copyright (C) 2014-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_GIM +#include +#include +#include +#include +#include +#include +#include +#include +#include "GIMInternal.h" +#include + +#include +#include + +#include +#include +#include +#include +#include +#ifdef DEBUG_ramshankar +# include +#endif + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** + * GIM Hyper-V saved-state version. + */ +#define GIM_HV_SAVED_STATE_VERSION UINT32_C(4) +/** Saved states, priot to saving debug UDP source/destination ports. */ +#define GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG_UDP_PORTS UINT32_C(3) +/** Saved states, prior to any synthetic interrupt controller support. */ +#define GIM_HV_SAVED_STATE_VERSION_PRE_SYNIC UINT32_C(2) +/** Vanilla saved states, prior to any debug support. */ +#define GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG UINT32_C(1) + +#ifdef VBOX_WITH_STATISTICS +# define GIMHV_MSRRANGE(a_uFirst, a_uLast, a_szName) \ + { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } } +#else +# define GIMHV_MSRRANGE(a_uFirst, a_uLast, a_szName) \ + { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName } +#endif + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** + * Array of MSR ranges supported by Hyper-V. + */ +static CPUMMSRRANGE const g_aMsrRanges_HyperV[] = +{ + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE0_FIRST, MSR_GIM_HV_RANGE0_LAST, "Hyper-V range 0"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE1_FIRST, MSR_GIM_HV_RANGE1_LAST, "Hyper-V range 1"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE2_FIRST, MSR_GIM_HV_RANGE2_LAST, "Hyper-V range 2"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE3_FIRST, MSR_GIM_HV_RANGE3_LAST, "Hyper-V range 3"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE4_FIRST, MSR_GIM_HV_RANGE4_LAST, "Hyper-V range 4"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE5_FIRST, MSR_GIM_HV_RANGE5_LAST, "Hyper-V range 5"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE6_FIRST, MSR_GIM_HV_RANGE6_LAST, "Hyper-V range 6"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE7_FIRST, MSR_GIM_HV_RANGE7_LAST, "Hyper-V range 7"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE8_FIRST, MSR_GIM_HV_RANGE8_LAST, "Hyper-V range 8"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE9_FIRST, MSR_GIM_HV_RANGE9_LAST, "Hyper-V range 9"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE10_FIRST, MSR_GIM_HV_RANGE10_LAST, "Hyper-V range 10"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE11_FIRST, MSR_GIM_HV_RANGE11_LAST, "Hyper-V range 11"), + GIMHV_MSRRANGE(MSR_GIM_HV_RANGE12_FIRST, MSR_GIM_HV_RANGE12_LAST, "Hyper-V range 12") +}; +#undef GIMHV_MSRRANGE + +/** + * DHCP OFFER packet response to the guest (client) over the Hyper-V debug + * transport. + * + * - MAC: Destination: broadcast. + * - MAC: Source: 00:00:00:00:01 (hypervisor). It's important that it's + * different from the client's MAC address which is all 0's. 
+ * - IP: Source: 10.0.5.1 (hypervisor) + * - IP: Destination: broadcast. + * - IP: Checksum included. + * - BOOTP: Client IP address: 10.0.5.5. + * - BOOTP: Server IP address: 10.0.5.1. + * - DHCP options: Subnet mask, router, lease-time, DHCP server identifier. + * Options are kept to a minimum required for making Windows guests happy. + */ +#define GIMHV_DEBUGCLIENT_IPV4 RT_H2N_U32_C(0x0a000505) /* 10.0.5.5 */ +#define GIMHV_DEBUGSERVER_IPV4 RT_H2N_U32_C(0x0a000501) /* 10.0.5.1 */ +static const uint8_t g_abDhcpOffer[] = +{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x45, 0x10, + 0x01, 0x28, 0x00, 0x00, 0x00, 0x00, 0x40, 0x11, 0x6a, 0xb5, 0x0a, 0x00, 0x05, 0x01, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x43, 0x00, 0x44, 0x01, 0x14, 0x00, 0x00, 0x02, 0x01, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x05, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x02, 0x01, 0x04, 0xff, + 0xff, 0xff, 0x00, 0x03, 0x04, 0x0a, 0x00, 0x05, 0x01, 0x33, 0x04, 0xff, 0xff, 0xff, 0xff, 0x36, + 0x04, 0x0a, 0x00, 0x05, 0x01, 0xff +}; + +/** + * DHCP ACK packet response to the guest (client) over the Hyper-V debug + * transport. + * + * - MAC: Destination: 00:00:00:00:00 (client). + * - IP: Destination: 10.0.5.5 (client). + * - Rest are mostly similar to the DHCP offer. 
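+ * Concretely, as encoded in the byte array below: the destination MAC is
+ * the client's all-zero address instead of broadcast, the IP destination
+ * is 10.0.5.5 instead of 255.255.255.255 (with the IP header checksum
+ * adjusted accordingly), and DHCP option 53 carries message type 5 (ACK)
+ * rather than 2 (OFFER).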
+ */ +static const uint8_t g_abDhcpAck[] = +{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x45, 0x10, + 0x01, 0x28, 0x00, 0x00, 0x00, 0x00, 0x40, 0x11, 0x5b, 0xb0, 0x0a, 0x00, 0x05, 0x01, 0x0a, 0x00, + 0x05, 0x05, 0x00, 0x43, 0x00, 0x44, 0x01, 0x14, 0x00, 0x00, 0x02, 0x01, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x05, 0x05, 0x0a, 0x00, 0x05, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x05, 0x01, 0x04, 0xff, + 0xff, 0xff, 0x00, 0x03, 0x04, 0x0a, 0x00, 0x05, 0x01, 0x33, 0x04, 0xff, 0xff, 0xff, 0xff, 0x36, + 0x04, 0x0a, 0x00, 0x05, 0x01, 0xff +}; + +/** + * ARP reply to the guest (client) over the Hyper-V debug transport. + * + * - MAC: Destination: 00:00:00:00:00 (client) + * - MAC: Source: 00:00:00:00:01 (hypervisor) + * - ARP: Reply: 10.0.5.1 is at Source MAC address. + */ +static const uint8_t g_abArpReply[] = +{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x06, 0x00, 0x01, + 0x08, 0x00, 0x06, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0a, 0x00, 0x05, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x05, 0x05 +}; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int gimR3HvInitHypercallSupport(PVM pVM); +static void gimR3HvTermHypercallSupport(PVM pVM); +static int gimR3HvInitDebugSupport(PVM pVM); +static void gimR3HvTermDebugSupport(PVM pVM); +static DECLCALLBACK(void) gimR3HvTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser); + +/** + * Initializes the Hyper-V GIM provider. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pGimCfg The GIM CFGM node. + */ +VMMR3_INT_DECL(int) gimR3HvInit(PVM pVM, PCFGMNODE pGimCfg) +{ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_HYPERV, VERR_INTERNAL_ERROR_5); + + PGIMHV pHv = &pVM->gim.s.u.Hv; + + /* + * Read configuration. 
+ */ + PCFGMNODE pCfgHv = CFGMR3GetChild(pGimCfg, "HyperV"); + if (pCfgHv) + { + /* + * Validate the Hyper-V settings. + */ + int rc2 = CFGMR3ValidateConfig(pCfgHv, "/HyperV/", + "VendorID" + "|VSInterface" + "|HypercallDebugInterface", + "" /* pszValidNodes */, "GIM/HyperV" /* pszWho */, 0 /* uInstance */); + if (RT_FAILURE(rc2)) + return rc2; + } + + /** @cfgm{/GIM/HyperV/VendorID, string, 'VBoxVBoxVBox'} + * The Hyper-V vendor signature, must be 12 characters. */ + char szVendor[13]; + int rc = CFGMR3QueryStringDef(pCfgHv, "VendorID", szVendor, sizeof(szVendor), "VBoxVBoxVBox"); + AssertLogRelRCReturn(rc, rc); + AssertLogRelMsgReturn(strlen(szVendor) == 12, + ("The VendorID config value must be exactly 12 chars, '%s' isn't!\n", szVendor), + VERR_INVALID_PARAMETER); + + LogRel(("GIM: HyperV: Reporting vendor as '%s'\n", szVendor)); + /** @todo r=bird: GIM_HV_VENDOR_MICROSOFT is 12 char and the string is max + * 12+terminator, so the NCmp is a little bit misleading. */ + if (!RTStrNCmp(szVendor, GIM_HV_VENDOR_MICROSOFT, sizeof(GIM_HV_VENDOR_MICROSOFT) - 1)) + { + LogRel(("GIM: HyperV: Warning! Posing as the Microsoft vendor may alter guest behaviour!\n")); + pHv->fIsVendorMsHv = true; + } + + /** @cfgm{/GIM/HyperV/VSInterface, bool, true} + * The Microsoft virtualization service interface (debugging). */ + rc = CFGMR3QueryBoolDef(pCfgHv, "VSInterface", &pHv->fIsInterfaceVs, false); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/GIM/HyperV/HypercallDebugInterface, bool, false} + * Whether we specify the guest to use hypercalls for debugging rather than MSRs. */ + rc = CFGMR3QueryBoolDef(pCfgHv, "HypercallDebugInterface", &pHv->fDbgHypercallInterface, false); + AssertLogRelRCReturn(rc, rc); + + /* + * Determine interface capabilities based on the version. + */ + if (!pVM->gim.s.u32Version) + { + /* Basic features. */ + pHv->uBaseFeat = 0 + //| GIM_HV_BASE_FEAT_VP_RUNTIME_MSR + | GIM_HV_BASE_FEAT_PART_TIME_REF_COUNT_MSR + //| GIM_HV_BASE_FEAT_BASIC_SYNIC_MSRS // Both required for synethetic timers + //| GIM_HV_BASE_FEAT_STIMER_MSRS // Both required for synethetic timers + | GIM_HV_BASE_FEAT_APIC_ACCESS_MSRS + | GIM_HV_BASE_FEAT_HYPERCALL_MSRS + | GIM_HV_BASE_FEAT_VP_ID_MSR + | GIM_HV_BASE_FEAT_VIRT_SYS_RESET_MSR + //| GIM_HV_BASE_FEAT_STAT_PAGES_MSR + | GIM_HV_BASE_FEAT_PART_REF_TSC_MSR + //| GIM_HV_BASE_FEAT_GUEST_IDLE_STATE_MSR + | GIM_HV_BASE_FEAT_TIMER_FREQ_MSRS + //| GIM_HV_BASE_FEAT_DEBUG_MSRS + ; + + /* Miscellaneous features. */ + pHv->uMiscFeat = 0 + //| GIM_HV_MISC_FEAT_GUEST_DEBUGGING + //| GIM_HV_MISC_FEAT_XMM_HYPERCALL_INPUT + | GIM_HV_MISC_FEAT_TIMER_FREQ + | GIM_HV_MISC_FEAT_GUEST_CRASH_MSRS + //| GIM_HV_MISC_FEAT_DEBUG_MSRS + ; + + /* Hypervisor recommendations to the guest. */ + pHv->uHyperHints = GIM_HV_HINT_MSR_FOR_SYS_RESET + | GIM_HV_HINT_RELAX_TIME_CHECKS + | GIM_HV_HINT_X2APIC_MSRS + ; + + /* Partition features. */ + pHv->uPartFlags |= GIM_HV_PART_FLAGS_EXTENDED_HYPERCALLS; + + /* Expose more if we're posing as Microsoft. We can, if needed, force MSR-based Hv + debugging by not exposing these bits while exposing the VS interface. The better + way is what we do currently, via the GIM_HV_DEBUG_OPTIONS_USE_HYPERCALLS bit. */ + if (pHv->fIsVendorMsHv) + { + pHv->uMiscFeat |= GIM_HV_MISC_FEAT_GUEST_DEBUGGING + | GIM_HV_MISC_FEAT_DEBUG_MSRS; + + pHv->uPartFlags |= GIM_HV_PART_FLAGS_DEBUGGING; + } + } + + /* + * Populate the required fields in MMIO2 region records for registering. 
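+ * Two records are filled in below: the hypercall page and the reference
+ * TSC page.  Each covers exactly one guest page and stays unmapped
+ * (GCPhysPage = NIL_RTGCPHYS) until the guest enables the corresponding
+ * page through its MSR.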
+ */ + AssertCompile(GIM_HV_PAGE_SIZE == PAGE_SIZE); + PGIMMMIO2REGION pRegion = &pHv->aMmio2Regions[GIM_HV_HYPERCALL_PAGE_REGION_IDX]; + pRegion->iRegion = GIM_HV_HYPERCALL_PAGE_REGION_IDX; + pRegion->fRCMapping = false; + pRegion->cbRegion = PAGE_SIZE; /* Sanity checked in gimR3HvLoad(), gimR3HvEnableTscPage() & gimR3HvEnableHypercallPage() */ + pRegion->GCPhysPage = NIL_RTGCPHYS; + RTStrCopy(pRegion->szDescription, sizeof(pRegion->szDescription), "Hyper-V hypercall page"); + + pRegion = &pHv->aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX]; + pRegion->iRegion = GIM_HV_REF_TSC_PAGE_REGION_IDX; + pRegion->fRCMapping = false; + pRegion->cbRegion = PAGE_SIZE; /* Sanity checked in gimR3HvLoad(), gimR3HvEnableTscPage() & gimR3HvEnableHypercallPage() */ + pRegion->GCPhysPage = NIL_RTGCPHYS; + RTStrCopy(pRegion->szDescription, sizeof(pRegion->szDescription), "Hyper-V TSC page"); + + /* + * Make sure the CPU ID bit are in accordance to the Hyper-V + * requirement and other paranoia checks. + * See "Requirements for implementing the Microsoft hypervisor interface" spec. + */ + Assert(!(pHv->uPartFlags & ( GIM_HV_PART_FLAGS_CREATE_PART + | GIM_HV_PART_FLAGS_ACCESS_MEMORY_POOL + | GIM_HV_PART_FLAGS_ACCESS_PART_ID + | GIM_HV_PART_FLAGS_ADJUST_MSG_BUFFERS + | GIM_HV_PART_FLAGS_CREATE_PORT + | GIM_HV_PART_FLAGS_ACCESS_STATS + | GIM_HV_PART_FLAGS_CPU_MGMT + | GIM_HV_PART_FLAGS_CPU_PROFILER))); + Assert((pHv->uBaseFeat & (GIM_HV_BASE_FEAT_HYPERCALL_MSRS | GIM_HV_BASE_FEAT_VP_ID_MSR)) + == (GIM_HV_BASE_FEAT_HYPERCALL_MSRS | GIM_HV_BASE_FEAT_VP_ID_MSR)); +#ifdef VBOX_STRICT + for (unsigned i = 0; i < RT_ELEMENTS(pHv->aMmio2Regions); i++) + { + PCGIMMMIO2REGION pCur = &pHv->aMmio2Regions[i]; + Assert(!pCur->fRCMapping); + Assert(!pCur->fMapped); + Assert(pCur->GCPhysPage == NIL_RTGCPHYS); + } +#endif + + /* + * Expose HVP (Hypervisor Present) bit to the guest. + */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP); + + /* + * Modify the standard hypervisor leaves for Hyper-V. + */ + CPUMCPUIDLEAF HyperLeaf; + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000000); + if ( pHv->fIsVendorMsHv + && pHv->fIsInterfaceVs) + HyperLeaf.uEax = UINT32_C(0x40000082); /* Since we expose 0x40000082 below for the Hyper-V PV-debugging case. */ + else + HyperLeaf.uEax = UINT32_C(0x40000006); /* Minimum value for Hyper-V default is 0x40000005. */ + /* + * Don't report vendor as 'Microsoft Hv'[1] by default, see @bugref{7270#c152}. + * [1]: ebx=0x7263694d ('rciM') ecx=0x666f736f ('foso') edx=0x76482074 ('vH t') + */ + { + uint32_t uVendorEbx; + uint32_t uVendorEcx; + uint32_t uVendorEdx; + uVendorEbx = ((uint32_t)szVendor[ 3]) << 24 | ((uint32_t)szVendor[ 2]) << 16 | ((uint32_t)szVendor[1]) << 8 + | (uint32_t)szVendor[ 0]; + uVendorEcx = ((uint32_t)szVendor[ 7]) << 24 | ((uint32_t)szVendor[ 6]) << 16 | ((uint32_t)szVendor[5]) << 8 + | (uint32_t)szVendor[ 4]; + uVendorEdx = ((uint32_t)szVendor[11]) << 24 | ((uint32_t)szVendor[10]) << 16 | ((uint32_t)szVendor[9]) << 8 + | (uint32_t)szVendor[ 8]; + HyperLeaf.uEbx = uVendorEbx; + HyperLeaf.uEcx = uVendorEcx; + HyperLeaf.uEdx = uVendorEdx; + } + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + HyperLeaf.uLeaf = UINT32_C(0x40000001); + HyperLeaf.uEax = 0x31237648; /* 'Hv#1' */ + HyperLeaf.uEbx = 0; /* Reserved */ + HyperLeaf.uEcx = 0; /* Reserved */ + HyperLeaf.uEdx = 0; /* Reserved */ + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + /* + * Add Hyper-V specific leaves. 
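+ * (Illustration, not part of the original source: the vendor-string
+ * packing above is plain little-endian byte order, so the default
+ * "VBoxVBoxVBox" vendor yields ebx = ecx = edx = 0x786f4256, i.e. the
+ * bytes 'V','B','o','x' in memory order -- consistent with the
+ * 'Microsoft Hv' example cited earlier where ebx=0x7263694d encodes
+ * 'M','i','c','r'.)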
+ */ + HyperLeaf.uLeaf = UINT32_C(0x40000002); /* MBZ until MSR_GIM_HV_GUEST_OS_ID is set by the guest. */ + HyperLeaf.uEax = 0; + HyperLeaf.uEbx = 0; + HyperLeaf.uEcx = 0; + HyperLeaf.uEdx = 0; + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + HyperLeaf.uLeaf = UINT32_C(0x40000003); + HyperLeaf.uEax = pHv->uBaseFeat; + HyperLeaf.uEbx = pHv->uPartFlags; + HyperLeaf.uEcx = pHv->uPowMgmtFeat; + HyperLeaf.uEdx = pHv->uMiscFeat; + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + HyperLeaf.uLeaf = UINT32_C(0x40000004); + HyperLeaf.uEax = pHv->uHyperHints; + HyperLeaf.uEbx = 0xffffffff; + HyperLeaf.uEcx = 0; + HyperLeaf.uEdx = 0; + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000005); + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + /* Leaf 0x40000006 is inserted in gimR3HvInitCompleted(). */ + + if ( pHv->fIsVendorMsHv + && pHv->fIsInterfaceVs) + { + HyperLeaf.uLeaf = UINT32_C(0x40000080); + HyperLeaf.uEax = 0; + HyperLeaf.uEbx = 0x7263694d; /* 'rciM' */ + HyperLeaf.uEcx = 0x666f736f; /* 'foso'*/ + HyperLeaf.uEdx = 0x53562074; /* 'SV t' */ + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + HyperLeaf.uLeaf = UINT32_C(0x40000081); + HyperLeaf.uEax = 0x31235356; /* '1#SV' */ + HyperLeaf.uEbx = 0; + HyperLeaf.uEcx = 0; + HyperLeaf.uEdx = 0; + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + HyperLeaf.uLeaf = UINT32_C(0x40000082); + HyperLeaf.uEax = RT_BIT_32(1); + HyperLeaf.uEbx = 0; + HyperLeaf.uEcx = 0; + HyperLeaf.uEdx = 0; + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + } + + /* + * Insert all MSR ranges of Hyper-V. + */ + for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_HyperV); i++) + { + int rc2 = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_HyperV[i]); + AssertLogRelRCReturn(rc2, rc2); + } + + /* + * Setup non-zero MSRs. + */ + if (pHv->uMiscFeat & GIM_HV_MISC_FEAT_GUEST_CRASH_MSRS) + pHv->uCrashCtlMsr = MSR_GIM_HV_CRASH_CTL_NOTIFY; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PGIMHVCPU pHvCpu = &pVM->aCpus[i].gim.s.u.HvCpu; + for (uint8_t idxSintMsr = 0; idxSintMsr < RT_ELEMENTS(pHvCpu->auSintMsrs); idxSintMsr++) + pHvCpu->auSintMsrs[idxSintMsr] = MSR_GIM_HV_SINT_MASKED; + } + + /* + * Setup hypercall support. + */ + rc = gimR3HvInitHypercallSupport(pVM); + AssertLogRelRCReturn(rc, rc); + + /* + * Setup debug support. + */ + rc = gimR3HvInitDebugSupport(pVM); + AssertLogRelRCReturn(rc, rc); + + /* + * Setup up the per-VCPU synthetic timers. + */ + if ( (pHv->uBaseFeat & GIM_HV_BASE_FEAT_STIMER_MSRS) + || (pHv->uBaseFeat & GIM_HV_BASE_FEAT_BASIC_SYNIC_MSRS)) + { + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PGIMHVCPU pHvCpu = &pVCpu->gim.s.u.HvCpu; + + for (uint8_t idxStimer = 0; idxStimer < RT_ELEMENTS(pHvCpu->aStimers); idxStimer++) + { + PGIMHVSTIMER pHvStimer = &pHvCpu->aStimers[idxStimer]; + + /* Associate the synthetic timer with its corresponding VCPU. */ + pHvStimer->idCpu = pVCpu->idCpu; + pHvStimer->idxStimer = idxStimer; + + /* Create the timer and associate the context pointers. 
*/ + RTStrPrintf(&pHvStimer->szTimerDesc[0], sizeof(pHvStimer->szTimerDesc), "Hyper-V[%u] Timer%u", pVCpu->idCpu, + idxStimer); + rc = TMR3TimerCreateInternal(pVM, TMCLOCK_VIRTUAL_SYNC, gimR3HvTimerCallback, pHvStimer /* pvUser */, + pHvStimer->szTimerDesc, &pHvStimer->pTimerR3); + AssertLogRelRCReturn(rc, rc); + pHvStimer->pTimerR0 = TMTimerR0Ptr(pHvStimer->pTimerR3); + pHvStimer->pTimerRC = TMTimerRCPtr(pHvStimer->pTimerR3); + } + } + } + + /* + * Register statistics. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PGIMHVCPU pHvCpu = &pVCpu->gim.s.u.HvCpu; + + for (size_t idxStimer = 0; idxStimer < RT_ELEMENTS(pHvCpu->aStatStimerFired); idxStimer++) + { + int rc2 = STAMR3RegisterF(pVM, &pHvCpu->aStatStimerFired[idxStimer], STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + STAMUNIT_OCCURENCES, "Number of times the synthetic timer fired.", + "/GIM/HyperV/%u/Stimer%u_Fired", idCpu, idxStimer); + AssertLogRelRCReturn(rc2, rc2); + } + } + + return VINF_SUCCESS; +} + + +/** + * Initializes remaining bits of the Hyper-V provider. + * + * This is called after initializing HM and almost all other VMM components. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3HvInitCompleted(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + pHv->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM); + + /* + * Determine interface capabilities based on the version. + */ + if (!pVM->gim.s.u32Version) + { + /* Hypervisor capabilities; features used by the hypervisor. */ + pHv->uHyperCaps = HMIsNestedPagingActive(pVM) ? GIM_HV_HOST_FEAT_NESTED_PAGING : 0; + pHv->uHyperCaps |= HMIsMsrBitmapActive(pVM) ? GIM_HV_HOST_FEAT_MSR_BITMAP : 0; + } + + CPUMCPUIDLEAF HyperLeaf; + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000006); + HyperLeaf.uEax = pHv->uHyperCaps; + HyperLeaf.uEbx = 0; + HyperLeaf.uEcx = 0; + HyperLeaf.uEdx = 0; + int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + /* + * Inform APIC whether Hyper-V compatibility mode is enabled or not. + * Do this here rather than on gimR3HvInit() as it gets called after APIC + * has finished inserting/removing the x2APIC MSR range. + */ + if (pHv->uHyperHints & GIM_HV_HINT_X2APIC_MSRS) + APICR3HvSetCompatMode(pVM, true); + + return rc; +} + + +/** + * Terminates the Hyper-V GIM provider. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3HvTerm(PVM pVM) +{ + gimR3HvReset(pVM); + gimR3HvTermHypercallSupport(pVM); + gimR3HvTermDebugSupport(pVM); + + PCGIMHV pHv = &pVM->gim.s.u.Hv; + if ( (pHv->uBaseFeat & GIM_HV_BASE_FEAT_STIMER_MSRS) + || (pHv->uBaseFeat & GIM_HV_BASE_FEAT_BASIC_SYNIC_MSRS)) + { + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PGIMHVCPU pHvCpu = &pVM->aCpus[idCpu].gim.s.u.HvCpu; + for (uint8_t idxStimer = 0; idxStimer < RT_ELEMENTS(pHvCpu->aStimers); idxStimer++) + { + PGIMHVSTIMER pHvStimer = &pHvCpu->aStimers[idxStimer]; + TMR3TimerDestroy(pHvStimer->pTimerR3); + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. 
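+ *
+ * @remarks Only the raw-mode context (RC) pointers of the synthetic
+ *          timers need refreshing here; the ring-3 timer data is not
+ *          affected by the relocation delta (hence the RT_NOREF on
+ *          @a offDelta in the body).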
+ */ +VMMR3_INT_DECL(void) gimR3HvRelocate(PVM pVM, RTGCINTPTR offDelta) +{ + RT_NOREF1(offDelta); + + PCGIMHV pHv = &pVM->gim.s.u.Hv; + if ( (pHv->uBaseFeat & GIM_HV_BASE_FEAT_STIMER_MSRS) + || (pHv->uBaseFeat & GIM_HV_BASE_FEAT_BASIC_SYNIC_MSRS)) + { + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PGIMHVCPU pHvCpu = &pVM->aCpus[idCpu].gim.s.u.HvCpu; + for (uint8_t idxStimer = 0; idxStimer < RT_ELEMENTS(pHvCpu->aStimers); idxStimer++) + { + PGIMHVSTIMER pHvStimer = &pHvCpu->aStimers[idxStimer]; + pHvStimer->pTimerRC = TMTimerRCPtr(pHvStimer->pTimerR3); + } + } + } +} + + +/** + * This resets Hyper-V provider MSRs and unmaps whatever Hyper-V regions that + * the guest may have mapped. + * + * This is called when the VM is being reset. + * + * @param pVM The cross context VM structure. + * + * @thread EMT(0) + */ +VMMR3_INT_DECL(void) gimR3HvReset(PVM pVM) +{ + VM_ASSERT_EMT0(pVM); + + /* + * Unmap MMIO2 pages that the guest may have setup. + */ + LogRel(("GIM: HyperV: Resetting MMIO2 regions and MSRs\n")); + PGIMHV pHv = &pVM->gim.s.u.Hv; + for (unsigned i = 0; i < RT_ELEMENTS(pHv->aMmio2Regions); i++) + { + PGIMMMIO2REGION pRegion = &pHv->aMmio2Regions[i]; +#if 0 + gimR3Mmio2Unmap(pVM, pRegion); +#else + pRegion->fMapped = false; + pRegion->GCPhysPage = NIL_RTGCPHYS; +#endif + } + + /* + * Reset MSRs. + */ + pHv->u64GuestOsIdMsr = 0; + pHv->u64HypercallMsr = 0; + pHv->u64TscPageMsr = 0; + pHv->uCrashP0Msr = 0; + pHv->uCrashP1Msr = 0; + pHv->uCrashP2Msr = 0; + pHv->uCrashP3Msr = 0; + pHv->uCrashP4Msr = 0; + pHv->uDbgStatusMsr = 0; + pHv->uDbgPendingBufferMsr = 0; + pHv->uDbgSendBufferMsr = 0; + pHv->uDbgRecvBufferMsr = 0; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PGIMHVCPU pHvCpu = &pVM->aCpus[i].gim.s.u.HvCpu; + pHvCpu->uSControlMsr = 0; + pHvCpu->uSimpMsr = 0; + pHvCpu->uSiefpMsr = 0; + pHvCpu->uApicAssistPageMsr = 0; + + for (uint8_t idxSint = 0; idxSint < RT_ELEMENTS(pHvCpu->auSintMsrs); idxSint++) + pHvCpu->auSintMsrs[idxSint] = MSR_GIM_HV_SINT_MASKED; + + for (uint8_t idxStimer = 0; idxStimer < RT_ELEMENTS(pHvCpu->aStimers); idxStimer++) + { + PGIMHVSTIMER pHvStimer = &pHvCpu->aStimers[idxStimer]; + pHvStimer->uStimerConfigMsr = 0; + pHvStimer->uStimerCountMsr = 0; + } + } +} + + +/** + * Returns a pointer to the MMIO2 regions supported by Hyper-V. + * + * @returns Pointer to an array of MMIO2 regions. + * @param pVM The cross context VM structure. + * @param pcRegions Where to store the number of regions in the array. + */ +VMMR3_INT_DECL(PGIMMMIO2REGION) gimR3HvGetMmio2Regions(PVM pVM, uint32_t *pcRegions) +{ + Assert(GIMIsEnabled(pVM)); + PGIMHV pHv = &pVM->gim.s.u.Hv; + + *pcRegions = RT_ELEMENTS(pHv->aMmio2Regions); + Assert(*pcRegions <= UINT8_MAX); /* See PGMR3PhysMMIO2Register(). */ + return pHv->aMmio2Regions; +} + + +/** + * Callback for when debug data is available over the debugger connection. + * + * @param pVM The cross context VM structure. + */ +static DECLCALLBACK(void) gimR3HvDebugBufAvail(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + RTGCPHYS GCPhysPendingBuffer = pHv->uDbgPendingBufferMsr; + if ( GCPhysPendingBuffer + && PGMPhysIsGCPhysNormal(pVM, GCPhysPendingBuffer)) + { + uint8_t bPendingData = 1; + int rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysPendingBuffer, &bPendingData, sizeof(bPendingData)); + if (RT_FAILURE(rc)) + { + LogRelMax(5, ("GIM: HyperV: Failed to set pending debug receive buffer at %#RGp, rc=%Rrc\n", GCPhysPendingBuffer, + rc)); + } + } +} + + +/** + * Callback for when debug data has been read from the debugger connection. 
+ * + * This will be invoked before signalling read of the next debug buffer. + * + * @param pVM The cross context VM structure. + */ +static DECLCALLBACK(void) gimR3HvDebugBufReadCompleted(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + RTGCPHYS GCPhysPendingBuffer = pHv->uDbgPendingBufferMsr; + if ( GCPhysPendingBuffer + && PGMPhysIsGCPhysNormal(pVM, GCPhysPendingBuffer)) + { + uint8_t bPendingData = 0; + int rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysPendingBuffer, &bPendingData, sizeof(bPendingData)); + if (RT_FAILURE(rc)) + { + LogRelMax(5, ("GIM: HyperV: Failed to clear pending debug receive buffer at %#RGp, rc=%Rrc\n", GCPhysPendingBuffer, + rc)); + } + } +} + + +/** + * Get Hyper-V debug setup parameters. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDbgSetup Where to store the debug setup details. + */ +VMMR3_INT_DECL(int) gimR3HvGetDebugSetup(PVM pVM, PGIMDEBUGSETUP pDbgSetup) +{ + Assert(pDbgSetup); + PGIMHV pHv = &pVM->gim.s.u.Hv; + if (pHv->fDbgEnabled) + { + pDbgSetup->pfnDbgRecvBufAvail = gimR3HvDebugBufAvail; + pDbgSetup->cbDbgRecvBuf = GIM_HV_PAGE_SIZE; + return VINF_SUCCESS; + } + return VERR_GIM_NO_DEBUG_CONNECTION; +} + + +/** + * Hyper-V state-save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +VMMR3_INT_DECL(int) gimR3HvSave(PVM pVM, PSSMHANDLE pSSM) +{ + PCGIMHV pHv = &pVM->gim.s.u.Hv; + + /* + * Save the Hyper-V SSM version. + */ + SSMR3PutU32(pSSM, GIM_HV_SAVED_STATE_VERSION); + + /* + * Save per-VM MSRs. + */ + SSMR3PutU64(pSSM, pHv->u64GuestOsIdMsr); + SSMR3PutU64(pSSM, pHv->u64HypercallMsr); + SSMR3PutU64(pSSM, pHv->u64TscPageMsr); + + /* + * Save Hyper-V features / capabilities. + */ + SSMR3PutU32(pSSM, pHv->uBaseFeat); + SSMR3PutU32(pSSM, pHv->uPartFlags); + SSMR3PutU32(pSSM, pHv->uPowMgmtFeat); + SSMR3PutU32(pSSM, pHv->uMiscFeat); + SSMR3PutU32(pSSM, pHv->uHyperHints); + SSMR3PutU32(pSSM, pHv->uHyperCaps); + + /* + * Save the Hypercall region. + */ + PCGIMMMIO2REGION pRegion = &pHv->aMmio2Regions[GIM_HV_HYPERCALL_PAGE_REGION_IDX]; + SSMR3PutU8(pSSM, pRegion->iRegion); + SSMR3PutBool(pSSM, pRegion->fRCMapping); + SSMR3PutU32(pSSM, pRegion->cbRegion); + SSMR3PutGCPhys(pSSM, pRegion->GCPhysPage); + SSMR3PutStrZ(pSSM, pRegion->szDescription); + + /* + * Save the reference TSC region. + */ + pRegion = &pHv->aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX]; + SSMR3PutU8(pSSM, pRegion->iRegion); + SSMR3PutBool(pSSM, pRegion->fRCMapping); + SSMR3PutU32(pSSM, pRegion->cbRegion); + SSMR3PutGCPhys(pSSM, pRegion->GCPhysPage); + SSMR3PutStrZ(pSSM, pRegion->szDescription); + /* Save the TSC sequence so we can bump it on restore (as the CPU frequency/offset may change). */ + uint32_t uTscSequence = 0; + if ( pRegion->fMapped + && MSR_GIM_HV_REF_TSC_IS_ENABLED(pHv->u64TscPageMsr)) + { + PCGIMHVREFTSC pRefTsc = (PCGIMHVREFTSC)pRegion->pvPageR3; + uTscSequence = pRefTsc->u32TscSequence; + } + SSMR3PutU32(pSSM, uTscSequence); + + /* + * Save debug support data. 
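+ * The fields below are only read back by gimR3HvLoad() for saved states
+ * newer than GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG; the trailing
+ * UINT8_MAX byte acts as a simple end-of-unit delimiter.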
+ */ + SSMR3PutU64(pSSM, pHv->uDbgPendingBufferMsr); + SSMR3PutU64(pSSM, pHv->uDbgSendBufferMsr); + SSMR3PutU64(pSSM, pHv->uDbgRecvBufferMsr); + SSMR3PutU64(pSSM, pHv->uDbgStatusMsr); + SSMR3PutU32(pSSM, pHv->enmDbgReply); + SSMR3PutU32(pSSM, pHv->uDbgBootpXId); + SSMR3PutU32(pSSM, pHv->DbgGuestIp4Addr.u); + SSMR3PutU16(pSSM, pHv->uUdpGuestDstPort); + SSMR3PutU16(pSSM, pHv->uUdpGuestSrcPort); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PGIMHVCPU pHvCpu = &pVM->aCpus[i].gim.s.u.HvCpu; + SSMR3PutU64(pSSM, pHvCpu->uSimpMsr); + for (size_t idxSintMsr = 0; idxSintMsr < RT_ELEMENTS(pHvCpu->auSintMsrs); idxSintMsr++) + SSMR3PutU64(pSSM, pHvCpu->auSintMsrs[idxSintMsr]); + } + + return SSMR3PutU8(pSSM, UINT8_MAX); +} + + +/** + * Hyper-V state-load operation, final pass. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +VMMR3_INT_DECL(int) gimR3HvLoad(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Load the Hyper-V SSM version first. + */ + uint32_t uHvSavedStateVersion; + int rc = SSMR3GetU32(pSSM, &uHvSavedStateVersion); + AssertRCReturn(rc, rc); + if ( uHvSavedStateVersion != GIM_HV_SAVED_STATE_VERSION + && uHvSavedStateVersion != GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG_UDP_PORTS + && uHvSavedStateVersion != GIM_HV_SAVED_STATE_VERSION_PRE_SYNIC + && uHvSavedStateVersion != GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG) + return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS, + N_("Unsupported Hyper-V saved-state version %u (current %u)!"), + uHvSavedStateVersion, GIM_HV_SAVED_STATE_VERSION); + + /* + * Update the TSC frequency from TM. + */ + PGIMHV pHv = &pVM->gim.s.u.Hv; + pHv->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM); + + /* + * Load per-VM MSRs. + */ + SSMR3GetU64(pSSM, &pHv->u64GuestOsIdMsr); + SSMR3GetU64(pSSM, &pHv->u64HypercallMsr); + SSMR3GetU64(pSSM, &pHv->u64TscPageMsr); + + /* + * Load Hyper-V features / capabilities. + */ + SSMR3GetU32(pSSM, &pHv->uBaseFeat); + SSMR3GetU32(pSSM, &pHv->uPartFlags); + SSMR3GetU32(pSSM, &pHv->uPowMgmtFeat); + SSMR3GetU32(pSSM, &pHv->uMiscFeat); + SSMR3GetU32(pSSM, &pHv->uHyperHints); + SSMR3GetU32(pSSM, &pHv->uHyperCaps); + + /* + * Load and enable the Hypercall region. + */ + PGIMMMIO2REGION pRegion = &pHv->aMmio2Regions[GIM_HV_HYPERCALL_PAGE_REGION_IDX]; + SSMR3GetU8(pSSM, &pRegion->iRegion); + SSMR3GetBool(pSSM, &pRegion->fRCMapping); + SSMR3GetU32(pSSM, &pRegion->cbRegion); + SSMR3GetGCPhys(pSSM, &pRegion->GCPhysPage); + rc = SSMR3GetStrZ(pSSM, pRegion->szDescription, sizeof(pRegion->szDescription)); + AssertRCReturn(rc, rc); + + if (pRegion->cbRegion != PAGE_SIZE) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Hypercall page region size %u invalid, expected %u"), + pRegion->cbRegion, PAGE_SIZE); + + if (MSR_GIM_HV_HYPERCALL_PAGE_IS_ENABLED(pHv->u64HypercallMsr)) + { + Assert(pRegion->GCPhysPage != NIL_RTGCPHYS); + if (RT_LIKELY(pRegion->fRegistered)) + { + rc = gimR3HvEnableHypercallPage(pVM, pRegion->GCPhysPage); + if (RT_FAILURE(rc)) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Failed to enable the hypercall page. GCPhys=%#RGp rc=%Rrc"), + pRegion->GCPhysPage, rc); + } + else + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Hypercall MMIO2 region not registered. Missing GIM device?!")); + } + + /* + * Load and enable the reference TSC region. 
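+ * As with the hypercall page above, the region size is validated against
+ * PAGE_SIZE and, if the guest had the TSC page enabled, it is re-enabled
+ * via gimR3HvEnableTscPage() with the saved sequence number so that the
+ * sequence gets bumped on restore (the host TSC frequency may differ).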
+ */ + uint32_t uTscSequence; + pRegion = &pHv->aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX]; + SSMR3GetU8(pSSM, &pRegion->iRegion); + SSMR3GetBool(pSSM, &pRegion->fRCMapping); + SSMR3GetU32(pSSM, &pRegion->cbRegion); + SSMR3GetGCPhys(pSSM, &pRegion->GCPhysPage); + SSMR3GetStrZ(pSSM, pRegion->szDescription, sizeof(pRegion->szDescription)); + rc = SSMR3GetU32(pSSM, &uTscSequence); + AssertRCReturn(rc, rc); + + if (pRegion->cbRegion != PAGE_SIZE) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("TSC page region size %u invalid, expected %u"), + pRegion->cbRegion, PAGE_SIZE); + + if (MSR_GIM_HV_REF_TSC_IS_ENABLED(pHv->u64TscPageMsr)) + { + Assert(pRegion->GCPhysPage != NIL_RTGCPHYS); + if (pRegion->fRegistered) + { + rc = gimR3HvEnableTscPage(pVM, pRegion->GCPhysPage, true /* fUseThisTscSeq */, uTscSequence); + if (RT_FAILURE(rc)) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Failed to enable the TSC page. GCPhys=%#RGp rc=%Rrc"), + pRegion->GCPhysPage, rc); + } + else + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("TSC-page MMIO2 region not registered. Missing GIM device?!")); + } + + /* + * Load the debug support data. + */ + if (uHvSavedStateVersion > GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG) + { + SSMR3GetU64(pSSM, &pHv->uDbgPendingBufferMsr); + SSMR3GetU64(pSSM, &pHv->uDbgSendBufferMsr); + SSMR3GetU64(pSSM, &pHv->uDbgRecvBufferMsr); + SSMR3GetU64(pSSM, &pHv->uDbgStatusMsr); + SSMR3GetU32(pSSM, (uint32_t *)&pHv->enmDbgReply); + SSMR3GetU32(pSSM, &pHv->uDbgBootpXId); + rc = SSMR3GetU32(pSSM, &pHv->DbgGuestIp4Addr.u); + AssertRCReturn(rc, rc); + if (uHvSavedStateVersion > GIM_HV_SAVED_STATE_VERSION_PRE_DEBUG_UDP_PORTS) + { + rc = SSMR3GetU16(pSSM, &pHv->uUdpGuestDstPort); AssertRCReturn(rc, rc); + rc = SSMR3GetU16(pSSM, &pHv->uUdpGuestSrcPort); AssertRCReturn(rc, rc); + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PGIMHVCPU pHvCpu = &pVM->aCpus[i].gim.s.u.HvCpu; + SSMR3GetU64(pSSM, &pHvCpu->uSimpMsr); + if (uHvSavedStateVersion <= GIM_HV_SAVED_STATE_VERSION_PRE_SYNIC) + SSMR3GetU64(pSSM, &pHvCpu->auSintMsrs[GIM_HV_VMBUS_MSG_SINT]); + else + { + for (uint8_t idxSintMsr = 0; idxSintMsr < RT_ELEMENTS(pHvCpu->auSintMsrs); idxSintMsr++) + SSMR3GetU64(pSSM, &pHvCpu->auSintMsrs[idxSintMsr]); + } + } + + uint8_t bDelim; + rc = SSMR3GetU8(pSSM, &bDelim); + } + else + rc = VINF_SUCCESS; + + return rc; +} + + +/** + * Hyper-V load-done callback. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +VMMR3_INT_DECL(int) gimR3HvLoadDone(PVM pVM, PSSMHANDLE pSSM) +{ + if (RT_SUCCESS(SSMR3HandleGetStatus(pSSM))) + { + /* + * Update EM on whether MSR_GIM_HV_GUEST_OS_ID allows hypercall instructions. + */ + if (pVM->gim.s.u.Hv.u64GuestOsIdMsr) + for (VMCPUID i = 0; i < pVM->cCpus; i++) + EMSetHypercallInstructionsEnabled(&pVM->aCpus[i], true); + else + for (VMCPUID i = 0; i < pVM->cCpus; i++) + EMSetHypercallInstructionsEnabled(&pVM->aCpus[i], false); + } + return VINF_SUCCESS; +} + + +/** + * Enables the Hyper-V APIC-assist page. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhysApicAssistPage Where to map the APIC-assist page. + */ +VMMR3_INT_DECL(int) gimR3HvEnableApicAssistPage(PVMCPU pVCpu, RTGCPHYS GCPhysApicAssistPage) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + PPDMDEVINSR3 pDevIns = pVM->gim.s.pDevInsR3; + AssertPtrReturn(pDevIns, VERR_GIM_DEVICE_NOT_REGISTERED); + + /* + * Map the APIC-assist-page at the specified address. 
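+ * Note that no overlay mapping actually takes place here: the guest page
+ * is simply zeroed in place via PGMPhysSimpleWriteGCPhys, see the @todo
+ * below regarding the PGM large-page limitation.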
+ */ + /** @todo this is buggy when large pages are used due to a PGM limitation, see + * @bugref{7532}. Instead of the overlay style mapping, we just + * rewrite guest memory directly. */ + size_t const cbApicAssistPage = PAGE_SIZE; + void *pvApicAssist = RTMemAllocZ(cbApicAssistPage); + if (RT_LIKELY(pvApicAssist)) + { + int rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysApicAssistPage, pvApicAssist, cbApicAssistPage); + if (RT_SUCCESS(rc)) + { + /** @todo Inform APIC. */ + LogRel(("GIM%u: HyperV: Enabled APIC-assist page at %#RGp\n", pVCpu->idCpu, GCPhysApicAssistPage)); + } + else + { + LogRelFunc(("GIM%u: HyperV: PGMPhysSimpleWriteGCPhys failed. rc=%Rrc\n", pVCpu->idCpu, rc)); + rc = VERR_GIM_OPERATION_FAILED; + } + + RTMemFree(pvApicAssist); + return rc; + } + + LogRelFunc(("GIM%u: HyperV: Failed to alloc %u bytes\n", pVCpu->idCpu, cbApicAssistPage)); + return VERR_NO_MEMORY; +} + + +/** + * Disables the Hyper-V APIC-assist page. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) gimR3HvDisableApicAssistPage(PVMCPU pVCpu) +{ + LogRel(("GIM%u: HyperV: Disabled APIC-assist page\n", pVCpu->idCpu)); + /** @todo inform APIC */ + return VINF_SUCCESS; +} + + +/** + * Hyper-V synthetic timer callback. + * + * @param pVM The cross context VM structure. + * @param pTimer Pointer to timer. + * @param pvUser Pointer to the synthetic timer. + */ +static DECLCALLBACK(void) gimR3HvTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser) +{ + PGIMHVSTIMER pHvStimer = (PGIMHVSTIMER)pvUser; + Assert(pHvStimer); + Assert(TMTimerIsLockOwner(pTimer)); RT_NOREF(pTimer); + Assert(pHvStimer->idCpu < pVM->cCpus); + + PVMCPU pVCpu = &pVM->aCpus[pHvStimer->idCpu]; + PGIMHVCPU pHvCpu = &pVCpu->gim.s.u.HvCpu; + Assert(pHvStimer->idxStimer < RT_ELEMENTS(pHvCpu->aStatStimerFired)); + + STAM_COUNTER_INC(&pHvCpu->aStatStimerFired[pHvStimer->idxStimer]); + + uint64_t const uStimerConfig = pHvStimer->uStimerConfigMsr; + uint16_t const idxSint = MSR_GIM_HV_STIMER_GET_SINTX(uStimerConfig); + if (RT_LIKELY(idxSint < RT_ELEMENTS(pHvCpu->auSintMsrs))) + { + uint64_t const uSint = pHvCpu->auSintMsrs[idxSint]; + if (!MSR_GIM_HV_SINT_IS_MASKED(uSint)) + { + uint8_t const uVector = MSR_GIM_HV_SINT_GET_VECTOR(uSint); + bool const fAutoEoi = MSR_GIM_HV_SINT_IS_AUTOEOI(uSint); + APICHvSendInterrupt(pVCpu, uVector, fAutoEoi, XAPICTRIGGERMODE_EDGE); + } + } + + /* Re-arm the timer if it's periodic. */ + if (MSR_GIM_HV_STIMER_IS_PERIODIC(uStimerConfig)) + gimHvStartStimer(pVCpu, pHvStimer); +} + + +/** + * Enables the Hyper-V SIEF page. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhysSiefPage Where to map the SIEF page. + */ +VMMR3_INT_DECL(int) gimR3HvEnableSiefPage(PVMCPU pVCpu, RTGCPHYS GCPhysSiefPage) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + PPDMDEVINSR3 pDevIns = pVM->gim.s.pDevInsR3; + AssertPtrReturn(pDevIns, VERR_GIM_DEVICE_NOT_REGISTERED); + + /* + * Map the SIEF page at the specified address. + */ + /** @todo this is buggy when large pages are used due to a PGM limitation, see + * @bugref{7532}. Instead of the overlay style mapping, we just + * rewrite guest memory directly. */ + size_t const cbSiefPage = PAGE_SIZE; + void *pvSiefPage = RTMemAllocZ(cbSiefPage); + if (RT_LIKELY(pvSiefPage)) + { + int rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysSiefPage, pvSiefPage, cbSiefPage); + if (RT_SUCCESS(rc)) + { + /** @todo SIEF setup. 
*/ + LogRel(("GIM%u: HyperV: Enabled SIEF page at %#RGp\n", pVCpu->idCpu, GCPhysSiefPage)); + } + else + { + LogRelFunc(("GIM%u: HyperV: PGMPhysSimpleWriteGCPhys failed. rc=%Rrc\n", pVCpu->idCpu, rc)); + rc = VERR_GIM_OPERATION_FAILED; + } + + RTMemFree(pvSiefPage); + return rc; + } + + LogRelFunc(("GIM%u: HyperV: Failed to alloc %u bytes\n", pVCpu->idCpu, cbSiefPage)); + return VERR_NO_MEMORY; +} + + +/** + * Disables the Hyper-V SIEF page. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) gimR3HvDisableSiefPage(PVMCPU pVCpu) +{ + LogRel(("GIM%u: HyperV: Disabled APIC-assist page\n", pVCpu->idCpu)); + /** @todo SIEF teardown. */ + return VINF_SUCCESS; +} + + +/** + * Enables the Hyper-V TSC page. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhysTscPage Where to map the TSC page. + * @param fUseThisTscSeq Whether to set the TSC sequence number to the one + * specified in @a uTscSeq. + * @param uTscSeq The TSC sequence value to use. Ignored if + * @a fUseThisTscSeq is false. + */ +VMMR3_INT_DECL(int) gimR3HvEnableTscPage(PVM pVM, RTGCPHYS GCPhysTscPage, bool fUseThisTscSeq, uint32_t uTscSeq) +{ + PPDMDEVINSR3 pDevIns = pVM->gim.s.pDevInsR3; + PGIMMMIO2REGION pRegion = &pVM->gim.s.u.Hv.aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX]; + AssertPtrReturn(pDevIns, VERR_GIM_DEVICE_NOT_REGISTERED); + + int rc; + if (pRegion->fMapped) + { + /* + * Is it already enabled at the given guest-address? + */ + if (pRegion->GCPhysPage == GCPhysTscPage) + return VINF_SUCCESS; + + /* + * If it's mapped at a different address, unmap the previous address. + */ + rc = gimR3HvDisableTscPage(pVM); + AssertRC(rc); + } + + /* + * Map the TSC-page at the specified address. + */ + Assert(!pRegion->fMapped); + + /** @todo this is buggy when large pages are used due to a PGM limitation, see + * @bugref{7532}. Instead of the overlay style mapping, we just + * rewrite guest memory directly. */ +#if 0 + rc = gimR3Mmio2Map(pVM, pRegion, GCPhysTscPage); + if (RT_SUCCESS(rc)) + { + Assert(pRegion->GCPhysPage == GCPhysTscPage); + + /* + * Update the TSC scale. Windows guests expect a non-zero TSC sequence, otherwise + * they fallback to using the reference count MSR which is not ideal in terms of VM-exits. + * + * Also, Hyper-V normalizes the time in 10 MHz, see: + * http://technet.microsoft.com/it-it/sysinternals/dn553408%28v=vs.110%29 + */ + PGIMHVREFTSC pRefTsc = (PGIMHVREFTSC)pRegion->pvPageR3; + Assert(pRefTsc); + + PGIMHV pHv = &pVM->gim.s.u.Hv; + uint64_t const u64TscKHz = pHv->cTscTicksPerSecond / UINT64_C(1000); + uint32_t u32TscSeq = 1; + if ( fUseThisTscSeq + && uTscSeq < UINT32_C(0xfffffffe)) + u32TscSeq = uTscSeq + 1; + pRefTsc->u32TscSequence = u32TscSeq; + pRefTsc->u64TscScale = ((INT64_C(10000) << 32) / u64TscKHz) << 32; + pRefTsc->i64TscOffset = 0; + + LogRel(("GIM: HyperV: Enabled TSC page at %#RGp - u64TscScale=%#RX64 u64TscKHz=%#RX64 (%'RU64) Seq=%#RU32\n", + GCPhysTscPage, pRefTsc->u64TscScale, u64TscKHz, u64TscKHz, pRefTsc->u32TscSequence)); + + TMR3CpuTickParavirtEnable(pVM); + return VINF_SUCCESS; + } + else + LogRelFunc(("gimR3Mmio2Map failed. 
rc=%Rrc\n", rc)); + return VERR_GIM_OPERATION_FAILED; +#else + AssertReturn(pRegion->cbRegion == PAGE_SIZE, VERR_GIM_IPE_2); + PGIMHVREFTSC pRefTsc = (PGIMHVREFTSC)RTMemAllocZ(PAGE_SIZE); + if (RT_UNLIKELY(!pRefTsc)) + { + LogRelFunc(("Failed to alloc %u bytes\n", PAGE_SIZE)); + return VERR_NO_MEMORY; + } + + PGIMHV pHv = &pVM->gim.s.u.Hv; + uint64_t const u64TscKHz = pHv->cTscTicksPerSecond / UINT64_C(1000); + uint32_t u32TscSeq = 1; + if ( fUseThisTscSeq + && uTscSeq < UINT32_C(0xfffffffe)) + u32TscSeq = uTscSeq + 1; + pRefTsc->u32TscSequence = u32TscSeq; + pRefTsc->u64TscScale = ((INT64_C(10000) << 32) / u64TscKHz) << 32; + pRefTsc->i64TscOffset = 0; + + rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysTscPage, pRefTsc, sizeof(*pRefTsc)); + if (RT_SUCCESS(rc)) + { + LogRel(("GIM: HyperV: Enabled TSC page at %#RGp - u64TscScale=%#RX64 u64TscKHz=%#RX64 (%'RU64) Seq=%#RU32\n", + GCPhysTscPage, pRefTsc->u64TscScale, u64TscKHz, u64TscKHz, pRefTsc->u32TscSequence)); + + pRegion->GCPhysPage = GCPhysTscPage; + pRegion->fMapped = true; + TMR3CpuTickParavirtEnable(pVM); + } + else + { + LogRelFunc(("GIM: HyperV: PGMPhysSimpleWriteGCPhys failed. rc=%Rrc\n", rc)); + rc = VERR_GIM_OPERATION_FAILED; + } + RTMemFree(pRefTsc); + return rc; +#endif +} + + +/** + * Enables the Hyper-V SIM page. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhysSimPage Where to map the SIM page. + */ +VMMR3_INT_DECL(int) gimR3HvEnableSimPage(PVMCPU pVCpu, RTGCPHYS GCPhysSimPage) +{ + PVM pVM = pVCpu->CTX_SUFF(pVM); + PPDMDEVINSR3 pDevIns = pVM->gim.s.pDevInsR3; + AssertPtrReturn(pDevIns, VERR_GIM_DEVICE_NOT_REGISTERED); + + /* + * Map the SIMP page at the specified address. + */ + /** @todo this is buggy when large pages are used due to a PGM limitation, see + * @bugref{7532}. Instead of the overlay style mapping, we just + * rewrite guest memory directly. */ + size_t const cbSimPage = PAGE_SIZE; + void *pvSimPage = RTMemAllocZ(cbSimPage); + if (RT_LIKELY(pvSimPage)) + { + int rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysSimPage, pvSimPage, cbSimPage); + if (RT_SUCCESS(rc)) + { + /** @todo SIM setup. */ + LogRel(("GIM%u: HyperV: Enabled SIM page at %#RGp\n", pVCpu->idCpu, GCPhysSimPage)); + } + else + { + LogRelFunc(("GIM%u: HyperV: PGMPhysSimpleWriteGCPhys failed. rc=%Rrc\n", pVCpu->idCpu, rc)); + rc = VERR_GIM_OPERATION_FAILED; + } + + RTMemFree(pvSimPage); + return rc; + } + + LogRelFunc(("GIM%u: HyperV: Failed to alloc %u bytes\n", pVCpu->idCpu, cbSimPage)); + return VERR_NO_MEMORY; +} + + +/** + * Disables the Hyper-V SIM page. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) gimR3HvDisableSimPage(PVMCPU pVCpu) +{ + LogRel(("GIM%u: HyperV: Disabled SIM page\n", pVCpu->idCpu)); + /** @todo SIM teardown. */ + return VINF_SUCCESS; +} + + + +/** + * Disables the Hyper-V TSC page. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3HvDisableTscPage(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + PGIMMMIO2REGION pRegion = &pHv->aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX]; + if (pRegion->fMapped) + { +#if 0 + gimR3Mmio2Unmap(pVM, pRegion); + Assert(!pRegion->fMapped); +#else + pRegion->fMapped = false; +#endif + LogRel(("GIM: HyperV: Disabled TSC page\n")); + + TMR3CpuTickParavirtDisable(pVM); + return VINF_SUCCESS; + } + return VERR_GIM_PVTSC_NOT_ENABLED; +} + + +/** + * Disables the Hyper-V Hypercall page. + * + * @returns VBox status code. 
+ */ +VMMR3_INT_DECL(int) gimR3HvDisableHypercallPage(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + PGIMMMIO2REGION pRegion = &pHv->aMmio2Regions[GIM_HV_HYPERCALL_PAGE_REGION_IDX]; + if (pRegion->fMapped) + { +#if 0 + gimR3Mmio2Unmap(pVM, pRegion); + Assert(!pRegion->fMapped); +#else + pRegion->fMapped = false; +#endif + LogRel(("GIM: HyperV: Disabled Hypercall-page\n")); + return VINF_SUCCESS; + } + return VERR_GIM_HYPERCALLS_NOT_ENABLED; +} + + +/** + * Enables the Hyper-V Hypercall page. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhysHypercallPage Where to map the hypercall page. + */ +VMMR3_INT_DECL(int) gimR3HvEnableHypercallPage(PVM pVM, RTGCPHYS GCPhysHypercallPage) +{ + PPDMDEVINSR3 pDevIns = pVM->gim.s.pDevInsR3; + PGIMMMIO2REGION pRegion = &pVM->gim.s.u.Hv.aMmio2Regions[GIM_HV_HYPERCALL_PAGE_REGION_IDX]; + AssertPtrReturn(pDevIns, VERR_GIM_DEVICE_NOT_REGISTERED); + + if (pRegion->fMapped) + { + /* + * Is it already enabled at the given guest-address? + */ + if (pRegion->GCPhysPage == GCPhysHypercallPage) + return VINF_SUCCESS; + + /* + * If it's mapped at a different address, unmap the previous address. + */ + int rc2 = gimR3HvDisableHypercallPage(pVM); + AssertRC(rc2); + } + + /* + * Map the hypercall-page at the specified address. + */ + Assert(!pRegion->fMapped); + + /** @todo this is buggy when large pages are used due to a PGM limitation, see + * @bugref{7532}. Instead of the overlay style mapping, we just + * rewrite guest memory directly. */ +#if 0 + int rc = gimR3Mmio2Map(pVM, pRegion, GCPhysHypercallPage); + if (RT_SUCCESS(rc)) + { + Assert(pRegion->GCPhysPage == GCPhysHypercallPage); + + /* + * Patch the hypercall-page. + */ + size_t cbWritten = 0; + rc = VMMPatchHypercall(pVM, pRegion->pvPageR3, PAGE_SIZE, &cbWritten); + if ( RT_SUCCESS(rc) + && cbWritten < PAGE_SIZE) + { + uint8_t *pbLast = (uint8_t *)pRegion->pvPageR3 + cbWritten; + *pbLast = 0xc3; /* RET */ + + /* + * Notify VMM that hypercalls are now enabled for all VCPUs. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + VMMHypercallsEnable(&pVM->aCpus[i]); + + LogRel(("GIM: HyperV: Enabled hypercall page at %#RGp\n", GCPhysHypercallPage)); + return VINF_SUCCESS; + } + else + { + if (rc == VINF_SUCCESS) + rc = VERR_GIM_OPERATION_FAILED; + LogRel(("GIM: HyperV: VMMPatchHypercall failed. rc=%Rrc cbWritten=%u\n", rc, cbWritten)); + } + + gimR3Mmio2Unmap(pVM, pRegion); + } + + LogRel(("GIM: HyperV: gimR3Mmio2Map failed. rc=%Rrc\n", rc)); + return rc; +#else + AssertReturn(pRegion->cbRegion == PAGE_SIZE, VERR_GIM_IPE_3); + void *pvHypercallPage = RTMemAllocZ(PAGE_SIZE); + if (RT_UNLIKELY(!pvHypercallPage)) + { + LogRelFunc(("Failed to alloc %u bytes\n", PAGE_SIZE)); + return VERR_NO_MEMORY; + } + + /* + * Patch the hypercall-page. + */ + size_t cbHypercall = 0; + int rc = GIMQueryHypercallOpcodeBytes(pVM, pvHypercallPage, PAGE_SIZE, &cbHypercall, NULL /*puDisOpcode*/); + if ( RT_SUCCESS(rc) + && cbHypercall < PAGE_SIZE) + { + uint8_t *pbLast = (uint8_t *)pvHypercallPage + cbHypercall; + *pbLast = 0xc3; /* RET */ + + rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysHypercallPage, pvHypercallPage, PAGE_SIZE); + if (RT_SUCCESS(rc)) + { + pRegion->GCPhysPage = GCPhysHypercallPage; + pRegion->fMapped = true; + LogRel(("GIM: HyperV: Enabled hypercall page at %#RGp\n", GCPhysHypercallPage)); + } + else + LogRel(("GIM: HyperV: PGMPhysSimpleWriteGCPhys failed during hypercall page setup. 
rc=%Rrc\n", rc)); + } + else + { + if (rc == VINF_SUCCESS) + rc = VERR_GIM_OPERATION_FAILED; + LogRel(("GIM: HyperV: VMMPatchHypercall failed. rc=%Rrc cbHypercall=%u\n", rc, cbHypercall)); + } + + RTMemFree(pvHypercallPage); + return rc; +#endif +} + + +/** + * Initializes Hyper-V guest hypercall support. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int gimR3HvInitHypercallSupport(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + pHv->pbHypercallIn = (uint8_t *)RTMemAllocZ(GIM_HV_PAGE_SIZE); + if (RT_LIKELY(pHv->pbHypercallIn)) + { + pHv->pbHypercallOut = (uint8_t *)RTMemAllocZ(GIM_HV_PAGE_SIZE); + if (RT_LIKELY(pHv->pbHypercallOut)) + return VINF_SUCCESS; + RTMemFree(pHv->pbHypercallIn); + } + return VERR_NO_MEMORY; +} + + +/** + * Terminates Hyper-V guest hypercall support. + * + * @param pVM The cross context VM structure. + */ +static void gimR3HvTermHypercallSupport(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + RTMemFree(pHv->pbHypercallIn); + pHv->pbHypercallIn = NULL; + + RTMemFree(pHv->pbHypercallOut); + pHv->pbHypercallOut = NULL; +} + + +/** + * Initializes Hyper-V guest debug support. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int gimR3HvInitDebugSupport(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + if ( (pHv->uPartFlags & GIM_HV_PART_FLAGS_DEBUGGING) + || pHv->fIsInterfaceVs) + { + pHv->fDbgEnabled = true; + pHv->pvDbgBuffer = RTMemAllocZ(PAGE_SIZE); + if (!pHv->pvDbgBuffer) + return VERR_NO_MEMORY; + } + return VINF_SUCCESS; +} + + +/** + * Terminates Hyper-V guest debug support. + * + * @param pVM The cross context VM structure. + */ +static void gimR3HvTermDebugSupport(PVM pVM) +{ + PGIMHV pHv = &pVM->gim.s.u.Hv; + if (pHv->pvDbgBuffer) + { + RTMemFree(pHv->pvDbgBuffer); + pHv->pvDbgBuffer = NULL; + } +} + + +/** + * Reads data from a debugger connection, asynchronous. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvBuf Where to read the data. + * @param cbBuf Size of the read buffer @a pvBuf, must be >= @a cbRead. + * @param cbRead Number of bytes to read. + * @param pcbRead Where to store how many bytes were really read. + * @param cMsTimeout Timeout of the read operation in milliseconds. + * @param fUdpPkt Whether the debug data returned in @a pvBuf needs to be + * encapsulated in a UDP frame. + * + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3HvDebugRead(PVM pVM, void *pvBuf, uint32_t cbBuf, uint32_t cbRead, uint32_t *pcbRead, + uint32_t cMsTimeout, bool fUdpPkt) +{ + NOREF(cMsTimeout); /** @todo implement timeout. */ + AssertCompile(sizeof(size_t) >= sizeof(uint32_t)); + AssertReturn(cbBuf >= cbRead, VERR_INVALID_PARAMETER); + + int rc; + if (!fUdpPkt) + { + /* + * Read the raw debug data. + */ + size_t cbReallyRead = cbRead; + rc = gimR3DebugRead(pVM, pvBuf, &cbReallyRead, gimR3HvDebugBufReadCompleted); + *pcbRead = (uint32_t)cbReallyRead; + } + else + { + /* + * Guest requires UDP encapsulated frames. 
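+             * The raw debug payload read below is wrapped in an Ethernet, a minimal
+             * IPv4 (no options) and a UDP header before being returned, mirroring
+             * the frame layout the guest itself uses (see gimR3HvDebugWrite).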
+ */ + PGIMHV pHv = &pVM->gim.s.u.Hv; + rc = VERR_GIM_IPE_1; + switch (pHv->enmDbgReply) + { + case GIMHVDEBUGREPLY_UDP: + { + size_t cbReallyRead = cbRead; + rc = gimR3DebugRead(pVM, pvBuf, &cbReallyRead, gimR3HvDebugBufReadCompleted); + if ( RT_SUCCESS(rc) + && cbReallyRead > 0) + { + uint8_t abFrame[sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + sizeof(RTNETUDP)]; + if (cbReallyRead + sizeof(abFrame) <= cbBuf) + { + /* + * Windows guests pumps ethernet frames over the Hyper-V debug connection as + * explained in gimR3HvHypercallPostDebugData(). Here, we reconstruct the packet + * with the guest's self-chosen IP ARP address we saved in pHv->DbgGuestAddr. + * + * Note! We really need to pass the minimum IPv4 header length. The Windows 10 guest + * is -not- happy if we include the IPv4 options field, i.e. using sizeof(RTNETIPV4) + * instead of RTNETIPV4_MIN_LEN. + */ + RT_ZERO(abFrame); + PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)&abFrame[0]; + PRTNETIPV4 pIpHdr = (PRTNETIPV4) (pEthHdr + 1); + PRTNETUDP pUdpHdr = (PRTNETUDP) ((uint8_t *)pIpHdr + RTNETIPV4_MIN_LEN); + + /* Ethernet */ + pEthHdr->EtherType = RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4); + /* IPv4 */ + pIpHdr->ip_v = 4; + pIpHdr->ip_hl = RTNETIPV4_MIN_LEN / sizeof(uint32_t); + pIpHdr->ip_tos = 0; + pIpHdr->ip_len = RT_H2N_U16((uint16_t)cbReallyRead + sizeof(RTNETUDP) + RTNETIPV4_MIN_LEN); + pIpHdr->ip_id = 0; + pIpHdr->ip_off = 0; + pIpHdr->ip_ttl = 255; + pIpHdr->ip_p = RTNETIPV4_PROT_UDP; + pIpHdr->ip_sum = 0; + pIpHdr->ip_src.u = 0; + pIpHdr->ip_dst.u = pHv->DbgGuestIp4Addr.u; + pIpHdr->ip_sum = RTNetIPv4HdrChecksum(pIpHdr); + /* UDP */ + pUdpHdr->uh_dport = pHv->uUdpGuestSrcPort; + pUdpHdr->uh_sport = pHv->uUdpGuestDstPort; + pUdpHdr->uh_ulen = RT_H2N_U16_C((uint16_t)cbReallyRead + sizeof(*pUdpHdr)); + + /* Make room by moving the payload and prepending the headers. */ + uint8_t *pbData = (uint8_t *)pvBuf; + memmove(pbData + sizeof(abFrame), pbData, cbReallyRead); + memcpy(pbData, &abFrame[0], sizeof(abFrame)); + + /* Update the adjusted sizes. 
*/ + cbReallyRead += sizeof(abFrame); + } + else + rc = VERR_BUFFER_UNDERFLOW; + } + *pcbRead = (uint32_t)cbReallyRead; + break; + } + + case GIMHVDEBUGREPLY_ARP_REPLY: + { + uint32_t const cbArpReplyPkt = sizeof(g_abArpReply); + if (cbBuf >= cbArpReplyPkt) + { + memcpy(pvBuf, g_abArpReply, cbArpReplyPkt); + rc = VINF_SUCCESS; + *pcbRead = cbArpReplyPkt; + pHv->enmDbgReply = GIMHVDEBUGREPLY_ARP_REPLY_SENT; + } + else + { + rc = VERR_BUFFER_UNDERFLOW; + *pcbRead = 0; + } + break; + } + + case GIMHVDEBUGREPLY_DHCP_OFFER: + { + uint32_t const cbDhcpOfferPkt = sizeof(g_abDhcpOffer); + if (cbBuf >= cbDhcpOfferPkt) + { + memcpy(pvBuf, g_abDhcpOffer, cbDhcpOfferPkt); + PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pvBuf; + PRTNETIPV4 pIpHdr = (PRTNETIPV4) (pEthHdr + 1); + PRTNETUDP pUdpHdr = (PRTNETUDP) ((uint8_t *)pIpHdr + RTNETIPV4_MIN_LEN); + PRTNETBOOTP pBootpHdr = (PRTNETBOOTP) (pUdpHdr + 1); + pBootpHdr->bp_xid = pHv->uDbgBootpXId; + + rc = VINF_SUCCESS; + *pcbRead = cbDhcpOfferPkt; + pHv->enmDbgReply = GIMHVDEBUGREPLY_DHCP_OFFER_SENT; + LogRel(("GIM: HyperV: Debug DHCP offered IP address %RTnaipv4, transaction Id %#x\n", pBootpHdr->bp_yiaddr, + RT_N2H_U32(pHv->uDbgBootpXId))); + } + else + { + rc = VERR_BUFFER_UNDERFLOW; + *pcbRead = 0; + } + break; + } + + case GIMHVDEBUGREPLY_DHCP_ACK: + { + uint32_t const cbDhcpAckPkt = sizeof(g_abDhcpAck); + if (cbBuf >= cbDhcpAckPkt) + { + memcpy(pvBuf, g_abDhcpAck, cbDhcpAckPkt); + PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pvBuf; + PRTNETIPV4 pIpHdr = (PRTNETIPV4) (pEthHdr + 1); + PRTNETUDP pUdpHdr = (PRTNETUDP) ((uint8_t *)pIpHdr + RTNETIPV4_MIN_LEN); + PRTNETBOOTP pBootpHdr = (PRTNETBOOTP) (pUdpHdr + 1); + pBootpHdr->bp_xid = pHv->uDbgBootpXId; + + rc = VINF_SUCCESS; + *pcbRead = cbDhcpAckPkt; + pHv->enmDbgReply = GIMHVDEBUGREPLY_DHCP_ACK_SENT; + LogRel(("GIM: HyperV: Debug DHCP acknowledged IP address %RTnaipv4, transaction Id %#x\n", + pBootpHdr->bp_yiaddr, RT_N2H_U32(pHv->uDbgBootpXId))); + } + else + { + rc = VERR_BUFFER_UNDERFLOW; + *pcbRead = 0; + } + break; + } + + case GIMHVDEBUGREPLY_ARP_REPLY_SENT: + case GIMHVDEBUGREPLY_DHCP_OFFER_SENT: + case GIMHVDEBUGREPLY_DHCP_ACK_SENT: + { + rc = VINF_SUCCESS; + *pcbRead = 0; + break; + } + + default: + { + AssertMsgFailed(("GIM: HyperV: Invalid/unimplemented debug reply type %u\n", pHv->enmDbgReply)); + rc = VERR_INTERNAL_ERROR_2; + } + } + Assert(rc != VERR_GIM_IPE_1); + +#ifdef DEBUG_ramshankar + if ( rc == VINF_SUCCESS + && *pcbRead > 0) + { + RTSOCKET hSocket; + int rc2 = RTUdpCreateClientSocket("localhost", 52000, NULL, &hSocket); + if (RT_SUCCESS(rc2)) + { + size_t cbTmpWrite = *pcbRead; + RTSocketWriteNB(hSocket, pvBuf, *pcbRead, &cbTmpWrite); NOREF(cbTmpWrite); + RTSocketClose(hSocket); + } + } +#endif + } + + return rc; +} + + +/** + * Writes data to the debugger connection, asynchronous. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvData Pointer to the data to be written. + * @param cbWrite Size of the write buffer @a pvData. + * @param pcbWritten Where to store the number of bytes written. + * @param fUdpPkt Whether the debug data in @a pvData is encapsulated in a + * UDP frame. + * + * @thread EMT. 
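+ *
+ * @remarks When @a fUdpPkt is true the buffer is expected to hold a complete
+ *          Ethernet/IPv4/UDP frame; the DHCP/ARP handshake frames are answered
+ *          locally (via the reply state machine consumed by gimR3HvDebugRead)
+ *          and the headers are stripped so that only the debugger payload is
+ *          forwarded to the debug connection.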
+ */ +VMMR3_INT_DECL(int) gimR3HvDebugWrite(PVM pVM, void *pvData, uint32_t cbWrite, uint32_t *pcbWritten, bool fUdpPkt) +{ + Assert(cbWrite > 0); + + PGIMHV pHv = &pVM->gim.s.u.Hv; + bool fIgnorePkt = false; + uint8_t *pbData = (uint8_t *)pvData; + if (fUdpPkt) + { +#ifdef DEBUG_ramshankar + RTSOCKET hSocket; + int rc2 = RTUdpCreateClientSocket("localhost", 52000, NULL, &hSocket); + if (RT_SUCCESS(rc2)) + { + size_t cbTmpWrite = cbWrite; + RTSocketWriteNB(hSocket, pbData, cbWrite, &cbTmpWrite); NOREF(cbTmpWrite); + RTSocketClose(hSocket); + } +#endif + /* + * Windows guests sends us ethernet frames over the Hyper-V debug connection. + * It sends DHCP/ARP queries with zero'd out MAC addresses and requires fudging up the + * packets somewhere. + * + * The Microsoft WinDbg debugger talks UDP and thus only expects the actual debug + * protocol payload. + * + * If the guest is configured with the "nodhcp" option it sends ARP queries with + * a self-chosen IP and after a couple of attempts of receiving no replies, the guest + * picks its own IP address. After this, the guest starts sending the UDP packets + * we require. We thus ignore the initial ARP packets until the guest eventually + * starts talking UDP. Then we can finally feed the UDP payload over the debug + * connection. + * + * When 'kdvm.dll' is the debug transport in the guest (Windows 7), it doesn't bother + * with this DHCP/ARP phase. It starts sending debug data in a UDP frame right away. + */ + if (cbWrite > sizeof(RTNETETHERHDR)) + { + PCRTNETETHERHDR pEtherHdr = (PCRTNETETHERHDR)pbData; + if (pEtherHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4)) + { + if (cbWrite > sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN) + { + size_t const cbMaxIpHdr = cbWrite - sizeof(RTNETETHERHDR) - sizeof(RTNETUDP) - 1; + size_t const cbMaxIpPkt = cbWrite - sizeof(RTNETETHERHDR); + PCRTNETIPV4 pIp4Hdr = (PCRTNETIPV4)(pbData + sizeof(RTNETETHERHDR)); + bool const fValidIp4 = RTNetIPv4IsHdrValid(pIp4Hdr, cbMaxIpHdr, cbMaxIpPkt, false /*fChecksum*/); + if ( fValidIp4 + && pIp4Hdr->ip_p == RTNETIPV4_PROT_UDP) + { + uint32_t const cbIpHdr = pIp4Hdr->ip_hl * 4; + uint32_t const cbMaxUdpPkt = cbWrite - sizeof(RTNETETHERHDR) - cbIpHdr; + PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t *)pIp4Hdr + cbIpHdr); + if ( pUdpHdr->uh_ulen > RT_H2N_U16(sizeof(RTNETUDP)) + && pUdpHdr->uh_ulen <= RT_H2N_U16((uint16_t)cbMaxUdpPkt)) + { + /* + * Check for DHCP. + */ + bool fBuggyPkt = false; + size_t const cbUdpPkt = cbMaxIpPkt - cbIpHdr; + if ( pUdpHdr->uh_dport == RT_N2H_U16_C(RTNETIPV4_PORT_BOOTPS) + && pUdpHdr->uh_sport == RT_N2H_U16_C(RTNETIPV4_PORT_BOOTPC)) + { + PCRTNETBOOTP pDhcpPkt = (PCRTNETBOOTP)(pUdpHdr + 1); + uint8_t bMsgType; + if ( cbMaxIpPkt >= cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN + && RTNetIPv4IsDHCPValid(pUdpHdr, pDhcpPkt, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType)) + { + switch (bMsgType) + { + case RTNET_DHCP_MT_DISCOVER: + pHv->enmDbgReply = GIMHVDEBUGREPLY_DHCP_OFFER; + pHv->uDbgBootpXId = pDhcpPkt->bp_xid; + break; + case RTNET_DHCP_MT_REQUEST: + pHv->enmDbgReply = GIMHVDEBUGREPLY_DHCP_ACK; + pHv->uDbgBootpXId = pDhcpPkt->bp_xid; + break; + default: + LogRelMax(5, ("GIM: HyperV: Debug DHCP MsgType %#x not implemented! Packet dropped\n", + bMsgType)); + break; + } + fIgnorePkt = true; + } + else if ( pIp4Hdr->ip_src.u == GIMHV_DEBUGCLIENT_IPV4 + && pIp4Hdr->ip_dst.u == 0) + { + /* + * Windows 8.1 seems to be sending malformed BOOTP packets at the final stage of the + * debugger sequence. 
It appears that a previously sent DHCP request buffer wasn't cleared + * in the guest and they re-use it instead of sending a zero destination+source port packet + * as expected below. + * + * We workaround Microsoft's bug here, or at least, I'm classifying it as a bug to + * preserve my own sanity, see @bugref{8006#c54}. + */ + fBuggyPkt = true; + } + } + + if ( ( !pUdpHdr->uh_dport + && !pUdpHdr->uh_sport) + || fBuggyPkt) + { + /* + * Extract the UDP payload and pass it to the debugger and record the guest IP address. + * + * Hyper-V sends UDP debugger packets with source and destination port as 0 except in the + * aforementioned buggy case. The buggy packet case requires us to remember the ports and + * reply to them, otherwise the guest won't receive the replies we sent with port 0. + */ + uint32_t const cbFrameHdr = sizeof(RTNETETHERHDR) + cbIpHdr + sizeof(RTNETUDP); + pbData += cbFrameHdr; + cbWrite -= cbFrameHdr; + pHv->DbgGuestIp4Addr.u = pIp4Hdr->ip_src.u; + pHv->uUdpGuestDstPort = pUdpHdr->uh_dport; + pHv->uUdpGuestSrcPort = pUdpHdr->uh_sport; + pHv->enmDbgReply = GIMHVDEBUGREPLY_UDP; + } + else + { + LogFlow(("GIM: HyperV: Ignoring UDP packet SourcePort=%u DstPort=%u\n", pUdpHdr->uh_sport, + pUdpHdr->uh_dport)); + fIgnorePkt = true; + } + } + else + { + LogFlow(("GIM: HyperV: Ignoring malformed UDP packet. cbMaxUdpPkt=%u UdpPkt.len=%u\n", cbMaxUdpPkt, + RT_N2H_U16(pUdpHdr->uh_ulen))); + fIgnorePkt = true; + } + } + else + { + LogFlow(("GIM: HyperV: Ignoring non-IP / non-UDP packet. fValidIp4=%RTbool Proto=%u\n", fValidIp4, + pIp4Hdr->ip_p)); + fIgnorePkt = true; + } + } + else + { + LogFlow(("GIM: HyperV: Ignoring IPv4 packet; too short to be valid UDP. cbWrite=%u\n", cbWrite)); + fIgnorePkt = true; + } + } + else if (pEtherHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_ARP)) + { + /* + * Check for targetted ARP query. + */ + PCRTNETARPHDR pArpHdr = (PCRTNETARPHDR)(pbData + sizeof(RTNETETHERHDR)); + if ( pArpHdr->ar_hlen == sizeof(RTMAC) + && pArpHdr->ar_plen == sizeof(RTNETADDRIPV4) + && pArpHdr->ar_htype == RT_H2N_U16(RTNET_ARP_ETHER) + && pArpHdr->ar_ptype == RT_H2N_U16(RTNET_ETHERTYPE_IPV4)) + { + uint16_t uArpOp = pArpHdr->ar_oper; + if (uArpOp == RT_H2N_U16_C(RTNET_ARPOP_REQUEST)) + { + PCRTNETARPIPV4 pArpPkt = (PCRTNETARPIPV4)pArpHdr; + bool fGratuitous = pArpPkt->ar_spa.u == pArpPkt->ar_tpa.u; + if ( !fGratuitous + && pArpPkt->ar_spa.u == GIMHV_DEBUGCLIENT_IPV4 + && pArpPkt->ar_tpa.u == GIMHV_DEBUGSERVER_IPV4) + { + pHv->enmDbgReply = GIMHVDEBUGREPLY_ARP_REPLY; + } + } + } + fIgnorePkt = true; + } + else + { + LogFlow(("GIM: HyperV: Ignoring non-IP packet. Ethertype=%#x\n", RT_N2H_U16(pEtherHdr->EtherType))); + fIgnorePkt = true; + } + } + } + + if (!fIgnorePkt) + { + AssertCompile(sizeof(size_t) >= sizeof(uint32_t)); + size_t cbWriteBuf = cbWrite; + int rc = gimR3DebugWrite(pVM, pbData, &cbWriteBuf); + if ( RT_SUCCESS(rc) + && cbWriteBuf == cbWrite) + *pcbWritten = (uint32_t)cbWriteBuf; + else + *pcbWritten = 0; + } + else + *pcbWritten = cbWrite; + + return VINF_SUCCESS; +} + + +/** + * Performs the HvPostDebugData hypercall. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param prcHv Where to store the result of the hypercall operation. + * + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3HvHypercallPostDebugData(PVM pVM, int *prcHv) +{ + AssertPtr(pVM); + AssertPtr(prcHv); + PGIMHV pHv = &pVM->gim.s.u.Hv; + int rcHv = GIM_HV_STATUS_OPERATION_DENIED; + + /* + * Grab the parameters. 
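+     * (pIn points into the pbHypercallIn scratch buffer set up by
+     * gimR3HvInitHypercallSupport(); the bytes to post follow the fixed
+     * input header.)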
+ */ + PGIMHVDEBUGPOSTIN pIn = (PGIMHVDEBUGPOSTIN)pHv->pbHypercallIn; + AssertPtrReturn(pIn, VERR_GIM_IPE_1); + uint32_t cbWrite = pIn->cbWrite; + uint32_t fFlags = pIn->fFlags; + uint8_t *pbData = ((uint8_t *)pIn) + sizeof(PGIMHVDEBUGPOSTIN); + + PGIMHVDEBUGPOSTOUT pOut = (PGIMHVDEBUGPOSTOUT)pHv->pbHypercallOut; + + /* + * Perform the hypercall. + */ +#if 0 + /* Currently disabled as Windows 10 guest passes us undocumented flags. */ + if (fFlags & ~GIM_HV_DEBUG_POST_OPTIONS_MASK)) + rcHv = GIM_HV_STATUS_INVALID_PARAMETER; +#else + RT_NOREF1(fFlags); +#endif + if (cbWrite > GIM_HV_DEBUG_MAX_DATA_SIZE) + rcHv = GIM_HV_STATUS_INVALID_PARAMETER; + else if (!cbWrite) + { + rcHv = GIM_HV_STATUS_SUCCESS; + pOut->cbPending = 0; + } + else if (cbWrite > 0) + { + uint32_t cbWritten = 0; + int rc2 = gimR3HvDebugWrite(pVM, pbData, cbWrite, &cbWritten, pHv->fIsVendorMsHv /*fUdpPkt*/); + if ( RT_SUCCESS(rc2) + && cbWritten == cbWrite) + { + pOut->cbPending = 0; + rcHv = GIM_HV_STATUS_SUCCESS; + } + else + rcHv = GIM_HV_STATUS_INSUFFICIENT_BUFFER; + } + + /* + * Update the guest memory with result. + */ + int rc = PGMPhysSimpleWriteGCPhys(pVM, pHv->GCPhysHypercallOut, pHv->pbHypercallOut, sizeof(GIMHVDEBUGPOSTOUT)); + if (RT_FAILURE(rc)) + { + LogRelMax(10, ("GIM: HyperV: HvPostDebugData failed to update guest memory. rc=%Rrc\n", rc)); + rc = VERR_GIM_HYPERCALL_MEMORY_WRITE_FAILED; + } + else + Assert(rc == VINF_SUCCESS); + + *prcHv = rcHv; + return rc; +} + + +/** + * Performs the HvRetrieveDebugData hypercall. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param prcHv Where to store the result of the hypercall operation. + * + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3HvHypercallRetrieveDebugData(PVM pVM, int *prcHv) +{ + AssertPtr(pVM); + AssertPtr(prcHv); + PGIMHV pHv = &pVM->gim.s.u.Hv; + int rcHv = GIM_HV_STATUS_OPERATION_DENIED; + + /* + * Grab the parameters. + */ + PGIMHVDEBUGRETRIEVEIN pIn = (PGIMHVDEBUGRETRIEVEIN)pHv->pbHypercallIn; + AssertPtrReturn(pIn, VERR_GIM_IPE_1); + uint32_t cbRead = pIn->cbRead; + uint32_t fFlags = pIn->fFlags; + uint64_t uTimeout = pIn->u64Timeout; + uint32_t cMsTimeout = (fFlags & GIM_HV_DEBUG_RETREIVE_LOOP) ? (uTimeout * 100) / RT_NS_1MS_64 : 0; + + PGIMHVDEBUGRETRIEVEOUT pOut = (PGIMHVDEBUGRETRIEVEOUT)pHv->pbHypercallOut; + AssertPtrReturn(pOut, VERR_GIM_IPE_2); + uint32_t *pcbReallyRead = &pOut->cbRead; + uint32_t *pcbRemainingRead = &pOut->cbRemaining; + void *pvData = ((uint8_t *)pOut) + sizeof(GIMHVDEBUGRETRIEVEOUT); + + /* + * Perform the hypercall. + */ + *pcbReallyRead = 0; + *pcbRemainingRead = cbRead; +#if 0 + /* Currently disabled as Windows 10 guest passes us undocumented flags. */ + if (fFlags & ~GIM_HV_DEBUG_RETREIVE_OPTIONS_MASK) + rcHv = GIM_HV_STATUS_INVALID_PARAMETER; +#endif + if (cbRead > GIM_HV_DEBUG_MAX_DATA_SIZE) + rcHv = GIM_HV_STATUS_INVALID_PARAMETER; + else if (fFlags & GIM_HV_DEBUG_RETREIVE_TEST_ACTIVITY) + rcHv = GIM_HV_STATUS_SUCCESS; /** @todo implement this. */ + else if (!cbRead) + rcHv = GIM_HV_STATUS_SUCCESS; + else if (cbRead > 0) + { + int rc2 = gimR3HvDebugRead(pVM, pvData, GIM_HV_PAGE_SIZE, cbRead, pcbReallyRead, cMsTimeout, + pHv->fIsVendorMsHv /*fUdpPkt*/); + Assert(*pcbReallyRead <= cbRead); + if ( RT_SUCCESS(rc2) + && *pcbReallyRead > 0) + { + *pcbRemainingRead = cbRead - *pcbReallyRead; + rcHv = GIM_HV_STATUS_SUCCESS; + } + else + rcHv = GIM_HV_STATUS_NO_DATA; + } + + /* + * Update the guest memory with result. 
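+     * Both the output header and the *pcbReallyRead bytes of payload stored
+     * right after it by gimR3HvDebugRead() are written back in one go.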
+ */ + int rc = PGMPhysSimpleWriteGCPhys(pVM, pHv->GCPhysHypercallOut, pHv->pbHypercallOut, + sizeof(GIMHVDEBUGRETRIEVEOUT) + *pcbReallyRead); + if (RT_FAILURE(rc)) + { + LogRelMax(10, ("GIM: HyperV: HvRetrieveDebugData failed to update guest memory. rc=%Rrc\n", rc)); + rc = VERR_GIM_HYPERCALL_MEMORY_WRITE_FAILED; + } + else + Assert(rc == VINF_SUCCESS); + + *prcHv = rcHv; + return rc; +} + + +/** + * Performs the HvExtCallQueryCapabilities extended hypercall. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param prcHv Where to store the result of the hypercall operation. + * + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3HvHypercallExtQueryCap(PVM pVM, int *prcHv) +{ + AssertPtr(pVM); + AssertPtr(prcHv); + PGIMHV pHv = &pVM->gim.s.u.Hv; + + /* + * Grab the parameters. + */ + PGIMHVEXTQUERYCAP pOut = (PGIMHVEXTQUERYCAP)pHv->pbHypercallOut; + + /* + * Perform the hypercall. + */ + pOut->fCapabilities = GIM_HV_EXT_HYPERCALL_CAP_ZERO_MEM; + + /* + * Update the guest memory with result. + */ + int rcHv; + int rc = PGMPhysSimpleWriteGCPhys(pVM, pHv->GCPhysHypercallOut, pHv->pbHypercallOut, sizeof(GIMHVEXTQUERYCAP)); + if (RT_SUCCESS(rc)) + { + rcHv = GIM_HV_STATUS_SUCCESS; + LogRel(("GIM: HyperV: Queried extended hypercall capabilities %#RX64 at %#RGp\n", pOut->fCapabilities, + pHv->GCPhysHypercallOut)); + } + else + { + rcHv = GIM_HV_STATUS_OPERATION_DENIED; + LogRelMax(10, ("GIM: HyperV: HvHypercallExtQueryCap failed to update guest memory. rc=%Rrc\n", rc)); + rc = VERR_GIM_HYPERCALL_MEMORY_WRITE_FAILED; + } + + *prcHv = rcHv; + return rc; +} + + +/** + * Performs the HvExtCallGetBootZeroedMemory extended hypercall. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param prcHv Where to store the result of the hypercall operation. + * + * @thread EMT. + */ +VMMR3_INT_DECL(int) gimR3HvHypercallExtGetBootZeroedMem(PVM pVM, int *prcHv) +{ + AssertPtr(pVM); + AssertPtr(prcHv); + PGIMHV pHv = &pVM->gim.s.u.Hv; + + /* + * Grab the parameters. + */ + PGIMHVEXTGETBOOTZEROMEM pOut = (PGIMHVEXTGETBOOTZEROMEM)pHv->pbHypercallOut; + + /* + * Perform the hypercall. + */ + uint32_t const cRanges = PGMR3PhysGetRamRangeCount(pVM); + pOut->cPages = 0; + for (uint32_t iRange = 0; iRange < cRanges; iRange++) + { + RTGCPHYS GCPhysStart; + RTGCPHYS GCPhysEnd; + int rc = PGMR3PhysGetRange(pVM, iRange, &GCPhysStart, &GCPhysEnd, NULL /* pszDesc */, NULL /* fIsMmio */); + if (RT_FAILURE(rc)) + { + LogRelMax(10, ("GIM: HyperV: HvHypercallExtGetBootZeroedMem: PGMR3PhysGetRange failed for iRange(%u) rc=%Rrc\n", + iRange, rc)); + *prcHv = GIM_HV_STATUS_OPERATION_DENIED; + return rc; + } + + RTGCPHYS const cbRange = RT_ALIGN(GCPhysEnd - GCPhysStart + 1, PAGE_SIZE); + pOut->cPages += cbRange >> GIM_HV_PAGE_SHIFT; + if (iRange == 0) + pOut->GCPhysStart = GCPhysStart; + } + + /* + * Update the guest memory with result. + */ + int rcHv; + int rc = PGMPhysSimpleWriteGCPhys(pVM, pHv->GCPhysHypercallOut, pHv->pbHypercallOut, sizeof(GIMHVEXTGETBOOTZEROMEM)); + if (RT_SUCCESS(rc)) + { + LogRel(("GIM: HyperV: Queried boot zeroed guest memory range (starting at %#RGp spanning %u pages) at %#RGp\n", + pOut->GCPhysStart, pOut->cPages, pHv->GCPhysHypercallOut)); + rcHv = GIM_HV_STATUS_SUCCESS; + } + else + { + rcHv = GIM_HV_STATUS_OPERATION_DENIED; + LogRelMax(10, ("GIM: HyperV: HvHypercallExtGetBootZeroedMem failed to update guest memory. 
rc=%Rrc\n", rc)); + rc = VERR_GIM_HYPERCALL_MEMORY_WRITE_FAILED; + } + + *prcHv = rcHv; + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/GIMKvm.cpp b/src/VBox/VMM/VMMR3/GIMKvm.cpp new file mode 100644 index 00000000..58ae0d54 --- /dev/null +++ b/src/VBox/VMM/VMMR3/GIMKvm.cpp @@ -0,0 +1,535 @@ +/* $Id: GIMKvm.cpp $ */ +/** @file + * GIM - Guest Interface Manager, KVM implementation. + */ + +/* + * Copyright (C) 2015-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_GIM +#include +#include +#include +#include +#include +#include +#include "GIMInternal.h" +#include + +#include +#include +#include + +#include +#include +#include +#include + + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + +/** + * GIM KVM saved-state version. + */ +#define GIM_KVM_SAVED_STATE_VERSION UINT32_C(1) + +/** + * VBox internal struct. to passback to EMT rendezvous callback while enabling + * the KVM wall-clock. + */ +typedef struct KVMWALLCLOCKINFO +{ + /** Guest physical address of the wall-clock struct. */ + RTGCPHYS GCPhysWallClock; +} KVMWALLCLOCKINFO; +/** Pointer to the wall-clock info. struct. */ +typedef KVMWALLCLOCKINFO *PKVMWALLCLOCKINFO; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_STATISTICS +# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \ + { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } } +#else +# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \ + { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName } +#endif + +/** + * Array of MSR ranges supported by KVM. + */ +static CPUMMSRRANGE const g_aMsrRanges_Kvm[] = +{ + GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE0_FIRST, MSR_GIM_KVM_RANGE0_LAST, "KVM range 0"), + GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE1_FIRST, MSR_GIM_KVM_RANGE1_LAST, "KVM range 1") +}; +#undef GIMKVM_MSRRANGE + + +/** + * Initializes the KVM GIM provider. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
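+ *
+ * @note The standard hypervisor leaf 0x40000000 set up below carries the
+ *       12-byte signature "KVMKVMKVM\0\0\0" split little-endian across
+ *       EBX/ECX/EDX. A guest-side probe (illustrative sketch only) boils
+ *       down to:
+ * @code
+ *      uint32_t uEax, uEbx, uEcx, uEdx;
+ *      ASMCpuId(0x40000000, &uEax, &uEbx, &uEcx, &uEdx);
+ *      bool const fIsKvm = uEbx == 0x4B4D564B  // 'KVMK'
+ *                       && uEcx == 0x564B4D56  // 'VMKV'
+ *                       && uEdx == 0x0000004D; // 'M'
+ * @endcode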
+ */ +VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM) +{ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_KVM, VERR_INTERNAL_ERROR_5); + + int rc; + PGIMKVM pKvm = &pVM->gim.s.u.Kvm; + + /* + * Determine interface capabilities based on the version. + */ + if (!pVM->gim.s.u32Version) + { + /* Basic features. */ + pKvm->uBaseFeat = 0 + | GIM_KVM_BASE_FEAT_CLOCK_OLD + //| GIM_KVM_BASE_FEAT_NOP_IO_DELAY + //| GIM_KVM_BASE_FEAT_MMU_OP + | GIM_KVM_BASE_FEAT_CLOCK + //| GIM_KVM_BASE_FEAT_ASYNC_PF + //| GIM_KVM_BASE_FEAT_STEAL_TIME + //| GIM_KVM_BASE_FEAT_PV_EOI + | GIM_KVM_BASE_FEAT_PV_UNHALT + ; + /* Rest of the features are determined in gimR3KvmInitCompleted(). */ + } + + /* + * Expose HVP (Hypervisor Present) bit to the guest. + */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP); + + /* + * Modify the standard hypervisor leaves for KVM. + */ + CPUMCPUIDLEAF HyperLeaf; + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000000); + HyperLeaf.uEax = UINT32_C(0x40000001); /* Minimum value for KVM is 0x40000001. */ + HyperLeaf.uEbx = 0x4B4D564B; /* 'KVMK' */ + HyperLeaf.uEcx = 0x564B4D56; /* 'VMKV' */ + HyperLeaf.uEdx = 0x0000004D; /* 'M000' */ + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + /* + * Add KVM specific leaves. + */ + HyperLeaf.uLeaf = UINT32_C(0x40000001); + HyperLeaf.uEax = pKvm->uBaseFeat; + HyperLeaf.uEbx = 0; /* Reserved */ + HyperLeaf.uEcx = 0; /* Reserved */ + HyperLeaf.uEdx = 0; /* Reserved */ + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + + /* + * Insert all MSR ranges of KVM. + */ + for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_Kvm); i++) + { + rc = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_Kvm[i]); + AssertLogRelRCReturn(rc, rc); + } + + /* + * Setup hypercall and #UD handling. + * Note! We always need to trap VMCALL/VMMCALL hypercall using #UDs for raw-mode VMs. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + EMSetHypercallInstructionsEnabled(&pVM->aCpus[i], true); + + size_t cbHypercall = 0; + rc = GIMQueryHypercallOpcodeBytes(pVM, pKvm->abOpcodeNative, sizeof(pKvm->abOpcodeNative), &cbHypercall, &pKvm->uOpcodeNative); + AssertLogRelRCReturn(rc, rc); + AssertLogRelReturn(cbHypercall == sizeof(pKvm->abOpcodeNative), VERR_GIM_IPE_1); + pKvm->fTrapXcptUD = pKvm->uOpcodeNative != OP_VMCALL || VM_IS_RAW_MODE_ENABLED(pVM); + + return VINF_SUCCESS; +} + + +/** + * Initializes remaining bits of the KVM provider. + * + * This is called after initializing HM and almost all other VMM components. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM) +{ + PGIMKVM pKvm = &pVM->gim.s.u.Kvm; + pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM); + + if (TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */)) + { + /** @todo We might want to consider just enabling this bit *always*. As far + * as I can see in the Linux guest, the "TSC_STABLE" bit is only + * translated as a "monotonic" bit which even in Async systems we + * -should- be reporting a strictly monotonic TSC to the guest. 
*/ + pKvm->uBaseFeat |= GIM_KVM_BASE_FEAT_TSC_STABLE; + + CPUMCPUIDLEAF HyperLeaf; + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000001); + HyperLeaf.uEax = pKvm->uBaseFeat; + HyperLeaf.uEbx = 0; + HyperLeaf.uEcx = 0; + HyperLeaf.uEdx = 0; + int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + } + return VINF_SUCCESS; +} + + +/** + * Terminates the KVM GIM provider. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM) +{ + gimR3KvmReset(pVM); + return VINF_SUCCESS; +} + + +/** + * This resets KVM provider MSRs and unmaps whatever KVM regions that + * the guest may have mapped. + * + * This is called when the VM is being reset. + * + * @param pVM The cross context VM structure. + * @thread EMT(0) + */ +VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM) +{ + VM_ASSERT_EMT0(pVM); + LogRel(("GIM: KVM: Resetting MSRs\n")); + + /* + * Reset MSRs. + */ + PGIMKVM pKvm = &pVM->gim.s.u.Kvm; + pKvm->u64WallClockMsr = 0; + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PGIMKVMCPU pKvmCpu = &pVM->aCpus[iCpu].gim.s.u.KvmCpu; + pKvmCpu->u64SystemTimeMsr = 0; + pKvmCpu->u32SystemTimeVersion = 0; + pKvmCpu->fSystemTimeFlags = 0; + pKvmCpu->GCPhysSystemTime = 0; + pKvmCpu->uTsc = 0; + pKvmCpu->uVirtNanoTS = 0; + } +} + + +/** + * KVM state-save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM) +{ + PCGIMKVM pKvm = &pVM->gim.s.u.Kvm; + + /* + * Save the KVM SSM version. + */ + SSMR3PutU32(pSSM, GIM_KVM_SAVED_STATE_VERSION); + + /* + * Save per-VCPU data. + */ + for (uint32_t i = 0; i < pVM->cCpus; i++) + { + PCGIMKVMCPU pKvmCpu = &pVM->aCpus[i].gim.s.u.KvmCpu; + SSMR3PutU64(pSSM, pKvmCpu->u64SystemTimeMsr); + SSMR3PutU64(pSSM, pKvmCpu->uTsc); + SSMR3PutU64(pSSM, pKvmCpu->uVirtNanoTS); + SSMR3PutGCPhys(pSSM, pKvmCpu->GCPhysSystemTime); + SSMR3PutU32(pSSM, pKvmCpu->u32SystemTimeVersion); + SSMR3PutU8(pSSM, pKvmCpu->fSystemTimeFlags); + } + + /* + * Save per-VM data. + */ + SSMR3PutU64(pSSM, pKvm->u64WallClockMsr); + return SSMR3PutU32(pSSM, pKvm->uBaseFeat); +} + + +/** + * KVM state-load operation, final pass. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Load the KVM SSM version first. + */ + uint32_t uKvmSavedStatVersion; + int rc = SSMR3GetU32(pSSM, &uKvmSavedStatVersion); + AssertRCReturn(rc, rc); + if (uKvmSavedStatVersion != GIM_KVM_SAVED_STATE_VERSION) + return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS, + N_("Unsupported KVM saved-state version %u (expected %u)."), + uKvmSavedStatVersion, GIM_KVM_SAVED_STATE_VERSION); + + /* + * Update the TSC frequency from TM. + */ + PGIMKVM pKvm = &pVM->gim.s.u.Kvm; + pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM); + + /* + * Load per-VCPU data. 
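+     * (The fields below must be read in exactly the order gimR3KvmSave()
+     * wrote them.)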
+ */ + for (uint32_t i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu; + + SSMR3GetU64(pSSM, &pKvmCpu->u64SystemTimeMsr); + SSMR3GetU64(pSSM, &pKvmCpu->uTsc); + SSMR3GetU64(pSSM, &pKvmCpu->uVirtNanoTS); + SSMR3GetGCPhys(pSSM, &pKvmCpu->GCPhysSystemTime); + SSMR3GetU32(pSSM, &pKvmCpu->u32SystemTimeVersion); + rc = SSMR3GetU8(pSSM, &pKvmCpu->fSystemTimeFlags); + AssertRCReturn(rc, rc); + + /* Enable the system-time struct. if necessary. */ + /** @todo update guest struct only if cTscTicksPerSecond doesn't match host + * anymore. */ + if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr)) + { + Assert(!TMVirtualIsTicking(pVM)); /* paranoia. */ + Assert(!TMCpuTickIsTicking(pVCpu)); + gimR3KvmEnableSystemTime(pVM, pVCpu); + } + } + + /* + * Load per-VM data. + */ + SSMR3GetU64(pSSM, &pKvm->u64WallClockMsr); + rc = SSMR3GetU32(pSSM, &pKvm->uBaseFeat); + AssertRCReturn(rc, rc); + + return VINF_SUCCESS; +} + + +/** + * Enables the KVM VCPU system-time structure. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * + * @remarks Don't do any release assertions here, these can be triggered by + * guest R0 code. + */ +VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVM pVM, PVMCPU pVCpu) +{ + PGIMKVM pKvm = &pVM->gim.s.u.Kvm; + PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu; + + /* + * Validate the mapping address first. + */ + if (!PGMPhysIsGCPhysNormal(pVM, pKvmCpu->GCPhysSystemTime)) + { + LogRel(("GIM: KVM: VCPU%3d: Invalid physical addr requested for mapping system-time struct. GCPhysSystemTime=%#RGp\n", + pVCpu->idCpu, pKvmCpu->GCPhysSystemTime)); + return VERR_GIM_OPERATION_FAILED; + } + + /* + * Construct the system-time struct. + */ + GIMKVMSYSTEMTIME SystemTime; + RT_ZERO(SystemTime); + SystemTime.u32Version = pKvmCpu->u32SystemTimeVersion; + SystemTime.u64NanoTS = pKvmCpu->uVirtNanoTS; + SystemTime.u64Tsc = pKvmCpu->uTsc; + SystemTime.fFlags = pKvmCpu->fSystemTimeFlags | GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE; + + /* + * How the guest calculates the system time (nanoseconds): + * + * tsc = rdtsc - SysTime.u64Tsc + * if (SysTime.i8TscShift >= 0) + * tsc <<= i8TscShift; + * else + * tsc >>= -i8TscShift; + * time = ((tsc * SysTime.u32TscScale) >> 32) + SysTime.u64NanoTS + */ + uint64_t u64TscFreq = pKvm->cTscTicksPerSecond; + SystemTime.i8TscShift = 0; + while (u64TscFreq > 2 * RT_NS_1SEC_64) + { + u64TscFreq >>= 1; + SystemTime.i8TscShift--; + } + uint32_t uTscFreqLo = (uint32_t)u64TscFreq; + while (uTscFreqLo <= RT_NS_1SEC) + { + uTscFreqLo <<= 1; + SystemTime.i8TscShift++; + } + SystemTime.u32TscScale = ASMDivU64ByU32RetU32(RT_NS_1SEC_64 << 32, uTscFreqLo); + + /* + * Update guest memory with the system-time struct. + */ + Assert(!(SystemTime.u32Version & UINT32_C(1))); + int rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime, &SystemTime, sizeof(GIMKVMSYSTEMTIME)); + if (RT_SUCCESS(rc)) + { + LogRel(("GIM: KVM: VCPU%3d: Enabled system-time struct. at %#RGp - u32TscScale=%#RX32 i8TscShift=%d uVersion=%#RU32 " + "fFlags=%#x uTsc=%#RX64 uVirtNanoTS=%#RX64\n", pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, SystemTime.u32TscScale, + SystemTime.i8TscShift, SystemTime.u32Version, SystemTime.fFlags, pKvmCpu->uTsc, pKvmCpu->uVirtNanoTS)); + TMR3CpuTickParavirtEnable(pVM); + } + else + LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. at %#RGp. 
rc=%Rrc\n", + pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, rc)); + + return rc; +} + + +/** + * Disables the KVM system-time struct. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM) +{ + TMR3CpuTickParavirtDisable(pVM); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{PFNVMMEMTRENDEZVOUS, + * Worker for gimR3KvmEnableWallClock} + */ +static DECLCALLBACK(VBOXSTRICTRC) gimR3KvmEnableWallClockCallback(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + PKVMWALLCLOCKINFO pWallClockInfo = (PKVMWALLCLOCKINFO)pvUser; AssertPtr(pWallClockInfo); + RTGCPHYS GCPhysWallClock = pWallClockInfo->GCPhysWallClock; + RT_NOREF1(pVCpu); + + /* + * Read the wall-clock version (sequence) from the guest. + */ + uint32_t uVersion; + Assert(PGMPhysIsGCPhysNormal(pVM, GCPhysWallClock)); + int rc = PGMPhysSimpleReadGCPhys(pVM, &uVersion, GCPhysWallClock, sizeof(uVersion)); + if (RT_FAILURE(rc)) + { + LogRel(("GIM: KVM: Failed to read wall-clock struct. version at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc)); + return rc; + } + + /* + * Ensure the version is incrementally even. + */ + /* faster: uVersion = (uVersion | 1) + 1; */ + if (!(uVersion & 1)) + ++uVersion; + ++uVersion; + + /* + * Update wall-clock guest struct. with UTC information. + */ + RTTIMESPEC TimeSpec; + int32_t iSec; + int32_t iNano; + TMR3UtcNow(pVM, &TimeSpec); + RTTimeSpecGetSecondsAndNano(&TimeSpec, &iSec, &iNano); + + GIMKVMWALLCLOCK WallClock; + RT_ZERO(WallClock); + AssertCompile(sizeof(uVersion) == sizeof(WallClock.u32Version)); + WallClock.u32Version = uVersion; + WallClock.u32Sec = iSec; + WallClock.u32Nano = iNano; + + /* + * Write out the wall-clock struct. to guest memory. + */ + Assert(!(WallClock.u32Version & 1)); + rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysWallClock, &WallClock, sizeof(GIMKVMWALLCLOCK)); + if (RT_SUCCESS(rc)) + LogRel(("GIM: KVM: Enabled wall-clock struct. at %#RGp - u32Sec=%u u32Nano=%u uVersion=%#RU32\n", GCPhysWallClock, + WallClock.u32Sec, WallClock.u32Nano, WallClock.u32Version)); + else + LogRel(("GIM: KVM: Failed to write wall-clock struct. at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc)); + return rc; +} + + +/** + * Enables the KVM wall-clock structure. + * + * Since the wall-clock can be read by any VCPU but it is a global struct. in + * guest-memory, we do an EMT rendezvous here to be on the safe side. The + * alternative is to use an MMIO2 region and use the WallClock.u32Version field + * for transactional update. However, this MSR is rarely written to (typically + * once during bootup) it's currently not a performance issue especially since + * we're already in ring-3. If we really wanted better performance in this code + * path, we should be doing it in ring-0 with transactional update while make + * sure there is only 1 writer as well. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhysWallClock Where the guest wall-clock structure is located. + * + * @remarks Don't do any release assertions here, these can be triggered by + * guest R0 code. 
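+ *
+ * @note The guest treats an odd u32Version as "update in progress", so the
+ *       rendezvous callback above always publishes the structure with the
+ *       sequence bumped to the next even value.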
+ */ +VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysWallClock) +{ + KVMWALLCLOCKINFO WallClockInfo; + WallClockInfo.GCPhysWallClock = GCPhysWallClock; + return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, gimR3KvmEnableWallClockCallback, &WallClockInfo); +} + diff --git a/src/VBox/VMM/VMMR3/GIMMinimal.cpp b/src/VBox/VMM/VMMR3/GIMMinimal.cpp new file mode 100644 index 00000000..7973d7c9 --- /dev/null +++ b/src/VBox/VMM/VMMR3/GIMMinimal.cpp @@ -0,0 +1,131 @@ +/* $Id: GIMMinimal.cpp $ */ +/** @file + * GIM - Guest Interface Manager, Minimal implementation. + */ + +/* + * Copyright (C) 2014-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_GIM +#include +#include +#include +#include +#include "GIMInternal.h" +#include + +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + +/** + * Initializes the Minimal provider. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3MinimalInit(PVM pVM) +{ + AssertReturn(pVM, VERR_INVALID_PARAMETER); + AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_MINIMAL, VERR_INTERNAL_ERROR_5); + + /* + * Expose HVP (Hypervisor Present) bit to the guest. + */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP); + + /* + * Insert the hypervisor leaf range. + */ + CPUMCPUIDLEAF HyperLeaf; + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000000); + HyperLeaf.uEax = UINT32_C(0x40000010); /* Maximum leaf we implement. */ + int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + if (RT_SUCCESS(rc)) + { + /* + * Insert missing zero leaves (you never know what missing leaves are + * going to return when read). + */ + RT_ZERO(HyperLeaf); + for (uint32_t uLeaf = UINT32_C(0x40000001); uLeaf <= UINT32_C(0x40000010); uLeaf++) + { + HyperLeaf.uLeaf = uLeaf; + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + } + } + else + LogRel(("GIM: Minimal: Failed to insert hypervisor leaf %#RX32. rc=%Rrc\n", HyperLeaf.uLeaf, rc)); + + return rc; +} + + +/** + * Initializes remaining bits of the Minimal provider. + * This is called after initializing HM and almost all other VMM components. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) gimR3MinimalInitCompleted(PVM pVM) +{ + /* + * Expose a generic hypervisor-agnostic leaf (originally defined by VMware). + * The leaves range from 0x40000010 to 0x400000FF. + * + * This is done in the init. 
completed routine as we need PDM to be + * initialized (otherwise APICGetTimerFreq() would fail). + */ + CPUMCPUIDLEAF HyperLeaf; + int rc = CPUMR3CpuIdGetLeaf(pVM, &HyperLeaf, 0x40000000, 0 /* uSubLeaf */); + if (RT_SUCCESS(rc)) + { + Assert(HyperLeaf.uEax >= 0x40000010); + + /* + * Add the timing information hypervisor leaf. + * MacOS X uses this to determine the TSC, bus frequency. See @bugref{7270}. + * + * EAX - TSC frequency in KHz. + * EBX - APIC frequency in KHz. + * ECX, EDX - Reserved. + */ + uint64_t uApicFreq; + rc = APICGetTimerFreq(pVM, &uApicFreq); + AssertLogRelRCReturn(rc, rc); + + RT_ZERO(HyperLeaf); + HyperLeaf.uLeaf = UINT32_C(0x40000010); + HyperLeaf.uEax = TMCpuTicksPerSecond(pVM) / UINT64_C(1000); + HyperLeaf.uEbx = (uApicFreq + 500) / UINT64_C(1000); + rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf); + AssertLogRelRCReturn(rc, rc); + } + else + LogRel(("GIM: Minimal: failed to get hypervisor leaf 0x40000000. rc=%Rrc\n", rc)); + + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/GMM.cpp b/src/VBox/VMM/VMMR3/GMM.cpp new file mode 100644 index 00000000..a045bc8c --- /dev/null +++ b/src/VBox/VMM/VMMR3/GMM.cpp @@ -0,0 +1,451 @@ +/* $Id: GMM.cpp $ */ +/** @file + * GMM - Global Memory Manager, ring-3 request wrappers. + */ + +/* + * Copyright (C) 2008-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_GMM +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +/** + * @see GMMR0InitialReservation + */ +GMMR3DECL(int) GMMR3InitialReservation(PVM pVM, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages, + GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority) +{ + GMMINITIALRESERVATIONREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.cBasePages = cBasePages; + Req.cShadowPages = cShadowPages; + Req.cFixedPages = cFixedPages; + Req.enmPolicy = enmPolicy; + Req.enmPriority = enmPriority; + return VMMR3CallR0(pVM, VMMR0_DO_GMM_INITIAL_RESERVATION, 0, &Req.Hdr); +} + + +/** + * @see GMMR0UpdateReservation + */ +GMMR3DECL(int) GMMR3UpdateReservation(PVM pVM, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages) +{ + GMMUPDATERESERVATIONREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.cBasePages = cBasePages; + Req.cShadowPages = cShadowPages; + Req.cFixedPages = cFixedPages; + return VMMR3CallR0(pVM, VMMR0_DO_GMM_UPDATE_RESERVATION, 0, &Req.Hdr); +} + + +/** + * Prepares a GMMR0AllocatePages request. + * + * @returns VINF_SUCCESS or VERR_NO_TMP_MEMORY. + * @param pVM The cross context VM structure. + * @param[out] ppReq Where to store the pointer to the request packet. + * @param cPages The number of pages that's to be allocated. 
+ * @param enmAccount The account to charge. + */ +GMMR3DECL(int) GMMR3AllocatePagesPrepare(PVM pVM, PGMMALLOCATEPAGESREQ *ppReq, uint32_t cPages, GMMACCOUNT enmAccount) +{ + uint32_t cb = RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[cPages]); + PGMMALLOCATEPAGESREQ pReq = (PGMMALLOCATEPAGESREQ)RTMemTmpAllocZ(cb); + if (!pReq) + return VERR_NO_TMP_MEMORY; + + pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + pReq->Hdr.cbReq = cb; + pReq->enmAccount = enmAccount; + pReq->cPages = cPages; + NOREF(pVM); + *ppReq = pReq; + return VINF_SUCCESS; +} + + +/** + * Performs a GMMR0AllocatePages request. + * + * This will call VMSetError on failure. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pReq Pointer to the request (returned by GMMR3AllocatePagesPrepare). + */ +GMMR3DECL(int) GMMR3AllocatePagesPerform(PVM pVM, PGMMALLOCATEPAGESREQ pReq) +{ + for (unsigned i = 0; ; i++) + { + int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_ALLOCATE_PAGES, 0, &pReq->Hdr); + if (RT_SUCCESS(rc)) + { +#ifdef LOG_ENABLED + for (uint32_t iPage = 0; iPage < pReq->cPages; iPage++) + Log3(("GMMR3AllocatePagesPerform: idPage=%#x HCPhys=%RHp\n", + pReq->aPages[iPage].idPage, pReq->aPages[iPage].HCPhysGCPhys)); +#endif + return rc; + } + if (rc != VERR_GMM_SEED_ME) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("GMMR0AllocatePages failed to allocate %u pages"), + pReq->cPages); + Assert(i < pReq->cPages); + + /* + * Seed another chunk. + */ + void *pvChunk; + rc = SUPR3PageAlloc(GMM_CHUNK_SIZE >> PAGE_SHIFT, &pvChunk); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Out of memory (SUPR3PageAlloc) seeding a %u pages allocation request"), + pReq->cPages); + + rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_SEED_CHUNK, (uintptr_t)pvChunk, NULL); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, N_("GMM seeding failed")); + } +} + + +/** + * Cleans up a GMMR0AllocatePages request. + * @param pReq Pointer to the request (returned by GMMR3AllocatePagesPrepare). + */ +GMMR3DECL(void) GMMR3AllocatePagesCleanup(PGMMALLOCATEPAGESREQ pReq) +{ + RTMemTmpFree(pReq); +} + + +/** + * Prepares a GMMR0FreePages request. + * + * @returns VINF_SUCCESS or VERR_NO_TMP_MEMORY. + * @param pVM The cross context VM structure. + * @param[out] ppReq Where to store the pointer to the request packet. + * @param cPages The number of pages that's to be freed. + * @param enmAccount The account to charge. + */ +GMMR3DECL(int) GMMR3FreePagesPrepare(PVM pVM, PGMMFREEPAGESREQ *ppReq, uint32_t cPages, GMMACCOUNT enmAccount) +{ + uint32_t cb = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[cPages]); + PGMMFREEPAGESREQ pReq = (PGMMFREEPAGESREQ)RTMemTmpAllocZ(cb); + if (!pReq) + return VERR_NO_TMP_MEMORY; + + pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + pReq->Hdr.cbReq = cb; + pReq->enmAccount = enmAccount; + pReq->cPages = cPages; + NOREF(pVM); + *ppReq = pReq; + return VINF_SUCCESS; +} + + +/** + * Re-prepares a GMMR0FreePages request. + * + * @returns VINF_SUCCESS or VERR_NO_TMP_MEMORY. + * @param pVM The cross context VM structure. + * @param pReq A request buffer previously returned by + * GMMR3FreePagesPrepare(). + * @param cPages The number of pages originally passed to + * GMMR3FreePagesPrepare(). + * @param enmAccount The account to charge. 
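+ *
+ * @remarks Typical usage sketch (error handling omitted; aidPages stands in
+ *          for the caller's list of page IDs):
+ * @code
+ *      PGMMFREEPAGESREQ pReq;
+ *      int rc = GMMR3FreePagesPrepare(pVM, &pReq, cPages, GMMACCOUNT_BASE);
+ *      for (uint32_t i = 0; i < cPages; i++)
+ *          pReq->aPages[i].idPage = aidPages[i];
+ *      rc = GMMR3FreePagesPerform(pVM, pReq, cPages);
+ *      GMMR3FreePagesCleanup(pReq);
+ * @endcode
+ *          Calling this function instead of GMMR3FreePagesCleanup() allows the
+ *          same request buffer to be refilled for another batch.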
+ */ +GMMR3DECL(void) GMMR3FreePagesRePrep(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t cPages, GMMACCOUNT enmAccount) +{ + Assert(pReq->Hdr.u32Magic == SUPVMMR0REQHDR_MAGIC); + pReq->Hdr.cbReq = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[cPages]); + pReq->enmAccount = enmAccount; + pReq->cPages = cPages; + NOREF(pVM); +} + + +/** + * Performs a GMMR0FreePages request. + * This will call VMSetError on failure. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pReq Pointer to the request (returned by GMMR3FreePagesPrepare). + * @param cActualPages The number of pages actually freed. + */ +GMMR3DECL(int) GMMR3FreePagesPerform(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t cActualPages) +{ + /* + * Adjust the request if we ended up with fewer pages than anticipated. + */ + if (cActualPages != pReq->cPages) + { + AssertReturn(cActualPages < pReq->cPages, VERR_GMM_ACTUAL_PAGES_IPE); + if (!cActualPages) + return VINF_SUCCESS; + pReq->cPages = cActualPages; + pReq->Hdr.cbReq = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[cActualPages]); + } + + /* + * Do the job. + */ + int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_FREE_PAGES, 0, &pReq->Hdr); + if (RT_SUCCESS(rc)) + return rc; + AssertRC(rc); + return VMSetError(pVM, rc, RT_SRC_POS, + N_("GMMR0FreePages failed to free %u pages"), + pReq->cPages); +} + + +/** + * Cleans up a GMMR0FreePages request. + * @param pReq Pointer to the request (returned by GMMR3FreePagesPrepare). + */ +GMMR3DECL(void) GMMR3FreePagesCleanup(PGMMFREEPAGESREQ pReq) +{ + RTMemTmpFree(pReq); +} + + +/** + * Frees allocated pages, for bailing out on failure. + * + * This will not call VMSetError on failure but will use AssertLogRel instead. + * + * @param pVM The cross context VM structure. + * @param pAllocReq The allocation request to undo. + */ +GMMR3DECL(void) GMMR3FreeAllocatedPages(PVM pVM, GMMALLOCATEPAGESREQ const *pAllocReq) +{ + uint32_t cb = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pAllocReq->cPages]); + PGMMFREEPAGESREQ pReq = (PGMMFREEPAGESREQ)RTMemTmpAllocZ(cb); + AssertLogRelReturnVoid(pReq); + + pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + pReq->Hdr.cbReq = cb; + pReq->enmAccount = pAllocReq->enmAccount; + pReq->cPages = pAllocReq->cPages; + uint32_t iPage = pAllocReq->cPages; + while (iPage-- > 0) + { + Assert(pAllocReq->aPages[iPage].idPage != NIL_GMM_PAGEID); + pReq->aPages[iPage].idPage = pAllocReq->aPages[iPage].idPage; + } + + int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_FREE_PAGES, 0, &pReq->Hdr); + AssertLogRelRC(rc); + + RTMemTmpFree(pReq); +} + + +/** + * @see GMMR0BalloonedPages + */ +GMMR3DECL(int) GMMR3BalloonedPages(PVM pVM, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages) +{ + GMMBALLOONEDPAGESREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.enmAction = enmAction; + Req.cBalloonedPages = cBalloonedPages; + + return VMMR3CallR0(pVM, VMMR0_DO_GMM_BALLOONED_PAGES, 0, &Req.Hdr); +} + + +/** + * @see GMMR0QueryVMMMemoryStatsReq + */ +GMMR3DECL(int) GMMR3QueryHypervisorMemoryStats(PVM pVM, uint64_t *pcTotalAllocPages, uint64_t *pcTotalFreePages, uint64_t *pcTotalBalloonPages, uint64_t *puTotalBalloonSize) +{ + GMMMEMSTATSREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.cAllocPages = 0; + Req.cFreePages = 0; + Req.cBalloonedPages = 0; + Req.cSharedPages = 0; + + *pcTotalAllocPages = 0; + *pcTotalFreePages = 0; + *pcTotalBalloonPages = 0; + *puTotalBalloonSize = 0; + + /* Must be callable from any thread, so can't use VMMR3CallR0. 
*/ + int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_QUERY_HYPERVISOR_MEM_STATS, 0, &Req.Hdr); + if (rc == VINF_SUCCESS) + { + *pcTotalAllocPages = Req.cAllocPages; + *pcTotalFreePages = Req.cFreePages; + *pcTotalBalloonPages = Req.cBalloonedPages; + *puTotalBalloonSize = Req.cSharedPages; + } + return rc; +} + + +/** + * @see GMMR0QueryMemoryStatsReq + */ +GMMR3DECL(int) GMMR3QueryMemoryStats(PVM pVM, uint64_t *pcAllocPages, uint64_t *pcMaxPages, uint64_t *pcBalloonPages) +{ + GMMMEMSTATSREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.cAllocPages = 0; + Req.cFreePages = 0; + Req.cBalloonedPages = 0; + + *pcAllocPages = 0; + *pcMaxPages = 0; + *pcBalloonPages = 0; + + int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_QUERY_MEM_STATS, 0, &Req.Hdr); + if (rc == VINF_SUCCESS) + { + *pcAllocPages = Req.cAllocPages; + *pcMaxPages = Req.cMaxPages; + *pcBalloonPages = Req.cBalloonedPages; + } + return rc; +} + + +/** + * @see GMMR0MapUnmapChunk + */ +GMMR3DECL(int) GMMR3MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3) +{ + GMMMAPUNMAPCHUNKREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.idChunkMap = idChunkMap; + Req.idChunkUnmap = idChunkUnmap; + Req.pvR3 = NULL; + int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr); + if (RT_SUCCESS(rc) && ppvR3) + *ppvR3 = Req.pvR3; + return rc; +} + + +/** + * @see GMMR0FreeLargePage + */ +GMMR3DECL(int) GMMR3FreeLargePage(PVM pVM, uint32_t idPage) +{ + GMMFREELARGEPAGEREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.idPage = idPage; + return VMMR3CallR0(pVM, VMMR0_DO_GMM_FREE_LARGE_PAGE, 0, &Req.Hdr); +} + + +/** + * @see GMMR0SeedChunk + */ +GMMR3DECL(int) GMMR3SeedChunk(PVM pVM, RTR3PTR pvR3) +{ + return VMMR3CallR0(pVM, VMMR0_DO_GMM_SEED_CHUNK, (uintptr_t)pvR3, NULL); +} + + +/** + * @see GMMR0RegisterSharedModule + */ +GMMR3DECL(int) GMMR3RegisterSharedModule(PVM pVM, PGMMREGISTERSHAREDMODULEREQ pReq) +{ + pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + pReq->Hdr.cbReq = RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]); + int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_REGISTER_SHARED_MODULE, 0, &pReq->Hdr); + if (rc == VINF_SUCCESS) + rc = pReq->rc; + return rc; +} + + +/** + * @see GMMR0RegisterSharedModule + */ +GMMR3DECL(int) GMMR3UnregisterSharedModule(PVM pVM, PGMMUNREGISTERSHAREDMODULEREQ pReq) +{ + pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + pReq->Hdr.cbReq = sizeof(*pReq); + return VMMR3CallR0(pVM, VMMR0_DO_GMM_UNREGISTER_SHARED_MODULE, 0, &pReq->Hdr); +} + + +/** + * @see GMMR0ResetSharedModules + */ +GMMR3DECL(int) GMMR3ResetSharedModules(PVM pVM) +{ + return VMMR3CallR0(pVM, VMMR0_DO_GMM_RESET_SHARED_MODULES, 0, NULL); +} + + +/** + * @see GMMR0CheckSharedModules + */ +GMMR3DECL(int) GMMR3CheckSharedModules(PVM pVM) +{ + return VMMR3CallR0(pVM, VMMR0_DO_GMM_CHECK_SHARED_MODULES, 0, NULL); +} + + +#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64 +/** + * @see GMMR0FindDuplicatePage + */ +GMMR3DECL(bool) GMMR3IsDuplicatePage(PVM pVM, uint32_t idPage) +{ + GMMFINDDUPLICATEPAGEREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.idPage = idPage; + Req.fDuplicate = false; + + /* Must be callable from any thread, so can't use VMMR3CallR0. 
*/ + int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_FIND_DUPLICATE_PAGE, 0, &Req.Hdr); + if (rc == VINF_SUCCESS) + return Req.fDuplicate; + return false; +} +#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */ + diff --git a/src/VBox/VMM/VMMR3/HM.cpp b/src/VBox/VMM/VMMR3/HM.cpp new file mode 100644 index 00000000..234b7d9a --- /dev/null +++ b/src/VBox/VMM/VMMR3/HM.cpp @@ -0,0 +1,3322 @@ +/* $Id: HM.cpp $ */ +/** @file + * HM - Intel/AMD VM Hardware Support Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_hm HM - Hardware Assisted Virtualization Manager + * + * The HM manages guest execution using the VT-x and AMD-V CPU hardware + * extensions. + * + * {summary of what HM does} + * + * Hardware assisted virtualization manager was originally abbreviated HWACCM, + * however that was cumbersome to write and parse for such a central component, + * so it was shortened to HM when refactoring the code in the 4.3 development + * cycle. + * + * {add sections with more details} + * + * @sa @ref grp_hm + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_HM +#define VMCPU_INCL_CPUM_GST_CTX +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include "HMInternal.h" +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** @def HMVMX_REPORT_FEAT + * Reports VT-x feature to the release log. + * + * @param a_uAllowed1 Mask of allowed-1 feature bits. + * @param a_uAllowed0 Mask of allowed-0 feature bits. + * @param a_StrDesc The description string to report. + * @param a_Featflag Mask of the feature to report. + */ +#define HMVMX_REPORT_FEAT(a_uAllowed1, a_uAllowed0, a_StrDesc, a_Featflag) \ + do { \ + if ((a_uAllowed1) & (a_Featflag)) \ + { \ + if ((a_uAllowed0) & (a_Featflag)) \ + LogRel(("HM: " a_StrDesc " (must be set)\n")); \ + else \ + LogRel(("HM: " a_StrDesc "\n")); \ + } \ + else \ + LogRel(("HM: " a_StrDesc " (must be cleared)\n")); \ + } while (0) + +/** @def HMVMX_REPORT_ALLOWED_FEAT + * Reports an allowed VT-x feature to the release log. + * + * @param a_uAllowed1 Mask of allowed-1 feature bits. + * @param a_StrDesc The description string to report. + * @param a_FeatFlag Mask of the feature to report. 
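+ *
+ * Illustrative use (hypothetical arguments, not code from this file): the line
+ * below logs "HM: SOME_FEATURE" when bit 5 is set in the allowed-1 mask, and
+ * "HM: SOME_FEATURE not supported" otherwise.
+ * @code
+ *      HMVMX_REPORT_ALLOWED_FEAT(fAllowed1, "SOME_FEATURE", RT_BIT_64(5));
+ * @endcode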
+ */ +#define HMVMX_REPORT_ALLOWED_FEAT(a_uAllowed1, a_StrDesc, a_FeatFlag) \ + do { \ + if ((a_uAllowed1) & (a_FeatFlag)) \ + LogRel(("HM: " a_StrDesc "\n")); \ + else \ + LogRel(("HM: " a_StrDesc " not supported\n")); \ + } while (0) + +/** @def HMVMX_REPORT_MSR_CAP + * Reports MSR feature capability. + * + * @param a_MsrCaps Mask of MSR feature bits. + * @param a_StrDesc The description string to report. + * @param a_fCap Mask of the feature to report. + */ +#define HMVMX_REPORT_MSR_CAP(a_MsrCaps, a_StrDesc, a_fCap) \ + do { \ + if ((a_MsrCaps) & (a_fCap)) \ + LogRel(("HM: " a_StrDesc "\n")); \ + } while (0) + +/** @def HMVMX_LOGREL_FEAT + * Dumps a feature flag from a bitmap of features to the release log. + * + * @param a_fVal The value of all the features. + * @param a_fMask The specific bitmask of the feature. + */ +#define HMVMX_LOGREL_FEAT(a_fVal, a_fMask) \ + do { \ + if ((a_fVal) & (a_fMask)) \ + LogRel(("HM: %s\n", #a_fMask)); \ + } while (0) + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) hmR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) hmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(void) hmR3InfoSvmNstGstVmcbCache(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) hmR3Info(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) hmR3InfoEventPending(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static int hmR3InitFinalizeR3(PVM pVM); +static int hmR3InitFinalizeR0(PVM pVM); +static int hmR3InitFinalizeR0Intel(PVM pVM); +static int hmR3InitFinalizeR0Amd(PVM pVM); +static int hmR3TermCPU(PVM pVM); + + + +/** + * Initializes the HM. + * + * This is the very first component to really do init after CFGM so that we can + * establish the predominant execution engine for the VM prior to initializing + * other modules. It takes care of NEM initialization if needed (HM disabled or + * not available in HW). + * + * If VT-x or AMD-V hardware isn't available, HM will try fall back on a native + * hypervisor API via NEM, and then back on raw-mode if that isn't available + * either. The fallback to raw-mode will not happen if /HM/HMForced is set + * (like for guest using SMP or 64-bit as well as for complicated guest like OS + * X, OS/2 and others). + * + * Note that a lot of the set up work is done in ring-0 and thus postponed till + * the ring-3 and ring-0 callback to HMR3InitCompleted. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * + * @remarks Be careful with what we call here, since most of the VMM components + * are uninitialized. + */ +VMMR3_INT_DECL(int) HMR3Init(PVM pVM) +{ + LogFlow(("HMR3Init\n")); + + /* + * Assert alignment and sizes. + */ + AssertCompileMemberAlignment(VM, hm.s, 32); + AssertCompile(sizeof(pVM->hm.s) <= sizeof(pVM->hm.padding)); + + /* + * Register the saved state data unit. + */ + int rc = SSMR3RegisterInternal(pVM, "HWACCM", 0, HM_SAVED_STATE_VERSION, sizeof(HM), + NULL, NULL, NULL, + NULL, hmR3Save, NULL, + NULL, hmR3Load, NULL); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register info handlers. 
+ */ + rc = DBGFR3InfoRegisterInternalEx(pVM, "hm", "Dumps HM info.", hmR3Info, DBGFINFO_FLAGS_ALL_EMTS); + AssertRCReturn(rc, rc); + + rc = DBGFR3InfoRegisterInternalEx(pVM, "hmeventpending", "Dumps the pending HM event.", hmR3InfoEventPending, + DBGFINFO_FLAGS_ALL_EMTS); + AssertRCReturn(rc, rc); + + rc = DBGFR3InfoRegisterInternalEx(pVM, "svmvmcbcache", "Dumps the HM SVM nested-guest VMCB cache.", + hmR3InfoSvmNstGstVmcbCache, DBGFINFO_FLAGS_ALL_EMTS); + AssertRCReturn(rc, rc); + + /* + * Read configuration. + */ + PCFGMNODE pCfgHm = CFGMR3GetChild(CFGMR3GetRoot(pVM), "HM/"); + + /* + * Validate the HM settings. + */ + rc = CFGMR3ValidateConfig(pCfgHm, "/HM/", + "HMForced" + "|UseNEMInstead" + "|FallbackToNEM" + "|EnableNestedPaging" + "|EnableUX" + "|EnableLargePages" + "|EnableVPID" + "|IBPBOnVMExit" + "|IBPBOnVMEntry" + "|SpecCtrlByHost" + "|L1DFlushOnSched" + "|L1DFlushOnVMEntry" + "|TPRPatchingEnabled" + "|64bitEnabled" + "|Exclusive" + "|MaxResumeLoops" + "|VmxPleGap" + "|VmxPleWindow" + "|UseVmxPreemptTimer" + "|SvmPauseFilter" + "|SvmPauseFilterThreshold" + "|SvmVirtVmsaveVmload" + "|SvmVGif" + "|LovelyMesaDrvWorkaround", + "" /* pszValidNodes */, "HM" /* pszWho */, 0 /* uInstance */); + if (RT_FAILURE(rc)) + return rc; + + /** @cfgm{/HM/HMForced, bool, false} + * Forces hardware virtualization, no falling back on raw-mode. HM must be + * enabled, i.e. /HMEnabled must be true. */ + bool fHMForced; +#ifdef VBOX_WITH_RAW_MODE + rc = CFGMR3QueryBoolDef(pCfgHm, "HMForced", &fHMForced, false); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(!fHMForced || pVM->fHMEnabled, ("Configuration error: HM forced but not enabled!\n"), + VERR_INVALID_PARAMETER); +# if defined(RT_OS_DARWIN) + if (pVM->fHMEnabled) + fHMForced = true; +# endif + AssertLogRelMsgReturn(pVM->cCpus == 1 || pVM->fHMEnabled, ("Configuration error: SMP requires HM to be enabled!\n"), + VERR_INVALID_PARAMETER); + if (pVM->cCpus > 1) + fHMForced = true; +#else /* !VBOX_WITH_RAW_MODE */ + AssertRelease(pVM->fHMEnabled); + fHMForced = true; +#endif /* !VBOX_WITH_RAW_MODE */ + + /** @cfgm{/HM/UseNEMInstead, bool, true} + * Don't use HM, use NEM instead. */ + bool fUseNEMInstead = false; + rc = CFGMR3QueryBoolDef(pCfgHm, "UseNEMInstead", &fUseNEMInstead, false); + AssertRCReturn(rc, rc); + if (fUseNEMInstead && pVM->fHMEnabled) + { + LogRel(("HM: Setting fHMEnabled to false because fUseNEMInstead is set.\n")); + pVM->fHMEnabled = false; + } + + /** @cfgm{/HM/FallbackToNEM, bool, true} + * Enables fallback on NEM. */ + bool fFallbackToNEM = true; + rc = CFGMR3QueryBoolDef(pCfgHm, "FallbackToNEM", &fFallbackToNEM, true); + AssertRCReturn(rc, rc); + + /** @cfgm{/HM/EnableNestedPaging, bool, false} + * Enables nested paging (aka extended page tables). */ + rc = CFGMR3QueryBoolDef(pCfgHm, "EnableNestedPaging", &pVM->hm.s.fAllowNestedPaging, false); + AssertRCReturn(rc, rc); + + /** @cfgm{/HM/EnableUX, bool, true} + * Enables the VT-x unrestricted execution feature. */ + rc = CFGMR3QueryBoolDef(pCfgHm, "EnableUX", &pVM->hm.s.vmx.fAllowUnrestricted, true); + AssertRCReturn(rc, rc); + + /** @cfgm{/HM/EnableLargePages, bool, false} + * Enables using large pages (2 MB) for guest memory, thus saving on (nested) + * page table walking and maybe better TLB hit rate in some cases. */ + rc = CFGMR3QueryBoolDef(pCfgHm, "EnableLargePages", &pVM->hm.s.fLargePages, false); + AssertRCReturn(rc, rc); + + /** @cfgm{/HM/EnableVPID, bool, false} + * Enables the VT-x VPID feature. 
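+ * Like the other /HM/ keys above, this can typically be set per-VM through the
+ * CFGM tree, e.g. via the extradata key "VBoxInternal/HM/EnableVPID".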
 */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "EnableVPID", &pVM->hm.s.vmx.fAllowVpid, false);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/TPRPatchingEnabled, bool, false}
+     * Enables TPR patching for 32-bit Windows guests with IO-APIC. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "TPRPatchingEnabled", &pVM->hm.s.fTprPatchingAllowed, false);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/64bitEnabled, bool, 32-bit:false, 64-bit:true}
+     * Enables AMD64 CPU features.
+     * On 32-bit hosts this isn't the default and requires host CPU support; 64-bit
+     * hosts already have the support. */
+#ifdef VBOX_ENABLE_64_BITS_GUESTS
+    rc = CFGMR3QueryBoolDef(pCfgHm, "64bitEnabled", &pVM->hm.s.fAllow64BitGuests, HC_ARCH_BITS == 64);
+    AssertLogRelRCReturn(rc, rc);
+#else
+    pVM->hm.s.fAllow64BitGuests = false;
+#endif
+
+    /** @cfgm{/HM/VmxPleGap, uint32_t, 0}
+     * The pause-filter exiting gap in TSC ticks. When the number of ticks between
+     * two successive PAUSE instructions exceeds VmxPleGap, the CPU considers the
+     * latest PAUSE instruction to be the start of a new PAUSE loop.
+     */
+    rc = CFGMR3QueryU32Def(pCfgHm, "VmxPleGap", &pVM->hm.s.vmx.cPleGapTicks, 0);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/VmxPleWindow, uint32_t, 0}
+     * The pause-filter exiting window in TSC ticks. When the number of ticks
+     * between the current PAUSE instruction and the first PAUSE of a loop exceeds
+     * VmxPleWindow, a VM-exit is triggered.
+     *
+     * Setting VmxPleGap and VmxPleWindow to 0 disables pause-filter exiting.
+     */
+    rc = CFGMR3QueryU32Def(pCfgHm, "VmxPleWindow", &pVM->hm.s.vmx.cPleWindowTicks, 0);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/SvmPauseFilter, uint16_t, 0}
+     * A counter that is decremented each time a PAUSE instruction is executed by the
+     * guest. When the counter is 0, a \#VMEXIT is triggered.
+     *
+     * Setting SvmPauseFilter to 0 disables pause-filter exiting.
+     */
+    rc = CFGMR3QueryU16Def(pCfgHm, "SvmPauseFilter", &pVM->hm.s.svm.cPauseFilter, 0);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/SvmPauseFilterThreshold, uint16_t, 0}
+     * The pause filter threshold in ticks. When the elapsed time (in ticks) between
+     * two successive PAUSE instructions exceeds SvmPauseFilterThreshold, the
+     * PauseFilter count is reset to its initial value. However, if PAUSE is
+     * executed PauseFilter times within PauseFilterThreshold ticks, a VM-exit will
+     * be triggered.
+     *
+     * Requires SvmPauseFilter to be non-zero for the pause-filter threshold to be
+     * activated.
+     */
+    rc = CFGMR3QueryU16Def(pCfgHm, "SvmPauseFilterThreshold", &pVM->hm.s.svm.cPauseFilterThresholdTicks, 0);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/SvmVirtVmsaveVmload, bool, true}
+     * Whether to make use of the virtualized VMSAVE/VMLOAD feature of the CPU if
+     * it's available. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "SvmVirtVmsaveVmload", &pVM->hm.s.svm.fVirtVmsaveVmload, true);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/SvmVGif, bool, true}
+     * Whether to make use of the Virtual GIF (Global Interrupt Flag) feature of the
+     * CPU if it's available. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "SvmVGif", &pVM->hm.s.svm.fVGif, true);
+    AssertRCReturn(rc, rc);
+
+    /** @cfgm{/HM/Exclusive, bool}
+     * Determines the init method for AMD-V and VT-x. If set to true, HM will do a
+     * global init for each host CPU. If false, we do local init each time we wish
+     * to execute guest code.
+     *
+     * On Windows, default is false due to the higher risk of conflicts with other
+     * hypervisors.
+     *
+     * On Mac OS X, this setting is ignored since the code does not handle local
+     * init when it utilizes the OS provided VT-x function, SUPR0EnableVTx().
+     */
+#if defined(RT_OS_DARWIN)
+    pVM->hm.s.fGlobalInit = true;
+#else
+    rc = CFGMR3QueryBoolDef(pCfgHm, "Exclusive", &pVM->hm.s.fGlobalInit,
+# if defined(RT_OS_WINDOWS)
+                            false
+# else
+                            true
+# endif
+                            );
+    AssertLogRelRCReturn(rc, rc);
+#endif
+
+    /** @cfgm{/HM/MaxResumeLoops, uint32_t}
+     * The number of times to resume guest execution before we forcibly return to
+     * ring-3. The return value of RTThreadPreemptIsPendingTrusty in ring-0
+     * determines the default value. */
+    rc = CFGMR3QueryU32Def(pCfgHm, "MaxResumeLoops", &pVM->hm.s.cMaxResumeLoops, 0 /* set by R0 later */);
+    AssertLogRelRCReturn(rc, rc);
+
+    /** @cfgm{/HM/UseVmxPreemptTimer, bool}
+     * Whether to make use of the VMX-preemption timer feature of the CPU if it's
+     * available. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "UseVmxPreemptTimer", &pVM->hm.s.vmx.fUsePreemptTimer, true);
+    AssertLogRelRCReturn(rc, rc);
+
+    /** @cfgm{/HM/IBPBOnVMExit, bool}
+     * Costly paranoia setting. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "IBPBOnVMExit", &pVM->hm.s.fIbpbOnVmExit, false);
+    AssertLogRelRCReturn(rc, rc);
+
+    /** @cfgm{/HM/IBPBOnVMEntry, bool}
+     * Costly paranoia setting. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "IBPBOnVMEntry", &pVM->hm.s.fIbpbOnVmEntry, false);
+    AssertLogRelRCReturn(rc, rc);
+
+    /** @cfgm{/HM/L1DFlushOnSched, bool, true}
+     * CVE-2018-3646 workaround, ignored on CPUs that aren't affected. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "L1DFlushOnSched", &pVM->hm.s.fL1dFlushOnSched, true);
+    AssertLogRelRCReturn(rc, rc);
+
+    /** @cfgm{/HM/L1DFlushOnVMEntry, bool}
+     * CVE-2018-3646 workaround, ignored on CPUs that aren't affected. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "L1DFlushOnVMEntry", &pVM->hm.s.fL1dFlushOnVmEntry, false);
+    AssertLogRelRCReturn(rc, rc);
+
+    /* Disable L1DFlushOnSched if L1DFlushOnVMEntry is enabled. */
+    if (pVM->hm.s.fL1dFlushOnVmEntry)
+        pVM->hm.s.fL1dFlushOnSched = false;
+
+    /** @cfgm{/HM/SpecCtrlByHost, bool}
+     * Another expensive paranoia setting. */
+    rc = CFGMR3QueryBoolDef(pCfgHm, "SpecCtrlByHost", &pVM->hm.s.fSpecCtrlByHost, false);
+    AssertLogRelRCReturn(rc, rc);
+
+    /** @cfgm{/HM/LovelyMesaDrvWorkaround, bool}
+     * Workaround for the Mesa vmsvga 3D driver making incorrect assumptions about
+     * the hypervisor it is running under. */
+    bool f;
+    rc = CFGMR3QueryBoolDef(pCfgHm, "LovelyMesaDrvWorkaround", &f, false);
+    AssertLogRelRCReturn(rc, rc);
+    for (VMCPUID i = 0; i < pVM->cCpus; i++)
+        pVM->aCpus[i].hm.s.fTrapXcptGpForLovelyMesaDrv = f;
+
+    /*
+     * Check for VT-x or AMD-V support according to the user's wishes.
+     */
+    /** @todo SUPR3QueryVTCaps won't catch VERR_VMX_IN_VMX_ROOT_MODE or
+     *        VERR_SVM_IN_USE. */
+    if (pVM->fHMEnabled)
+    {
+        uint32_t fCaps;
+        rc = SUPR3QueryVTCaps(&fCaps);
+        if (RT_SUCCESS(rc))
+        {
+            if (fCaps & SUPVTCAPS_AMD_V)
+            {
+                pVM->hm.s.svm.fSupported = true;
+                LogRel(("HM: HMR3Init: AMD-V%s\n", fCaps & SUPVTCAPS_NESTED_PAGING ? " w/ nested paging" : ""));
+                VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_HW_VIRT);
+            }
+            else if (fCaps & SUPVTCAPS_VT_X)
+            {
+                const char *pszWhy;
+                rc = SUPR3QueryVTxSupported(&pszWhy);
+                if (RT_SUCCESS(rc))
+                {
+                    pVM->hm.s.vmx.fSupported = true;
+                    LogRel(("HM: HMR3Init: VT-x%s%s%s\n",
+                            fCaps & SUPVTCAPS_NESTED_PAGING ? " w/ nested paging" : "",
+                            fCaps & SUPVTCAPS_VTX_UNRESTRICTED_GUEST ?
" and unrestricted guest execution" : "", + (fCaps & (SUPVTCAPS_NESTED_PAGING | SUPVTCAPS_VTX_UNRESTRICTED_GUEST)) ? " hw support" : "")); + VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_HW_VIRT); + } + else + { + /* + * Before failing, try fallback to NEM if we're allowed to do that. + */ + pVM->fHMEnabled = false; + Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NOT_SET); + if (fFallbackToNEM) + { + LogRel(("HM: HMR3Init: Attempting fall back to NEM: The host kernel does not support VT-x - %s\n", pszWhy)); + int rc2 = NEMR3Init(pVM, true /*fFallback*/, fHMForced); + + ASMCompilerBarrier(); /* NEMR3Init may have changed bMainExecutionEngine. */ + if ( RT_SUCCESS(rc2) + && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NOT_SET) + rc = VINF_SUCCESS; + } + if (RT_FAILURE(rc)) + { + if (fHMForced) + return VMSetError(pVM, rc, RT_SRC_POS, "The host kernel does not support VT-x: %s\n", pszWhy); + + /* Fall back to raw-mode. */ + LogRel(("HM: HMR3Init: Falling back to raw-mode: The host kernel does not support VT-x - %s\n", pszWhy)); + VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_RAW_MODE); + } + } + } + else + AssertLogRelMsgFailedReturn(("SUPR3QueryVTCaps didn't return either AMD-V or VT-x flag set (%#x)!\n", fCaps), + VERR_INTERNAL_ERROR_5); + + /* + * Do we require a little bit or raw-mode for 64-bit guest execution? + */ + pVM->fHMNeedRawModeCtx = HC_ARCH_BITS == 32 + && pVM->fHMEnabled + && pVM->hm.s.fAllow64BitGuests; + + /* + * Disable nested paging and unrestricted guest execution now if they're + * configured so that CPUM can make decisions based on our configuration. + */ + Assert(!pVM->hm.s.fNestedPaging); + if (pVM->hm.s.fAllowNestedPaging) + { + if (fCaps & SUPVTCAPS_NESTED_PAGING) + pVM->hm.s.fNestedPaging = true; + else + pVM->hm.s.fAllowNestedPaging = false; + } + + if (fCaps & SUPVTCAPS_VT_X) + { + Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); + if (pVM->hm.s.vmx.fAllowUnrestricted) + { + if ( (fCaps & SUPVTCAPS_VTX_UNRESTRICTED_GUEST) + && pVM->hm.s.fNestedPaging) + pVM->hm.s.vmx.fUnrestrictedGuest = true; + else + pVM->hm.s.vmx.fAllowUnrestricted = false; + } + } + } + else + { + const char *pszMsg; + switch (rc) + { + case VERR_UNSUPPORTED_CPU: pszMsg = "Unknown CPU, VT-x or AMD-v features cannot be ascertained"; break; + case VERR_VMX_NO_VMX: pszMsg = "VT-x is not available"; break; + case VERR_VMX_MSR_VMX_DISABLED: pszMsg = "VT-x is disabled in the BIOS"; break; + case VERR_VMX_MSR_ALL_VMX_DISABLED: pszMsg = "VT-x is disabled in the BIOS for all CPU modes"; break; + case VERR_VMX_MSR_LOCKING_FAILED: pszMsg = "Failed to enable and lock VT-x features"; break; + case VERR_SVM_NO_SVM: pszMsg = "AMD-V is not available"; break; + case VERR_SVM_DISABLED: pszMsg = "AMD-V is disabled in the BIOS (or by the host OS)"; break; + default: + return VMSetError(pVM, rc, RT_SRC_POS, "SUPR3QueryVTCaps failed with %Rrc", rc); + } + + /* + * Before failing, try fallback to NEM if we're allowed to do that. + */ + pVM->fHMEnabled = false; + if (fFallbackToNEM) + { + LogRel(("HM: HMR3Init: Attempting fall back to NEM: %s\n", pszMsg)); + int rc2 = NEMR3Init(pVM, true /*fFallback*/, fHMForced); + ASMCompilerBarrier(); /* NEMR3Init may have changed bMainExecutionEngine. 
 */
+                if (   RT_SUCCESS(rc2)
+                    && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NOT_SET)
+                    rc = VINF_SUCCESS;
+            }
+            if (RT_FAILURE(rc))
+            {
+                if (fHMForced)
+                    return VM_SET_ERROR(pVM, rc, pszMsg);
+
+                LogRel(("HM: HMR3Init: Falling back to raw-mode: %s\n", pszMsg));
+                VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_RAW_MODE);
+            }
+        }
+    }
+    else
+    {
+        /*
+         * Disabled HM means raw-mode, unless NEM is supposed to be used.
+         */
+        if (!fUseNEMInstead)
+            VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_RAW_MODE);
+        else
+        {
+            rc = NEMR3Init(pVM, false /*fFallback*/, true);
+            ASMCompilerBarrier(); /* NEMR3Init may have changed bMainExecutionEngine. */
+            if (RT_FAILURE(rc))
+                return rc;
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Initializes HM components after the ring-3 phase has been fully initialized.
+ *
+ * @returns VBox status code.
+ * @param   pVM     The cross context VM structure.
+ */
+static int hmR3InitFinalizeR3(PVM pVM)
+{
+    LogFlow(("HMR3InitCPU\n"));
+
+    if (!HMIsEnabled(pVM))
+        return VINF_SUCCESS;
+
+    for (VMCPUID i = 0; i < pVM->cCpus; i++)
+    {
+        PVMCPU pVCpu = &pVM->aCpus[i];
+        pVCpu->hm.s.fActive = false;
+        pVCpu->hm.s.fGIMTrapXcptUD = GIMShouldTrapXcptUD(pVCpu);  /* Safe to call now since GIMR3Init() has completed. */
+    }
+
+#ifdef VBOX_WITH_STATISTICS
+    STAM_REG(pVM, &pVM->hm.s.StatTprPatchSuccess,      STAMTYPE_COUNTER, "/HM/TPR/Patch/Success",      STAMUNIT_OCCURENCES, "Number of times an instruction was successfully patched.");
+    STAM_REG(pVM, &pVM->hm.s.StatTprPatchFailure,      STAMTYPE_COUNTER, "/HM/TPR/Patch/Failed",       STAMUNIT_OCCURENCES, "Number of unsuccessful patch attempts.");
+    STAM_REG(pVM, &pVM->hm.s.StatTprReplaceSuccessCr8, STAMTYPE_COUNTER, "/HM/TPR/Replace/SuccessCR8", STAMUNIT_OCCURENCES, "Number of instruction replacements by MOV CR8.");
+    STAM_REG(pVM, &pVM->hm.s.StatTprReplaceSuccessVmc, STAMTYPE_COUNTER, "/HM/TPR/Replace/SuccessVMC", STAMUNIT_OCCURENCES, "Number of instruction replacements by VMMCALL.");
+    STAM_REG(pVM, &pVM->hm.s.StatTprReplaceFailure,    STAMTYPE_COUNTER, "/HM/TPR/Replace/Failed",     STAMUNIT_OCCURENCES, "Number of unsuccessful replace attempts.");
+#endif
+
+    /*
+     * Statistics.
+ */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + int rc; + +#ifdef VBOX_WITH_STATISTICS + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatPoke, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of RTMpPokeCpu.", + "/PROF/CPU%d/HM/Poke", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatSpinPoke, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of poke wait.", + "/PROF/CPU%d/HM/PokeWait", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatSpinPokeFailed, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of poke wait when RTMpPokeCpu fails.", + "/PROF/CPU%d/HM/PokeWaitFailed", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatEntry, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of entry until entering GC.", + "/PROF/CPU%d/HM/Entry", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatPreExit, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of pre-exit processing after returning from GC.", + "/PROF/CPU%d/HM/SwitchFromGC_1", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExitHandling, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of exit handling (longjmps not included!)", + "/PROF/CPU%d/HM/SwitchFromGC_2", i); + AssertRC(rc); + + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExitIO, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "I/O.", + "/PROF/CPU%d/HM/SwitchFromGC_2/IO", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExitMovCRx, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "MOV CRx.", + "/PROF/CPU%d/HM/SwitchFromGC_2/MovCRx", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExitXcptNmi, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Exceptions, NMIs.", + "/PROF/CPU%d/HM/SwitchFromGC_2/XcptNmi", i); + AssertRC(rc); + + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatImportGuestState, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of importing guest state from hardware after VM-exit.", + "/PROF/CPU%d/HM/ImportGuestState", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExportGuestState, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of exporting guest state to hardware before VM-entry.", + "/PROF/CPU%d/HM/ExportGuestState", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatLoadGuestFpuState, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of CPUMR0LoadGuestFPU.", + "/PROF/CPU%d/HM/LoadGuestFpuState", i); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatInGC, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, + "Profiling of execution of guest-code in hardware.", + "/PROF/CPU%d/HM/InGC", i); + AssertRC(rc); + +# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatWorldSwitch3264, STAMTYPE_PROFILE, STAMVISIBILITY_USED, + STAMUNIT_TICKS_PER_CALL, "Profiling of the 32/64 switcher.", + "/PROF/CPU%d/HM/Switcher3264", i); + AssertRC(rc); +# endif + +# ifdef HM_PROFILE_EXIT_DISPATCH + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExitDispatch, STAMTYPE_PROFILE_ADV, STAMVISIBILITY_USED, + STAMUNIT_TICKS_PER_CALL, "Profiling the dispatching of exit handlers.", + "/PROF/CPU%d/HM/ExitDispatch", i); + AssertRC(rc); +# endif + +#endif +# define 
HM_REG_COUNTER(a, b, desc) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, desc, b, i); \ + AssertRC(rc); + +#ifdef VBOX_WITH_STATISTICS + HM_REG_COUNTER(&pVCpu->hm.s.StatExitAll, "/HM/CPU%d/Exit/All", "Exits (total)."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitShadowNM, "/HM/CPU%d/Exit/Trap/Shw/#NM", "Shadow #NM (device not available, no math co-processor) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestNM, "/HM/CPU%d/Exit/Trap/Gst/#NM", "Guest #NM (device not available, no math co-processor) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitShadowPF, "/HM/CPU%d/Exit/Trap/Shw/#PF", "Shadow #PF (page fault) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitShadowPFEM, "/HM/CPU%d/Exit/Trap/Shw/#PF-EM", "#PF (page fault) exception going back to ring-3 for emulating the instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestPF, "/HM/CPU%d/Exit/Trap/Gst/#PF", "Guest #PF (page fault) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestUD, "/HM/CPU%d/Exit/Trap/Gst/#UD", "Guest #UD (undefined opcode) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestSS, "/HM/CPU%d/Exit/Trap/Gst/#SS", "Guest #SS (stack-segment fault) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestNP, "/HM/CPU%d/Exit/Trap/Gst/#NP", "Guest #NP (segment not present) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestGP, "/HM/CPU%d/Exit/Trap/Gst/#GP", "Guest #GP (general protection) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestMF, "/HM/CPU%d/Exit/Trap/Gst/#MF", "Guest #MF (x87 FPU error, math fault) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestDE, "/HM/CPU%d/Exit/Trap/Gst/#DE", "Guest #DE (divide error) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestDB, "/HM/CPU%d/Exit/Trap/Gst/#DB", "Guest #DB (debug) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestBP, "/HM/CPU%d/Exit/Trap/Gst/#BP", "Guest #BP (breakpoint) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestXF, "/HM/CPU%d/Exit/Trap/Gst/#XF", "Guest #XF (extended math fault, SIMD FPU) exception."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitGuestXcpUnk, "/HM/CPU%d/Exit/Trap/Gst/Other", "Other guest exceptions."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitHlt, "/HM/CPU%d/Exit/Instr/Hlt", "HLT instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitRdmsr, "/HM/CPU%d/Exit/Instr/Rdmsr", "RDMSR instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitWrmsr, "/HM/CPU%d/Exit/Instr/Wrmsr", "WRMSR instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitMwait, "/HM/CPU%d/Exit/Instr/Mwait", "MWAIT instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitMonitor, "/HM/CPU%d/Exit/Instr/Monitor", "MONITOR instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitDRxWrite, "/HM/CPU%d/Exit/Instr/DR-Write", "Debug register write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitDRxRead, "/HM/CPU%d/Exit/Instr/DR-Read", "Debug register read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR0Read, "/HM/CPU%d/Exit/Instr/CR-Read/CR0", "CR0 read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR2Read, "/HM/CPU%d/Exit/Instr/CR-Read/CR2", "CR2 read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR3Read, "/HM/CPU%d/Exit/Instr/CR-Read/CR3", "CR3 read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR4Read, "/HM/CPU%d/Exit/Instr/CR-Read/CR4", "CR4 read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR8Read, "/HM/CPU%d/Exit/Instr/CR-Read/CR8", "CR8 read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR0Write, "/HM/CPU%d/Exit/Instr/CR-Write/CR0", "CR0 write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR2Write, 
"/HM/CPU%d/Exit/Instr/CR-Write/CR2", "CR2 write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR3Write, "/HM/CPU%d/Exit/Instr/CR-Write/CR3", "CR3 write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR4Write, "/HM/CPU%d/Exit/Instr/CR-Write/CR4", "CR4 write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCR8Write, "/HM/CPU%d/Exit/Instr/CR-Write/CR8", "CR8 write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitClts, "/HM/CPU%d/Exit/Instr/CLTS", "CLTS instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitLmsw, "/HM/CPU%d/Exit/Instr/LMSW", "LMSW instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitCli, "/HM/CPU%d/Exit/Instr/Cli", "CLI instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitSti, "/HM/CPU%d/Exit/Instr/Sti", "STI instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitPushf, "/HM/CPU%d/Exit/Instr/Pushf", "PUSHF instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitPopf, "/HM/CPU%d/Exit/Instr/Popf", "POPF instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitIret, "/HM/CPU%d/Exit/Instr/Iret", "IRET instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitInt, "/HM/CPU%d/Exit/Instr/Int", "INT instruction."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitXdtrAccess, "/HM/CPU%d/Exit/Instr/XdtrAccess", "GDTR, IDTR, LDTR access."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitIOWrite, "/HM/CPU%d/Exit/IO/Write", "I/O write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitIORead, "/HM/CPU%d/Exit/IO/Read", "I/O read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitIOStringWrite, "/HM/CPU%d/Exit/IO/WriteString", "String I/O write."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitIOStringRead, "/HM/CPU%d/Exit/IO/ReadString", "String I/O read."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitIntWindow, "/HM/CPU%d/Exit/IntWindow", "Interrupt-window exit. Guest is ready to receive interrupts again."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitExtInt, "/HM/CPU%d/Exit/ExtInt", "Physical maskable interrupt (host)."); +#endif + HM_REG_COUNTER(&pVCpu->hm.s.StatExitHostNmiInGC, "/HM/CPU%d/Exit/HostNmiInGC", "Host NMI received while in guest context."); +#ifdef VBOX_WITH_STATISTICS + HM_REG_COUNTER(&pVCpu->hm.s.StatExitPreemptTimer, "/HM/CPU%d/Exit/PreemptTimer", "VMX-preemption timer expired."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitTprBelowThreshold, "/HM/CPU%d/Exit/TprBelowThreshold", "TPR lowered below threshold by the guest."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitTaskSwitch, "/HM/CPU%d/Exit/TaskSwitch", "Task switch."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitMtf, "/HM/CPU%d/Exit/MonitorTrapFlag", "Monitor Trap Flag."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExitApicAccess, "/HM/CPU%d/Exit/ApicAccess", "APIC access. 
Guest attempted to access memory at a physical address on the APIC-access page."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchTprMaskedIrq, "/HM/CPU%d/Switch/TprMaskedIrq", "PDMGetInterrupt() signals TPR masks pending Irq."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchGuestIrq, "/HM/CPU%d/Switch/IrqPending", "PDMGetInterrupt() cleared behind our back!?!."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchPendingHostIrq, "/HM/CPU%d/Switch/PendingHostIrq", "Exit to ring-3 due to pending host interrupt before executing guest code."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchHmToR3FF, "/HM/CPU%d/Switch/HmToR3FF", "Exit to ring-3 due to pending timers, EMT rendezvous, critical section etc."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchExitToR3, "/HM/CPU%d/Switch/ExitToR3", "Exit to ring-3 (total)."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchLongJmpToR3, "/HM/CPU%d/Switch/LongJmpToR3", "Longjump to ring-3."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchMaxResumeLoops, "/HM/CPU%d/Switch/MaxResumeToR3", "Maximum VMRESUME inner-loop counter reached."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchHltToR3, "/HM/CPU%d/Switch/HltToR3", "HLT causing us to go to ring-3."); + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchApicAccessToR3, "/HM/CPU%d/Switch/ApicAccessToR3", "APIC access causing us to go to ring-3."); +#endif + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchPreempt, "/HM/CPU%d/Switch/Preempting", "EMT has been preempted while in HM context."); +#ifdef VBOX_WITH_STATISTICS + HM_REG_COUNTER(&pVCpu->hm.s.StatSwitchPreemptExportHostState, "/HM/CPU%d/Switch/ExportHostState", "Preemption caused us to re-export the host state."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatInjectInterrupt, "/HM/CPU%d/EventInject/Interrupt", "Injected an external interrupt into the guest."); + HM_REG_COUNTER(&pVCpu->hm.s.StatInjectXcpt, "/HM/CPU%d/EventInject/Trap", "Injected an exception into the guest."); + HM_REG_COUNTER(&pVCpu->hm.s.StatInjectPendingReflect, "/HM/CPU%d/EventInject/PendingReflect", "Reflecting an exception (or #DF) caused due to event injection."); + HM_REG_COUNTER(&pVCpu->hm.s.StatInjectPendingInterpret, "/HM/CPU%d/EventInject/PendingInterpret", "Falling to interpreter for handling exception caused due to event injection."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushPage, "/HM/CPU%d/Flush/Page", "Invalidating a guest page on all guest CPUs."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushPageManual, "/HM/CPU%d/Flush/Page/Virt", "Invalidating a guest page using guest-virtual address."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushPhysPageManual, "/HM/CPU%d/Flush/Page/Phys", "Invalidating a guest page using guest-physical address."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushTlb, "/HM/CPU%d/Flush/TLB", "Forcing a full guest-TLB flush (ring-0)."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushTlbManual, "/HM/CPU%d/Flush/TLB/Manual", "Request a full guest-TLB flush."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushTlbWorldSwitch, "/HM/CPU%d/Flush/TLB/CpuSwitch", "Forcing a full guest-TLB flush due to host-CPU reschedule or ASID-limit hit by another guest-VCPU."); + HM_REG_COUNTER(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch, "/HM/CPU%d/Flush/TLB/Skipped", "No TLB flushing required."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushEntire, "/HM/CPU%d/Flush/TLB/Entire", "Flush the entire TLB (host + guest)."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushAsid, "/HM/CPU%d/Flush/TLB/ASID", "Flushed guest-TLB entries for the current VPID."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushNestedPaging, "/HM/CPU%d/Flush/TLB/NestedPaging", "Flushed guest-TLB entries for the current EPT."); + 
HM_REG_COUNTER(&pVCpu->hm.s.StatFlushTlbInvlpgVirt, "/HM/CPU%d/Flush/TLB/InvlpgVirt", "Invalidated a guest-TLB entry for a guest-virtual address."); + HM_REG_COUNTER(&pVCpu->hm.s.StatFlushTlbInvlpgPhys, "/HM/CPU%d/Flush/TLB/InvlpgPhys", "Currently not possible, flushes entire guest-TLB."); + HM_REG_COUNTER(&pVCpu->hm.s.StatTlbShootdown, "/HM/CPU%d/Flush/Shootdown/Page", "Inter-VCPU request to flush queued guest page."); + HM_REG_COUNTER(&pVCpu->hm.s.StatTlbShootdownFlush, "/HM/CPU%d/Flush/Shootdown/TLB", "Inter-VCPU request to flush entire guest-TLB."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatTscParavirt, "/HM/CPU%d/TSC/Paravirt", "Paravirtualized TSC in effect."); + HM_REG_COUNTER(&pVCpu->hm.s.StatTscOffset, "/HM/CPU%d/TSC/Offset", "TSC offsetting is in effect."); + HM_REG_COUNTER(&pVCpu->hm.s.StatTscIntercept, "/HM/CPU%d/TSC/Intercept", "Intercept TSC accesses."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatDRxArmed, "/HM/CPU%d/Debug/Armed", "Loaded guest-debug state while loading guest-state."); + HM_REG_COUNTER(&pVCpu->hm.s.StatDRxContextSwitch, "/HM/CPU%d/Debug/ContextSwitch", "Loaded guest-debug state on MOV DRx."); + HM_REG_COUNTER(&pVCpu->hm.s.StatDRxIoCheck, "/HM/CPU%d/Debug/IOCheck", "Checking for I/O breakpoint."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatExportMinimal, "/HM/CPU%d/Export/Minimal", "VM-entry exporting minimal guest-state."); + HM_REG_COUNTER(&pVCpu->hm.s.StatExportFull, "/HM/CPU%d/Export/Full", "VM-entry exporting the full guest-state."); + HM_REG_COUNTER(&pVCpu->hm.s.StatLoadGuestFpu, "/HM/CPU%d/Export/GuestFpu", "VM-entry loading the guest-FPU state."); + + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckBadRmSelBase, "/HM/CPU%d/VMXCheck/RMSelBase", "Could not use VMX due to unsuitable real-mode selector base."); + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckBadRmSelLimit, "/HM/CPU%d/VMXCheck/RMSelLimit", "Could not use VMX due to unsuitable real-mode selector limit."); + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckBadRmSelAttr, "/HM/CPU%d/VMXCheck/RMSelAttrs", "Could not use VMX due to unsuitable real-mode selector limit."); + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckRmOk, "/HM/CPU%d/VMXCheck/VMX_RM", "VMX execution in real (V86) mode OK."); + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckBadSel, "/HM/CPU%d/VMXCheck/Selector", "Could not use VMX due to unsuitable selector."); + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckBadRpl, "/HM/CPU%d/VMXCheck/RPL", "Could not use VMX due to unsuitable RPL."); + HM_REG_COUNTER(&pVCpu->hm.s.StatVmxCheckPmOk, "/HM/CPU%d/VMXCheck/VMX_PM", "VMX execution in protected mode OK."); + +#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) + HM_REG_COUNTER(&pVCpu->hm.s.StatFpu64SwitchBack, "/HM/CPU%d/Switch64/Fpu", "Saving guest FPU/XMM state."); + HM_REG_COUNTER(&pVCpu->hm.s.StatDebug64SwitchBack, "/HM/CPU%d/Switch64/Debug", "Saving guest debug state."); +#endif + +#undef HM_REG_COUNTER + + bool const fCpuSupportsVmx = ASMIsIntelCpu() || ASMIsViaCentaurCpu() || ASMIsShanghaiCpu(); + + /* + * Guest Exit reason stats. 
+ */ + pVCpu->hm.s.paStatExitReason = NULL; + rc = MMHyperAlloc(pVM, MAX_EXITREASON_STAT * sizeof(*pVCpu->hm.s.paStatExitReason), 0 /* uAlignment */, MM_TAG_HM, + (void **)&pVCpu->hm.s.paStatExitReason); + AssertRCReturn(rc, rc); + + if (fCpuSupportsVmx) + { + for (int j = 0; j < MAX_EXITREASON_STAT; j++) + { + const char *pszExitName = HMGetVmxExitName(j); + if (pszExitName) + { + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.paStatExitReason[j], STAMTYPE_COUNTER, STAMVISIBILITY_USED, + STAMUNIT_OCCURENCES, pszExitName, "/HM/CPU%d/Exit/Reason/%02x", i, j); + AssertRCReturn(rc, rc); + } + } + } + else + { + for (int j = 0; j < MAX_EXITREASON_STAT; j++) + { + const char *pszExitName = HMGetSvmExitName(j); + if (pszExitName) + { + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.paStatExitReason[j], STAMTYPE_COUNTER, STAMVISIBILITY_USED, + STAMUNIT_OCCURENCES, pszExitName, "/HM/CPU%d/Exit/Reason/%02x", i, j); + AssertRCReturn(rc, rc); + } + } + } + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatExitReasonNpf, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Nested page fault", "/HM/CPU%d/Exit/Reason/#NPF", i); + AssertRCReturn(rc, rc); + pVCpu->hm.s.paStatExitReasonR0 = MMHyperR3ToR0(pVM, pVCpu->hm.s.paStatExitReason); +# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + Assert(pVCpu->hm.s.paStatExitReasonR0 != NIL_RTR0PTR || !HMIsEnabled(pVM)); +# else + Assert(pVCpu->hm.s.paStatExitReasonR0 != NIL_RTR0PTR); +# endif + +#if defined(VBOX_WITH_NESTED_HWVIRT_SVM) || defined(VBOX_WITH_NESTED_HWVIRT_VMX) + /* + * Nested-guest VM-exit reason stats. + */ + pVCpu->hm.s.paStatNestedExitReason = NULL; + rc = MMHyperAlloc(pVM, MAX_EXITREASON_STAT * sizeof(*pVCpu->hm.s.paStatNestedExitReason), 0 /* uAlignment */, MM_TAG_HM, + (void **)&pVCpu->hm.s.paStatNestedExitReason); + AssertRCReturn(rc, rc); + if (fCpuSupportsVmx) + { + for (int j = 0; j < MAX_EXITREASON_STAT; j++) + { + const char *pszExitName = HMGetVmxExitName(j); + if (pszExitName) + { + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.paStatNestedExitReason[j], STAMTYPE_COUNTER, STAMVISIBILITY_USED, + STAMUNIT_OCCURENCES, pszExitName, "/HM/CPU%d/NestedExit/Reason/%02x", i, j); + AssertRC(rc); + } + } + } + else + { + for (int j = 0; j < MAX_EXITREASON_STAT; j++) + { + const char *pszExitName = HMGetSvmExitName(j); + if (pszExitName) + { + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.paStatNestedExitReason[j], STAMTYPE_COUNTER, STAMVISIBILITY_USED, + STAMUNIT_OCCURENCES, pszExitName, "/HM/CPU%d/NestedExit/Reason/%02x", i, j); + AssertRC(rc); + } + } + } + rc = STAMR3RegisterF(pVM, &pVCpu->hm.s.StatNestedExitReasonNpf, STAMTYPE_COUNTER, STAMVISIBILITY_USED, + STAMUNIT_OCCURENCES, "Nested page fault", "/HM/CPU%d/NestedExit/Reason/#NPF", i); + AssertRCReturn(rc, rc); + pVCpu->hm.s.paStatNestedExitReasonR0 = MMHyperR3ToR0(pVM, pVCpu->hm.s.paStatNestedExitReason); +# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + Assert(pVCpu->hm.s.paStatNestedExitReasonR0 != NIL_RTR0PTR || !HMIsEnabled(pVM)); +# else + Assert(pVCpu->hm.s.paStatNestedExitReasonR0 != NIL_RTR0PTR); +# endif +#endif + + /* + * Injected events stats. 
+ */ + rc = MMHyperAlloc(pVM, sizeof(STAMCOUNTER) * 256, 8, MM_TAG_HM, (void **)&pVCpu->hm.s.paStatInjectedIrqs); + AssertRCReturn(rc, rc); + pVCpu->hm.s.paStatInjectedIrqsR0 = MMHyperR3ToR0(pVM, pVCpu->hm.s.paStatInjectedIrqs); +# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + Assert(pVCpu->hm.s.paStatInjectedIrqsR0 != NIL_RTR0PTR || !HMIsEnabled(pVM)); +# else + Assert(pVCpu->hm.s.paStatInjectedIrqsR0 != NIL_RTR0PTR); +# endif + for (unsigned j = 0; j < 255; j++) + { + STAMR3RegisterF(pVM, &pVCpu->hm.s.paStatInjectedIrqs[j], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Injected event.", + (j < 0x20) ? "/HM/CPU%d/EventInject/InjectTrap/%02X" : "/HM/CPU%d/EventInject/InjectIRQ/%02X", i, j); + } + +#endif /* VBOX_WITH_STATISTICS */ + } + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + /* + * Magic marker for searching in crash dumps. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; + strcpy((char *)pCache->aMagic, "VMCSCACHE Magic"); + pCache->uMagic = UINT64_C(0xdeadbeefdeadbeef); + } +#endif + + return VINF_SUCCESS; +} + + +/** + * Called when a init phase has completed. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat The phase that completed. + */ +VMMR3_INT_DECL(int) HMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + switch (enmWhat) + { + case VMINITCOMPLETED_RING3: + return hmR3InitFinalizeR3(pVM); + case VMINITCOMPLETED_RING0: + return hmR3InitFinalizeR0(pVM); + default: + return VINF_SUCCESS; + } +} + + +/** + * Turns off normal raw mode features. + * + * @param pVM The cross context VM structure. + */ +static void hmR3DisableRawMode(PVM pVM) +{ +/** @todo r=bird: HM shouldn't be doing this crap. */ + /* Reinit the paging mode to force the new shadow mode. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + PGMHCChangeMode(pVM, pVCpu, PGMMODE_REAL); + } +} + + +/** + * Initialize VT-x or AMD-V. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int hmR3InitFinalizeR0(PVM pVM) +{ + int rc; + + if (!HMIsEnabled(pVM)) + return VINF_SUCCESS; + + /* + * Hack to allow users to work around broken BIOSes that incorrectly set + * EFER.SVME, which makes us believe somebody else is already using AMD-V. + */ + if ( !pVM->hm.s.vmx.fSupported + && !pVM->hm.s.svm.fSupported + && pVM->hm.s.rcInit == VERR_SVM_IN_USE /* implies functional AMD-V */ + && RTEnvExist("VBOX_HWVIRTEX_IGNORE_SVM_IN_USE")) + { + LogRel(("HM: VBOX_HWVIRTEX_IGNORE_SVM_IN_USE active!\n")); + pVM->hm.s.svm.fSupported = true; + pVM->hm.s.svm.fIgnoreInUseError = true; + pVM->hm.s.rcInit = VINF_SUCCESS; + } + + /* + * Report ring-0 init errors. 
+ */ + if ( !pVM->hm.s.vmx.fSupported + && !pVM->hm.s.svm.fSupported) + { + LogRel(("HM: Failed to initialize VT-x / AMD-V: %Rrc\n", pVM->hm.s.rcInit)); + LogRel(("HM: VMX MSR_IA32_FEATURE_CONTROL=%RX64\n", pVM->hm.s.vmx.Msrs.u64FeatCtrl)); + switch (pVM->hm.s.rcInit) + { + case VERR_VMX_IN_VMX_ROOT_MODE: + return VM_SET_ERROR(pVM, VERR_VMX_IN_VMX_ROOT_MODE, "VT-x is being used by another hypervisor"); + case VERR_VMX_NO_VMX: + return VM_SET_ERROR(pVM, VERR_VMX_NO_VMX, "VT-x is not available"); + case VERR_VMX_MSR_VMX_DISABLED: + return VM_SET_ERROR(pVM, VERR_VMX_MSR_VMX_DISABLED, "VT-x is disabled in the BIOS"); + case VERR_VMX_MSR_ALL_VMX_DISABLED: + return VM_SET_ERROR(pVM, VERR_VMX_MSR_ALL_VMX_DISABLED, "VT-x is disabled in the BIOS for all CPU modes"); + case VERR_VMX_MSR_LOCKING_FAILED: + return VM_SET_ERROR(pVM, VERR_VMX_MSR_LOCKING_FAILED, "Failed to lock VT-x features while trying to enable VT-x"); + case VERR_VMX_MSR_VMX_ENABLE_FAILED: + return VM_SET_ERROR(pVM, VERR_VMX_MSR_VMX_ENABLE_FAILED, "Failed to enable VT-x features"); + case VERR_VMX_MSR_SMX_VMX_ENABLE_FAILED: + return VM_SET_ERROR(pVM, VERR_VMX_MSR_SMX_VMX_ENABLE_FAILED, "Failed to enable VT-x features in SMX mode"); + + case VERR_SVM_IN_USE: + return VM_SET_ERROR(pVM, VERR_SVM_IN_USE, "AMD-V is being used by another hypervisor"); + case VERR_SVM_NO_SVM: + return VM_SET_ERROR(pVM, VERR_SVM_NO_SVM, "AMD-V is not available"); + case VERR_SVM_DISABLED: + return VM_SET_ERROR(pVM, VERR_SVM_DISABLED, "AMD-V is disabled in the BIOS"); + } + return VMSetError(pVM, pVM->hm.s.rcInit, RT_SRC_POS, "HM ring-0 init failed: %Rrc", pVM->hm.s.rcInit); + } + + /* + * Enable VT-x or AMD-V on all host CPUs. + */ + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_HM_ENABLE, 0, NULL); + if (RT_FAILURE(rc)) + { + LogRel(("HM: Failed to enable, error %Rrc\n", rc)); + HMR3CheckError(pVM, rc); + return rc; + } + + /* + * No TPR patching is required when the IO-APIC is not enabled for this VM. + * (Main should have taken care of this already) + */ + if (!PDMHasIoApic(pVM)) + { + Assert(!pVM->hm.s.fTprPatchingAllowed); /* paranoia */ + pVM->hm.s.fTprPatchingAllowed = false; + } + + /* + * Check if L1D flush is needed/possible. + */ + if ( !pVM->cpum.ro.HostFeatures.fFlushCmd + || pVM->cpum.ro.HostFeatures.enmMicroarch < kCpumMicroarch_Intel_Core7_Nehalem + || pVM->cpum.ro.HostFeatures.enmMicroarch >= kCpumMicroarch_Intel_Core7_End + || pVM->cpum.ro.HostFeatures.fArchVmmNeedNotFlushL1d + || pVM->cpum.ro.HostFeatures.fArchRdclNo) + pVM->hm.s.fL1dFlushOnSched = pVM->hm.s.fL1dFlushOnVmEntry = false; + + /* + * Sync options. + */ + /** @todo Move this out of of CPUMCTX and into some ring-0 only HM structure. + * That will require a little bit of work, of course. 
 */
+    for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
+    {
+        PVMCPU   pVCpu   = &pVM->aCpus[iCpu];
+        PCPUMCTX pCpuCtx = &pVCpu->cpum.GstCtx;
+        pCpuCtx->fWorldSwitcher &= ~(CPUMCTX_WSF_IBPB_EXIT | CPUMCTX_WSF_IBPB_ENTRY);
+        if (pVM->cpum.ro.HostFeatures.fIbpb)
+        {
+            if (pVM->hm.s.fIbpbOnVmExit)
+                pCpuCtx->fWorldSwitcher |= CPUMCTX_WSF_IBPB_EXIT;
+            if (pVM->hm.s.fIbpbOnVmEntry)
+                pCpuCtx->fWorldSwitcher |= CPUMCTX_WSF_IBPB_ENTRY;
+        }
+        if (pVM->cpum.ro.HostFeatures.fFlushCmd && pVM->hm.s.fL1dFlushOnVmEntry)
+            pCpuCtx->fWorldSwitcher |= CPUMCTX_WSF_L1D_ENTRY;
+        if (iCpu == 0)
+            LogRel(("HM: fWorldSwitcher=%#x (fIbpbOnVmExit=%RTbool fIbpbOnVmEntry=%RTbool fL1dFlushOnVmEntry=%RTbool); fL1dFlushOnSched=%RTbool\n",
+                    pCpuCtx->fWorldSwitcher, pVM->hm.s.fIbpbOnVmExit, pVM->hm.s.fIbpbOnVmEntry, pVM->hm.s.fL1dFlushOnVmEntry,
+                    pVM->hm.s.fL1dFlushOnSched));
+    }
+
+    /*
+     * Do the vendor-specific initialization.
+     *
+     * Note! We disable release log buffering here since we're doing a relatively
+     *       large amount of logging and don't want to hit the disk with each LogRel
+     *       statement.
+     */
+    AssertLogRelReturn(!pVM->hm.s.fInitialized, VERR_HM_IPE_5);
+    bool fOldBuffered = RTLogRelSetBuffering(true /*fBuffered*/);
+    if (pVM->hm.s.vmx.fSupported)
+        rc = hmR3InitFinalizeR0Intel(pVM);
+    else
+        rc = hmR3InitFinalizeR0Amd(pVM);
+    LogRel(("HM: VT-x/AMD-V init method: %s\n", (pVM->hm.s.fGlobalInit) ? "GLOBAL" : "LOCAL"));
+    RTLogRelSetBuffering(fOldBuffered);
+    pVM->hm.s.fInitialized = true;
+
+    return rc;
+}
+
+
+/**
+ * @callback_method_impl{FNPDMVMMDEVHEAPNOTIFY}
+ */
+static DECLCALLBACK(void) hmR3VmmDevHeapNotify(PVM pVM, void *pvAllocation, RTGCPHYS GCPhysAllocation)
+{
+    NOREF(pVM);
+    NOREF(pvAllocation);
+    NOREF(GCPhysAllocation);
+}
+
+
+/**
+ * Returns a description of the VMCS (and associated regions') memory type given the
+ * IA32_VMX_BASIC MSR.
+ *
+ * @returns The descriptive memory type.
+ * @param   uMsrVmxBasic    IA32_VMX_BASIC MSR value.
+ */
+static const char *hmR3VmxGetMemTypeDesc(uint64_t uMsrVmxBasic)
+{
+    uint8_t const uMemType = RT_BF_GET(uMsrVmxBasic, VMX_BF_BASIC_VMCS_MEM_TYPE);
+    switch (uMemType)
+    {
+        case VMX_BASIC_MEM_TYPE_WB: return "Write Back (WB)";
+        case VMX_BASIC_MEM_TYPE_UC: return "Uncacheable (UC)";
+    }
+    return "Unknown";
+}
+
+
+/**
+ * Returns a single-line description of all the activity-states supported by the CPU
+ * given the IA32_VMX_MISC MSR.
+ *
+ * @returns All supported activity states.
+ * @param   uMsrMisc    IA32_VMX_MISC MSR value.
+ */
+static const char *hmR3VmxGetActivityStateAllDesc(uint64_t uMsrMisc)
+{
+    static const char * const s_apszActStates[] =
+    {
+        "",
+        " ( HLT )",
+        " ( SHUTDOWN )",
+        " ( HLT SHUTDOWN )",
+        " ( SIPI_WAIT )",
+        " ( HLT SIPI_WAIT )",
+        " ( SHUTDOWN SIPI_WAIT )",
+        " ( HLT SHUTDOWN SIPI_WAIT )"
+    };
+    uint8_t const idxActStates = RT_BF_GET(uMsrMisc, VMX_BF_MISC_ACTIVITY_STATES);
+    Assert(idxActStates < RT_ELEMENTS(s_apszActStates));
+    return s_apszActStates[idxActStates];
+}
+
+
+/**
+ * Reports MSR_IA32_FEATURE_CONTROL MSR to the log.
+ *
+ * @param   fFeatMsr    The feature control MSR value.
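+ *
+ * For illustration only: with a hypothetical MSR value of 0x5 (LOCK and VMXON
+ * set), the HMVMX_REPORT_MSR_CAP invocations in this function would emit
+ * release-log lines roughly like:
+ * @verbatim
+ *      HM: MSR_IA32_FEATURE_CONTROL = 0x5
+ *      HM: LOCK
+ *      HM: VMXON
+ * @endverbatim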
+ */ +static void hmR3VmxReportFeatCtlMsr(uint64_t fFeatMsr) +{ + uint64_t const val = fFeatMsr; + LogRel(("HM: MSR_IA32_FEATURE_CONTROL = %#RX64\n", val)); + HMVMX_REPORT_MSR_CAP(val, "LOCK", MSR_IA32_FEATURE_CONTROL_LOCK); + HMVMX_REPORT_MSR_CAP(val, "SMX_VMXON", MSR_IA32_FEATURE_CONTROL_SMX_VMXON); + HMVMX_REPORT_MSR_CAP(val, "VMXON", MSR_IA32_FEATURE_CONTROL_VMXON); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN0", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_0); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN1", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_1); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN2", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_2); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN3", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_3); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN4", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_4); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN5", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_5); + HMVMX_REPORT_MSR_CAP(val, "SENTER_LOCAL_FN6", MSR_IA32_FEATURE_CONTROL_SENTER_LOCAL_FN_6); + HMVMX_REPORT_MSR_CAP(val, "SENTER_GLOBAL_EN", MSR_IA32_FEATURE_CONTROL_SENTER_GLOBAL_EN); + HMVMX_REPORT_MSR_CAP(val, "SGX_LAUNCH_EN", MSR_IA32_FEATURE_CONTROL_SGX_LAUNCH_EN); + HMVMX_REPORT_MSR_CAP(val, "SGX_GLOBAL_EN", MSR_IA32_FEATURE_CONTROL_SGX_GLOBAL_EN); + HMVMX_REPORT_MSR_CAP(val, "LMCE", MSR_IA32_FEATURE_CONTROL_LMCE); + if (!(val & MSR_IA32_FEATURE_CONTROL_LOCK)) + LogRel(("HM: MSR_IA32_FEATURE_CONTROL lock bit not set, possibly bad hardware!\n")); +} + + +/** + * Reports MSR_IA32_VMX_BASIC MSR to the log. + * + * @param uBasicMsr The VMX basic MSR value. + */ +static void hmR3VmxReportBasicMsr(uint64_t uBasicMsr) +{ + LogRel(("HM: MSR_IA32_VMX_BASIC = %#RX64\n", uBasicMsr)); + LogRel(("HM: VMCS id = %#x\n", RT_BF_GET(uBasicMsr, VMX_BF_BASIC_VMCS_ID))); + LogRel(("HM: VMCS size = %u bytes\n", RT_BF_GET(uBasicMsr, VMX_BF_BASIC_VMCS_SIZE))); + LogRel(("HM: VMCS physical address limit = %s\n", RT_BF_GET(uBasicMsr, VMX_BF_BASIC_PHYSADDR_WIDTH) ? + "< 4 GB" : "None")); + LogRel(("HM: VMCS memory type = %s\n", hmR3VmxGetMemTypeDesc(uBasicMsr))); + LogRel(("HM: Dual-monitor treatment support = %RTbool\n", RT_BF_GET(uBasicMsr, VMX_BF_BASIC_DUAL_MON))); + LogRel(("HM: OUTS & INS instruction-info = %RTbool\n", RT_BF_GET(uBasicMsr, VMX_BF_BASIC_VMCS_INS_OUTS))); + LogRel(("HM: Supports true capability MSRs = %RTbool\n", RT_BF_GET(uBasicMsr, VMX_BF_BASIC_TRUE_CTLS))); +} + + +/** + * Reports MSR_IA32_PINBASED_CTLS to the log. + * + * @param pVmxMsr Pointer to the VMX MSR. + */ +static void hmR3VmxReportPinBasedCtlsMsr(PCVMXCTLSMSR pVmxMsr) +{ + uint64_t const fAllowed1 = pVmxMsr->n.allowed1; + uint64_t const fAllowed0 = pVmxMsr->n.allowed0; + LogRel(("HM: MSR_IA32_VMX_PINBASED_CTLS = %#RX64\n", pVmxMsr->u)); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "EXT_INT_EXIT", VMX_PIN_CTLS_EXT_INT_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "NMI_EXIT", VMX_PIN_CTLS_NMI_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VIRTUAL_NMI", VMX_PIN_CTLS_VIRT_NMI); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "PREEMPT_TIMER", VMX_PIN_CTLS_PREEMPT_TIMER); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "POSTED_INT", VMX_PIN_CTLS_POSTED_INT); +} + + +/** + * Reports MSR_IA32_VMX_PROCBASED_CTLS MSR to the log. + * + * @param pVmxMsr Pointer to the VMX MSR. 
+ */ +static void hmR3VmxReportProcBasedCtlsMsr(PCVMXCTLSMSR pVmxMsr) +{ + uint64_t const fAllowed1 = pVmxMsr->n.allowed1; + uint64_t const fAllowed0 = pVmxMsr->n.allowed0; + LogRel(("HM: MSR_IA32_VMX_PROCBASED_CTLS = %#RX64\n", pVmxMsr->u)); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "INT_WINDOW_EXIT", VMX_PROC_CTLS_INT_WINDOW_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "USE_TSC_OFFSETTING", VMX_PROC_CTLS_USE_TSC_OFFSETTING); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "HLT_EXIT", VMX_PROC_CTLS_HLT_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "INVLPG_EXIT", VMX_PROC_CTLS_INVLPG_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "MWAIT_EXIT", VMX_PROC_CTLS_MWAIT_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "RDPMC_EXIT", VMX_PROC_CTLS_RDPMC_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "RDTSC_EXIT", VMX_PROC_CTLS_RDTSC_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "CR3_LOAD_EXIT", VMX_PROC_CTLS_CR3_LOAD_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "CR3_STORE_EXIT", VMX_PROC_CTLS_CR3_STORE_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "CR8_LOAD_EXIT", VMX_PROC_CTLS_CR8_LOAD_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "CR8_STORE_EXIT", VMX_PROC_CTLS_CR8_STORE_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "USE_TPR_SHADOW", VMX_PROC_CTLS_USE_TPR_SHADOW); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "NMI_WINDOW_EXIT", VMX_PROC_CTLS_NMI_WINDOW_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "MOV_DR_EXIT", VMX_PROC_CTLS_MOV_DR_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "UNCOND_IO_EXIT", VMX_PROC_CTLS_UNCOND_IO_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "USE_IO_BITMAPS", VMX_PROC_CTLS_USE_IO_BITMAPS); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "MONITOR_TRAP_FLAG", VMX_PROC_CTLS_MONITOR_TRAP_FLAG); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "USE_MSR_BITMAPS", VMX_PROC_CTLS_USE_MSR_BITMAPS); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "MONITOR_EXIT", VMX_PROC_CTLS_MONITOR_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "PAUSE_EXIT", VMX_PROC_CTLS_PAUSE_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "USE_SECONDARY_CTLS", VMX_PROC_CTLS_USE_SECONDARY_CTLS); +} + + +/** + * Reports MSR_IA32_VMX_PROCBASED_CTLS2 MSR to the log. + * + * @param pVmxMsr Pointer to the VMX MSR. 
+ */ +static void hmR3VmxReportProcBasedCtls2Msr(PCVMXCTLSMSR pVmxMsr) +{ + uint64_t const fAllowed1 = pVmxMsr->n.allowed1; + uint64_t const fAllowed0 = pVmxMsr->n.allowed0; + LogRel(("HM: MSR_IA32_VMX_PROCBASED_CTLS2 = %#RX64\n", pVmxMsr->u)); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VIRT_APIC_ACCESS", VMX_PROC_CTLS2_VIRT_APIC_ACCESS); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "EPT", VMX_PROC_CTLS2_EPT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "DESC_TABLE_EXIT", VMX_PROC_CTLS2_DESC_TABLE_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "RDTSCP", VMX_PROC_CTLS2_RDTSCP); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VIRT_X2APIC_MODE", VMX_PROC_CTLS2_VIRT_X2APIC_MODE); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VPID", VMX_PROC_CTLS2_VPID); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "WBINVD_EXIT", VMX_PROC_CTLS2_WBINVD_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "UNRESTRICTED_GUEST", VMX_PROC_CTLS2_UNRESTRICTED_GUEST); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "APIC_REG_VIRT", VMX_PROC_CTLS2_APIC_REG_VIRT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VIRT_INT_DELIVERY", VMX_PROC_CTLS2_VIRT_INT_DELIVERY); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "PAUSE_LOOP_EXIT", VMX_PROC_CTLS2_PAUSE_LOOP_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "RDRAND_EXIT", VMX_PROC_CTLS2_RDRAND_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "INVPCID", VMX_PROC_CTLS2_INVPCID); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VMFUNC", VMX_PROC_CTLS2_VMFUNC); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "VMCS_SHADOWING", VMX_PROC_CTLS2_VMCS_SHADOWING); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "ENCLS_EXIT", VMX_PROC_CTLS2_ENCLS_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "RDSEED_EXIT", VMX_PROC_CTLS2_RDSEED_EXIT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "PML", VMX_PROC_CTLS2_PML); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "EPT_VE", VMX_PROC_CTLS2_EPT_VE); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "CONCEAL_FROM_PT", VMX_PROC_CTLS2_CONCEAL_FROM_PT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "XSAVES_XRSTORS", VMX_PROC_CTLS2_XSAVES_XRSTORS); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "TSC_SCALING", VMX_PROC_CTLS2_TSC_SCALING); +} + + +/** + * Reports MSR_IA32_VMX_ENTRY_CTLS to the log. + * + * @param pVmxMsr Pointer to the VMX MSR. + */ +static void hmR3VmxReportEntryCtlsMsr(PCVMXCTLSMSR pVmxMsr) +{ + uint64_t const fAllowed1 = pVmxMsr->n.allowed1; + uint64_t const fAllowed0 = pVmxMsr->n.allowed0; + LogRel(("HM: MSR_IA32_VMX_ENTRY_CTLS = %#RX64\n", pVmxMsr->u)); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_DEBUG", VMX_ENTRY_CTLS_LOAD_DEBUG); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "IA32E_MODE_GUEST", VMX_ENTRY_CTLS_IA32E_MODE_GUEST); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "ENTRY_TO_SMM", VMX_ENTRY_CTLS_ENTRY_TO_SMM); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "DEACTIVATE_DUAL_MON", VMX_ENTRY_CTLS_DEACTIVATE_DUAL_MON); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_PERF_MSR", VMX_ENTRY_CTLS_LOAD_PERF_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_PAT_MSR", VMX_ENTRY_CTLS_LOAD_PAT_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_EFER_MSR", VMX_ENTRY_CTLS_LOAD_EFER_MSR); +} + + +/** + * Reports MSR_IA32_VMX_EXIT_CTLS to the log. + * + * @param pVmxMsr Pointer to the VMX MSR. 
+ */ +static void hmR3VmxReportExitCtlsMsr(PCVMXCTLSMSR pVmxMsr) +{ + uint64_t const fAllowed1 = pVmxMsr->n.allowed1; + uint64_t const fAllowed0 = pVmxMsr->n.allowed0; + LogRel(("HM: MSR_IA32_VMX_EXIT_CTLS = %#RX64\n", pVmxMsr->u)); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "SAVE_DEBUG", VMX_EXIT_CTLS_SAVE_DEBUG); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "HOST_ADDR_SPACE_SIZE", VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_PERF_MSR", VMX_EXIT_CTLS_LOAD_PERF_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "ACK_EXT_INT", VMX_EXIT_CTLS_ACK_EXT_INT); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "SAVE_PAT_MSR", VMX_EXIT_CTLS_SAVE_PAT_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_PAT_MSR", VMX_EXIT_CTLS_LOAD_PAT_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "SAVE_EFER_MSR", VMX_EXIT_CTLS_SAVE_EFER_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "LOAD_EFER_MSR", VMX_EXIT_CTLS_LOAD_EFER_MSR); + HMVMX_REPORT_FEAT(fAllowed1, fAllowed0, "SAVE_PREEMPT_TIMER", VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER); +} + + +/** + * Reports MSR_IA32_VMX_EPT_VPID_CAP MSR to the log. + * + * @param fCaps The VMX EPT/VPID capability MSR value. + */ +static void hmR3VmxReportEptVpidCapsMsr(uint64_t fCaps) +{ + LogRel(("HM: MSR_IA32_VMX_EPT_VPID_CAP = %#RX64\n", fCaps)); + HMVMX_REPORT_MSR_CAP(fCaps, "RWX_X_ONLY", MSR_IA32_VMX_EPT_VPID_CAP_RWX_X_ONLY); + HMVMX_REPORT_MSR_CAP(fCaps, "PAGE_WALK_LENGTH_4", MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4); + HMVMX_REPORT_MSR_CAP(fCaps, "EMT_UC", MSR_IA32_VMX_EPT_VPID_CAP_EMT_UC); + HMVMX_REPORT_MSR_CAP(fCaps, "EMT_WB", MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB); + HMVMX_REPORT_MSR_CAP(fCaps, "PDE_2M", MSR_IA32_VMX_EPT_VPID_CAP_PDE_2M); + HMVMX_REPORT_MSR_CAP(fCaps, "PDPTE_1G", MSR_IA32_VMX_EPT_VPID_CAP_PDPTE_1G); + HMVMX_REPORT_MSR_CAP(fCaps, "INVEPT", MSR_IA32_VMX_EPT_VPID_CAP_INVEPT); + HMVMX_REPORT_MSR_CAP(fCaps, "EPT_ACCESS_DIRTY", MSR_IA32_VMX_EPT_VPID_CAP_EPT_ACCESS_DIRTY); + HMVMX_REPORT_MSR_CAP(fCaps, "INVEPT_SINGLE_CONTEXT", MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT); + HMVMX_REPORT_MSR_CAP(fCaps, "INVEPT_ALL_CONTEXTS", MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS); + HMVMX_REPORT_MSR_CAP(fCaps, "INVVPID", MSR_IA32_VMX_EPT_VPID_CAP_INVVPID); + HMVMX_REPORT_MSR_CAP(fCaps, "INVVPID_INDIV_ADDR", MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR); + HMVMX_REPORT_MSR_CAP(fCaps, "INVVPID_SINGLE_CONTEXT", MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT); + HMVMX_REPORT_MSR_CAP(fCaps, "INVVPID_ALL_CONTEXTS", MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS); + HMVMX_REPORT_MSR_CAP(fCaps, "INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS", MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS); +} + + +/** + * Reports MSR_IA32_VMX_MISC MSR to the log. + * + * @param pVM Pointer to the VM. + * @param fMisc The VMX misc. MSR value. 
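The body of hmR3VmxReportMiscMsr follows; for context on the PREEMPT_TIMER_TSC value it reports (and on the erratum handling around cPreemptTimerShift), the VMX-preemption timer counts down by one each time the TSC advances by 2^shift, so the shift directly sets the timer's resolution. A short worked example, assuming a 2.4 GHz TSC and a shift of 5, both invented for the illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t const uTscHz   = UINT64_C(2400000000); /* assumed host TSC frequency  */
        unsigned const cShift   = 5;                    /* assumed IA32_VMX_MISC[4:0]  */
        uint64_t const uTimerHz = uTscHz >> cShift;     /* one tick per 32 TSC cycles  */
        printf("VMX-preemption timer resolution: %llu Hz\n", (unsigned long long)uTimerHz);
        return 0;
    }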
+ */ +static void hmR3VmxReportMiscMsr(PVM pVM, uint64_t fMisc) +{ + LogRel(("HM: MSR_IA32_VMX_MISC = %#RX64\n", fMisc)); + uint8_t const cPreemptTimerShift = RT_BF_GET(fMisc, VMX_BF_MISC_PREEMPT_TIMER_TSC); + if (cPreemptTimerShift == pVM->hm.s.vmx.cPreemptTimerShift) + LogRel(("HM: PREEMPT_TIMER_TSC = %#x\n", cPreemptTimerShift)); + else + { + LogRel(("HM: PREEMPT_TIMER_TSC = %#x - erratum detected, using %#x instead\n", cPreemptTimerShift, + pVM->hm.s.vmx.cPreemptTimerShift)); + } + LogRel(("HM: EXIT_SAVE_EFER_LMA = %RTbool\n", RT_BF_GET(fMisc, VMX_BF_MISC_EXIT_SAVE_EFER_LMA))); + LogRel(("HM: ACTIVITY_STATES = %#x%s\n", RT_BF_GET(fMisc, VMX_BF_MISC_ACTIVITY_STATES), + hmR3VmxGetActivityStateAllDesc(fMisc))); + LogRel(("HM: INTEL_PT = %RTbool\n", RT_BF_GET(fMisc, VMX_BF_MISC_INTEL_PT))); + LogRel(("HM: SMM_READ_SMBASE_MSR = %RTbool\n", RT_BF_GET(fMisc, VMX_BF_MISC_SMM_READ_SMBASE_MSR))); + LogRel(("HM: CR3_TARGET = %#x\n", RT_BF_GET(fMisc, VMX_BF_MISC_CR3_TARGET))); + LogRel(("HM: MAX_MSR = %#x ( %u )\n", RT_BF_GET(fMisc, VMX_BF_MISC_MAX_MSRS), + VMX_MISC_MAX_MSRS(fMisc))); + LogRel(("HM: VMXOFF_BLOCK_SMI = %RTbool\n", RT_BF_GET(fMisc, VMX_BF_MISC_VMXOFF_BLOCK_SMI))); + LogRel(("HM: VMWRITE_ALL = %RTbool\n", RT_BF_GET(fMisc, VMX_BF_MISC_VMWRITE_ALL))); + LogRel(("HM: ENTRY_INJECT_SOFT_INT = %#x\n", RT_BF_GET(fMisc, VMX_BF_MISC_ENTRY_INJECT_SOFT_INT))); + LogRel(("HM: MSEG_ID = %#x\n", RT_BF_GET(fMisc, VMX_BF_MISC_MSEG_ID))); +} + + +/** + * Reports MSR_IA32_VMX_VMCS_ENUM MSR to the log. + * + * @param uVmcsEnum The VMX VMCS enum MSR value. + */ +static void hmR3VmxReportVmcsEnumMsr(uint64_t uVmcsEnum) +{ + LogRel(("HM: MSR_IA32_VMX_VMCS_ENUM = %#RX64\n", uVmcsEnum)); + LogRel(("HM: HIGHEST_IDX = %#x\n", RT_BF_GET(uVmcsEnum, VMX_BF_VMCS_ENUM_HIGHEST_IDX))); +} + + +/** + * Reports MSR_IA32_VMX_VMFUNC MSR to the log. + * + * @param uVmFunc The VMX VMFUNC MSR value. + */ +static void hmR3VmxReportVmFuncMsr(uint64_t uVmFunc) +{ + LogRel(("HM: MSR_IA32_VMX_VMFUNC = %#RX64\n", uVmFunc)); + HMVMX_REPORT_ALLOWED_FEAT(uVmFunc, "EPTP_SWITCHING", RT_BF_GET(uVmFunc, VMX_BF_VMFUNC_EPTP_SWITCHING)); +} + + +/** + * Reports VMX CR0, CR4 fixed MSRs. + * + * @param pMsrs Pointer to the VMX MSRs. + */ +static void hmR3VmxReportCrFixedMsrs(PVMXMSRS pMsrs) +{ + LogRel(("HM: MSR_IA32_VMX_CR0_FIXED0 = %#RX64\n", pMsrs->u64Cr0Fixed0)); + LogRel(("HM: MSR_IA32_VMX_CR0_FIXED1 = %#RX64\n", pMsrs->u64Cr0Fixed1)); + LogRel(("HM: MSR_IA32_VMX_CR4_FIXED0 = %#RX64\n", pMsrs->u64Cr4Fixed0)); + LogRel(("HM: MSR_IA32_VMX_CR4_FIXED1 = %#RX64\n", pMsrs->u64Cr4Fixed1)); +} + + +/** + * Finish VT-x initialization (after ring-0 init). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
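hmR3VmxReportCrFixedMsrs above only prints the four fixed MSRs, so it is worth stating what they mean: while VMX is enabled, every bit set in a FIXED0 MSR must be set in the corresponding control register, and every bit clear in the matching FIXED1 MSR must stay clear. A standalone check of a candidate CR0 value against an invented FIXED0/FIXED1 pair (0x80000021 is PE, NE and PG, a commonly forced-on set when unrestricted guest execution is not in use):

    #include <stdint.h>
    #include <stdio.h>

    /* True if uCr satisfies a FIXED0/FIXED1 pair. */
    static int crValueIsValid(uint64_t uCr, uint64_t uFixed0, uint64_t uFixed1)
    {
        return (uCr & uFixed0) == uFixed0   /* all mandatory-one bits present  */
            && (uCr & ~uFixed1) == 0;       /* no mandatory-zero bits present  */
    }

    int main(void)
    {
        uint64_t const uCr0Fixed0 = UINT64_C(0x80000021);
        uint64_t const uCr0Fixed1 = UINT64_C(0xffffffff);
        printf("CR0=%#x -> %d\n", 0x80000021u, crValueIsValid(UINT64_C(0x80000021), uCr0Fixed0, uCr0Fixed1));
        printf("CR0=%#x -> %d\n", 0x00000001u, crValueIsValid(UINT64_C(0x00000001), uCr0Fixed0, uCr0Fixed1));
        return 0;
    }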
+ */ +static int hmR3InitFinalizeR0Intel(PVM pVM) +{ + int rc; + + Log(("pVM->hm.s.vmx.fSupported = %d\n", pVM->hm.s.vmx.fSupported)); + AssertLogRelReturn(pVM->hm.s.vmx.Msrs.u64FeatCtrl != 0, VERR_HM_IPE_4); + + LogRel(("HM: Using VT-x implementation 2.0\n")); + LogRel(("HM: Max resume loops = %u\n", pVM->hm.s.cMaxResumeLoops)); + LogRel(("HM: Host CR4 = %#RX64\n", pVM->hm.s.vmx.u64HostCr4)); + LogRel(("HM: Host EFER = %#RX64\n", pVM->hm.s.vmx.u64HostEfer)); + LogRel(("HM: MSR_IA32_SMM_MONITOR_CTL = %#RX64\n", pVM->hm.s.vmx.u64HostSmmMonitorCtl)); + + hmR3VmxReportFeatCtlMsr(pVM->hm.s.vmx.Msrs.u64FeatCtrl); + hmR3VmxReportBasicMsr(pVM->hm.s.vmx.Msrs.u64Basic); + + hmR3VmxReportPinBasedCtlsMsr(&pVM->hm.s.vmx.Msrs.PinCtls); + hmR3VmxReportProcBasedCtlsMsr(&pVM->hm.s.vmx.Msrs.ProcCtls); + if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS) + hmR3VmxReportProcBasedCtls2Msr(&pVM->hm.s.vmx.Msrs.ProcCtls2); + + hmR3VmxReportEntryCtlsMsr(&pVM->hm.s.vmx.Msrs.EntryCtls); + hmR3VmxReportExitCtlsMsr(&pVM->hm.s.vmx.Msrs.ExitCtls); + + if (RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_TRUE_CTLS)) + { + /* We don't extensively dump the true capability MSRs as we don't use them, see @bugref{9180#c5}. */ + LogRel(("HM: MSR_IA32_VMX_TRUE_PINBASED_CTLS = %#RX64\n", pVM->hm.s.vmx.Msrs.TruePinCtls)); + LogRel(("HM: MSR_IA32_VMX_TRUE_PROCBASED_CTLS = %#RX64\n", pVM->hm.s.vmx.Msrs.TrueProcCtls)); + LogRel(("HM: MSR_IA32_VMX_TRUE_ENTRY_CTLS = %#RX64\n", pVM->hm.s.vmx.Msrs.TrueEntryCtls)); + LogRel(("HM: MSR_IA32_VMX_TRUE_EXIT_CTLS = %#RX64\n", pVM->hm.s.vmx.Msrs.TrueExitCtls)); + } + + hmR3VmxReportMiscMsr(pVM, pVM->hm.s.vmx.Msrs.u64Misc); + hmR3VmxReportVmcsEnumMsr(pVM->hm.s.vmx.Msrs.u64VmcsEnum); + if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps) + hmR3VmxReportEptVpidCapsMsr(pVM->hm.s.vmx.Msrs.u64EptVpidCaps); + if (pVM->hm.s.vmx.Msrs.u64VmFunc) + hmR3VmxReportVmFuncMsr(pVM->hm.s.vmx.Msrs.u64VmFunc); + hmR3VmxReportCrFixedMsrs(&pVM->hm.s.vmx.Msrs); + + LogRel(("HM: APIC-access page physaddr = %#RHp\n", pVM->hm.s.vmx.HCPhysApicAccess)); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + LogRel(("HM: VCPU%3d: MSR bitmap physaddr = %#RHp\n", i, pVM->aCpus[i].hm.s.vmx.HCPhysMsrBitmap)); + LogRel(("HM: VCPU%3d: VMCS physaddr = %#RHp\n", i, pVM->aCpus[i].hm.s.vmx.HCPhysVmcs)); + } + + /* + * EPT and unrestricted guest execution are determined in HMR3Init, verify the sanity of that. + */ + AssertLogRelReturn( !pVM->hm.s.fNestedPaging + || (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), + VERR_HM_IPE_1); + AssertLogRelReturn( !pVM->hm.s.vmx.fUnrestrictedGuest + || ( (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST) + && pVM->hm.s.fNestedPaging), + VERR_HM_IPE_1); + + /* + * Enable VPID if configured and supported. + */ + if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VPID) + pVM->hm.s.vmx.fVpid = pVM->hm.s.vmx.fAllowVpid; + +#if 0 + /* + * Enable APIC register virtualization and virtual-interrupt delivery if supported. + */ + if ( (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT) + && (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY)) + pVM->hm.s.fVirtApicRegs = true; + + /* + * Enable posted-interrupt processing if supported. + */ + /** @todo Add and query IPRT API for host OS support for posted-interrupt IPI + * here. 
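The two AssertLogRelReturn checks in hmR3InitFinalizeR0Intel above encode configuration dependencies rather than hardware facts: nested paging is only valid if the CPU advertises EPT, and unrestricted guest execution additionally requires nested paging. A compact restatement of those rules as a standalone predicate, with plain booleans standing in for the capability bits and config flags:

    #include <stdio.h>

    /* Sketch of the dependency rules: EPT must back nested paging, and
       unrestricted guest needs both the UG execution control and nested paging. */
    static int hmConfigIsSane(int fNestedPaging, int fUnrestrictedGuest,
                              int fCpuHasEpt, int fCpuHasUnrestricted)
    {
        if (fNestedPaging && !fCpuHasEpt)
            return 0;
        if (fUnrestrictedGuest && !(fCpuHasUnrestricted && fNestedPaging))
            return 0;
        return 1;
    }

    int main(void)
    {
        printf("%d\n", hmConfigIsSane(1, 1, 1, 1)); /* 1: everything available      */
        printf("%d\n", hmConfigIsSane(0, 1, 1, 1)); /* 0: UG without nested paging  */
        printf("%d\n", hmConfigIsSane(1, 0, 0, 0)); /* 0: nested paging without EPT */
        return 0;
    }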
*/ + if ( (pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT) + && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT)) + pVM->hm.s.fPostedIntrs = true; +#endif + + /* + * Disallow RDTSCP in the guest if there is no secondary process-based VM execution controls as otherwise + * RDTSCP would cause a #UD. There might be no CPUs out there where this happens, as RDTSCP was introduced + * in Nehalems and secondary VM exec. controls should be supported in all of them, but nonetheless it's Intel... + */ + if ( !(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS) + && CPUMR3GetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_RDTSCP)) + { + CPUMR3ClearGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_RDTSCP); + LogRel(("HM: Disabled RDTSCP\n")); + } + + if (!pVM->hm.s.vmx.fUnrestrictedGuest) + { + /* Allocate three pages for the TSS we need for real mode emulation. (2 pages for the IO bitmap) */ + rc = PDMR3VmmDevHeapAlloc(pVM, HM_VTX_TOTAL_DEVHEAP_MEM, hmR3VmmDevHeapNotify, (RTR3PTR *)&pVM->hm.s.vmx.pRealModeTSS); + if (RT_SUCCESS(rc)) + { + /* The IO bitmap starts right after the virtual interrupt redirection bitmap. + Refer Intel spec. 20.3.3 "Software Interrupt Handling in Virtual-8086 mode" + esp. Figure 20-5.*/ + ASMMemZero32(pVM->hm.s.vmx.pRealModeTSS, sizeof(*pVM->hm.s.vmx.pRealModeTSS)); + pVM->hm.s.vmx.pRealModeTSS->offIoBitmap = sizeof(*pVM->hm.s.vmx.pRealModeTSS); + + /* Bit set to 0 means software interrupts are redirected to the + 8086 program interrupt handler rather than switching to + protected-mode handler. */ + memset(pVM->hm.s.vmx.pRealModeTSS->IntRedirBitmap, 0, sizeof(pVM->hm.s.vmx.pRealModeTSS->IntRedirBitmap)); + + /* Allow all port IO, so that port IO instructions do not cause + exceptions and would instead cause a VM-exit (based on VT-x's + IO bitmap which we currently configure to always cause an exit). */ + memset(pVM->hm.s.vmx.pRealModeTSS + 1, 0, PAGE_SIZE * 2); + *((unsigned char *)pVM->hm.s.vmx.pRealModeTSS + HM_VTX_TSS_SIZE - 2) = 0xff; + + /* + * Construct a 1024 element page directory with 4 MB pages for the identity mapped + * page table used in real and protected mode without paging with EPT. + */ + pVM->hm.s.vmx.pNonPagingModeEPTPageTable = (PX86PD)((char *)pVM->hm.s.vmx.pRealModeTSS + PAGE_SIZE * 3); + for (uint32_t i = 0; i < X86_PG_ENTRIES; i++) + { + pVM->hm.s.vmx.pNonPagingModeEPTPageTable->a[i].u = _4M * i; + pVM->hm.s.vmx.pNonPagingModeEPTPageTable->a[i].u |= X86_PDE4M_P | X86_PDE4M_RW | X86_PDE4M_US + | X86_PDE4M_A | X86_PDE4M_D | X86_PDE4M_PS + | X86_PDE4M_G; + } + + /* We convert it here every time as PCI regions could be reconfigured. */ + if (PDMVmmDevHeapIsEnabled(pVM)) + { + RTGCPHYS GCPhys; + rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys); + AssertRCReturn(rc, rc); + LogRel(("HM: Real Mode TSS guest physaddr = %#RGp\n", GCPhys)); + + rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys); + AssertRCReturn(rc, rc); + LogRel(("HM: Non-Paging Mode EPT CR3 = %#RGp\n", GCPhys)); + } + } + else + { + LogRel(("HM: No real mode VT-x support (PDMR3VMMDevHeapAlloc returned %Rrc)\n", rc)); + pVM->hm.s.vmx.pRealModeTSS = NULL; + pVM->hm.s.vmx.pNonPagingModeEPTPageTable = NULL; + return VMSetError(pVM, rc, RT_SRC_POS, + "HM failure: No real mode VT-x support (PDMR3VMMDevHeapAlloc returned %Rrc)", rc); + } + } + + LogRel((pVM->hm.s.fAllow64BitGuests + ? 
"HM: Guest support: 32-bit and 64-bit\n" + : "HM: Guest support: 32-bit only\n")); + + /* + * Call ring-0 to set up the VM. + */ + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /* idCpu */, VMMR0_DO_HM_SETUP_VM, 0 /* u64Arg */, NULL /* pReqHdr */); + if (rc != VINF_SUCCESS) + { + LogRel(("HM: VMX setup failed with rc=%Rrc!\n", rc)); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + LogRel(("HM: CPU[%u] Last instruction error %#x\n", i, pVCpu->hm.s.vmx.LastError.u32InstrError)); + LogRel(("HM: CPU[%u] HM error %#x (%u)\n", i, pVCpu->hm.s.u32HMError, pVCpu->hm.s.u32HMError)); + } + HMR3CheckError(pVM, rc); + return VMSetError(pVM, rc, RT_SRC_POS, "VT-x setup failed: %Rrc", rc); + } + + LogRel(("HM: Supports VMCS EFER fields = %RTbool\n", pVM->hm.s.vmx.fSupportsVmcsEfer)); + LogRel(("HM: Enabled VMX\n")); + pVM->hm.s.vmx.fEnabled = true; + + hmR3DisableRawMode(pVM); /** @todo make this go away! */ + + /* + * Change the CPU features. + */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SEP); + if (pVM->hm.s.fAllow64BitGuests) + { + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SYSCALL); /* 64 bits only on Intel CPUs */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LAHF); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + } + /* Turn on NXE if PAE has been enabled *and* the host has turned on NXE + (we reuse the host EFER in the switcher). */ + /** @todo this needs to be fixed properly!! */ + else if (CPUMR3GetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE)) + { + if (pVM->hm.s.vmx.u64HostEfer & MSR_K6_EFER_NXE) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + else + LogRel(("HM: NX not enabled on the host, unavailable to PAE guest\n")); + } + + /* + * Log configuration details. + */ + if (pVM->hm.s.fNestedPaging) + { + LogRel(("HM: Enabled nested paging\n")); + if (pVM->hm.s.vmx.enmTlbFlushEpt == VMXTLBFLUSHEPT_SINGLE_CONTEXT) + LogRel(("HM: EPT flush type = Single context\n")); + else if (pVM->hm.s.vmx.enmTlbFlushEpt == VMXTLBFLUSHEPT_ALL_CONTEXTS) + LogRel(("HM: EPT flush type = All contexts\n")); + else if (pVM->hm.s.vmx.enmTlbFlushEpt == VMXTLBFLUSHEPT_NOT_SUPPORTED) + LogRel(("HM: EPT flush type = Not supported\n")); + else + LogRel(("HM: EPT flush type = %#x\n", pVM->hm.s.vmx.enmTlbFlushEpt)); + + if (pVM->hm.s.vmx.fUnrestrictedGuest) + LogRel(("HM: Enabled unrestricted guest execution\n")); + +#if HC_ARCH_BITS == 64 + if (pVM->hm.s.fLargePages) + { + /* Use large (2 MB) pages for our EPT PDEs where possible. 
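The pNonPagingModeEPTPageTable construction above is an identity map of the low 4 GB using 4 MB pages, every entry present, writable, user, accessed, dirty, large and global, so that real mode and non-paged protected mode can run while EPT provides the actual translation. A self-contained sketch of the same table with the PDE flag values written out (standard IA-32 4 MB PDE bit positions):

    #include <stdint.h>
    #include <stdio.h>

    #define PDE4M_P   0x001u
    #define PDE4M_RW  0x002u
    #define PDE4M_US  0x004u
    #define PDE4M_A   0x020u
    #define PDE4M_D   0x040u
    #define PDE4M_PS  0x080u
    #define PDE4M_G   0x100u
    #define PDE4M_ALL (PDE4M_P | PDE4M_RW | PDE4M_US | PDE4M_A | PDE4M_D | PDE4M_PS | PDE4M_G)
    #define _4M       0x400000u

    int main(void)
    {
        static uint32_t aPd[1024];                 /* one 4 KB page directory         */
        for (uint32_t i = 0; i < 1024; i++)
            aPd[i] = i * _4M | PDE4M_ALL;          /* identity map: PDE i -> i * 4 MB */
        printf("PDE[0]    = %#010x\n", aPd[0]);    /* 0x000001e7 */
        printf("PDE[1]    = %#010x\n", aPd[1]);    /* 0x004001e7 */
        printf("PDE[1023] = %#010x\n", aPd[1023]); /* 0xffc001e7 */
        return 0;
    }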
*/ + PGMSetLargePageUsage(pVM, true); + LogRel(("HM: Enabled large page support\n")); + } +#endif + } + else + Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); + + if (pVM->hm.s.fVirtApicRegs) + LogRel(("HM: Enabled APIC-register virtualization support\n")); + + if (pVM->hm.s.fPostedIntrs) + LogRel(("HM: Enabled posted-interrupt processing support\n")); + + if (pVM->hm.s.vmx.fVpid) + { + LogRel(("HM: Enabled VPID\n")); + if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_INDIV_ADDR) + LogRel(("HM: VPID flush type = Individual addresses\n")); + else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT) + LogRel(("HM: VPID flush type = Single context\n")); + else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS) + LogRel(("HM: VPID flush type = All contexts\n")); + else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT_RETAIN_GLOBALS) + LogRel(("HM: VPID flush type = Single context retain globals\n")); + else + LogRel(("HM: VPID flush type = %#x\n", pVM->hm.s.vmx.enmTlbFlushVpid)); + } + else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_NOT_SUPPORTED) + LogRel(("HM: Ignoring VPID capabilities of CPU\n")); + + if (pVM->hm.s.vmx.fUsePreemptTimer) + LogRel(("HM: Enabled VMX-preemption timer (cPreemptTimerShift=%u)\n", pVM->hm.s.vmx.cPreemptTimerShift)); + else + LogRel(("HM: Disabled VMX-preemption timer\n")); + + return VINF_SUCCESS; +} + + +/** + * Finish AMD-V initialization (after ring-0 init). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int hmR3InitFinalizeR0Amd(PVM pVM) +{ + Log(("pVM->hm.s.svm.fSupported = %d\n", pVM->hm.s.svm.fSupported)); + + LogRel(("HM: Using AMD-V implementation 2.0\n")); + + uint32_t u32Family; + uint32_t u32Model; + uint32_t u32Stepping; + if (HMIsSubjectToSvmErratum170(&u32Family, &u32Model, &u32Stepping)) + LogRel(("HM: AMD Cpu with erratum 170 family %#x model %#x stepping %#x\n", u32Family, u32Model, u32Stepping)); + LogRel(("HM: Max resume loops = %u\n", pVM->hm.s.cMaxResumeLoops)); + LogRel(("HM: AMD HWCR MSR = %#RX64\n", pVM->hm.s.svm.u64MsrHwcr)); + LogRel(("HM: AMD-V revision = %#x\n", pVM->hm.s.svm.u32Rev)); + LogRel(("HM: AMD-V max ASID = %RU32\n", pVM->hm.s.uMaxAsid)); + LogRel(("HM: AMD-V features = %#x\n", pVM->hm.s.svm.u32Features)); + + /* + * Enumerate AMD-V features. 
+ */ + static const struct { uint32_t fFlag; const char *pszName; } s_aSvmFeatures[] = + { +#define HMSVM_REPORT_FEATURE(a_StrDesc, a_Define) { a_Define, a_StrDesc } + HMSVM_REPORT_FEATURE("NESTED_PAGING", X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING), + HMSVM_REPORT_FEATURE("LBR_VIRT", X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT), + HMSVM_REPORT_FEATURE("SVM_LOCK", X86_CPUID_SVM_FEATURE_EDX_SVM_LOCK), + HMSVM_REPORT_FEATURE("NRIP_SAVE", X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE), + HMSVM_REPORT_FEATURE("TSC_RATE_MSR", X86_CPUID_SVM_FEATURE_EDX_TSC_RATE_MSR), + HMSVM_REPORT_FEATURE("VMCB_CLEAN", X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN), + HMSVM_REPORT_FEATURE("FLUSH_BY_ASID", X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID), + HMSVM_REPORT_FEATURE("DECODE_ASSISTS", X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS), + HMSVM_REPORT_FEATURE("PAUSE_FILTER", X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER), + HMSVM_REPORT_FEATURE("PAUSE_FILTER_THRESHOLD", X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD), + HMSVM_REPORT_FEATURE("AVIC", X86_CPUID_SVM_FEATURE_EDX_AVIC), + HMSVM_REPORT_FEATURE("VIRT_VMSAVE_VMLOAD", X86_CPUID_SVM_FEATURE_EDX_VIRT_VMSAVE_VMLOAD), + HMSVM_REPORT_FEATURE("VGIF", X86_CPUID_SVM_FEATURE_EDX_VGIF), +#undef HMSVM_REPORT_FEATURE + }; + + uint32_t fSvmFeatures = pVM->hm.s.svm.u32Features; + for (unsigned i = 0; i < RT_ELEMENTS(s_aSvmFeatures); i++) + if (fSvmFeatures & s_aSvmFeatures[i].fFlag) + { + LogRel(("HM: %s\n", s_aSvmFeatures[i].pszName)); + fSvmFeatures &= ~s_aSvmFeatures[i].fFlag; + } + if (fSvmFeatures) + for (unsigned iBit = 0; iBit < 32; iBit++) + if (RT_BIT_32(iBit) & fSvmFeatures) + LogRel(("HM: Reserved bit %u\n", iBit)); + + /* + * Nested paging is determined in HMR3Init, verify the sanity of that. + */ + AssertLogRelReturn( !pVM->hm.s.fNestedPaging + || (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NESTED_PAGING), + VERR_HM_IPE_1); + +#if 0 + /** @todo Add and query IPRT API for host OS support for posted-interrupt IPI + * here. */ + if (RTR0IsPostIpiSupport()) + pVM->hm.s.fPostedIntrs = true; +#endif + + /* + * Call ring-0 to set up the VM. + */ + int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_HM_SETUP_VM, 0, NULL); + if (rc != VINF_SUCCESS) + { + AssertMsgFailed(("%Rrc\n", rc)); + LogRel(("HM: AMD-V setup failed with rc=%Rrc!\n", rc)); + return VMSetError(pVM, rc, RT_SRC_POS, "AMD-V setup failed: %Rrc", rc); + } + + LogRel(("HM: Enabled SVM\n")); + pVM->hm.s.svm.fEnabled = true; + + if (pVM->hm.s.fNestedPaging) + { + LogRel(("HM: Enabled nested paging\n")); + + /* + * Enable large pages (2 MB) if applicable. + */ +#if HC_ARCH_BITS == 64 + if (pVM->hm.s.fLargePages) + { + PGMSetLargePageUsage(pVM, true); + LogRel(("HM: Enabled large page support\n")); + } +#endif + } + + if (pVM->hm.s.fVirtApicRegs) + LogRel(("HM: Enabled APIC-register virtualization support\n")); + + if (pVM->hm.s.fPostedIntrs) + LogRel(("HM: Enabled posted-interrupt processing support\n")); + + hmR3DisableRawMode(pVM); + + /* + * Change the CPU features. + */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SEP); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SYSCALL); + if (pVM->hm.s.fAllow64BitGuests) + { + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LAHF); + } + /* Turn on NXE if PAE has been enabled. 
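The s_aSvmFeatures loop above is a reusable reporting idiom: walk a name/bit table, print and clear every recognised bit, then flag whatever is still set as reserved or not yet named. A standalone version with an invented feature word and a trimmed table:

    #include <stdint.h>
    #include <stdio.h>

    static const struct { uint32_t fFlag; const char *pszName; } s_aFeatures[] =
    {
        { 0x00000001, "NESTED_PAGING" },
        { 0x00000002, "LBR_VIRT"      },
        { 0x00000008, "NRIP_SAVE"     },
    };

    int main(void)
    {
        uint32_t fFeatures = 0x0000001b;              /* bits 0, 1, 3 and 4 set      */
        for (unsigned i = 0; i < sizeof(s_aFeatures) / sizeof(s_aFeatures[0]); i++)
            if (fFeatures & s_aFeatures[i].fFlag)
            {
                printf("feature: %s\n", s_aFeatures[i].pszName);
                fFeatures &= ~s_aFeatures[i].fFlag;   /* mark it as accounted for    */
            }
        for (unsigned iBit = 0; iBit < 32; iBit++)    /* anything left is unknown    */
            if (fFeatures & (UINT32_C(1) << iBit))
                printf("reserved bit %u\n", iBit);
        return 0;
    }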
*/ + else if (CPUMR3GetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE)) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + + LogRel(("HM: %s TPR patching\n", (pVM->hm.s.fTprPatchingAllowed) ? "Enabled" : "Disabled")); + + LogRel((pVM->hm.s.fAllow64BitGuests + ? "HM: Guest support: 32-bit and 64-bit\n" + : "HM: Guest support: 32-bit only\n")); + + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) HMR3Relocate(PVM pVM) +{ + Log(("HMR3Relocate to %RGv\n", MMHyperGetArea(pVM, 0))); + + /* Fetch the current paging mode during the relocate callback during state loading. */ + if (VMR3GetState(pVM) == VMSTATE_LOADING) + { + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->hm.s.enmShadowMode = PGMGetShadowMode(pVCpu); + } + } +#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) + if (HMIsEnabled(pVM)) + { + switch (PGMGetHostMode(pVM)) + { + case PGMMODE_32_BIT: + pVM->hm.s.pfnHost32ToGuest64R0 = VMMR3GetHostToGuestSwitcher(pVM, VMMSWITCHER_32_TO_AMD64); + break; + + case PGMMODE_PAE: + case PGMMODE_PAE_NX: + pVM->hm.s.pfnHost32ToGuest64R0 = VMMR3GetHostToGuestSwitcher(pVM, VMMSWITCHER_PAE_TO_AMD64); + break; + + default: + AssertFailed(); + break; + } + } +#endif + return; +} + + +/** + * Terminates the HM. + * + * Termination means cleaning up and freeing all resources, + * the VM itself is, at this point, powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) HMR3Term(PVM pVM) +{ + if (pVM->hm.s.vmx.pRealModeTSS) + { + PDMR3VmmDevHeapFree(pVM, pVM->hm.s.vmx.pRealModeTSS); + pVM->hm.s.vmx.pRealModeTSS = 0; + } + hmR3TermCPU(pVM); + return 0; +} + + +/** + * Terminates the per-VCPU HM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int hmR3TermCPU(PVM pVM) +{ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; NOREF(pVCpu); + +#ifdef VBOX_WITH_STATISTICS + if (pVCpu->hm.s.paStatExitReason) + { + MMHyperFree(pVM, pVCpu->hm.s.paStatExitReason); + pVCpu->hm.s.paStatExitReason = NULL; + pVCpu->hm.s.paStatExitReasonR0 = NIL_RTR0PTR; + } + if (pVCpu->hm.s.paStatInjectedIrqs) + { + MMHyperFree(pVM, pVCpu->hm.s.paStatInjectedIrqs); + pVCpu->hm.s.paStatInjectedIrqs = NULL; + pVCpu->hm.s.paStatInjectedIrqsR0 = NIL_RTR0PTR; + } +#endif + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + memset(pVCpu->hm.s.vmx.VMCSCache.aMagic, 0, sizeof(pVCpu->hm.s.vmx.VMCSCache.aMagic)); + pVCpu->hm.s.vmx.VMCSCache.uMagic = 0; + pVCpu->hm.s.vmx.VMCSCache.uPos = 0xffffffff; +#endif + } + return 0; +} + + +/** + * Resets a virtual CPU. + * + * Used by HMR3Reset and CPU hot plugging. + * + * @param pVCpu The cross context virtual CPU structure to reset. + */ +VMMR3_INT_DECL(void) HMR3ResetCpu(PVMCPU pVCpu) +{ + /* Sync. entire state on VM reset R0-reentry. It's safe to reset + the HM flags here, all other EMTs are in ring-3. See VMR3Reset(). */ + pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_ALL_GUEST; + + pVCpu->hm.s.fActive = false; + pVCpu->hm.s.Event.fPending = false; + pVCpu->hm.s.vmx.fWasInRealMode = true; + pVCpu->hm.s.vmx.u64MsrApicBase = 0; + pVCpu->hm.s.vmx.fSwitchedTo64on32 = false; + + /* Reset the contents of the read cache. 
*/ + PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; + for (unsigned j = 0; j < pCache->Read.cValidEntries; j++) + pCache->Read.aFieldVal[j] = 0; + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + /* Magic marker for searching in crash dumps. */ + strcpy((char *)pCache->aMagic, "VMCSCACHE Magic"); + pCache->uMagic = UINT64_C(0xdeadbeefdeadbeef); +#endif +} + + +/** + * The VM is being reset. + * + * For the HM component this means that any GDT/LDT/TSS monitors + * needs to be removed. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) HMR3Reset(PVM pVM) +{ + LogFlow(("HMR3Reset:\n")); + + if (HMIsEnabled(pVM)) + hmR3DisableRawMode(pVM); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + HMR3ResetCpu(pVCpu); + } + + /* Clear all patch information. */ + pVM->hm.s.pGuestPatchMem = 0; + pVM->hm.s.pFreeGuestPatchMem = 0; + pVM->hm.s.cbGuestPatchMem = 0; + pVM->hm.s.cPatches = 0; + pVM->hm.s.PatchTree = 0; + pVM->hm.s.fTPRPatchingActive = false; + ASMMemZero32(pVM->hm.s.aPatches, sizeof(pVM->hm.s.aPatches)); +} + + +/** + * Callback to patch a TPR instruction (vmmcall or mov cr8). + * + * @returns VBox strict status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Unused. + */ +static DECLCALLBACK(VBOXSTRICTRC) hmR3RemovePatches(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + VMCPUID idCpu = (VMCPUID)(uintptr_t)pvUser; + + /* Only execute the handler on the VCPU the original patch request was issued. */ + if (pVCpu->idCpu != idCpu) + return VINF_SUCCESS; + + Log(("hmR3RemovePatches\n")); + for (unsigned i = 0; i < pVM->hm.s.cPatches; i++) + { + uint8_t abInstr[15]; + PHMTPRPATCH pPatch = &pVM->hm.s.aPatches[i]; + RTGCPTR pInstrGC = (RTGCPTR)pPatch->Core.Key; + int rc; + +#ifdef LOG_ENABLED + char szOutput[256]; + rc = DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, CPUMGetGuestCS(pVCpu), pInstrGC, DBGF_DISAS_FLAGS_DEFAULT_MODE, + szOutput, sizeof(szOutput), NULL); + if (RT_SUCCESS(rc)) + Log(("Patched instr: %s\n", szOutput)); +#endif + + /* Check if the instruction is still the same. */ + rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pInstrGC, pPatch->cbNewOp); + if (rc != VINF_SUCCESS) + { + Log(("Patched code removed? (rc=%Rrc0\n", rc)); + continue; /* swapped out or otherwise removed; skip it. */ + } + + if (memcmp(abInstr, pPatch->aNewOpcode, pPatch->cbNewOp)) + { + Log(("Patched instruction was changed! (rc=%Rrc0\n", rc)); + continue; /* skip it. */ + } + + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pInstrGC, pPatch->aOpcode, pPatch->cbOp); + AssertRC(rc); + +#ifdef LOG_ENABLED + rc = DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, CPUMGetGuestCS(pVCpu), pInstrGC, DBGF_DISAS_FLAGS_DEFAULT_MODE, + szOutput, sizeof(szOutput), NULL); + if (RT_SUCCESS(rc)) + Log(("Original instr: %s\n", szOutput)); +#endif + } + pVM->hm.s.cPatches = 0; + pVM->hm.s.PatchTree = 0; + pVM->hm.s.pFreeGuestPatchMem = pVM->hm.s.pGuestPatchMem; + pVM->hm.s.fTPRPatchingActive = false; + return VINF_SUCCESS; +} + + +/** + * Worker for enabling patching in a VT-x/AMD-V guest. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param idCpu VCPU to execute hmR3RemovePatches on. + * @param pPatchMem Patch memory range. + * @param cbPatchMem Size of the memory range. 
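hmR3RemovePatches above undoes a TPR patch only after verifying that the replacement bytes are still in place: it reads back what currently sits at the patched address, compares it against the recorded replacement, and only then restores the original opcode. A simplified standalone sketch of that verify-then-restore step, with a flat byte array standing in for guest memory and for PGMPhysSimpleRead/WriteGCPtr (the real code tracks separate lengths for the old and new opcodes; a single length is used here):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    typedef struct
    {
        uint32_t off;         /* patch location in "guest memory"  */
        uint8_t  abOrig[16];  /* bytes that were there originally  */
        uint8_t  abNew[16];   /* bytes we replaced them with       */
        uint32_t cb;          /* patch length                      */
    } PATCHREC;

    static int removePatch(uint8_t *pbMem, PATCHREC const *pPatch)
    {
        if (memcmp(pbMem + pPatch->off, pPatch->abNew, pPatch->cb) != 0)
            return 0;                                             /* changed under us: skip  */
        memcpy(pbMem + pPatch->off, pPatch->abOrig, pPatch->cb);  /* restore original bytes  */
        return 1;
    }

    int main(void)
    {
        uint8_t abGuest[16] = { 0x0f, 0x01, 0xd9 };               /* currently a VMMCALL     */
        PATCHREC Patch = { 0, { 0x90, 0x90, 0x90 }, { 0x0f, 0x01, 0xd9 }, 3 };
        printf("restored=%d, first byte now %#04x\n", removePatch(abGuest, &Patch), abGuest[0]);
        return 0;
    }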
+ */ +static int hmR3EnablePatching(PVM pVM, VMCPUID idCpu, RTRCPTR pPatchMem, unsigned cbPatchMem) +{ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE, hmR3RemovePatches, (void *)(uintptr_t)idCpu); + AssertRC(rc); + + pVM->hm.s.pGuestPatchMem = pPatchMem; + pVM->hm.s.pFreeGuestPatchMem = pPatchMem; + pVM->hm.s.cbGuestPatchMem = cbPatchMem; + return VINF_SUCCESS; +} + + +/** + * Enable patching in a VT-x/AMD-V guest + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatchMem Patch memory range. + * @param cbPatchMem Size of the memory range. + */ +VMMR3_INT_DECL(int) HMR3EnablePatching(PVM pVM, RTGCPTR pPatchMem, unsigned cbPatchMem) +{ + VM_ASSERT_EMT(pVM); + Log(("HMR3EnablePatching %RGv size %x\n", pPatchMem, cbPatchMem)); + if (pVM->cCpus > 1) + { + /* We own the IOM lock here and could cause a deadlock by waiting for a VCPU that is blocking on the IOM lock. */ + int rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, + (PFNRT)hmR3EnablePatching, 4, pVM, VMMGetCpuId(pVM), (RTRCPTR)pPatchMem, cbPatchMem); + AssertRC(rc); + return rc; + } + return hmR3EnablePatching(pVM, VMMGetCpuId(pVM), (RTRCPTR)pPatchMem, cbPatchMem); +} + + +/** + * Disable patching in a VT-x/AMD-V guest. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatchMem Patch memory range. + * @param cbPatchMem Size of the memory range. + */ +VMMR3_INT_DECL(int) HMR3DisablePatching(PVM pVM, RTGCPTR pPatchMem, unsigned cbPatchMem) +{ + Log(("HMR3DisablePatching %RGv size %x\n", pPatchMem, cbPatchMem)); + RT_NOREF2(pPatchMem, cbPatchMem); + + Assert(pVM->hm.s.pGuestPatchMem == pPatchMem); + Assert(pVM->hm.s.cbGuestPatchMem == cbPatchMem); + + /** @todo Potential deadlock when other VCPUs are waiting on the IOM lock (we own it)!! */ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE, hmR3RemovePatches, + (void *)(uintptr_t)VMMGetCpuId(pVM)); + AssertRC(rc); + + pVM->hm.s.pGuestPatchMem = 0; + pVM->hm.s.pFreeGuestPatchMem = 0; + pVM->hm.s.cbGuestPatchMem = 0; + pVM->hm.s.fTPRPatchingActive = false; + return VINF_SUCCESS; +} + + +/** + * Callback to patch a TPR instruction (vmmcall or mov cr8). + * + * @returns VBox strict status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser User specified CPU context. + * + */ +static DECLCALLBACK(VBOXSTRICTRC) hmR3ReplaceTprInstr(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + /* + * Only execute the handler on the VCPU the original patch request was + * issued. (The other CPU(s) might not yet have switched to protected + * mode, nor have the correct memory context.) + */ + VMCPUID idCpu = (VMCPUID)(uintptr_t)pvUser; + if (pVCpu->idCpu != idCpu) + return VINF_SUCCESS; + + /* + * We're racing other VCPUs here, so don't try patch the instruction twice + * and make sure there is still room for our patch record. 
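hmR3EnablePatching above (and HMR3PatchTprInstr further down) run their callbacks through a one-by-one EMT rendezvous, so the callback executes once per virtual CPU and every CPU except the original requester returns immediately; the requester's id travels as a pointer-sized integer in pvUser. A toy standalone version of that dispatch pattern, with a plain loop standing in for VMMR3EmtRendezvous:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { unsigned idCpu; } VCPU;

    static int patchCallback(VCPU *pVCpu, void *pvUser)
    {
        unsigned const idTarget = (unsigned)(uintptr_t)pvUser;
        if (pVCpu->idCpu != idTarget)
            return 0;                    /* not the requester: nothing to do here */
        printf("patching on VCPU %u\n", pVCpu->idCpu);
        return 0;
    }

    int main(void)
    {
        VCPU aCpus[4] = { { 0 }, { 1 }, { 2 }, { 3 } };
        unsigned const idRequester = 2;
        for (unsigned i = 0; i < 4; i++) /* "one by one" over all virtual CPUs    */
            patchCallback(&aCpus[i], (void *)(uintptr_t)idRequester);
        return 0;
    }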
+ */ + PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; + PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip); + if (pPatch) + { + Log(("hmR3ReplaceTprInstr: already patched %RGv\n", pCtx->rip)); + return VINF_SUCCESS; + } + uint32_t const idx = pVM->hm.s.cPatches; + if (idx >= RT_ELEMENTS(pVM->hm.s.aPatches)) + { + Log(("hmR3ReplaceTprInstr: no available patch slots (%RGv)\n", pCtx->rip)); + return VINF_SUCCESS; + } + pPatch = &pVM->hm.s.aPatches[idx]; + + Log(("hmR3ReplaceTprInstr: rip=%RGv idxPatch=%u\n", pCtx->rip, idx)); + + /* + * Disassembler the instruction and get cracking. + */ + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "hmR3ReplaceTprInstr"); + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + uint32_t cbOp; + int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp); + AssertRC(rc); + if ( rc == VINF_SUCCESS + && pDis->pCurInstr->uOpcode == OP_MOV + && cbOp >= 3) + { + static uint8_t const s_abVMMCall[3] = { 0x0f, 0x01, 0xd9 }; + + rc = PGMPhysSimpleReadGCPtr(pVCpu, pPatch->aOpcode, pCtx->rip, cbOp); + AssertRC(rc); + + pPatch->cbOp = cbOp; + + if (pDis->Param1.fUse == DISUSE_DISPLACEMENT32) + { + /* write. */ + if (pDis->Param2.fUse == DISUSE_REG_GEN32) + { + pPatch->enmType = HMTPRINSTR_WRITE_REG; + pPatch->uSrcOperand = pDis->Param2.Base.idxGenReg; + Log(("hmR3ReplaceTprInstr: HMTPRINSTR_WRITE_REG %u\n", pDis->Param2.Base.idxGenReg)); + } + else + { + Assert(pDis->Param2.fUse == DISUSE_IMMEDIATE32); + pPatch->enmType = HMTPRINSTR_WRITE_IMM; + pPatch->uSrcOperand = pDis->Param2.uValue; + Log(("hmR3ReplaceTprInstr: HMTPRINSTR_WRITE_IMM %#llx\n", pDis->Param2.uValue)); + } + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pCtx->rip, s_abVMMCall, sizeof(s_abVMMCall)); + AssertRC(rc); + + memcpy(pPatch->aNewOpcode, s_abVMMCall, sizeof(s_abVMMCall)); + pPatch->cbNewOp = sizeof(s_abVMMCall); + STAM_COUNTER_INC(&pVM->hm.s.StatTprReplaceSuccessVmc); + } + else + { + /* + * TPR Read. + * + * Found: + * mov eax, dword [fffe0080] (5 bytes) + * Check if next instruction is: + * shr eax, 4 + */ + Assert(pDis->Param1.fUse == DISUSE_REG_GEN32); + + uint8_t const idxMmioReg = pDis->Param1.Base.idxGenReg; + uint8_t const cbOpMmio = cbOp; + uint64_t const uSavedRip = pCtx->rip; + + pCtx->rip += cbOp; + rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Following read"); + pCtx->rip = uSavedRip; + + if ( rc == VINF_SUCCESS + && pDis->pCurInstr->uOpcode == OP_SHR + && pDis->Param1.fUse == DISUSE_REG_GEN32 + && pDis->Param1.Base.idxGenReg == idxMmioReg + && pDis->Param2.fUse == DISUSE_IMMEDIATE8 + && pDis->Param2.uValue == 4 + && cbOpMmio + cbOp < sizeof(pVM->hm.s.aPatches[idx].aOpcode)) + { + uint8_t abInstr[15]; + + /* Replacing the two instructions above with an AMD-V specific lock-prefixed 32-bit MOV CR8 instruction so as to + access CR8 in 32-bit mode and not cause a #VMEXIT. 
*/ + rc = PGMPhysSimpleReadGCPtr(pVCpu, &pPatch->aOpcode, pCtx->rip, cbOpMmio + cbOp); + AssertRC(rc); + + pPatch->cbOp = cbOpMmio + cbOp; + + /* 0xf0, 0x0f, 0x20, 0xc0 = mov eax, cr8 */ + abInstr[0] = 0xf0; + abInstr[1] = 0x0f; + abInstr[2] = 0x20; + abInstr[3] = 0xc0 | pDis->Param1.Base.idxGenReg; + for (unsigned i = 4; i < pPatch->cbOp; i++) + abInstr[i] = 0x90; /* nop */ + + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pCtx->rip, abInstr, pPatch->cbOp); + AssertRC(rc); + + memcpy(pPatch->aNewOpcode, abInstr, pPatch->cbOp); + pPatch->cbNewOp = pPatch->cbOp; + STAM_COUNTER_INC(&pVM->hm.s.StatTprReplaceSuccessCr8); + + Log(("Acceptable read/shr candidate!\n")); + pPatch->enmType = HMTPRINSTR_READ_SHR4; + } + else + { + pPatch->enmType = HMTPRINSTR_READ; + pPatch->uDstOperand = idxMmioReg; + + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pCtx->rip, s_abVMMCall, sizeof(s_abVMMCall)); + AssertRC(rc); + + memcpy(pPatch->aNewOpcode, s_abVMMCall, sizeof(s_abVMMCall)); + pPatch->cbNewOp = sizeof(s_abVMMCall); + STAM_COUNTER_INC(&pVM->hm.s.StatTprReplaceSuccessVmc); + Log(("hmR3ReplaceTprInstr: HMTPRINSTR_READ %u\n", pPatch->uDstOperand)); + } + } + + pPatch->Core.Key = pCtx->eip; + rc = RTAvloU32Insert(&pVM->hm.s.PatchTree, &pPatch->Core); + AssertRC(rc); + + pVM->hm.s.cPatches++; + return VINF_SUCCESS; + } + + /* + * Save invalid patch, so we will not try again. + */ + Log(("hmR3ReplaceTprInstr: Failed to patch instr!\n")); + pPatch->Core.Key = pCtx->eip; + pPatch->enmType = HMTPRINSTR_INVALID; + rc = RTAvloU32Insert(&pVM->hm.s.PatchTree, &pPatch->Core); + AssertRC(rc); + pVM->hm.s.cPatches++; + STAM_COUNTER_INC(&pVM->hm.s.StatTprReplaceFailure); + return VINF_SUCCESS; +} + + +/** + * Callback to patch a TPR instruction (jump to generated code). + * + * @returns VBox strict status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser User specified CPU context. + * + */ +static DECLCALLBACK(VBOXSTRICTRC) hmR3PatchTprInstr(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + /* + * Only execute the handler on the VCPU the original patch request was + * issued. (The other CPU(s) might not yet have switched to protected + * mode, nor have the correct memory context.) + */ + VMCPUID idCpu = (VMCPUID)(uintptr_t)pvUser; + if (pVCpu->idCpu != idCpu) + return VINF_SUCCESS; + + /* + * We're racing other VCPUs here, so don't try patch the instruction twice + * and make sure there is still room for our patch record. + */ + PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; + PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip); + if (pPatch) + { + Log(("hmR3PatchTprInstr: already patched %RGv\n", pCtx->rip)); + return VINF_SUCCESS; + } + uint32_t const idx = pVM->hm.s.cPatches; + if (idx >= RT_ELEMENTS(pVM->hm.s.aPatches)) + { + Log(("hmR3PatchTprInstr: no available patch slots (%RGv)\n", pCtx->rip)); + return VINF_SUCCESS; + } + pPatch = &pVM->hm.s.aPatches[idx]; + + Log(("hmR3PatchTprInstr: rip=%RGv idxPatch=%u\n", pCtx->rip, idx)); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "hmR3PatchTprInstr"); + + /* + * Disassemble the instruction and get cracking. 
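The read/shr replacement above emits an AMD-specific encoding: a LOCK prefix in front of MOV r32, CR0 (F0 0F 20 /r), which AMD-V treats as an access to CR8 from 32-bit code, and the leftover bytes of the two original instructions are filled with NOPs. A standalone sketch of that byte emission; the destination register index and fill length are invented inputs:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned emitMovFromCr8(uint8_t *pb, unsigned idxDstReg, unsigned cbToFill)
    {
        unsigned off = 0;
        pb[off++] = 0xf0;                        /* LOCK prefix                      */
        pb[off++] = 0x0f;                        /* two-byte opcode escape           */
        pb[off++] = 0x20;                        /* MOV r32, CRn                     */
        pb[off++] = (uint8_t)(0xc0 | idxDstReg); /* ModRM: mod=11, reg=CR0, rm=dest  */
        while (off < cbToFill)
            pb[off++] = 0x90;                    /* NOP padding up to the old length */
        return off;
    }

    int main(void)
    {
        uint8_t abPatch[16];
        unsigned const cb = emitMovFromCr8(abPatch, 0 /* EAX */, 8);
        for (unsigned i = 0; i < cb; i++)
            printf("%02x ", abPatch[i]);         /* f0 0f 20 c0 90 90 90 90          */
        printf("\n");
        return 0;
    }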
+ */ + PDISCPUSTATE pDis = &pVCpu->hm.s.DisState; + uint32_t cbOp; + int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp); + AssertRC(rc); + if ( rc == VINF_SUCCESS + && pDis->pCurInstr->uOpcode == OP_MOV + && cbOp >= 5) + { + uint8_t aPatch[64]; + uint32_t off = 0; + + rc = PGMPhysSimpleReadGCPtr(pVCpu, pPatch->aOpcode, pCtx->rip, cbOp); + AssertRC(rc); + + pPatch->cbOp = cbOp; + pPatch->enmType = HMTPRINSTR_JUMP_REPLACEMENT; + + if (pDis->Param1.fUse == DISUSE_DISPLACEMENT32) + { + /* + * TPR write: + * + * push ECX [51] + * push EDX [52] + * push EAX [50] + * xor EDX,EDX [31 D2] + * mov EAX,EAX [89 C0] + * or + * mov EAX,0000000CCh [B8 CC 00 00 00] + * mov ECX,0C0000082h [B9 82 00 00 C0] + * wrmsr [0F 30] + * pop EAX [58] + * pop EDX [5A] + * pop ECX [59] + * jmp return_address [E9 return_address] + */ + bool fUsesEax = (pDis->Param2.fUse == DISUSE_REG_GEN32 && pDis->Param2.Base.idxGenReg == DISGREG_EAX); + + aPatch[off++] = 0x51; /* push ecx */ + aPatch[off++] = 0x52; /* push edx */ + if (!fUsesEax) + aPatch[off++] = 0x50; /* push eax */ + aPatch[off++] = 0x31; /* xor edx, edx */ + aPatch[off++] = 0xd2; + if (pDis->Param2.fUse == DISUSE_REG_GEN32) + { + if (!fUsesEax) + { + aPatch[off++] = 0x89; /* mov eax, src_reg */ + aPatch[off++] = MAKE_MODRM(3, pDis->Param2.Base.idxGenReg, DISGREG_EAX); + } + } + else + { + Assert(pDis->Param2.fUse == DISUSE_IMMEDIATE32); + aPatch[off++] = 0xb8; /* mov eax, immediate */ + *(uint32_t *)&aPatch[off] = pDis->Param2.uValue; + off += sizeof(uint32_t); + } + aPatch[off++] = 0xb9; /* mov ecx, 0xc0000082 */ + *(uint32_t *)&aPatch[off] = MSR_K8_LSTAR; + off += sizeof(uint32_t); + + aPatch[off++] = 0x0f; /* wrmsr */ + aPatch[off++] = 0x30; + if (!fUsesEax) + aPatch[off++] = 0x58; /* pop eax */ + aPatch[off++] = 0x5a; /* pop edx */ + aPatch[off++] = 0x59; /* pop ecx */ + } + else + { + /* + * TPR read: + * + * push ECX [51] + * push EDX [52] + * push EAX [50] + * mov ECX,0C0000082h [B9 82 00 00 C0] + * rdmsr [0F 32] + * mov EAX,EAX [89 C0] + * pop EAX [58] + * pop EDX [5A] + * pop ECX [59] + * jmp return_address [E9 return_address] + */ + Assert(pDis->Param1.fUse == DISUSE_REG_GEN32); + + if (pDis->Param1.Base.idxGenReg != DISGREG_ECX) + aPatch[off++] = 0x51; /* push ecx */ + if (pDis->Param1.Base.idxGenReg != DISGREG_EDX ) + aPatch[off++] = 0x52; /* push edx */ + if (pDis->Param1.Base.idxGenReg != DISGREG_EAX) + aPatch[off++] = 0x50; /* push eax */ + + aPatch[off++] = 0x31; /* xor edx, edx */ + aPatch[off++] = 0xd2; + + aPatch[off++] = 0xb9; /* mov ecx, 0xc0000082 */ + *(uint32_t *)&aPatch[off] = MSR_K8_LSTAR; + off += sizeof(uint32_t); + + aPatch[off++] = 0x0f; /* rdmsr */ + aPatch[off++] = 0x32; + + if (pDis->Param1.Base.idxGenReg != DISGREG_EAX) + { + aPatch[off++] = 0x89; /* mov dst_reg, eax */ + aPatch[off++] = MAKE_MODRM(3, DISGREG_EAX, pDis->Param1.Base.idxGenReg); + } + + if (pDis->Param1.Base.idxGenReg != DISGREG_EAX) + aPatch[off++] = 0x58; /* pop eax */ + if (pDis->Param1.Base.idxGenReg != DISGREG_EDX ) + aPatch[off++] = 0x5a; /* pop edx */ + if (pDis->Param1.Base.idxGenReg != DISGREG_ECX) + aPatch[off++] = 0x59; /* pop ecx */ + } + aPatch[off++] = 0xe9; /* jmp return_address */ + *(RTRCUINTPTR *)&aPatch[off] = ((RTRCUINTPTR)pCtx->eip + cbOp) - ((RTRCUINTPTR)pVM->hm.s.pFreeGuestPatchMem + off + 4); + off += sizeof(RTRCUINTPTR); + + if (pVM->hm.s.pFreeGuestPatchMem + off <= pVM->hm.s.pGuestPatchMem + pVM->hm.s.cbGuestPatchMem) + { + /* Write new code to the patch buffer. 
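Both E9 (JMP rel32) emissions in hmR3PatchTprInstr, the return jump appended to the generated code above and the jump into the patch buffer written just below, use the same arithmetic: the stored displacement is the target address minus the address of the byte following the 5-byte jump. A tiny standalone check with invented guest addresses (the displacement wraps naturally for backward jumps):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t jmpRel32(uint32_t uJmpAddr, uint32_t uTarget)
    {
        return uTarget - (uJmpAddr + 5);   /* rel32 is relative to the next instruction */
    }

    int main(void)
    {
        uint32_t const uPatchedInstr = UINT32_C(0x00401000); /* where the TPR access lived */
        uint32_t const uPatchBuffer  = UINT32_C(0x000a0000); /* start of the patch memory  */
        printf("into patch buffer: E9 rel32=%#010x\n", jmpRel32(uPatchedInstr, uPatchBuffer));
        printf("back to the guest: E9 rel32=%#010x\n", jmpRel32(uPatchBuffer + 0x20, uPatchedInstr + 5));
        return 0;
    }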
*/ + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pVM->hm.s.pFreeGuestPatchMem, aPatch, off); + AssertRC(rc); + +#ifdef LOG_ENABLED + uint32_t cbCurInstr; + for (RTGCPTR GCPtrInstr = pVM->hm.s.pFreeGuestPatchMem; + GCPtrInstr < pVM->hm.s.pFreeGuestPatchMem + off; + GCPtrInstr += RT_MAX(cbCurInstr, 1)) + { + char szOutput[256]; + rc = DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, GCPtrInstr, DBGF_DISAS_FLAGS_DEFAULT_MODE, + szOutput, sizeof(szOutput), &cbCurInstr); + if (RT_SUCCESS(rc)) + Log(("Patch instr %s\n", szOutput)); + else + Log(("%RGv: rc=%Rrc\n", GCPtrInstr, rc)); + } +#endif + + pPatch->aNewOpcode[0] = 0xE9; + *(RTRCUINTPTR *)&pPatch->aNewOpcode[1] = ((RTRCUINTPTR)pVM->hm.s.pFreeGuestPatchMem) - ((RTRCUINTPTR)pCtx->eip + 5); + + /* Overwrite the TPR instruction with a jump. */ + rc = PGMPhysSimpleWriteGCPtr(pVCpu, pCtx->eip, pPatch->aNewOpcode, 5); + AssertRC(rc); + + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "Jump"); + + pVM->hm.s.pFreeGuestPatchMem += off; + pPatch->cbNewOp = 5; + + pPatch->Core.Key = pCtx->eip; + rc = RTAvloU32Insert(&pVM->hm.s.PatchTree, &pPatch->Core); + AssertRC(rc); + + pVM->hm.s.cPatches++; + pVM->hm.s.fTPRPatchingActive = true; + STAM_COUNTER_INC(&pVM->hm.s.StatTprPatchSuccess); + return VINF_SUCCESS; + } + + Log(("Ran out of space in our patch buffer!\n")); + } + else + Log(("hmR3PatchTprInstr: Failed to patch instr!\n")); + + + /* + * Save invalid patch, so we will not try again. + */ + pPatch = &pVM->hm.s.aPatches[idx]; + pPatch->Core.Key = pCtx->eip; + pPatch->enmType = HMTPRINSTR_INVALID; + rc = RTAvloU32Insert(&pVM->hm.s.PatchTree, &pPatch->Core); + AssertRC(rc); + pVM->hm.s.cPatches++; + STAM_COUNTER_INC(&pVM->hm.s.StatTprPatchFailure); + return VINF_SUCCESS; +} + + +/** + * Attempt to patch TPR mmio instructions. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) HMR3PatchTprInstr(PVM pVM, PVMCPU pVCpu) +{ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE, + pVM->hm.s.pGuestPatchMem ? hmR3PatchTprInstr : hmR3ReplaceTprInstr, + (void *)(uintptr_t)pVCpu->idCpu); + AssertRC(rc); + return rc; +} + + +/** + * Checks if we need to reschedule due to VMM device heap changes. + * + * @returns true if a reschedule is required, otherwise false. + * @param pVM The cross context VM structure. + * @param pCtx VM execution context. + */ +VMMR3_INT_DECL(bool) HMR3IsRescheduleRequired(PVM pVM, PCPUMCTX pCtx) +{ + /* + * The VMM device heap is a requirement for emulating real-mode or protected-mode without paging + * when the unrestricted guest execution feature is missing (VT-x only). + */ + if ( pVM->hm.s.vmx.fEnabled + && !pVM->hm.s.vmx.fUnrestrictedGuest + && CPUMIsGuestInRealModeEx(pCtx) + && !PDMVmmDevHeapIsEnabled(pVM)) + return true; + + return false; +} + + +/** + * Noticiation callback from DBGF when interrupt breakpoints or generic debug + * event settings changes. + * + * DBGF will call HMR3NotifyDebugEventChangedPerCpu on each CPU afterwards, this + * function is just updating the VM globals. + * + * @param pVM The VM cross context VM structure. + * @thread EMT(0) + */ +VMMR3_INT_DECL(void) HMR3NotifyDebugEventChanged(PVM pVM) +{ + /* Interrupts. */ + bool fUseDebugLoop = pVM->dbgf.ro.cSoftIntBreakpoints > 0 + || pVM->dbgf.ro.cHardIntBreakpoints > 0; + + /* CPU Exceptions. 
*/ + for (DBGFEVENTTYPE enmEvent = DBGFEVENT_XCPT_FIRST; + !fUseDebugLoop && enmEvent <= DBGFEVENT_XCPT_LAST; + enmEvent = (DBGFEVENTTYPE)(enmEvent + 1)) + fUseDebugLoop = DBGF_IS_EVENT_ENABLED(pVM, enmEvent); + + /* Common VM exits. */ + for (DBGFEVENTTYPE enmEvent = DBGFEVENT_EXIT_FIRST; + !fUseDebugLoop && enmEvent <= DBGFEVENT_EXIT_LAST_COMMON; + enmEvent = (DBGFEVENTTYPE)(enmEvent + 1)) + fUseDebugLoop = DBGF_IS_EVENT_ENABLED(pVM, enmEvent); + + /* Vendor specific VM exits. */ + if (HMR3IsVmxEnabled(pVM->pUVM)) + for (DBGFEVENTTYPE enmEvent = DBGFEVENT_EXIT_VMX_FIRST; + !fUseDebugLoop && enmEvent <= DBGFEVENT_EXIT_VMX_LAST; + enmEvent = (DBGFEVENTTYPE)(enmEvent + 1)) + fUseDebugLoop = DBGF_IS_EVENT_ENABLED(pVM, enmEvent); + else + for (DBGFEVENTTYPE enmEvent = DBGFEVENT_EXIT_SVM_FIRST; + !fUseDebugLoop && enmEvent <= DBGFEVENT_EXIT_SVM_LAST; + enmEvent = (DBGFEVENTTYPE)(enmEvent + 1)) + fUseDebugLoop = DBGF_IS_EVENT_ENABLED(pVM, enmEvent); + + /* Done. */ + pVM->hm.s.fUseDebugLoop = fUseDebugLoop; +} + + +/** + * Follow up notification callback to HMR3NotifyDebugEventChanged for each CPU. + * + * HM uses this to combine the decision made by HMR3NotifyDebugEventChanged with + * per CPU settings. + * + * @param pVM The VM cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + */ +VMMR3_INT_DECL(void) HMR3NotifyDebugEventChangedPerCpu(PVM pVM, PVMCPU pVCpu) +{ + pVCpu->hm.s.fUseDebugLoop = pVCpu->hm.s.fSingleInstruction | pVM->hm.s.fUseDebugLoop; +} + + +/** + * Checks if we are currently using hardware acceleration. + * + * @returns true if hardware acceleration is being used, otherwise false. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(bool) HMR3IsActive(PVMCPU pVCpu) +{ + return pVCpu->hm.s.fActive; +} + + +/** + * External interface for querying whether hardware acceleration is enabled. + * + * @returns true if VT-x or AMD-V is being used, otherwise false. + * @param pUVM The user mode VM handle. + * @sa HMIsEnabled, HMIsEnabledNotMacro. + */ +VMMR3DECL(bool) HMR3IsEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->fHMEnabled; /* Don't use the macro as the GUI may query us very very early. */ +} + + +/** + * External interface for querying whether VT-x is being used. + * + * @returns true if VT-x is being used, otherwise false. + * @param pUVM The user mode VM handle. + * @sa HMR3IsSvmEnabled, HMIsEnabled + */ +VMMR3DECL(bool) HMR3IsVmxEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.vmx.fEnabled + && pVM->hm.s.vmx.fSupported + && pVM->fHMEnabled; +} + + +/** + * External interface for querying whether AMD-V is being used. + * + * @returns true if VT-x is being used, otherwise false. + * @param pUVM The user mode VM handle. + * @sa HMR3IsVmxEnabled, HMIsEnabled + */ +VMMR3DECL(bool) HMR3IsSvmEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.svm.fEnabled + && pVM->hm.s.svm.fSupported + && pVM->fHMEnabled; +} + + +/** + * Checks if we are currently using nested paging. + * + * @returns true if nested paging is being used, otherwise false. + * @param pUVM The user mode VM handle. 
+ */ +VMMR3DECL(bool) HMR3IsNestedPagingActive(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.fNestedPaging; +} + + +/** + * Checks if virtualized APIC registers is enabled. + * + * When enabled this feature allows the hardware to access most of the + * APIC registers in the virtual-APIC page without causing VM-exits. See + * Intel spec. 29.1.1 "Virtualized APIC Registers". + * + * @returns true if virtualized APIC registers is enabled, otherwise + * false. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) HMR3IsVirtApicRegsEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.fVirtApicRegs; +} + + +/** + * Checks if APIC posted-interrupt processing is enabled. + * + * This returns whether we can deliver interrupts to the guest without + * leaving guest-context by updating APIC state from host-context. + * + * @returns true if APIC posted-interrupt processing is enabled, + * otherwise false. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) HMR3IsPostedIntrsEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.fPostedIntrs; +} + + +/** + * Checks if we are currently using VPID in VT-x mode. + * + * @returns true if VPID is being used, otherwise false. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) HMR3IsVpidActive(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.vmx.fVpid; +} + + +/** + * Checks if we are currently using VT-x unrestricted execution, + * aka UX. + * + * @returns true if UX is being used, otherwise false. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) HMR3IsUXActive(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->hm.s.vmx.fUnrestrictedGuest + || pVM->hm.s.svm.fSupported; +} + + +/** + * Checks if internal events are pending. In that case we are not allowed to dispatch interrupts. + * + * @returns true if an internal event is pending, otherwise false. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(bool) HMR3IsEventPending(PVMCPU pVCpu) +{ + return HMIsEnabled(pVCpu->pVMR3) + && pVCpu->hm.s.Event.fPending; +} + + +/** + * Checks if the VMX-preemption timer is being used. + * + * @returns true if the VMX-preemption timer is being used, otherwise false. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(bool) HMR3IsVmxPreemptionTimerUsed(PVM pVM) +{ + return HMIsEnabled(pVM) + && pVM->hm.s.vmx.fEnabled + && pVM->hm.s.vmx.fUsePreemptTimer; +} + + +/** + * Check fatal VT-x/AMD-V error and produce some meaningful + * log release message. + * + * @param pVM The cross context VM structure. + * @param iStatusCode VBox status code. + */ +VMMR3_INT_DECL(void) HMR3CheckError(PVM pVM, int iStatusCode) +{ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + switch (iStatusCode) + { + /** @todo r=ramshankar: Are all EMTs out of ring-0 at this point!? If not, we + * might be getting inaccurate values for non-guru'ing EMTs. 
*/ + case VERR_VMX_INVALID_VMCS_FIELD: + break; + + case VERR_VMX_INVALID_VMCS_PTR: + LogRel(("HM: VERR_VMX_INVALID_VMCS_PTR:\n")); + LogRel(("HM: CPU[%u] Current pointer %#RGp vs %#RGp\n", i, pVCpu->hm.s.vmx.LastError.u64VmcsPhys, + pVCpu->hm.s.vmx.HCPhysVmcs)); + LogRel(("HM: CPU[%u] Current VMCS version %#x\n", i, pVCpu->hm.s.vmx.LastError.u32VmcsRev)); + LogRel(("HM: CPU[%u] Entered Host Cpu %u\n", i, pVCpu->hm.s.vmx.LastError.idEnteredCpu)); + LogRel(("HM: CPU[%u] Current Host Cpu %u\n", i, pVCpu->hm.s.vmx.LastError.idCurrentCpu)); + break; + + case VERR_VMX_UNABLE_TO_START_VM: + LogRel(("HM: VERR_VMX_UNABLE_TO_START_VM:\n")); + LogRel(("HM: CPU[%u] Instruction error %#x\n", i, pVCpu->hm.s.vmx.LastError.u32InstrError)); + LogRel(("HM: CPU[%u] Exit reason %#x\n", i, pVCpu->hm.s.vmx.LastError.u32ExitReason)); + + if ( pVCpu->hm.s.vmx.LastError.u32InstrError == VMXINSTRERR_VMLAUNCH_NON_CLEAR_VMCS + || pVCpu->hm.s.vmx.LastError.u32InstrError == VMXINSTRERR_VMRESUME_NON_LAUNCHED_VMCS) + { + LogRel(("HM: CPU[%u] Entered Host Cpu %u\n", i, pVCpu->hm.s.vmx.LastError.idEnteredCpu)); + LogRel(("HM: CPU[%u] Current Host Cpu %u\n", i, pVCpu->hm.s.vmx.LastError.idCurrentCpu)); + } + else if (pVCpu->hm.s.vmx.LastError.u32InstrError == VMXINSTRERR_VMENTRY_INVALID_CTLS) + { + LogRel(("HM: CPU[%u] PinCtls %#RX32\n", i, pVCpu->hm.s.vmx.u32PinCtls)); + { + uint32_t const u32Val = pVCpu->hm.s.vmx.u32PinCtls; + HMVMX_LOGREL_FEAT(u32Val, VMX_PIN_CTLS_EXT_INT_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PIN_CTLS_NMI_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PIN_CTLS_VIRT_NMI ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PIN_CTLS_PREEMPT_TIMER); + HMVMX_LOGREL_FEAT(u32Val, VMX_PIN_CTLS_POSTED_INT ); + } + LogRel(("HM: CPU[%u] ProcCtls %#RX32\n", i, pVCpu->hm.s.vmx.u32ProcCtls)); + { + uint32_t const u32Val = pVCpu->hm.s.vmx.u32ProcCtls; + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_INT_WINDOW_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_USE_TSC_OFFSETTING); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_HLT_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_INVLPG_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_MWAIT_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_RDPMC_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_RDTSC_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_CR3_LOAD_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_CR3_STORE_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_CR8_LOAD_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_CR8_STORE_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_USE_TPR_SHADOW ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_NMI_WINDOW_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_MOV_DR_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_UNCOND_IO_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_USE_IO_BITMAPS ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_MONITOR_TRAP_FLAG ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_USE_MSR_BITMAPS ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_MONITOR_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_PAUSE_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS_USE_SECONDARY_CTLS); + } + LogRel(("HM: CPU[%u] ProcCtls2 %#RX32\n", i, pVCpu->hm.s.vmx.u32ProcCtls2)); + { + uint32_t const u32Val = pVCpu->hm.s.vmx.u32ProcCtls2; + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_VIRT_APIC_ACCESS ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_EPT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_DESC_TABLE_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_RDTSCP ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_VIRT_X2APIC_MODE ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_VPID 
); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_WBINVD_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_UNRESTRICTED_GUEST); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_APIC_REG_VIRT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_VIRT_INT_DELIVERY ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_PAUSE_LOOP_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_RDRAND_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_INVPCID ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_VMFUNC ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_VMCS_SHADOWING ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_ENCLS_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_RDSEED_EXIT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_PML ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_EPT_VE ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_CONCEAL_FROM_PT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_XSAVES_XRSTORS ); + HMVMX_LOGREL_FEAT(u32Val, VMX_PROC_CTLS2_TSC_SCALING ); + } + LogRel(("HM: CPU[%u] EntryCtls %#RX32\n", i, pVCpu->hm.s.vmx.u32EntryCtls)); + { + uint32_t const u32Val = pVCpu->hm.s.vmx.u32EntryCtls; + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_LOAD_DEBUG ); + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_IA32E_MODE_GUEST ); + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_ENTRY_TO_SMM ); + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_DEACTIVATE_DUAL_MON); + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_LOAD_PERF_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_LOAD_PAT_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_ENTRY_CTLS_LOAD_EFER_MSR ); + } + LogRel(("HM: CPU[%u] ExitCtls %#RX32\n", i, pVCpu->hm.s.vmx.u32ExitCtls)); + { + uint32_t const u32Val = pVCpu->hm.s.vmx.u32ExitCtls; + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_SAVE_DEBUG ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_LOAD_PERF_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_ACK_EXT_INT ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_SAVE_PAT_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_LOAD_PAT_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_SAVE_EFER_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_LOAD_EFER_MSR ); + HMVMX_LOGREL_FEAT(u32Val, VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER ); + } + LogRel(("HM: CPU[%u] HCPhysMsrBitmap %#RHp\n", i, pVCpu->hm.s.vmx.HCPhysMsrBitmap)); + LogRel(("HM: CPU[%u] HCPhysGuestMsr %#RHp\n", i, pVCpu->hm.s.vmx.HCPhysGuestMsr)); + LogRel(("HM: CPU[%u] HCPhysHostMsr %#RHp\n", i, pVCpu->hm.s.vmx.HCPhysHostMsr)); + LogRel(("HM: CPU[%u] cMsrs %u\n", i, pVCpu->hm.s.vmx.cMsrs)); + } + /** @todo Log VM-entry event injection control fields + * VMX_VMCS_CTRL_ENTRY_IRQ_INFO, VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE + * and VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH from the VMCS. */ + break; + + /* The guru will dump the HM error and exit history. Nothing extra to report for these errors. 
*/ + case VERR_VMX_INVALID_VMXON_PTR: + case VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO: + case VERR_VMX_INVALID_GUEST_STATE: + case VERR_VMX_UNEXPECTED_EXIT: + case VERR_SVM_UNKNOWN_EXIT: + case VERR_SVM_UNEXPECTED_EXIT: + case VERR_SVM_UNEXPECTED_PATCH_TYPE: + case VERR_SVM_UNEXPECTED_XCPT_EXIT: + case VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE: + break; + } + } + + if (iStatusCode == VERR_VMX_UNABLE_TO_START_VM) + { + LogRel(("HM: VERR_VMX_UNABLE_TO_START_VM: VM-entry allowed-1 %#RX32\n", pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1)); + LogRel(("HM: VERR_VMX_UNABLE_TO_START_VM: VM-entry allowed-0 %#RX32\n", pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0)); + } + else if (iStatusCode == VERR_VMX_INVALID_VMXON_PTR) + LogRel(("HM: HCPhysVmxEnableError = %#RHp\n", pVM->hm.s.vmx.HCPhysVmxEnableError)); +} + + +/** + * Execute state save operation. + * + * Save only data that cannot be re-loaded while entering HM ring-0 code. This + * is because we always save the VM state from ring-3 and thus most HM state + * will be re-synced dynamically at runtime and don't need to be part of the VM + * saved state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) hmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + int rc; + + Log(("hmR3Save:\n")); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + Assert(!pVM->aCpus[i].hm.s.Event.fPending); + if (pVM->cpum.ro.GuestFeatures.fSvm) + { + PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVM->aCpus[i].hm.s.svm.NstGstVmcbCache; + rc = SSMR3PutBool(pSSM, pVmcbNstGstCache->fCacheValid); + rc |= SSMR3PutU16(pSSM, pVmcbNstGstCache->u16InterceptRdCRx); + rc |= SSMR3PutU16(pSSM, pVmcbNstGstCache->u16InterceptWrCRx); + rc |= SSMR3PutU16(pSSM, pVmcbNstGstCache->u16InterceptRdDRx); + rc |= SSMR3PutU16(pSSM, pVmcbNstGstCache->u16InterceptWrDRx); + rc |= SSMR3PutU16(pSSM, pVmcbNstGstCache->u16PauseFilterThreshold); + rc |= SSMR3PutU16(pSSM, pVmcbNstGstCache->u16PauseFilterCount); + rc |= SSMR3PutU32(pSSM, pVmcbNstGstCache->u32InterceptXcpt); + rc |= SSMR3PutU64(pSSM, pVmcbNstGstCache->u64InterceptCtrl); + rc |= SSMR3PutU64(pSSM, pVmcbNstGstCache->u64TSCOffset); + rc |= SSMR3PutBool(pSSM, pVmcbNstGstCache->fVIntrMasking); + rc |= SSMR3PutBool(pSSM, pVmcbNstGstCache->fNestedPaging); + rc |= SSMR3PutBool(pSSM, pVmcbNstGstCache->fLbrVirt); + AssertRCReturn(rc, rc); + } + } + + /* Save the guest patch data. */ + rc = SSMR3PutGCPtr(pSSM, pVM->hm.s.pGuestPatchMem); + rc |= SSMR3PutGCPtr(pSSM, pVM->hm.s.pFreeGuestPatchMem); + rc |= SSMR3PutU32(pSSM, pVM->hm.s.cbGuestPatchMem); + + /* Store all the guest patch records too. */ + rc |= SSMR3PutU32(pSSM, pVM->hm.s.cPatches); + AssertRCReturn(rc, rc); + + for (uint32_t i = 0; i < pVM->hm.s.cPatches; i++) + { + AssertCompileSize(HMTPRINSTR, 4); + PCHMTPRPATCH pPatch = &pVM->hm.s.aPatches[i]; + rc = SSMR3PutU32(pSSM, pPatch->Core.Key); + rc |= SSMR3PutMem(pSSM, pPatch->aOpcode, sizeof(pPatch->aOpcode)); + rc |= SSMR3PutU32(pSSM, pPatch->cbOp); + rc |= SSMR3PutMem(pSSM, pPatch->aNewOpcode, sizeof(pPatch->aNewOpcode)); + rc |= SSMR3PutU32(pSSM, pPatch->cbNewOp); + rc |= SSMR3PutU32(pSSM, (uint32_t)pPatch->enmType); + rc |= SSMR3PutU32(pSSM, pPatch->uSrcOperand); + rc |= SSMR3PutU32(pSSM, pPatch->uDstOperand); + rc |= SSMR3PutU32(pSSM, pPatch->pJumpTarget); + rc |= SSMR3PutU32(pSSM, pPatch->cFaults); + AssertRCReturn(rc, rc); + } + + return VINF_SUCCESS; +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. 
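hmR3Save above and hmR3Load below have to stay field-for-field symmetric, and any change to the layout needs a new saved-state version constant that the load routine accepts. A minimal sketch of how a hypothetical extra field would be threaded through the pair; HM_SAVED_STATE_VERSION_MY_FIELD and fMyNewFlag are assumptions for illustration, not part of the real structures:

    /* Hypothetical sketch only: the version constant and the field are made up. */

    /* In hmR3Save(), after the existing data: */
    rc = SSMR3PutBool(pSSM, pVM->hm.s.fMyNewFlag);          /* new field, saved last */
    AssertRCReturn(rc, rc);

    /* In hmR3Load(), guarded by the new version so older saved states still load
       (the constant would also have to be added to the version check below): */
    if (uVersion >= HM_SAVED_STATE_VERSION_MY_FIELD)
    {
        rc = SSMR3GetBool(pSSM, &pVM->hm.s.fMyNewFlag);
        AssertRCReturn(rc, rc);
    }
    else
        pVM->hm.s.fMyNewFlag = false;                        /* default for old saved states */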
+ * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) hmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + int rc; + + LogFlowFunc(("uVersion=%u\n", uVersion)); + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Validate version. + */ + if ( uVersion != HM_SAVED_STATE_VERSION_SVM_NESTED_HWVIRT + && uVersion != HM_SAVED_STATE_VERSION_TPR_PATCHING + && uVersion != HM_SAVED_STATE_VERSION_NO_TPR_PATCHING + && uVersion != HM_SAVED_STATE_VERSION_2_0_X) + { + AssertMsgFailed(("hmR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Load per-VCPU state. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + if (uVersion >= HM_SAVED_STATE_VERSION_SVM_NESTED_HWVIRT) + { + /* Load the SVM nested hw.virt state if the VM is configured for it. */ + if (pVM->cpum.ro.GuestFeatures.fSvm) + { + PSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVM->aCpus[i].hm.s.svm.NstGstVmcbCache; + rc = SSMR3GetBool(pSSM, &pVmcbNstGstCache->fCacheValid); + rc |= SSMR3GetU16(pSSM, &pVmcbNstGstCache->u16InterceptRdCRx); + rc |= SSMR3GetU16(pSSM, &pVmcbNstGstCache->u16InterceptWrCRx); + rc |= SSMR3GetU16(pSSM, &pVmcbNstGstCache->u16InterceptRdDRx); + rc |= SSMR3GetU16(pSSM, &pVmcbNstGstCache->u16InterceptWrDRx); + rc |= SSMR3GetU16(pSSM, &pVmcbNstGstCache->u16PauseFilterThreshold); + rc |= SSMR3GetU16(pSSM, &pVmcbNstGstCache->u16PauseFilterCount); + rc |= SSMR3GetU32(pSSM, &pVmcbNstGstCache->u32InterceptXcpt); + rc |= SSMR3GetU64(pSSM, &pVmcbNstGstCache->u64InterceptCtrl); + rc |= SSMR3GetU64(pSSM, &pVmcbNstGstCache->u64TSCOffset); + rc |= SSMR3GetBool(pSSM, &pVmcbNstGstCache->fVIntrMasking); + rc |= SSMR3GetBool(pSSM, &pVmcbNstGstCache->fNestedPaging); + rc |= SSMR3GetBool(pSSM, &pVmcbNstGstCache->fLbrVirt); + AssertRCReturn(rc, rc); + } + } + else + { + /* Pending HM event (obsolete for a long time since TPRM holds the info.) */ + rc = SSMR3GetU32(pSSM, &pVM->aCpus[i].hm.s.Event.fPending); + rc |= SSMR3GetU32(pSSM, &pVM->aCpus[i].hm.s.Event.u32ErrCode); + rc |= SSMR3GetU64(pSSM, &pVM->aCpus[i].hm.s.Event.u64IntInfo); + + /* VMX fWasInRealMode related data. */ + uint32_t uDummy; + rc |= SSMR3GetU32(pSSM, &uDummy); AssertRCReturn(rc, rc); + rc |= SSMR3GetU32(pSSM, &uDummy); AssertRCReturn(rc, rc); + rc |= SSMR3GetU32(pSSM, &uDummy); AssertRCReturn(rc, rc); + AssertRCReturn(rc, rc); + } + } + + /* + * Load TPR patching data. + */ + if (uVersion >= HM_SAVED_STATE_VERSION_TPR_PATCHING) + { + rc = SSMR3GetGCPtr(pSSM, &pVM->hm.s.pGuestPatchMem); + rc |= SSMR3GetGCPtr(pSSM, &pVM->hm.s.pFreeGuestPatchMem); + rc |= SSMR3GetU32(pSSM, &pVM->hm.s.cbGuestPatchMem); + + /* Fetch all TPR patch records. 
*/ + rc |= SSMR3GetU32(pSSM, &pVM->hm.s.cPatches); + AssertRCReturn(rc, rc); + for (uint32_t i = 0; i < pVM->hm.s.cPatches; i++) + { + PHMTPRPATCH pPatch = &pVM->hm.s.aPatches[i]; + rc = SSMR3GetU32(pSSM, &pPatch->Core.Key); + rc |= SSMR3GetMem(pSSM, pPatch->aOpcode, sizeof(pPatch->aOpcode)); + rc |= SSMR3GetU32(pSSM, &pPatch->cbOp); + rc |= SSMR3GetMem(pSSM, pPatch->aNewOpcode, sizeof(pPatch->aNewOpcode)); + rc |= SSMR3GetU32(pSSM, &pPatch->cbNewOp); + rc |= SSMR3GetU32(pSSM, (uint32_t *)&pPatch->enmType); + + if (pPatch->enmType == HMTPRINSTR_JUMP_REPLACEMENT) + pVM->hm.s.fTPRPatchingActive = true; + Assert(pPatch->enmType == HMTPRINSTR_JUMP_REPLACEMENT || pVM->hm.s.fTPRPatchingActive == false); + + rc |= SSMR3GetU32(pSSM, &pPatch->uSrcOperand); + rc |= SSMR3GetU32(pSSM, &pPatch->uDstOperand); + rc |= SSMR3GetU32(pSSM, &pPatch->cFaults); + rc |= SSMR3GetU32(pSSM, &pPatch->pJumpTarget); + AssertRCReturn(rc, rc); + + LogFlow(("hmR3Load: patch %d\n", i)); + LogFlow(("Key = %x\n", pPatch->Core.Key)); + LogFlow(("cbOp = %d\n", pPatch->cbOp)); + LogFlow(("cbNewOp = %d\n", pPatch->cbNewOp)); + LogFlow(("type = %d\n", pPatch->enmType)); + LogFlow(("srcop = %d\n", pPatch->uSrcOperand)); + LogFlow(("dstop = %d\n", pPatch->uDstOperand)); + LogFlow(("cFaults = %d\n", pPatch->cFaults)); + LogFlow(("target = %x\n", pPatch->pJumpTarget)); + + rc = RTAvloU32Insert(&pVM->hm.s.PatchTree, &pPatch->Core); + AssertRCReturn(rc, rc); + } + } + + return VINF_SUCCESS; +} + + +/** + * Displays HM info. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) hmR3Info(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + if (HMIsEnabled(pVM)) + { + if (pVM->hm.s.vmx.fSupported) + pHlp->pfnPrintf(pHlp, "CPU[%u]: VT-x info:\n", pVCpu->idCpu); + else + pHlp->pfnPrintf(pHlp, "CPU[%u]: AMD-V info:\n", pVCpu->idCpu); + pHlp->pfnPrintf(pHlp, " HM error = %#x (%u)\n", pVCpu->hm.s.u32HMError, pVCpu->hm.s.u32HMError); + pHlp->pfnPrintf(pHlp, " rcLastExitToR3 = %Rrc\n", pVCpu->hm.s.rcLastExitToR3); + } + else + pHlp->pfnPrintf(pHlp, "HM is not enabled for this VM!\n"); +} + + +/** + * Displays the HM pending event. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) hmR3InfoEventPending(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + if (HMIsEnabled(pVM)) + { + pHlp->pfnPrintf(pHlp, "CPU[%u]: HM event (fPending=%RTbool)\n", pVCpu->idCpu, pVCpu->hm.s.Event.fPending); + if (pVCpu->hm.s.Event.fPending) + { + pHlp->pfnPrintf(pHlp, " u64IntInfo = %#RX64\n", pVCpu->hm.s.Event.u64IntInfo); + pHlp->pfnPrintf(pHlp, " u32ErrCode = %#RX64\n", pVCpu->hm.s.Event.u32ErrCode); + pHlp->pfnPrintf(pHlp, " cbInstr = %u bytes\n", pVCpu->hm.s.Event.cbInstr); + pHlp->pfnPrintf(pHlp, " GCPtrFaultAddress = %#RGp\n", pVCpu->hm.s.Event.GCPtrFaultAddress); + } + } + else + pHlp->pfnPrintf(pHlp, "HM is not enabled for this VM!\n"); +} + + +/** + * Displays the SVM nested-guest VMCB cache. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. 
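The hmR3Info* callbacks here are DBGF info handlers. A hedged sketch of how such a handler is typically wired up and invoked, assuming the info name "hm"; the actual registration for these handlers happens elsewhere in HM.cpp, and the call shapes mirror the DBGFR3InfoRegisterInternal and DBGFR3Info uses that appear later in this patch for IOM's "ioport" and "mmio" infos:

    /* Sketch with an assumed info name; registration is done once during init. */
    int rc = DBGFR3InfoRegisterInternal(pVM, "hm", "Dumps HM info. No arguments.", &hmR3Info);
    AssertRCReturn(rc, rc);

    /* Any component (or e.g. an 'info hm' debugger command) can then trigger it: */
    DBGFR3Info(pVM->pUVM, "hm", NULL /*pszArgs*/, NULL /*pHlp => default output*/);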
+ */ +static DECLCALLBACK(void) hmR3InfoSvmNstGstVmcbCache(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + bool const fSvmEnabled = HMR3IsSvmEnabled(pVM->pUVM); + if ( fSvmEnabled + && pVM->cpum.ro.GuestFeatures.fSvm) + { + PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache; + pHlp->pfnPrintf(pHlp, "CPU[%u]: HM SVM nested-guest VMCB cache\n", pVCpu->idCpu); + pHlp->pfnPrintf(pHlp, " fCacheValid = %#RTbool\n", pVmcbNstGstCache->fCacheValid); + pHlp->pfnPrintf(pHlp, " u16InterceptRdCRx = %#RX16\n", pVmcbNstGstCache->u16InterceptRdCRx); + pHlp->pfnPrintf(pHlp, " u16InterceptWrCRx = %#RX16\n", pVmcbNstGstCache->u16InterceptWrCRx); + pHlp->pfnPrintf(pHlp, " u16InterceptRdDRx = %#RX16\n", pVmcbNstGstCache->u16InterceptRdDRx); + pHlp->pfnPrintf(pHlp, " u16InterceptWrDRx = %#RX16\n", pVmcbNstGstCache->u16InterceptWrDRx); + pHlp->pfnPrintf(pHlp, " u16PauseFilterThreshold = %#RX16\n", pVmcbNstGstCache->u16PauseFilterThreshold); + pHlp->pfnPrintf(pHlp, " u16PauseFilterCount = %#RX16\n", pVmcbNstGstCache->u16PauseFilterCount); + pHlp->pfnPrintf(pHlp, " u32InterceptXcpt = %#RX32\n", pVmcbNstGstCache->u32InterceptXcpt); + pHlp->pfnPrintf(pHlp, " u64InterceptCtrl = %#RX64\n", pVmcbNstGstCache->u64InterceptCtrl); + pHlp->pfnPrintf(pHlp, " u64TSCOffset = %#RX64\n", pVmcbNstGstCache->u64TSCOffset); + pHlp->pfnPrintf(pHlp, " fVIntrMasking = %RTbool\n", pVmcbNstGstCache->fVIntrMasking); + pHlp->pfnPrintf(pHlp, " fNestedPaging = %RTbool\n", pVmcbNstGstCache->fNestedPaging); + pHlp->pfnPrintf(pHlp, " fLbrVirt = %RTbool\n", pVmcbNstGstCache->fLbrVirt); + } + else + { + if (!fSvmEnabled) + pHlp->pfnPrintf(pHlp, "HM SVM is not enabled for this VM!\n"); + else + pHlp->pfnPrintf(pHlp, "SVM feature is not exposed to the guest!\n"); + } +} + diff --git a/src/VBox/VMM/VMMR3/IEMR3.cpp b/src/VBox/VMM/VMMR3/IEMR3.cpp new file mode 100644 index 00000000..8e320743 --- /dev/null +++ b/src/VBox/VMM/VMMR3/IEMR3.cpp @@ -0,0 +1,214 @@ +/* $Id: IEMR3.cpp $ */ +/** @file + * IEM - Interpreted Execution Manager. + */ + +/* + * Copyright (C) 2011-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_EM +#include +#include +#include +#include "IEMInternal.h" +#include +#include + +#include +#include + +static const char *iemGetTargetCpuName(uint32_t enmTargetCpu) +{ + switch (enmTargetCpu) + { +#define CASE_RET_STR(enmValue) case enmValue: return #enmValue + (sizeof("IEMTARGETCPU_") - 1) + CASE_RET_STR(IEMTARGETCPU_8086); + CASE_RET_STR(IEMTARGETCPU_V20); + CASE_RET_STR(IEMTARGETCPU_186); + CASE_RET_STR(IEMTARGETCPU_286); + CASE_RET_STR(IEMTARGETCPU_386); + CASE_RET_STR(IEMTARGETCPU_486); + CASE_RET_STR(IEMTARGETCPU_PENTIUM); + CASE_RET_STR(IEMTARGETCPU_PPRO); + CASE_RET_STR(IEMTARGETCPU_CURRENT); +#undef CASE_RET_STR + default: return "Unknown"; + } +} + +/** + * Initializes the interpreted execution manager. + * + * This must be called after CPUM as we're quering information from CPUM about + * the guest and host CPUs. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) IEMR3Init(PVM pVM) +{ + uint64_t const uInitialTlbRevision = UINT64_C(0) - (IEMTLB_REVISION_INCR * 200U); + uint64_t const uInitialTlbPhysRev = UINT64_C(0) - (IEMTLB_PHYS_REV_INCR * 100U); + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + + pVCpu->iem.s.CodeTlb.uTlbRevision = pVCpu->iem.s.DataTlb.uTlbRevision = uInitialTlbRevision; + pVCpu->iem.s.CodeTlb.uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev = uInitialTlbPhysRev; + + STAMR3RegisterF(pVM, &pVCpu->iem.s.cInstructions, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Instructions interpreted", "/IEM/CPU%u/cInstructions", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cLongJumps, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, + "Number of longjmp calls", "/IEM/CPU%u/cLongJumps", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cPotentialExits, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Potential exits", "/IEM/CPU%u/cPotentialExits", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetAspectNotImplemented, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "VERR_IEM_ASPECT_NOT_IMPLEMENTED", "/IEM/CPU%u/cRetAspectNotImplemented", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetInstrNotImplemented, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "VERR_IEM_INSTR_NOT_IMPLEMENTED", "/IEM/CPU%u/cRetInstrNotImplemented", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetInfStatuses, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Informational statuses returned", "/IEM/CPU%u/cRetInfStatuses", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetErrStatuses, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Error statuses returned", "/IEM/CPU%u/cRetErrStatuses", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cbWritten, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, + "Approx bytes written", "/IEM/CPU%u/cbWritten", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.cPendingCommit, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, + "Times RC/R0 had to postpone instruction committing to ring-3", "/IEM/CPU%u/cPendingCommit", idCpu); + +#ifdef VBOX_WITH_STATISTICS + STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbHits, STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Code TLB hits", 
"/IEM/CPU%u/CodeTlb-Hits", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbHits, STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Data TLB hits", "/IEM/CPU%u/DataTlb-Hits", idCpu); +#endif + STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbMisses, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Code TLB misses", "/IEM/CPU%u/CodeTlb-Misses", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.uTlbRevision, STAMTYPE_X64, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, + "Code TLB revision", "/IEM/CPU%u/CodeTlb-Revision", idCpu); + STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.CodeTlb.uTlbPhysRev, STAMTYPE_X64, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, + "Code TLB physical revision", "/IEM/CPU%u/CodeTlb-PhysRev", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbSlowReadPath, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, + "Code TLB slow read path", "/IEM/CPU%u/CodeTlb-SlowReads", idCpu); + + STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbMisses, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, + "Data TLB misses", "/IEM/CPU%u/DataTlb-Misses", idCpu); + STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.uTlbRevision, STAMTYPE_X64, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, + "Data TLB revision", "/IEM/CPU%u/DataTlb-Revision", idCpu); + STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.DataTlb.uTlbPhysRev, STAMTYPE_X64, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, + "Data TLB physical revision", "/IEM/CPU%u/DataTlb-PhysRev", idCpu); + +#if defined(VBOX_WITH_STATISTICS) && !defined(DOXYGEN_RUNNING) + /* Allocate instruction statistics and register them. */ + pVCpu->iem.s.pStatsR3 = (PIEMINSTRSTATS)MMR3HeapAllocZ(pVM, MM_TAG_IEM, sizeof(IEMINSTRSTATS)); + AssertLogRelReturn(pVCpu->iem.s.pStatsR3, VERR_NO_MEMORY); + int rc = MMHyperAlloc(pVM, sizeof(IEMINSTRSTATS), sizeof(uint64_t), MM_TAG_IEM, (void **)&pVCpu->iem.s.pStatsCCR3); + AssertLogRelRCReturn(rc, rc); + pVCpu->iem.s.pStatsR0 = MMHyperR3ToR0(pVM, pVCpu->iem.s.pStatsCCR3); + pVCpu->iem.s.pStatsRC = MMHyperR3ToR0(pVM, pVCpu->iem.s.pStatsCCR3); +# define IEM_DO_INSTR_STAT(a_Name, a_szDesc) \ + STAMR3RegisterF(pVM, &pVCpu->iem.s.pStatsCCR3->a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \ + STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-RZ/" #a_Name, idCpu); \ + STAMR3RegisterF(pVM, &pVCpu->iem.s.pStatsR3->a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \ + STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-R3/" #a_Name, idCpu); +# include "IEMInstructionStatisticsTmpl.h" +# undef IEM_DO_INSTR_STAT +#endif + + /* + * Host and guest CPU information. 
+ */ + if (idCpu == 0) + { + pVCpu->iem.s.enmCpuVendor = CPUMGetGuestCpuVendor(pVM); + pVCpu->iem.s.enmHostCpuVendor = CPUMGetHostCpuVendor(pVM); +#if IEM_CFG_TARGET_CPU == IEMTARGETCPU_DYNAMIC + switch (pVM->cpum.ro.GuestFeatures.enmMicroarch) + { + case kCpumMicroarch_Intel_8086: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_8086; break; + case kCpumMicroarch_Intel_80186: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_186; break; + case kCpumMicroarch_Intel_80286: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_286; break; + case kCpumMicroarch_Intel_80386: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_386; break; + case kCpumMicroarch_Intel_80486: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_486; break; + case kCpumMicroarch_Intel_P5: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_PENTIUM; break; + case kCpumMicroarch_Intel_P6: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_PPRO; break; + case kCpumMicroarch_NEC_V20: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_V20; break; + case kCpumMicroarch_NEC_V30: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_V20; break; + default: pVCpu->iem.s.uTargetCpu = IEMTARGETCPU_CURRENT; break; + } + LogRel(("IEM: TargetCpu=%s, Microarch=%s\n", iemGetTargetCpuName(pVCpu->iem.s.uTargetCpu), CPUMR3MicroarchName(pVM->cpum.ro.GuestFeatures.enmMicroarch))); +#endif + } + else + { + pVCpu->iem.s.enmCpuVendor = pVM->aCpus[0].iem.s.enmCpuVendor; + pVCpu->iem.s.enmHostCpuVendor = pVM->aCpus[0].iem.s.enmHostCpuVendor; +#if IEM_CFG_TARGET_CPU == IEMTARGETCPU_DYNAMIC + pVCpu->iem.s.uTargetCpu = pVM->aCpus[0].iem.s.uTargetCpu; +#endif + } + + /* + * Mark all buffers free. + */ + uint32_t iMemMap = RT_ELEMENTS(pVCpu->iem.s.aMemMappings); + while (iMemMap-- > 0) + pVCpu->iem.s.aMemMappings[iMemMap].fAccess = IEM_ACCESS_INVALID; + } + +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + /* + * Register the per-VM VMX APIC-access page handler type. + */ + if (pVM->cpum.ro.GuestFeatures.fVmx) + { + PVMCPU pVCpu0 = &pVM->aCpus[0]; + int rc = PGMR3HandlerPhysicalTypeRegister(pVM, PGMPHYSHANDLERKIND_ALL, iemVmxApicAccessPageHandler, + NULL /* pszModR0 */, + "iemVmxApicAccessPageHandler", NULL /* pszPfHandlerR0 */, + NULL /* pszModRC */, + NULL /* pszHandlerRC */, NULL /* pszPfHandlerRC */, + "VMX APIC-access page", &pVCpu0->iem.s.hVmxApicAccessPage); + AssertLogRelRCReturn(rc, rc); + } +#endif + + return VINF_SUCCESS; +} + + +VMMR3DECL(int) IEMR3Term(PVM pVM) +{ + NOREF(pVM); +#if defined(VBOX_WITH_STATISTICS) && !defined(DOXYGEN_RUNNING) + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + MMR3HeapFree(pVCpu->iem.s.pStatsR3); + pVCpu->iem.s.pStatsR3 = NULL; + } +#endif + return VINF_SUCCESS; +} + + +VMMR3DECL(void) IEMR3Relocate(PVM pVM) +{ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + if (pVM->aCpus[idCpu].iem.s.pStatsRC) + pVM->aCpus[idCpu].iem.s.pStatsRC = MMHyperR3ToRC(pVM, pVM->aCpus[idCpu].iem.s.pStatsCCR3); +} + diff --git a/src/VBox/VMM/VMMR3/IOM.cpp b/src/VBox/VMM/VMMR3/IOM.cpp new file mode 100644 index 00000000..1c0305a9 --- /dev/null +++ b/src/VBox/VMM/VMMR3/IOM.cpp @@ -0,0 +1,2388 @@ +/* $Id: IOM.cpp $ */ +/** @file + * IOM - Input / Output Monitor. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_iom IOM - The Input / Output Monitor
+ *
+ * The input/output monitor will handle I/O exceptions, routing them to the
+ * appropriate device. It implements an API to register and deregister virtual
+ * I/O port handlers and memory mapped I/O handlers. A handler is a PDM device
+ * together with a set of callback functions.
+ *
+ * @see grp_iom
+ *
+ *
+ * @section sec_iom_rawmode Raw-Mode
+ *
+ * In raw-mode I/O port access is trapped (\#GP(0)) by ensuring that the actual
+ * IOPL is 0 regardless of what the guest IOPL is. The \#GP handler uses the
+ * disassembler (DIS) to figure out which instruction caused it (there are a
+ * number of instructions in addition to the I/O ones) and if it's an I/O port
+ * access it will hand it to IOMRCIOPortHandler (via EMInterpretPortIO).
+ * IOMRCIOPortHandler will look up the port in the AVL tree of registered
+ * handlers. If found, the handler will be called; otherwise the default action
+ * is taken. (The default action is to write into the void and read all set bits.)
+ *
+ * Memory Mapped I/O (MMIO) is implemented as a slightly special case of PGM
+ * access handlers. An MMIO range is registered with IOM which then registers it
+ * with the PGM access handler sub-system. The access handler catches all
+ * accesses and will be called in the context of a \#PF handler. In RC and R0 this
+ * handler is iomMmioPfHandler while in ring-3 it's iomR3MmioHandler (although
+ * in ring-3 there can be alternative ways). iomMmioPfHandler will attempt to
+ * emulate the instruction that is doing the access and pass the corresponding
+ * reads / writes to the device.
+ *
+ * Emulating I/O port access is less complex and should be slightly faster than
+ * emulating MMIO, so in most cases we should encourage the OS to use port I/O.
+ * Devices which are frequently accessed should register GC handlers to speed up
+ * execution.
+ *
+ *
+ * @section sec_iom_hm Hardware Assisted Virtualization Mode
+ *
+ * When running in hardware assisted virtualization mode we'll be doing much the
+ * same things as in raw-mode. The main difference is that we're running in the
+ * host ring-0 context and that we don't get faults (\#GP(0) and \#PG) but
+ * exits.
+ *
+ *
+ * @section sec_iom_rem Recompiled Execution Mode
+ *
+ * When running in the recompiler things are different. I/O port access is
+ * handled by calling IOMIOPortRead and IOMIOPortWrite directly, while MMIO can
+ * be handled in one of two ways. The normal way is that we have registered a
+ * special RAM range with the recompiler and in the three callbacks (for byte,
+ * word and dword access) we call IOMMMIORead and IOMMMIOWrite directly. The
+ * alternative way is that the physical memory access, which goes via PGM, is
+ * taken care of by calling iomR3MmioHandler via the PGM access handler
+ * machinery - this shouldn't happen, but it is an alternative...
+ *
+ *
+ * @section sec_iom_other Other Accesses
+ *
+ * I/O ports aren't really exposed in any other way, unless you count the
+ * instruction interpreter in EM, but that's just what we're doing in the
+ * raw-mode \#GP(0) case really. Now, it's possible to call IOMIOPortRead and
+ * IOMIOPortWrite directly to talk to a device, but this is really bad behavior
+ * and should only be done as temporary hacks (the PC BIOS device used to set up
+ * the CMOS this way back in the dark ages).
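The default action described above (writes vanish, reads return all set bits) is what the iomR3IOPortDummy* handlers further down in this file supply for directions a device did not register. A minimal sketch of such callbacks, assuming the FNIOMIOPORTOUT / FNIOMIOPORTIN callback shapes used by the registration APIs below; the names are illustrative, not the real dummy implementations:

    /* Illustrative sketch only; assumes the standard IOM port callback shapes. */
    static DECLCALLBACK(int) mySketchPortOut(PPDMDEVINS pDevIns, void *pvUser,
                                             RTIOPORT Port, uint32_t u32, unsigned cb)
    {
        NOREF(pDevIns); NOREF(pvUser); NOREF(Port); NOREF(u32); NOREF(cb);
        return VINF_SUCCESS;                        /* the write goes into the void */
    }

    static DECLCALLBACK(int) mySketchPortIn(PPDMDEVINS pDevIns, void *pvUser,
                                            RTIOPORT Port, uint32_t *pu32, unsigned cb)
    {
        NOREF(pDevIns); NOREF(pvUser); NOREF(Port);
        switch (cb)                                 /* read all set bits */
        {
            case 1: *pu32 = UINT32_C(0xff);       return VINF_SUCCESS;
            case 2: *pu32 = UINT32_C(0xffff);     return VINF_SUCCESS;
            case 4: *pu32 = UINT32_C(0xffffffff); return VINF_SUCCESS;
            default: return VERR_INVALID_PARAMETER;
        }
    }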
+ * + * MMIO has similar direct routes as the I/O ports and these shouldn't be used + * for the same reasons and with the same restrictions. OTOH since MMIO is + * mapped into the physical memory address space, it can be accessed in a number + * of ways thru PGM. + * + * + * @section sec_iom_logging Logging Levels + * + * Following assignments: + * - Level 5 is used for defering I/O port and MMIO writes to ring-3. + * + */ + +/** @todo MMIO - simplifying the device end. + * - Add a return status for doing DBGFSTOP on access where there are no known + * registers. + * - + * + * */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_IOM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "IOMInternal.h" +#include + +#include +#include +#include +#include +#include +#include + +#include "IOMInline.h" + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void iomR3FlushCache(PVM pVM); +static DECLCALLBACK(int) iomR3RelocateIOPortCallback(PAVLROIOPORTNODECORE pNode, void *pvUser); +static DECLCALLBACK(int) iomR3RelocateMMIOCallback(PAVLROGCPHYSNODECORE pNode, void *pvUser); +static DECLCALLBACK(void) iomR3IOPortInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) iomR3MMIOInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static FNIOMIOPORTIN iomR3IOPortDummyIn; +static FNIOMIOPORTOUT iomR3IOPortDummyOut; +static FNIOMIOPORTINSTRING iomR3IOPortDummyInStr; +static FNIOMIOPORTOUTSTRING iomR3IOPortDummyOutStr; + +#ifdef VBOX_WITH_STATISTICS +static const char *iomR3IOPortGetStandardName(RTIOPORT Port); +#endif + + +/** + * Initializes the IOM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) IOMR3Init(PVM pVM) +{ + LogFlow(("IOMR3Init:\n")); + + /* + * Assert alignment and sizes. + */ + AssertCompileMemberAlignment(VM, iom.s, 32); + AssertCompile(sizeof(pVM->iom.s) <= sizeof(pVM->iom.padding)); + AssertCompileMemberAlignment(IOM, CritSect, sizeof(uintptr_t)); + + /* + * Setup any fixed pointers and offsets. + */ + pVM->iom.s.offVM = RT_UOFFSETOF(VM, iom); + + /* + * Initialize the REM critical section. + */ +#ifdef IOM_WITH_CRIT_SECT_RW + int rc = PDMR3CritSectRwInit(pVM, &pVM->iom.s.CritSect, RT_SRC_POS, "IOM Lock"); +#else + int rc = PDMR3CritSectInit(pVM, &pVM->iom.s.CritSect, RT_SRC_POS, "IOM Lock"); +#endif + AssertRCReturn(rc, rc); + + /* + * Allocate the trees structure. + */ + rc = MMHyperAlloc(pVM, sizeof(*pVM->iom.s.pTreesR3), 0, MM_TAG_IOM, (void **)&pVM->iom.s.pTreesR3); + if (RT_SUCCESS(rc)) + { + pVM->iom.s.pTreesRC = MMHyperR3ToRC(pVM, pVM->iom.s.pTreesR3); + pVM->iom.s.pTreesR0 = MMHyperR3ToR0(pVM, pVM->iom.s.pTreesR3); + + /* + * Register the MMIO access handler type. 
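IOMR3Init above allocates its lookup trees from the hyper heap and then derives the raw-mode and ring-0 addresses of that same memory, which is what makes the trees reachable from all three contexts. A small sketch of the pattern for a hypothetical structure (MYSHARED and its field are made up):

    /* Hypothetical sketch of the hyper-heap allocation pattern used above. */
    typedef struct MYSHARED { uint32_t cThings; } MYSHARED;

    MYSHARED *pSharedR3 = NULL;
    int rc = MMHyperAlloc(pVM, sizeof(*pSharedR3), 0 /*uAlignment*/, MM_TAG_IOM, (void **)&pSharedR3);
    if (RT_SUCCESS(rc))
    {
        /* Same memory, addressable from the raw-mode and ring-0 contexts: */
        RTRCPTR pSharedRC = MMHyperR3ToRC(pVM, pSharedR3);
        RTR0PTR pSharedR0 = MMHyperR3ToR0(pVM, pSharedR3);
        NOREF(pSharedRC); NOREF(pSharedR0);
    }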
+ */ + rc = PGMR3HandlerPhysicalTypeRegister(pVM, PGMPHYSHANDLERKIND_MMIO, + iomMmioHandler, + NULL, "iomMmioHandler", "iomMmioPfHandler", + NULL, "iomMmioHandler", "iomMmioPfHandler", + "MMIO", &pVM->iom.s.hMmioHandlerType); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + + /* + * Info. + */ + DBGFR3InfoRegisterInternal(pVM, "ioport", "Dumps all IOPort ranges. No arguments.", &iomR3IOPortInfo); + DBGFR3InfoRegisterInternal(pVM, "mmio", "Dumps all MMIO ranges. No arguments.", &iomR3MMIOInfo); + + /* + * Statistics. + */ + STAM_REG(pVM, &pVM->iom.s.StatRZMMIOHandler, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler", STAMUNIT_TICKS_PER_CALL, "Profiling of the iomMmioPfHandler() body, only success calls."); + STAM_REG(pVM, &pVM->iom.s.StatRZMMIO1Byte, STAMTYPE_COUNTER, "/IOM/RZ-MMIOHandler/Access1", STAMUNIT_OCCURENCES, "MMIO access by 1 byte counter."); + STAM_REG(pVM, &pVM->iom.s.StatRZMMIO2Bytes, STAMTYPE_COUNTER, "/IOM/RZ-MMIOHandler/Access2", STAMUNIT_OCCURENCES, "MMIO access by 2 bytes counter."); + STAM_REG(pVM, &pVM->iom.s.StatRZMMIO4Bytes, STAMTYPE_COUNTER, "/IOM/RZ-MMIOHandler/Access4", STAMUNIT_OCCURENCES, "MMIO access by 4 bytes counter."); + STAM_REG(pVM, &pVM->iom.s.StatRZMMIO8Bytes, STAMTYPE_COUNTER, "/IOM/RZ-MMIOHandler/Access8", STAMUNIT_OCCURENCES, "MMIO access by 8 bytes counter."); + STAM_REG(pVM, &pVM->iom.s.StatRZMMIOFailures, STAMTYPE_COUNTER, "/IOM/RZ-MMIOHandler/MMIOFailures", STAMUNIT_OCCURENCES, "Number of times iomMmioPfHandler() didn't service the request."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstMov, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/MOV", STAMUNIT_TICKS_PER_CALL, "Profiling of the MOV instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstCmp, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/CMP", STAMUNIT_TICKS_PER_CALL, "Profiling of the CMP instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstAnd, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/AND", STAMUNIT_TICKS_PER_CALL, "Profiling of the AND instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstOr, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/OR", STAMUNIT_TICKS_PER_CALL, "Profiling of the OR instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstXor, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/XOR", STAMUNIT_TICKS_PER_CALL, "Profiling of the XOR instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstBt, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/BT", STAMUNIT_TICKS_PER_CALL, "Profiling of the BT instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstTest, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/TEST", STAMUNIT_TICKS_PER_CALL, "Profiling of the TEST instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstXchg, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/XCHG", STAMUNIT_TICKS_PER_CALL, "Profiling of the XCHG instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstStos, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/STOS", STAMUNIT_TICKS_PER_CALL, "Profiling of the STOS instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstLods, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/LODS", STAMUNIT_TICKS_PER_CALL, "Profiling of the LODS instruction emulation."); +#ifdef IOM_WITH_MOVS_SUPPORT + STAM_REG(pVM, &pVM->iom.s.StatRZInstMovs, STAMTYPE_PROFILE_ADV, "/IOM/RZ-MMIOHandler/Inst/MOVS", STAMUNIT_TICKS_PER_CALL, "Profiling of the MOVS instruction emulation."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstMovsToMMIO, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/MOVS/ToMMIO", STAMUNIT_TICKS_PER_CALL, "Profiling of the MOVS instruction emulation - 
Mem2MMIO."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstMovsFromMMIO, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/MOVS/FromMMIO", STAMUNIT_TICKS_PER_CALL, "Profiling of the MOVS instruction emulation - MMIO2Mem."); + STAM_REG(pVM, &pVM->iom.s.StatRZInstMovsMMIO, STAMTYPE_PROFILE, "/IOM/RZ-MMIOHandler/Inst/MOVS/MMIO2MMIO", STAMUNIT_TICKS_PER_CALL, "Profiling of the MOVS instruction emulation - MMIO2MMIO."); +#endif + STAM_REG(pVM, &pVM->iom.s.StatRZInstOther, STAMTYPE_COUNTER, "/IOM/RZ-MMIOHandler/Inst/Other", STAMUNIT_OCCURENCES, "Other instructions counter."); + STAM_REG(pVM, &pVM->iom.s.StatR3MMIOHandler, STAMTYPE_COUNTER, "/IOM/R3-MMIOHandler", STAMUNIT_OCCURENCES, "Number of calls to iomR3MmioHandler."); + STAM_REG(pVM, &pVM->iom.s.StatInstIn, STAMTYPE_COUNTER, "/IOM/IOWork/In", STAMUNIT_OCCURENCES, "Counter of any IN instructions."); + STAM_REG(pVM, &pVM->iom.s.StatInstOut, STAMTYPE_COUNTER, "/IOM/IOWork/Out", STAMUNIT_OCCURENCES, "Counter of any OUT instructions."); + STAM_REG(pVM, &pVM->iom.s.StatInstIns, STAMTYPE_COUNTER, "/IOM/IOWork/Ins", STAMUNIT_OCCURENCES, "Counter of any INS instructions."); + STAM_REG(pVM, &pVM->iom.s.StatInstOuts, STAMTYPE_COUNTER, "/IOM/IOWork/Outs", STAMUNIT_OCCURENCES, "Counter of any OUTS instructions."); + } + } + + /* Redundant, but just in case we change something in the future */ + iomR3FlushCache(pVM); + + LogFlow(("IOMR3Init: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Flushes the IOM port & statistics lookup cache + * + * @param pVM The cross context VM structure. + */ +static void iomR3FlushCache(PVM pVM) +{ + /* + * Since all relevant (1) cache use requires at least read access to the + * critical section, we can exclude all other EMTs by grabbing exclusive + * access to the critical section and then safely update the caches of + * other EMTs. + * (1) The irrelvant access not holding the lock is in assertion code. + */ + IOM_LOCK_EXCL(pVM); + VMCPUID iCpu = pVM->cCpus; + while (iCpu-- > 0) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + pVCpu->iom.s.pRangeLastReadR0 = NIL_RTR0PTR; + pVCpu->iom.s.pRangeLastWriteR0 = NIL_RTR0PTR; + pVCpu->iom.s.pStatsLastReadR0 = NIL_RTR0PTR; + pVCpu->iom.s.pStatsLastWriteR0 = NIL_RTR0PTR; + pVCpu->iom.s.pMMIORangeLastR0 = NIL_RTR0PTR; + pVCpu->iom.s.pMMIOStatsLastR0 = NIL_RTR0PTR; + + pVCpu->iom.s.pRangeLastReadR3 = NULL; + pVCpu->iom.s.pRangeLastWriteR3 = NULL; + pVCpu->iom.s.pStatsLastReadR3 = NULL; + pVCpu->iom.s.pStatsLastWriteR3 = NULL; + pVCpu->iom.s.pMMIORangeLastR3 = NULL; + pVCpu->iom.s.pMMIOStatsLastR3 = NULL; + + pVCpu->iom.s.pRangeLastReadRC = NIL_RTRCPTR; + pVCpu->iom.s.pRangeLastWriteRC = NIL_RTRCPTR; + pVCpu->iom.s.pStatsLastReadRC = NIL_RTRCPTR; + pVCpu->iom.s.pStatsLastWriteRC = NIL_RTRCPTR; + pVCpu->iom.s.pMMIORangeLastRC = NIL_RTRCPTR; + pVCpu->iom.s.pMMIOStatsLastRC = NIL_RTRCPTR; + } + + IOM_UNLOCK_EXCL(pVM); +} + + +/** + * The VM is being reset. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) IOMR3Reset(PVM pVM) +{ + iomR3FlushCache(pVM); +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * The IOM will update the addresses used by the switcher. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. 
+ */ +VMMR3_INT_DECL(void) IOMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + LogFlow(("IOMR3Relocate: offDelta=%d\n", offDelta)); + + /* + * Apply relocations to the GC callbacks. + */ + pVM->iom.s.pTreesRC = MMHyperR3ToRC(pVM, pVM->iom.s.pTreesR3); + RTAvlroIOPortDoWithAll(&pVM->iom.s.pTreesR3->IOPortTreeRC, true, iomR3RelocateIOPortCallback, &offDelta); + RTAvlroGCPhysDoWithAll(&pVM->iom.s.pTreesR3->MMIOTree, true, iomR3RelocateMMIOCallback, &offDelta); + + /* + * Reset the raw-mode cache (don't bother relocating it). + */ + VMCPUID iCpu = pVM->cCpus; + while (iCpu-- > 0) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + pVCpu->iom.s.pRangeLastReadRC = NIL_RTRCPTR; + pVCpu->iom.s.pRangeLastWriteRC = NIL_RTRCPTR; + pVCpu->iom.s.pStatsLastReadRC = NIL_RTRCPTR; + pVCpu->iom.s.pStatsLastWriteRC = NIL_RTRCPTR; + pVCpu->iom.s.pMMIORangeLastRC = NIL_RTRCPTR; + pVCpu->iom.s.pMMIOStatsLastRC = NIL_RTRCPTR; + } +} + + +/** + * Callback function for relocating a I/O port range. + * + * @returns 0 (continue enum) + * @param pNode Pointer to a IOMIOPORTRANGERC node. + * @param pvUser Pointer to the offDelta. This is a pointer to the delta since we're + * not certain the delta will fit in a void pointer for all possible configs. + */ +static DECLCALLBACK(int) iomR3RelocateIOPortCallback(PAVLROIOPORTNODECORE pNode, void *pvUser) +{ + PIOMIOPORTRANGERC pRange = (PIOMIOPORTRANGERC)pNode; + RTGCINTPTR offDelta = *(PRTGCINTPTR)pvUser; + + Assert(pRange->pDevIns); + pRange->pDevIns += offDelta; + if (pRange->pfnOutCallback) + pRange->pfnOutCallback += offDelta; + if (pRange->pfnInCallback) + pRange->pfnInCallback += offDelta; + if (pRange->pfnOutStrCallback) + pRange->pfnOutStrCallback += offDelta; + if (pRange->pfnInStrCallback) + pRange->pfnInStrCallback += offDelta; + if (pRange->pvUser > _64K) + pRange->pvUser += offDelta; + return 0; +} + + +/** + * Callback function for relocating a MMIO range. + * + * @returns 0 (continue enum) + * @param pNode Pointer to a IOMMMIORANGE node. + * @param pvUser Pointer to the offDelta. This is a pointer to the delta since we're + * not certain the delta will fit in a void pointer for all possible configs. + */ +static DECLCALLBACK(int) iomR3RelocateMMIOCallback(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PIOMMMIORANGE pRange = (PIOMMMIORANGE)pNode; + RTGCINTPTR offDelta = *(PRTGCINTPTR)pvUser; + + if (pRange->pDevInsRC) + pRange->pDevInsRC += offDelta; + if (pRange->pfnWriteCallbackRC) + pRange->pfnWriteCallbackRC += offDelta; + if (pRange->pfnReadCallbackRC) + pRange->pfnReadCallbackRC += offDelta; + if (pRange->pfnFillCallbackRC) + pRange->pfnFillCallbackRC += offDelta; + if (pRange->pvUserRC > _64K) + pRange->pvUserRC += offDelta; + + return 0; +} + + +/** + * Terminates the IOM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) IOMR3Term(PVM pVM) +{ + /* + * IOM is not owning anything but automatically freed resources, + * so there's nothing to do here. + */ + NOREF(pVM); + return VINF_SUCCESS; +} + +#ifdef VBOX_WITH_STATISTICS + +/** + * Create the statistics node for an I/O port. + * + * @returns Pointer to new stats node. + * + * @param pVM The cross context VM structure. + * @param Port Port. + * @param pszDesc Description. + */ +static PIOMIOPORTSTATS iomR3IOPortStatsCreate(PVM pVM, RTIOPORT Port, const char *pszDesc) +{ + IOM_LOCK_EXCL(pVM); + + /* check if it already exists. 
*/ + PIOMIOPORTSTATS pPort = (PIOMIOPORTSTATS)RTAvloIOPortGet(&pVM->iom.s.pTreesR3->IOPortStatTree, Port); + if (pPort) + { + IOM_UNLOCK_EXCL(pVM); + return pPort; + } + + /* allocate stats node. */ + int rc = MMHyperAlloc(pVM, sizeof(*pPort), 0, MM_TAG_IOM_STATS, (void **)&pPort); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + /* insert into the tree. */ + pPort->Core.Key = Port; + if (RTAvloIOPortInsert(&pVM->iom.s.pTreesR3->IOPortStatTree, &pPort->Core)) + { + IOM_UNLOCK_EXCL(pVM); + + /* put a name on common ports. */ + if (!pszDesc) + pszDesc = iomR3IOPortGetStandardName(Port); + + /* register the statistics counters. */ + rc = STAMR3RegisterF(pVM, &pPort->InR3, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/Ports/%04x-In-R3", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->OutR3, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/Ports/%04x-Out-R3", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->InRZ, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/Ports/%04x-In-RZ", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->OutRZ, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/Ports/%04x-Out-RZ", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->InRZToR3, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/Ports/%04x-In-RZtoR3", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->OutRZToR3,STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/Ports/%04x-Out-RZtoR3", Port); AssertRC(rc); + + /* Profiling */ + rc = STAMR3RegisterF(pVM, &pPort->ProfInR3, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc,"/IOM/Ports/%04x-In-R3/Prof", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->ProfOutR3,STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc,"/IOM/Ports/%04x-Out-R3/Prof", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->ProfInRZ, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc,"/IOM/Ports/%04x-In-RZ/Prof", Port); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pPort->ProfOutRZ,STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc,"/IOM/Ports/%04x-Out-RZ/Prof", Port); AssertRC(rc); + + return pPort; + } + + AssertMsgFailed(("what! Port=%d\n", Port)); + MMHyperFree(pVM, pPort); + } + IOM_UNLOCK_EXCL(pVM); + return NULL; +} + + +/** + * Create the statistics node for an MMIO address. + * + * @returns Pointer to new stats node. + * + * @param pVM The cross context VM structure. + * @param GCPhys The address. + * @param pszDesc Description. + */ +PIOMMMIOSTATS iomR3MMIOStatsCreate(PVM pVM, RTGCPHYS GCPhys, const char *pszDesc) +{ + IOM_LOCK_EXCL(pVM); + + /* check if it already exists. */ + PIOMMMIOSTATS pStats = (PIOMMMIOSTATS)RTAvloGCPhysGet(&pVM->iom.s.pTreesR3->MmioStatTree, GCPhys); + if (pStats) + { + IOM_UNLOCK_EXCL(pVM); + return pStats; + } + + /* allocate stats node. */ + int rc = MMHyperAlloc(pVM, sizeof(*pStats), 0, MM_TAG_IOM_STATS, (void **)&pStats); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + /* insert into the tree. 
*/ + pStats->Core.Key = GCPhys; + if (RTAvloGCPhysInsert(&pVM->iom.s.pTreesR3->MmioStatTree, &pStats->Core)) + { + IOM_UNLOCK_EXCL(pVM); + + rc = STAMR3RegisterF(pVM, &pStats->Accesses, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/MMIO/%RGp", GCPhys); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pStats->ProfReadR3, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc, "/IOM/MMIO/%RGp/Read-R3", GCPhys); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pStats->ProfWriteR3, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc, "/IOM/MMIO/%RGp/Write-R3", GCPhys); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pStats->ProfReadRZ, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc, "/IOM/MMIO/%RGp/Read-RZ", GCPhys); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pStats->ProfWriteRZ, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pszDesc, "/IOM/MMIO/%RGp/Write-RZ", GCPhys); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pStats->ReadRZToR3, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/MMIO/%RGp/Read-RZtoR3", GCPhys); AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pStats->WriteRZToR3, STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, pszDesc, "/IOM/MMIO/%RGp/Write-RZtoR3", GCPhys); AssertRC(rc); + + return pStats; + } + AssertMsgFailed(("what! GCPhys=%RGp\n", GCPhys)); + MMHyperFree(pVM, pStats); + } + IOM_UNLOCK_EXCL(pVM); + return NULL; +} + +#endif /* VBOX_WITH_STATISTICS */ + +/** + * Registers a I/O port ring-3 handler. + * + * This API is called by PDM on behalf of a device. Devices must first register + * ring-3 ranges before any GC and R0 ranges can be registered using IOMR3IOPortRegisterRC() + * and IOMR3IOPortRegisterR0(). + * + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns PDM device instance owning the port range. + * @param PortStart First port number in the range. + * @param cPorts Number of ports to register. + * @param pvUser User argument for the callbacks. + * @param pfnOutCallback Pointer to function which is gonna handle OUT operations in R3. + * @param pfnInCallback Pointer to function which is gonna handle IN operations in R3. + * @param pfnOutStrCallback Pointer to function which is gonna handle string OUT operations in R3. + * @param pfnInStrCallback Pointer to function which is gonna handle string IN operations in R3. + * @param pszDesc Pointer to description string. This must not be freed. + */ +VMMR3_INT_DECL(int) IOMR3IOPortRegisterR3(PVM pVM, PPDMDEVINS pDevIns, RTIOPORT PortStart, RTUINT cPorts, RTHCPTR pvUser, + R3PTRTYPE(PFNIOMIOPORTOUT) pfnOutCallback, R3PTRTYPE(PFNIOMIOPORTIN) pfnInCallback, + R3PTRTYPE(PFNIOMIOPORTOUTSTRING) pfnOutStrCallback, R3PTRTYPE(PFNIOMIOPORTINSTRING) pfnInStrCallback, const char *pszDesc) +{ + LogFlow(("IOMR3IOPortRegisterR3: pDevIns=%p PortStart=%#x cPorts=%#x pvUser=%RHv pfnOutCallback=%#x pfnInCallback=%#x pfnOutStrCallback=%#x pfnInStrCallback=%#x pszDesc=%s\n", + pDevIns, PortStart, cPorts, pvUser, pfnOutCallback, pfnInCallback, pfnOutStrCallback, pfnInStrCallback, pszDesc)); + + /* + * Validate input. + */ + if ( (RTUINT)PortStart + cPorts <= (RTUINT)PortStart + || (RTUINT)PortStart + cPorts > 0x10000) + { + AssertMsgFailed(("Invalid port range %#x-%#x (inclusive)! 
(%s)\n", PortStart, (RTUINT)PortStart + (cPorts - 1), pszDesc)); + return VERR_IOM_INVALID_IOPORT_RANGE; + } + if (!pfnOutCallback && !pfnInCallback) + { + AssertMsgFailed(("no handlers specfied for %#x-%#x (inclusive)! (%s)\n", PortStart, (RTUINT)PortStart + (cPorts - 1), pszDesc)); + return VERR_INVALID_PARAMETER; + } + if (!pfnOutCallback) + pfnOutCallback = iomR3IOPortDummyOut; + if (!pfnInCallback) + pfnInCallback = iomR3IOPortDummyIn; + if (!pfnOutStrCallback) + pfnOutStrCallback = iomR3IOPortDummyOutStr; + if (!pfnInStrCallback) + pfnInStrCallback = iomR3IOPortDummyInStr; + + /* Flush the IO port lookup cache */ + iomR3FlushCache(pVM); + + /* + * Allocate new range record and initialize it. + */ + PIOMIOPORTRANGER3 pRange; + int rc = MMHyperAlloc(pVM, sizeof(*pRange), 0, MM_TAG_IOM, (void **)&pRange); + if (RT_SUCCESS(rc)) + { + pRange->Core.Key = PortStart; + pRange->Core.KeyLast = PortStart + (cPorts - 1); + pRange->Port = PortStart; + pRange->cPorts = cPorts; + pRange->pvUser = pvUser; + pRange->pDevIns = pDevIns; + pRange->pfnOutCallback = pfnOutCallback; + pRange->pfnInCallback = pfnInCallback; + pRange->pfnOutStrCallback = pfnOutStrCallback; + pRange->pfnInStrCallback = pfnInStrCallback; + pRange->pszDesc = pszDesc; + + /* + * Try Insert it. + */ + IOM_LOCK_EXCL(pVM); + if (RTAvlroIOPortInsert(&pVM->iom.s.pTreesR3->IOPortTreeR3, &pRange->Core)) + { +#ifdef VBOX_WITH_STATISTICS + for (unsigned iPort = 0; iPort < cPorts; iPort++) + iomR3IOPortStatsCreate(pVM, PortStart + iPort, pszDesc); +#endif + IOM_UNLOCK_EXCL(pVM); + return VINF_SUCCESS; + } + IOM_UNLOCK_EXCL(pVM); + + /* conflict. */ + DBGFR3Info(pVM->pUVM, "ioport", NULL, NULL); + AssertMsgFailed(("Port range %#x-%#x (%s) conflicts with existing range(s)!\n", PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + MMHyperFree(pVM, pRange); + rc = VERR_IOM_IOPORT_RANGE_CONFLICT; + } + + return rc; +} + + +/** + * Registers a I/O port RC handler. + * + * This API is called by PDM on behalf of a device. Devices must first register ring-3 ranges + * using IOMIOPortRegisterR3() before calling this function. + * + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns PDM device instance owning the port range. + * @param PortStart First port number in the range. + * @param cPorts Number of ports to register. + * @param pvUser User argument for the callbacks. + * @param pfnOutCallback Pointer to function which is gonna handle OUT operations in GC. + * @param pfnInCallback Pointer to function which is gonna handle IN operations in GC. + * @param pfnOutStrCallback Pointer to function which is gonna handle string OUT operations in GC. + * @param pfnInStrCallback Pointer to function which is gonna handle string IN operations in GC. + * @param pszDesc Pointer to description string. This must not be freed. 
+ */ +VMMR3_INT_DECL(int) IOMR3IOPortRegisterRC(PVM pVM, PPDMDEVINS pDevIns, RTIOPORT PortStart, RTUINT cPorts, RTRCPTR pvUser, + RCPTRTYPE(PFNIOMIOPORTOUT) pfnOutCallback, RCPTRTYPE(PFNIOMIOPORTIN) pfnInCallback, + RCPTRTYPE(PFNIOMIOPORTOUTSTRING) pfnOutStrCallback, RCPTRTYPE(PFNIOMIOPORTINSTRING) pfnInStrCallback, const char *pszDesc) +{ + LogFlow(("IOMR3IOPortRegisterRC: pDevIns=%p PortStart=%#x cPorts=%#x pvUser=%RRv pfnOutCallback=%RRv pfnInCallback=%RRv pfnOutStrCallback=%RRv pfnInStrCallback=%RRv pszDesc=%s\n", + pDevIns, PortStart, cPorts, pvUser, pfnOutCallback, pfnInCallback, pfnOutStrCallback, pfnInStrCallback, pszDesc)); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_IOM_HM_IPE); + + /* + * Validate input. + */ + if ( (RTUINT)PortStart + cPorts <= (RTUINT)PortStart + || (RTUINT)PortStart + cPorts > 0x10000) + { + AssertMsgFailed(("Invalid port range %#x-%#x! (%s)\n", PortStart, (RTUINT)PortStart + (cPorts - 1), pszDesc)); + return VERR_IOM_INVALID_IOPORT_RANGE; + } + RTIOPORT PortLast = PortStart + (cPorts - 1); + if (!pfnOutCallback && !pfnInCallback) + { + AssertMsgFailed(("Invalid port range %#x-%#x! No callbacks! (%s)\n", PortStart, PortLast, pszDesc)); + return VERR_INVALID_PARAMETER; + } + + IOM_LOCK_EXCL(pVM); + + /* + * Validate that there are ring-3 ranges for the ports. + */ + RTIOPORT Port = PortStart; + while (Port <= PortLast && Port >= PortStart) + { + PIOMIOPORTRANGER3 pRange = (PIOMIOPORTRANGER3)RTAvlroIOPortRangeGet(&pVM->iom.s.CTX_SUFF(pTrees)->IOPortTreeR3, Port); + if (!pRange) + { + AssertMsgFailed(("No R3! Port=%#x %#x-%#x! (%s)\n", Port, PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + IOM_UNLOCK_EXCL(pVM); + return VERR_IOM_NO_R3_IOPORT_RANGE; + } +#ifndef IOM_NO_PDMINS_CHECKS +# ifndef IN_RC + if (pRange->pDevIns != pDevIns) +# else + if (pRange->pDevIns != MMHyperRCToCC(pVM, pDevIns)) +# endif + { + AssertMsgFailed(("Not owner! Port=%#x %#x-%#x! (%s)\n", Port, PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + IOM_UNLOCK_EXCL(pVM); + return VERR_IOM_NOT_IOPORT_RANGE_OWNER; + } +#endif + Port = pRange->Core.KeyLast + 1; + } + + /* Flush the IO port lookup cache */ + iomR3FlushCache(pVM); + + /* + * Allocate new range record and initialize it. + */ + PIOMIOPORTRANGERC pRange; + int rc = MMHyperAlloc(pVM, sizeof(*pRange), 0, MM_TAG_IOM, (void **)&pRange); + if (RT_SUCCESS(rc)) + { + pRange->Core.Key = PortStart; + pRange->Core.KeyLast = PortLast; + pRange->Port = PortStart; + pRange->cPorts = cPorts; + pRange->pvUser = pvUser; + pRange->pfnOutCallback = pfnOutCallback; + pRange->pfnInCallback = pfnInCallback; + pRange->pfnOutStrCallback = pfnOutStrCallback; + pRange->pfnInStrCallback = pfnInStrCallback; + pRange->pDevIns = MMHyperCCToRC(pVM, pDevIns); + pRange->pszDesc = pszDesc; + + /* + * Insert it. + */ + if (RTAvlroIOPortInsert(&pVM->iom.s.CTX_SUFF(pTrees)->IOPortTreeRC, &pRange->Core)) + { + IOM_UNLOCK_EXCL(pVM); + return VINF_SUCCESS; + } + + /* conflict. */ + AssertMsgFailed(("Port range %#x-%#x (%s) conflicts with existing range(s)!\n", PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + MMHyperFree(pVM, pRange); + rc = VERR_IOM_IOPORT_RANGE_CONFLICT; + } + IOM_UNLOCK_EXCL(pVM); + return rc; +} + + +/** + * Registers a Port IO R0 handler. + * + * This API is called by PDM on behalf of a device. Devices must first register ring-3 ranges + * using IOMR3IOPortRegisterR3() before calling this function. + * + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. 
+ * @param pDevIns PDM device instance owning the port range. + * @param PortStart First port number in the range. + * @param cPorts Number of ports to register. + * @param pvUser User argument for the callbacks. + * @param pfnOutCallback Pointer to function which is gonna handle OUT operations in GC. + * @param pfnInCallback Pointer to function which is gonna handle IN operations in GC. + * @param pfnOutStrCallback Pointer to function which is gonna handle OUT operations in GC. + * @param pfnInStrCallback Pointer to function which is gonna handle IN operations in GC. + * @param pszDesc Pointer to description string. This must not be freed. + */ +VMMR3_INT_DECL(int) IOMR3IOPortRegisterR0(PVM pVM, PPDMDEVINS pDevIns, RTIOPORT PortStart, RTUINT cPorts, RTR0PTR pvUser, + R0PTRTYPE(PFNIOMIOPORTOUT) pfnOutCallback, R0PTRTYPE(PFNIOMIOPORTIN) pfnInCallback, + R0PTRTYPE(PFNIOMIOPORTOUTSTRING) pfnOutStrCallback, R0PTRTYPE(PFNIOMIOPORTINSTRING) pfnInStrCallback, + const char *pszDesc) +{ + LogFlow(("IOMR3IOPortRegisterR0: pDevIns=%p PortStart=%#x cPorts=%#x pvUser=%RHv pfnOutCallback=%RHv pfnInCallback=%RHv pfnOutStrCallback=%RHv pfnInStrCallback=%RHv pszDesc=%s\n", + pDevIns, PortStart, cPorts, pvUser, pfnOutCallback, pfnInCallback, pfnOutStrCallback, pfnInStrCallback, pszDesc)); + + /* + * Validate input. + */ + if ( (RTUINT)PortStart + cPorts <= (RTUINT)PortStart + || (RTUINT)PortStart + cPorts > 0x10000) + { + AssertMsgFailed(("Invalid port range %#x-%#x! (%s)\n", PortStart, (RTUINT)PortStart + (cPorts - 1), pszDesc)); + return VERR_IOM_INVALID_IOPORT_RANGE; + } + RTIOPORT PortLast = PortStart + (cPorts - 1); + if (!pfnOutCallback && !pfnInCallback) + { + AssertMsgFailed(("Invalid port range %#x-%#x! No callbacks! (%s)\n", PortStart, PortLast, pszDesc)); + return VERR_INVALID_PARAMETER; + } + + IOM_LOCK_EXCL(pVM); + + /* + * Validate that there are ring-3 ranges for the ports. + */ + RTIOPORT Port = PortStart; + while (Port <= PortLast && Port >= PortStart) + { + PIOMIOPORTRANGER3 pRange = (PIOMIOPORTRANGER3)RTAvlroIOPortRangeGet(&pVM->iom.s.CTX_SUFF(pTrees)->IOPortTreeR3, Port); + if (!pRange) + { + AssertMsgFailed(("No R3! Port=%#x %#x-%#x! (%s)\n", Port, PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + IOM_UNLOCK_EXCL(pVM); + return VERR_IOM_NO_R3_IOPORT_RANGE; + } +#ifndef IOM_NO_PDMINS_CHECKS +# ifndef IN_RC + if (pRange->pDevIns != pDevIns) +# else + if (pRange->pDevIns != MMHyperRCToCC(pVM, pDevIns)) +# endif + { + AssertMsgFailed(("Not owner! Port=%#x %#x-%#x! (%s)\n", Port, PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + IOM_UNLOCK_EXCL(pVM); + return VERR_IOM_NOT_IOPORT_RANGE_OWNER; + } +#endif + Port = pRange->Core.KeyLast + 1; + } + + /* Flush the IO port lookup cache */ + iomR3FlushCache(pVM); + + /* + * Allocate new range record and initialize it. + */ + PIOMIOPORTRANGER0 pRange; + int rc = MMHyperAlloc(pVM, sizeof(*pRange), 0, MM_TAG_IOM, (void **)&pRange); + if (RT_SUCCESS(rc)) + { + pRange->Core.Key = PortStart; + pRange->Core.KeyLast = PortLast; + pRange->Port = PortStart; + pRange->cPorts = cPorts; + pRange->pvUser = pvUser; + pRange->pfnOutCallback = pfnOutCallback; + pRange->pfnInCallback = pfnInCallback; + pRange->pfnOutStrCallback = pfnOutStrCallback; + pRange->pfnInStrCallback = pfnInStrCallback; + pRange->pDevIns = MMHyperR3ToR0(pVM, pDevIns); + pRange->pszDesc = pszDesc; + + /* + * Insert it. 
+ */ + if (RTAvlroIOPortInsert(&pVM->iom.s.CTX_SUFF(pTrees)->IOPortTreeR0, &pRange->Core)) + { + IOM_UNLOCK_EXCL(pVM); + return VINF_SUCCESS; + } + + /* conflict. */ + AssertMsgFailed(("Port range %#x-%#x (%s) conflicts with existing range(s)!\n", PortStart, (unsigned)PortStart + cPorts - 1, pszDesc)); + MMHyperFree(pVM, pRange); + rc = VERR_IOM_IOPORT_RANGE_CONFLICT; + } + IOM_UNLOCK_EXCL(pVM); + return rc; +} + + +/** + * Deregisters a I/O Port range. + * + * The specified range must be registered using IOMR3IOPortRegister previous to + * this call. The range does can be a smaller part of the range specified to + * IOMR3IOPortRegister, but it can never be larger. + * + * This function will remove GC, R0 and R3 context port handlers for this range. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device instance associated with the range. + * @param PortStart First port number in the range. + * @param cPorts Number of ports to remove starting at PortStart. + * + * @remark This function mainly for PCI PnP Config and will not do + * all the checks you might expect it to do. + */ +VMMR3_INT_DECL(int) IOMR3IOPortDeregister(PVM pVM, PPDMDEVINS pDevIns, RTIOPORT PortStart, RTUINT cPorts) +{ + LogFlow(("IOMR3IOPortDeregister: pDevIns=%p PortStart=%#x cPorts=%#x\n", pDevIns, PortStart, cPorts)); + + /* + * Validate input. + */ + if ( (RTUINT)PortStart + cPorts < (RTUINT)PortStart + || (RTUINT)PortStart + cPorts > 0x10000) + { + AssertMsgFailed(("Invalid port range %#x-%#x!\n", PortStart, (unsigned)PortStart + cPorts - 1)); + return VERR_IOM_INVALID_IOPORT_RANGE; + } + + IOM_LOCK_EXCL(pVM); + + /* Flush the IO port lookup cache */ + iomR3FlushCache(pVM); + + /* + * Check ownership. + */ + RTIOPORT PortLast = PortStart + (cPorts - 1); + RTIOPORT Port = PortStart; + while (Port <= PortLast && Port >= PortStart) + { + PIOMIOPORTRANGER3 pRange = (PIOMIOPORTRANGER3)RTAvlroIOPortRangeGet(&pVM->iom.s.pTreesR3->IOPortTreeR3, Port); + if (pRange) + { + Assert(Port <= pRange->Core.KeyLast); +#ifndef IOM_NO_PDMINS_CHECKS + if (pRange->pDevIns != pDevIns) + { + AssertMsgFailed(("Removal of ports in range %#x-%#x rejected because not owner of %#x-%#x (%s)\n", + PortStart, PortLast, pRange->Core.Key, pRange->Core.KeyLast, pRange->pszDesc)); + IOM_UNLOCK_EXCL(pVM); + return VERR_IOM_NOT_IOPORT_RANGE_OWNER; + } +#else /* IOM_NO_PDMINS_CHECKS */ + RT_NOREF_PV(pDevIns); +#endif /* IOM_NO_PDMINS_CHECKS */ + Port = pRange->Core.KeyLast; + } + Port++; + } + + /* + * Remove any RC ranges first. + */ + int rc = VINF_SUCCESS; + Port = PortStart; + while (Port <= PortLast && Port >= PortStart) + { + /* + * Try find range. + */ + PIOMIOPORTRANGERC pRange = (PIOMIOPORTRANGERC)RTAvlroIOPortRangeGet(&pVM->iom.s.pTreesR3->IOPortTreeRC, Port); + if (pRange) + { + if ( pRange->Core.Key == Port + && pRange->Core.KeyLast <= PortLast) + { + /* + * Kick out the entire range. + */ + void *pv = RTAvlroIOPortRemove(&pVM->iom.s.pTreesR3->IOPortTreeRC, Port); + Assert(pv == (void *)pRange); NOREF(pv); + Port += pRange->cPorts; + MMHyperFree(pVM, pRange); + } + else if (pRange->Core.Key == Port) + { + /* + * Cut of the head of the range, done. + */ + pRange->cPorts -= Port - pRange->Port; + pRange->Core.Key = Port; + pRange->Port = Port; + break; + } + else if (pRange->Core.KeyLast <= PortLast) + { + /* + * Just cut of the tail. 
+ */ + unsigned c = pRange->Core.KeyLast - Port + 1; + pRange->Core.KeyLast -= c; + pRange->cPorts -= c; + Port += c; + } + else + { + /* + * Split the range, done. + */ + Assert(pRange->Core.KeyLast > PortLast && pRange->Core.Key < Port); + /* create tail. */ + PIOMIOPORTRANGERC pRangeNew; + int rc2 = MMHyperAlloc(pVM, sizeof(*pRangeNew), 0, MM_TAG_IOM, (void **)&pRangeNew); + if (RT_FAILURE(rc2)) + { + IOM_UNLOCK_EXCL(pVM); + return rc2; + } + *pRangeNew = *pRange; + pRangeNew->Core.Key = PortLast; + pRangeNew->Port = PortLast; + pRangeNew->cPorts = pRangeNew->Core.KeyLast - PortLast + 1; + + LogFlow(("IOMR3IOPortDeregister (rc): split the range; new %x\n", pRangeNew->Core.Key)); + + /* adjust head */ + pRange->Core.KeyLast = Port - 1; + pRange->cPorts = Port - pRange->Port; + + /* insert */ + if (!RTAvlroIOPortInsert(&pVM->iom.s.pTreesR3->IOPortTreeRC, &pRangeNew->Core)) + { + AssertMsgFailed(("This cannot happen!\n")); + MMHyperFree(pVM, pRangeNew); + rc = VERR_IOM_IOPORT_IPE_1; + } + break; + } + } + else /* next port */ + Port++; + } /* for all ports - RC. */ + + + /* + * Remove any R0 ranges. + */ + Port = PortStart; + while (Port <= PortLast && Port >= PortStart) + { + /* + * Try find range. + */ + PIOMIOPORTRANGER0 pRange = (PIOMIOPORTRANGER0)RTAvlroIOPortRangeGet(&pVM->iom.s.pTreesR3->IOPortTreeR0, Port); + if (pRange) + { + if ( pRange->Core.Key == Port + && pRange->Core.KeyLast <= PortLast) + { + /* + * Kick out the entire range. + */ + void *pv = RTAvlroIOPortRemove(&pVM->iom.s.pTreesR3->IOPortTreeR0, Port); + Assert(pv == (void *)pRange); NOREF(pv); + Port += pRange->cPorts; + MMHyperFree(pVM, pRange); + } + else if (pRange->Core.Key == Port) + { + /* + * Cut of the head of the range, done. + */ + pRange->cPorts -= Port - pRange->Port; + pRange->Core.Key = Port; + pRange->Port = Port; + break; + } + else if (pRange->Core.KeyLast <= PortLast) + { + /* + * Just cut of the tail. + */ + unsigned c = pRange->Core.KeyLast - Port + 1; + pRange->Core.KeyLast -= c; + pRange->cPorts -= c; + Port += c; + } + else + { + /* + * Split the range, done. + */ + Assert(pRange->Core.KeyLast > PortLast && pRange->Core.Key < Port); + /* create tail. */ + PIOMIOPORTRANGER0 pRangeNew; + int rc2 = MMHyperAlloc(pVM, sizeof(*pRangeNew), 0, MM_TAG_IOM, (void **)&pRangeNew); + if (RT_FAILURE(rc2)) + { + IOM_UNLOCK_EXCL(pVM); + return rc2; + } + *pRangeNew = *pRange; + pRangeNew->Core.Key = PortLast; + pRangeNew->Port = PortLast; + pRangeNew->cPorts = pRangeNew->Core.KeyLast - PortLast + 1; + + LogFlow(("IOMR3IOPortDeregister (r0): split the range; new %x\n", pRangeNew->Core.Key)); + + /* adjust head */ + pRange->Core.KeyLast = Port - 1; + pRange->cPorts = Port - pRange->Port; + + /* insert */ + if (!RTAvlroIOPortInsert(&pVM->iom.s.pTreesR3->IOPortTreeR0, &pRangeNew->Core)) + { + AssertMsgFailed(("This cannot happen!\n")); + MMHyperFree(pVM, pRangeNew); + rc = VERR_IOM_IOPORT_IPE_1; + } + break; + } + } + else /* next port */ + Port++; + } /* for all ports - R0. */ + + /* + * And the same procedure for ring-3 ranges. + */ + Port = PortStart; + while (Port <= PortLast && Port >= PortStart) + { + /* + * Try find range. + */ + PIOMIOPORTRANGER3 pRange = (PIOMIOPORTRANGER3)RTAvlroIOPortRangeGet(&pVM->iom.s.pTreesR3->IOPortTreeR3, Port); + if (pRange) + { + if ( pRange->Core.Key == Port + && pRange->Core.KeyLast <= PortLast) + { + /* + * Kick out the entire range. 
+ */ + void *pv = RTAvlroIOPortRemove(&pVM->iom.s.pTreesR3->IOPortTreeR3, Port); + Assert(pv == (void *)pRange); NOREF(pv); + Port += pRange->cPorts; + MMHyperFree(pVM, pRange); + } + else if (pRange->Core.Key == Port) + { + /* + * Cut of the head of the range, done. + */ + pRange->cPorts -= Port - pRange->Port; + pRange->Core.Key = Port; + pRange->Port = Port; + break; + } + else if (pRange->Core.KeyLast <= PortLast) + { + /* + * Just cut of the tail. + */ + unsigned c = pRange->Core.KeyLast - Port + 1; + pRange->Core.KeyLast -= c; + pRange->cPorts -= c; + Port += c; + } + else + { + /* + * Split the range, done. + */ + Assert(pRange->Core.KeyLast > PortLast && pRange->Core.Key < Port); + /* create tail. */ + PIOMIOPORTRANGER3 pRangeNew; + int rc2 = MMHyperAlloc(pVM, sizeof(*pRangeNew), 0, MM_TAG_IOM, (void **)&pRangeNew); + if (RT_FAILURE(rc2)) + { + IOM_UNLOCK_EXCL(pVM); + return rc2; + } + *pRangeNew = *pRange; + pRangeNew->Core.Key = PortLast; + pRangeNew->Port = PortLast; + pRangeNew->cPorts = pRangeNew->Core.KeyLast - PortLast + 1; + + LogFlow(("IOMR3IOPortDeregister (r3): split the range; new %x\n", pRangeNew->Core.Key)); + + /* adjust head */ + pRange->Core.KeyLast = Port - 1; + pRange->cPorts = Port - pRange->Port; + + /* insert */ + if (!RTAvlroIOPortInsert(&pVM->iom.s.pTreesR3->IOPortTreeR3, &pRangeNew->Core)) + { + AssertMsgFailed(("This cannot happen!\n")); + MMHyperFree(pVM, pRangeNew); + rc = VERR_IOM_IOPORT_IPE_1; + } + break; + } + } + else /* next port */ + Port++; + } /* for all ports - ring-3. */ + + /* done */ + IOM_UNLOCK_EXCL(pVM); + return rc; +} + + +/** + * Dummy Port I/O Handler for IN operations. + * + * @returns VBox status code. + * + * @param pDevIns The device instance. + * @param pvUser User argument. + * @param Port Port number used for the IN operation. + * @param pu32 Where to store the result. + * @param cb Number of bytes read. + */ +static DECLCALLBACK(int) iomR3IOPortDummyIn(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT Port, uint32_t *pu32, unsigned cb) +{ + NOREF(pDevIns); NOREF(pvUser); NOREF(Port); + switch (cb) + { + case 1: *pu32 = 0xff; break; + case 2: *pu32 = 0xffff; break; + case 4: *pu32 = UINT32_C(0xffffffff); break; + default: + AssertReleaseMsgFailed(("cb=%d\n", cb)); + return VERR_IOM_IOPORT_IPE_2; + } + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMIOPORTINSTRING, + * Dummy Port I/O Handler for string IN operations.} + */ +static DECLCALLBACK(int) iomR3IOPortDummyInStr(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT Port, uint8_t *pbDst, + uint32_t *pcTransfer, unsigned cb) +{ + NOREF(pDevIns); NOREF(pvUser); NOREF(Port); NOREF(pbDst); NOREF(pcTransfer); NOREF(cb); + return VINF_SUCCESS; +} + + +/** + * Dummy Port I/O Handler for OUT operations. + * + * @returns VBox status code. + * + * @param pDevIns The device instance. + * @param pvUser User argument. + * @param Port Port number used for the OUT operation. + * @param u32 The value to output. + * @param cb The value size in bytes. 
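+ *
+ * @remarks Together with iomR3IOPortDummyIn above, this makes a port behave like an
+ *          unclaimed ISA port: reads return all ones for the access size (0xff,
+ *          0xffff or 0xffffffff) while writes are accepted and discarded. Guest-side
+ *          view, purely illustrative and not taken from the original sources:
+ * @code
+ *          outb(uPort, 0x42);          // reaches iomR3IOPortDummyOut, value dropped
+ *          uint8_t bVal = inb(uPort);  // iomR3IOPortDummyIn stores 0xff
+ * @endcode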
+ */ +static DECLCALLBACK(int) iomR3IOPortDummyOut(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT Port, uint32_t u32, unsigned cb) +{ + NOREF(pDevIns); NOREF(pvUser); NOREF(Port); NOREF(u32); NOREF(cb); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMIOPORTOUTSTRING, + * Dummy Port I/O Handler for string OUT operations.} + */ +static DECLCALLBACK(int) iomR3IOPortDummyOutStr(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT Port, uint8_t const *pbSrc, + uint32_t *pcTransfer, unsigned cb) +{ + NOREF(pDevIns); NOREF(pvUser); NOREF(Port); NOREF(pbSrc); NOREF(pcTransfer); NOREF(cb); + return VINF_SUCCESS; +} + + +/** + * Display a single I/O port ring-3 range. + * + * @returns 0 + * @param pNode Pointer to I/O port HC range. + * @param pvUser Pointer to info output callback structure. + */ +static DECLCALLBACK(int) iomR3IOPortInfoOneR3(PAVLROIOPORTNODECORE pNode, void *pvUser) +{ + PIOMIOPORTRANGER3 pRange = (PIOMIOPORTRANGER3)pNode; + PCDBGFINFOHLP pHlp = (PCDBGFINFOHLP)pvUser; + pHlp->pfnPrintf(pHlp, + "%04x-%04x %p %p %p %p %s\n", + pRange->Core.Key, + pRange->Core.KeyLast, + pRange->pDevIns, + pRange->pfnInCallback, + pRange->pfnOutCallback, + pRange->pvUser, + pRange->pszDesc); + return 0; +} + + +/** + * Display a single I/O port GC range. + * + * @returns 0 + * @param pNode Pointer to IOPORT GC range. + * @param pvUser Pointer to info output callback structure. + */ +static DECLCALLBACK(int) iomR3IOPortInfoOneRC(PAVLROIOPORTNODECORE pNode, void *pvUser) +{ + PIOMIOPORTRANGERC pRange = (PIOMIOPORTRANGERC)pNode; + PCDBGFINFOHLP pHlp = (PCDBGFINFOHLP)pvUser; + pHlp->pfnPrintf(pHlp, + "%04x-%04x %RRv %RRv %RRv %RRv %s\n", + pRange->Core.Key, + pRange->Core.KeyLast, + pRange->pDevIns, + pRange->pfnInCallback, + pRange->pfnOutCallback, + pRange->pvUser, + pRange->pszDesc); + return 0; +} + + +/** + * Display all registered I/O port ranges. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) iomR3IOPortInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + pHlp->pfnPrintf(pHlp, + "I/O Port R3 ranges (pVM=%p)\n" + "Range %.*s %.*s %.*s %.*s Description\n", + pVM, + sizeof(RTHCPTR) * 2, "pDevIns ", + sizeof(RTHCPTR) * 2, "In ", + sizeof(RTHCPTR) * 2, "Out ", + sizeof(RTHCPTR) * 2, "pvUser "); + RTAvlroIOPortDoWithAll(&pVM->iom.s.pTreesR3->IOPortTreeR3, true, iomR3IOPortInfoOneR3, (void *)pHlp); + + pHlp->pfnPrintf(pHlp, + "I/O Port R0 ranges (pVM=%p)\n" + "Range %.*s %.*s %.*s %.*s Description\n", + pVM, + sizeof(RTHCPTR) * 2, "pDevIns ", + sizeof(RTHCPTR) * 2, "In ", + sizeof(RTHCPTR) * 2, "Out ", + sizeof(RTHCPTR) * 2, "pvUser "); + RTAvlroIOPortDoWithAll(&pVM->iom.s.pTreesR3->IOPortTreeR0, true, iomR3IOPortInfoOneR3, (void *)pHlp); + + pHlp->pfnPrintf(pHlp, + "I/O Port GC ranges (pVM=%p)\n" + "Range %.*s %.*s %.*s %.*s Description\n", + pVM, + sizeof(RTRCPTR) * 2, "pDevIns ", + sizeof(RTRCPTR) * 2, "In ", + sizeof(RTRCPTR) * 2, "Out ", + sizeof(RTRCPTR) * 2, "pvUser "); + RTAvlroIOPortDoWithAll(&pVM->iom.s.pTreesR3->IOPortTreeRC, true, iomR3IOPortInfoOneRC, (void *)pHlp); +} + + +/** + * Registers a Memory Mapped I/O R3 handler. + * + * This API is called by PDM on behalf of a device. Devices must register ring-3 ranges + * before any GC and R0 ranges can be registered using IOMR3MMIORegisterRC() and IOMR3MMIORegisterR0(). + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. 
+ * @param pDevIns PDM device instance owning the MMIO range. + * @param GCPhysStart First physical address in the range. + * @param cbRange The size of the range (in bytes). + * @param pvUser User argument for the callbacks. + * @param pfnWriteCallback Pointer to function which is gonna handle Write operations. + * @param pfnReadCallback Pointer to function which is gonna handle Read operations. + * @param pfnFillCallback Pointer to function which is gonna handle Fill/memset operations. + * @param fFlags Flags, see IOMMMIO_FLAGS_XXX. + * @param pszDesc Pointer to description string. This must not be freed. + */ +VMMR3_INT_DECL(int) +IOMR3MmioRegisterR3(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange, RTHCPTR pvUser, + R3PTRTYPE(PFNIOMMMIOWRITE) pfnWriteCallback, R3PTRTYPE(PFNIOMMMIOREAD) pfnReadCallback, + R3PTRTYPE(PFNIOMMMIOFILL) pfnFillCallback, uint32_t fFlags, const char *pszDesc) +{ + LogFlow(("IOMR3MmioRegisterR3: pDevIns=%p GCPhysStart=%RGp cbRange=%RGp pvUser=%RHv pfnWriteCallback=%#x pfnReadCallback=%#x pfnFillCallback=%#x fFlags=%#x pszDesc=%s\n", + pDevIns, GCPhysStart, cbRange, pvUser, pfnWriteCallback, pfnReadCallback, pfnFillCallback, fFlags, pszDesc)); + int rc; + + /* + * Validate input. + */ + AssertMsgReturn(GCPhysStart + (cbRange - 1) >= GCPhysStart,("Wrapped! %RGp LB %RGp\n", GCPhysStart, cbRange), + VERR_IOM_INVALID_MMIO_RANGE); + AssertMsgReturn( !(fFlags & ~IOMMMIO_FLAGS_VALID_MASK) + && (fFlags & IOMMMIO_FLAGS_READ_MODE) <= IOMMMIO_FLAGS_READ_DWORD_QWORD + && (fFlags & IOMMMIO_FLAGS_WRITE_MODE) <= IOMMMIO_FLAGS_WRITE_ONLY_DWORD_QWORD, + ("%#x\n", fFlags), + VERR_INVALID_PARAMETER); + + /* + * Allocate new range record and initialize it. + */ + PIOMMMIORANGE pRange; + rc = MMHyperAlloc(pVM, sizeof(*pRange), 0, MM_TAG_IOM, (void **)&pRange); + if (RT_SUCCESS(rc)) + { + pRange->Core.Key = GCPhysStart; + pRange->Core.KeyLast = GCPhysStart + (cbRange - 1); + pRange->GCPhys = GCPhysStart; + pRange->cb = cbRange; + pRange->cRefs = 1; /* The tree reference. */ + pRange->pszDesc = pszDesc; + + //pRange->pvUserR0 = NIL_RTR0PTR; + //pRange->pDevInsR0 = NIL_RTR0PTR; + //pRange->pfnReadCallbackR0 = NIL_RTR0PTR; + //pRange->pfnWriteCallbackR0 = NIL_RTR0PTR; + //pRange->pfnFillCallbackR0 = NIL_RTR0PTR; + + //pRange->pvUserRC = NIL_RTRCPTR; + //pRange->pDevInsRC = NIL_RTRCPTR; + //pRange->pfnReadCallbackRC = NIL_RTRCPTR; + //pRange->pfnWriteCallbackRC = NIL_RTRCPTR; + //pRange->pfnFillCallbackRC = NIL_RTRCPTR; + + pRange->fFlags = fFlags; + + pRange->pvUserR3 = pvUser; + pRange->pDevInsR3 = pDevIns; + pRange->pfnReadCallbackR3 = pfnReadCallback; + pRange->pfnWriteCallbackR3 = pfnWriteCallback; + pRange->pfnFillCallbackR3 = pfnFillCallback; + + /* + * Try register it with PGM and then insert it into the tree. + */ + rc = PGMR3PhysMMIORegister(pVM, GCPhysStart, cbRange, pVM->iom.s.hMmioHandlerType, + pRange, MMHyperR3ToR0(pVM, pRange), MMHyperR3ToRC(pVM, pRange), pszDesc); + if (RT_SUCCESS(rc)) + { + IOM_LOCK_EXCL(pVM); + if (RTAvlroGCPhysInsert(&pVM->iom.s.pTreesR3->MMIOTree, &pRange->Core)) + { + iomR3FlushCache(pVM); + IOM_UNLOCK_EXCL(pVM); + return VINF_SUCCESS; + } + + /* bail out */ + IOM_UNLOCK_EXCL(pVM); + DBGFR3Info(pVM->pUVM, "mmio", NULL, NULL); + AssertMsgFailed(("This cannot happen!\n")); + rc = VERR_IOM_IOPORT_IPE_3; + } + + MMHyperFree(pVM, pRange); + } + if (pDevIns->iInstance > 0) + MMR3HeapFree((void *)pszDesc); + return rc; +} + + +/** + * Registers a Memory Mapped I/O RC handler range. + * + * This API is called by PDM on behalf of a device. 
Devices must first register ring-3 ranges
+ * using IOMR3MmioRegisterR3() before calling this function.
+ *
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pDevIns PDM device instance owning the MMIO range.
+ * @param GCPhysStart First physical address in the range.
+ * @param cbRange The size of the range (in bytes).
+ * @param pvUser User argument for the callbacks.
+ * @param pfnWriteCallback Pointer to function which handles Write operations.
+ * @param pfnReadCallback Pointer to function which handles Read operations.
+ * @param pfnFillCallback Pointer to function which handles Fill/memset operations.
+ * @thread EMT
+ */
+VMMR3_INT_DECL(int)
+IOMR3MmioRegisterRC(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange, RTGCPTR pvUser,
+ RCPTRTYPE(PFNIOMMMIOWRITE) pfnWriteCallback, RCPTRTYPE(PFNIOMMMIOREAD) pfnReadCallback,
+ RCPTRTYPE(PFNIOMMMIOFILL) pfnFillCallback)
+{
+ LogFlow(("IOMR3MmioRegisterRC: pDevIns=%p GCPhysStart=%RGp cbRange=%RGp pvUser=%RGv pfnWriteCallback=%#x pfnReadCallback=%#x pfnFillCallback=%#x\n",
+ pDevIns, GCPhysStart, cbRange, pvUser, pfnWriteCallback, pfnReadCallback, pfnFillCallback));
+ AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_IOM_HM_IPE);
+
+ /*
+ * Validate input.
+ */
+ if (!pfnWriteCallback && !pfnReadCallback)
+ {
+ AssertMsgFailed(("No callbacks! %RGp LB %RGp\n", GCPhysStart, cbRange));
+ return VERR_INVALID_PARAMETER;
+ }
+ PVMCPU pVCpu = VMMGetCpu(pVM); Assert(pVCpu);
+
+ /*
+ * Find the MMIO range and check that the input matches.
+ */
+ IOM_LOCK_EXCL(pVM);
+ PIOMMMIORANGE pRange = iomMmioGetRange(pVM, pVCpu, GCPhysStart);
+ AssertReturnStmt(pRange, IOM_UNLOCK_EXCL(pVM), VERR_IOM_MMIO_RANGE_NOT_FOUND);
+ AssertReturnStmt(pRange->pDevInsR3 == pDevIns, IOM_UNLOCK_EXCL(pVM), VERR_IOM_NOT_MMIO_RANGE_OWNER);
+ AssertReturnStmt(pRange->GCPhys == GCPhysStart, IOM_UNLOCK_EXCL(pVM), VERR_IOM_INVALID_MMIO_RANGE);
+ AssertReturnStmt(pRange->cb == cbRange, IOM_UNLOCK_EXCL(pVM), VERR_IOM_INVALID_MMIO_RANGE);
+
+ pRange->pvUserRC = pvUser;
+ pRange->pfnReadCallbackRC = pfnReadCallback;
+ pRange->pfnWriteCallbackRC= pfnWriteCallback;
+ pRange->pfnFillCallbackRC = pfnFillCallback;
+ pRange->pDevInsRC = MMHyperCCToRC(pVM, pDevIns);
+ IOM_UNLOCK_EXCL(pVM);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Registers a Memory Mapped I/O R0 handler range.
+ *
+ * This API is called by PDM on behalf of a device. Devices must first register ring-3 ranges
+ * using IOMR3MmioRegisterR3() before calling this function.
+ *
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pDevIns PDM device instance owning the MMIO range.
+ * @param GCPhysStart First physical address in the range.
+ * @param cbRange The size of the range (in bytes).
+ * @param pvUser User argument for the callbacks.
+ * @param pfnWriteCallback Pointer to function which handles Write operations.
+ * @param pfnReadCallback Pointer to function which handles Read operations.
+ * @param pfnFillCallback Pointer to function which handles Fill/memset operations.
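+ *
+ * @remarks Illustrative sketch only (device, callback and flag variable names are
+ *          made up; the call order follows the requirement stated above): a device
+ *          constructor first registers the ring-3 range and then mirrors it for
+ *          ring-0 execution:
+ * @code
+ *          rc = IOMR3MmioRegisterR3(pVM, pDevIns, GCPhysStart, cbRange, pThis,
+ *                                   mydevMmioWrite, mydevMmioRead, NULL,
+ *                                   fFlags, "MYDEV MMIO");
+ *          if (RT_SUCCESS(rc) && fR0Enabled)
+ *              rc = IOMR3MmioRegisterR0(pVM, pDevIns, GCPhysStart, cbRange, pvUserR0,
+ *                                       pfnMmioWriteR0, pfnMmioReadR0, NIL_RTR0PTR);
+ * @endcode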
+ * @thread EMT + */ +VMMR3_INT_DECL(int) +IOMR3MmioRegisterR0(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange, RTR0PTR pvUser, + R0PTRTYPE(PFNIOMMMIOWRITE) pfnWriteCallback, + R0PTRTYPE(PFNIOMMMIOREAD) pfnReadCallback, + R0PTRTYPE(PFNIOMMMIOFILL) pfnFillCallback) +{ + LogFlow(("IOMR3MmioRegisterR0: pDevIns=%p GCPhysStart=%RGp cbRange=%RGp pvUser=%RHv pfnWriteCallback=%#x pfnReadCallback=%#x pfnFillCallback=%#x\n", + pDevIns, GCPhysStart, cbRange, pvUser, pfnWriteCallback, pfnReadCallback, pfnFillCallback)); + + /* + * Validate input. + */ + if (!pfnWriteCallback && !pfnReadCallback) + { + AssertMsgFailed(("No callbacks! %RGp LB %RGp\n", GCPhysStart, cbRange)); + return VERR_INVALID_PARAMETER; + } + PVMCPU pVCpu = VMMGetCpu(pVM); Assert(pVCpu); + + /* + * Find the MMIO range and check that the input matches. + */ + IOM_LOCK_EXCL(pVM); + PIOMMMIORANGE pRange = iomMmioGetRange(pVM, pVCpu, GCPhysStart); + AssertReturnStmt(pRange, IOM_UNLOCK_EXCL(pVM), VERR_IOM_MMIO_RANGE_NOT_FOUND); + AssertReturnStmt(pRange->pDevInsR3 == pDevIns, IOM_UNLOCK_EXCL(pVM), VERR_IOM_NOT_MMIO_RANGE_OWNER); + AssertReturnStmt(pRange->GCPhys == GCPhysStart, IOM_UNLOCK_EXCL(pVM), VERR_IOM_INVALID_MMIO_RANGE); + AssertReturnStmt(pRange->cb == cbRange, IOM_UNLOCK_EXCL(pVM), VERR_IOM_INVALID_MMIO_RANGE); + + pRange->pvUserR0 = pvUser; + pRange->pfnReadCallbackR0 = pfnReadCallback; + pRange->pfnWriteCallbackR0= pfnWriteCallback; + pRange->pfnFillCallbackR0 = pfnFillCallback; + pRange->pDevInsR0 = MMHyperCCToR0(pVM, pDevIns); + IOM_UNLOCK_EXCL(pVM); + + return VINF_SUCCESS; +} + + +/** + * Deregisters a Memory Mapped I/O handler range. + * + * Registered GC, R0, and R3 ranges are affected. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns Device instance which the MMIO region is registered. + * @param GCPhysStart First physical address (GC) in the range. + * @param cbRange Number of bytes to deregister. + * + * @remark This function mainly for PCI PnP Config and will not do + * all the checks you might expect it to do. + */ +VMMR3_INT_DECL(int) IOMR3MmioDeregister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange) +{ + LogFlow(("IOMR3MmioDeregister: pDevIns=%p GCPhysStart=%RGp cbRange=%RGp\n", pDevIns, GCPhysStart, cbRange)); + + /* + * Validate input. + */ + RTGCPHYS GCPhysLast = GCPhysStart + (cbRange - 1); + if (GCPhysLast < GCPhysStart) + { + AssertMsgFailed(("Wrapped! %#x LB %RGp\n", GCPhysStart, cbRange)); + return VERR_IOM_INVALID_MMIO_RANGE; + } + PVMCPU pVCpu = VMMGetCpu(pVM); Assert(pVCpu); + + IOM_LOCK_EXCL(pVM); + + /* + * Check ownership and such for the entire area. + */ + RTGCPHYS GCPhys = GCPhysStart; + while (GCPhys <= GCPhysLast && GCPhys >= GCPhysStart) + { + PIOMMMIORANGE pRange = iomMmioGetRange(pVM, pVCpu, GCPhys); + if (!pRange) + { + IOM_UNLOCK_EXCL(pVM); + return VERR_IOM_MMIO_RANGE_NOT_FOUND; + } + AssertMsgReturnStmt(pRange->pDevInsR3 == pDevIns, + ("Not owner! GCPhys=%RGp %RGp LB %RGp %s\n", GCPhys, GCPhysStart, cbRange, pRange->pszDesc), + IOM_UNLOCK_EXCL(pVM), + VERR_IOM_NOT_MMIO_RANGE_OWNER); + AssertMsgReturnStmt(pRange->Core.KeyLast <= GCPhysLast, + ("Incomplete R3 range! GCPhys=%RGp %RGp LB %RGp %s\n", GCPhys, GCPhysStart, cbRange, pRange->pszDesc), + IOM_UNLOCK_EXCL(pVM), + VERR_IOM_INCOMPLETE_MMIO_RANGE); + + /* next */ + Assert(GCPhys <= pRange->Core.KeyLast); + GCPhys = pRange->Core.KeyLast + 1; + } + + /* + * Do the actual removing of the MMIO ranges. 
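+ * (Note: the loop below temporarily drops the IOM lock around the
+ * PGMR3PhysMMIODeregister call, see the "Lock order fun" remark, presumably to
+ * respect the PGM vs. IOM lock ordering.)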
+ */ + GCPhys = GCPhysStart; + while (GCPhys <= GCPhysLast && GCPhys >= GCPhysStart) + { + iomR3FlushCache(pVM); + + PIOMMMIORANGE pRange = (PIOMMMIORANGE)RTAvlroGCPhysRemove(&pVM->iom.s.pTreesR3->MMIOTree, GCPhys); + Assert(pRange); + Assert(pRange->Core.Key == GCPhys && pRange->Core.KeyLast <= GCPhysLast); + IOM_UNLOCK_EXCL(pVM); /* Lock order fun. */ + + /* remove it from PGM */ + int rc = PGMR3PhysMMIODeregister(pVM, GCPhys, pRange->cb); + AssertRC(rc); + + IOM_LOCK_EXCL(pVM); + + /* advance and free. */ + GCPhys = pRange->Core.KeyLast + 1; + if (pDevIns->iInstance > 0) + { + void *pvDesc = ASMAtomicXchgPtr((void * volatile *)&pRange->pszDesc, NULL); + MMR3HeapFree(pvDesc); + } + iomMmioReleaseRange(pVM, pRange); + } + + IOM_UNLOCK_EXCL(pVM); + return VINF_SUCCESS; +} + + +/** + * Pre-Registers a MMIO region. + * + * The rest of of the manipulation of this region goes thru the PGMPhysMMIOEx* + * APIs: PGMR3PhysMMIOExMap, PGMR3PhysMMIOExUnmap, PGMR3PhysMMIOExDeregister + * + * @returns VBox status code. + * @param pVM Pointer to the cross context VM structure. + * @param pDevIns The device. + * @param iSubDev The sub-device number. + * @param iRegion The region number. + * @param cbRegion The size of the MMIO region. Must be a multiple + * of X86_PAGE_SIZE + * @param fFlags Flags, see IOMMMIO_FLAGS_XXX. + * @param pszDesc Pointer to description string. This must not be + * freed. + * @param pvUserR3 Ring-3 user pointer. + * @param pfnWriteCallbackR3 Callback for handling writes, ring-3. Mandatory. + * @param pfnReadCallbackR3 Callback for handling reads, ring-3. Mandatory. + * @param pfnFillCallbackR3 Callback for handling fills, ring-3. Optional. + * @param pvUserR0 Ring-0 user pointer. + * @param pfnWriteCallbackR0 Callback for handling writes, ring-0. Optional. + * @param pfnReadCallbackR0 Callback for handling reads, ring-0. Optional. + * @param pfnFillCallbackR0 Callback for handling fills, ring-0. Optional. + * @param pvUserRC Raw-mode context user pointer. This will be + * relocated with the hypervisor guest mapping if + * the unsigned integer value is 0x10000 or above. + * @param pfnWriteCallbackRC Callback for handling writes, RC. Optional. + * @param pfnReadCallbackRC Callback for handling reads, RC. Optional. + * @param pfnFillCallbackRC Callback for handling fills, RC. Optional. 
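+ *
+ * @remarks Illustrative usage sketch (callback names are invented and the
+ *          PGMR3PhysMMIOExMap/Unmap signatures are assumptions, not taken from this
+ *          file): the region is pre-registered once at construction time and then
+ *          mapped/unmapped as the guest programs the corresponding BAR.
+ * @code
+ *          rc = IOMR3MmioExPreRegister(pVM, pDevIns, 0, 0, cbRegion, fFlags, "MYDEV BAR0",
+ *                                      pThis,       mydevMmioWrite, mydevMmioRead, NULL,
+ *                                      NIL_RTR0PTR, NIL_RTR0PTR,    NIL_RTR0PTR,   NIL_RTR0PTR,
+ *                                      NIL_RTRCPTR, NIL_RTRCPTR,    NIL_RTRCPTR,   NIL_RTRCPTR);
+ *          ...
+ *          rc = PGMR3PhysMMIOExMap(pVM, pDevIns, 0, 0, GCPhysBar);    // BAR mapped
+ *          ...
+ *          rc = PGMR3PhysMMIOExUnmap(pVM, pDevIns, 0, 0, GCPhysBar);  // BAR unmapped/moved
+ * @endcode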
+ */ +VMMR3_INT_DECL(int) IOMR3MmioExPreRegister(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cbRegion, + uint32_t fFlags, const char *pszDesc, + RTR3PTR pvUserR3, + R3PTRTYPE(PFNIOMMMIOWRITE) pfnWriteCallbackR3, + R3PTRTYPE(PFNIOMMMIOREAD) pfnReadCallbackR3, + R3PTRTYPE(PFNIOMMMIOFILL) pfnFillCallbackR3, + RTR0PTR pvUserR0, + R0PTRTYPE(PFNIOMMMIOWRITE) pfnWriteCallbackR0, + R0PTRTYPE(PFNIOMMMIOREAD) pfnReadCallbackR0, + R0PTRTYPE(PFNIOMMMIOFILL) pfnFillCallbackR0, + RTRCPTR pvUserRC, + RCPTRTYPE(PFNIOMMMIOWRITE) pfnWriteCallbackRC, + RCPTRTYPE(PFNIOMMMIOREAD) pfnReadCallbackRC, + RCPTRTYPE(PFNIOMMMIOFILL) pfnFillCallbackRC) +{ + LogFlow(("IOMR3MmioExPreRegister: pDevIns=%p iSubDev=%u iRegion=%u cbRegion=%RGp fFlags=%#x pszDesc=%s\n" + " pvUserR3=%RHv pfnWriteCallbackR3=%RHv pfnReadCallbackR3=%RHv pfnFillCallbackR3=%RHv\n" + " pvUserR0=%RHv pfnWriteCallbackR0=%RHv pfnReadCallbackR0=%RHv pfnFillCallbackR0=%RHv\n" + " pvUserRC=%RRv pfnWriteCallbackRC=%RRv pfnReadCallbackRC=%RRv pfnFillCallbackRC=%RRv\n", + pDevIns, iSubDev, iRegion, cbRegion, fFlags, pszDesc, + pvUserR3, pfnWriteCallbackR3, pfnReadCallbackR3, pfnFillCallbackR3, + pvUserR0, pfnWriteCallbackR0, pfnReadCallbackR0, pfnFillCallbackR0, + pvUserRC, pfnWriteCallbackRC, pfnReadCallbackRC, pfnFillCallbackRC)); + + /* + * Validate input. + */ + AssertReturn(cbRegion > 0, VERR_INVALID_PARAMETER); + AssertReturn(RT_ALIGN_T(cbRegion, X86_PAGE_SIZE, RTGCPHYS), VERR_INVALID_PARAMETER); + AssertMsgReturn( !(fFlags & ~IOMMMIO_FLAGS_VALID_MASK) + && (fFlags & IOMMMIO_FLAGS_READ_MODE) <= IOMMMIO_FLAGS_READ_DWORD_QWORD + && (fFlags & IOMMMIO_FLAGS_WRITE_MODE) <= IOMMMIO_FLAGS_WRITE_ONLY_DWORD_QWORD, + ("%#x\n", fFlags), + VERR_INVALID_PARAMETER); + AssertPtrReturn(pfnWriteCallbackR3, VERR_INVALID_POINTER); + AssertPtrReturn(pfnReadCallbackR3, VERR_INVALID_POINTER); + + /* + * Allocate new range record and initialize it. + */ + PIOMMMIORANGE pRange; + int rc = MMHyperAlloc(pVM, sizeof(*pRange), 0, MM_TAG_IOM, (void **)&pRange); + if (RT_SUCCESS(rc)) + { + pRange->Core.Key = NIL_RTGCPHYS; + pRange->Core.KeyLast = NIL_RTGCPHYS; + pRange->GCPhys = NIL_RTGCPHYS; + pRange->cb = cbRegion; + pRange->cRefs = 1; /* The PGM reference. */ + pRange->fFlags = fFlags; + + pRange->pvUserR3 = pvUserR3; + pRange->pDevInsR3 = pDevIns; + pRange->pfnReadCallbackR3 = pfnReadCallbackR3; + pRange->pfnWriteCallbackR3 = pfnWriteCallbackR3; + pRange->pfnFillCallbackR3 = pfnFillCallbackR3; + pRange->pszDesc = pszDesc; + + if (pfnReadCallbackR0 || pfnWriteCallbackR0 || pfnFillCallbackR0) + { + pRange->pvUserR0 = pvUserR0; + pRange->pDevInsR0 = MMHyperCCToR0(pVM, pDevIns); + pRange->pfnReadCallbackR0 = pfnReadCallbackR0; + pRange->pfnWriteCallbackR0 = pfnWriteCallbackR0; + pRange->pfnFillCallbackR0 = pfnFillCallbackR0; + } + + if (pfnReadCallbackRC || pfnWriteCallbackRC || pfnFillCallbackRC) + { + pRange->pvUserRC = pvUserRC; + pRange->pDevInsRC = MMHyperCCToRC(pVM, pDevIns); + pRange->pfnReadCallbackRC = pfnReadCallbackRC; + pRange->pfnWriteCallbackRC = pfnWriteCallbackRC; + pRange->pfnFillCallbackRC = pfnFillCallbackRC; + } + + /* + * Try register it with PGM. PGM will call us back when it's mapped in + * and out of the guest address space, and once it's destroyed. 
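+ * (The callbacks in question are IOMR3MmioExNotifyMapped, IOMR3MmioExNotifyUnmapped
+ * and IOMR3MmioExNotifyDeregistered further down in this file.)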
+ */
+ rc = PGMR3PhysMMIOExPreRegister(pVM, pDevIns, iSubDev, iRegion, cbRegion, pVM->iom.s.hMmioHandlerType,
+ pRange, MMHyperR3ToR0(pVM, pRange), MMHyperR3ToRC(pVM, pRange), pszDesc);
+ if (RT_SUCCESS(rc))
+ return VINF_SUCCESS;
+
+ MMHyperFree(pVM, pRange);
+ }
+ if (pDevIns->iInstance > 0)
+ MMR3HeapFree((void *)pszDesc);
+ return rc;
+
+}
+
+
+/**
+ * Notification from PGM that the pre-registered MMIO region has been mapped into
+ * the guest physical address space.
+ *
+ * @returns VBox status code.
+ * @param pVM Pointer to the cross context VM structure.
+ * @param pvUser The pvUserR3 argument of PGMR3PhysMMIOExPreRegister.
+ * @param GCPhys The mapping address.
+ * @remarks Called while owning the PGM lock.
+ */
+VMMR3_INT_DECL(int) IOMR3MmioExNotifyMapped(PVM pVM, void *pvUser, RTGCPHYS GCPhys)
+{
+ PIOMMMIORANGE pRange = (PIOMMMIORANGE)pvUser;
+ AssertReturn(pRange->GCPhys == NIL_RTGCPHYS, VERR_IOM_MMIO_IPE_1);
+
+ IOM_LOCK_EXCL(pVM);
+ Assert(pRange->GCPhys == NIL_RTGCPHYS);
+ pRange->GCPhys = GCPhys;
+ pRange->Core.Key = GCPhys;
+ pRange->Core.KeyLast = GCPhys + pRange->cb - 1;
+ if (RTAvlroGCPhysInsert(&pVM->iom.s.pTreesR3->MMIOTree, &pRange->Core))
+ {
+ iomR3FlushCache(pVM);
+ IOM_UNLOCK_EXCL(pVM);
+ return VINF_SUCCESS;
+ }
+ IOM_UNLOCK_EXCL(pVM);
+
+ AssertLogRelMsgFailed(("RTAvlroGCPhysInsert failed on %RGp..%RGp - %s\n", pRange->Core.Key, pRange->Core.KeyLast, pRange->pszDesc));
+ pRange->GCPhys = NIL_RTGCPHYS;
+ pRange->Core.Key = NIL_RTGCPHYS;
+ pRange->Core.KeyLast = NIL_RTGCPHYS;
+ return VERR_IOM_MMIO_IPE_2;
+}
+
+
+/**
+ * Notification from PGM that the pre-registered MMIO region has been unmapped
+ * from the guest physical address space.
+ *
+ * @param pVM Pointer to the cross context VM structure.
+ * @param pvUser The pvUserR3 argument of PGMR3PhysMMIOExPreRegister.
+ * @param GCPhys The mapping address.
+ * @remarks Called while owning the PGM lock.
+ */
+VMMR3_INT_DECL(void) IOMR3MmioExNotifyUnmapped(PVM pVM, void *pvUser, RTGCPHYS GCPhys)
+{
+ PIOMMMIORANGE pRange = (PIOMMMIORANGE)pvUser;
+ AssertLogRelReturnVoid(pRange->GCPhys == GCPhys);
+
+ IOM_LOCK_EXCL(pVM);
+ Assert(pRange->GCPhys == GCPhys);
+ PIOMMMIORANGE pRemoved = (PIOMMMIORANGE)RTAvlroGCPhysRemove(&pVM->iom.s.pTreesR3->MMIOTree, GCPhys);
+ if (pRemoved == pRange)
+ {
+ pRange->GCPhys = NIL_RTGCPHYS;
+ pRange->Core.Key = NIL_RTGCPHYS;
+ pRange->Core.KeyLast = NIL_RTGCPHYS;
+ iomR3FlushCache(pVM);
+ IOM_UNLOCK_EXCL(pVM);
+ }
+ else
+ {
+ if (pRemoved)
+ RTAvlroGCPhysInsert(&pVM->iom.s.pTreesR3->MMIOTree, &pRemoved->Core);
+ IOM_UNLOCK_EXCL(pVM);
+ AssertLogRelMsgFailed(("RTAvlroGCPhysRemove returned %p instead of %p for %RGp (%s)\n",
+ pRemoved, pRange, GCPhys, pRange->pszDesc));
+ }
+}
+
+
+/**
+ * Notification from PGM that the pre-registered MMIO region has been deregistered.
+ *
+ * @param pVM Pointer to the cross context VM structure.
+ * @param pvUser The pvUserR3 argument of PGMR3PhysMMIOExPreRegister.
+ * @remarks Called while owning the PGM lock.
+ */
+VMMR3_INT_DECL(void) IOMR3MmioExNotifyDeregistered(PVM pVM, void *pvUser)
+{
+ PIOMMMIORANGE pRange = (PIOMMMIORANGE)pvUser;
+ AssertLogRelReturnVoid(pRange->GCPhys == NIL_RTGCPHYS);
+ iomMmioReleaseRange(pVM, pRange);
+}
+
+
+/**
+ * Handles the unlikely and probably fatal merge cases.
+ *
+ * @returns Merged status code.
+ * @param rcStrict Current EM status code.
+ * @param rcStrictCommit The IOM I/O or MMIO write commit status to merge
+ * with @a rcStrict.
+ * @param rcIom For logging purposes only.
+ * @param pVCpu The cross context virtual CPU structure of the + * calling EMT. For logging purposes. + */ +DECL_NO_INLINE(static, VBOXSTRICTRC) iomR3MergeStatusSlow(VBOXSTRICTRC rcStrict, VBOXSTRICTRC rcStrictCommit, + int rcIom, PVMCPU pVCpu) +{ + if (RT_FAILURE_NP(rcStrict)) + return rcStrict; + + if (RT_FAILURE_NP(rcStrictCommit)) + return rcStrictCommit; + + if (rcStrict == rcStrictCommit) + return rcStrictCommit; + + AssertLogRelMsgFailed(("rcStrictCommit=%Rrc rcStrict=%Rrc IOPort={%#06x<-%#xx/%u} MMIO={%RGp<-%.*Rhxs} (rcIom=%Rrc)\n", + VBOXSTRICTRC_VAL(rcStrictCommit), VBOXSTRICTRC_VAL(rcStrict), + pVCpu->iom.s.PendingIOPortWrite.IOPort, + pVCpu->iom.s.PendingIOPortWrite.u32Value, pVCpu->iom.s.PendingIOPortWrite.cbValue, + pVCpu->iom.s.PendingMmioWrite.GCPhys, + pVCpu->iom.s.PendingMmioWrite.cbValue, &pVCpu->iom.s.PendingMmioWrite.abValue[0], rcIom)); + return VERR_IOM_FF_STATUS_IPE; +} + + +/** + * Helper for IOMR3ProcessForceFlag. + * + * @returns Merged status code. + * @param rcStrict Current EM status code. + * @param rcStrictCommit The IOM I/O or MMIO write commit status to merge + * with @a rcStrict. + * @param rcIom Either VINF_IOM_R3_IOPORT_COMMIT_WRITE or + * VINF_IOM_R3_MMIO_COMMIT_WRITE. + * @param pVCpu The cross context virtual CPU structure of the + * calling EMT. + */ +DECLINLINE(VBOXSTRICTRC) iomR3MergeStatus(VBOXSTRICTRC rcStrict, VBOXSTRICTRC rcStrictCommit, int rcIom, PVMCPU pVCpu) +{ + /* Simple. */ + if (RT_LIKELY(rcStrict == rcIom || rcStrict == VINF_EM_RAW_TO_R3 || rcStrict == VINF_SUCCESS)) + return rcStrictCommit; + + if (RT_LIKELY(rcStrictCommit == VINF_SUCCESS)) + return rcStrict; + + /* EM scheduling status codes. */ + if (RT_LIKELY( rcStrict >= VINF_EM_FIRST + && rcStrict <= VINF_EM_LAST)) + { + if (RT_LIKELY( rcStrictCommit >= VINF_EM_FIRST + && rcStrictCommit <= VINF_EM_LAST)) + return rcStrict < rcStrictCommit ? rcStrict : rcStrictCommit; + } + + /* Unlikely */ + return iomR3MergeStatusSlow(rcStrict, rcStrictCommit, rcIom, pVCpu); +} + + +/** + * Called by force-flag handling code when VMCPU_FF_IOM is set. + * + * @returns Merge between @a rcStrict and what the commit operation returned. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param rcStrict The status code returned by ring-0 or raw-mode. + * @thread EMT(pVCpu) + * + * @remarks The VMCPU_FF_IOM flag is handled before the status codes by EM, so + * we're very likely to see @a rcStrict set to + * VINF_IOM_R3_IOPORT_COMMIT_WRITE and VINF_IOM_R3_MMIO_COMMIT_WRITE + * here. 
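+ *
+ * An illustration of how iomR3MergeStatus combines the two codes (not an
+ * exhaustive table, added for this write-up):
+ * @verbatim
+   rcStrict = VINF_IOM_R3_IOPORT_COMMIT_WRITE, rcStrictCommit = VINF_SUCCESS
+       -> VINF_SUCCESS         (the commit request was honoured, its result wins)
+   rcStrict = VINF_EM_RESCHEDULE,              rcStrictCommit = VINF_SUCCESS
+       -> VINF_EM_RESCHEDULE   (a pending scheduling request is preserved)
+   both codes in the VINF_EM_FIRST..VINF_EM_LAST range
+       -> the numerically lower (more important) of the two
+ @endverbatim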
+ */ +VMMR3_INT_DECL(VBOXSTRICTRC) IOMR3ProcessForceFlag(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rcStrict) +{ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_IOM); + Assert(pVCpu->iom.s.PendingIOPortWrite.cbValue || pVCpu->iom.s.PendingMmioWrite.cbValue); + + if (pVCpu->iom.s.PendingIOPortWrite.cbValue) + { + Log5(("IOM: Dispatching pending I/O port write: %#x LB %u -> %RTiop\n", pVCpu->iom.s.PendingIOPortWrite.u32Value, + pVCpu->iom.s.PendingIOPortWrite.cbValue, pVCpu->iom.s.PendingIOPortWrite.IOPort)); + VBOXSTRICTRC rcStrictCommit = IOMIOPortWrite(pVM, pVCpu, pVCpu->iom.s.PendingIOPortWrite.IOPort, + pVCpu->iom.s.PendingIOPortWrite.u32Value, + pVCpu->iom.s.PendingIOPortWrite.cbValue); + pVCpu->iom.s.PendingIOPortWrite.cbValue = 0; + rcStrict = iomR3MergeStatus(rcStrict, rcStrictCommit, VINF_IOM_R3_IOPORT_COMMIT_WRITE, pVCpu); + } + + + if (pVCpu->iom.s.PendingMmioWrite.cbValue) + { + Log5(("IOM: Dispatching pending MMIO write: %RGp LB %#x\n", + pVCpu->iom.s.PendingMmioWrite.GCPhys, pVCpu->iom.s.PendingMmioWrite.cbValue)); + /** @todo Try optimize this some day? Currently easier and correcter to + * involve PGM here since we never know if the MMIO area is still mapped + * to the same location as when we wrote to it in RC/R0 context. */ + VBOXSTRICTRC rcStrictCommit = PGMPhysWrite(pVM, pVCpu->iom.s.PendingMmioWrite.GCPhys, + pVCpu->iom.s.PendingMmioWrite.abValue, pVCpu->iom.s.PendingMmioWrite.cbValue, + PGMACCESSORIGIN_IOM); + pVCpu->iom.s.PendingMmioWrite.cbValue = 0; + rcStrict = iomR3MergeStatus(rcStrict, rcStrictCommit, VINF_IOM_R3_MMIO_COMMIT_WRITE, pVCpu); + } + + return rcStrict; +} + + +/** + * Notification from DBGF that the number of active I/O port or MMIO + * breakpoints has change. + * + * For performance reasons, IOM will only call DBGF before doing I/O and MMIO + * accesses where there are armed breakpoints. + * + * @param pVM The cross context VM structure. + * @param fPortIo True if there are armed I/O port breakpoints. + * @param fMmio True if there are armed MMIO breakpoints. + */ +VMMR3_INT_DECL(void) IOMR3NotifyBreakpointCountChange(PVM pVM, bool fPortIo, bool fMmio) +{ + /** @todo I/O breakpoints. */ + RT_NOREF3(pVM, fPortIo, fMmio); +} + + +/** + * Notification from DBGF that an event has been enabled or disabled. + * + * For performance reasons, IOM may cache the state of events it implements. + * + * @param pVM The cross context VM structure. + * @param enmEvent The event. + * @param fEnabled The new state. + */ +VMMR3_INT_DECL(void) IOMR3NotifyDebugEventChange(PVM pVM, DBGFEVENT enmEvent, bool fEnabled) +{ + /** @todo IOM debug events. */ + RT_NOREF3(pVM, enmEvent, fEnabled); +} + + +/** + * Display a single MMIO range. + * + * @returns 0 + * @param pNode Pointer to MMIO R3 range. + * @param pvUser Pointer to info output callback structure. 
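+ *
+ * @remarks This is the per-range worker for the iomR3MMIOInfo handler below, i.e. the
+ *          "mmio" info item (the same one IOMR3MmioRegisterR3 dumps on a conflict).
+ *          Illustrative way to dump all ranges from ring-3 code:
+ * @code
+ *          DBGFR3Info(pVM->pUVM, "mmio", NULL, NULL);
+ * @endcode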
+ */ +static DECLCALLBACK(int) iomR3MMIOInfoOne(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PIOMMMIORANGE pRange = (PIOMMMIORANGE)pNode; + PCDBGFINFOHLP pHlp = (PCDBGFINFOHLP)pvUser; + pHlp->pfnPrintf(pHlp, + "%RGp-%RGp %RHv %RHv %RHv %RHv %RHv %s\n", + pRange->Core.Key, + pRange->Core.KeyLast, + pRange->pDevInsR3, + pRange->pfnReadCallbackR3, + pRange->pfnWriteCallbackR3, + pRange->pfnFillCallbackR3, + pRange->pvUserR3, + pRange->pszDesc); + pHlp->pfnPrintf(pHlp, + "%*s %RHv %RHv %RHv %RHv %RHv\n", + sizeof(RTGCPHYS) * 2 * 2 + 1, "R0", + pRange->pDevInsR0, + pRange->pfnReadCallbackR0, + pRange->pfnWriteCallbackR0, + pRange->pfnFillCallbackR0, + pRange->pvUserR0); + pHlp->pfnPrintf(pHlp, + "%*s %RRv %RRv %RRv %RRv %RRv\n", + sizeof(RTGCPHYS) * 2 * 2 + 1, "RC", + pRange->pDevInsRC, + pRange->pfnReadCallbackRC, + pRange->pfnWriteCallbackRC, + pRange->pfnFillCallbackRC, + pRange->pvUserRC); + return 0; +} + + +/** + * Display registered MMIO ranges to the log. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) iomR3MMIOInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + pHlp->pfnPrintf(pHlp, + "MMIO ranges (pVM=%p)\n" + "%.*s %.*s %.*s %.*s %.*s %.*s %s\n", + pVM, + sizeof(RTGCPHYS) * 4 + 1, "GC Phys Range ", + sizeof(RTHCPTR) * 2, "pDevIns ", + sizeof(RTHCPTR) * 2, "Read ", + sizeof(RTHCPTR) * 2, "Write ", + sizeof(RTHCPTR) * 2, "Fill ", + sizeof(RTHCPTR) * 2, "pvUser ", + "Description"); + RTAvlroGCPhysDoWithAll(&pVM->iom.s.pTreesR3->MMIOTree, true, iomR3MMIOInfoOne, (void *)pHlp); +} + + +#ifdef VBOX_WITH_STATISTICS +/** + * Tries to come up with the standard name for a port. + * + * @returns Pointer to readonly string if known. + * @returns NULL if unknown port number. + * + * @param Port The port to name. 
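+ *
+ * @remarks A few examples based on the switch below:
+ * @code
+ *          iomR3IOPortGetStandardName(0x3f8);  // "COM1"
+ *          iomR3IOPortGetStandardName(0x060);  // "Keyboard & Mouse"
+ *          iomR3IOPortGetStandardName(0xcf8);  // NULL (not in the table)
+ * @endcode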
+ */ +static const char *iomR3IOPortGetStandardName(RTIOPORT Port) +{ + switch (Port) + { + case 0x00: case 0x10: case 0x20: case 0x30: case 0x40: case 0x50: case 0x70: + case 0x01: case 0x11: case 0x21: case 0x31: case 0x41: case 0x51: case 0x61: case 0x71: + case 0x02: case 0x12: case 0x22: case 0x32: case 0x42: case 0x52: case 0x62: case 0x72: + case 0x03: case 0x13: case 0x23: case 0x33: case 0x43: case 0x53: case 0x63: case 0x73: + case 0x04: case 0x14: case 0x24: case 0x34: case 0x44: case 0x54: case 0x74: + case 0x05: case 0x15: case 0x25: case 0x35: case 0x45: case 0x55: case 0x65: case 0x75: + case 0x06: case 0x16: case 0x26: case 0x36: case 0x46: case 0x56: case 0x66: case 0x76: + case 0x07: case 0x17: case 0x27: case 0x37: case 0x47: case 0x57: case 0x67: case 0x77: + case 0x08: case 0x18: case 0x28: case 0x38: case 0x48: case 0x58: case 0x68: case 0x78: + case 0x09: case 0x19: case 0x29: case 0x39: case 0x49: case 0x59: case 0x69: case 0x79: + case 0x0a: case 0x1a: case 0x2a: case 0x3a: case 0x4a: case 0x5a: case 0x6a: case 0x7a: + case 0x0b: case 0x1b: case 0x2b: case 0x3b: case 0x4b: case 0x5b: case 0x6b: case 0x7b: + case 0x0c: case 0x1c: case 0x2c: case 0x3c: case 0x4c: case 0x5c: case 0x6c: case 0x7c: + case 0x0d: case 0x1d: case 0x2d: case 0x3d: case 0x4d: case 0x5d: case 0x6d: case 0x7d: + case 0x0e: case 0x1e: case 0x2e: case 0x3e: case 0x4e: case 0x5e: case 0x6e: case 0x7e: + case 0x0f: case 0x1f: case 0x2f: case 0x3f: case 0x4f: case 0x5f: case 0x6f: case 0x7f: + + case 0x80: case 0x90: case 0xa0: case 0xb0: case 0xc0: case 0xd0: case 0xe0: case 0xf0: + case 0x81: case 0x91: case 0xa1: case 0xb1: case 0xc1: case 0xd1: case 0xe1: case 0xf1: + case 0x82: case 0x92: case 0xa2: case 0xb2: case 0xc2: case 0xd2: case 0xe2: case 0xf2: + case 0x83: case 0x93: case 0xa3: case 0xb3: case 0xc3: case 0xd3: case 0xe3: case 0xf3: + case 0x84: case 0x94: case 0xa4: case 0xb4: case 0xc4: case 0xd4: case 0xe4: case 0xf4: + case 0x85: case 0x95: case 0xa5: case 0xb5: case 0xc5: case 0xd5: case 0xe5: case 0xf5: + case 0x86: case 0x96: case 0xa6: case 0xb6: case 0xc6: case 0xd6: case 0xe6: case 0xf6: + case 0x87: case 0x97: case 0xa7: case 0xb7: case 0xc7: case 0xd7: case 0xe7: case 0xf7: + case 0x88: case 0x98: case 0xa8: case 0xb8: case 0xc8: case 0xd8: case 0xe8: case 0xf8: + case 0x89: case 0x99: case 0xa9: case 0xb9: case 0xc9: case 0xd9: case 0xe9: case 0xf9: + case 0x8a: case 0x9a: case 0xaa: case 0xba: case 0xca: case 0xda: case 0xea: case 0xfa: + case 0x8b: case 0x9b: case 0xab: case 0xbb: case 0xcb: case 0xdb: case 0xeb: case 0xfb: + case 0x8c: case 0x9c: case 0xac: case 0xbc: case 0xcc: case 0xdc: case 0xec: case 0xfc: + case 0x8d: case 0x9d: case 0xad: case 0xbd: case 0xcd: case 0xdd: case 0xed: case 0xfd: + case 0x8e: case 0x9e: case 0xae: case 0xbe: case 0xce: case 0xde: case 0xee: case 0xfe: + case 0x8f: case 0x9f: case 0xaf: case 0xbf: case 0xcf: case 0xdf: case 0xef: case 0xff: + return "System Reserved"; + + case 0x60: + case 0x64: + return "Keyboard & Mouse"; + + case 0x378: + case 0x379: + case 0x37a: + case 0x37b: + case 0x37c: + case 0x37d: + case 0x37e: + case 0x37f: + case 0x3bc: + case 0x3bd: + case 0x3be: + case 0x3bf: + case 0x278: + case 0x279: + case 0x27a: + case 0x27b: + case 0x27c: + case 0x27d: + case 0x27e: + case 0x27f: + return "LPT1/2/3"; + + case 0x3f8: + case 0x3f9: + case 0x3fa: + case 0x3fb: + case 0x3fc: + case 0x3fd: + case 0x3fe: + case 0x3ff: + return "COM1"; + + case 0x2f8: + case 0x2f9: + case 0x2fa: + case 0x2fb: + case 0x2fc: + case 0x2fd: 
+ case 0x2fe: + case 0x2ff: + return "COM2"; + + case 0x3e8: + case 0x3e9: + case 0x3ea: + case 0x3eb: + case 0x3ec: + case 0x3ed: + case 0x3ee: + case 0x3ef: + return "COM3"; + + case 0x2e8: + case 0x2e9: + case 0x2ea: + case 0x2eb: + case 0x2ec: + case 0x2ed: + case 0x2ee: + case 0x2ef: + return "COM4"; + + case 0x200: + case 0x201: + case 0x202: + case 0x203: + case 0x204: + case 0x205: + case 0x206: + case 0x207: + return "Joystick"; + + case 0x3f0: + case 0x3f1: + case 0x3f2: + case 0x3f3: + case 0x3f4: + case 0x3f5: + case 0x3f6: + case 0x3f7: + return "Floppy"; + + case 0x1f0: + case 0x1f1: + case 0x1f2: + case 0x1f3: + case 0x1f4: + case 0x1f5: + case 0x1f6: + case 0x1f7: + //case 0x3f6: + //case 0x3f7: + return "IDE 1st"; + + case 0x170: + case 0x171: + case 0x172: + case 0x173: + case 0x174: + case 0x175: + case 0x176: + case 0x177: + case 0x376: + case 0x377: + return "IDE 2nd"; + + case 0x1e0: + case 0x1e1: + case 0x1e2: + case 0x1e3: + case 0x1e4: + case 0x1e5: + case 0x1e6: + case 0x1e7: + case 0x3e6: + case 0x3e7: + return "IDE 3rd"; + + case 0x160: + case 0x161: + case 0x162: + case 0x163: + case 0x164: + case 0x165: + case 0x166: + case 0x167: + case 0x366: + case 0x367: + return "IDE 4th"; + + case 0x130: case 0x140: case 0x150: + case 0x131: case 0x141: case 0x151: + case 0x132: case 0x142: case 0x152: + case 0x133: case 0x143: case 0x153: + case 0x134: case 0x144: case 0x154: + case 0x135: case 0x145: case 0x155: + case 0x136: case 0x146: case 0x156: + case 0x137: case 0x147: case 0x157: + case 0x138: case 0x148: case 0x158: + case 0x139: case 0x149: case 0x159: + case 0x13a: case 0x14a: case 0x15a: + case 0x13b: case 0x14b: case 0x15b: + case 0x13c: case 0x14c: case 0x15c: + case 0x13d: case 0x14d: case 0x15d: + case 0x13e: case 0x14e: case 0x15e: + case 0x13f: case 0x14f: case 0x15f: + case 0x220: case 0x230: + case 0x221: case 0x231: + case 0x222: case 0x232: + case 0x223: case 0x233: + case 0x224: case 0x234: + case 0x225: case 0x235: + case 0x226: case 0x236: + case 0x227: case 0x237: + case 0x228: case 0x238: + case 0x229: case 0x239: + case 0x22a: case 0x23a: + case 0x22b: case 0x23b: + case 0x22c: case 0x23c: + case 0x22d: case 0x23d: + case 0x22e: case 0x23e: + case 0x22f: case 0x23f: + case 0x330: case 0x340: case 0x350: + case 0x331: case 0x341: case 0x351: + case 0x332: case 0x342: case 0x352: + case 0x333: case 0x343: case 0x353: + case 0x334: case 0x344: case 0x354: + case 0x335: case 0x345: case 0x355: + case 0x336: case 0x346: case 0x356: + case 0x337: case 0x347: case 0x357: + case 0x338: case 0x348: case 0x358: + case 0x339: case 0x349: case 0x359: + case 0x33a: case 0x34a: case 0x35a: + case 0x33b: case 0x34b: case 0x35b: + case 0x33c: case 0x34c: case 0x35c: + case 0x33d: case 0x34d: case 0x35d: + case 0x33e: case 0x34e: case 0x35e: + case 0x33f: case 0x34f: case 0x35f: + return "SCSI (typically)"; + + case 0x320: + case 0x321: + case 0x322: + case 0x323: + case 0x324: + case 0x325: + case 0x326: + case 0x327: + return "XT HD"; + + case 0x3b0: + case 0x3b1: + case 0x3b2: + case 0x3b3: + case 0x3b4: + case 0x3b5: + case 0x3b6: + case 0x3b7: + case 0x3b8: + case 0x3b9: + case 0x3ba: + case 0x3bb: + return "VGA"; + + case 0x3c0: case 0x3d0: + case 0x3c1: case 0x3d1: + case 0x3c2: case 0x3d2: + case 0x3c3: case 0x3d3: + case 0x3c4: case 0x3d4: + case 0x3c5: case 0x3d5: + case 0x3c6: case 0x3d6: + case 0x3c7: case 0x3d7: + case 0x3c8: case 0x3d8: + case 0x3c9: case 0x3d9: + case 0x3ca: case 0x3da: + case 0x3cb: case 0x3db: + case 0x3cc: case 0x3dc: + case 
0x3cd: case 0x3dd: + case 0x3ce: case 0x3de: + case 0x3cf: case 0x3df: + return "VGA/EGA"; + + case 0x240: case 0x260: case 0x280: + case 0x241: case 0x261: case 0x281: + case 0x242: case 0x262: case 0x282: + case 0x243: case 0x263: case 0x283: + case 0x244: case 0x264: case 0x284: + case 0x245: case 0x265: case 0x285: + case 0x246: case 0x266: case 0x286: + case 0x247: case 0x267: case 0x287: + case 0x248: case 0x268: case 0x288: + case 0x249: case 0x269: case 0x289: + case 0x24a: case 0x26a: case 0x28a: + case 0x24b: case 0x26b: case 0x28b: + case 0x24c: case 0x26c: case 0x28c: + case 0x24d: case 0x26d: case 0x28d: + case 0x24e: case 0x26e: case 0x28e: + case 0x24f: case 0x26f: case 0x28f: + case 0x300: + case 0x301: + case 0x388: + case 0x389: + case 0x38a: + case 0x38b: + return "Sound Card (typically)"; + + default: + return NULL; + } +} +#endif /* VBOX_WITH_STATISTICS */ + diff --git a/src/VBox/VMM/VMMR3/MM.cpp b/src/VBox/VMM/VMMR3/MM.cpp new file mode 100644 index 00000000..53dcbb94 --- /dev/null +++ b/src/VBox/VMM/VMMR3/MM.cpp @@ -0,0 +1,856 @@ +/* $Id: MM.cpp $ */ +/** @file + * MM - Memory Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_mm MM - The Memory Manager + * + * The memory manager is in charge of the following memory: + * - Hypervisor Memory Area (HMA) - Address space management. + * - Hypervisor Heap - A memory heap that lives in all contexts. + * - Tagged ring-3 heap. + * - Page pools - Primarily used by PGM for shadow page tables. + * - Locked process memory - Guest RAM and other. (reduce/obsolete this) + * - Physical guest memory (RAM & ROM) - Moving to PGM. (obsolete this) + * + * The global memory manager (GMM) is the global counter part / partner of MM. + * MM will provide therefore ring-3 callable interfaces for some of the GMM APIs + * related to resource tracking (PGM is the user). + * + * @see grp_mm + * + * + * @section sec_mm_hma Hypervisor Memory Area + * + * The HMA is used when executing in raw-mode. We borrow, with the help of + * PGMMap, some unused space (one or more page directory entries to be precise) + * in the guest's virtual memory context. PGM will monitor the guest's virtual + * address space for changes and relocate the HMA when required. 
+ * + * To give some idea what's in the HMA, study the 'info hma' output: + * @verbatim +VBoxDbg> info hma +Hypervisor Memory Area (HMA) Layout: Base 00000000a0000000, 0x00800000 bytes +00000000a05cc000-00000000a05cd000 DYNAMIC fence +00000000a05c4000-00000000a05cc000 DYNAMIC Dynamic mapping +00000000a05c3000-00000000a05c4000 DYNAMIC fence +00000000a05b8000-00000000a05c3000 DYNAMIC Paging +00000000a05b6000-00000000a05b8000 MMIO2 0000000000000000 PCNetShMem +00000000a0536000-00000000a05b6000 MMIO2 0000000000000000 VGA VRam +00000000a0523000-00000000a0536000 00002aaab3d0c000 LOCKED autofree alloc once (PDM_DEVICE) +00000000a0522000-00000000a0523000 DYNAMIC fence +00000000a051e000-00000000a0522000 00002aaab36f5000 LOCKED autofree VBoxDD2RC.rc +00000000a051d000-00000000a051e000 DYNAMIC fence +00000000a04eb000-00000000a051d000 00002aaab36c3000 LOCKED autofree VBoxDDRC.rc +00000000a04ea000-00000000a04eb000 DYNAMIC fence +00000000a04e9000-00000000a04ea000 00002aaab36c2000 LOCKED autofree ram range (High ROM Region) +00000000a04e8000-00000000a04e9000 DYNAMIC fence +00000000a040e000-00000000a04e8000 00002aaab2e6d000 LOCKED autofree VMMRC.rc +00000000a0208000-00000000a040e000 00002aaab2c67000 LOCKED autofree alloc once (PATM) +00000000a01f7000-00000000a0208000 00002aaaab92d000 LOCKED autofree alloc once (SELM) +00000000a01e7000-00000000a01f7000 00002aaaab5e8000 LOCKED autofree alloc once (SELM) +00000000a01e6000-00000000a01e7000 DYNAMIC fence +00000000a01e5000-00000000a01e6000 00002aaaab5e7000 HCPHYS 00000000c363c000 Core Code +00000000a01e4000-00000000a01e5000 DYNAMIC fence +00000000a01e3000-00000000a01e4000 00002aaaaab26000 HCPHYS 00000000619cf000 GIP +00000000a01a2000-00000000a01e3000 00002aaaabf32000 LOCKED autofree alloc once (PGM_PHYS) +00000000a016b000-00000000a01a2000 00002aaab233f000 LOCKED autofree alloc once (PGM_POOL) +00000000a016a000-00000000a016b000 DYNAMIC fence +00000000a0165000-00000000a016a000 DYNAMIC CR3 mapping +00000000a0164000-00000000a0165000 DYNAMIC fence +00000000a0024000-00000000a0164000 00002aaab215f000 LOCKED autofree Heap +00000000a0023000-00000000a0024000 DYNAMIC fence +00000000a0001000-00000000a0023000 00002aaab1d24000 LOCKED pages VM +00000000a0000000-00000000a0001000 DYNAMIC fence + @endverbatim + * + * + * @section sec_mm_hyperheap Hypervisor Heap + * + * The heap is accessible from ring-3, ring-0 and the raw-mode context. That + * said, it's not necessarily mapped into ring-0 on if that's possible since we + * don't wish to waste kernel address space without a good reason. + * + * Allocations within the heap are always in the same relative position in all + * contexts, so, it's possible to use offset based linking. In fact, the heap is + * internally using offset based linked lists tracking heap blocks. We use + * offset linked AVL trees and lists in a lot of places where share structures + * between RC, R3 and R0, so this is a strict requirement of the heap. However + * this means that we cannot easily extend the heap since the extension won't + * necessarily be in the continuation of the current heap memory in all (or any) + * context. + * + * All allocations are tagged. Per tag allocation statistics will be maintaining + * and exposed thru STAM when VBOX_WITH_STATISTICS is defined. + * + * + * @section sec_mm_r3heap Tagged Ring-3 Heap + * + * The ring-3 heap is a wrapper around the RTMem API adding allocation + * statistics and automatic cleanup on VM destruction. 
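+ *
+ * A minimal illustrative sketch of its use (MYSTRUCT and the tag choice are made up
+ * for the example; the allocator signature is assumed from its use elsewhere):
+ * @code
+ *      PMYSTRUCT pStuff = (PMYSTRUCT)MMR3HeapAlloc(pVM, MM_TAG_IOM, sizeof(MYSTRUCT));
+ *      ...
+ *      MMR3HeapFree(pStuff);
+ * @endcode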
+ *
+ * Per tag allocation statistics will be maintained and exposed thru STAM when
+ * VBOX_WITH_STATISTICS is defined.
+ *
+ *
+ * @section sec_mm_page Page Pool
+ *
+ * The MM manages a page pool from which other components can allocate locked,
+ * page aligned and page sized memory objects. The pool provides facilities to
+ * convert back and forth between (host) physical and virtual addresses (within
+ * the pool of course). Several specialized interfaces are provided for the most
+ * common allocations and conversions to save the caller from bothersome casting
+ * and extra parameter passing.
+ *
+ *
+ * @section sec_mm_locked Locked Process Memory
+ *
+ * MM manages the locked process memory. This is used for a bunch of things
+ * (count the LOCKED entries in the 'info hma' output found in @ref sec_mm_hma),
+ * but the main consumer of memory is currently for guest RAM. There is an
+ * ongoing rewrite that will move all the guest RAM allocation to PGM and
+ * GMM.
+ *
+ * The locking of memory is something done in cooperation with the VirtualBox
+ * support driver, SUPDrv (aka. VBoxDrv), thru the support library API,
+ * SUPR3 (aka. SUPLib).
+ *
+ *
+ * @section sec_mm_phys Physical Guest Memory
+ *
+ * MM is currently managing the physical memory for the guest. It relies heavily
+ * on PGM for this. There is an ongoing rewrite that will move this to PGM. (The
+ * rewrite is driven by the need for more flexible guest RAM allocation, but
+ * also motivated by the fact that MMPhys is just adding stupid bureaucracy and
+ * that MMR3PhysReserve is a totally weird artifact that must go away.)
+ *
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_MM
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/cfgm.h>
+#include <VBox/vmm/ssm.h>
+#include <VBox/vmm/gmm.h>
+#include "MMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/uvm.h>
+#include <VBox/err.h>
+#include <VBox/param.h>
+
+#include <VBox/log.h>
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** The current saved state version of MM. */
+#define MM_SAVED_STATE_VERSION 2
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static DECLCALLBACK(int) mmR3Save(PVM pVM, PSSMHANDLE pSSM);
+static DECLCALLBACK(int) mmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass);
+
+
+
+
+/**
+ * Initializes the MM members of the UVM.
+ *
+ * This is currently only the ring-3 heap.
+ *
+ * @returns VBox status code.
+ * @param pUVM Pointer to the user mode VM structure.
+ */
+VMMR3DECL(int) MMR3InitUVM(PUVM pUVM)
+{
+ /*
+ * Assert sizes and order.
+ */
+ AssertCompile(sizeof(pUVM->mm.s) <= sizeof(pUVM->mm.padding));
+ AssertRelease(sizeof(pUVM->mm.s) <= sizeof(pUVM->mm.padding));
+ Assert(!pUVM->mm.s.pHeap);
+
+ /*
+ * Init the heap.
+ */ + int rc = mmR3HeapCreateU(pUVM, &pUVM->mm.s.pHeap); + if (RT_SUCCESS(rc)) + { + rc = mmR3UkHeapCreateU(pUVM, &pUVM->mm.s.pUkHeap); + if (RT_SUCCESS(rc)) + return VINF_SUCCESS; + mmR3HeapDestroy(pUVM->mm.s.pHeap); + pUVM->mm.s.pHeap = NULL; + } + return rc; +} + + +/** + * Initializes the MM. + * + * MM is managing the virtual address space (among other things) and + * setup the hypervisor memory area mapping in the VM structure and + * the hypervisor alloc-only-heap. Assuming the current init order + * and components the hypervisor memory area looks like this: + * -# VM Structure. + * -# Hypervisor alloc only heap (also call Hypervisor memory region). + * -# Core code. + * + * MM determines the virtual address of the hypervisor memory area by + * checking for location at previous run. If that property isn't available + * it will choose a default starting location, currently 0xa0000000. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) MMR3Init(PVM pVM) +{ + LogFlow(("MMR3Init\n")); + + /* + * Assert alignment, sizes and order. + */ + AssertRelease(!(RT_UOFFSETOF(VM, mm.s) & 31)); + AssertRelease(sizeof(pVM->mm.s) <= sizeof(pVM->mm.padding)); + AssertMsg(pVM->mm.s.offVM == 0, ("Already initialized!\n")); + + /* + * Init the structure. + */ + pVM->mm.s.offVM = RT_UOFFSETOF(VM, mm); + pVM->mm.s.offLookupHyper = NIL_OFFSET; + + /* + * Init the page pool. + */ + int rc = mmR3PagePoolInit(pVM); + if (RT_SUCCESS(rc)) + { + /* + * Init the hypervisor related stuff. + */ + rc = mmR3HyperInit(pVM); + if (RT_SUCCESS(rc)) + { + /* + * Register the saved state data unit. + */ + rc = SSMR3RegisterInternal(pVM, "mm", 1, MM_SAVED_STATE_VERSION, sizeof(uint32_t) * 2, + NULL, NULL, NULL, + NULL, mmR3Save, NULL, + NULL, mmR3Load, NULL); + if (RT_SUCCESS(rc)) + { + /* + * Statistics. + */ + STAM_REG(pVM, &pVM->mm.s.cBasePages, STAMTYPE_U64, "/MM/Reserved/cBasePages", STAMUNIT_PAGES, "Reserved number of base pages, ROM and Shadow ROM included."); + STAM_REG(pVM, &pVM->mm.s.cHandyPages, STAMTYPE_U32, "/MM/Reserved/cHandyPages", STAMUNIT_PAGES, "Reserved number of handy pages."); + STAM_REG(pVM, &pVM->mm.s.cShadowPages, STAMTYPE_U32, "/MM/Reserved/cShadowPages", STAMUNIT_PAGES, "Reserved number of shadow paging pages."); + STAM_REG(pVM, &pVM->mm.s.cFixedPages, STAMTYPE_U32, "/MM/Reserved/cFixedPages", STAMUNIT_PAGES, "Reserved number of fixed pages (MMIO2)."); + STAM_REG(pVM, &pVM->mm.s.cbRamBase, STAMTYPE_U64, "/MM/cbRamBase", STAMUNIT_BYTES, "Size of the base RAM."); + + return rc; + } + + /* .... failure .... */ + } + } + MMR3Term(pVM); + return rc; +} + + +/** + * Initializes the MM parts which depends on PGM being initialized. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @remark No cleanup necessary since MMR3Term() will be called on failure. + */ +VMMR3DECL(int) MMR3InitPaging(PVM pVM) +{ + LogFlow(("MMR3InitPaging:\n")); + + /* + * Query the CFGM values. + */ + int rc; + PCFGMNODE pMMCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "MM"); + if (!pMMCfg) + { + rc = CFGMR3InsertNode(CFGMR3GetRoot(pVM), "MM", &pMMCfg); + AssertRCReturn(rc, rc); + } + + /** @cfgm{/RamSize, uint64_t, 0, 16TB, 0} + * Specifies the size of the base RAM that is to be set up during + * VM initialization. 
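+ *
+ * To illustrate the arithmetic performed below (numbers picked for the example):
+ * with RamSize = 6 GiB and RamHoleSize = 512 MiB (see the next setting),
+ * offRamHole = 4 GiB - 512 MiB = 3.5 GiB, so 3.5 GiB is registered as "Base RAM"
+ * below 4 GiB and the remaining 2.5 GiB as "Above 4GB Base RAM" starting at 4 GiB.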
+     */
+    uint64_t cbRam;
+    rc = CFGMR3QueryU64(CFGMR3GetRoot(pVM), "RamSize", &cbRam);
+    if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+        cbRam = 0;
+    else
+        AssertMsgRCReturn(rc, ("Configuration error: Failed to query integer \"RamSize\", rc=%Rrc.\n", rc), rc);
+    AssertLogRelMsg(!(cbRam & ~X86_PTE_PAE_PG_MASK), ("%RGp X86_PTE_PAE_PG_MASK=%RX64\n", cbRam, X86_PTE_PAE_PG_MASK));
+    AssertLogRelMsgReturn(cbRam <= GMM_GCPHYS_LAST, ("cbRam=%RGp GMM_GCPHYS_LAST=%RX64\n", cbRam, GMM_GCPHYS_LAST), VERR_OUT_OF_RANGE);
+    cbRam &= X86_PTE_PAE_PG_MASK;
+    pVM->mm.s.cbRamBase = cbRam;
+
+    /** @cfgm{/RamHoleSize, uint32_t, 0, 4032MB, 512MB}
+     * Specifies the size of the memory hole. The memory hole is used
+     * to avoid mapping RAM to the range normally used for PCI memory regions.
+     * Must be aligned on a 4MB boundary. */
+    uint32_t cbRamHole;
+    rc = CFGMR3QueryU32Def(CFGMR3GetRoot(pVM), "RamHoleSize", &cbRamHole, MM_RAM_HOLE_SIZE_DEFAULT);
+    AssertLogRelMsgRCReturn(rc, ("Configuration error: Failed to query integer \"RamHoleSize\", rc=%Rrc.\n", rc), rc);
+    AssertLogRelMsgReturn(cbRamHole <= 4032U * _1M,
+                          ("Configuration error: \"RamHoleSize\"=%#RX32 is too large.\n", cbRamHole), VERR_OUT_OF_RANGE);
+    AssertLogRelMsgReturn(cbRamHole > 16 * _1M,
+                          ("Configuration error: \"RamHoleSize\"=%#RX32 is too small.\n", cbRamHole), VERR_OUT_OF_RANGE);
+    AssertLogRelMsgReturn(!(cbRamHole & (_4M - 1)),
+                          ("Configuration error: \"RamHoleSize\"=%#RX32 is misaligned.\n", cbRamHole), VERR_OUT_OF_RANGE);
+    uint64_t const offRamHole = _4G - cbRamHole;
+    if (cbRam < offRamHole)
+        Log(("MM: %RU64 bytes of RAM\n", cbRam));
+    else
+        Log(("MM: %RU64 bytes of RAM with a hole at %RU64 up to 4GB.\n", cbRam, offRamHole));
+
+    /** @cfgm{/MM/Policy, string, no overcommitment}
+     * Specifies the policy to use when reserving memory for this VM. The recognized
+     * value is 'no overcommitment' (default). See GMMPOLICY.
+     */
+    GMMOCPOLICY enmOcPolicy;
+    char sz[64];
+    rc = CFGMR3QueryString(CFGMR3GetRoot(pVM), "Policy", sz, sizeof(sz));
+    if (RT_SUCCESS(rc))
+    {
+        if (   !RTStrICmp(sz, "no_oc")
+            || !RTStrICmp(sz, "no overcommitment"))
+            enmOcPolicy = GMMOCPOLICY_NO_OC;
+        else
+            return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, "Unknown \"MM/Policy\" value \"%s\"", sz);
+    }
+    else if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+        enmOcPolicy = GMMOCPOLICY_NO_OC;
+    else
+        AssertMsgFailedReturn(("Configuration error: Failed to query string \"MM/Policy\", rc=%Rrc.\n", rc), rc);
+
+    /** @cfgm{/MM/Priority, string, normal}
+     * Specifies the memory priority of this VM. The priority comes into play when the
+     * system is overcommitted and the VMs need to be milked for memory. The recognized
+     * values are 'low', 'normal' (default) and 'high'. See GMMPRIORITY.
+     */
+    GMMPRIORITY enmPriority;
+    rc = CFGMR3QueryString(CFGMR3GetRoot(pVM), "Priority", sz, sizeof(sz));
+    if (RT_SUCCESS(rc))
+    {
+        if (!RTStrICmp(sz, "low"))
+            enmPriority = GMMPRIORITY_LOW;
+        else if (!RTStrICmp(sz, "normal"))
+            enmPriority = GMMPRIORITY_NORMAL;
+        else if (!RTStrICmp(sz, "high"))
+            enmPriority = GMMPRIORITY_HIGH;
+        else
+            return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, "Unknown \"MM/Priority\" value \"%s\"", sz);
+    }
+    else if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+        enmPriority = GMMPRIORITY_NORMAL;
+    else
+        AssertMsgFailedReturn(("Configuration error: Failed to query string \"MM/Priority\", rc=%Rrc.\n", rc), rc);
+
+    /*
+     * Make the initial memory reservation with GMM.
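+     *
+     * Worked example (illustrative figures, not from the original sources):
+     * with cbRam = 4 GiB and nothing reserved yet, cbRam >> PAGE_SHIFT yields
+     * 0x100000 (1048576) base pages, so GMM is asked to reserve
+     * max(1048576 + cHandyPages, 1) base pages plus at least one shadow page
+     * and one fixed page.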
+ */ + uint64_t cBasePages = (cbRam >> PAGE_SHIFT) + pVM->mm.s.cBasePages; + rc = GMMR3InitialReservation(pVM, + RT_MAX(cBasePages + pVM->mm.s.cHandyPages, 1), + RT_MAX(pVM->mm.s.cShadowPages, 1), + RT_MAX(pVM->mm.s.cFixedPages, 1), + enmOcPolicy, + enmPriority); + if (RT_FAILURE(rc)) + { + if (rc == VERR_GMM_MEMORY_RESERVATION_DECLINED) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Insufficient free memory to start the VM (cbRam=%#RX64 enmOcPolicy=%d enmPriority=%d)"), + cbRam, enmOcPolicy, enmPriority); + return VMSetError(pVM, rc, RT_SRC_POS, "GMMR3InitialReservation(,%#RX64,0,0,%d,%d)", + cbRam >> PAGE_SHIFT, enmOcPolicy, enmPriority); + } + + /* + * If RamSize is 0 we're done now. + */ + if (cbRam < PAGE_SIZE) + { + Log(("MM: No RAM configured\n")); + return VINF_SUCCESS; + } + + /* + * Setup the base ram (PGM). + */ + pVM->mm.s.cbRamHole = cbRamHole; + if (cbRam > offRamHole) + { + pVM->mm.s.cbRamBelow4GB = offRamHole; + rc = PGMR3PhysRegisterRam(pVM, 0, offRamHole, "Base RAM"); + if (RT_SUCCESS(rc)) + { + pVM->mm.s.cbRamAbove4GB = cbRam - offRamHole; + rc = PGMR3PhysRegisterRam(pVM, _4G, cbRam - offRamHole, "Above 4GB Base RAM"); + } + } + else + { + pVM->mm.s.cbRamBelow4GB = cbRam; + pVM->mm.s.cbRamAbove4GB = 0; + rc = PGMR3PhysRegisterRam(pVM, 0, cbRam, "Base RAM"); + } + + /* + * Enabled mmR3UpdateReservation here since we don't want the + * PGMR3PhysRegisterRam calls above mess things up. + */ + pVM->mm.s.fDoneMMR3InitPaging = true; + AssertMsg(pVM->mm.s.cBasePages == cBasePages || RT_FAILURE(rc), ("%RX64 != %RX64\n", pVM->mm.s.cBasePages, cBasePages)); + + LogFlow(("MMR3InitPaging: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Terminates the MM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) MMR3Term(PVM pVM) +{ + /* + * Destroy the page pool. (first as it used the hyper heap) + */ + mmR3PagePoolTerm(pVM); + + /* Clean up the hypervisor heap. */ + mmR3HyperTerm(pVM); + + /* + * Zero stuff to detect after termination use of the MM interface + */ + pVM->mm.s.offLookupHyper = NIL_OFFSET; + pVM->mm.s.pHyperHeapR3 = NULL; /* freed above. */ + pVM->mm.s.pHyperHeapR0 = NIL_RTR0PTR; /* freed above. */ + pVM->mm.s.pHyperHeapRC = NIL_RTRCPTR; /* freed above. */ + pVM->mm.s.offVM = 0; /* init assertion on this */ + + /* + * Destroy the User-kernel heap here since the support driver session + * may have been terminated by the time we get to MMR3TermUVM. + */ + mmR3UkHeapDestroy(pVM->pUVM->mm.s.pUkHeap); + pVM->pUVM->mm.s.pUkHeap = NULL; + + return VINF_SUCCESS; +} + + +/** + * Terminates the UVM part of MM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + */ +VMMR3DECL(void) MMR3TermUVM(PUVM pUVM) +{ + /* + * Destroy the heaps. + */ + if (pUVM->mm.s.pUkHeap) + { + mmR3UkHeapDestroy(pUVM->mm.s.pUkHeap); + pUVM->mm.s.pUkHeap = NULL; + } + mmR3HeapDestroy(pUVM->mm.s.pHeap); + pUVM->mm.s.pHeap = NULL; +} + + +/** + * Checks if the both VM and UVM parts of MM have been initialized. + * + * @returns true if initialized, false if not. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(bool) MMR3IsInitialized(PVM pVM) +{ + return pVM->mm.s.pHyperHeapR3 != NULL; +} + + +/** + * Execute state save operation. 
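+ *
+ * Sketch of the resulting record layout for version 2 (informal illustration,
+ * not an authoritative format description):
+ * @code
+ *      uint64_t cBasePages;    // reserved base page count, ignored by mmR3Load
+ *      uint64_t cbRamBase;     // checked against the configured RAM size on load
+ * @endcode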
+ * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) mmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("mmR3Save:\n")); + + /* (PGM saves the physical memory.) */ + SSMR3PutU64(pSSM, pVM->mm.s.cBasePages); + return SSMR3PutU64(pSSM, pVM->mm.s.cbRamBase); +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) mmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + LogFlow(("mmR3Load:\n")); + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Validate version. + */ + if ( SSM_VERSION_MAJOR_CHANGED(uVersion, MM_SAVED_STATE_VERSION) + || !uVersion) + { + AssertMsgFailed(("mmR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Check the cBasePages and cbRamBase values. + */ + int rc; + RTUINT cb1; + + /* cBasePages (ignored) */ + uint64_t cPages; + if (uVersion >= 2) + rc = SSMR3GetU64(pSSM, &cPages); + else + { + rc = SSMR3GetUInt(pSSM, &cb1); + cPages = cb1 >> PAGE_SHIFT; + } + if (RT_FAILURE(rc)) + return rc; + + /* cbRamBase */ + uint64_t cb; + if (uVersion != 1) + rc = SSMR3GetU64(pSSM, &cb); + else + { + rc = SSMR3GetUInt(pSSM, &cb1); + cb = cb1; + } + if (RT_FAILURE(rc)) + return rc; + AssertLogRelMsgReturn(cb == pVM->mm.s.cbRamBase, + ("Memory configuration has changed. cbRamBase=%#RX64 save=%#RX64\n", pVM->mm.s.cbRamBase, cb), + VERR_SSM_LOAD_MEMORY_SIZE_MISMATCH); + + /* (PGM restores the physical memory.) */ + return rc; +} + + +/** + * Updates GMM with memory reservation changes. + * + * Called when MM::cbRamRegistered, MM::cShadowPages or MM::cFixedPages changes. + * + * @returns VBox status code - see GMMR0UpdateReservation. + * @param pVM The cross context VM structure. + */ +int mmR3UpdateReservation(PVM pVM) +{ + VM_ASSERT_EMT(pVM); + if (pVM->mm.s.fDoneMMR3InitPaging) + return GMMR3UpdateReservation(pVM, + RT_MAX(pVM->mm.s.cBasePages + pVM->mm.s.cHandyPages, 1), + RT_MAX(pVM->mm.s.cShadowPages, 1), + RT_MAX(pVM->mm.s.cFixedPages, 1)); + return VINF_SUCCESS; +} + + +/** + * Interface for PGM to increase the reservation of RAM and ROM pages. + * + * This can be called before MMR3InitPaging. + * + * @returns VBox status code. Will set VM error on failure. + * @param pVM The cross context VM structure. + * @param cAddBasePages The number of pages to add. + */ +VMMR3DECL(int) MMR3IncreaseBaseReservation(PVM pVM, uint64_t cAddBasePages) +{ + uint64_t cOld = pVM->mm.s.cBasePages; + pVM->mm.s.cBasePages += cAddBasePages; + LogFlow(("MMR3IncreaseBaseReservation: +%RU64 (%RU64 -> %RU64\n", cAddBasePages, cOld, pVM->mm.s.cBasePages)); + int rc = mmR3UpdateReservation(pVM); + if (RT_FAILURE(rc)) + { + VMSetError(pVM, rc, RT_SRC_POS, N_("Failed to reserved physical memory for the RAM (%#RX64 -> %#RX64 + %#RX32)"), + cOld, pVM->mm.s.cBasePages, pVM->mm.s.cHandyPages); + pVM->mm.s.cBasePages = cOld; + } + return rc; +} + + +/** + * Interface for PGM to make reservations for handy pages in addition to the + * base memory. + * + * This can be called before MMR3InitPaging. + * + * @returns VBox status code. Will set VM error on failure. + * @param pVM The cross context VM structure. + * @param cHandyPages The number of handy pages. 
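+ *
+ * Hypothetical usage sketched for illustration (the page count is an arbitrary
+ * value, not taken from the original sources):
+ * @code
+ *      int rc = MMR3ReserveHandyPages(pVM, 32);
+ *      AssertLogRelRCReturn(rc, rc);
+ * @endcode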
+ */
+VMMR3DECL(int) MMR3ReserveHandyPages(PVM pVM, uint32_t cHandyPages)
+{
+    AssertReturn(!pVM->mm.s.cHandyPages, VERR_WRONG_ORDER);
+
+    pVM->mm.s.cHandyPages = cHandyPages;
+    LogFlow(("MMR3ReserveHandyPages: %RU32 (base %RU64)\n", pVM->mm.s.cHandyPages, pVM->mm.s.cBasePages));
+    int rc = mmR3UpdateReservation(pVM);
+    if (RT_FAILURE(rc))
+    {
+        VMSetError(pVM, rc, RT_SRC_POS, N_("Failed to reserve physical memory for the RAM (%#RX64 + %#RX32)"),
+                   pVM->mm.s.cBasePages, pVM->mm.s.cHandyPages);
+        pVM->mm.s.cHandyPages = 0;
+    }
+    return rc;
+}
+
+
+/**
+ * Interface for PGM to adjust the reservation of fixed pages.
+ *
+ * This can be called before MMR3InitPaging.
+ *
+ * @returns VBox status code. Will set VM error on failure.
+ * @param   pVM                 The cross context VM structure.
+ * @param   cDeltaFixedPages    The number of pages to add (positive) or subtract (negative).
+ * @param   pszDesc             Some description associated with the reservation.
+ */
+VMMR3DECL(int) MMR3AdjustFixedReservation(PVM pVM, int32_t cDeltaFixedPages, const char *pszDesc)
+{
+    const uint32_t cOld = pVM->mm.s.cFixedPages;
+    pVM->mm.s.cFixedPages += cDeltaFixedPages;
+    LogFlow(("MMR3AdjustFixedReservation: %d (%u -> %u)\n", cDeltaFixedPages, cOld, pVM->mm.s.cFixedPages));
+    int rc = mmR3UpdateReservation(pVM);
+    if (RT_FAILURE(rc))
+    {
+        VMSetError(pVM, rc, RT_SRC_POS, N_("Failed to reserve physical memory (%#x -> %#x; %s)"),
+                   cOld, pVM->mm.s.cFixedPages, pszDesc);
+        pVM->mm.s.cFixedPages = cOld;
+    }
+    return rc;
+}
+
+
+/**
+ * Interface for PGM to update the reservation of shadow pages.
+ *
+ * This can be called before MMR3InitPaging.
+ *
+ * @returns VBox status code. Will set VM error on failure.
+ * @param   pVM             The cross context VM structure.
+ * @param   cShadowPages    The new page count.
+ */
+VMMR3DECL(int) MMR3UpdateShadowReservation(PVM pVM, uint32_t cShadowPages)
+{
+    const uint32_t cOld = pVM->mm.s.cShadowPages;
+    pVM->mm.s.cShadowPages = cShadowPages;
+    LogFlow(("MMR3UpdateShadowReservation: %u -> %u\n", cOld, pVM->mm.s.cShadowPages));
+    int rc = mmR3UpdateReservation(pVM);
+    if (RT_FAILURE(rc))
+    {
+        VMSetError(pVM, rc, RT_SRC_POS, N_("Failed to reserve physical memory for shadow page tables (%#x -> %#x)"), cOld, pVM->mm.s.cShadowPages);
+        pVM->mm.s.cShadowPages = cOld;
+    }
+    return rc;
+}
+
+
+/**
+ * Convert HC Physical address to HC Virtual address.
+ *
+ * @returns VBox status code.
+ * @param   pVM     The cross context VM structure.
+ * @param   HCPhys  The host context physical address.
+ * @param   ppv     Where to store the resulting address.
+ * @thread  The Emulation Thread.
+ *
+ * @remarks Avoid whenever possible.
+ *          Intended for the debugger facility only.
+ * @todo    Rename to indicate the special usage.
+ */
+VMMR3DECL(int) MMR3HCPhys2HCVirt(PVM pVM, RTHCPHYS HCPhys, void **ppv)
+{
+    /*
+     * Try page tables.
+     */
+    int rc = MMPagePhys2PageTry(pVM, HCPhys, ppv);
+    if (RT_SUCCESS(rc))
+        return rc;
+
+    /*
+     * Iterate thru the lookup records for HMA.
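+     *
+     * The lookup records form an offset-linked list terminated by NIL_OFFSET;
+     * only LOCKED and HCPHYS records can be mapped back to ring-3 addresses,
+     * the GCPHYS, MMIO2 and DYNAMIC record types are skipped below.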
+ */ + uint32_t off = HCPhys & PAGE_OFFSET_MASK; + HCPhys &= X86_PTE_PAE_PG_MASK; + PMMLOOKUPHYPER pCur = (PMMLOOKUPHYPER)((uint8_t *)pVM->mm.s.CTX_SUFF(pHyperHeap) + pVM->mm.s.offLookupHyper); + for (;;) + { + switch (pCur->enmType) + { + case MMLOOKUPHYPERTYPE_LOCKED: + { + PCRTHCPHYS paHCPhysPages = pCur->u.Locked.paHCPhysPages; + size_t iPage = pCur->cb >> PAGE_SHIFT; + while (iPage-- > 0) + if (paHCPhysPages[iPage] == HCPhys) + { + *ppv = (char *)pCur->u.Locked.pvR3 + (iPage << PAGE_SHIFT) + off; + return VINF_SUCCESS; + } + break; + } + + case MMLOOKUPHYPERTYPE_HCPHYS: + if (pCur->u.HCPhys.HCPhys - HCPhys < pCur->cb) + { + *ppv = (uint8_t *)pCur->u.HCPhys.pvR3 + pCur->u.HCPhys.HCPhys - HCPhys + off; + return VINF_SUCCESS; + } + break; + + case MMLOOKUPHYPERTYPE_GCPHYS: /* (for now we'll not allow these kind of conversions) */ + case MMLOOKUPHYPERTYPE_MMIO2: + case MMLOOKUPHYPERTYPE_DYNAMIC: + break; + + default: + AssertMsgFailed(("enmType=%d\n", pCur->enmType)); + break; + } + + /* next */ + if (pCur->offNext == (int32_t)NIL_OFFSET) + break; + pCur = (PMMLOOKUPHYPER)((uint8_t *)pCur + pCur->offNext); + } + /* give up */ + return VERR_INVALID_POINTER; +} + + + +/** + * Get the size of the base RAM. + * This usually means the size of the first contiguous block of physical memory. + * + * @returns The guest base RAM size. + * @param pVM The cross context VM structure. + * @thread Any. + * + * @deprecated + */ +VMMR3DECL(uint64_t) MMR3PhysGetRamSize(PVM pVM) +{ + return pVM->mm.s.cbRamBase; +} + + +/** + * Get the size of RAM below 4GB (starts at address 0x00000000). + * + * @returns The amount of RAM below 4GB in bytes. + * @param pVM The cross context VM structure. + * @thread Any. + */ +VMMR3DECL(uint32_t) MMR3PhysGetRamSizeBelow4GB(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX); + return pVM->mm.s.cbRamBelow4GB; +} + + +/** + * Get the size of RAM above 4GB (starts at address 0x000100000000). + * + * @returns The amount of RAM above 4GB in bytes. + * @param pVM The cross context VM structure. + * @thread Any. + */ +VMMR3DECL(uint64_t) MMR3PhysGetRamSizeAbove4GB(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT64_MAX); + return pVM->mm.s.cbRamAbove4GB; +} + + +/** + * Get the size of the RAM hole below 4GB. + * + * @returns Size in bytes. + * @param pVM The cross context VM structure. + * @thread Any. + */ +VMMR3DECL(uint32_t) MMR3PhysGet4GBRamHoleSize(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX); + return pVM->mm.s.cbRamHole; +} + diff --git a/src/VBox/VMM/VMMR3/MMHeap.cpp b/src/VBox/VMM/VMMR3/MMHeap.cpp new file mode 100644 index 00000000..967cd188 --- /dev/null +++ b/src/VBox/VMM/VMMR3/MMHeap.cpp @@ -0,0 +1,696 @@ +/* $Id: MMHeap.cpp $ */ +/** @file + * MM - Memory Manager - Heap. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_MM_HEAP +#include +#include +#include +#include "MMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void *mmR3HeapAlloc(PMMHEAP pHeap, MMTAG enmTag, size_t cbSize, bool fZero); + + + +/** + * Allocate and initialize a heap structure and it's associated substructures. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param ppHeap Where to store the heap pointer. + */ +int mmR3HeapCreateU(PUVM pUVM, PMMHEAP *ppHeap) +{ + PMMHEAP pHeap = (PMMHEAP)RTMemAllocZ(sizeof(MMHEAP) + sizeof(MMHEAPSTAT)); + if (pHeap) + { + int rc = RTCritSectInit(&pHeap->Lock); + if (RT_SUCCESS(rc)) + { + /* + * Initialize the global stat record. + */ + pHeap->pUVM = pUVM; + pHeap->Stat.pHeap = pHeap; +#ifdef MMR3HEAP_WITH_STATISTICS + PMMHEAPSTAT pStat = &pHeap->Stat; + STAMR3RegisterU(pUVM, &pStat->cAllocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cAllocations", STAMUNIT_CALLS, "Number or MMR3HeapAlloc() calls."); + STAMR3RegisterU(pUVM, &pStat->cReallocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cReallocations", STAMUNIT_CALLS, "Number of MMR3HeapRealloc() calls."); + STAMR3RegisterU(pUVM, &pStat->cFrees, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cFrees", STAMUNIT_CALLS, "Number of MMR3HeapFree() calls."); + STAMR3RegisterU(pUVM, &pStat->cFailures, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cFailures", STAMUNIT_COUNT, "Number of failures."); + STAMR3RegisterU(pUVM, &pStat->cbCurAllocated, sizeof(pStat->cbCurAllocated) == sizeof(uint32_t) ? STAMTYPE_U32 : STAMTYPE_U64, + STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cbCurAllocated", STAMUNIT_BYTES, "Number of bytes currently allocated."); + STAMR3RegisterU(pUVM, &pStat->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cbAllocated", STAMUNIT_BYTES, "Total number of bytes allocated."); + STAMR3RegisterU(pUVM, &pStat->cbFreed, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/R3Heap/cbFreed", STAMUNIT_BYTES, "Total number of bytes freed."); +#endif + *ppHeap = pHeap; + return VINF_SUCCESS; + } + AssertRC(rc); + RTMemFree(pHeap); + } + AssertMsgFailed(("failed to allocate heap structure\n")); + return VERR_NO_MEMORY; +} + + +/** + * Destroy a heap. + * + * @param pHeap Heap handle. + */ +void mmR3HeapDestroy(PMMHEAP pHeap) +{ + /* + * Start by deleting the lock, that'll trap anyone + * attempting to use the heap. + */ + RTCritSectDelete(&pHeap->Lock); + + /* + * Walk the node list and free all the memory. + */ + PMMHEAPHDR pHdr = pHeap->pHead; + while (pHdr) + { + void *pv = pHdr; + pHdr = pHdr->pNext; + RTMemFree(pv); + } + + /* + * Free the stat nodes. + */ + /** @todo free all nodes in a AVL tree. */ + RTMemFree(pHeap); +} + + +/** + * Allocate memory associating it with the VM for collective cleanup. 
+ * + * The memory will be allocated from the default heap but a header + * is added in which we keep track of which VM it belongs to and chain + * all the allocations together so they can be freed in one go. + * + * This interface is typically used for memory block which will not be + * freed during the life of the VM. + * + * @returns Pointer to allocated memory. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + */ +VMMR3DECL(void *) MMR3HeapAllocU(PUVM pUVM, MMTAG enmTag, size_t cbSize) +{ + Assert(pUVM->mm.s.pHeap); + return mmR3HeapAlloc(pUVM->mm.s.pHeap, enmTag, cbSize, false); +} + + +/** + * Allocate memory associating it with the VM for collective cleanup. + * + * The memory will be allocated from the default heap but a header + * is added in which we keep track of which VM it belongs to and chain + * all the allocations together so they can be freed in one go. + * + * This interface is typically used for memory block which will not be + * freed during the life of the VM. + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + */ +VMMR3DECL(void *) MMR3HeapAlloc(PVM pVM, MMTAG enmTag, size_t cbSize) +{ + return mmR3HeapAlloc(pVM->pUVM->mm.s.pHeap, enmTag, cbSize, false); +} + + +/** + * Same as MMR3HeapAllocU(). + * + * @returns Pointer to allocated memory. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + * @param ppv Where to store the pointer to the allocated memory on success. + */ +VMMR3DECL(int) MMR3HeapAllocExU(PUVM pUVM, MMTAG enmTag, size_t cbSize, void **ppv) +{ + Assert(pUVM->mm.s.pHeap); + void *pv = mmR3HeapAlloc(pUVM->mm.s.pHeap, enmTag, cbSize, false); + if (pv) + { + *ppv = pv; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/** + * Same as MMR3HeapAlloc(). + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + * @param ppv Where to store the pointer to the allocated memory on success. + */ +VMMR3DECL(int) MMR3HeapAllocEx(PVM pVM, MMTAG enmTag, size_t cbSize, void **ppv) +{ + void *pv = mmR3HeapAlloc(pVM->pUVM->mm.s.pHeap, enmTag, cbSize, false); + if (pv) + { + *ppv = pv; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/** + * Same as MMR3HeapAlloc() only the memory is zeroed. + * + * @returns Pointer to allocated memory. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. 
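+ *
+ * Illustrative sketch only; the structure name is hypothetical and not part of
+ * the original sources:
+ * @code
+ *      PMYSTATE pState = (PMYSTATE)MMR3HeapAllocZU(pUVM, MM_TAG_MM, sizeof(*pState));
+ *      if (!pState)
+ *          return VERR_NO_MEMORY;
+ * @endcode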
+ */ +VMMR3DECL(void *) MMR3HeapAllocZU(PUVM pUVM, MMTAG enmTag, size_t cbSize) +{ + return mmR3HeapAlloc(pUVM->mm.s.pHeap, enmTag, cbSize, true); +} + + +/** + * Same as MMR3HeapAlloc() only the memory is zeroed. + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + */ +VMMR3DECL(void *) MMR3HeapAllocZ(PVM pVM, MMTAG enmTag, size_t cbSize) +{ + return mmR3HeapAlloc(pVM->pUVM->mm.s.pHeap, enmTag, cbSize, true); +} + + +/** + * Same as MMR3HeapAllocZ(). + * + * @returns Pointer to allocated memory. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + * @param ppv Where to store the pointer to the allocated memory on success. + */ +VMMR3DECL(int) MMR3HeapAllocZExU(PUVM pUVM, MMTAG enmTag, size_t cbSize, void **ppv) +{ + Assert(pUVM->mm.s.pHeap); + void *pv = mmR3HeapAlloc(pUVM->mm.s.pHeap, enmTag, cbSize, true); + if (pv) + { + *ppv = pv; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/** + * Same as MMR3HeapAllocZ(). + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + * @param ppv Where to store the pointer to the allocated memory on success. + */ +VMMR3DECL(int) MMR3HeapAllocZEx(PVM pVM, MMTAG enmTag, size_t cbSize, void **ppv) +{ + void *pv = mmR3HeapAlloc(pVM->pUVM->mm.s.pHeap, enmTag, cbSize, true); + if (pv) + { + *ppv = pv; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/** + * Allocate memory from the heap. + * + * @returns Pointer to allocated memory. + * @param pHeap Heap handle. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param cbSize Size of the block. + * @param fZero Whether or not to zero the memory block. + */ +void *mmR3HeapAlloc(PMMHEAP pHeap, MMTAG enmTag, size_t cbSize, bool fZero) +{ +#ifdef MMR3HEAP_WITH_STATISTICS + RTCritSectEnter(&pHeap->Lock); + + /* + * Find/alloc statistics nodes. 
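+ * The per-tag records live in an AVL tree keyed on the tag value; a record is
+ * created lazily the first time a tag is seen and its counters are then
+ * registered with STAM under "/MM/R3Heap/<tag>".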
+ */ + pHeap->Stat.cAllocations++; + PMMHEAPSTAT pStat = (PMMHEAPSTAT)RTAvlULGet(&pHeap->pStatTree, (AVLULKEY)enmTag); + if (pStat) + { + pStat->cAllocations++; + + RTCritSectLeave(&pHeap->Lock); + } + else + { + pStat = (PMMHEAPSTAT)RTMemAllocZ(sizeof(MMHEAPSTAT)); + if (!pStat) + { + pHeap->Stat.cFailures++; + AssertMsgFailed(("Failed to allocate heap stat record.\n")); + RTCritSectLeave(&pHeap->Lock); + return NULL; + } + pStat->Core.Key = (AVLULKEY)enmTag; + pStat->pHeap = pHeap; + RTAvlULInsert(&pHeap->pStatTree, &pStat->Core); + + pStat->cAllocations++; + RTCritSectLeave(&pHeap->Lock); + + /* register the statistics */ + PUVM pUVM = pHeap->pUVM; + const char *pszTag = mmGetTagName(enmTag); + STAMR3RegisterFU(pUVM, &pStat->cbCurAllocated, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Number of bytes currently allocated.", "/MM/R3Heap/%s", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cAllocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Number or MMR3HeapAlloc() calls.", "/MM/R3Heap/%s/cAllocations", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cReallocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Number of MMR3HeapRealloc() calls.", "/MM/R3Heap/%s/cReallocations", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cFrees, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Number of MMR3HeapFree() calls.", "/MM/R3Heap/%s/cFrees", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cFailures, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, "Number of failures.", "/MM/R3Heap/%s/cFailures", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Total number of bytes allocated.", "/MM/R3Heap/%s/cbAllocated", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cbFreed, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Total number of bytes freed.", "/MM/R3Heap/%s/cbFreed", pszTag); + } +#else + RT_NOREF_PV(enmTag); +#endif + + /* + * Validate input. + */ + if (cbSize == 0) + { +#ifdef MMR3HEAP_WITH_STATISTICS + RTCritSectEnter(&pHeap->Lock); + pStat->cFailures++; + pHeap->Stat.cFailures++; + RTCritSectLeave(&pHeap->Lock); +#endif + return NULL; + } + + /* + * Allocate heap block. + */ + cbSize = RT_ALIGN_Z(cbSize, MMR3HEAP_SIZE_ALIGNMENT) + sizeof(MMHEAPHDR); + PMMHEAPHDR pHdr = (PMMHEAPHDR)(fZero ? RTMemAllocZ(cbSize) : RTMemAlloc(cbSize)); + if (!pHdr) + { + AssertMsgFailed(("Failed to allocate heap block %d, enmTag=%x(%.4s).\n", cbSize, enmTag, &enmTag)); +#ifdef MMR3HEAP_WITH_STATISTICS + RTCritSectEnter(&pHeap->Lock); + pStat->cFailures++; + pHeap->Stat.cFailures++; + RTCritSectLeave(&pHeap->Lock); +#endif + return NULL; + } + Assert(!((uintptr_t)pHdr & (RTMEM_ALIGNMENT - 1))); + + RTCritSectEnter(&pHeap->Lock); + + /* + * Init and link in the header. + */ + pHdr->pNext = NULL; + pHdr->pPrev = pHeap->pTail; + if (pHdr->pPrev) + pHdr->pPrev->pNext = pHdr; + else + pHeap->pHead = pHdr; + pHeap->pTail = pHdr; +#ifdef MMR3HEAP_WITH_STATISTICS + pHdr->pStat = pStat; +#else + pHdr->pStat = &pHeap->Stat; +#endif + pHdr->cbSize = cbSize; + + /* + * Update statistics + */ +#ifdef MMR3HEAP_WITH_STATISTICS + pStat->cbAllocated += cbSize; + pStat->cbCurAllocated += cbSize; + pHeap->Stat.cbAllocated += cbSize; + pHeap->Stat.cbCurAllocated += cbSize; +#endif + + RTCritSectLeave(&pHeap->Lock); + + return pHdr + 1; +} + + +/** + * Reallocate memory allocated with MMR3HeapAlloc() or MMR3HeapRealloc(). + * + * @returns Pointer to reallocated memory. + * @param pv Pointer to the memory block to reallocate. + * Must not be NULL! 
+ * @param cbNewSize New block size. + */ +VMMR3DECL(void *) MMR3HeapRealloc(void *pv, size_t cbNewSize) +{ + AssertMsg(pv, ("Invalid pointer pv=%p\n", pv)); + if (!pv) + return NULL; + + /* + * If newsize is zero then this is a free. + */ + if (!cbNewSize) + { + MMR3HeapFree(pv); + return NULL; + } + + /* + * Validate header. + */ + PMMHEAPHDR pHdr = (PMMHEAPHDR)pv - 1; + if ( pHdr->cbSize & (MMR3HEAP_SIZE_ALIGNMENT - 1) + || (uintptr_t)pHdr & (RTMEM_ALIGNMENT - 1)) + { + AssertMsgFailed(("Invalid heap header! pv=%p, size=%#x\n", pv, pHdr->cbSize)); + return NULL; + } + Assert(pHdr->pStat != NULL); + Assert(!((uintptr_t)pHdr->pNext & (RTMEM_ALIGNMENT - 1))); + Assert(!((uintptr_t)pHdr->pPrev & (RTMEM_ALIGNMENT - 1))); + + PMMHEAP pHeap = pHdr->pStat->pHeap; + +#ifdef MMR3HEAP_WITH_STATISTICS + RTCritSectEnter(&pHeap->Lock); + pHdr->pStat->cReallocations++; + pHeap->Stat.cReallocations++; + RTCritSectLeave(&pHeap->Lock); +#endif + + /* + * Reallocate the block. + */ + cbNewSize = RT_ALIGN_Z(cbNewSize, MMR3HEAP_SIZE_ALIGNMENT) + sizeof(MMHEAPHDR); + PMMHEAPHDR pHdrNew = (PMMHEAPHDR)RTMemRealloc(pHdr, cbNewSize); + if (!pHdrNew) + { +#ifdef MMR3HEAP_WITH_STATISTICS + RTCritSectEnter(&pHeap->Lock); + pHdr->pStat->cFailures++; + pHeap->Stat.cFailures++; + RTCritSectLeave(&pHeap->Lock); +#endif + return NULL; + } + + /* + * Update pointers. + */ + if (pHdrNew != pHdr) + { + RTCritSectEnter(&pHeap->Lock); + if (pHdrNew->pPrev) + pHdrNew->pPrev->pNext = pHdrNew; + else + pHeap->pHead = pHdrNew; + + if (pHdrNew->pNext) + pHdrNew->pNext->pPrev = pHdrNew; + else + pHeap->pTail = pHdrNew; + RTCritSectLeave(&pHeap->Lock); + } + + /* + * Update statistics. + */ +#ifdef MMR3HEAP_WITH_STATISTICS + RTCritSectEnter(&pHeap->Lock); + pHdrNew->pStat->cbAllocated += cbNewSize - pHdrNew->cbSize; + pHeap->Stat.cbAllocated += cbNewSize - pHdrNew->cbSize; + RTCritSectLeave(&pHeap->Lock); +#endif + + pHdrNew->cbSize = cbNewSize; + + return pHdrNew + 1; +} + + +/** + * Duplicates the specified string. + * + * @returns Pointer to the duplicate. + * @returns NULL on failure or when input NULL. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param psz The string to duplicate. NULL is allowed. + */ +VMMR3DECL(char *) MMR3HeapStrDupU(PUVM pUVM, MMTAG enmTag, const char *psz) +{ + if (!psz) + return NULL; + AssertPtr(psz); + + size_t cch = strlen(psz) + 1; + char *pszDup = (char *)MMR3HeapAllocU(pUVM, enmTag, cch); + if (pszDup) + memcpy(pszDup, psz, cch); + return pszDup; +} + + +/** + * Duplicates the specified string. + * + * @returns Pointer to the duplicate. + * @returns NULL on failure or when input NULL. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. See MM_TAG_*. + * @param psz The string to duplicate. NULL is allowed. + */ +VMMR3DECL(char *) MMR3HeapStrDup(PVM pVM, MMTAG enmTag, const char *psz) +{ + return MMR3HeapStrDupU(pVM->pUVM, enmTag, psz); +} + + +/** + * Allocating string printf. + * + * @returns Pointer to the string. + * @param pVM The cross context VM structure. + * @param enmTag The statistics tag. + * @param pszFormat The format string. + * @param ... Format arguments. 
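+ *
+ * Illustrative sketch only (tag, format string and arguments are arbitrary
+ * examples, not taken from the original sources):
+ * @code
+ *      char *pszName = MMR3HeapAPrintf(pVM, MM_TAG_MM, "Timer-%u", iTimer);
+ * @endcode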
+ */ +VMMR3DECL(char *) MMR3HeapAPrintf(PVM pVM, MMTAG enmTag, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + char *psz = MMR3HeapAPrintfVU(pVM->pUVM, enmTag, pszFormat, va); + va_end(va); + return psz; +} + + +/** + * Allocating string printf. + * + * @returns Pointer to the string. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag The statistics tag. + * @param pszFormat The format string. + * @param ... Format arguments. + */ +VMMR3DECL(char *) MMR3HeapAPrintfU(PUVM pUVM, MMTAG enmTag, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + char *psz = MMR3HeapAPrintfVU(pUVM, enmTag, pszFormat, va); + va_end(va); + return psz; +} + + +/** + * Allocating string printf. + * + * @returns Pointer to the string. + * @param pVM The cross context VM structure. + * @param enmTag The statistics tag. + * @param pszFormat The format string. + * @param va Format arguments. + */ +VMMR3DECL(char *) MMR3HeapAPrintfV(PVM pVM, MMTAG enmTag, const char *pszFormat, va_list va) +{ + return MMR3HeapAPrintfVU(pVM->pUVM, enmTag, pszFormat, va); +} + + +/** + * Allocating string printf. + * + * @returns Pointer to the string. + * @param pUVM Pointer to the user mode VM structure. + * @param enmTag The statistics tag. + * @param pszFormat The format string. + * @param va Format arguments. + */ +VMMR3DECL(char *) MMR3HeapAPrintfVU(PUVM pUVM, MMTAG enmTag, const char *pszFormat, va_list va) +{ + /* + * The lazy bird way. + */ + char *psz; + int cch = RTStrAPrintfV(&psz, pszFormat, va); + if (cch < 0) + return NULL; + Assert(psz[cch] == '\0'); + char *pszRet = (char *)MMR3HeapAllocU(pUVM, enmTag, cch + 1); + if (pszRet) + memcpy(pszRet, psz, cch + 1); + RTStrFree(psz); + return pszRet; +} + + +/** + * Releases memory allocated with MMR3HeapAlloc() or MMR3HeapRealloc(). + * + * @param pv Pointer to the memory block to free. + */ +VMMR3DECL(void) MMR3HeapFree(void *pv) +{ + /* Ignore NULL pointers. */ + if (!pv) + return; + + /* + * Validate header. + */ + PMMHEAPHDR pHdr = (PMMHEAPHDR)pv - 1; + if ( pHdr->cbSize & (MMR3HEAP_SIZE_ALIGNMENT - 1) + || (uintptr_t)pHdr & (RTMEM_ALIGNMENT - 1)) + { + AssertMsgFailed(("Invalid heap header! pv=%p, size=%#x\n", pv, pHdr->cbSize)); + return; + } + Assert(pHdr->pStat != NULL); + Assert(!((uintptr_t)pHdr->pNext & (RTMEM_ALIGNMENT - 1))); + Assert(!((uintptr_t)pHdr->pPrev & (RTMEM_ALIGNMENT - 1))); + + /* + * Update statistics + */ + PMMHEAP pHeap = pHdr->pStat->pHeap; + RTCritSectEnter(&pHeap->Lock); + +#ifdef MMR3HEAP_WITH_STATISTICS + pHdr->pStat->cFrees++; + pHeap->Stat.cFrees++; + pHdr->pStat->cbFreed += pHdr->cbSize; + pHeap->Stat.cbFreed += pHdr->cbSize; + pHdr->pStat->cbCurAllocated -= pHdr->cbSize; + pHeap->Stat.cbCurAllocated -= pHdr->cbSize; +#endif + + /* + * Unlink it. + */ + if (pHdr->pPrev) + pHdr->pPrev->pNext = pHdr->pNext; + else + pHeap->pHead = pHdr->pNext; + + if (pHdr->pNext) + pHdr->pNext->pPrev = pHdr->pPrev; + else + pHeap->pTail = pHdr->pPrev; + + RTCritSectLeave(&pHeap->Lock); + + /* + * Free the memory. + */ + RTMemFree(pHdr); +} + diff --git a/src/VBox/VMM/VMMR3/MMHyper.cpp b/src/VBox/VMM/VMMR3/MMHyper.cpp new file mode 100644 index 00000000..f59e8da1 --- /dev/null +++ b/src/VBox/VMM/VMMR3/MMHyper.cpp @@ -0,0 +1,1509 @@ +/* $Id: MMHyper.cpp $ */ +/** @file + * MM - Memory Manager - Hypervisor Memory Area. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_MM_HYPER +#include +#include +#include +#include +#include "MMInternal.h" +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(bool) mmR3HyperRelocateCallback(PVM pVM, RTGCPTR GCPtrOld, RTGCPTR GCPtrNew, PGMRELOCATECALL enmMode, + void *pvUser); +static int mmR3HyperMap(PVM pVM, const size_t cb, const char *pszDesc, PRTGCPTR pGCPtr, PMMLOOKUPHYPER *ppLookup); +static int mmR3HyperHeapCreate(PVM pVM, const size_t cb, PMMHYPERHEAP *ppHeap, PRTR0PTR pR0PtrHeap); +static int mmR3HyperHeapMap(PVM pVM, PMMHYPERHEAP pHeap, PRTGCPTR ppHeapGC); +static DECLCALLBACK(void) mmR3HyperInfoHma(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/** + * Determin the default heap size. + * + * @returns The heap size in bytes. + * @param pVM The cross context VM structure. + */ +static uint32_t mmR3HyperComputeHeapSize(PVM pVM) +{ + /* + * Gather parameters. + */ + bool fCanUseLargerHeap; + int rc = CFGMR3QueryBoolDef(CFGMR3GetChild(CFGMR3GetRoot(pVM), "MM"), "CanUseLargerHeap", &fCanUseLargerHeap, false); + AssertStmt(RT_SUCCESS(rc), fCanUseLargerHeap = false); + + uint64_t cbRam; + rc = CFGMR3QueryU64(CFGMR3GetRoot(pVM), "RamSize", &cbRam); + AssertStmt(RT_SUCCESS(rc), cbRam = _1G); + + /* + * We need to keep saved state compatibility if raw-mode is an option, + * so lets filter out that case first. + */ + if ( !fCanUseLargerHeap + && VM_IS_RAW_MODE_ENABLED(pVM) + && cbRam < 16*_1G64) + return 1280 * _1K; + + /* + * Calculate the heap size. + */ + uint32_t cbHeap = _1M; + + /* The newer chipset may have more devices attached, putting additional + pressure on the heap. */ + if (fCanUseLargerHeap) + cbHeap += _1M; + + /* More CPUs means some extra memory usage. */ + if (pVM->cCpus > 1) + cbHeap += pVM->cCpus * _64K; + + /* Lots of memory means extra memory consumption as well (pool). */ + if (cbRam > 16*_1G64) + cbHeap += _2M; /** @todo figure out extactly how much */ + + return RT_ALIGN(cbHeap, _256K); +} + + +/** + * Initializes the hypervisor related MM stuff without + * calling down to PGM. + * + * PGM is not initialized at this point, PGM relies on + * the heap to initialize. + * + * @returns VBox status code. + */ +int mmR3HyperInit(PVM pVM) +{ + LogFlow(("mmR3HyperInit:\n")); + + /* + * Decide Hypervisor mapping in the guest context + * And setup various hypervisor area and heap parameters. 
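+ * Worked example for the mmR3HyperComputeHeapSize() default used a few lines
+ * further down (illustrative configuration, not from the original sources):
+ * with CanUseLargerHeap unset, raw-mode disabled, 4 VCPUs and 8 GiB of RAM the
+ * default is 1 MiB + 4 * 64 KiB = 1280 KiB, already a multiple of 256 KiB.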
+ */ + pVM->mm.s.pvHyperAreaGC = (RTGCPTR)MM_HYPER_AREA_ADDRESS; + pVM->mm.s.cbHyperArea = MM_HYPER_AREA_MAX_SIZE; + AssertRelease(RT_ALIGN_T(pVM->mm.s.pvHyperAreaGC, 1 << X86_PD_SHIFT, RTGCPTR) == pVM->mm.s.pvHyperAreaGC); + Assert(pVM->mm.s.pvHyperAreaGC < 0xff000000); + + /** @todo @bugref{1865}, @bugref{3202}: Change the cbHyperHeap default + * depending on whether VT-x/AMD-V is enabled or not! Don't waste + * precious kernel space on heap for the PATM. + */ + PCFGMNODE pMM = CFGMR3GetChild(CFGMR3GetRoot(pVM), "MM"); + uint32_t cbHyperHeap; + int rc = CFGMR3QueryU32Def(pMM, "cbHyperHeap", &cbHyperHeap, mmR3HyperComputeHeapSize(pVM)); + AssertLogRelRCReturn(rc, rc); + + cbHyperHeap = RT_ALIGN_32(cbHyperHeap, PAGE_SIZE); + LogRel(("MM: cbHyperHeap=%#x (%u)\n", cbHyperHeap, cbHyperHeap)); + + /* + * Allocate the hypervisor heap. + * + * (This must be done before we start adding memory to the + * hypervisor static area because lookup records are allocated from it.) + */ + rc = mmR3HyperHeapCreate(pVM, cbHyperHeap, &pVM->mm.s.pHyperHeapR3, &pVM->mm.s.pHyperHeapR0); + if (RT_SUCCESS(rc)) + { + /* + * Make a small head fence to fend of accidental sequential access. + */ + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + + /* + * Map the VM structure into the hypervisor space. + */ + AssertRelease(pVM->cbSelf == RT_UOFFSETOF_DYN(VM, aCpus[pVM->cCpus])); + RTGCPTR GCPtr; + rc = MMR3HyperMapPages(pVM, pVM, pVM->pVMR0, RT_ALIGN_Z(pVM->cbSelf, PAGE_SIZE) >> PAGE_SHIFT, pVM->paVMPagesR3, "VM", + &GCPtr); + if (RT_SUCCESS(rc)) + { + pVM->pVMRC = (RTRCPTR)GCPtr; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + pVM->aCpus[i].pVMRC = pVM->pVMRC; + + /* Reserve a page for fencing. */ + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + + /* + * Map the heap into the hypervisor space. + */ + rc = mmR3HyperHeapMap(pVM, pVM->mm.s.pHyperHeapR3, &GCPtr); + if (RT_SUCCESS(rc)) + { + pVM->mm.s.pHyperHeapRC = (RTRCPTR)GCPtr; + Assert(pVM->mm.s.pHyperHeapRC == GCPtr); + + /* + * Register info handlers. + */ + DBGFR3InfoRegisterInternal(pVM, "hma", "Show the layout of the Hypervisor Memory Area.", mmR3HyperInfoHma); + + LogFlow(("mmR3HyperInit: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; + } + /* Caller will do proper cleanup. */ + } + } + + LogFlow(("mmR3HyperInit: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Cleans up the hypervisor heap. + * + * @returns VBox status code. + */ +int mmR3HyperTerm(PVM pVM) +{ + if (pVM->mm.s.pHyperHeapR3) + PDMR3CritSectDelete(&pVM->mm.s.pHyperHeapR3->Lock); + + return VINF_SUCCESS; +} + + +/** + * Finalizes the HMA mapping. + * + * This is called later during init, most (all) HMA allocations should be done + * by the time this function is called. + * + * @returns VBox status code. + */ +VMMR3DECL(int) MMR3HyperInitFinalize(PVM pVM) +{ + LogFlow(("MMR3HyperInitFinalize:\n")); + + /* + * Initialize the hyper heap critical section. + */ + int rc = PDMR3CritSectInit(pVM, &pVM->mm.s.pHyperHeapR3->Lock, RT_SRC_POS, "MM-HYPER"); + AssertRC(rc); + + /* + * Adjust and create the HMA mapping. + */ + while ((RTINT)pVM->mm.s.offHyperNextStatic + 64*_1K < (RTINT)pVM->mm.s.cbHyperArea - _4M) + pVM->mm.s.cbHyperArea -= _4M; + rc = PGMR3MapPT(pVM, pVM->mm.s.pvHyperAreaGC, pVM->mm.s.cbHyperArea, 0 /*fFlags*/, + mmR3HyperRelocateCallback, NULL, "Hypervisor Memory Area"); + if (RT_FAILURE(rc)) + return rc; + pVM->mm.s.fPGMInitialized = true; + + /* + * Do all the delayed mappings. 
+ */ + PMMLOOKUPHYPER pLookup = (PMMLOOKUPHYPER)((uintptr_t)pVM->mm.s.pHyperHeapR3 + pVM->mm.s.offLookupHyper); + for (;;) + { + RTGCPTR GCPtr = pVM->mm.s.pvHyperAreaGC + pLookup->off; + uint32_t cPages = pLookup->cb >> PAGE_SHIFT; + switch (pLookup->enmType) + { + case MMLOOKUPHYPERTYPE_LOCKED: + { + PCRTHCPHYS paHCPhysPages = pLookup->u.Locked.paHCPhysPages; + for (uint32_t i = 0; i < cPages; i++) + { + rc = PGMMap(pVM, GCPtr + (i << PAGE_SHIFT), paHCPhysPages[i], PAGE_SIZE, 0); + AssertRCReturn(rc, rc); + } + break; + } + + case MMLOOKUPHYPERTYPE_HCPHYS: + rc = PGMMap(pVM, GCPtr, pLookup->u.HCPhys.HCPhys, pLookup->cb, 0); + break; + + case MMLOOKUPHYPERTYPE_GCPHYS: + { + const RTGCPHYS GCPhys = pLookup->u.GCPhys.GCPhys; + const uint32_t cb = pLookup->cb; + for (uint32_t off = 0; off < cb; off += PAGE_SIZE) + { + RTHCPHYS HCPhys; + rc = PGMPhysGCPhys2HCPhys(pVM, GCPhys + off, &HCPhys); + if (RT_FAILURE(rc)) + break; + rc = PGMMap(pVM, GCPtr + off, HCPhys, PAGE_SIZE, 0); + if (RT_FAILURE(rc)) + break; + } + break; + } + + case MMLOOKUPHYPERTYPE_MMIO2: + { + const RTGCPHYS offEnd = pLookup->u.MMIO2.off + pLookup->cb; + for (RTGCPHYS offCur = pLookup->u.MMIO2.off; offCur < offEnd; offCur += PAGE_SIZE) + { + RTHCPHYS HCPhys; + rc = PGMR3PhysMMIO2GetHCPhys(pVM, pLookup->u.MMIO2.pDevIns, pLookup->u.MMIO2.iSubDev, + pLookup->u.MMIO2.iRegion, offCur, &HCPhys); + if (RT_FAILURE(rc)) + break; + rc = PGMMap(pVM, GCPtr + (offCur - pLookup->u.MMIO2.off), HCPhys, PAGE_SIZE, 0); + if (RT_FAILURE(rc)) + break; + } + break; + } + + case MMLOOKUPHYPERTYPE_DYNAMIC: + /* do nothing here since these are either fences or managed by someone else using PGM. */ + break; + + default: + AssertMsgFailed(("enmType=%d\n", pLookup->enmType)); + break; + } + + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("rc=%Rrc cb=%d off=%#RX32 enmType=%d pszDesc=%s\n", + rc, pLookup->cb, pLookup->off, pLookup->enmType, pLookup->pszDesc)); + return rc; + } + + /* next */ + if (pLookup->offNext == (int32_t)NIL_OFFSET) + break; + pLookup = (PMMLOOKUPHYPER)((uintptr_t)pLookup + pLookup->offNext); + } + + LogFlow(("MMR3HyperInitFinalize: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Callback function which will be called when PGM is trying to find a new + * location for the mapping. + * + * The callback is called in two modes, 1) the check mode and 2) the relocate mode. + * In 1) the callback should say if it objects to a suggested new location. If it + * accepts the new location, it is called again for doing it's relocation. + * + * + * @returns true if the location is ok. + * @returns false if another location should be found. + * @param pVM The cross context VM structure. + * @param GCPtrOld The old virtual address. + * @param GCPtrNew The new virtual address. + * @param enmMode Used to indicate the callback mode. + * @param pvUser User argument. Ignored. + * @remark The return value is no a failure indicator, it's an acceptance + * indicator. Relocation can not fail! + */ +static DECLCALLBACK(bool) mmR3HyperRelocateCallback(PVM pVM, RTGCPTR GCPtrOld, RTGCPTR GCPtrNew, + PGMRELOCATECALL enmMode, void *pvUser) +{ + NOREF(pvUser); + switch (enmMode) + { + /* + * Verify location - all locations are good for us. + */ + case PGMRELOCATECALL_SUGGEST: + return true; + + /* + * Execute the relocation. + */ + case PGMRELOCATECALL_RELOCATE: + { + /* + * Accepted! 
+ */ + AssertMsg(GCPtrOld == pVM->mm.s.pvHyperAreaGC, + ("GCPtrOld=%RGv pVM->mm.s.pvHyperAreaGC=%RGv\n", GCPtrOld, pVM->mm.s.pvHyperAreaGC)); + Log(("Relocating the hypervisor from %RGv to %RGv\n", GCPtrOld, GCPtrNew)); + + /* + * Relocate the VM structure and ourselves. + */ + RTGCINTPTR offDelta = GCPtrNew - GCPtrOld; + pVM->pVMRC += offDelta; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + pVM->aCpus[i].pVMRC = pVM->pVMRC; + + pVM->mm.s.pvHyperAreaGC += offDelta; + Assert(pVM->mm.s.pvHyperAreaGC < _4G); + pVM->mm.s.pHyperHeapRC += offDelta; + pVM->mm.s.pHyperHeapR3->pbHeapRC += offDelta; + pVM->mm.s.pHyperHeapR3->pVMRC = pVM->pVMRC; + + /* + * Relocate the rest. + */ + VMR3Relocate(pVM, offDelta); + return true; + } + + default: + AssertMsgFailed(("Invalid relocation mode %d\n", enmMode)); + } + + return false; +} + +/** + * Service a VMMCALLRING3_MMHYPER_LOCK call. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) MMR3LockCall(PVM pVM) +{ + PMMHYPERHEAP pHeap = pVM->mm.s.CTX_SUFF(pHyperHeap); + + int rc = PDMR3CritSectEnterEx(&pHeap->Lock, true /* fHostCall */); + AssertRC(rc); + return rc; +} + +/** + * Maps contiguous HC physical memory into the hypervisor region in the GC. + * + * @return VBox status code. + * + * @param pVM The cross context VM structure. + * @param pvR3 Ring-3 address of the memory. Must be page aligned! + * @param pvR0 Optional ring-0 address of the memory. + * @param HCPhys Host context physical address of the memory to be + * mapped. Must be page aligned! + * @param cb Size of the memory. Will be rounded up to nearest page. + * @param pszDesc Description. + * @param pGCPtr Where to store the GC address. + */ +VMMR3DECL(int) MMR3HyperMapHCPhys(PVM pVM, void *pvR3, RTR0PTR pvR0, RTHCPHYS HCPhys, size_t cb, + const char *pszDesc, PRTGCPTR pGCPtr) +{ + LogFlow(("MMR3HyperMapHCPhys: pvR3=%p pvR0=%p HCPhys=%RHp cb=%d pszDesc=%p:{%s} pGCPtr=%p\n", + pvR3, pvR0, HCPhys, (int)cb, pszDesc, pszDesc, pGCPtr)); + + /* + * Validate input. + */ + AssertReturn(RT_ALIGN_P(pvR3, PAGE_SIZE) == pvR3, VERR_INVALID_PARAMETER); + AssertReturn(RT_ALIGN_T(pvR0, PAGE_SIZE, RTR0PTR) == pvR0, VERR_INVALID_PARAMETER); + AssertReturn(RT_ALIGN_T(HCPhys, PAGE_SIZE, RTHCPHYS) == HCPhys, VERR_INVALID_PARAMETER); + AssertReturn(pszDesc && *pszDesc, VERR_INVALID_PARAMETER); + + /* + * Add the memory to the hypervisor area. + */ + uint32_t cbAligned = RT_ALIGN_32(cb, PAGE_SIZE); + AssertReturn(cbAligned >= cb, VERR_INVALID_PARAMETER); + RTGCPTR GCPtr; + PMMLOOKUPHYPER pLookup; + int rc = mmR3HyperMap(pVM, cbAligned, pszDesc, &GCPtr, &pLookup); + if (RT_SUCCESS(rc)) + { + pLookup->enmType = MMLOOKUPHYPERTYPE_HCPHYS; + pLookup->u.HCPhys.pvR3 = pvR3; + pLookup->u.HCPhys.pvR0 = pvR0; + pLookup->u.HCPhys.HCPhys = HCPhys; + + /* + * Update the page table. + */ + if (pVM->mm.s.fPGMInitialized) + rc = PGMMap(pVM, GCPtr, HCPhys, cbAligned, 0); + if (RT_SUCCESS(rc)) + *pGCPtr = GCPtr; + } + return rc; +} + + +/** + * Maps contiguous GC physical memory into the hypervisor region in the GC. + * + * @return VBox status code. + * + * @param pVM The cross context VM structure. + * @param GCPhys Guest context physical address of the memory to be mapped. Must be page aligned! + * @param cb Size of the memory. Will be rounded up to nearest page. + * @param pszDesc Mapping description. + * @param pGCPtr Where to store the GC address. 
+ */ +VMMR3DECL(int) MMR3HyperMapGCPhys(PVM pVM, RTGCPHYS GCPhys, size_t cb, const char *pszDesc, PRTGCPTR pGCPtr) +{ + LogFlow(("MMR3HyperMapGCPhys: GCPhys=%RGp cb=%d pszDesc=%p:{%s} pGCPtr=%p\n", GCPhys, (int)cb, pszDesc, pszDesc, pGCPtr)); + + /* + * Validate input. + */ + AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER); + AssertReturn(pszDesc && *pszDesc, VERR_INVALID_PARAMETER); + + /* + * Add the memory to the hypervisor area. + */ + cb = RT_ALIGN_Z(cb, PAGE_SIZE); + RTGCPTR GCPtr; + PMMLOOKUPHYPER pLookup; + int rc = mmR3HyperMap(pVM, cb, pszDesc, &GCPtr, &pLookup); + if (RT_SUCCESS(rc)) + { + pLookup->enmType = MMLOOKUPHYPERTYPE_GCPHYS; + pLookup->u.GCPhys.GCPhys = GCPhys; + + /* + * Update the page table. + */ + for (unsigned off = 0; off < cb; off += PAGE_SIZE) + { + RTHCPHYS HCPhys; + rc = PGMPhysGCPhys2HCPhys(pVM, GCPhys + off, &HCPhys); + AssertRC(rc); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("rc=%Rrc GCPhys=%RGp off=%#x %s\n", rc, GCPhys, off, pszDesc)); + break; + } + if (pVM->mm.s.fPGMInitialized) + { + rc = PGMMap(pVM, GCPtr + off, HCPhys, PAGE_SIZE, 0); + AssertRC(rc); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("rc=%Rrc GCPhys=%RGp off=%#x %s\n", rc, GCPhys, off, pszDesc)); + break; + } + } + } + + if (RT_SUCCESS(rc) && pGCPtr) + *pGCPtr = GCPtr; + } + return rc; +} + + +/** + * Maps a portion of an MMIO2 region into the hypervisor region. + * + * Callers of this API must never deregister the MMIO2 region before the + * VM is powered off. If this becomes a requirement MMR3HyperUnmapMMIO2 + * API will be needed to perform cleanups. + * + * @return VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device owning the MMIO2 memory. + * @param iSubDev The sub-device number. + * @param iRegion The region. + * @param off The offset into the region. Will be rounded down to closest page boundary. + * @param cb The number of bytes to map. Will be rounded up to the closest page boundary. + * @param pszDesc Mapping description. + * @param pRCPtr Where to store the RC address. + */ +VMMR3DECL(int) MMR3HyperMapMMIO2(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS off, RTGCPHYS cb, + const char *pszDesc, PRTRCPTR pRCPtr) +{ + LogFlow(("MMR3HyperMapMMIO2: pDevIns=%p iSubDev=%#x iRegion=%#x off=%RGp cb=%RGp pszDesc=%p:{%s} pRCPtr=%p\n", + pDevIns, iSubDev, iRegion, off, cb, pszDesc, pszDesc, pRCPtr)); + int rc; + + /* + * Validate input. + */ + AssertReturn(pszDesc && *pszDesc, VERR_INVALID_PARAMETER); + AssertReturn(off + cb > off, VERR_INVALID_PARAMETER); + uint32_t const offPage = off & PAGE_OFFSET_MASK; + off &= ~(RTGCPHYS)PAGE_OFFSET_MASK; + cb += offPage; + cb = RT_ALIGN_Z(cb, PAGE_SIZE); + const RTGCPHYS offEnd = off + cb; + AssertReturn(offEnd > off, VERR_INVALID_PARAMETER); + for (RTGCPHYS offCur = off; offCur < offEnd; offCur += PAGE_SIZE) + { + RTHCPHYS HCPhys; + rc = PGMR3PhysMMIO2GetHCPhys(pVM, pDevIns, iSubDev, iRegion, offCur, &HCPhys); + AssertMsgRCReturn(rc, ("rc=%Rrc - iSubDev=%#x iRegion=%#x off=%RGp\n", rc, iSubDev, iRegion, off), rc); + } + + /* + * Add the memory to the hypervisor area. + */ + RTGCPTR GCPtr; + PMMLOOKUPHYPER pLookup; + rc = mmR3HyperMap(pVM, cb, pszDesc, &GCPtr, &pLookup); + if (RT_SUCCESS(rc)) + { + pLookup->enmType = MMLOOKUPHYPERTYPE_MMIO2; + pLookup->u.MMIO2.pDevIns = pDevIns; + pLookup->u.MMIO2.iSubDev = iSubDev; + pLookup->u.MMIO2.iRegion = iRegion; + pLookup->u.MMIO2.off = off; + + /* + * Update the page table. 
+ */ + if (pVM->mm.s.fPGMInitialized) + { + for (RTGCPHYS offCur = off; offCur < offEnd; offCur += PAGE_SIZE) + { + RTHCPHYS HCPhys; + rc = PGMR3PhysMMIO2GetHCPhys(pVM, pDevIns, iSubDev, iRegion, offCur, &HCPhys); + AssertRCReturn(rc, rc); + rc = PGMMap(pVM, GCPtr + (offCur - off), HCPhys, PAGE_SIZE, 0); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("rc=%Rrc offCur=%RGp %s\n", rc, offCur, pszDesc)); + break; + } + } + } + + if (RT_SUCCESS(rc)) + { + GCPtr |= offPage; + *pRCPtr = GCPtr; + AssertLogRelReturn(*pRCPtr == GCPtr, VERR_INTERNAL_ERROR); + } + } + return rc; +} + + +/** + * Maps locked R3 virtual memory into the hypervisor region in the GC. + * + * @return VBox status code. + * + * @param pVM The cross context VM structure. + * @param pvR3 The ring-3 address of the memory, must be page aligned. + * @param pvR0 The ring-0 address of the memory, must be page aligned. (optional) + * @param cPages The number of pages. + * @param paPages The page descriptors. + * @param pszDesc Mapping description. + * @param pGCPtr Where to store the GC address corresponding to pvR3. + */ +VMMR3DECL(int) MMR3HyperMapPages(PVM pVM, void *pvR3, RTR0PTR pvR0, size_t cPages, PCSUPPAGE paPages, + const char *pszDesc, PRTGCPTR pGCPtr) +{ + LogFlow(("MMR3HyperMapPages: pvR3=%p pvR0=%p cPages=%zu paPages=%p pszDesc=%p:{%s} pGCPtr=%p\n", + pvR3, pvR0, cPages, paPages, pszDesc, pszDesc, pGCPtr)); + + /* + * Validate input. + */ + AssertPtrReturn(pvR3, VERR_INVALID_POINTER); + AssertPtrReturn(paPages, VERR_INVALID_POINTER); + AssertReturn(cPages > 0, VERR_PAGE_COUNT_OUT_OF_RANGE); + AssertReturn(cPages <= VBOX_MAX_ALLOC_PAGE_COUNT, VERR_PAGE_COUNT_OUT_OF_RANGE); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertReturn(*pszDesc, VERR_INVALID_PARAMETER); + AssertPtrReturn(pGCPtr, VERR_INVALID_PARAMETER); + + /* + * Add the memory to the hypervisor area. + */ + RTGCPTR GCPtr; + PMMLOOKUPHYPER pLookup; + int rc = mmR3HyperMap(pVM, cPages << PAGE_SHIFT, pszDesc, &GCPtr, &pLookup); + if (RT_SUCCESS(rc)) + { + /* + * Copy the physical page addresses and tell PGM about them. + */ + PRTHCPHYS paHCPhysPages = (PRTHCPHYS)MMR3HeapAlloc(pVM, MM_TAG_MM, sizeof(RTHCPHYS) * cPages); + if (paHCPhysPages) + { + for (size_t i = 0; i < cPages; i++) + { + AssertReleaseMsgReturn( paPages[i].Phys != 0 + && paPages[i].Phys != NIL_RTHCPHYS + && !(paPages[i].Phys & PAGE_OFFSET_MASK), + ("i=%#zx Phys=%RHp %s\n", i, paPages[i].Phys, pszDesc), + VERR_INTERNAL_ERROR); + paHCPhysPages[i] = paPages[i].Phys; + } + + if (pVM->mm.s.fPGMInitialized) + { + for (size_t i = 0; i < cPages; i++) + { + rc = PGMMap(pVM, GCPtr + (i << PAGE_SHIFT), paHCPhysPages[i], PAGE_SIZE, 0); + AssertRCBreak(rc); + } + } + if (RT_SUCCESS(rc)) + { + pLookup->enmType = MMLOOKUPHYPERTYPE_LOCKED; + pLookup->u.Locked.pvR3 = pvR3; + pLookup->u.Locked.pvR0 = pvR0; + pLookup->u.Locked.paHCPhysPages = paHCPhysPages; + + /* done. */ + *pGCPtr = GCPtr; + return rc; + } + /* Don't care about failure clean, we're screwed if this fails anyway. */ + } + } + + return rc; +} + + +/** + * Reserves a hypervisor memory area. + * Most frequent usage is fence pages and dynamically mappings like the guest PD and PDPT. + * + * @return VBox status code. + * + * @param pVM The cross context VM structure. + * @param cb Size of the memory. Will be rounded up to nearest page. + * @param pszDesc Mapping description. + * @param pGCPtr Where to store the assigned GC address. Optional. 
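+ *
+ * Example mirroring the existing fence reservations elsewhere in this file:
+ * @code
+ *      MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL);
+ * @endcode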
+ */ +VMMR3DECL(int) MMR3HyperReserve(PVM pVM, unsigned cb, const char *pszDesc, PRTGCPTR pGCPtr) +{ + LogFlow(("MMR3HyperMapHCRam: cb=%d pszDesc=%p:{%s} pGCPtr=%p\n", (int)cb, pszDesc, pszDesc, pGCPtr)); + + /* + * Validate input. + */ + if ( cb <= 0 + || !pszDesc + || !*pszDesc) + { + AssertMsgFailed(("Invalid parameter\n")); + return VERR_INVALID_PARAMETER; + } + + /* + * Add the memory to the hypervisor area. + */ + RTGCPTR GCPtr; + PMMLOOKUPHYPER pLookup; + int rc = mmR3HyperMap(pVM, cb, pszDesc, &GCPtr, &pLookup); + if (RT_SUCCESS(rc)) + { + pLookup->enmType = MMLOOKUPHYPERTYPE_DYNAMIC; + if (pGCPtr) + *pGCPtr = GCPtr; + return VINF_SUCCESS; + } + return rc; +} + + +/** + * Adds memory to the hypervisor memory arena. + * + * @return VBox status code. + * @param pVM The cross context VM structure. + * @param cb Size of the memory. Will be rounded up to nearest page. + * @param pszDesc The description of the memory. + * @param pGCPtr Where to store the GC address. + * @param ppLookup Where to store the pointer to the lookup record. + * @remark We assume the threading structure of VBox imposes natural + * serialization of most functions, this one included. + */ +static int mmR3HyperMap(PVM pVM, const size_t cb, const char *pszDesc, PRTGCPTR pGCPtr, PMMLOOKUPHYPER *ppLookup) +{ + /* + * Validate input. + */ + const uint32_t cbAligned = RT_ALIGN_32(cb, PAGE_SIZE); + AssertReturn(cbAligned >= cb, VERR_INVALID_PARAMETER); + if (pVM->mm.s.offHyperNextStatic + cbAligned >= pVM->mm.s.cbHyperArea) /* don't use the last page, it's a fence. */ + { + AssertMsgFailed(("Out of static mapping space in the HMA! offHyperAreaGC=%x cbAligned=%x cbHyperArea=%x\n", + pVM->mm.s.offHyperNextStatic, cbAligned, pVM->mm.s.cbHyperArea)); + return VERR_NO_MEMORY; + } + + /* + * Allocate lookup record. + */ + PMMLOOKUPHYPER pLookup; + int rc = MMHyperAlloc(pVM, sizeof(*pLookup), 1, MM_TAG_MM, (void **)&pLookup); + if (RT_SUCCESS(rc)) + { + /* + * Initialize it and insert it. + */ + pLookup->offNext = pVM->mm.s.offLookupHyper; + pLookup->cb = cbAligned; + pLookup->off = pVM->mm.s.offHyperNextStatic; + pVM->mm.s.offLookupHyper = (uint8_t *)pLookup - (uint8_t *)pVM->mm.s.pHyperHeapR3; + if (pLookup->offNext != (int32_t)NIL_OFFSET) + pLookup->offNext -= pVM->mm.s.offLookupHyper; + pLookup->enmType = MMLOOKUPHYPERTYPE_INVALID; + memset(&pLookup->u, 0xff, sizeof(pLookup->u)); + pLookup->pszDesc = pszDesc; + + /* Mapping. */ + *pGCPtr = pVM->mm.s.pvHyperAreaGC + pVM->mm.s.offHyperNextStatic; + pVM->mm.s.offHyperNextStatic += cbAligned; + + /* Return pointer. */ + *ppLookup = pLookup; + } + + AssertRC(rc); + LogFlow(("mmR3HyperMap: returns %Rrc *pGCPtr=%RGv\n", rc, *pGCPtr)); + return rc; +} + + +/** + * Allocates a new heap. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cb The size of the new heap. + * @param ppHeap Where to store the heap pointer on successful return. + * @param pR0PtrHeap Where to store the ring-0 address of the heap on + * success. + */ +static int mmR3HyperHeapCreate(PVM pVM, const size_t cb, PMMHYPERHEAP *ppHeap, PRTR0PTR pR0PtrHeap) +{ + /* + * Allocate the hypervisor heap. 
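+ * (Descriptive note: the backing pages are allocated with SUPR3PageAllocEx
+ * and their host physical addresses are recorded in paPages; that array is
+ * later used by mmR3HyperHeapMap() to map the heap into the hypervisor
+ * area.)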
+ */ + const uint32_t cbAligned = RT_ALIGN_32(cb, PAGE_SIZE); + AssertReturn(cbAligned >= cb, VERR_INVALID_PARAMETER); + uint32_t const cPages = cbAligned >> PAGE_SHIFT; + PSUPPAGE paPages = (PSUPPAGE)MMR3HeapAlloc(pVM, MM_TAG_MM, cPages * sizeof(paPages[0])); + if (!paPages) + return VERR_NO_MEMORY; + void *pv; + RTR0PTR pvR0 = NIL_RTR0PTR; + int rc = SUPR3PageAllocEx(cPages, + 0 /*fFlags*/, + &pv, +#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE) || defined(VBOX_WITH_MORE_RING0_MEM_MAPPINGS) + &pvR0, +#else + NULL, +#endif + paPages); + if (RT_SUCCESS(rc)) + { +#if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE) && !defined(VBOX_WITH_MORE_RING0_MEM_MAPPINGS) + pvR0 = (uintptr_t)pv; +#endif + memset(pv, 0, cbAligned); + + /* + * Initialize the heap and first free chunk. + */ + PMMHYPERHEAP pHeap = (PMMHYPERHEAP)pv; + pHeap->u32Magic = MMHYPERHEAP_MAGIC; + pHeap->pbHeapR3 = (uint8_t *)pHeap + MMYPERHEAP_HDR_SIZE; + pHeap->pbHeapR0 = pvR0 != NIL_RTR0PTR ? pvR0 + MMYPERHEAP_HDR_SIZE : NIL_RTR0PTR; + //pHeap->pbHeapRC = 0; // set by mmR3HyperHeapMap() + pHeap->pVMR3 = pVM; + pHeap->pVMR0 = pVM->pVMR0; + pHeap->pVMRC = pVM->pVMRC; + pHeap->cbHeap = cbAligned - MMYPERHEAP_HDR_SIZE; + pHeap->cbFree = pHeap->cbHeap - sizeof(MMHYPERCHUNK); + //pHeap->offFreeHead = 0; + //pHeap->offFreeTail = 0; + pHeap->offPageAligned = pHeap->cbHeap; + //pHeap->HyperHeapStatTree = 0; + pHeap->paPages = paPages; + + PMMHYPERCHUNKFREE pFree = (PMMHYPERCHUNKFREE)pHeap->pbHeapR3; + pFree->cb = pHeap->cbFree; + //pFree->core.offNext = 0; + MMHYPERCHUNK_SET_TYPE(&pFree->core, MMHYPERCHUNK_FLAGS_FREE); + pFree->core.offHeap = -(int32_t)MMYPERHEAP_HDR_SIZE; + //pFree->offNext = 0; + //pFree->offPrev = 0; + + STAMR3Register(pVM, &pHeap->cbHeap, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, "/MM/HyperHeap/cbHeap", STAMUNIT_BYTES, "The heap size."); + STAMR3Register(pVM, &pHeap->cbFree, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, "/MM/HyperHeap/cbFree", STAMUNIT_BYTES, "The free space."); + + *ppHeap = pHeap; + *pR0PtrHeap = pvR0; + return VINF_SUCCESS; + } + AssertMsgFailed(("SUPR3PageAllocEx(%d,,,,) -> %Rrc\n", cbAligned >> PAGE_SHIFT, rc)); + + *ppHeap = NULL; + return rc; +} + +/** + * Allocates a new heap. + */ +static int mmR3HyperHeapMap(PVM pVM, PMMHYPERHEAP pHeap, PRTGCPTR ppHeapGC) +{ + Assert(RT_ALIGN_Z(pHeap->cbHeap + MMYPERHEAP_HDR_SIZE, PAGE_SIZE) == pHeap->cbHeap + MMYPERHEAP_HDR_SIZE); + Assert(pHeap->paPages); + int rc = MMR3HyperMapPages(pVM, + pHeap, + pHeap->pbHeapR0 != NIL_RTR0PTR ? pHeap->pbHeapR0 - MMYPERHEAP_HDR_SIZE : NIL_RTR0PTR, + (pHeap->cbHeap + MMYPERHEAP_HDR_SIZE) >> PAGE_SHIFT, + pHeap->paPages, + "Heap", ppHeapGC); + if (RT_SUCCESS(rc)) + { + pHeap->pVMRC = pVM->pVMRC; + pHeap->pbHeapRC = *ppHeapGC + MMYPERHEAP_HDR_SIZE; + /* Reserve a page for fencing. */ + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + + /* We won't need these any more. */ + MMR3HeapFree(pHeap->paPages); + pHeap->paPages = NULL; + } + return rc; +} + + +/** + * Allocates memory in the Hypervisor (GC VMM) area which never will + * be freed and doesn't have any offset based relation to other heap blocks. + * + * The latter means that two blocks allocated by this API will not have the + * same relative position to each other in GC and HC. In short, never use + * this API for allocating nodes for an offset based AVL tree! + * + * The returned memory is of course zeroed. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cb Number of bytes to allocate. + * @param uAlignment Required memory alignment in bytes. 
+ * Values are 0,8,16,32 and PAGE_SIZE. + * 0 -> default alignment, i.e. 8 bytes. + * @param enmTag The statistics tag. + * @param ppv Where to store the address to the allocated + * memory. + * @remark This is assumed not to be used at times when serialization is required. + */ +VMMR3DECL(int) MMR3HyperAllocOnceNoRel(PVM pVM, size_t cb, unsigned uAlignment, MMTAG enmTag, void **ppv) +{ + return MMR3HyperAllocOnceNoRelEx(pVM, cb, uAlignment, enmTag, 0/*fFlags*/, ppv); +} + + +/** + * Allocates memory in the Hypervisor (GC VMM) area which never will + * be freed and doesn't have any offset based relation to other heap blocks. + * + * The latter means that two blocks allocated by this API will not have the + * same relative position to each other in GC and HC. In short, never use + * this API for allocating nodes for an offset based AVL tree! + * + * The returned memory is of course zeroed. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cb Number of bytes to allocate. + * @param uAlignment Required memory alignment in bytes. + * Values are 0,8,16,32 and PAGE_SIZE. + * 0 -> default alignment, i.e. 8 bytes. + * @param enmTag The statistics tag. + * @param fFlags Flags, see MMHYPER_AONR_FLAGS_KERNEL_MAPPING. + * @param ppv Where to store the address to the allocated memory. + * @remark This is assumed not to be used at times when serialization is required. + */ +VMMR3DECL(int) MMR3HyperAllocOnceNoRelEx(PVM pVM, size_t cb, unsigned uAlignment, MMTAG enmTag, uint32_t fFlags, void **ppv) +{ + AssertMsg(cb >= 8, ("Hey! Do you really mean to allocate less than 8 bytes?! cb=%d\n", cb)); + Assert(!(fFlags & ~(MMHYPER_AONR_FLAGS_KERNEL_MAPPING))); + + /* + * Choose between allocating a new chunk of HMA memory + * and the heap. We will only do BIG allocations from HMA and + * only at creation time. + */ + if ( ( cb < _64K + && ( uAlignment != PAGE_SIZE + || cb < 48*_1K) + && !(fFlags & MMHYPER_AONR_FLAGS_KERNEL_MAPPING) + ) + || VMR3GetState(pVM) != VMSTATE_CREATING + ) + { + Assert(!(fFlags & MMHYPER_AONR_FLAGS_KERNEL_MAPPING)); + int rc = MMHyperAlloc(pVM, cb, uAlignment, enmTag, ppv); + if ( rc != VERR_MM_HYPER_NO_MEMORY + || cb <= 8*_1K) + { + Log2(("MMR3HyperAllocOnceNoRel: cb=%#zx uAlignment=%#x returns %Rrc and *ppv=%p\n", + cb, uAlignment, rc, *ppv)); + return rc; + } + } + +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + /* + * Set MMHYPER_AONR_FLAGS_KERNEL_MAPPING if we're in going to execute in ring-0. + */ + if (VM_IS_HM_OR_NEM_ENABLED(pVM)) + fFlags |= MMHYPER_AONR_FLAGS_KERNEL_MAPPING; +#endif + + /* + * Validate alignment. + */ + switch (uAlignment) + { + case 0: + case 8: + case 16: + case 32: + case PAGE_SIZE: + break; + default: + AssertMsgFailed(("Invalid alignment %u\n", uAlignment)); + return VERR_INVALID_PARAMETER; + } + + /* + * Allocate the pages and map them into HMA space. + */ + uint32_t const cbAligned = RT_ALIGN_32(cb, PAGE_SIZE); + AssertReturn(cbAligned >= cb, VERR_INVALID_PARAMETER); + uint32_t const cPages = cbAligned >> PAGE_SHIFT; + PSUPPAGE paPages = (PSUPPAGE)RTMemTmpAlloc(cPages * sizeof(paPages[0])); + if (!paPages) + return VERR_NO_TMP_MEMORY; + void *pvPages; + RTR0PTR pvR0 = NIL_RTR0PTR; + int rc = SUPR3PageAllocEx(cPages, + 0 /*fFlags*/, + &pvPages, +#ifdef VBOX_WITH_MORE_RING0_MEM_MAPPINGS + &pvR0, +#else + fFlags & MMHYPER_AONR_FLAGS_KERNEL_MAPPING ? 
&pvR0 : NULL, +#endif + paPages); + if (RT_SUCCESS(rc)) + { +#ifdef VBOX_WITH_MORE_RING0_MEM_MAPPINGS + Assert(pvR0 != NIL_RTR0PTR); +#else + if (!(fFlags & MMHYPER_AONR_FLAGS_KERNEL_MAPPING)) +# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + pvR0 = NIL_RTR0PTR; +# else + pvR0 = (RTR0PTR)pvPages; +# endif +#endif + + memset(pvPages, 0, cbAligned); + + RTGCPTR GCPtr; + rc = MMR3HyperMapPages(pVM, + pvPages, + pvR0, + cPages, + paPages, + MMR3HeapAPrintf(pVM, MM_TAG_MM, "alloc once (%s)", mmGetTagName(enmTag)), + &GCPtr); + /* not needed anymore */ + RTMemTmpFree(paPages); + if (RT_SUCCESS(rc)) + { + *ppv = pvPages; + Log2(("MMR3HyperAllocOnceNoRel: cbAligned=%#x uAlignment=%#x returns VINF_SUCCESS and *ppv=%p\n", + cbAligned, uAlignment, *ppv)); + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + return rc; + } + AssertMsgFailed(("Failed to allocate %zd bytes! %Rrc\n", cbAligned, rc)); + SUPR3PageFreeEx(pvPages, cPages); + + + /* + * HACK ALERT! Try allocate it off the heap so that we don't freak + * out during vga/vmmdev mmio2 allocation with certain ram sizes. + */ + /** @todo make a proper fix for this so we will never end up in this kind of situation! */ + Log(("MMR3HyperAllocOnceNoRel: MMR3HyperMapHCRam failed with rc=%Rrc, try MMHyperAlloc(,%#x,,) instead\n", rc, cb)); + int rc2 = MMHyperAlloc(pVM, cb, uAlignment, enmTag, ppv); + if (RT_SUCCESS(rc2)) + { + Log2(("MMR3HyperAllocOnceNoRel: cb=%#x uAlignment=%#x returns %Rrc and *ppv=%p\n", + cb, uAlignment, rc, *ppv)); + return rc; + } + } + else + AssertMsgFailed(("Failed to allocate %zd bytes! %Rrc\n", cbAligned, rc)); + + if (rc == VERR_NO_MEMORY) + rc = VERR_MM_HYPER_NO_MEMORY; + LogRel(("MMR3HyperAllocOnceNoRel: cb=%#zx uAlignment=%#x returns %Rrc\n", cb, uAlignment, rc)); + return rc; +} + + +/** + * Lookus up a ring-3 pointer to HMA. + * + * @returns The lookup record on success, NULL on failure. + * @param pVM The cross context VM structure. + * @param pvR3 The ring-3 address to look up. + */ +DECLINLINE(PMMLOOKUPHYPER) mmR3HyperLookupR3(PVM pVM, void *pvR3) +{ + PMMLOOKUPHYPER pLookup = (PMMLOOKUPHYPER)((uint8_t *)pVM->mm.s.pHyperHeapR3 + pVM->mm.s.offLookupHyper); + for (;;) + { + switch (pLookup->enmType) + { + case MMLOOKUPHYPERTYPE_LOCKED: + { + unsigned off = (uint8_t *)pvR3 - (uint8_t *)pLookup->u.Locked.pvR3; + if (off < pLookup->cb) + return pLookup; + break; + } + + case MMLOOKUPHYPERTYPE_HCPHYS: + { + unsigned off = (uint8_t *)pvR3 - (uint8_t *)pLookup->u.HCPhys.pvR3; + if (off < pLookup->cb) + return pLookup; + break; + } + + case MMLOOKUPHYPERTYPE_GCPHYS: + case MMLOOKUPHYPERTYPE_MMIO2: + case MMLOOKUPHYPERTYPE_DYNAMIC: + /** @todo ? */ + break; + + default: + AssertMsgFailed(("enmType=%d\n", pLookup->enmType)); + return NULL; + } + + /* next */ + if ((unsigned)pLookup->offNext == NIL_OFFSET) + return NULL; + pLookup = (PMMLOOKUPHYPER)((uint8_t *)pLookup + pLookup->offNext); + } +} + + +/** + * Set / unset guard status on one or more hyper heap pages. + * + * @returns VBox status code (first failure). + * @param pVM The cross context VM structure. + * @param pvStart The hyper heap page address. Must be page + * aligned. + * @param cb The number of bytes. Must be page aligned. + * @param fSet Whether to set or unset guard page status. + */ +VMMR3DECL(int) MMR3HyperSetGuard(PVM pVM, void *pvStart, size_t cb, bool fSet) +{ + /* + * Validate input. 
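+ * (Descriptive note: both pvStart and cb must be page aligned, and pvStart
+ * must resolve to a LOCKED lookup record in the hypervisor area; anything
+ * else is rejected below.)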
+ */ + AssertReturn(!((uintptr_t)pvStart & PAGE_OFFSET_MASK), VERR_INVALID_POINTER); + AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + AssertReturn(cb <= UINT32_MAX, VERR_INVALID_PARAMETER); + PMMLOOKUPHYPER pLookup = mmR3HyperLookupR3(pVM, pvStart); + AssertReturn(pLookup, VERR_INVALID_PARAMETER); + AssertReturn(pLookup->enmType == MMLOOKUPHYPERTYPE_LOCKED, VERR_INVALID_PARAMETER); + + /* + * Get down to business. + * Note! We quietly ignore errors from the support library since the + * protection stuff isn't possible to implement on all platforms. + */ + uint8_t *pbR3 = (uint8_t *)pLookup->u.Locked.pvR3; + RTR0PTR R0Ptr = pLookup->u.Locked.pvR0 != (uintptr_t)pLookup->u.Locked.pvR3 + ? pLookup->u.Locked.pvR0 + : NIL_RTR0PTR; + uint32_t off = (uint32_t)((uint8_t *)pvStart - pbR3); + int rc; + if (fSet) + { + rc = PGMMapSetPage(pVM, MMHyperR3ToRC(pVM, pvStart), cb, 0); + SUPR3PageProtect(pbR3, R0Ptr, off, (uint32_t)cb, RTMEM_PROT_NONE); + } + else + { + rc = PGMMapSetPage(pVM, MMHyperR3ToRC(pVM, pvStart), cb, X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW); + SUPR3PageProtect(pbR3, R0Ptr, off, (uint32_t)cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE); + } + return rc; +} + + +/** + * Convert hypervisor HC virtual address to HC physical address. + * + * @returns HC physical address. + * @param pVM The cross context VM structure. + * @param pvR3 Host context virtual address. + */ +VMMR3DECL(RTHCPHYS) MMR3HyperHCVirt2HCPhys(PVM pVM, void *pvR3) +{ + PMMLOOKUPHYPER pLookup = (PMMLOOKUPHYPER)((uint8_t *)pVM->mm.s.pHyperHeapR3 + pVM->mm.s.offLookupHyper); + for (;;) + { + switch (pLookup->enmType) + { + case MMLOOKUPHYPERTYPE_LOCKED: + { + unsigned off = (uint8_t *)pvR3 - (uint8_t *)pLookup->u.Locked.pvR3; + if (off < pLookup->cb) + return pLookup->u.Locked.paHCPhysPages[off >> PAGE_SHIFT] | (off & PAGE_OFFSET_MASK); + break; + } + + case MMLOOKUPHYPERTYPE_HCPHYS: + { + unsigned off = (uint8_t *)pvR3 - (uint8_t *)pLookup->u.HCPhys.pvR3; + if (off < pLookup->cb) + return pLookup->u.HCPhys.HCPhys + off; + break; + } + + case MMLOOKUPHYPERTYPE_GCPHYS: + case MMLOOKUPHYPERTYPE_MMIO2: + case MMLOOKUPHYPERTYPE_DYNAMIC: + /* can (or don't want to) convert these kind of records. */ + break; + + default: + AssertMsgFailed(("enmType=%d\n", pLookup->enmType)); + break; + } + + /* next */ + if ((unsigned)pLookup->offNext == NIL_OFFSET) + break; + pLookup = (PMMLOOKUPHYPER)((uint8_t *)pLookup + pLookup->offNext); + } + + AssertMsgFailed(("pvR3=%p is not inside the hypervisor memory area!\n", pvR3)); + return NIL_RTHCPHYS; +} + + +/** + * Implements the hcphys-not-found return case of MMR3HyperQueryInfoFromHCPhys. + * + * @returns VINF_SUCCESS, VINF_BUFFER_OVERFLOW. + * @param pVM The cross context VM structure. + * @param HCPhys The host physical address to look for. + * @param pLookup The HMA lookup entry corresponding to HCPhys. + * @param pszWhat Where to return the description. + * @param cbWhat Size of the return buffer. + * @param pcbAlloc Where to return the size of whatever it is. + */ +static int mmR3HyperQueryInfoFromHCPhysFound(PVM pVM, RTHCPHYS HCPhys, PMMLOOKUPHYPER pLookup, + char *pszWhat, size_t cbWhat, uint32_t *pcbAlloc) +{ + NOREF(pVM); NOREF(HCPhys); + *pcbAlloc = pLookup->cb; + int rc = RTStrCopy(pszWhat, cbWhat, pLookup->pszDesc); + return rc == VERR_BUFFER_OVERFLOW ? VINF_BUFFER_OVERFLOW : rc; +} + + +/** + * Scans the HMA for the physical page and reports back a description if found. + * + * @returns VINF_SUCCESS, VINF_BUFFER_OVERFLOW, VERR_NOT_FOUND. 
+ * @param pVM The cross context VM structure. + * @param HCPhys The host physical address to look for. + * @param pszWhat Where to return the description. + * @param cbWhat Size of the return buffer. + * @param pcbAlloc Where to return the size of whatever it is. + */ +VMMR3_INT_DECL(int) MMR3HyperQueryInfoFromHCPhys(PVM pVM, RTHCPHYS HCPhys, char *pszWhat, size_t cbWhat, uint32_t *pcbAlloc) +{ + RTHCPHYS HCPhysPage = HCPhys & ~(RTHCPHYS)PAGE_OFFSET_MASK; + PMMLOOKUPHYPER pLookup = (PMMLOOKUPHYPER)((uint8_t *)pVM->mm.s.pHyperHeapR3 + pVM->mm.s.offLookupHyper); + for (;;) + { + switch (pLookup->enmType) + { + case MMLOOKUPHYPERTYPE_LOCKED: + { + uint32_t i = pLookup->cb >> PAGE_SHIFT; + while (i-- > 0) + if (pLookup->u.Locked.paHCPhysPages[i] == HCPhysPage) + return mmR3HyperQueryInfoFromHCPhysFound(pVM, HCPhys, pLookup, pszWhat, cbWhat, pcbAlloc); + break; + } + + case MMLOOKUPHYPERTYPE_HCPHYS: + { + if (pLookup->u.HCPhys.HCPhys - HCPhysPage < pLookup->cb) + return mmR3HyperQueryInfoFromHCPhysFound(pVM, HCPhys, pLookup, pszWhat, cbWhat, pcbAlloc); + break; + } + + case MMLOOKUPHYPERTYPE_MMIO2: + case MMLOOKUPHYPERTYPE_GCPHYS: + case MMLOOKUPHYPERTYPE_DYNAMIC: + { + /* brute force. */ + uint32_t i = pLookup->cb >> PAGE_SHIFT; + while (i-- > 0) + { + RTGCPTR GCPtr = pLookup->off + pVM->mm.s.pvHyperAreaGC; + RTHCPHYS HCPhysCur; + int rc = PGMMapGetPage(pVM, GCPtr, NULL, &HCPhysCur); + if (RT_SUCCESS(rc) && HCPhysCur == HCPhysPage) + return mmR3HyperQueryInfoFromHCPhysFound(pVM, HCPhys, pLookup, pszWhat, cbWhat, pcbAlloc); + } + break; + } + default: + AssertMsgFailed(("enmType=%d\n", pLookup->enmType)); + break; + } + + /* next */ + if ((unsigned)pLookup->offNext == NIL_OFFSET) + break; + pLookup = (PMMLOOKUPHYPER)((uint8_t *)pLookup + pLookup->offNext); + } + return VERR_NOT_FOUND; +} + + +#if 0 /* unused, not implemented */ +/** + * Convert hypervisor HC physical address to HC virtual address. + * + * @returns HC virtual address. + * @param pVM The cross context VM structure. + * @param HCPhys Host context physical address. + */ +VMMR3DECL(void *) MMR3HyperHCPhys2HCVirt(PVM pVM, RTHCPHYS HCPhys) +{ + void *pv; + int rc = MMR3HyperHCPhys2HCVirtEx(pVM, HCPhys, &pv); + if (RT_SUCCESS(rc)) + return pv; + AssertMsgFailed(("Invalid address HCPhys=%x rc=%d\n", HCPhys, rc)); + return NULL; +} + + +/** + * Convert hypervisor HC physical address to HC virtual address. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param HCPhys Host context physical address. + * @param ppv Where to store the HC virtual address. + */ +VMMR3DECL(int) MMR3HyperHCPhys2HCVirtEx(PVM pVM, RTHCPHYS HCPhys, void **ppv) +{ + /* + * Linear search. + */ + /** @todo implement when actually used. */ + return VERR_INVALID_POINTER; +} +#endif /* unused, not implemented */ + + +/** + * Read hypervisor memory from GC virtual address. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvDst Destination address (HC of course). + * @param GCPtr GC virtual address. + * @param cb Number of bytes to read. + * + * @remarks For DBGF only. + */ +VMMR3DECL(int) MMR3HyperReadGCVirt(PVM pVM, void *pvDst, RTGCPTR GCPtr, size_t cb) +{ + if (GCPtr - pVM->mm.s.pvHyperAreaGC >= pVM->mm.s.cbHyperArea) + return VERR_INVALID_POINTER; + return PGMR3MapRead(pVM, pvDst, GCPtr, cb); +} + + +/** + * Info handler for 'hma', it dumps the list of lookup records for the hypervisor memory area. + * + * @param pVM The cross context VM structure. 
+ * @param pHlp Callback functions for doing output. + * @param pszArgs Argument string. Optional and specific to the handler. + */ +static DECLCALLBACK(void) mmR3HyperInfoHma(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + + pHlp->pfnPrintf(pHlp, "Hypervisor Memory Area (HMA) Layout: Base %RGv, 0x%08x bytes\n", + pVM->mm.s.pvHyperAreaGC, pVM->mm.s.cbHyperArea); + + PMMLOOKUPHYPER pLookup = (PMMLOOKUPHYPER)((uint8_t *)pVM->mm.s.pHyperHeapR3 + pVM->mm.s.offLookupHyper); + for (;;) + { + switch (pLookup->enmType) + { + case MMLOOKUPHYPERTYPE_LOCKED: + pHlp->pfnPrintf(pHlp, "%RGv-%RGv %RHv %RHv LOCKED %-*s %s\n", + pLookup->off + pVM->mm.s.pvHyperAreaGC, + pLookup->off + pVM->mm.s.pvHyperAreaGC + pLookup->cb, + pLookup->u.Locked.pvR3, + pLookup->u.Locked.pvR0, + sizeof(RTHCPTR) * 2, "", + pLookup->pszDesc); + break; + + case MMLOOKUPHYPERTYPE_HCPHYS: + pHlp->pfnPrintf(pHlp, "%RGv-%RGv %RHv %RHv HCPHYS %RHp %s\n", + pLookup->off + pVM->mm.s.pvHyperAreaGC, + pLookup->off + pVM->mm.s.pvHyperAreaGC + pLookup->cb, + pLookup->u.HCPhys.pvR3, + pLookup->u.HCPhys.pvR0, + pLookup->u.HCPhys.HCPhys, + pLookup->pszDesc); + break; + + case MMLOOKUPHYPERTYPE_GCPHYS: + pHlp->pfnPrintf(pHlp, "%RGv-%RGv %*s GCPHYS %RGp%*s %s\n", + pLookup->off + pVM->mm.s.pvHyperAreaGC, + pLookup->off + pVM->mm.s.pvHyperAreaGC + pLookup->cb, + sizeof(RTHCPTR) * 2 * 2 + 1, "", + pLookup->u.GCPhys.GCPhys, RT_ABS((int)(sizeof(RTHCPHYS) - sizeof(RTGCPHYS))) * 2, "", + pLookup->pszDesc); + break; + + case MMLOOKUPHYPERTYPE_MMIO2: + pHlp->pfnPrintf(pHlp, "%RGv-%RGv %*s MMIO2 %RGp%*s %s\n", + pLookup->off + pVM->mm.s.pvHyperAreaGC, + pLookup->off + pVM->mm.s.pvHyperAreaGC + pLookup->cb, + sizeof(RTHCPTR) * 2 * 2 + 1, "", + pLookup->u.MMIO2.off, RT_ABS((int)(sizeof(RTHCPHYS) - sizeof(RTGCPHYS))) * 2, "", + pLookup->pszDesc); + break; + + case MMLOOKUPHYPERTYPE_DYNAMIC: + pHlp->pfnPrintf(pHlp, "%RGv-%RGv %*s DYNAMIC %*s %s\n", + pLookup->off + pVM->mm.s.pvHyperAreaGC, + pLookup->off + pVM->mm.s.pvHyperAreaGC + pLookup->cb, + sizeof(RTHCPTR) * 2 * 2 + 1, "", + sizeof(RTHCPTR) * 2, "", + pLookup->pszDesc); + break; + + default: + AssertMsgFailed(("enmType=%d\n", pLookup->enmType)); + break; + } + + /* next */ + if ((unsigned)pLookup->offNext == NIL_OFFSET) + break; + pLookup = (PMMLOOKUPHYPER)((uint8_t *)pLookup + pLookup->offNext); + } +} + + +/** + * Re-allocates memory from the hyper heap. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvOld The existing block of memory in the hyper heap to + * re-allocate (can be NULL). + * @param cbOld Size of the existing block. + * @param uAlignmentNew Required memory alignment in bytes. Values are + * 0,8,16,32 and PAGE_SIZE. 0 -> default alignment, + * i.e. 8 bytes. + * @param enmTagNew The statistics tag. + * @param cbNew The required size of the new block. + * @param ppv Where to store the address to the re-allocated + * block. + * + * @remarks This does not work like normal realloc() on failure, the memory + * pointed to by @a pvOld is lost if there isn't sufficient space on + * the hyper heap for the re-allocation to succeed. 
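+ *
+ *          Hedged usage sketch illustrating that caveat (pvOld, cbOld and
+ *          cbNew are placeholders, not taken from a real caller):
+ * @code
+ *     void *pvNew = NULL;
+ *     // 0 = default (8 byte) alignment for the new block.
+ *     int rc = MMR3HyperRealloc(pVM, pvOld, cbOld, 0, MM_TAG_MM, cbNew, &pvNew);
+ *     if (RT_FAILURE(rc))
+ *     {
+ *         // Unlike realloc(), pvOld has already been freed at this point.
+ *     }
+ * @endcode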
+*/ +VMMR3DECL(int) MMR3HyperRealloc(PVM pVM, void *pvOld, size_t cbOld, unsigned uAlignmentNew, MMTAG enmTagNew, size_t cbNew, + void **ppv) +{ + if (!pvOld) + return MMHyperAlloc(pVM, cbNew, uAlignmentNew, enmTagNew, ppv); + + if (!cbNew && pvOld) + return MMHyperFree(pVM, pvOld); + + if (cbOld == cbNew) + return VINF_SUCCESS; + + size_t cbData = RT_MIN(cbNew, cbOld); + void *pvTmp = RTMemTmpAlloc(cbData); + if (RT_UNLIKELY(!pvTmp)) + { + MMHyperFree(pVM, pvOld); + return VERR_NO_TMP_MEMORY; + } + memcpy(pvTmp, pvOld, cbData); + + int rc = MMHyperFree(pVM, pvOld); + if (RT_SUCCESS(rc)) + { + rc = MMHyperAlloc(pVM, cbNew, uAlignmentNew, enmTagNew, ppv); + if (RT_SUCCESS(rc)) + { + Assert(cbData <= cbNew); + memcpy(*ppv, pvTmp, cbData); + } + } + else + AssertMsgFailed(("Failed to free hyper heap block pvOld=%p cbOld=%u\n", pvOld, cbOld)); + + RTMemTmpFree(pvTmp); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/MMPagePool.cpp b/src/VBox/VMM/VMMR3/MMPagePool.cpp new file mode 100644 index 00000000..5364a3cd --- /dev/null +++ b/src/VBox/VMM/VMMR3/MMPagePool.cpp @@ -0,0 +1,527 @@ +/* $Id: MMPagePool.cpp $ */ +/** @file + * MM - Memory Manager - Page Pool. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_MM_POOL +#include +#include +#include +#include "MMInternal.h" +#include +#include +#include +#include +#include +#include +#define USE_INLINE_ASM_BIT_OPS +#ifdef USE_INLINE_ASM_BIT_OPS +# include +#endif +#include + + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifdef IN_RING3 +static void * mmR3PagePoolAlloc(PMMPAGEPOOL pPool); +static void mmR3PagePoolFree(PMMPAGEPOOL pPool, void *pv); +#endif + + +/** + * Initializes the page pool + * + * @return VBox status code. + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. + */ +int mmR3PagePoolInit(PVM pVM) +{ + AssertMsg(!pVM->mm.s.pPagePoolR3, ("Already initialized!\n")); + + /* + * Allocate the pool structures. + */ + /** @todo @bugref{1865},@bugref{3202}: mapping the page pool page into ring-0. + * Need to change the ways we allocate it... 
*/ + AssertReleaseReturn(sizeof(*pVM->mm.s.pPagePoolR3) + sizeof(*pVM->mm.s.pPagePoolLowR3) < PAGE_SIZE, VERR_INTERNAL_ERROR); + int rc = SUPR3PageAllocEx(1, 0 /*fFlags*/, (void **)&pVM->mm.s.pPagePoolR3, NULL /*pR0Ptr*/, NULL /*paPages*/); + if (RT_FAILURE(rc)) + return rc; + memset(pVM->mm.s.pPagePoolR3, 0, PAGE_SIZE); + pVM->mm.s.pPagePoolR3->pVM = pVM; + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cPages, STAMTYPE_U32, "/MM/Page/Def/cPages", STAMUNIT_PAGES, "Number of pages in the default pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cFreePages, STAMTYPE_U32, "/MM/Page/Def/cFreePages", STAMUNIT_PAGES, "Number of free pages in the default pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cSubPools, STAMTYPE_U32, "/MM/Page/Def/cSubPools", STAMUNIT_COUNT, "Number of sub pools in the default pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cAllocCalls, STAMTYPE_COUNTER, "/MM/Page/Def/cAllocCalls", STAMUNIT_CALLS, "Number of MMR3PageAlloc() calls for the default pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cFreeCalls, STAMTYPE_COUNTER, "/MM/Page/Def/cFreeCalls", STAMUNIT_CALLS, "Number of MMR3PageFree()+MMR3PageFreeByPhys() calls for the default pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cToPhysCalls, STAMTYPE_COUNTER, "/MM/Page/Def/cToPhysCalls", STAMUNIT_CALLS, "Number of MMR3Page2Phys() calls for this pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cToVirtCalls, STAMTYPE_COUNTER, "/MM/Page/Def/cToVirtCalls", STAMUNIT_CALLS, "Number of MMR3PagePhys2Page()+MMR3PageFreeByPhys() calls for the default pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolR3->cErrors, STAMTYPE_COUNTER, "/MM/Page/Def/cErrors", STAMUNIT_ERRORS,"Number of errors for the default pool."); + + pVM->mm.s.pPagePoolLowR3 = pVM->mm.s.pPagePoolR3 + 1; + pVM->mm.s.pPagePoolLowR3->pVM = pVM; + pVM->mm.s.pPagePoolLowR3->fLow = true; + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cPages, STAMTYPE_U32, "/MM/Page/Low/cPages", STAMUNIT_PAGES, "Number of pages in the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cFreePages, STAMTYPE_U32, "/MM/Page/Low/cFreePages", STAMUNIT_PAGES, "Number of free pages in the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cSubPools, STAMTYPE_U32, "/MM/Page/Low/cSubPools", STAMUNIT_COUNT, "Number of sub pools in the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cAllocCalls, STAMTYPE_COUNTER, "/MM/Page/Low/cAllocCalls", STAMUNIT_CALLS, "Number of MMR3PageAllocLow() calls for the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cFreeCalls, STAMTYPE_COUNTER, "/MM/Page/Low/cFreeCalls", STAMUNIT_CALLS, "Number of MMR3PageFreeLow()+MMR3PageFreeByPhys() calls for the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cToPhysCalls,STAMTYPE_COUNTER, "/MM/Page/Low/cToPhysCalls", STAMUNIT_CALLS, "Number of MMR3Page2Phys() calls for the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cToVirtCalls,STAMTYPE_COUNTER, "/MM/Page/Low/cToVirtCalls", STAMUNIT_CALLS, "Number of MMR3PagePhys2Page()+MMR3PageFreeByPhys() calls for the <4GB pool."); + STAM_REG(pVM, &pVM->mm.s.pPagePoolLowR3->cErrors, STAMTYPE_COUNTER, "/MM/Page/Low/cErrors", STAMUNIT_ERRORS,"Number of errors for the <4GB pool."); + +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pVM->mm.s.pPagePoolR0 = (uintptr_t)pVM->mm.s.pPagePoolR3; + pVM->mm.s.pPagePoolLowR0 = (uintptr_t)pVM->mm.s.pPagePoolLowR3; +#endif + + /** @todo init a mutex? */ + return VINF_SUCCESS; +} + + +/** + * Release all locks and free the allocated memory. + * + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. 
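+ *
+ * @remark The page memory of each sub-pool is freed with SUPR3PageFreeEx
+ *         (SUPR3LowFree for the <4GB pool); the remaining tracking
+ *         structures are left for the regular MM heap cleanup to release.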
+ */ +void mmR3PagePoolTerm(PVM pVM) +{ + if (pVM->mm.s.pPagePoolR3) + { + /* + * Unlock all memory held by subpools and free the memory. + * (The MM Heap will free the memory used for internal stuff.) + */ + Assert(!pVM->mm.s.pPagePoolR3->fLow); + PMMPAGESUBPOOL pSubPool = pVM->mm.s.pPagePoolR3->pHead; + while (pSubPool) + { + int rc = SUPR3PageFreeEx(pSubPool->pvPages, pSubPool->cPages); + AssertMsgRC(rc, ("SUPR3PageFreeEx(%p) failed with rc=%Rrc\n", pSubPool->pvPages, rc)); + pSubPool->pvPages = NULL; + + /* next */ + pSubPool = pSubPool->pNext; + } + pVM->mm.s.pPagePoolR3 = NULL; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pVM->mm.s.pPagePoolR0 = NIL_RTR0PTR; +#endif + } + + if (pVM->mm.s.pPagePoolLowR3) + { + /* + * Free the memory. + */ + Assert(pVM->mm.s.pPagePoolLowR3->fLow); + PMMPAGESUBPOOL pSubPool = pVM->mm.s.pPagePoolLowR3->pHead; + while (pSubPool) + { + int rc = SUPR3LowFree(pSubPool->pvPages, pSubPool->cPages); + AssertMsgRC(rc, ("SUPR3LowFree(%p) failed with rc=%d\n", pSubPool->pvPages, rc)); + pSubPool->pvPages = NULL; + + /* next */ + pSubPool = pSubPool->pNext; + } + pVM->mm.s.pPagePoolLowR3 = NULL; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pVM->mm.s.pPagePoolLowR0 = NIL_RTR0PTR; +#endif + } +} + + +/** + * Allocates a page from the page pool. + * + * @returns Pointer to allocated page(s). + * @returns NULL on failure. + * @param pPool Pointer to the page pool. + * @thread The Emulation Thread. + */ +DECLINLINE(void *) mmR3PagePoolAlloc(PMMPAGEPOOL pPool) +{ + VM_ASSERT_EMT(pPool->pVM); + STAM_COUNTER_INC(&pPool->cAllocCalls); + + /* + * Walk free list. + */ + if (pPool->pHeadFree) + { + PMMPAGESUBPOOL pSub = pPool->pHeadFree; + /* decrement free count and unlink if no more free entries. */ + if (!--pSub->cPagesFree) + pPool->pHeadFree = pSub->pNextFree; +#ifdef VBOX_WITH_STATISTICS + pPool->cFreePages--; +#endif + + /* find free spot in bitmap. */ +#ifdef USE_INLINE_ASM_BIT_OPS + const int iPage = ASMBitFirstClear(pSub->auBitmap, pSub->cPages); + if (iPage >= 0) + { + Assert(!ASMBitTest(pSub->auBitmap, iPage)); + ASMBitSet(pSub->auBitmap, iPage); + return (uint8_t *)pSub->pvPages + PAGE_SIZE * iPage; + } +#else + unsigned *pu = &pSub->auBitmap[0]; + unsigned *puEnd = &pSub->auBitmap[pSub->cPages / (sizeof(pSub->auBitmap) * 8)]; + while (pu < puEnd) + { + unsigned u; + if ((u = *pu) != ~0U) + { + unsigned iBit = 0; + unsigned uMask = 1; + while (iBit < sizeof(pSub->auBitmap[0]) * 8) + { + if (!(u & uMask)) + { + *pu |= uMask; + return (uint8_t *)pSub->pvPages + + PAGE_SIZE * (iBit + ((uint8_t *)pu - (uint8_t *)&pSub->auBitmap[0]) * 8); + } + iBit++; + uMask <<= 1; + } + STAM_COUNTER_INC(&pPool->cErrors); + AssertMsgFailed(("how odd, expected to find a free bit in %#x, but didn't\n", u)); + } + /* next */ + pu++; + } +#endif + STAM_COUNTER_INC(&pPool->cErrors); +#ifdef VBOX_WITH_STATISTICS + pPool->cFreePages++; +#endif + AssertMsgFailed(("how strange, expected to find a free bit in %p, but didn't (%d pages supposed to be free!)\n", pSub, pSub->cPagesFree + 1)); + } + + /* + * Allocate new subpool. + */ + unsigned cPages = !pPool->fLow ? 
128 : 32; + PMMPAGESUBPOOL pSub; + int rc = MMHyperAlloc(pPool->pVM, + RT_UOFFSETOF_DYN(MMPAGESUBPOOL, auBitmap[cPages / (sizeof(pSub->auBitmap[0]) * 8)]) + + (sizeof(SUPPAGE) + sizeof(MMPPLOOKUPHCPHYS)) * cPages + + sizeof(MMPPLOOKUPHCPTR), + 0, + MM_TAG_MM_PAGE, + (void **)&pSub); + if (RT_FAILURE(rc)) + return NULL; + + PSUPPAGE paPhysPages = (PSUPPAGE)&pSub->auBitmap[cPages / (sizeof(pSub->auBitmap[0]) * 8)]; + Assert((uintptr_t)paPhysPages >= (uintptr_t)&pSub->auBitmap[1]); + if (!pPool->fLow) + { + rc = SUPR3PageAllocEx(cPages, + 0 /* fFlags */, + &pSub->pvPages, + NULL, + paPhysPages); + if (RT_FAILURE(rc)) + rc = VMSetError(pPool->pVM, rc, RT_SRC_POS, + N_("Failed to lock host %zd bytes of memory (out of memory)"), (size_t)cPages << PAGE_SHIFT); + } + else + rc = SUPR3LowAlloc(cPages, &pSub->pvPages, NULL, paPhysPages); + if (RT_SUCCESS(rc)) + { + /* + * Setup the sub structure and allocate the requested page. + */ + pSub->cPages = cPages; + pSub->cPagesFree= cPages - 1; + pSub->paPhysPages = paPhysPages; + memset(pSub->auBitmap, 0, cPages / 8); + /* allocate first page. */ + pSub->auBitmap[0] |= 1; + /* link into free chain. */ + pSub->pNextFree = pPool->pHeadFree; + pPool->pHeadFree= pSub; + /* link into main chain. */ + pSub->pNext = pPool->pHead; + pPool->pHead = pSub; + /* update pool statistics. */ + pPool->cSubPools++; + pPool->cPages += cPages; +#ifdef VBOX_WITH_STATISTICS + pPool->cFreePages += cPages - 1; +#endif + + /* + * Initialize the physical pages with backpointer to subpool. + */ + unsigned i = cPages; + while (i-- > 0) + { + AssertMsg(paPhysPages[i].Phys && !(paPhysPages[i].Phys & PAGE_OFFSET_MASK), + ("i=%d Phys=%d\n", i, paPhysPages[i].Phys)); + paPhysPages[i].uReserved = (RTHCUINTPTR)pSub; + } + + /* + * Initialize the physical lookup record with backpointers to the physical pages. + */ + PMMPPLOOKUPHCPHYS paLookupPhys = (PMMPPLOOKUPHCPHYS)&paPhysPages[cPages]; + i = cPages; + while (i-- > 0) + { + paLookupPhys[i].pPhysPage = &paPhysPages[i]; + paLookupPhys[i].Core.Key = paPhysPages[i].Phys; + RTAvlHCPhysInsert(&pPool->pLookupPhys, &paLookupPhys[i].Core); + } + + /* + * And the one record for virtual memory lookup. + */ + PMMPPLOOKUPHCPTR pLookupVirt = (PMMPPLOOKUPHCPTR)&paLookupPhys[cPages]; + pLookupVirt->pSubPool = pSub; + pLookupVirt->Core.Key = pSub->pvPages; + RTAvlPVInsert(&pPool->pLookupVirt, &pLookupVirt->Core); + + /* return allocated page (first). */ + return pSub->pvPages; + } + + MMHyperFree(pPool->pVM, pSub); + STAM_COUNTER_INC(&pPool->cErrors); + if (pPool->fLow) + VMSetError(pPool->pVM, rc, RT_SRC_POS, + N_("Failed to expand page pool for memory below 4GB. Current size: %d pages"), + pPool->cPages); + AssertMsgFailed(("Failed to expand pool%s. rc=%Rrc poolsize=%d\n", + pPool->fLow ? " (<4GB)" : "", rc, pPool->cPages)); + return NULL; +} + + +/** + * Frees a page from the page pool. + * + * @param pPool Pointer to the page pool. + * @param pv Pointer to the page to free. + * I.e. pointer returned by mmR3PagePoolAlloc(). + * @thread The Emulation Thread. + */ +DECLINLINE(void) mmR3PagePoolFree(PMMPAGEPOOL pPool, void *pv) +{ + VM_ASSERT_EMT(pPool->pVM); + STAM_COUNTER_INC(&pPool->cFreeCalls); + + /* + * Lookup the virtual address. 
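+ * (Descriptive note: a best-fit search in the pLookupVirt AVL tree yields
+ * the sub-pool that owns pv; if pv lies outside that sub-pool's page range
+ * the pointer is rejected as invalid.)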
+ */ + PMMPPLOOKUPHCPTR pLookup = (PMMPPLOOKUPHCPTR)RTAvlPVGetBestFit(&pPool->pLookupVirt, pv, false); + if ( !pLookup + || (uint8_t *)pv >= (uint8_t *)pLookup->pSubPool->pvPages + (pLookup->pSubPool->cPages << PAGE_SHIFT) + ) + { + STAM_COUNTER_INC(&pPool->cErrors); + AssertMsgFailed(("invalid pointer %p\n", pv)); + return; + } + + /* + * Free the page. + */ + PMMPAGESUBPOOL pSubPool = pLookup->pSubPool; + /* clear bitmap bit */ + const unsigned iPage = ((uint8_t *)pv - (uint8_t *)pSubPool->pvPages) >> PAGE_SHIFT; +#ifdef USE_INLINE_ASM_BIT_OPS + Assert(ASMBitTest(pSubPool->auBitmap, iPage)); + ASMBitClear(pSubPool->auBitmap, iPage); +#else + unsigned iBit = iPage % (sizeof(pSubPool->auBitmap[0]) * 8); + unsigned iIndex = iPage / (sizeof(pSubPool->auBitmap[0]) * 8); + pSubPool->auBitmap[iIndex] &= ~(1 << iBit); +#endif + /* update stats. */ + pSubPool->cPagesFree++; +#ifdef VBOX_WITH_STATISTICS + pPool->cFreePages++; +#endif + if (pSubPool->cPagesFree == 1) + { + pSubPool->pNextFree = pPool->pHeadFree; + pPool->pHeadFree = pSubPool; + } +} + + +/** + * Allocates a page from the page pool. + * + * This function may returns pages which has physical addresses any + * where. If you require a page to be within the first 4GB of physical + * memory, use MMR3PageAllocLow(). + * + * @returns Pointer to the allocated page page. + * @returns NULL on failure. + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. + */ +VMMR3DECL(void *) MMR3PageAlloc(PVM pVM) +{ + /* Note: unprotected by locks; currently fine as it's used during init or under the PGM lock */ + return mmR3PagePoolAlloc(pVM->mm.s.pPagePoolR3); +} + + +/** + * Allocates a page from the page pool and return its physical address. + * + * This function may returns pages which has physical addresses any + * where. If you require a page to be within the first 4GB of physical + * memory, use MMR3PageAllocLow(). + * + * @returns Pointer to the allocated page page. + * @returns NIL_RTHCPHYS on failure. + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. + */ +VMMR3DECL(RTHCPHYS) MMR3PageAllocPhys(PVM pVM) +{ + /* Note: unprotected by locks; currently fine as it's used during init or under the PGM lock */ + /** @todo optimize this, it's the most common case now. */ + void *pv = mmR3PagePoolAlloc(pVM->mm.s.pPagePoolR3); + if (pv) + return mmPagePoolPtr2Phys(pVM->mm.s.pPagePoolR3, pv); + return NIL_RTHCPHYS; +} + + +/** + * Frees a page allocated from the page pool by MMR3PageAlloc() or + * MMR3PageAllocPhys(). + * + * @param pVM The cross context VM structure. + * @param pvPage Pointer to the page. + * @thread The Emulation Thread. + */ +VMMR3DECL(void) MMR3PageFree(PVM pVM, void *pvPage) +{ + mmR3PagePoolFree(pVM->mm.s.pPagePoolR3, pvPage); +} + + +/** + * Allocates a page from the low page pool. + * + * @returns Pointer to the allocated page. + * @returns NULL on failure. + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. + */ +VMMR3DECL(void *) MMR3PageAllocLow(PVM pVM) +{ + return mmR3PagePoolAlloc(pVM->mm.s.pPagePoolLowR3); +} + + +/** + * Frees a page allocated from the page pool by MMR3PageAllocLow(). + * + * @param pVM The cross context VM structure. + * @param pvPage Pointer to the page. + * @thread The Emulation Thread. + */ +VMMR3DECL(void) MMR3PageFreeLow(PVM pVM, void *pvPage) +{ + mmR3PagePoolFree(pVM->mm.s.pPagePoolLowR3, pvPage); +} + + +/** + * Free a page allocated from the page pool by physical address. 
+ * This works for pages allocated by MMR3PageAlloc(), MMR3PageAllocPhys() + * and MMR3PageAllocLow(). + * + * @param pVM The cross context VM structure. + * @param HCPhysPage The physical address of the page to be freed. + * @thread The Emulation Thread. + */ +VMMR3DECL(void) MMR3PageFreeByPhys(PVM pVM, RTHCPHYS HCPhysPage) +{ + void *pvPage = mmPagePoolPhys2Ptr(pVM->mm.s.pPagePoolR3, HCPhysPage); + if (!pvPage) + pvPage = mmPagePoolPhys2Ptr(pVM->mm.s.pPagePoolLowR3, HCPhysPage); + if (pvPage) + mmR3PagePoolFree(pVM->mm.s.pPagePoolR3, pvPage); + else + AssertMsgFailed(("Invalid address HCPhysPT=%#x\n", HCPhysPage)); +} + + +/** + * Gets the HC pointer to the dummy page. + * + * The dummy page is used as a place holder to prevent potential bugs + * from doing really bad things to the system. + * + * @returns Pointer to the dummy page. + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. + */ +VMMR3DECL(void *) MMR3PageDummyHCPtr(PVM pVM) +{ + VM_ASSERT_EMT(pVM); + if (!pVM->mm.s.pvDummyPage) + { + pVM->mm.s.pvDummyPage = mmR3PagePoolAlloc(pVM->mm.s.pPagePoolR3); + AssertRelease(pVM->mm.s.pvDummyPage); + pVM->mm.s.HCPhysDummyPage = mmPagePoolPtr2Phys(pVM->mm.s.pPagePoolR3, pVM->mm.s.pvDummyPage); + AssertRelease(!(pVM->mm.s.HCPhysDummyPage & ~X86_PTE_PAE_PG_MASK)); + } + return pVM->mm.s.pvDummyPage; +} + + +/** + * Gets the HC Phys to the dummy page. + * + * The dummy page is used as a place holder to prevent potential bugs + * from doing really bad things to the system. + * + * @returns Pointer to the dummy page. + * @param pVM The cross context VM structure. + * @thread The Emulation Thread. + */ +VMMR3DECL(RTHCPHYS) MMR3PageDummyHCPhys(PVM pVM) +{ + VM_ASSERT_EMT(pVM); + if (!pVM->mm.s.pvDummyPage) + MMR3PageDummyHCPtr(pVM); + return pVM->mm.s.HCPhysDummyPage; +} + diff --git a/src/VBox/VMM/VMMR3/MMUkHeap.cpp b/src/VBox/VMM/VMMR3/MMUkHeap.cpp new file mode 100644 index 00000000..0b45a638 --- /dev/null +++ b/src/VBox/VMM/VMMR3/MMUkHeap.cpp @@ -0,0 +1,427 @@ +/* $Id: MMUkHeap.cpp $ */ +/** @file + * MM - Memory Manager - Ring-3 Heap with kernel accessible mapping. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_MM_HEAP +#include +#include +#include "MMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void *mmR3UkHeapAlloc(PMMUKHEAP pHeap, MMTAG enmTag, size_t cb, bool fZero, PRTR0PTR pR0Ptr); + + + +/** + * Create a User-kernel heap. + * + * This does not require SUPLib to be initialized as we'll lazily allocate the + * kernel accessible memory on the first alloc call. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param ppHeap Where to store the heap pointer. + */ +int mmR3UkHeapCreateU(PUVM pUVM, PMMUKHEAP *ppHeap) +{ + PMMUKHEAP pHeap = (PMMUKHEAP)MMR3HeapAllocZU(pUVM, MM_TAG_MM, sizeof(MMUKHEAP)); + if (pHeap) + { + int rc = RTCritSectInit(&pHeap->Lock); + if (RT_SUCCESS(rc)) + { + /* + * Initialize the global stat record. + */ + pHeap->pUVM = pUVM; +#ifdef MMUKHEAP_WITH_STATISTICS + PMMUKHEAPSTAT pStat = &pHeap->Stat; + STAMR3RegisterU(pUVM, &pStat->cAllocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cAllocations", STAMUNIT_CALLS, "Number or MMR3UkHeapAlloc() calls."); + STAMR3RegisterU(pUVM, &pStat->cReallocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cReallocations", STAMUNIT_CALLS, "Number of MMR3UkHeapRealloc() calls."); + STAMR3RegisterU(pUVM, &pStat->cFrees, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cFrees", STAMUNIT_CALLS, "Number of MMR3UkHeapFree() calls."); + STAMR3RegisterU(pUVM, &pStat->cFailures, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cFailures", STAMUNIT_COUNT, "Number of failures."); + STAMR3RegisterU(pUVM, &pStat->cbCurAllocated, sizeof(pStat->cbCurAllocated) == sizeof(uint32_t) ? STAMTYPE_U32 : STAMTYPE_U64, + STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cbCurAllocated", STAMUNIT_BYTES, "Number of bytes currently allocated."); + STAMR3RegisterU(pUVM, &pStat->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cbAllocated", STAMUNIT_BYTES, "Total number of bytes allocated."); + STAMR3RegisterU(pUVM, &pStat->cbFreed, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, "/MM/UkHeap/cbFreed", STAMUNIT_BYTES, "Total number of bytes freed."); +#endif + *ppHeap = pHeap; + return VINF_SUCCESS; + } + AssertRC(rc); + MMR3HeapFree(pHeap); + } + AssertMsgFailed(("failed to allocate heap structure\n")); + return VERR_NO_MEMORY; +} + + +/** + * Destroy a User-kernel heap. + * + * @param pHeap Heap handle. + */ +void mmR3UkHeapDestroy(PMMUKHEAP pHeap) +{ + /* + * Start by deleting the lock, that'll trap anyone + * attempting to use the heap. + */ + RTCritSectDelete(&pHeap->Lock); + + /* + * Walk the sub-heaps and free them. + */ + while (pHeap->pSubHeapHead) + { + PMMUKHEAPSUB pSubHeap = pHeap->pSubHeapHead; + pHeap->pSubHeapHead = pSubHeap->pNext; + SUPR3PageFreeEx(pSubHeap->pv, pSubHeap->cb >> PAGE_SHIFT); + //MMR3HeapFree(pSubHeap); - rely on the automatic cleanup. 
+ } + //MMR3HeapFree(pHeap->stats); + //MMR3HeapFree(pHeap); +} + + +/** + * Allocate memory associating it with the VM for collective cleanup. + * + * The memory will be allocated from the default heap but a header + * is added in which we keep track of which VM it belongs to and chain + * all the allocations together so they can be freed in one go. + * + * This interface is typically used for memory block which will not be + * freed during the life of the VM. + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. + * @param cbSize Size of the block. + * @param pR0Ptr Where to return the ring-0 address of the memory. + */ +VMMR3DECL(void *) MMR3UkHeapAlloc(PVM pVM, MMTAG enmTag, size_t cbSize, PRTR0PTR pR0Ptr) +{ + return mmR3UkHeapAlloc(pVM->pUVM->mm.s.pUkHeap, enmTag, cbSize, false, pR0Ptr); +} + + +/** + * Same as MMR3UkHeapAlloc(). + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. + * @param cbSize Size of the block. + * @param ppv Where to store the pointer to the allocated memory on success. + * @param pR0Ptr Where to return the ring-0 address of the memory. + */ +VMMR3DECL(int) MMR3UkHeapAllocEx(PVM pVM, MMTAG enmTag, size_t cbSize, void **ppv, PRTR0PTR pR0Ptr) +{ + void *pv = mmR3UkHeapAlloc(pVM->pUVM->mm.s.pUkHeap, enmTag, cbSize, false, pR0Ptr); + if (pv) + { + *ppv = pv; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/** + * Same as MMR3UkHeapAlloc() only the memory is zeroed. + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. + * @param cbSize Size of the block. + * @param pR0Ptr Where to return the ring-0 address of the memory. + */ +VMMR3DECL(void *) MMR3UkHeapAllocZ(PVM pVM, MMTAG enmTag, size_t cbSize, PRTR0PTR pR0Ptr) +{ + return mmR3UkHeapAlloc(pVM->pUVM->mm.s.pUkHeap, enmTag, cbSize, true, pR0Ptr); +} + + +/** + * Same as MMR3UkHeapAllocZ(). + * + * @returns Pointer to allocated memory. + * @param pVM The cross context VM structure. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. + * @param cbSize Size of the block. + * @param ppv Where to store the pointer to the allocated memory on success. + * @param pR0Ptr Where to return the ring-0 address of the memory. + */ +VMMR3DECL(int) MMR3UkHeapAllocZEx(PVM pVM, MMTAG enmTag, size_t cbSize, void **ppv, PRTR0PTR pR0Ptr) +{ + void *pv = mmR3UkHeapAlloc(pVM->pUVM->mm.s.pUkHeap, enmTag, cbSize, true, pR0Ptr); + if (pv) + { + *ppv = pv; + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/*** + * Worker for mmR3UkHeapAlloc that creates and adds a new sub-heap. + * + * @returns Pointer to the new sub-heap. + * @param pHeap The heap + * @param cbSubHeap The size of the sub-heap. 
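+ *
+ * @remark The sub-heap memory comes from SUPR3PageAllocEx (so a ring-0
+ *         mapping is available as well) and an RTHeapSimple instance is
+ *         then initialized on top of it.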
+ */ +static PMMUKHEAPSUB mmR3UkHeapAddSubHeap(PMMUKHEAP pHeap, size_t cbSubHeap) +{ + PMMUKHEAPSUB pSubHeap = (PMMUKHEAPSUB)MMR3HeapAllocU(pHeap->pUVM, MM_TAG_MM/*_UK_HEAP*/, sizeof(*pSubHeap)); + if (pSubHeap) + { + pSubHeap->cb = cbSubHeap; + int rc = SUPR3PageAllocEx(pSubHeap->cb >> PAGE_SHIFT, 0, &pSubHeap->pv, &pSubHeap->pvR0, NULL); + if (RT_SUCCESS(rc)) + { + rc = RTHeapSimpleInit(&pSubHeap->hSimple, pSubHeap->pv, pSubHeap->cb); + if (RT_SUCCESS(rc)) + { + pSubHeap->pNext = pHeap->pSubHeapHead; + pHeap->pSubHeapHead = pSubHeap; + return pSubHeap; + } + + /* bail out */ + SUPR3PageFreeEx(pSubHeap->pv, pSubHeap->cb >> PAGE_SHIFT); + } + MMR3HeapFree(pSubHeap); + } + return NULL; +} + + +/** + * Allocate memory from the heap. + * + * @returns Pointer to allocated memory. + * @param pHeap Heap handle. + * @param enmTag Statistics tag. Statistics are collected on a per tag + * basis in addition to a global one. Thus we can easily + * identify how memory is used by the VM. + * @param cb Size of the block. + * @param fZero Whether or not to zero the memory block. + * @param pR0Ptr Where to return the ring-0 pointer. + */ +static void *mmR3UkHeapAlloc(PMMUKHEAP pHeap, MMTAG enmTag, size_t cb, bool fZero, PRTR0PTR pR0Ptr) +{ + if (pR0Ptr) + *pR0Ptr = NIL_RTR0PTR; + RTCritSectEnter(&pHeap->Lock); + +#ifdef MMUKHEAP_WITH_STATISTICS + /* + * Find/alloc statistics nodes. + */ + pHeap->Stat.cAllocations++; + PMMUKHEAPSTAT pStat = (PMMUKHEAPSTAT)RTAvlULGet(&pHeap->pStatTree, (AVLULKEY)enmTag); + if (pStat) + pStat->cAllocations++; + else + { + pStat = (PMMUKHEAPSTAT)MMR3HeapAllocZU(pHeap->pUVM, MM_TAG_MM, sizeof(MMUKHEAPSTAT)); + if (!pStat) + { + pHeap->Stat.cFailures++; + AssertMsgFailed(("Failed to allocate heap stat record.\n")); + RTCritSectLeave(&pHeap->Lock); + return NULL; + } + pStat->Core.Key = (AVLULKEY)enmTag; + RTAvlULInsert(&pHeap->pStatTree, &pStat->Core); + + pStat->cAllocations++; + + /* register the statistics */ + PUVM pUVM = pHeap->pUVM; + const char *pszTag = mmGetTagName(enmTag); + STAMR3RegisterFU(pUVM, &pStat->cbCurAllocated, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Number of bytes currently allocated.", "/MM/UkHeap/%s", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cAllocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Number or MMR3UkHeapAlloc() calls.", "/MM/UkHeap/%s/cAllocations", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cReallocations, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Number of MMR3UkHeapRealloc() calls.", "/MM/UkHeap/%s/cReallocations", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cFrees, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Number of MMR3UkHeapFree() calls.", "/MM/UkHeap/%s/cFrees", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cFailures, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, "Number of failures.", "/MM/UkHeap/%s/cFailures", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Total number of bytes allocated.", "/MM/UkHeap/%s/cbAllocated", pszTag); + STAMR3RegisterFU(pUVM, &pStat->cbFreed, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Total number of bytes freed.", "/MM/UkHeap/%s/cbFreed", pszTag); + } +#else + RT_NOREF_PV(enmTag); +#endif + + /* + * Validate input. + */ + if (cb == 0) + { +#ifdef MMUKHEAP_WITH_STATISTICS + pStat->cFailures++; + pHeap->Stat.cFailures++; +#endif + RTCritSectLeave(&pHeap->Lock); + return NULL; + } + + /* + * Allocate heap block. 
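+ * (Descriptive note: the sub-heaps are searched first-fit; the sub-heap
+ * that satisfies the request is moved to the head of the list, and if none
+ * has room a new sub-heap of at least 256KB is added and the allocation
+ * retried once.)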
+ */ + cb = RT_ALIGN_Z(cb, MMUKHEAP_SIZE_ALIGNMENT); + void *pv = NULL; + PMMUKHEAPSUB pSubHeapPrev = NULL; + PMMUKHEAPSUB pSubHeap = pHeap->pSubHeapHead; + while (pSubHeap) + { + if (fZero) + pv = RTHeapSimpleAllocZ(pSubHeap->hSimple, cb, MMUKHEAP_SIZE_ALIGNMENT); + else + pv = RTHeapSimpleAlloc(pSubHeap->hSimple, cb, MMUKHEAP_SIZE_ALIGNMENT); + if (pv) + { + /* Move the sub-heap with free memory to the head. */ + if (pSubHeapPrev) + { + pSubHeapPrev->pNext = pSubHeap->pNext; + pSubHeap->pNext = pHeap->pSubHeapHead; + pHeap->pSubHeapHead = pSubHeap; + } + break; + } + pSubHeapPrev = pSubHeap; + pSubHeap = pSubHeap->pNext; + } + if (RT_UNLIKELY(!pv)) + { + /* + * Add another sub-heap. + */ + pSubHeap = mmR3UkHeapAddSubHeap(pHeap, RT_MAX(RT_ALIGN_Z(cb, PAGE_SIZE) + PAGE_SIZE * 16, _256K)); + if (pSubHeap) + { + if (fZero) + pv = RTHeapSimpleAllocZ(pSubHeap->hSimple, cb, MMUKHEAP_SIZE_ALIGNMENT); + else + pv = RTHeapSimpleAlloc(pSubHeap->hSimple, cb, MMUKHEAP_SIZE_ALIGNMENT); + } + if (RT_UNLIKELY(!pv)) + { + AssertMsgFailed(("Failed to allocate heap block %d, enmTag=%x(%.4s).\n", cb, enmTag, &enmTag)); +#ifdef MMUKHEAP_WITH_STATISTICS + pStat->cFailures++; + pHeap->Stat.cFailures++; +#endif + RTCritSectLeave(&pHeap->Lock); + return NULL; + } + } + + /* + * Update statistics + */ +#ifdef MMUKHEAP_WITH_STATISTICS + size_t cbActual = RTHeapSimpleSize(pSubHeap->hSimple, pv); + pStat->cbAllocated += cbActual; + pStat->cbCurAllocated += cbActual; + pHeap->Stat.cbAllocated += cbActual; + pHeap->Stat.cbCurAllocated += cbActual; +#endif + + if (pR0Ptr) + *pR0Ptr = (uintptr_t)pv - (uintptr_t)pSubHeap->pv + pSubHeap->pvR0; + RTCritSectLeave(&pHeap->Lock); + return pv; +} + + +/** + * Releases memory allocated with MMR3UkHeapAlloc() and MMR3UkHeapAllocZ() + * + * @param pVM The cross context VM structure. + * @param pv Pointer to the memory block to free. + * @param enmTag The allocation accounting tag. + */ +VMMR3DECL(void) MMR3UkHeapFree(PVM pVM, void *pv, MMTAG enmTag) +{ + /* Ignore NULL pointers. */ + if (!pv) + return; + + PMMUKHEAP pHeap = pVM->pUVM->mm.s.pUkHeap; + RTCritSectEnter(&pHeap->Lock); + + /* + * Find the sub-heap and block + */ +#ifdef MMUKHEAP_WITH_STATISTICS + size_t cbActual = 0; +#endif + PMMUKHEAPSUB pSubHeap = pHeap->pSubHeapHead; + while (pSubHeap) + { + if ((uintptr_t)pv - (uintptr_t)pSubHeap->pv < pSubHeap->cb) + { +#ifdef MMUKHEAP_WITH_STATISTICS + cbActual = RTHeapSimpleSize(pSubHeap->hSimple, pv); + PMMUKHEAPSTAT pStat = (PMMUKHEAPSTAT)RTAvlULGet(&pHeap->pStatTree, (AVLULKEY)enmTag); + if (pStat) + { + pStat->cFrees++; + pStat->cbCurAllocated -= cbActual; + pStat->cbFreed += cbActual; + } + pHeap->Stat.cFrees++; + pHeap->Stat.cbFreed += cbActual; + pHeap->Stat.cbCurAllocated -= cbActual; +#else + RT_NOREF_PV(enmTag); +#endif + RTHeapSimpleFree(pSubHeap->hSimple, pv); + + RTCritSectLeave(&pHeap->Lock); + return; + } + } + AssertMsgFailed(("pv=%p\n", pv)); +} + diff --git a/src/VBox/VMM/VMMR3/Makefile.kup b/src/VBox/VMM/VMMR3/Makefile.kup new file mode 100644 index 00000000..e69de29b diff --git a/src/VBox/VMM/VMMR3/NEMR3.cpp b/src/VBox/VMM/VMMR3/NEMR3.cpp new file mode 100644 index 00000000..3006ec35 --- /dev/null +++ b/src/VBox/VMM/VMMR3/NEMR3.cpp @@ -0,0 +1,508 @@ +/* $Id: NEMR3.cpp $ */ +/** @file + * NEM - Native execution manager. + */ + +/* + * Copyright (C) 2018-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_nem NEM - Native Execution Manager. + * + * This is an alternative execution manage to HM and raw-mode. On one host + * (Windows) we're forced to use this, on the others we just do it because we + * can. Since this is host specific in nature, information about an + * implementation is contained in the NEMR3Native-xxxx.cpp files. + * + * @ref pg_nem_win + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_NEM +#include +#include +#include "NEMInternal.h" +#include +#include +#include + +#include + + + +/** + * Basic init and configuration reading. + * + * Always call NEMR3Term after calling this. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) NEMR3InitConfig(PVM pVM) +{ + LogFlow(("NEMR3Init\n")); + + /* + * Assert alignment and sizes. + */ + AssertCompileMemberAlignment(VM, nem.s, 64); + AssertCompile(sizeof(pVM->nem.s) <= sizeof(pVM->nem.padding)); + + /* + * Initialize state info so NEMR3Term will always be happy. + * No returning prior to setting magics! + */ + pVM->nem.s.u32Magic = NEM_MAGIC; + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + pVM->aCpus[iCpu].nem.s.u32Magic = NEMCPU_MAGIC; + + /* + * Read configuration. + */ + PCFGMNODE pCfgNem = CFGMR3GetChild(CFGMR3GetRoot(pVM), "NEM/"); + + /* + * Validate the NEM settings. + */ + int rc = CFGMR3ValidateConfig(pCfgNem, + "/NEM/", + "Enabled" + "|Allow64BitGuests" +#ifdef RT_OS_WINDOWS + "|UseRing0Runloop" +#endif + , + "" /* pszValidNodes */, "NEM" /* pszWho */, 0 /* uInstance */); + if (RT_FAILURE(rc)) + return rc; + + /** @cfgm{/NEM/NEMEnabled, bool, true} + * Whether NEM is enabled. */ + rc = CFGMR3QueryBoolDef(pCfgNem, "Enabled", &pVM->nem.s.fEnabled, true); + AssertLogRelRCReturn(rc, rc); + + +#ifdef VBOX_WITH_64_BITS_GUESTS + /** @cfgm{/NEM/Allow64BitGuests, bool, 32-bit:false, 64-bit:true} + * Enables AMD64 CPU features. + * On 32-bit hosts this isn't default and require host CPU support. 64-bit hosts + * already have the support. */ + rc = CFGMR3QueryBoolDef(pCfgNem, "Allow64BitGuests", &pVM->nem.s.fAllow64BitGuests, HC_ARCH_BITS == 64); + AssertLogRelRCReturn(rc, rc); +#else + pVM->nem.s.fAllow64BitGuests = false; +#endif + +#ifdef RT_OS_WINDOWS + /** @cfgm{/NEM/UseRing0Runloop, bool, true} + * Whether to use the ring-0 runloop (if enabled in the build) or the ring-3 one. + * The latter is generally slower. This option serves as a way out in case + * something breaks in the ring-0 loop. */ +# ifdef NEM_WIN_USE_RING0_RUNLOOP_BY_DEFAULT + bool fUseRing0Runloop = true; +# else + bool fUseRing0Runloop = false; +# endif + rc = CFGMR3QueryBoolDef(pCfgNem, "UseRing0Runloop", &fUseRing0Runloop, fUseRing0Runloop); + AssertLogRelRCReturn(rc, rc); + pVM->nem.s.fUseRing0Runloop = fUseRing0Runloop; +#endif + + return VINF_SUCCESS; +} + + +/** + * This is called by HMR3Init() when HM cannot be used. 
+ * + * Sets VM::bMainExecutionEngine to VM_EXEC_ENGINE_NATIVE_API if we can use a + * native hypervisor API to execute the VM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param fFallback Whether this is a fallback call. Cleared if the VM is + * configured to use NEM instead of HM. + * @param fForced Whether /HM/HMForced was set. If set and we fail to + * enable NEM, we'll return a failure status code. + * Otherwise we'll assume HMR3Init falls back on raw-mode. + */ +VMMR3_INT_DECL(int) NEMR3Init(PVM pVM, bool fFallback, bool fForced) +{ + Assert(pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NATIVE_API); + int rc; + if (pVM->nem.s.fEnabled) + { +#ifdef VBOX_WITH_NATIVE_NEM + rc = nemR3NativeInit(pVM, fFallback, fForced); + ASMCompilerBarrier(); /* May have changed bMainExecutionEngine. */ +#else + RT_NOREF(fFallback); + rc = VINF_SUCCESS; +#endif + if (RT_SUCCESS(rc)) + { + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + LogRel(("NEM: NEMR3Init: Active.\n")); + else + { + LogRel(("NEM: NEMR3Init: Not available.\n")); + if (fForced) + rc = VERR_NEM_NOT_AVAILABLE; + } + } + else + LogRel(("NEM: NEMR3Init: Native init failed: %Rrc.\n", rc)); + } + else + { + LogRel(("NEM: NEMR3Init: Disabled.\n")); + rc = fForced ? VERR_NEM_NOT_ENABLED : VINF_SUCCESS; + } + return rc; +} + + +/** + * Perform initialization that depends on CPUM working. + * + * This is a noop if NEM wasn't activated by a previous NEMR3Init() call. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) NEMR3InitAfterCPUM(PVM pVM) +{ + int rc = VINF_SUCCESS; + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + { + /* + * Enable CPU features making general ASSUMPTIONS (there are two similar + * blocks of code in HM.cpp), to avoid duplicating this code. The + * native backend can make check capabilities and adjust as needed. + */ + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SEP); + if (CPUMGetGuestCpuVendor(pVM) == CPUMCPUVENDOR_AMD) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SYSCALL); /* 64 bits only on Intel CPUs */ + if (pVM->nem.s.fAllow64BitGuests) + { + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_SYSCALL); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LAHF); + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + } + /* Turn on NXE if PAE has been enabled. */ + else if (CPUMR3GetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_PAE)) + CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_NX); + + /* + * Do native after-CPUM init. + */ +#ifdef VBOX_WITH_NATIVE_NEM + rc = nemR3NativeInitAfterCPUM(pVM); +#else + RT_NOREF(pVM); +#endif + } + return rc; +} + + +/** + * Called when a init phase has completed. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat The phase that completed. + */ +VMMR3_INT_DECL(int) NEMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + /* + * Check if GIM needs #UD, since that applies to everyone. + */ + if (enmWhat == VMINITCOMPLETED_RING3) + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + pVM->aCpus[iCpu].nem.s.fGIMTrapXcptUD = GIMShouldTrapXcptUD(&pVM->aCpus[iCpu]); + + /* + * Call native code. 
+ */ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeInitCompleted(pVM, enmWhat); +#else + RT_NOREF(pVM, enmWhat); +#endif + return rc; +} + + +/** + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) NEMR3Term(PVM pVM) +{ + AssertReturn(pVM->nem.s.u32Magic == NEM_MAGIC, VERR_WRONG_ORDER); + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + AssertReturn(pVM->aCpus[iCpu].nem.s.u32Magic == NEMCPU_MAGIC, VERR_WRONG_ORDER); + + /* Do native termination. */ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeTerm(pVM); +#endif + + /* Mark it as terminated. */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + pVM->aCpus[iCpu].nem.s.u32Magic = NEMCPU_MAGIC_DEAD; + pVM->nem.s.u32Magic = NEM_MAGIC_DEAD; + return rc; +} + +/** + * External interface for querying whether native execution API is used. + * + * @returns true if NEM is being used, otherwise false. + * @param pUVM The user mode VM handle. + * @sa HMR3IsEnabled + */ +VMMR3DECL(bool) NEMR3IsEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return VM_IS_NEM_ENABLED(pVM); +} + + +/** + * The VM is being reset. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) NEMR3Reset(PVM pVM) +{ +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + nemR3NativeReset(pVM); +#else + RT_NOREF(pVM); +#endif +} + + +/** + * Resets a virtual CPU. + * + * Used to bring up secondary CPUs on SMP as well as CPU hot plugging. + * + * @param pVCpu The cross context virtual CPU structure to reset. + * @param fInitIpi Set if being reset due to INIT IPI. + */ +VMMR3_INT_DECL(void) NEMR3ResetCpu(PVMCPU pVCpu, bool fInitIpi) +{ +#ifdef VBOX_WITH_NATIVE_NEM + if (pVCpu->pVMR3->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + nemR3NativeResetCpu(pVCpu, fInitIpi); +#else + RT_NOREF(pVCpu, fInitIpi); +#endif +} + + +/** + * Indicates to TM that TMTSCMODE_NATIVE_API should be used for TSC. + * + * @returns true if TMTSCMODE_NATIVE_API must be used, otherwise @c false. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(bool) NEMR3NeedSpecialTscMode(PVM pVM) +{ +#ifdef VBOX_WITH_NATIVE_NEM +# ifdef RT_OS_WINDOWS + if (VM_IS_NEM_ENABLED(pVM)) + return true; +# endif +#else + RT_NOREF(pVM); +#endif + return false; +} + + +/** + * Gets the name of a generic NEM exit code. + * + * @returns Pointer to read only string if @a uExit is known, otherwise NULL. + * @param uExit The NEM exit to name. 
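+ *
+ * @remarks  Illustrative usage sketch only (not code taken from this file); a
+ *           caller would typically guard against the NULL return when logging:
+ *           @code
+ *               const char *pszExitName = NEMR3GetExitName(uExit);
+ *               Log(("NEM: exit %#x (%s)\n", uExit, pszExitName ? pszExitName : "unknown"));
+ *           @endcode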
+ */ +VMMR3DECL(const char *) NEMR3GetExitName(uint32_t uExit) +{ + switch ((NEMEXITTYPE)uExit) + { + case NEMEXITTYPE_UNRECOVERABLE_EXCEPTION: return "NEM unrecoverable exception"; + case NEMEXITTYPE_INVALID_VP_REGISTER_VALUE: return "NEM invalid vp register value"; + case NEMEXITTYPE_INTTERRUPT_WINDOW: return "NEM interrupt window"; + case NEMEXITTYPE_HALT: return "NEM halt"; + case NEMEXITTYPE_XCPT_UD: return "NEM #UD"; + case NEMEXITTYPE_XCPT_DB: return "NEM #DB"; + case NEMEXITTYPE_XCPT_BP: return "NEM #BP"; + case NEMEXITTYPE_CANCELED: return "NEM canceled"; + case NEMEXITTYPE_MEMORY_ACCESS: return "NEM memory access"; + } + + return NULL; +} + + +VMMR3_INT_DECL(VBOXSTRICTRC) NEMR3RunGC(PVM pVM, PVMCPU pVCpu) +{ + Assert(VM_IS_NEM_ENABLED(pVM)); +#ifdef VBOX_WITH_NATIVE_NEM + return nemR3NativeRunGC(pVM, pVCpu); +#else + NOREF(pVM); NOREF(pVCpu); + return VERR_INTERNAL_ERROR_3; +#endif +} + + +VMMR3_INT_DECL(bool) NEMR3CanExecuteGuest(PVM pVM, PVMCPU pVCpu) +{ + Assert(VM_IS_NEM_ENABLED(pVM)); +#ifdef VBOX_WITH_NATIVE_NEM + return nemR3NativeCanExecuteGuest(pVM, pVCpu); +#else + NOREF(pVM); NOREF(pVCpu); + return false; +#endif +} + + +VMMR3_INT_DECL(bool) NEMR3SetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable) +{ + Assert(VM_IS_NEM_ENABLED(pVM)); +#ifdef VBOX_WITH_NATIVE_NEM + return nemR3NativeSetSingleInstruction(pVM, pVCpu, fEnable); +#else + NOREF(pVM); NOREF(pVCpu); NOREF(fEnable); + return false; +#endif +} + + +VMMR3_INT_DECL(void) NEMR3NotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags) +{ + AssertLogRelReturnVoid(VM_IS_NEM_ENABLED(pVM)); +#ifdef VBOX_WITH_NATIVE_NEM + nemR3NativeNotifyFF(pVM, pVCpu, fFlags); +#else + RT_NOREF(pVM, pVCpu, fFlags); +#endif +} + + + + +VMMR3_INT_DECL(int) NEMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb) +{ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeNotifyPhysRamRegister(pVM, GCPhys, cb); +#else + NOREF(pVM); NOREF(GCPhys); NOREF(cb); +#endif + return rc; +} + + +VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvMmio2) +{ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeNotifyPhysMmioExMap(pVM, GCPhys, cb, fFlags, pvMmio2); +#else + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); NOREF(pvMmio2); +#endif + return rc; +} + + +VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags) +{ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeNotifyPhysMmioExUnmap(pVM, GCPhys, cb, fFlags); +#else + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); +#endif + return rc; +} + + +VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags) +{ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeNotifyPhysRomRegisterEarly(pVM, GCPhys, cb, fFlags); +#else + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); +#endif + return rc; +} + + +/** + * Called after the ROM range has been fully completed. + * + * This will be preceeded by a NEMR3NotifyPhysRomRegisterEarly() call as well a + * number of NEMHCNotifyPhysPageProtChanged calls. + * + * @returns VBox status code + * @param pVM The cross context VM structure. 
+ * @param GCPhys The ROM address (page aligned). + * @param cb The size (page aligned). + * @param fFlags NEM_NOTIFY_PHYS_ROM_F_XXX. + */ +VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags) +{ + int rc = VINF_SUCCESS; +#ifdef VBOX_WITH_NATIVE_NEM + if (pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + rc = nemR3NativeNotifyPhysRomRegisterLate(pVM, GCPhys, cb, fFlags); +#else + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); +#endif + return rc; +} + + +VMMR3_INT_DECL(void) NEMR3NotifySetA20(PVMCPU pVCpu, bool fEnabled) +{ +#ifdef VBOX_WITH_NATIVE_NEM + if (pVCpu->pVMR3->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API) + nemR3NativeNotifySetA20(pVCpu, fEnabled); +#else + NOREF(pVCpu); NOREF(fEnabled); +#endif +} + diff --git a/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp b/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp new file mode 100644 index 00000000..18882c8a --- /dev/null +++ b/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp @@ -0,0 +1,2780 @@ +/* $Id: NEMR3Native-win.cpp $ */ +/** @file + * NEM - Native execution manager, native ring-3 Windows backend. + * + * Log group 2: Exit logging. + * Log group 3: Log context on exit. + * Log group 5: Ring-3 memory management + * Log group 6: Ring-0 memory management + * Log group 12: API intercepts. + */ + +/* + * Copyright (C) 2018-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_NEM +#define VMCPU_INCL_CPUM_GST_CTX +#include +#include +#include +#include + +#ifndef _WIN32_WINNT_WIN10 +# error "Missing _WIN32_WINNT_WIN10" +#endif +#ifndef _WIN32_WINNT_WIN10_RS1 /* Missing define, causing trouble for us. */ +# define _WIN32_WINNT_WIN10_RS1 (_WIN32_WINNT_WIN10 + 1) +#endif +#include +#include +#include +#include +#include /* no api header for this. */ + +#include +#include +#include +#include +#include +#include +#include "NEMInternal.h" +#include + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +#ifdef LOG_ENABLED +# define NEM_WIN_INTERCEPT_NT_IO_CTLS +#endif + +/** VID I/O control detection: Fake partition handle input. */ +#define NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE ((HANDLE)(uintptr_t)38479125) +/** VID I/O control detection: Fake partition ID return. */ +#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID UINT64_C(0xfa1e000042424242) +/** VID I/O control detection: Fake CPU index input. */ +#define NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX UINT32_C(42) +/** VID I/O control detection: Fake timeout input. 
*/ +#define NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT UINT32_C(0x00080286) + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** @name APIs imported from WinHvPlatform.dll + * @{ */ +static decltype(WHvGetCapability) * g_pfnWHvGetCapability; +static decltype(WHvCreatePartition) * g_pfnWHvCreatePartition; +static decltype(WHvSetupPartition) * g_pfnWHvSetupPartition; +static decltype(WHvDeletePartition) * g_pfnWHvDeletePartition; +static decltype(WHvGetPartitionProperty) * g_pfnWHvGetPartitionProperty; +static decltype(WHvSetPartitionProperty) * g_pfnWHvSetPartitionProperty; +static decltype(WHvMapGpaRange) * g_pfnWHvMapGpaRange; +static decltype(WHvUnmapGpaRange) * g_pfnWHvUnmapGpaRange; +static decltype(WHvTranslateGva) * g_pfnWHvTranslateGva; +#ifndef NEM_WIN_USE_OUR_OWN_RUN_API +static decltype(WHvCreateVirtualProcessor) * g_pfnWHvCreateVirtualProcessor; +static decltype(WHvDeleteVirtualProcessor) * g_pfnWHvDeleteVirtualProcessor; +static decltype(WHvRunVirtualProcessor) * g_pfnWHvRunVirtualProcessor; +static decltype(WHvCancelRunVirtualProcessor) * g_pfnWHvCancelRunVirtualProcessor; +static decltype(WHvGetVirtualProcessorRegisters) * g_pfnWHvGetVirtualProcessorRegisters; +static decltype(WHvSetVirtualProcessorRegisters) * g_pfnWHvSetVirtualProcessorRegisters; +#endif +/** @} */ + +/** @name APIs imported from Vid.dll + * @{ */ +static decltype(VidGetHvPartitionId) *g_pfnVidGetHvPartitionId; +static decltype(VidStartVirtualProcessor) *g_pfnVidStartVirtualProcessor; +static decltype(VidStopVirtualProcessor) *g_pfnVidStopVirtualProcessor; +static decltype(VidMessageSlotMap) *g_pfnVidMessageSlotMap; +static decltype(VidMessageSlotHandleAndGetNext) *g_pfnVidMessageSlotHandleAndGetNext; +#ifdef LOG_ENABLED +static decltype(VidGetVirtualProcessorState) *g_pfnVidGetVirtualProcessorState; +static decltype(VidSetVirtualProcessorState) *g_pfnVidSetVirtualProcessorState; +static decltype(VidGetVirtualProcessorRunningStatus) *g_pfnVidGetVirtualProcessorRunningStatus; +#endif +/** @} */ + +/** The Windows build number. */ +static uint32_t g_uBuildNo = 17134; + + + +/** + * Import instructions. + */ +static const struct +{ + uint8_t idxDll; /**< 0 for WinHvPlatform.dll, 1 for vid.dll. */ + bool fOptional; /**< Set if import is optional. */ + PFNRT *ppfn; /**< The function pointer variable. */ + const char *pszName; /**< The function name. 
*/ +} g_aImports[] = +{ +#define NEM_WIN_IMPORT(a_idxDll, a_fOptional, a_Name) { (a_idxDll), (a_fOptional), (PFNRT *)&RT_CONCAT(g_pfn,a_Name), #a_Name } + NEM_WIN_IMPORT(0, false, WHvGetCapability), + NEM_WIN_IMPORT(0, false, WHvCreatePartition), + NEM_WIN_IMPORT(0, false, WHvSetupPartition), + NEM_WIN_IMPORT(0, false, WHvDeletePartition), + NEM_WIN_IMPORT(0, false, WHvGetPartitionProperty), + NEM_WIN_IMPORT(0, false, WHvSetPartitionProperty), + NEM_WIN_IMPORT(0, false, WHvMapGpaRange), + NEM_WIN_IMPORT(0, false, WHvUnmapGpaRange), + NEM_WIN_IMPORT(0, false, WHvTranslateGva), +#ifndef NEM_WIN_USE_OUR_OWN_RUN_API + NEM_WIN_IMPORT(0, false, WHvCreateVirtualProcessor), + NEM_WIN_IMPORT(0, false, WHvDeleteVirtualProcessor), + NEM_WIN_IMPORT(0, false, WHvRunVirtualProcessor), + NEM_WIN_IMPORT(0, false, WHvCancelRunVirtualProcessor), + NEM_WIN_IMPORT(0, false, WHvGetVirtualProcessorRegisters), + NEM_WIN_IMPORT(0, false, WHvSetVirtualProcessorRegisters), +#endif + NEM_WIN_IMPORT(1, false, VidGetHvPartitionId), + NEM_WIN_IMPORT(1, false, VidMessageSlotMap), + NEM_WIN_IMPORT(1, false, VidMessageSlotHandleAndGetNext), + NEM_WIN_IMPORT(1, false, VidStartVirtualProcessor), + NEM_WIN_IMPORT(1, false, VidStopVirtualProcessor), +#ifdef LOG_ENABLED + NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorState), + NEM_WIN_IMPORT(1, false, VidSetVirtualProcessorState), + NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorRunningStatus), +#endif +#undef NEM_WIN_IMPORT +}; + + +/** The real NtDeviceIoControlFile API in NTDLL. */ +static decltype(NtDeviceIoControlFile) *g_pfnNtDeviceIoControlFile; +/** Pointer to the NtDeviceIoControlFile import table entry. */ +static decltype(NtDeviceIoControlFile) **g_ppfnVidNtDeviceIoControlFile; +/** Info about the VidGetHvPartitionId I/O control interface. */ +static NEMWINIOCTL g_IoCtlGetHvPartitionId; +/** Info about the VidStartVirtualProcessor I/O control interface. */ +static NEMWINIOCTL g_IoCtlStartVirtualProcessor; +/** Info about the VidStopVirtualProcessor I/O control interface. */ +static NEMWINIOCTL g_IoCtlStopVirtualProcessor; +/** Info about the VidMessageSlotHandleAndGetNext I/O control interface. */ +static NEMWINIOCTL g_IoCtlMessageSlotHandleAndGetNext; +#ifdef LOG_ENABLED +/** Info about the VidMessageSlotMap I/O control interface - for logging. */ +static NEMWINIOCTL g_IoCtlMessageSlotMap; +/* Info about the VidGetVirtualProcessorState I/O control interface - for logging. */ +static NEMWINIOCTL g_IoCtlGetVirtualProcessorState; +/* Info about the VidSetVirtualProcessorState I/O control interface - for logging. */ +static NEMWINIOCTL g_IoCtlSetVirtualProcessorState; +/** Pointer to what nemR3WinIoctlDetector_ForLogging should fill in. */ +static NEMWINIOCTL *g_pIoCtlDetectForLogging; +#endif + +#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS +/** Mapping slot for CPU #0. + * @{ */ +static VID_MESSAGE_MAPPING_HEADER *g_pMsgSlotMapping = NULL; +static const HV_MESSAGE_HEADER *g_pHvMsgHdr; +static const HV_X64_INTERCEPT_MESSAGE_HEADER *g_pX64MsgHdr; +/** @} */ +#endif + + +/* + * Let the preprocessor alias the APIs to import variables for better autocompletion. 
+ */
+#ifndef IN_SLICKEDIT
+# define WHvGetCapability                           g_pfnWHvGetCapability
+# define WHvCreatePartition                         g_pfnWHvCreatePartition
+# define WHvSetupPartition                          g_pfnWHvSetupPartition
+# define WHvDeletePartition                         g_pfnWHvDeletePartition
+# define WHvGetPartitionProperty                    g_pfnWHvGetPartitionProperty
+# define WHvSetPartitionProperty                    g_pfnWHvSetPartitionProperty
+# define WHvMapGpaRange                             g_pfnWHvMapGpaRange
+# define WHvUnmapGpaRange                           g_pfnWHvUnmapGpaRange
+# define WHvTranslateGva                            g_pfnWHvTranslateGva
+# define WHvCreateVirtualProcessor                  g_pfnWHvCreateVirtualProcessor
+# define WHvDeleteVirtualProcessor                  g_pfnWHvDeleteVirtualProcessor
+# define WHvRunVirtualProcessor                     g_pfnWHvRunVirtualProcessor
+# define WHvGetRunExitContextSize                   g_pfnWHvGetRunExitContextSize
+# define WHvCancelRunVirtualProcessor               g_pfnWHvCancelRunVirtualProcessor
+# define WHvGetVirtualProcessorRegisters            g_pfnWHvGetVirtualProcessorRegisters
+# define WHvSetVirtualProcessorRegisters            g_pfnWHvSetVirtualProcessorRegisters
+
+# define VidMessageSlotHandleAndGetNext             g_pfnVidMessageSlotHandleAndGetNext
+# define VidStartVirtualProcessor                   g_pfnVidStartVirtualProcessor
+# define VidStopVirtualProcessor                    g_pfnVidStopVirtualProcessor
+
+#endif
+
+/** WHV_MEMORY_ACCESS_TYPE names */
+static const char * const g_apszWHvMemAccesstypes[4] = { "read", "write", "exec", "!undefined!" };
+
+
+/*********************************************************************************************************************************
+*   Internal Functions                                                                                                            *
+*********************************************************************************************************************************/
+
+/*
+ * Instantiate the code we share with ring-0.
+ */
+#ifdef NEM_WIN_USE_OUR_OWN_RUN_API
+# define NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
+#else
+# undef NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
+#endif
+#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
+
+
+
+#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
+/**
+ * Wrapper that logs the call from VID.DLL.
+ *
+ * This is very handy for figuring out why an API call fails.
+ */ +static NTSTATUS WINAPI +nemR3WinLogWrapper_NtDeviceIoControlFile(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx, + PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput, + PVOID pvOutput, ULONG cbOutput) +{ + + char szFunction[32]; + const char *pszFunction; + if (uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction) + pszFunction = "VidMessageSlotHandleAndGetNext"; + else if (uFunction == g_IoCtlStartVirtualProcessor.uFunction) + pszFunction = "VidStartVirtualProcessor"; + else if (uFunction == g_IoCtlStopVirtualProcessor.uFunction) + pszFunction = "VidStopVirtualProcessor"; + else if (uFunction == g_IoCtlMessageSlotMap.uFunction) + pszFunction = "VidMessageSlotMap"; + else if (uFunction == g_IoCtlGetVirtualProcessorState.uFunction) + pszFunction = "VidGetVirtualProcessorState"; + else if (uFunction == g_IoCtlSetVirtualProcessorState.uFunction) + pszFunction = "VidSetVirtualProcessorState"; + else + { + RTStrPrintf(szFunction, sizeof(szFunction), "%#x", uFunction); + pszFunction = szFunction; + } + + if (cbInput > 0 && pvInput) + Log12(("VID!NtDeviceIoControlFile: %s/input: %.*Rhxs\n", pszFunction, RT_MIN(cbInput, 32), pvInput)); + NTSTATUS rcNt = g_pfnNtDeviceIoControlFile(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, uFunction, + pvInput, cbInput, pvOutput, cbOutput); + if (!hEvt && !pfnApcCallback && !pvApcCtx) + Log12(("VID!NtDeviceIoControlFile: hFile=%#zx pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n", + hFile, pIos, pIos->Status, pIos->Information, pszFunction, pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress())); + else + Log12(("VID!NtDeviceIoControlFile: hFile=%#zx hEvt=%#zx Apc=%p/%p pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n", + hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, pIos->Status, pIos->Information, pszFunction, + pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress())); + if (cbOutput > 0 && pvOutput) + { + Log12(("VID!NtDeviceIoControlFile: %s/output: %.*Rhxs\n", pszFunction, RT_MIN(cbOutput, 32), pvOutput)); + if (uFunction == 0x2210cc && g_pMsgSlotMapping == NULL && cbOutput >= sizeof(void *)) + { + g_pMsgSlotMapping = *(VID_MESSAGE_MAPPING_HEADER **)pvOutput; + g_pHvMsgHdr = (const HV_MESSAGE_HEADER *)(g_pMsgSlotMapping + 1); + g_pX64MsgHdr = (const HV_X64_INTERCEPT_MESSAGE_HEADER *)(g_pHvMsgHdr + 1); + Log12(("VID!NtDeviceIoControlFile: Message slot mapping: %p\n", g_pMsgSlotMapping)); + } + } + if ( g_pMsgSlotMapping + && ( uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction + || uFunction == g_IoCtlStopVirtualProcessor.uFunction + || uFunction == g_IoCtlMessageSlotMap.uFunction + )) + Log12(("VID!NtDeviceIoControlFile: enmVidMsgType=%#x cb=%#x msg=%#x payload=%u cs:rip=%04x:%08RX64 (%s)\n", + g_pMsgSlotMapping->enmVidMsgType, g_pMsgSlotMapping->cbMessage, + g_pHvMsgHdr->MessageType, g_pHvMsgHdr->PayloadSize, + g_pX64MsgHdr->CsSegment.Selector, g_pX64MsgHdr->Rip, pszFunction)); + + return rcNt; +} +#endif /* NEM_WIN_INTERCEPT_NT_IO_CTLS */ + + +/** + * Patches the call table of VID.DLL so we can intercept NtDeviceIoControlFile. + * + * This is for used to figure out the I/O control codes and in logging builds + * for logging API calls that WinHvPlatform.dll does. + * + * @returns VBox status code. + * @param hLdrModVid The VID module handle. + * @param pErrInfo Where to return additional error information. 
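+ *
+ * @remarks  Conceptually the patching amounts to the following sketch (illustrative
+ *           only; the hook name is a placeholder, the real code below records the
+ *           slot address and optionally installs nemR3WinLogWrapper_NtDeviceIoControlFile):
+ *           @code
+ *               DWORD fOldProt = 0;
+ *               VirtualProtect(&pFirstThunk->u1.Function, sizeof(uintptr_t), PAGE_EXECUTE_READWRITE, &fOldProt);
+ *               *(uintptr_t *)&pFirstThunk->u1.Function = (uintptr_t)MyNtDeviceIoControlFileHook; // placeholder name
+ *           @endcode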
+ */ +static int nemR3WinInitVidIntercepts(RTLDRMOD hLdrModVid, PRTERRINFO pErrInfo) +{ + /* + * Locate the real API. + */ + g_pfnNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) *)RTLdrGetSystemSymbol("NTDLL.DLL", "NtDeviceIoControlFile"); + AssertReturn(g_pfnNtDeviceIoControlFile != NULL, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to resolve NtDeviceIoControlFile from NTDLL.DLL")); + + /* + * Locate the PE header and get what we need from it. + */ + uint8_t const *pbImage = (uint8_t const *)RTLdrGetNativeHandle(hLdrModVid); + IMAGE_DOS_HEADER const *pMzHdr = (IMAGE_DOS_HEADER const *)pbImage; + AssertReturn(pMzHdr->e_magic == IMAGE_DOS_SIGNATURE, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL mapping doesn't start with MZ signature: %#x", pMzHdr->e_magic)); + IMAGE_NT_HEADERS const *pNtHdrs = (IMAGE_NT_HEADERS const *)&pbImage[pMzHdr->e_lfanew]; + AssertReturn(pNtHdrs->Signature == IMAGE_NT_SIGNATURE, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL has invalid PE signaturre: %#x @%#x", + pNtHdrs->Signature, pMzHdr->e_lfanew)); + + uint32_t const cbImage = pNtHdrs->OptionalHeader.SizeOfImage; + IMAGE_DATA_DIRECTORY const ImportDir = pNtHdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]; + + /* + * Walk the import descriptor table looking for NTDLL.DLL. + */ + AssertReturn( ImportDir.Size > 0 + && ImportDir.Size < cbImage, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory size: %#x", ImportDir.Size)); + AssertReturn( ImportDir.VirtualAddress > 0 + && ImportDir.VirtualAddress <= cbImage - ImportDir.Size, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory RVA: %#x", ImportDir.VirtualAddress)); + + for (PIMAGE_IMPORT_DESCRIPTOR pImps = (PIMAGE_IMPORT_DESCRIPTOR)&pbImage[ImportDir.VirtualAddress]; + pImps->Name != 0 && pImps->FirstThunk != 0; + pImps++) + { + AssertReturn(pImps->Name < cbImage, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory entry name: %#x", pImps->Name)); + const char *pszModName = (const char *)&pbImage[pImps->Name]; + if (RTStrICmpAscii(pszModName, "ntdll.dll")) + continue; + AssertReturn(pImps->FirstThunk < cbImage, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk)); + AssertReturn(pImps->OriginalFirstThunk < cbImage, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk)); + + /* + * Walk the thunks table(s) looking for NtDeviceIoControlFile. + */ + PIMAGE_THUNK_DATA pFirstThunk = (PIMAGE_THUNK_DATA)&pbImage[pImps->FirstThunk]; /* update this. */ + PIMAGE_THUNK_DATA pThunk = pImps->OriginalFirstThunk == 0 /* read from this. */ + ? 
(PIMAGE_THUNK_DATA)&pbImage[pImps->FirstThunk]
+                                    : (PIMAGE_THUNK_DATA)&pbImage[pImps->OriginalFirstThunk];
+        while (pThunk->u1.Ordinal != 0)
+        {
+            if (!(pThunk->u1.Ordinal & IMAGE_ORDINAL_FLAG32))
+            {
+                AssertReturn(pThunk->u1.Ordinal > 0 && pThunk->u1.Ordinal < cbImage,
+                             RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
+
+                const char *pszSymbol = (const char *)&pbImage[(uintptr_t)pThunk->u1.AddressOfData + 2];
+                if (strcmp(pszSymbol, "NtDeviceIoControlFile") == 0)
+                {
+                    DWORD fOldProt = PAGE_READONLY;
+                    VirtualProtect(&pFirstThunk->u1.Function, sizeof(uintptr_t), PAGE_EXECUTE_READWRITE, &fOldProt);
+                    g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)&pFirstThunk->u1.Function;
+                    /* Don't restore the protection here, so we can modify the NtDeviceIoControlFile pointer later. */
+                }
+            }
+
+            pThunk++;
+            pFirstThunk++;
+        }
+    }
+
+    if (*g_ppfnVidNtDeviceIoControlFile)
+    {
+#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
+        *g_ppfnVidNtDeviceIoControlFile = nemR3WinLogWrapper_NtDeviceIoControlFile;
+#endif
+        return VINF_SUCCESS;
+    }
+    return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to patch NtDeviceIoControlFile import in VID.DLL!");
+}
+
+
+/**
+ * Worker for nemR3NativeInit that probes and loads the native API.
+ *
+ * @returns VBox status code.
+ * @param   fForced     Whether the HMForced flag is set and we should
+ *                      fail if we cannot initialize.
+ * @param   pErrInfo    Where to always return error info.
+ */
+static int nemR3WinInitProbeAndLoad(bool fForced, PRTERRINFO pErrInfo)
+{
+    /*
+     * Check that the DLL files we need are present, but without loading them.
+     * We'd like to avoid loading them unnecessarily.
+     */
+    WCHAR wszPath[MAX_PATH + 64];
+    UINT  cwcPath = GetSystemDirectoryW(wszPath, MAX_PATH);
+    if (cwcPath >= MAX_PATH || cwcPath < 2)
+        return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "GetSystemDirectoryW failed (%#x / %u)", cwcPath, GetLastError());
+
+    if (wszPath[cwcPath - 1] != '\\' && wszPath[cwcPath - 1] != '/')
+        wszPath[cwcPath++] = '\\';
+    RTUtf16CopyAscii(&wszPath[cwcPath], RT_ELEMENTS(wszPath) - cwcPath, "WinHvPlatform.dll");
+    if (GetFileAttributesW(wszPath) == INVALID_FILE_ATTRIBUTES)
+        return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "The native API dll was not found (%ls)", wszPath);
+
+    /*
+     * Check that we're in a VM and that the hypervisor identifies itself as Hyper-V.
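+     *
+     * (For reference: leaf 0x40000000 returns the hypervisor vendor signature in
+     * EBX:ECX:EDX; the constants compared below, 0x7263694d 0x666f736f 0x76482074,
+     * are just "Micr", "osof" and "t Hv" in little-endian ASCII.)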
+ */ + if (!ASMHasCpuId()) + return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID support"); + if (!ASMIsValidStdRange(ASMCpuId_EAX(0))) + return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID leaf #1"); + if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_HVP)) + return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Not in a hypervisor partition (HVP=0)"); + + uint32_t cMaxHyperLeaf = 0; + uint32_t uEbx = 0; + uint32_t uEcx = 0; + uint32_t uEdx = 0; + ASMCpuIdExSlow(0x40000000, 0, 0, 0, &cMaxHyperLeaf, &uEbx, &uEcx, &uEdx); + if (!ASMIsValidHypervisorRange(cMaxHyperLeaf)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Invalid hypervisor CPUID range (%#x %#x %#x %#x)", + cMaxHyperLeaf, uEbx, uEcx, uEdx); + if ( uEbx != UINT32_C(0x7263694d) /* Micr */ + || uEcx != UINT32_C(0x666f736f) /* osof */ + || uEdx != UINT32_C(0x76482074) /* t Hv */) + return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, + "Not Hyper-V CPUID signature: %#x %#x %#x (expected %#x %#x %#x)", + uEbx, uEcx, uEdx, UINT32_C(0x7263694d), UINT32_C(0x666f736f), UINT32_C(0x76482074)); + if (cMaxHyperLeaf < UINT32_C(0x40000005)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Too narrow hypervisor CPUID range (%#x)", cMaxHyperLeaf); + + /** @todo would be great if we could recognize a root partition from the + * CPUID info, but I currently don't dare do that. */ + + /* + * Now try load the DLLs and resolve the APIs. + */ + static const char * const s_apszDllNames[2] = { "WinHvPlatform.dll", "vid.dll" }; + RTLDRMOD ahMods[2] = { NIL_RTLDRMOD, NIL_RTLDRMOD }; + int rc = VINF_SUCCESS; + for (unsigned i = 0; i < RT_ELEMENTS(s_apszDllNames); i++) + { + int rc2 = RTLdrLoadSystem(s_apszDllNames[i], true /*fNoUnload*/, &ahMods[i]); + if (RT_FAILURE(rc2)) + { + if (!RTErrInfoIsSet(pErrInfo)) + RTErrInfoSetF(pErrInfo, rc2, "Failed to load API DLL: %s: %Rrc", s_apszDllNames[i], rc2); + else + RTErrInfoAddF(pErrInfo, rc2, "; %s: %Rrc", s_apszDllNames[i], rc2); + ahMods[i] = NIL_RTLDRMOD; + rc = VERR_NEM_INIT_FAILED; + } + } + if (RT_SUCCESS(rc)) + rc = nemR3WinInitVidIntercepts(ahMods[1], pErrInfo); + if (RT_SUCCESS(rc)) + { + for (unsigned i = 0; i < RT_ELEMENTS(g_aImports); i++) + { + int rc2 = RTLdrGetSymbol(ahMods[g_aImports[i].idxDll], g_aImports[i].pszName, (void **)g_aImports[i].ppfn); + if (RT_FAILURE(rc2)) + { + *g_aImports[i].ppfn = NULL; + + LogRel(("NEM: %s: Failed to import %s!%s: %Rrc", + g_aImports[i].fOptional ? "info" : fForced ? "fatal" : "error", + s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName, rc2)); + if (!g_aImports[i].fOptional) + { + if (RTErrInfoIsSet(pErrInfo)) + RTErrInfoAddF(pErrInfo, rc2, ", %s!%s", + s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName); + else + rc = RTErrInfoSetF(pErrInfo, rc2, "Failed to import: %s!%s", + s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName); + Assert(RT_FAILURE(rc)); + } + } + } + if (RT_SUCCESS(rc)) + { + Assert(!RTErrInfoIsSet(pErrInfo)); + } + } + + for (unsigned i = 0; i < RT_ELEMENTS(ahMods); i++) + RTLdrClose(ahMods[i]); + return rc; +} + + +/** + * Wrapper for different WHvGetCapability signatures. + */ +DECLINLINE(HRESULT) WHvGetCapabilityWrapper(WHV_CAPABILITY_CODE enmCap, WHV_CAPABILITY *pOutput, uint32_t cbOutput) +{ + return g_pfnWHvGetCapability(enmCap, pOutput, cbOutput, NULL); +} + + +/** + * Worker for nemR3NativeInit that gets the hypervisor capabilities. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pErrInfo Where to always return error info. 
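+ *
+ * @remarks  Every capability below is queried with the same pattern; shown here
+ *           only as an illustration of the code that follows:
+ *           @code
+ *               WHV_CAPABILITY Caps;
+ *               RT_ZERO(Caps);
+ *               HRESULT hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeHypervisorPresent, &Caps, sizeof(Caps));
+ *               if (FAILED(hrc))
+ *                   return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "WHvGetCapability failed: %Rhrc", hrc);
+ *           @endcode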
+ */ +static int nemR3WinInitCheckCapabilities(PVM pVM, PRTERRINFO pErrInfo) +{ +#define NEM_LOG_REL_CAP_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %-38s= " a_szFmt "\n", a_szField, a_Value)) +#define NEM_LOG_REL_CAP_SUB_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %36s: " a_szFmt "\n", a_szField, a_Value)) +#define NEM_LOG_REL_CAP_SUB(a_szField, a_Value) NEM_LOG_REL_CAP_SUB_EX(a_szField, "%d", a_Value) + + /* + * Is the hypervisor present with the desired capability? + * + * In build 17083 this translates into: + * - CPUID[0x00000001].HVP is set + * - CPUID[0x40000000] == "Microsoft Hv" + * - CPUID[0x40000001].eax == "Hv#1" + * - CPUID[0x40000003].ebx[12] is set. + * - VidGetExoPartitionProperty(INVALID_HANDLE_VALUE, 0x60000, &Ignored) returns + * a non-zero value. + */ + /** + * @todo Someone at Microsoft please explain weird API design: + * 1. Pointless CapabilityCode duplication int the output; + * 2. No output size. + */ + WHV_CAPABILITY Caps; + RT_ZERO(Caps); + SetLastError(0); + HRESULT hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeHypervisorPresent, &Caps, sizeof(Caps)); + DWORD rcWin = GetLastError(); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "WHvGetCapability/WHvCapabilityCodeHypervisorPresent failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + if (!Caps.HypervisorPresent) + { + if (!RTPathExists(RTPATH_NT_PASSTHRU_PREFIX "Device\\VidExo")) + return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, + "WHvCapabilityCodeHypervisorPresent is FALSE! Make sure you have enabled the 'Windows Hypervisor Platform' feature."); + return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "WHvCapabilityCodeHypervisorPresent is FALSE! (%u)", rcWin); + } + LogRel(("NEM: WHvCapabilityCodeHypervisorPresent is TRUE, so this might work...\n")); + + + /* + * Check what extended VM exits are supported. + */ + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExtendedVmExits, &Caps, sizeof(Caps)); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "WHvGetCapability/WHvCapabilityCodeExtendedVmExits failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + NEM_LOG_REL_CAP_EX("WHvCapabilityCodeExtendedVmExits", "%'#018RX64", Caps.ExtendedVmExits.AsUINT64); + pVM->nem.s.fExtendedMsrExit = RT_BOOL(Caps.ExtendedVmExits.X64MsrExit); + pVM->nem.s.fExtendedCpuIdExit = RT_BOOL(Caps.ExtendedVmExits.X64CpuidExit); + pVM->nem.s.fExtendedXcptExit = RT_BOOL(Caps.ExtendedVmExits.ExceptionExit); + NEM_LOG_REL_CAP_SUB("fExtendedMsrExit", pVM->nem.s.fExtendedMsrExit); + NEM_LOG_REL_CAP_SUB("fExtendedCpuIdExit", pVM->nem.s.fExtendedCpuIdExit); + NEM_LOG_REL_CAP_SUB("fExtendedXcptExit", pVM->nem.s.fExtendedXcptExit); + if (Caps.ExtendedVmExits.AsUINT64 & ~(uint64_t)7) + LogRel(("NEM: Warning! Unknown VM exit definitions: %#RX64\n", Caps.ExtendedVmExits.AsUINT64)); + /** @todo RECHECK: WHV_EXTENDED_VM_EXITS typedef. */ + + /* + * Check features in case they end up defining any. + */ + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeFeatures, &Caps, sizeof(Caps)); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "WHvGetCapability/WHvCapabilityCodeFeatures failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + if (Caps.Features.AsUINT64 & ~(uint64_t)0) + LogRel(("NEM: Warning! Unknown feature definitions: %#RX64\n", Caps.Features.AsUINT64)); + /** @todo RECHECK: WHV_CAPABILITY_FEATURES typedef. 
*/ + + /* + * Check supported exception exit bitmap bits. + * We don't currently require this, so we just log failure. + */ + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExceptionExitBitmap, &Caps, sizeof(Caps)); + if (SUCCEEDED(hrc)) + LogRel(("NEM: Supported exception exit bitmap: %#RX64\n", Caps.ExceptionExitBitmap)); + else + LogRel(("NEM: Warning! WHvGetCapability/WHvCapabilityCodeExceptionExitBitmap failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue())); + + /* + * Check that the CPU vendor is supported. + */ + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorVendor, &Caps, sizeof(Caps)); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "WHvGetCapability/WHvCapabilityCodeProcessorVendor failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + switch (Caps.ProcessorVendor) + { + /** @todo RECHECK: WHV_PROCESSOR_VENDOR typedef. */ + case WHvProcessorVendorIntel: + NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - Intel", Caps.ProcessorVendor); + pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_INTEL; + break; + case WHvProcessorVendorAmd: + NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - AMD", Caps.ProcessorVendor); + pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_AMD; + break; + default: + NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d", Caps.ProcessorVendor); + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unknown processor vendor: %d", Caps.ProcessorVendor); + } + + /* + * CPU features, guessing these are virtual CPU features? + */ + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorFeatures, &Caps, sizeof(Caps)); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "WHvGetCapability/WHvCapabilityCodeProcessorFeatures failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorFeatures", "%'#018RX64", Caps.ProcessorFeatures.AsUINT64); +#define NEM_LOG_REL_CPU_FEATURE(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.ProcessorFeatures.a_Field) + NEM_LOG_REL_CPU_FEATURE(Sse3Support); + NEM_LOG_REL_CPU_FEATURE(LahfSahfSupport); + NEM_LOG_REL_CPU_FEATURE(Ssse3Support); + NEM_LOG_REL_CPU_FEATURE(Sse4_1Support); + NEM_LOG_REL_CPU_FEATURE(Sse4_2Support); + NEM_LOG_REL_CPU_FEATURE(Sse4aSupport); + NEM_LOG_REL_CPU_FEATURE(XopSupport); + NEM_LOG_REL_CPU_FEATURE(PopCntSupport); + NEM_LOG_REL_CPU_FEATURE(Cmpxchg16bSupport); + NEM_LOG_REL_CPU_FEATURE(Altmovcr8Support); + NEM_LOG_REL_CPU_FEATURE(LzcntSupport); + NEM_LOG_REL_CPU_FEATURE(MisAlignSseSupport); + NEM_LOG_REL_CPU_FEATURE(MmxExtSupport); + NEM_LOG_REL_CPU_FEATURE(Amd3DNowSupport); + NEM_LOG_REL_CPU_FEATURE(ExtendedAmd3DNowSupport); + NEM_LOG_REL_CPU_FEATURE(Page1GbSupport); + NEM_LOG_REL_CPU_FEATURE(AesSupport); + NEM_LOG_REL_CPU_FEATURE(PclmulqdqSupport); + NEM_LOG_REL_CPU_FEATURE(PcidSupport); + NEM_LOG_REL_CPU_FEATURE(Fma4Support); + NEM_LOG_REL_CPU_FEATURE(F16CSupport); + NEM_LOG_REL_CPU_FEATURE(RdRandSupport); + NEM_LOG_REL_CPU_FEATURE(RdWrFsGsSupport); + NEM_LOG_REL_CPU_FEATURE(SmepSupport); + NEM_LOG_REL_CPU_FEATURE(EnhancedFastStringSupport); + NEM_LOG_REL_CPU_FEATURE(Bmi1Support); + NEM_LOG_REL_CPU_FEATURE(Bmi2Support); + /* two reserved bits here, see below */ + NEM_LOG_REL_CPU_FEATURE(MovbeSupport); + NEM_LOG_REL_CPU_FEATURE(Npiep1Support); + NEM_LOG_REL_CPU_FEATURE(DepX87FPUSaveSupport); + NEM_LOG_REL_CPU_FEATURE(RdSeedSupport); + 
NEM_LOG_REL_CPU_FEATURE(AdxSupport); + NEM_LOG_REL_CPU_FEATURE(IntelPrefetchSupport); + NEM_LOG_REL_CPU_FEATURE(SmapSupport); + NEM_LOG_REL_CPU_FEATURE(HleSupport); + NEM_LOG_REL_CPU_FEATURE(RtmSupport); + NEM_LOG_REL_CPU_FEATURE(RdtscpSupport); + NEM_LOG_REL_CPU_FEATURE(ClflushoptSupport); + NEM_LOG_REL_CPU_FEATURE(ClwbSupport); + NEM_LOG_REL_CPU_FEATURE(ShaSupport); + NEM_LOG_REL_CPU_FEATURE(X87PointersSavedSupport); +#undef NEM_LOG_REL_CPU_FEATURE + if (Caps.ProcessorFeatures.AsUINT64 & (~(RT_BIT_64(43) - 1) | RT_BIT_64(27) | RT_BIT_64(28))) + LogRel(("NEM: Warning! Unknown CPU features: %#RX64\n", Caps.ProcessorFeatures.AsUINT64)); + pVM->nem.s.uCpuFeatures.u64 = Caps.ProcessorFeatures.AsUINT64; + /** @todo RECHECK: WHV_PROCESSOR_FEATURES typedef. */ + + /* + * The cache line flush size. + */ + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorClFlushSize, &Caps, sizeof(Caps)); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "WHvGetCapability/WHvCapabilityCodeProcessorClFlushSize failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorClFlushSize", "2^%u", Caps.ProcessorClFlushSize); + if (Caps.ProcessorClFlushSize < 8 && Caps.ProcessorClFlushSize > 9) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unsupported cache line flush size: %u", Caps.ProcessorClFlushSize); + pVM->nem.s.cCacheLineFlushShift = Caps.ProcessorClFlushSize; + + /* + * See if they've added more properties that we're not aware of. + */ + /** @todo RECHECK: WHV_CAPABILITY_CODE typedef. */ + if (!IsDebuggerPresent()) /* Too noisy when in debugger, so skip. */ + { + static const struct + { + uint32_t iMin, iMax; } s_aUnknowns[] = + { + { 0x0004, 0x000f }, + { 0x1003, 0x100f }, + { 0x2000, 0x200f }, + { 0x3000, 0x300f }, + { 0x4000, 0x400f }, + }; + for (uint32_t j = 0; j < RT_ELEMENTS(s_aUnknowns); j++) + for (uint32_t i = s_aUnknowns[j].iMin; i <= s_aUnknowns[j].iMax; i++) + { + RT_ZERO(Caps); + hrc = WHvGetCapabilityWrapper((WHV_CAPABILITY_CODE)i, &Caps, sizeof(Caps)); + if (SUCCEEDED(hrc)) + LogRel(("NEM: Warning! Unknown capability %#x returning: %.*Rhxs\n", i, sizeof(Caps), &Caps)); + } + } + + /* + * For proper operation, we require CPUID exits. + */ + if (!pVM->nem.s.fExtendedCpuIdExit) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended CPUID exit support"); + if (!pVM->nem.s.fExtendedMsrExit) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended MSR exit support"); + if (!pVM->nem.s.fExtendedXcptExit) + return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended exception exit support"); + +#undef NEM_LOG_REL_CAP_EX +#undef NEM_LOG_REL_CAP_SUB_EX +#undef NEM_LOG_REL_CAP_SUB + return VINF_SUCCESS; +} + + +/** + * Used to fill in g_IoCtlGetHvPartitionId. 
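+ *
+ * This detector is swapped in for NtDeviceIoControlFile for the duration of a
+ * single fake VidGetHvPartitionId call: it validates the fake inputs, writes the
+ * fake partition ID to the output buffer, and records the I/O control function
+ * number and buffer sizes.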
+ */ +static NTSTATUS WINAPI +nemR3WinIoctlDetector_GetHvPartitionId(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx, + PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput, + PVOID pvOutput, ULONG cbOutput) +{ + AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1); + RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx); + AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5); + AssertLogRelMsgReturn(cbInput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8); + RT_NOREF(pvInput); + + AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_ID), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10); + *(HV_PARTITION_ID *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID; + + g_IoCtlGetHvPartitionId.cbInput = cbInput; + g_IoCtlGetHvPartitionId.cbOutput = cbOutput; + g_IoCtlGetHvPartitionId.uFunction = uFunction; + + return STATUS_SUCCESS; +} + + +/** + * Used to fill in g_IoCtlStartVirtualProcessor. + */ +static NTSTATUS WINAPI +nemR3WinIoctlDetector_StartVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx, + PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput, + PVOID pvOutput, ULONG cbOutput) +{ + AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1); + RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx); + AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5); + AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8); + AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, + ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10); + RT_NOREF(pvOutput); + + g_IoCtlStartVirtualProcessor.cbInput = cbInput; + g_IoCtlStartVirtualProcessor.cbOutput = cbOutput; + g_IoCtlStartVirtualProcessor.uFunction = uFunction; + + return STATUS_SUCCESS; +} + + +/** + * Used to fill in g_IoCtlStartVirtualProcessor. 
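+ *
+ * (This detector records the I/O control details of VidStopVirtualProcessor into
+ * g_IoCtlStopVirtualProcessor; see the assignments in the function below.)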
+ */ +static NTSTATUS WINAPI +nemR3WinIoctlDetector_StopVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx, + PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput, + PVOID pvOutput, ULONG cbOutput) +{ + AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1); + RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx); + AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5); + AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8); + AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, + ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10); + RT_NOREF(pvOutput); + + g_IoCtlStopVirtualProcessor.cbInput = cbInput; + g_IoCtlStopVirtualProcessor.cbOutput = cbOutput; + g_IoCtlStopVirtualProcessor.uFunction = uFunction; + + return STATUS_SUCCESS; +} + + +/** + * Used to fill in g_IoCtlMessageSlotHandleAndGetNext + */ +static NTSTATUS WINAPI +nemR3WinIoctlDetector_MessageSlotHandleAndGetNext(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx, + PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput, + PVOID pvOutput, ULONG cbOutput) +{ + AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1); + RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx); + AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5); + + if (g_uBuildNo >= 17758) + { + /* No timeout since about build 17758, it's now always an infinite wait. So, a somewhat compatible change. 
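+           That is, on such builds the input buffer is expected to end just before
+           the cMillies member, which is what the RT_UOFFSETOF check below asserts.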
*/ + AssertLogRelMsgReturn(cbInput == RT_UOFFSETOF(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT, cMillies), + ("cbInput=%#x\n", cbInput), + STATUS_INVALID_PARAMETER_8); + AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9); + PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput; + AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX + && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE, + ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies), + STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10); + } + else + { + AssertLogRelMsgReturn(cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT), ("cbInput=%#x\n", cbInput), + STATUS_INVALID_PARAMETER_8); + AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9); + PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput; + AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX + && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE + && pVidIn->cMillies == NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT, + ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies), + STATUS_INVALID_PARAMETER_9); + AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10); + RT_NOREF(pvOutput); + } + + g_IoCtlMessageSlotHandleAndGetNext.cbInput = cbInput; + g_IoCtlMessageSlotHandleAndGetNext.cbOutput = cbOutput; + g_IoCtlMessageSlotHandleAndGetNext.uFunction = uFunction; + + return STATUS_SUCCESS; +} + + +#ifdef LOG_ENABLED +/** + * Used to fill in what g_pIoCtlDetectForLogging points to. + */ +static NTSTATUS WINAPI nemR3WinIoctlDetector_ForLogging(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx, + PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput, + PVOID pvOutput, ULONG cbOutput) +{ + RT_NOREF(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, pvInput, pvOutput); + + g_pIoCtlDetectForLogging->cbInput = cbInput; + g_pIoCtlDetectForLogging->cbOutput = cbOutput; + g_pIoCtlDetectForLogging->uFunction = uFunction; + + return STATUS_SUCCESS; +} +#endif + + +/** + * Worker for nemR3NativeInit that detect I/O control function numbers for VID. + * + * We use the function numbers directly in ring-0 and to name functions when + * logging NtDeviceIoControlFile calls. + * + * @note We could alternatively do this by disassembling the respective + * functions, but hooking NtDeviceIoControlFile and making fake calls + * more easily provides the desired information. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. Will set I/O + * control info members. + * @param pErrInfo Where to always return error info. + */ +static int nemR3WinInitDiscoverIoControlProperties(PVM pVM, PRTERRINFO pErrInfo) +{ + /* + * Probe the I/O control information for select VID APIs so we can use + * them directly from ring-0 and better log them. + * + */ + decltype(NtDeviceIoControlFile) * const pfnOrg = *g_ppfnVidNtDeviceIoControlFile; + + /* VidGetHvPartitionId - must work due to memory. 
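+       The same pattern is used for each VID API probed below: temporarily point the
+       patched import at a detector function, invoke the API with fake arguments so
+       the detector can record the I/O control function number and buffer sizes, and
+       then restore the real NtDeviceIoControlFile pointer.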
*/ + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetHvPartitionId; + HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID; + BOOL fRet = g_pfnVidGetHvPartitionId(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &idHvPartition); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + AssertReturn(fRet && idHvPartition == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID && g_IoCtlGetHvPartitionId.uFunction != 0, + RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, + "Problem figuring out VidGetHvPartitionId: fRet=%u idHvPartition=%#x dwErr=%u", + fRet, idHvPartition, GetLastError()) ); + LogRel(("NEM: VidGetHvPartitionId -> fun:%#x in:%#x out:%#x\n", + g_IoCtlGetHvPartitionId.uFunction, g_IoCtlGetHvPartitionId.cbInput, g_IoCtlGetHvPartitionId.cbOutput)); + + int rcRet = VINF_SUCCESS; + /* VidStartVirtualProcessor */ + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StartVirtualProcessor; + fRet = g_pfnVidStartVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + AssertStmt(fRet && g_IoCtlStartVirtualProcessor.uFunction != 0, + rcRet = RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY, + "Problem figuring out VidStartVirtualProcessor: fRet=%u dwErr=%u", + fRet, GetLastError()) ); + LogRel(("NEM: VidStartVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStartVirtualProcessor.uFunction, + g_IoCtlStartVirtualProcessor.cbInput, g_IoCtlStartVirtualProcessor.cbOutput)); + + /* VidStopVirtualProcessor */ + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StopVirtualProcessor; + fRet = g_pfnVidStopVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + AssertStmt(fRet && g_IoCtlStopVirtualProcessor.uFunction != 0, + rcRet = RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY, + "Problem figuring out VidStopVirtualProcessor: fRet=%u dwErr=%u", + fRet, GetLastError()) ); + LogRel(("NEM: VidStopVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStopVirtualProcessor.uFunction, + g_IoCtlStopVirtualProcessor.cbInput, g_IoCtlStopVirtualProcessor.cbOutput)); + + /* VidMessageSlotHandleAndGetNext */ + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_MessageSlotHandleAndGetNext; + fRet = g_pfnVidMessageSlotHandleAndGetNext(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, + NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, VID_MSHAGN_F_HANDLE_MESSAGE, + NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + AssertStmt(fRet && g_IoCtlMessageSlotHandleAndGetNext.uFunction != 0, + rcRet = RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY, + "Problem figuring out VidMessageSlotHandleAndGetNext: fRet=%u dwErr=%u", + fRet, GetLastError()) ); + LogRel(("NEM: VidMessageSlotHandleAndGetNext -> fun:%#x in:%#x out:%#x\n", + g_IoCtlMessageSlotHandleAndGetNext.uFunction, g_IoCtlMessageSlotHandleAndGetNext.cbInput, + g_IoCtlMessageSlotHandleAndGetNext.cbOutput)); + +#ifdef LOG_ENABLED + /* The following are only for logging: */ + union + { + VID_MAPPED_MESSAGE_SLOT MapSlot; + HV_REGISTER_NAME Name; + HV_REGISTER_VALUE Value; + } uBuf; + + /* VidMessageSlotMap */ + g_pIoCtlDetectForLogging = &g_IoCtlMessageSlotMap; + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging; + fRet = g_pfnVidMessageSlotMap(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &uBuf.MapSlot, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + Assert(fRet); + LogRel(("NEM: VidMessageSlotMap -> fun:%#x in:%#x out:%#x\n", 
g_pIoCtlDetectForLogging->uFunction, + g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput)); + + /* VidGetVirtualProcessorState */ + uBuf.Name = HvRegisterExplicitSuspend; + g_pIoCtlDetectForLogging = &g_IoCtlGetVirtualProcessorState; + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging; + fRet = g_pfnVidGetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, + &uBuf.Name, 1, &uBuf.Value); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + Assert(fRet); + LogRel(("NEM: VidGetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction, + g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput)); + + /* VidSetVirtualProcessorState */ + uBuf.Name = HvRegisterExplicitSuspend; + g_pIoCtlDetectForLogging = &g_IoCtlSetVirtualProcessorState; + *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging; + fRet = g_pfnVidSetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, + &uBuf.Name, 1, &uBuf.Value); + *g_ppfnVidNtDeviceIoControlFile = pfnOrg; + Assert(fRet); + LogRel(("NEM: VidSetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction, + g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput)); + + g_pIoCtlDetectForLogging = NULL; +#endif + + /* Done. */ + pVM->nem.s.IoCtlGetHvPartitionId = g_IoCtlGetHvPartitionId; + pVM->nem.s.IoCtlStartVirtualProcessor = g_IoCtlStartVirtualProcessor; + pVM->nem.s.IoCtlStopVirtualProcessor = g_IoCtlStopVirtualProcessor; + pVM->nem.s.IoCtlMessageSlotHandleAndGetNext = g_IoCtlMessageSlotHandleAndGetNext; + return rcRet; +} + + +/** + * Creates and sets up a Hyper-V (exo) partition. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pErrInfo Where to always return error info. + */ +static int nemR3WinInitCreatePartition(PVM pVM, PRTERRINFO pErrInfo) +{ + AssertReturn(!pVM->nem.s.hPartition, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order")); + AssertReturn(!pVM->nem.s.hPartitionDevice, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order")); + + /* + * Create the partition. + */ + WHV_PARTITION_HANDLE hPartition; + HRESULT hrc = WHvCreatePartition(&hPartition); + if (FAILED(hrc)) + return RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, "WHvCreatePartition failed with %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + + int rc; + + /* + * Set partition properties, most importantly the CPU count. + */ + /** + * @todo Someone at Microsoft please explain another weird API: + * - Why this API doesn't take the WHV_PARTITION_PROPERTY_CODE value as an + * argument rather than as part of the struct. That is so weird if you've + * used any other NT or windows API, including WHvGetCapability(). + * - Why use PVOID when WHV_PARTITION_PROPERTY is what's expected. We + * technically only need 9 bytes for setting/getting + * WHVPartitionPropertyCodeProcessorClFlushSize, but the API insists on 16. 
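+ *
+ * A read-back sketch for illustration only (this code path does not use it; the
+ * WHvGetPartitionProperty signature is assumed from the WinHvPlatform headers):
+ * @code
+ *     WHV_PARTITION_PROPERTY Prop;
+ *     UINT32                 cbWritten = 0;
+ *     HRESULT hrc2 = WHvGetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount,
+ *                                            &Prop, sizeof(Prop), &cbWritten);
+ * @endcode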
*/ + WHV_PARTITION_PROPERTY Property; + RT_ZERO(Property); + Property.ProcessorCount = pVM->cCpus; + hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount, &Property, sizeof(Property)); + if (SUCCEEDED(hrc)) + { + RT_ZERO(Property); + Property.ExtendedVmExits.X64CpuidExit = pVM->nem.s.fExtendedCpuIdExit; /** @todo Register fixed results and restrict cpuid exits */ + Property.ExtendedVmExits.X64MsrExit = pVM->nem.s.fExtendedMsrExit; + Property.ExtendedVmExits.ExceptionExit = pVM->nem.s.fExtendedXcptExit; + hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExtendedVmExits, &Property, sizeof(Property)); + if (SUCCEEDED(hrc)) + { + /* + * We'll continue setup in nemR3NativeInitAfterCPUM. + */ + pVM->nem.s.fCreatedEmts = false; + pVM->nem.s.hPartition = hPartition; + LogRel(("NEM: Created partition %p.\n", hPartition)); + return VINF_SUCCESS; + } + + rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, + "Failed setting WHvPartitionPropertyCodeExtendedVmExits to %'#RX64: %Rhrc", + Property.ExtendedVmExits.AsUINT64, hrc); + } + else + rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, + "Failed setting WHvPartitionPropertyCodeProcessorCount to %u: %Rhrc (Last=%#x/%u)", + pVM->cCpus, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + WHvDeletePartition(hPartition); + + Assert(!pVM->nem.s.hPartitionDevice); + Assert(!pVM->nem.s.hPartition); + return rc; +} + + +/** + * Makes sure APIC and firmware will not allow X2APIC mode. + * + * This is rather ugly. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + */ +static int nemR3WinDisableX2Apic(PVM pVM) +{ + /* + * First make sure the 'Mode' config value of the APIC isn't set to X2APIC. + * This defaults to APIC, so no need to change unless it's X2APIC. + */ + PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic/0/Config"); + if (pCfg) + { + uint8_t bMode = 0; + int rc = CFGMR3QueryU8(pCfg, "Mode", &bMode); + AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc); + if (RT_SUCCESS(rc) && bMode == PDMAPICMODE_X2APIC) + { + LogRel(("NEM: Adjusting APIC configuration from X2APIC to APIC max mode. X2APIC is not supported by the WinHvPlatform API!\n")); + LogRel(("NEM: Disable Hyper-V if you need X2APIC for your guests!\n")); + rc = CFGMR3RemoveValue(pCfg, "Mode"); + rc = CFGMR3InsertInteger(pCfg, "Mode", PDMAPICMODE_APIC); + AssertLogRelRCReturn(rc, rc); + } + } + + /* + * Now the firmwares. + * These also defaults to APIC and only needs adjusting if configured to X2APIC (2). + */ + static const char * const s_apszFirmwareConfigs[] = + { + "/Devices/efi/0/Config", + "/Devices/pcbios/0/Config", + }; + for (unsigned i = 0; i < RT_ELEMENTS(s_apszFirmwareConfigs); i++) + { + pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/APIC/0/Config"); + if (pCfg) + { + uint8_t bMode = 0; + int rc = CFGMR3QueryU8(pCfg, "APIC", &bMode); + AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc); + if (RT_SUCCESS(rc) && bMode == 2) + { + LogRel(("NEM: Adjusting %s/Mode from 2 (X2APIC) to 1 (APIC).\n", s_apszFirmwareConfigs[i])); + rc = CFGMR3RemoveValue(pCfg, "APIC"); + rc = CFGMR3InsertInteger(pCfg, "APIC", 1); + AssertLogRelRCReturn(rc, rc); + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Try initialize the native API. + * + * This may only do part of the job, more can be done in + * nemR3NativeInitAfterCPUM() and nemR3NativeInitCompleted(). + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param fFallback Whether we're in fallback mode or use-NEM mode. In + * the latter we'll fail if we cannot initialize. + * @param fForced Whether the HMForced flag is set and we should + * fail if we cannot initialize. + */ +int nemR3NativeInit(PVM pVM, bool fFallback, bool fForced) +{ + g_uBuildNo = RTSystemGetNtBuildNo(); + + /* + * Some state init. + */ + pVM->nem.s.fA20Enabled = true; +#if 0 + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PNEMCPU pNemCpu = &pVM->aCpus[iCpu].nem.s; + } +#endif + + /* + * Error state. + * The error message will be non-empty on failure and 'rc' will be set too. + */ + RTERRINFOSTATIC ErrInfo; + PRTERRINFO pErrInfo = RTErrInfoInitStatic(&ErrInfo); + int rc = nemR3WinInitProbeAndLoad(fForced, pErrInfo); + if (RT_SUCCESS(rc)) + { + /* + * Check the capabilties of the hypervisor, starting with whether it's present. + */ + rc = nemR3WinInitCheckCapabilities(pVM, pErrInfo); + if (RT_SUCCESS(rc)) + { + /* + * Discover the VID I/O control function numbers we need. + */ + rc = nemR3WinInitDiscoverIoControlProperties(pVM, pErrInfo); + if (rc == VERR_NEM_RING3_ONLY) + { + if (pVM->nem.s.fUseRing0Runloop) + { + LogRel(("NEM: Disabling UseRing0Runloop.\n")); + pVM->nem.s.fUseRing0Runloop = false; + } + rc = VINF_SUCCESS; + } + if (RT_SUCCESS(rc)) + { + /* + * Check out our ring-0 capabilities. + */ + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_NEM_INIT_VM, 0, NULL); + if (RT_SUCCESS(rc)) + { + /* + * Create and initialize a partition. + */ + rc = nemR3WinInitCreatePartition(pVM, pErrInfo); + if (RT_SUCCESS(rc)) + { + VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_NATIVE_API); + Log(("NEM: Marked active!\n")); + nemR3WinDisableX2Apic(pVM); + + /* Register release statistics */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PNEMCPU pNemCpu = &pVM->aCpus[iCpu].nem.s; + STAMR3RegisterF(pVM, &pNemCpu->StatExitPortIo, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of port I/O exits", "/NEM/CPU%u/ExitPortIo", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitMemUnmapped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unmapped memory exits", "/NEM/CPU%u/ExitMemUnmapped", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitMemIntercept, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of intercepted memory exits", "/NEM/CPU%u/ExitMemIntercept", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitHalt, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitHalt", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitInterruptWindow, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitInterruptWindow", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitCpuId, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of CPUID exits", "/NEM/CPU%u/ExitCpuId", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitMsr, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of MSR access exits", "/NEM/CPU%u/ExitMsr", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitException, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of exception exits", "/NEM/CPU%u/ExitException", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionBp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #BP exits", "/NEM/CPU%u/ExitExceptionBp", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionDb, 
STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #DB exits", "/NEM/CPU%u/ExitExceptionDb", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #UD exits", "/NEM/CPU%u/ExitExceptionUd", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUdHandled, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of handled #UD exits", "/NEM/CPU%u/ExitExceptionUdHandled", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatExitUnrecoverable, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unrecoverable exits", "/NEM/CPU%u/ExitUnrecoverable", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatGetMsgTimeout, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of get message timeouts/alerts", "/NEM/CPU%u/GetMsgTimeout", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuSuccess, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of successful CPU stops", "/NEM/CPU%u/StopCpuSuccess", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPending, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stops", "/NEM/CPU%u/StopCpuPending", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingAlerts,STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stop alerts", "/NEM/CPU%u/StopCpuPendingAlerts", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingOdd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of odd pending CPU stops (see code)", "/NEM/CPU%u/StopCpuPendingOdd", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatCancelChangedState, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel changed state", "/NEM/CPU%u/CancelChangedState", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatCancelAlertedThread, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel alerted EMT", "/NEM/CPU%u/CancelAlertedEMT", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPre, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pre execution FF breaks", "/NEM/CPU%u/BreakOnFFPre", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPost, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of post execution FF breaks", "/NEM/CPU%u/BreakOnFFPost", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnCancel, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel execution breaks", "/NEM/CPU%u/BreakOnCancel", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnStatus, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of status code breaks", "/NEM/CPU%u/BreakOnStatus", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatImportOnDemand, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of on-demand state imports", "/NEM/CPU%u/ImportOnDemand", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturn, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of state imports on loop return", "/NEM/CPU%u/ImportOnReturn", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturnSkipped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of skipped state imports on loop return", "/NEM/CPU%u/ImportOnReturnSkipped", iCpu); + STAMR3RegisterF(pVM, &pNemCpu->StatQueryCpuTick, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of TSC queries", "/NEM/CPU%u/QueryCpuTick", iCpu); + } + + PUVM pUVM = 
pVM->pUVM; + STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesAvailable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, + STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Free pages available to the hypervisor", + "/NEM/R0Stats/cPagesAvailable"); + STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesInUse, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, + STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Pages in use by hypervisor", + "/NEM/R0Stats/cPagesInUse"); + } + } + } + } + } + + /* + * We only fail if in forced mode, otherwise just log the complaint and return. + */ + Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API || RTErrInfoIsSet(pErrInfo)); + if ( (fForced || !fFallback) + && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NATIVE_API) + return VMSetError(pVM, RT_SUCCESS_NP(rc) ? VERR_NEM_NOT_AVAILABLE : rc, RT_SRC_POS, "%s", pErrInfo->pszMsg); + + if (RTErrInfoIsSet(pErrInfo)) + LogRel(("NEM: Not available: %s\n", pErrInfo->pszMsg)); + return VINF_SUCCESS; +} + + +/** + * This is called after CPUMR3Init is done. + * + * @returns VBox status code. + * @param pVM The VM handle.. + */ +int nemR3NativeInitAfterCPUM(PVM pVM) +{ + /* + * Validate sanity. + */ + WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition; + AssertReturn(hPartition != NULL, VERR_WRONG_ORDER); + AssertReturn(!pVM->nem.s.hPartitionDevice, VERR_WRONG_ORDER); + AssertReturn(!pVM->nem.s.fCreatedEmts, VERR_WRONG_ORDER); + AssertReturn(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API, VERR_WRONG_ORDER); + + /* + * Continue setting up the partition now that we've got most of the CPUID feature stuff. + */ + WHV_PARTITION_PROPERTY Property; + HRESULT hrc; + +#if 0 + /* Not sure if we really need to set the vendor. + Update: Apparently we don't. WHvPartitionPropertyCodeProcessorVendor was removed in 17110. */ + RT_ZERO(Property); + Property.ProcessorVendor = pVM->nem.s.enmCpuVendor == CPUMCPUVENDOR_AMD ? WHvProcessorVendorAmd + : WHvProcessorVendorIntel; + hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorVendor, &Property, sizeof(Property)); + if (FAILED(hrc)) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Failed to set WHvPartitionPropertyCodeProcessorVendor to %u: %Rhrc (Last=%#x/%u)", + Property.ProcessorVendor, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); +#endif + + /* Not sure if we really need to set the cache line flush size. */ + RT_ZERO(Property); + Property.ProcessorClFlushSize = pVM->nem.s.cCacheLineFlushShift; + hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorClFlushSize, &Property, sizeof(Property)); + if (FAILED(hrc)) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Failed to set WHvPartitionPropertyCodeProcessorClFlushSize to %u: %Rhrc (Last=%#x/%u)", + pVM->nem.s.cCacheLineFlushShift, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + + /* Intercept #DB, #BP and #UD exceptions. */ + RT_ZERO(Property); + Property.ExceptionExitBitmap = RT_BIT_64(WHvX64ExceptionTypeDebugTrapOrFault) + | RT_BIT_64(WHvX64ExceptionTypeBreakpointTrap) + | RT_BIT_64(WHvX64ExceptionTypeInvalidOpcodeFault); + hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExceptionExitBitmap, &Property, sizeof(Property)); + if (FAILED(hrc)) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Failed to set WHvPartitionPropertyCodeExceptionExitBitmap to %#RX64: %Rhrc (Last=%#x/%u)", + Property.ExceptionExitBitmap, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + + + /* + * Sync CPU features with CPUM. 
+ */ + /** @todo sync CPU features with CPUM. */ + + /* Set the partition property. */ + RT_ZERO(Property); + Property.ProcessorFeatures.AsUINT64 = pVM->nem.s.uCpuFeatures.u64; + hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorFeatures, &Property, sizeof(Property)); + if (FAILED(hrc)) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Failed to set WHvPartitionPropertyCodeProcessorFeatures to %'#RX64: %Rhrc (Last=%#x/%u)", + pVM->nem.s.uCpuFeatures.u64, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + + /* + * Set up the partition and create EMTs. + * + * Seems like this is where the partition is actually instantiated and we get + * a handle to it. + */ + hrc = WHvSetupPartition(hPartition); + if (FAILED(hrc)) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)", + hrc, RTNtLastStatusValue(), RTNtLastErrorValue()); + + /* Get the handle. */ + HANDLE hPartitionDevice; + __try + { + hPartitionDevice = ((HANDLE *)hPartition)[1]; + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + hrc = GetExceptionCode(); + hPartitionDevice = NULL; + } + if ( hPartitionDevice == NULL + || hPartitionDevice == (HANDLE)(intptr_t)-1) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Failed to get device handle for partition %p: %Rhrc", hPartition, hrc); + + HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID; + if (!g_pfnVidGetHvPartitionId(hPartitionDevice, &idHvPartition)) + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Failed to get device handle and/or partition ID for %p (hPartitionDevice=%p, Last=%#x/%u)", + hPartition, hPartitionDevice, RTNtLastStatusValue(), RTNtLastErrorValue()); + pVM->nem.s.hPartitionDevice = hPartitionDevice; + pVM->nem.s.idHvPartition = idHvPartition; + + /* + * Setup the EMTs. 
+ */ + VMCPUID iCpu; + for (iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + + pVCpu->nem.s.hNativeThreadHandle = (RTR3PTR)RTThreadGetNativeHandle(VMR3GetThreadHandle(pVCpu->pUVCpu)); + Assert((HANDLE)pVCpu->nem.s.hNativeThreadHandle != INVALID_HANDLE_VALUE); + +#ifndef NEM_WIN_USE_OUR_OWN_RUN_API +# ifdef NEM_WIN_WITH_RING0_RUNLOOP + if (!pVM->nem.s.fUseRing0Runloop) +# endif + { + hrc = WHvCreateVirtualProcessor(hPartition, iCpu, 0 /*fFlags*/); + if (FAILED(hrc)) + { + NTSTATUS const rcNtLast = RTNtLastStatusValue(); + DWORD const dwErrLast = RTNtLastErrorValue(); + while (iCpu-- > 0) + { + HRESULT hrc2 = WHvDeleteVirtualProcessor(hPartition, iCpu); + AssertLogRelMsg(SUCCEEDED(hrc2), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n", + hPartition, iCpu, hrc2, RTNtLastStatusValue(), + RTNtLastErrorValue())); + } + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast); + } + } +# ifdef NEM_WIN_WITH_RING0_RUNLOOP + else +# endif +#endif /* !NEM_WIN_USE_OUR_OWN_RUN_API */ +#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_OUR_OWN_RUN_API) + { + VID_MAPPED_MESSAGE_SLOT MappedMsgSlot = { NULL, UINT32_MAX, UINT32_MAX }; + if (g_pfnVidMessageSlotMap(hPartitionDevice, &MappedMsgSlot, iCpu)) + { + AssertLogRelMsg(MappedMsgSlot.iCpu == iCpu && MappedMsgSlot.uParentAdvisory == UINT32_MAX, + ("%#x %#x (iCpu=%#x)\n", MappedMsgSlot.iCpu, MappedMsgSlot.uParentAdvisory, iCpu)); + pVCpu->nem.s.pvMsgSlotMapping = MappedMsgSlot.pMsgBlock; + } + else + { + NTSTATUS const rcNtLast = RTNtLastStatusValue(); + DWORD const dwErrLast = RTNtLastErrorValue(); + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, + "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast); + } + } +#endif + } + pVM->nem.s.fCreatedEmts = true; + + /* + * Do some more ring-0 initialization now that we've got the partition handle. + */ + int rc = VMMR3CallR0Emt(pVM, &pVM->aCpus[0], VMMR0_DO_NEM_INIT_VM_PART_2, 0, NULL); + if (RT_SUCCESS(rc)) + { + LogRel(("NEM: Successfully set up partition (device handle %p, partition ID %#llx)\n", hPartitionDevice, idHvPartition)); + +#if 1 + VMMR3CallR0Emt(pVM, &pVM->aCpus[0], VMMR0_DO_NEM_UPDATE_STATISTICS, 0, NULL); + LogRel(("NEM: Memory balance: %#RX64 out of %#RX64 pages in use\n", + pVM->nem.s.R0Stats.cPagesInUse, pVM->nem.s.R0Stats.cPagesAvailable)); +#endif + + /* + * Register statistics on shared pages. + */ + /** @todo HvCallMapStatsPage */ + + /* + * Adjust features. + * Note! We've already disabled X2APIC via CFGM during the first init call. + */ + +#if 0 && defined(DEBUG_bird) + /* + * Poke and probe a little. 
+ */ + PVMCPU pVCpu = &pVM->aCpus[0]; + uint32_t aRegNames[1024]; + HV_REGISTER_VALUE aRegValues[1024]; + uint32_t aPropCodes[128]; + uint64_t aPropValues[128]; + for (int iOuter = 0; iOuter < 5; iOuter++) + { + LogRel(("\niOuter %d\n", iOuter)); +# if 1 + /* registers */ + uint32_t iRegValue = 0; + uint32_t cRegChanges = 0; + for (uint32_t iReg = 0; iReg < 0x001101ff; iReg++) + { + if (iOuter != 0 && aRegNames[iRegValue] > iReg) + continue; + RT_ZERO(pVCpu->nem.s.Hypercall.Experiment); + pVCpu->nem.s.Hypercall.Experiment.uItem = iReg; + int rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 0, NULL); + AssertLogRelRCBreak(rc2); + if (pVCpu->nem.s.Hypercall.Experiment.fSuccess) + { + LogRel(("Register %#010x = %#18RX64, %#18RX64\n", iReg, + pVCpu->nem.s.Hypercall.Experiment.uLoValue, pVCpu->nem.s.Hypercall.Experiment.uHiValue)); + if (iReg == HvX64RegisterTsc) + { + uint64_t uTsc = ASMReadTSC(); + LogRel(("TSC = %#18RX64; Delta %#18RX64 or %#18RX64\n", + uTsc, pVCpu->nem.s.Hypercall.Experiment.uLoValue - uTsc, uTsc - pVCpu->nem.s.Hypercall.Experiment.uLoValue)); + } + + if (iOuter == 0) + aRegNames[iRegValue] = iReg; + else if( aRegValues[iRegValue].Reg128.Low64 != pVCpu->nem.s.Hypercall.Experiment.uLoValue + || aRegValues[iRegValue].Reg128.High64 != pVCpu->nem.s.Hypercall.Experiment.uHiValue) + { + LogRel(("Changed from %#18RX64, %#18RX64 !!\n", + aRegValues[iRegValue].Reg128.Low64, aRegValues[iRegValue].Reg128.High64)); + LogRel(("Delta %#18RX64, %#18RX64 !!\n", + pVCpu->nem.s.Hypercall.Experiment.uLoValue - aRegValues[iRegValue].Reg128.Low64, + pVCpu->nem.s.Hypercall.Experiment.uHiValue - aRegValues[iRegValue].Reg128.High64)); + cRegChanges++; + } + aRegValues[iRegValue].Reg128.Low64 = pVCpu->nem.s.Hypercall.Experiment.uLoValue; + aRegValues[iRegValue].Reg128.High64 = pVCpu->nem.s.Hypercall.Experiment.uHiValue; + iRegValue++; + AssertBreak(iRegValue < RT_ELEMENTS(aRegValues)); + } + } + LogRel(("Found %u registers, %u changed\n", iRegValue, cRegChanges)); +# endif +# if 1 + /* partition properties */ + uint32_t iPropValue = 0; + uint32_t cPropChanges = 0; + for (uint32_t iProp = 0; iProp < 0xc11ff; iProp++) + { + if (iProp == HvPartitionPropertyDebugChannelId /* hangs host */) + continue; + if (iOuter != 0 && aPropCodes[iPropValue] > iProp) + continue; + RT_ZERO(pVCpu->nem.s.Hypercall.Experiment); + pVCpu->nem.s.Hypercall.Experiment.uItem = iProp; + int rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 1, NULL); + AssertLogRelRCBreak(rc2); + if (pVCpu->nem.s.Hypercall.Experiment.fSuccess) + { + LogRel(("Property %#010x = %#18RX64\n", iProp, pVCpu->nem.s.Hypercall.Experiment.uLoValue)); + if (iOuter == 0) + aPropCodes[iPropValue] = iProp; + else if (aPropValues[iPropValue] != pVCpu->nem.s.Hypercall.Experiment.uLoValue) + { + LogRel(("Changed from %#18RX64, delta %#18RX64!!\n", + aPropValues[iPropValue], pVCpu->nem.s.Hypercall.Experiment.uLoValue - aPropValues[iPropValue])); + cRegChanges++; + } + aPropValues[iPropValue] = pVCpu->nem.s.Hypercall.Experiment.uLoValue; + iPropValue++; + AssertBreak(iPropValue < RT_ELEMENTS(aPropValues)); + } + } + LogRel(("Found %u properties, %u changed\n", iPropValue, cPropChanges)); +# endif + + /* Modify the TSC register value and see what changes. 
*/ + if (iOuter != 0) + { + RT_ZERO(pVCpu->nem.s.Hypercall.Experiment); + pVCpu->nem.s.Hypercall.Experiment.uItem = HvX64RegisterTsc; + pVCpu->nem.s.Hypercall.Experiment.uHiValue = UINT64_C(0x00000fffffffffff) >> iOuter; + pVCpu->nem.s.Hypercall.Experiment.uLoValue = UINT64_C(0x0011100000000000) << iOuter; + VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 2, NULL); + LogRel(("Setting HvX64RegisterTsc -> %RTbool (%#RX64)\n", pVCpu->nem.s.Hypercall.Experiment.fSuccess, pVCpu->nem.s.Hypercall.Experiment.uStatus)); + } + + RT_ZERO(pVCpu->nem.s.Hypercall.Experiment); + pVCpu->nem.s.Hypercall.Experiment.uItem = HvX64RegisterTsc; + VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 0, NULL); + LogRel(("HvX64RegisterTsc = %#RX64, %#RX64\n", pVCpu->nem.s.Hypercall.Experiment.uLoValue, pVCpu->nem.s.Hypercall.Experiment.uHiValue)); + } + +#endif + return VINF_SUCCESS; + } + return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, "Call to NEMR0InitVMPart2 failed: %Rrc", rc); +} + + +int nemR3NativeInitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + //BOOL fRet = SetThreadPriority(GetCurrentThread(), 0); + //AssertLogRel(fRet); + + NOREF(pVM); NOREF(enmWhat); + return VINF_SUCCESS; +} + + +int nemR3NativeTerm(PVM pVM) +{ + /* + * Delete the partition. + */ + WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition; + pVM->nem.s.hPartition = NULL; + pVM->nem.s.hPartitionDevice = NULL; + if (hPartition != NULL) + { + VMCPUID iCpu = pVM->nem.s.fCreatedEmts ? pVM->cCpus : 0; + LogRel(("NEM: Destroying partition %p with its %u VCpus...\n", hPartition, iCpu)); + while (iCpu-- > 0) + { + pVM->aCpus[iCpu].nem.s.pvMsgSlotMapping = NULL; +#ifndef NEM_WIN_USE_OUR_OWN_RUN_API +# ifdef NEM_WIN_WITH_RING0_RUNLOOP + if (!pVM->nem.s.fUseRing0Runloop) +# endif + { + HRESULT hrc = WHvDeleteVirtualProcessor(hPartition, iCpu); + AssertLogRelMsg(SUCCEEDED(hrc), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n", + hPartition, iCpu, hrc, RTNtLastStatusValue(), + RTNtLastErrorValue())); + } +#endif + } + WHvDeletePartition(hPartition); + } + pVM->nem.s.fCreatedEmts = false; + return VINF_SUCCESS; +} + + +/** + * VM reset notification. + * + * @param pVM The cross context VM structure. + */ +void nemR3NativeReset(PVM pVM) +{ + /* Unfix the A20 gate. */ + pVM->nem.s.fA20Fixed = false; +} + + +/** + * Reset CPU due to INIT IPI or hot (un)plugging. + * + * @param pVCpu The cross context virtual CPU structure of the CPU being + * reset. + * @param fInitIpi Whether this is the INIT IPI or hot (un)plugging case. + */ +void nemR3NativeResetCpu(PVMCPU pVCpu, bool fInitIpi) +{ + /* Lock the A20 gate if INIT IPI, make sure it's enabled. */ + if (fInitIpi && pVCpu->idCpu > 0) + { + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (!pVM->nem.s.fA20Enabled) + nemR3NativeNotifySetA20(pVCpu, true); + pVM->nem.s.fA20Enabled = true; + pVM->nem.s.fA20Fixed = true; + } +} + + +VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu) +{ +#ifdef NEM_WIN_WITH_RING0_RUNLOOP + if (pVM->nem.s.fUseRing0Runloop) + { + for (;;) + { + VBOXSTRICTRC rcStrict = VMMR3CallR0EmtFast(pVM, pVCpu, VMMR0_DO_NEM_RUN); + if (RT_SUCCESS(rcStrict)) + { + /* + * We deal with VINF_NEM_FLUSH_TLB here, since we're running the risk of + * getting these while we already got another RC (I/O ports). 
+ */ + /* Status codes: */ + VBOXSTRICTRC rcPending = pVCpu->nem.s.rcPending; + pVCpu->nem.s.rcPending = VINF_SUCCESS; + if (rcStrict == VINF_NEM_FLUSH_TLB || rcPending == VINF_NEM_FLUSH_TLB) + { + LogFlow(("nemR3NativeRunGC: calling PGMFlushTLB...\n")); + int rc = PGMFlushTLB(pVCpu, CPUMGetGuestCR3(pVCpu), true); + AssertRCReturn(rc, rc); + if (rcStrict == VINF_NEM_FLUSH_TLB) + { + if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK | VM_FF_HP_R0_PRE_HM_MASK) + && !VMCPU_FF_IS_ANY_SET(pVCpu, (VMCPU_FF_HIGH_PRIORITY_POST_MASK | VMCPU_FF_HP_R0_PRE_HM_MASK) + & ~VMCPU_FF_RESUME_GUEST_MASK)) + { + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK); + continue; + } + rcStrict = VINF_SUCCESS; + } + } + else + AssertMsg(rcPending == VINF_SUCCESS, ("rcPending=%Rrc\n", VBOXSTRICTRC_VAL(rcPending) )); + } + LogFlow(("nemR3NativeRunGC: returns %Rrc\n", VBOXSTRICTRC_VAL(rcStrict) )); + return rcStrict; + } + } +#endif + return nemHCWinRunGC(pVM, pVCpu, NULL /*pGVM*/, NULL /*pGVCpu*/); +} + + +bool nemR3NativeCanExecuteGuest(PVM pVM, PVMCPU pVCpu) +{ + NOREF(pVM); NOREF(pVCpu); + return true; +} + + +bool nemR3NativeSetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable) +{ + NOREF(pVM); NOREF(pVCpu); NOREF(fEnable); + return false; +} + + +/** + * Forced flag notification call from VMEmt.h. + * + * This is only called when pVCpu is in the VMCPUSTATE_STARTED_EXEC_NEM state. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the CPU + * to be notified. + * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_XXX. + */ +void nemR3NativeNotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags) +{ +#ifdef NEM_WIN_USE_OUR_OWN_RUN_API + nemHCWinCancelRunVirtualProcessor(pVM, pVCpu); +#else +# ifdef NEM_WIN_WITH_RING0_RUNLOOP + if (pVM->nem.s.fUseRing0Runloop) + nemHCWinCancelRunVirtualProcessor(pVM, pVCpu); + else +# endif + { + Log8(("nemR3NativeNotifyFF: canceling %u\n", pVCpu->idCpu)); + HRESULT hrc = WHvCancelRunVirtualProcessor(pVM->nem.s.hPartition, pVCpu->idCpu, 0); + AssertMsg(SUCCEEDED(hrc), ("WHvCancelRunVirtualProcessor -> hrc=%Rhrc\n", hrc)); + RT_NOREF_PV(hrc); + } +#endif + RT_NOREF_PV(fFlags); +} + + +DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv) +{ + PGMPAGEMAPLOCK Lock; + int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, ppv, &Lock); + if (RT_SUCCESS(rc)) + PGMPhysReleasePageMappingLock(pVM, &Lock); + return rc; +} + + +DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv) +{ + PGMPAGEMAPLOCK Lock; + int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhys, ppv, &Lock); + if (RT_SUCCESS(rc)) + PGMPhysReleasePageMappingLock(pVM, &Lock); + return rc; +} + + +int nemR3NativeNotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb) +{ + Log5(("nemR3NativeNotifyPhysRamRegister: %RGp LB %RGp\n", GCPhys, cb)); + NOREF(pVM); NOREF(GCPhys); NOREF(cb); + return VINF_SUCCESS; +} + + +int nemR3NativeNotifyPhysMmioExMap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvMmio2) +{ + Log5(("nemR3NativeNotifyPhysMmioExMap: %RGp LB %RGp fFlags=%#x pvMmio2=%p\n", GCPhys, cb, fFlags, pvMmio2)); + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); NOREF(pvMmio2); + return VINF_SUCCESS; +} + + +int nemR3NativeNotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags) +{ + Log5(("nemR3NativeNotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags)); + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); + return VINF_SUCCESS; +} + + +/** + * 
Called early during ROM registration, right after the pages have been + * allocated and the RAM range updated. + * + * This will be succeeded by a number of NEMHCNotifyPhysPageProtChanged() calls + * and finally a NEMR3NotifyPhysRomRegisterEarly(). + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param GCPhys The ROM address (page aligned). + * @param cb The size (page aligned). + * @param fFlags NEM_NOTIFY_PHYS_ROM_F_XXX. + */ +int nemR3NativeNotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags) +{ + Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags)); +#if 0 /* Let's not do this after all. We'll protection change notifications for each page and if not we'll map them lazily. */ + RTGCPHYS const cPages = cb >> X86_PAGE_SHIFT; + for (RTGCPHYS iPage = 0; iPage < cPages; iPage++, GCPhys += X86_PAGE_SIZE) + { + const void *pvPage; + int rc = nemR3NativeGCPhys2R3PtrReadOnly(pVM, GCPhys, &pvPage); + if (RT_SUCCESS(rc)) + { + HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, (void *)pvPage, GCPhys, X86_PAGE_SIZE, + WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute); + if (SUCCEEDED(hrc)) + { /* likely */ } + else + { + LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n", + GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue())); + return VERR_NEM_INIT_FAILED; + } + } + else + { + LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc)); + return rc; + } + } +#else + NOREF(pVM); NOREF(GCPhys); NOREF(cb); +#endif + RT_NOREF_PV(fFlags); + return VINF_SUCCESS; +} + + +/** + * Called after the ROM range has been fully completed. + * + * This will be preceeded by a NEMR3NotifyPhysRomRegisterEarly() call as well a + * number of NEMHCNotifyPhysPageProtChanged calls. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param GCPhys The ROM address (page aligned). + * @param cb The size (page aligned). + * @param fFlags NEM_NOTIFY_PHYS_ROM_F_XXX. + */ +int nemR3NativeNotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags) +{ + Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags)); + NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNPGMPHYSNEMCHECKPAGE} + */ +static DECLCALLBACK(int) nemR3WinUnsetForA20CheckerCallback(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, + PPGMPHYSNEMPAGEINFO pInfo, void *pvUser) +{ + /* We'll just unmap the memory. 
*/ + if (pInfo->u2NemState > NEM_WIN_PAGE_STATE_UNMAPPED) + { +#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES + int rc = nemHCWinHypercallUnmapPage(pVM, pVCpu, GCPhys); + AssertRC(rc); + if (RT_SUCCESS(rc)) +#else + HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, X86_PAGE_SIZE); + if (SUCCEEDED(hrc)) +#endif + { + uint32_t cMappedPages = ASMAtomicDecU32(&pVM->nem.s.cMappedPages); NOREF(cMappedPages); + Log5(("NEM GPA unmapped/A20: %RGp (was %s, cMappedPages=%u)\n", GCPhys, g_apszPageStates[pInfo->u2NemState], cMappedPages)); + pInfo->u2NemState = NEM_WIN_PAGE_STATE_UNMAPPED; + } + else + { +#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES + LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc)); + return rc; +#else + LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n", + GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue())); + return VERR_INTERNAL_ERROR_2; +#endif + } + } + RT_NOREF(pVCpu, pvUser); + return VINF_SUCCESS; +} + + +/** + * Unmaps a page from Hyper-V for the purpose of emulating A20 gate behavior. + * + * @returns The PGMPhysNemQueryPageInfo result. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhys The page to unmap. + */ +static int nemR3WinUnmapPageForA20Gate(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys) +{ + PGMPHYSNEMPAGEINFO Info; + return PGMPhysNemPageInfoChecker(pVM, pVCpu, GCPhys, false /*fMakeWritable*/, &Info, + nemR3WinUnsetForA20CheckerCallback, NULL); +} + + +/** + * Called when the A20 state changes. + * + * Hyper-V doesn't seem to offer a simple way of implementing the A20 line + * features of PCs. So, we do a very minimal emulation of the HMA to make DOS + * happy. + * + * @param pVCpu The CPU the A20 state changed on. + * @param fEnabled Whether it was enabled (true) or disabled. + */ +void nemR3NativeNotifySetA20(PVMCPU pVCpu, bool fEnabled) +{ + Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled)); + PVM pVM = pVCpu->CTX_SUFF(pVM); + if (!pVM->nem.s.fA20Fixed) + { + pVM->nem.s.fA20Enabled = fEnabled; + for (RTGCPHYS GCPhys = _1M; GCPhys < _1M + _64K; GCPhys += X86_PAGE_SIZE) + nemR3WinUnmapPageForA20Gate(pVM, pVCpu, GCPhys); + } +} + + +/** @page pg_nem_win NEM/win - Native Execution Manager, Windows. + * + * On Windows the Hyper-V root partition (dom0 in zen terminology) does not have + * nested VT-x or AMD-V capabilities. Early on raw-mode worked inside it, but + * for a while now we've been getting \#GPs when trying to modify CR4 in the + * world switcher. So, when Hyper-V is active on Windows we have little choice + * but to use Hyper-V to run our VMs. + * + * + * @section sub_nem_win_whv The WinHvPlatform API + * + * Since Windows 10 build 17083 there is a documented API for managing Hyper-V + * VMs: header file WinHvPlatform.h and implementation in WinHvPlatform.dll. + * This interface is a wrapper around the undocumented Virtualization + * Infrastructure Driver (VID) API - VID.DLL and VID.SYS. The wrapper is + * written in C++, namespaced, early versions (at least) was using standard C++ + * container templates in several places. + * + * When creating a VM using WHvCreatePartition, it will only create the + * WinHvPlatform structures for it, to which you get an abstract pointer. The + * VID API that actually creates the partition is first engaged when you call + * WHvSetupPartition after first setting a lot of properties using + * WHvSetPartitionProperty. 
Since the VID API is just a very thin wrapper + * around CreateFile and NtDeviceIoControlFile, it returns an actual HANDLE for + * the partition to WinHvPlatform. We fish this HANDLE out of the WinHvPlatform + * partition structures because we need to talk directly to VID for reasons + * we'll get to in a bit. (Btw. we could also intercept the CreateFileW or + * NtDeviceIoControlFile calls from VID.DLL to get the HANDLE should fishing in + * the partition structures become difficult.) + * + * The WinHvPlatform API requires us to both set the number of guest CPUs before + * setting up the partition and call WHvCreateVirtualProcessor for each of them. + * The CPU creation function boils down to a VidMessageSlotMap call that sets up + * and maps a message buffer into ring-3 for async communication with hyper-V + * and/or the VID.SYS thread actually running the CPU thru + * WinHvRunVpDispatchLoop(). When for instance a VMEXIT is encountered, hyper-V + * sends a message that the WHvRunVirtualProcessor API retrieves (and later + * acknowledges) via VidMessageSlotHandleAndGetNext. Since or about build + * 17757 a register page is also mapped into user space when creating the + * virtual CPU. It should be noteded that WHvDeleteVirtualProcessor doesn't do + * much as there seems to be no partner function VidMessagesSlotMap that + * reverses what it did. + * + * Memory is managed thru calls to WHvMapGpaRange and WHvUnmapGpaRange (GPA does + * not mean grade point average here, but rather guest physical addressspace), + * which corresponds to VidCreateVaGpaRangeSpecifyUserVa and VidDestroyGpaRange + * respectively. As 'UserVa' indicates, the functions works on user process + * memory. The mappings are also subject to quota restrictions, so the number + * of ranges are limited and probably their total size as well. Obviously + * VID.SYS keeps track of the ranges, but so does WinHvPlatform, which means + * there is a bit of overhead involved and quota restrctions makes sense. + * + * Running guest code is done through the WHvRunVirtualProcessor function. It + * asynchronously starts or resumes hyper-V CPU execution and then waits for an + * VMEXIT message. Hyper-V / VID.SYS will return information about the message + * in the message buffer mapping, and WHvRunVirtualProcessor will convert that + * finto it's own WHV_RUN_VP_EXIT_CONTEXT format. + * + * Other threads can interrupt the execution by using WHvCancelVirtualProcessor, + * which since or about build 17757 uses VidMessageSlotHandleAndGetNext to do + * the work (earlier builds would open the waiting thread, do a dummy + * QueueUserAPC on it, and let it upon return use VidStopVirtualProcessor to + * do the actual stopping). While there is certainly a race between cancelation + * and the CPU causing a natural VMEXIT, it is not known whether this still + * causes extra work on subsequent WHvRunVirtualProcessor calls (it did in and + * earlier 17134). + * + * Registers are retrieved and set via WHvGetVirtualProcessorRegisters and + * WHvSetVirtualProcessorRegisters. In addition, several VMEXITs include + * essential register state in the exit context information, potentially making + * it possible to emulate the instruction causing the exit without involving + * WHvGetVirtualProcessorRegisters. + * + * + * @subsection subsec_nem_win_whv_cons Issues & Feedback + * + * Here are some observations (mostly against build 17101): + * + * - The VMEXIT performance is dismal (build 17134). 
+ * + * Our proof of concept implementation with a kernel runloop (i.e. not using + * WHvRunVirtualProcessor and friends, but calling VID.SYS fast I/O control + * entry point directly) delivers 9-10% of the port I/O performance and only + * 6-7% of the MMIO performance that we have with our own hypervisor. + * + * When using the offical WinHvPlatform API, the numbers are %3 for port I/O + * and 5% for MMIO. + * + * While the tests we've done are using tight tight loops only doing port I/O + * and MMIO, the problem is clearly visible when running regular guest OSes. + * Anything that hammers the VGA device would be suffering, for example: + * + * - Windows 2000 boot screen animation overloads us with MMIO exits + * and won't even boot because all the time is spent in interrupt + * handlers and redrawin the screen. + * + * - DSL 4.4 and its bootmenu logo is slower than molasses in january. + * + * We have not found a workaround for this yet. + * + * Something that might improve the issue a little is to detect blocks with + * excessive MMIO and port I/O exits and emulate instructions to cover + * multiple exits before letting Hyper-V have a go at the guest execution + * again. This will only improve the situation under some circumstances, + * since emulating instructions without recompilation can be expensive, so + * there will only be real gains if the exitting instructions are tightly + * packed. + * + * Update: Security fixes during the summer of 2018 caused the performance to + * dropped even more. + * + * Update [build 17757]: Some performance improvements here, but they don't + * yet make up for what was lost this summer. + * + * + * - We need a way to directly modify the TSC offset (or bias if you like). + * + * The current approach of setting the WHvX64RegisterTsc register one by one + * on each virtual CPU in sequence will introduce random inaccuracies, + * especially if the thread doing the job is reschduled at a bad time. + * + * + * - Unable to access WHvX64RegisterMsrMtrrCap (build 17134). + * + * + * - On AMD Ryzen grub/debian 9.0 ends up with a unrecoverable exception + * when IA32_MTRR_PHYSMASK0 is written. + * + * + * - The IA32_APIC_BASE register does not work right: + * + * - Attempts by the guest to clear bit 11 (EN) are ignored, both the + * guest and the VMM reads back the old value. + * + * - Attempts to modify the base address (bits NN:12) seems to be ignored + * in the same way. + * + * - The VMM can modify both the base address as well as the the EN and + * BSP bits, however this is useless if we cannot intercept the WRMSR. + * + * - Attempts by the guest to set the EXTD bit (X2APIC) result in \#GP(0), + * while the VMM ends up with with ERROR_HV_INVALID_PARAMETER. Seems + * there is no way to support X2APIC. + * + * + * - Not sure if this is a thing, but WHvCancelVirtualProcessor seems to cause + * cause a lot more spurious WHvRunVirtualProcessor returns that what we get + * with the replacement code. By spurious returns we mean that the + * subsequent call to WHvRunVirtualProcessor would return immediately. + * + * Update [build 17757]: New cancelation code might have addressed this, but + * haven't had time to test it yet. + * + * + * - There is no API for modifying protection of a page within a GPA range. + * + * From what we can tell, the only way to modify the protection (like readonly + * -> writable, or vice versa) is to first unmap the range and then remap it + * with the new protection. 
+ * + * We are for instance doing this quite a bit in order to track dirty VRAM + * pages. VRAM pages starts out as readonly, when the guest writes to a page + * we take an exit, notes down which page it is, makes it writable and restart + * the instruction. After refreshing the display, we reset all the writable + * pages to readonly again, bulk fashion. + * + * Now to work around this issue, we do page sized GPA ranges. In addition to + * add a lot of tracking overhead to WinHvPlatform and VID.SYS, this also + * causes us to exceed our quota before we've even mapped a default sized + * (128MB) VRAM page-by-page. So, to work around this quota issue we have to + * lazily map pages and actively restrict the number of mappings. + * + * Our best workaround thus far is bypassing WinHvPlatform and VID entirely + * when in comes to guest memory management and instead use the underlying + * hypercalls (HvCallMapGpaPages, HvCallUnmapGpaPages) to do it ourselves. + * (This also maps a whole lot better into our own guest page management + * infrastructure.) + * + * Update [build 17757]: Introduces a KVM like dirty logging API which could + * help tracking dirty VGA pages, while being useless for shadow ROM and + * devices trying catch the guest updating descriptors and such. + * + * + * - Observed problems doing WHvUnmapGpaRange immediately followed by + * WHvMapGpaRange. + * + * As mentioned above, we've been forced to use this sequence when modifying + * page protection. However, when transitioning from readonly to writable, + * we've ended up looping forever with the same write to readonly memory + * VMEXIT. We're wondering if this issue might be related to the lazy mapping + * logic in WinHvPlatform. + * + * Workaround: Insert a WHvRunVirtualProcessor call and make sure to get a GPA + * unmapped exit between the two calls. Not entirely great performance wise + * (or the santity of our code). + * + * + * - Implementing A20 gate behavior is tedious, where as correctly emulating the + * A20M# pin (present on 486 and later) is near impossible for SMP setups + * (e.g. possiblity of two CPUs with different A20 status). + * + * Workaround: Only do A20 on CPU 0, restricting the emulation to HMA. We + * unmap all pages related to HMA (0x100000..0x10ffff) when the A20 state + * changes, lazily syncing the right pages back when accessed. + * + * + * - WHVRunVirtualProcessor wastes time converting VID/Hyper-V messages to its + * own format (WHV_RUN_VP_EXIT_CONTEXT). + * + * We understand this might be because Microsoft wishes to remain free to + * modify the VID/Hyper-V messages, but it's still rather silly and does slow + * things down a little. We'd much rather just process the messages directly. + * + * + * - WHVRunVirtualProcessor would've benefited from using a callback interface: + * + * - The potential size changes of the exit context structure wouldn't be + * an issue, since the function could manage that itself. + * + * - State handling could probably be simplified (like cancelation). + * + * + * - WHvGetVirtualProcessorRegisters and WHvSetVirtualProcessorRegisters + * internally converts register names, probably using temporary heap buffers. + * + * From the looks of things, they are converting from WHV_REGISTER_NAME to + * HV_REGISTER_NAME from in the "Virtual Processor Register Names" section in + * the "Hypervisor Top-Level Functional Specification" document. This feels + * like an awful waste of time. 
+ * + * We simply cannot understand why HV_REGISTER_NAME isn't used directly here, + * or at least the same values, making any conversion reduntant. Restricting + * access to certain registers could easily be implement by scanning the + * inputs. + * + * To avoid the heap + conversion overhead, we're currently using the + * HvCallGetVpRegisters and HvCallSetVpRegisters calls directly, at least for + * the ring-0 code. + * + * Update [build 17757]: Register translation has been very cleverly + * optimized and made table driven (2 top level tables, 4 + 1 leaf tables). + * Register information consists of the 32-bit HV register name, register page + * offset, and flags (giving valid offset, size and more). Register + * getting/settings seems to be done by hoping that the register page provides + * it all, and falling back on the VidSetVirtualProcessorState if one or more + * registers are not available there. + * + * Note! We have currently not updated our ring-0 code to take the register + * page into account, so it's suffering a little compared to the ring-3 code + * that now uses the offical APIs for registers. + * + * + * - The YMM and XCR0 registers are not yet named (17083). This probably + * wouldn't be a problem if HV_REGISTER_NAME was used, see previous point. + * + * Update [build 17757]: XCR0 is added. YMM register values seems to be put + * into a yet undocumented XsaveState interface. Approach is a little bulky, + * but saves number of enums and dispenses with register transation. Also, + * the underlying Vid setter API duplicates the input buffer on the heap, + * adding a 16 byte header. + * + * + * - Why does VID.SYS only query/set 32 registers at the time thru the + * HvCallGetVpRegisters and HvCallSetVpRegisters hypercalls? + * + * We've not trouble getting/setting all the registers defined by + * WHV_REGISTER_NAME in one hypercall (around 80). Some kind of stack + * buffering or similar? + * + * + * - To handle the VMMCALL / VMCALL instructions, it seems we need to intercept + * \#UD exceptions and inspect the opcodes. A dedicated exit for hypercalls + * would be more efficient, esp. for guests using \#UD for other purposes.. + * + * + * - Wrong instruction length in the VpContext with unmapped GPA memory exit + * contexts on 17115/AMD. + * + * One byte "PUSH CS" was reported as 2 bytes, while a two byte + * "MOV [EBX],EAX" was reported with a 1 byte instruction length. Problem + * naturally present in untranslated hyper-v messages. + * + * + * - The I/O port exit context information seems to be missing the address size + * information needed for correct string I/O emulation. + * + * VT-x provides this information in bits 7:9 in the instruction information + * field on newer CPUs. AMD-V in bits 7:9 in the EXITINFO1 field in the VMCB. + * + * We can probably work around this by scanning the instruction bytes for + * address size prefixes. Haven't investigated it any further yet. + * + * + * - Querying WHvCapabilityCodeExceptionExitBitmap returns zero even when + * intercepts demonstrably works (17134). + * + * + * - Querying HvPartitionPropertyDebugChannelId via HvCallGetPartitionProperty + * (hypercall) hangs the host (17134). + * + * + * + * Old concerns that have been addressed: + * + * - The WHvCancelVirtualProcessor API schedules a dummy usermode APC callback + * in order to cancel any current or future alertable wait in VID.SYS during + * the VidMessageSlotHandleAndGetNext call. 
+ * + * IIRC this will make the kernel schedule the specified callback thru + * NTDLL!KiUserApcDispatcher by modifying the thread context and quite + * possibly the userland thread stack. When the APC callback returns to + * KiUserApcDispatcher, it will call NtContinue to restore the old thread + * context and resume execution from there. This naturally adds up to some + * CPU cycles, ring transitions aren't for free, especially after Spectre & + * Meltdown mitigations. + * + * Using NtAltertThread call could do the same without the thread context + * modifications and the extra kernel call. + * + * Update: All concerns have addressed in or about build 17757. + * + * The WHvCancelVirtualProcessor API is now implemented using a new + * VidMessageSlotHandleAndGetNext() flag (4). Codepath is slightly longer + * than NtAlertThread, but has the added benefit that spurious wakeups can be + * more easily reduced. + * + * + * - When WHvRunVirtualProcessor returns without a message, or on a terse + * VID message like HLT, it will make a kernel call to get some registers. + * This is potentially inefficient if the caller decides he needs more + * register state. + * + * It would be better to just return what's available and let the caller fetch + * what is missing from his point of view in a single kernel call. + * + * Update: All concerns have been addressed in or about build 17757. Selected + * registers are now available via shared memory and thus HLT should (not + * verified) no longer require a system call to compose the exit context data. + * + * + * - The WHvRunVirtualProcessor implementation does lazy GPA range mappings when + * a unmapped GPA message is received from hyper-V. + * + * Since MMIO is currently realized as unmapped GPA, this will slow down all + * MMIO accesses a tiny little bit as WHvRunVirtualProcessor looks up the + * guest physical address to check if it is a pending lazy mapping. + * + * The lazy mapping feature makes no sense to us. We as API user have all the + * information and can do lazy mapping ourselves if we want/have to (see next + * point). + * + * Update: All concerns have been addressed in or about build 17757. + * + * + * - The WHvGetCapability function has a weird design: + * - The CapabilityCode parameter is pointlessly duplicated in the output + * structure (WHV_CAPABILITY). + * + * - API takes void pointer, but everyone will probably be using + * WHV_CAPABILITY due to WHV_CAPABILITY::CapabilityCode making it + * impractical to use anything else. + * + * - No output size. + * + * - See GetFileAttributesEx, GetFileInformationByHandleEx, + * FindFirstFileEx, and others for typical pattern for generic + * information getters. + * + * Update: All concerns have been addressed in build 17110. + * + * + * - The WHvGetPartitionProperty function uses the same weird design as + * WHvGetCapability, see above. + * + * Update: All concerns have been addressed in build 17110. + * + * + * - The WHvSetPartitionProperty function has a totally weird design too: + * - In contrast to its partner WHvGetPartitionProperty, the property code + * is not a separate input parameter here but part of the input + * structure. + * + * - The input structure is a void pointer rather than a pointer to + * WHV_PARTITION_PROPERTY which everyone probably will be using because + * of the WHV_PARTITION_PROPERTY::PropertyCode field. + * + * - Really, why use PVOID for the input when the function isn't accepting + * minimal sizes. E.g. 
WHVPartitionPropertyCodeProcessorClFlushSize only + * requires a 9 byte input, but the function insists on 16 bytes (17083). + * + * - See GetFileAttributesEx, SetFileInformationByHandle, FindFirstFileEx, + * and others for typical pattern for generic information setters and + * getters. + * + * Update: All concerns have been addressed in build 17110. + * + * + * + * @section sec_nem_win_impl Our implementation. + * + * We set out with the goal of wanting to run as much as possible in ring-0, + * reasoning that this would give use the best performance. + * + * This goal was approached gradually, starting out with a pure WinHvPlatform + * implementation, gradually replacing parts: register access, guest memory + * handling, running virtual processors. Then finally moving it all into + * ring-0, while keeping most of it configurable so that we could make + * comparisons (see NEMInternal.h and nemR3NativeRunGC()). + * + * + * @subsection subsect_nem_win_impl_ioctl VID.SYS I/O control calls + * + * To run things in ring-0 we need to talk directly to VID.SYS thru its I/O + * control interface. Looking at changes between like build 17083 and 17101 (if + * memory serves) a set of the VID I/O control numbers shifted a little, which + * means we need to determin them dynamically. We currently do this by hooking + * the NtDeviceIoControlFile API call from VID.DLL and snooping up the + * parameters when making dummy calls to relevant APIs. (We could also + * disassemble the relevant APIs and try fish out the information from that, but + * this is way simpler.) + * + * Issuing I/O control calls from ring-0 is facing a small challenge with + * respect to direct buffering. When using direct buffering the device will + * typically check that the buffer is actually in the user address space range + * and reject kernel addresses. Fortunately, we've got the cross context VM + * structure that is mapped into both kernel and user space, it's also locked + * and safe to access from kernel space. So, we place the I/O control buffers + * in the per-CPU part of it (NEMCPU::uIoCtlBuf) and give the driver the user + * address if direct access buffering or kernel address if not. + * + * The I/O control calls are 'abstracted' in the support driver, see + * SUPR0IoCtlSetupForHandle(), SUPR0IoCtlPerform() and SUPR0IoCtlCleanup(). + * + * + * @subsection subsect_nem_win_impl_cpumctx CPUMCTX + * + * Since the CPU state needs to live in Hyper-V when executing, we probably + * should not transfer more than necessary when handling VMEXITs. To help us + * manage this CPUMCTX got a new field CPUMCTX::fExtrn that to indicate which + * part of the state is currently externalized (== in Hyper-V). + * + * + * @subsection sec_nem_win_benchmarks Benchmarks. + * + * @subsubsection subsect_nem_win_benchmarks_bs2t1 17134/2018-06-22: Bootsector2-test1 + * + * This is ValidationKit/bootsectors/bootsector2-test1.asm as of 2018-06-22 + * (internal r123172) running a the release build of VirtualBox from the same + * source, though with exit optimizations disabled. Host is AMD Threadripper 1950X + * running out an up to date 64-bit Windows 10 build 17134. + * + * The base line column is using the official WinHv API for everything but physical + * memory mapping. The 2nd column is the default NEM/win configuration where we + * put the main execution loop in ring-0, using hypercalls when we can and VID for + * managing execution. The 3rd column is regular VirtualBox using AMD-V directly, + * hyper-V is disabled, main execution loop in ring-0. 
+ * + * @verbatim +TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V + 32-bit paged protected mode, CPUID : 108 874 ins/sec 113% / 123 602 1198% / 1 305 113 + 32-bit pae protected mode, CPUID : 106 722 ins/sec 115% / 122 740 1232% / 1 315 201 + 64-bit long mode, CPUID : 106 798 ins/sec 114% / 122 111 1198% / 1 280 404 + 16-bit unpaged protected mode, CPUID : 106 835 ins/sec 114% / 121 994 1216% / 1 299 665 + 32-bit unpaged protected mode, CPUID : 105 257 ins/sec 115% / 121 772 1235% / 1 300 860 + real mode, CPUID : 104 507 ins/sec 116% / 121 800 1228% / 1 283 848 +CPUID EAX=1 : PASSED + 32-bit paged protected mode, RDTSC : 99 581 834 ins/sec 100% / 100 323 307 93% / 93 473 299 + 32-bit pae protected mode, RDTSC : 99 620 585 ins/sec 100% / 99 960 952 84% / 83 968 839 + 64-bit long mode, RDTSC : 100 540 009 ins/sec 100% / 100 946 372 93% / 93 652 826 + 16-bit unpaged protected mode, RDTSC : 99 688 473 ins/sec 100% / 100 097 751 76% / 76 281 287 + 32-bit unpaged protected mode, RDTSC : 98 385 857 ins/sec 102% / 100 510 404 94% / 93 379 536 + real mode, RDTSC : 100 087 967 ins/sec 101% / 101 386 138 93% / 93 234 999 +RDTSC : PASSED + 32-bit paged protected mode, Read CR4 : 2 156 102 ins/sec 98% / 2 121 967 17114% / 369 009 009 + 32-bit pae protected mode, Read CR4 : 2 163 820 ins/sec 98% / 2 133 804 17469% / 377 999 261 + 64-bit long mode, Read CR4 : 2 164 822 ins/sec 98% / 2 128 698 18875% / 408 619 313 + 16-bit unpaged protected mode, Read CR4 : 2 162 367 ins/sec 100% / 2 168 508 17132% / 370 477 568 + 32-bit unpaged protected mode, Read CR4 : 2 163 189 ins/sec 100% / 2 169 808 16768% / 362 734 679 + real mode, Read CR4 : 2 162 436 ins/sec 100% / 2 164 914 15551% / 336 288 998 +Read CR4 : PASSED + real mode, 32-bit IN : 104 649 ins/sec 118% / 123 513 1028% / 1 075 831 + real mode, 32-bit OUT : 107 102 ins/sec 115% / 123 660 982% / 1 052 259 + real mode, 32-bit IN-to-ring-3 : 105 697 ins/sec 98% / 104 471 201% / 213 216 + real mode, 32-bit OUT-to-ring-3 : 105 830 ins/sec 98% / 104 598 198% / 210 495 + 16-bit unpaged protected mode, 32-bit IN : 104 855 ins/sec 117% / 123 174 1029% / 1 079 591 + 16-bit unpaged protected mode, 32-bit OUT : 107 529 ins/sec 115% / 124 250 992% / 1 067 053 + 16-bit unpaged protected mode, 32-bit IN-to-ring-3 : 106 337 ins/sec 103% / 109 565 196% / 209 367 + 16-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 107 558 ins/sec 100% / 108 237 191% / 206 387 + 32-bit unpaged protected mode, 32-bit IN : 106 351 ins/sec 116% / 123 584 1016% / 1 081 325 + 32-bit unpaged protected mode, 32-bit OUT : 106 424 ins/sec 116% / 124 252 995% / 1 059 408 + 32-bit unpaged protected mode, 32-bit IN-to-ring-3 : 104 035 ins/sec 101% / 105 305 202% / 210 750 + 32-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 103 831 ins/sec 102% / 106 919 205% / 213 198 + 32-bit paged protected mode, 32-bit IN : 103 356 ins/sec 119% / 123 870 1041% / 1 076 463 + 32-bit paged protected mode, 32-bit OUT : 107 177 ins/sec 115% / 124 302 998% / 1 069 655 + 32-bit paged protected mode, 32-bit IN-to-ring-3 : 104 491 ins/sec 100% / 104 744 200% / 209 264 + 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 106 603 ins/sec 97% / 103 849 197% / 210 219 + 32-bit pae protected mode, 32-bit IN : 105 923 ins/sec 115% / 122 759 1041% / 1 103 261 + 32-bit pae protected mode, 32-bit OUT : 107 083 ins/sec 117% / 126 057 1024% / 1 096 667 + 32-bit pae protected mode, 32-bit IN-to-ring-3 : 106 114 ins/sec 97% / 103 496 199% / 211 312 + 32-bit pae protected mode, 32-bit OUT-to-ring-3 : 105 675 ins/sec 96% / 
102 096 198% / 209 890 + 64-bit long mode, 32-bit IN : 105 800 ins/sec 113% / 120 006 1013% / 1 072 116 + 64-bit long mode, 32-bit OUT : 105 635 ins/sec 113% / 120 375 997% / 1 053 655 + 64-bit long mode, 32-bit IN-to-ring-3 : 105 274 ins/sec 95% / 100 763 197% / 208 026 + 64-bit long mode, 32-bit OUT-to-ring-3 : 106 262 ins/sec 94% / 100 749 196% / 209 288 +NOP I/O Port Access : PASSED + 32-bit paged protected mode, 32-bit read : 57 687 ins/sec 119% / 69 136 1197% / 690 548 + 32-bit paged protected mode, 32-bit write : 57 957 ins/sec 118% / 68 935 1183% / 685 930 + 32-bit paged protected mode, 32-bit read-to-ring-3 : 57 958 ins/sec 95% / 55 432 276% / 160 505 + 32-bit paged protected mode, 32-bit write-to-ring-3 : 57 922 ins/sec 100% / 58 340 304% / 176 464 + 32-bit pae protected mode, 32-bit read : 57 478 ins/sec 119% / 68 453 1141% / 656 159 + 32-bit pae protected mode, 32-bit write : 57 226 ins/sec 118% / 68 097 1157% / 662 504 + 32-bit pae protected mode, 32-bit read-to-ring-3 : 57 582 ins/sec 94% / 54 651 268% / 154 867 + 32-bit pae protected mode, 32-bit write-to-ring-3 : 57 697 ins/sec 100% / 57 750 299% / 173 030 + 64-bit long mode, 32-bit read : 57 128 ins/sec 118% / 67 779 1071% / 611 949 + 64-bit long mode, 32-bit write : 57 127 ins/sec 118% / 67 632 1084% / 619 395 + 64-bit long mode, 32-bit read-to-ring-3 : 57 181 ins/sec 94% / 54 123 265% / 151 937 + 64-bit long mode, 32-bit write-to-ring-3 : 57 297 ins/sec 99% / 57 286 294% / 168 694 + 16-bit unpaged protected mode, 32-bit read : 58 827 ins/sec 118% / 69 545 1185% / 697 602 + 16-bit unpaged protected mode, 32-bit write : 58 678 ins/sec 118% / 69 442 1183% / 694 387 + 16-bit unpaged protected mode, 32-bit read-to-ring-3 : 57 841 ins/sec 96% / 55 730 275% / 159 163 + 16-bit unpaged protected mode, 32-bit write-to-ring-3 : 57 855 ins/sec 101% / 58 834 304% / 176 169 + 32-bit unpaged protected mode, 32-bit read : 58 063 ins/sec 120% / 69 690 1233% / 716 444 + 32-bit unpaged protected mode, 32-bit write : 57 936 ins/sec 120% / 69 633 1199% / 694 753 + 32-bit unpaged protected mode, 32-bit read-to-ring-3 : 58 451 ins/sec 96% / 56 183 273% / 159 972 + 32-bit unpaged protected mode, 32-bit write-to-ring-3 : 58 962 ins/sec 99% / 58 955 298% / 175 936 + real mode, 32-bit read : 58 571 ins/sec 118% / 69 478 1160% / 679 917 + real mode, 32-bit write : 58 418 ins/sec 118% / 69 320 1185% / 692 513 + real mode, 32-bit read-to-ring-3 : 58 072 ins/sec 96% / 55 751 274% / 159 145 + real mode, 32-bit write-to-ring-3 : 57 870 ins/sec 101% / 58 755 307% / 178 042 +NOP MMIO Access : PASSED +SUCCESS + * @endverbatim + * + * What we see here is: + * + * - The WinHv API approach is 10 to 12 times slower for exits we can + * handle directly in ring-0 in the VBox AMD-V code. + * + * - The WinHv API approach is 2 to 3 times slower for exits we have to + * go to ring-3 to handle with the VBox AMD-V code. + * + * - By using hypercalls and VID.SYS from ring-0 we gain between + * 13% and 20% over the WinHv API on exits handled in ring-0. + * + * - For exits requiring ring-3 handling are between 6% slower and 3% faster + * than the WinHv API. + * + * + * As a side note, it looks like Hyper-V doesn't let the guest read CR4 but + * triggers exits all the time. This isn't all that important these days since + * OSes like Linux cache the CR4 value specifically to avoid these kinds of exits. + * + * + * @subsubsection subsect_nem_win_benchmarks_bs2t1u1 17134/2018-10-02: Bootsector2-test1 + * + * Update on 17134. 
While expectantly testing a couple of newer builds (17758,
+ * 17763) hoping for some increases in performance, the numbers turned out
+ * altogether worse than the June test run. So, we went back to the 1803
+ * (17134) installation, made sure it was fully up to date (as per 2018-10-02)
+ * and re-tested.
+ *
+ * The numbers had somehow turned significantly worse over the last 3-4 months,
+ * dropping around 70% for the WinHv API test, more for Hypercalls + VID.
+ *
+ * @verbatim
+TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V *
+ 32-bit paged protected mode, CPUID : 33 270 ins/sec 33 154
+ real mode, CPUID : 33 534 ins/sec 32 711
+ [snip]
+ 32-bit paged protected mode, RDTSC : 102 216 011 ins/sec 98 225 419
+ real mode, RDTSC : 102 492 243 ins/sec 98 225 419
+ [snip]
+ 32-bit paged protected mode, Read CR4 : 2 096 165 ins/sec 2 123 815
+ real mode, Read CR4 : 2 081 047 ins/sec 2 075 151
+ [snip]
+ 32-bit paged protected mode, 32-bit IN : 32 739 ins/sec 33 655
+ 32-bit paged protected mode, 32-bit OUT : 32 702 ins/sec 33 777
+ 32-bit paged protected mode, 32-bit IN-to-ring-3 : 32 579 ins/sec 29 985
+ 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 32 750 ins/sec 29 757
+ [snip]
+ 32-bit paged protected mode, 32-bit read : 20 042 ins/sec 21 489
+ 32-bit paged protected mode, 32-bit write : 20 036 ins/sec 21 493
+ 32-bit paged protected mode, 32-bit read-to-ring-3 : 19 985 ins/sec 19 143
+ 32-bit paged protected mode, 32-bit write-to-ring-3 : 19 972 ins/sec 19 595
+
+ * @endverbatim
+ *
+ * Suspects are security updates and/or microcode updates installed since then.
+ * Given that the RDTSC and CR4 numbers are reasonably unchanged, it seems that
+ * the Hyper-V core loop (in hvax64.exe) isn't affected. Our ring-0 runloop
+ * is affected just as much as the ring-3 based runloop, so it cannot be ring
+ * switching as such (unless the ring-0 loop is borked and we didn't notice yet).
+ *
+ * The issue is probably in the thread / process switching area; it could be
+ * something specific to Hyper-V interrupt delivery or worker thread switching.
+ *
+ * Really wish this thread ping-pong going on in VID.SYS could be eliminated!
+ *
+ *
+ * @subsubsection subsect_nem_win_benchmarks_bs2t1u2 17763: Bootsector2-test1
+ *
+ * Some preliminary numbers for build 17763 on the 3.4 GHz AMD 1950X; the second
+ * column will improve once we get time to have a look at the register page.
+ *
+ * There is a 50% performance loss here compared to the June numbers with
+ * build 17134. The RDTSC numbers hint that it isn't in the Hyper-V core
+ * (hvax64.exe), but something on the NT side.
+ *
+ * Clearing bit 20 in nt!KiSpeculationFeatures speeds things up (i.e. changing
+ * the dword from 0x00300065 to 0x00200065 in windbg). This is checked by
+ * nt!KePrepareToDispatchVirtualProcessor, making it a no-op if the flag is
+ * clear. winhvr!WinHvpVpDispatchLoop calls that function before making
+ * hypercall 0xc2, which presumably does the heavy VCpu lifting in hvax64.exe.
+ *
+ * @verbatim
+TESTING... WinHv API Hypercalls + VID clr(bit-20) + WinHv API
+ 32-bit paged protected mode, CPUID : 54 145 ins/sec 51 436 130 076
+ real mode, CPUID : 54 178 ins/sec 51 713 130 449
+ [snip]
+ 32-bit paged protected mode, RDTSC : 98 927 639 ins/sec 100 254 552 100 549 882
+ real mode, RDTSC : 99 601 206 ins/sec 100 886 699 100 470 957
+ [snip]
+ 32-bit paged protected mode, 32-bit IN : 54 621 ins/sec 51 524 128 294
+ 32-bit paged protected mode, 32-bit OUT : 54 870 ins/sec 51 671 129 397
+ 32-bit paged protected mode, 32-bit IN-to-ring-3 : 54 624 ins/sec 43 964 127 874
+ 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 54 803 ins/sec 44 087 129 443
+ [snip]
+ 32-bit paged protected mode, 32-bit read : 28 230 ins/sec 34 042 48 113
+ 32-bit paged protected mode, 32-bit write : 27 962 ins/sec 34 050 48 069
+ 32-bit paged protected mode, 32-bit read-to-ring-3 : 27 841 ins/sec 28 397 48 146
+ 32-bit paged protected mode, 32-bit write-to-ring-3 : 27 896 ins/sec 29 455 47 970
+ * @endverbatim
+ *
+ *
+ * @subsubsection subsect_nem_win_benchmarks_w2k 17134/2018-06-22: Windows 2000 Boot & Shutdown
+ *
+ * Timing the startup and automatic shutdown of a Windows 2000 SP4 guest serves
+ * as a real world benchmark and example of why exit performance is important. When
+ * Windows 2000 boots up, it is doing a lot of VGA redrawing of the boot animation,
+ * which is very costly. Not having installed guest additions leaves it in a VGA
+ * mode after the bootup sequence is done, keeping up the screen access expenses,
+ * though the graphics driver is more economical than the bootvid code.
+ *
+ * The VM was configured to automatically logon. A startup script was installed
+ * to perform the automatic shutting down and powering off of the VM (thru
+ * vts_shutdown.exe -f -p). An offline snapshot of the VM was taken and restored
+ * before each test run. The test run time is calculated from the monotonic
+ * VBox.log timestamps, starting with the state change to 'RUNNING' and stopping
+ * at 'POWERING_OFF'.
+ *
+ * The host OS and VirtualBox build is the same as for the bootsector2-test1
+ * scenario.
+ *
+ * Results:
+ *
+ *  - WinHv API for all but physical page mappings:
+ *    32 min 12.19 seconds
+ *
+ *  - The default NEM/win configuration where we put the main execution loop
+ *    in ring-0, using hypercalls when we can and VID for managing execution:
+ *    3 min 23.18 seconds
+ *
+ *  - Regular VirtualBox using AMD-V directly, Hyper-V is disabled, main
+ *    execution loop in ring-0:
+ *    58.09 seconds
+ *
+ *  - WinHv API with exit history based optimizations:
+ *    58.66 seconds
+ *
+ *  - Hypercall + VID.SYS with exit history based optimizations:
+ *    58.94 seconds
+ *
+ * That a well above average machine needs over half an hour for booting a
+ * nearly 20 year old guest kind of says it all. The 13%-20% exit performance
+ * increase we get by using hypercalls and VID.SYS directly pays off a lot here.
+ * The 3m23s is almost acceptable in comparison to the half an hour.
+ *
+ * The similarity between the last three results strongly hints at Windows 2000
+ * doing a lot of waiting during boot and shutdown, and that it isn't the best
+ * testcase once a basic performance level is reached.
+ *
+ *
+ * @subsubsection subsection_iem_win_benchmarks_deb9_nat Debian 9 NAT performance
+ *
+ * This benchmark is about network performance over NAT from a 64-bit Debian 9
+ * VM with a single CPU. For network performance measurements, we use our own
+ * NetPerf tool (ValidationKit/utils/network/NetPerf.cpp) to measure latency
+ * and throughput.
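+ *
+ * As a rough illustration of what the latency measurement below amounts to,
+ * the sketch that follows shows a blocking request/response ping-pong whose
+ * average round-trip time is reported. This is not the actual NetPerf code;
+ * hSock, abBuf and cRoundTrips are made up for the illustration.
+ * @code
+ *      uint8_t  abBuf[64] = {0};
+ *      uint64_t const nsStart = RTTimeNanoTS();
+ *      for (uint32_t i = 0; i < cRoundTrips; i++)
+ *      {
+ *          send(hSock, abBuf, sizeof(abBuf), 0);           // request to the other instance
+ *          recv(hSock, abBuf, sizeof(abBuf), MSG_WAITALL); // block until the reply arrives
+ *      }
+ *      uint64_t const cNsPerRoundTrip = (RTTimeNanoTS() - nsStart) / cRoundTrips;
+ * @endcode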
+ *
+ * The setups, builds and configurations are as in the previous benchmarks
+ * (release r123172 on the 1950X running 64-bit W10/17134, 2016-06-xx). Please note
+ * that the exit optimizations haven't yet been tuned with NetPerf in mind.
+ *
+ * The NAT network setup was selected here since it's the default one and the
+ * slowest one. There is quite a bit of IPC with worker threads and packet
+ * processing involved.
+ *
+ * The latency test is first up. This is a classic back and forth between the two
+ * NetPerf instances, where the key measurement is the roundtrip latency. The
+ * values here are the lowest result over 3-6 runs.
+ *
+ * Against the host system:
+ *  - 152 258 ns/roundtrip - 100% - regular VirtualBox SVM
+ *  - 271 059 ns/roundtrip - 178% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
+ *  - 280 149 ns/roundtrip - 184% - Hypercalls + VID.SYS in ring-0
+ *  - 317 735 ns/roundtrip - 209% - Win HV API with exit optimizations.
+ *  - 342 440 ns/roundtrip - 225% - Win HV API
+ *
+ * Against a remote Windows 10 system over a 10Gbps link:
+ *  - 243 969 ns/roundtrip - 100% - regular VirtualBox SVM
+ *  - 384 427 ns/roundtrip - 158% - Win HV API with exit optimizations.
+ *  - 402 411 ns/roundtrip - 165% - Hypercalls + VID.SYS in ring-0
+ *  - 406 313 ns/roundtrip - 167% - Win HV API
+ *  - 413 160 ns/roundtrip - 169% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
+ *
+ * What we see here is:
+ *
+ *  - A consistent and significant latency increase using Hyper-V compared
+ *    to directly harnessing AMD-V ourselves.
+ *
+ *  - When talking to the host, it's clear that the hypercalls + VID.SYS
+ *    in ring-0 method pays off.
+ *
+ *  - When talking to a different host, the numbers are closer and it
+ *    is no longer clear which Hyper-V execution method is better.
+ *
+ *
+ * Throughput benchmarks are performed by one side pushing data full throttle
+ * for 10 seconds (minus 1 second at each end of the test), then reversing
+ * the roles and measuring it in the other direction. The tests ran 3-5 times
+ * and below are the highest and lowest results in each direction.
+ * + * Receiving from host system: + * - Regular VirtualBox SVM: + * Max: 96 907 549 bytes/s - 100% + * Min: 86 912 095 bytes/s - 100% + * - Hypercalls + VID.SYS in ring-0: + * Max: 84 036 544 bytes/s - 87% + * Min: 64 978 112 bytes/s - 75% + * - Hypercalls + VID.SYS in ring-0 with exit optimizations: + * Max: 77 760 699 bytes/s - 80% + * Min: 72 677 171 bytes/s - 84% + * - Win HV API with exit optimizations: + * Max: 64 465 905 bytes/s - 67% + * Min: 62 286 369 bytes/s - 72% + * - Win HV API: + * Max: 62 466 631 bytes/s - 64% + * Min: 61 362 782 bytes/s - 70% + * + * Sending to the host system: + * - Regular VirtualBox SVM: + * Max: 87 728 652 bytes/s - 100% + * Min: 86 923 198 bytes/s - 100% + * - Hypercalls + VID.SYS in ring-0: + * Max: 84 280 749 bytes/s - 96% + * Min: 78 369 842 bytes/s - 90% + * - Hypercalls + VID.SYS in ring-0 with exit optimizations: + * Max: 84 119 932 bytes/s - 96% + * Min: 77 396 811 bytes/s - 89% + * - Win HV API: + * Max: 81 714 377 bytes/s - 93% + * Min: 78 697 419 bytes/s - 91% + * - Win HV API with exit optimizations: + * Max: 80 502 488 bytes/s - 91% + * Min: 71 164 978 bytes/s - 82% + * + * Receiving from a remote Windows 10 system over a 10Gbps link: + * - Hypercalls + VID.SYS in ring-0: + * Max: 115 346 922 bytes/s - 136% + * Min: 112 912 035 bytes/s - 137% + * - Regular VirtualBox SVM: + * Max: 84 517 504 bytes/s - 100% + * Min: 82 597 049 bytes/s - 100% + * - Hypercalls + VID.SYS in ring-0 with exit optimizations: + * Max: 77 736 251 bytes/s - 92% + * Min: 73 813 784 bytes/s - 89% + * - Win HV API with exit optimizations: + * Max: 63 035 587 bytes/s - 75% + * Min: 57 538 380 bytes/s - 70% + * - Win HV API: + * Max: 62 279 185 bytes/s - 74% + * Min: 56 813 866 bytes/s - 69% + * + * Sending to a remote Windows 10 system over a 10Gbps link: + * - Win HV API with exit optimizations: + * Max: 116 502 357 bytes/s - 103% + * Min: 49 046 550 bytes/s - 59% + * - Regular VirtualBox SVM: + * Max: 113 030 991 bytes/s - 100% + * Min: 83 059 511 bytes/s - 100% + * - Hypercalls + VID.SYS in ring-0: + * Max: 106 435 031 bytes/s - 94% + * Min: 47 253 510 bytes/s - 57% + * - Hypercalls + VID.SYS in ring-0 with exit optimizations: + * Max: 94 842 287 bytes/s - 84% + * Min: 68 362 172 bytes/s - 82% + * - Win HV API: + * Max: 65 165 225 bytes/s - 58% + * Min: 47 246 573 bytes/s - 57% + * + * What we see here is: + * + * - Again consistent numbers when talking to the host. Showing that the + * ring-0 approach is preferable to the ring-3 one. + * + * - Again when talking to a remote host, things get more difficult to + * make sense of. The spread is larger and direct AMD-V gets beaten by + * a different the Hyper-V approaches in each direction. + * + * - However, if we treat the first entry (remote host) as weird spikes, the + * other entries are consistently worse compared to direct AMD-V. For the + * send case we get really bad results for WinHV. + * + */ + diff --git a/src/VBox/VMM/VMMR3/PATM.cpp b/src/VBox/VMM/VMMR3/PATM.cpp new file mode 100644 index 00000000..087008ce --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATM.cpp @@ -0,0 +1,6887 @@ +/* $Id: PATM.cpp $ */ +/** @file + * PATM - Dynamic Guest OS Patching Manager + * + * @note Never ever reuse patch memory!! + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_patm PATM - Patch Manager + * + * The patch manager (PATM) patches privileged guest code to allow it to execute + * directly in raw-mode. + * + * The PATM works closely together with the @ref pg_csam "CSAM" detect code + * needing patching and detected changes to the patch. It also interfaces with + * other components, like @ref pg_trpm "TRPM" and @ref pg_rem "REM", for these + * purposes. + * + * @sa @ref grp_patm + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PATM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "PATMInternal.h" +#include "PATMPatch.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "PATMA.h" + +//#define PATM_REMOVE_PATCH_ON_TOO_MANY_TRAPS +//#define PATM_DISABLE_ALL + +/** + * Refresh trampoline patch state. + */ +typedef struct PATMREFRESHPATCH +{ + /** Pointer to the VM structure. */ + PVM pVM; + /** The trampoline patch record. */ + PPATCHINFO pPatchTrampoline; + /** The new patch we want to jump to. 
*/ + PPATCHINFO pPatchRec; +} PATMREFRESHPATCH, *PPATMREFRESHPATCH; + + +#define PATMREAD_RAWCODE 1 /* read code as-is */ +#define PATMREAD_ORGCODE 2 /* read original guest opcode bytes; not the patched bytes */ +#define PATMREAD_NOCHECK 4 /* don't check for patch conflicts */ + +/* + * Private structure used during disassembly + */ +typedef struct +{ + PVM pVM; + PPATCHINFO pPatchInfo; + R3PTRTYPE(uint8_t *) pbInstrHC; + RTRCPTR pInstrGC; + uint32_t fReadFlags; +} PATMDISASM, *PPATMDISASM; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int patmDisableUnusablePatch(PVM pVM, RTRCPTR pInstrGC, RTRCPTR pConflictAddr, PPATCHINFO pPatch); +static int patmActivateInt3Patch(PVM pVM, PPATCHINFO pPatch); +static int patmDeactivateInt3Patch(PVM pVM, PPATCHINFO pPatch); + +#ifdef LOG_ENABLED // keep gcc quiet +static bool patmIsCommonIDTHandlerPatch(PVM pVM, RTRCPTR pInstrGC); +#endif +#ifdef VBOX_WITH_STATISTICS +static const char *PATMPatchType(PVM pVM, PPATCHINFO pPatch); +static void patmResetStat(PVM pVM, void *pvSample); +static void patmPrintStat(PVM pVM, void *pvSample, char *pszBuf, size_t cchBuf); +#endif + +#define patmPatchHCPtr2PatchGCPtr(pVM, pHC) (pVM->patm.s.pPatchMemGC + (pHC - pVM->patm.s.pPatchMemHC)) +#define patmPatchGCPtr2PatchHCPtr(pVM, pGC) (pVM->patm.s.pPatchMemHC + (pGC - pVM->patm.s.pPatchMemGC)) + +static int patmReinit(PVM pVM); +static DECLCALLBACK(int) patmR3RelocatePatches(PAVLOU32NODECORE pNode, void *pParam); +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES +static RTRCPTR patmR3GuestGCPtrToPatchGCPtrSimple(PVM pVM, RCPTRTYPE(uint8_t*) pInstrGC); +#endif +static int patmR3MarkDirtyPatch(PVM pVM, PPATCHINFO pPatch); + +#ifdef VBOX_WITH_DEBUGGER +static DECLCALLBACK(int) DisableAllPatches(PAVLOU32NODECORE pNode, void *pVM); +static FNDBGCCMD patmr3CmdOn; +static FNDBGCCMD patmr3CmdOff; + +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax, ....pszDescription */ + { "patmon", 0, 0, NULL, 0, 0, patmr3CmdOn, "", "Enable patching." }, + { "patmoff", 0, 0, NULL, 0, 0, patmr3CmdOff, "", "Disable patching." }, +}; +#endif + +/* Don't want to break saved states, so put it here as a global variable. */ +static unsigned int cIDTHandlersDisabled = 0; + +/** + * Initializes the PATM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PATMR3Init(PVM pVM) +{ + int rc; + + /* + * We only need a saved state dummy loader if HM is enabled. + */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + pVM->fPATMEnabled = false; + return SSMR3RegisterStub(pVM, "PATM", 0); + } + + /* + * Raw-mode. + */ + Log(("PATMR3Init: Patch record size %d\n", sizeof(PATCHINFO))); + + /* These values can't change as they are hardcoded in patch code (old saved states!) */ + AssertCompile(VMCPU_FF_TIMER == RT_BIT_32(2)); + AssertCompile(VM_FF_REQUEST == VMCPU_FF_REQUEST); + AssertCompile(VMCPU_FF_INTERRUPT_APIC == RT_BIT_32(0)); + AssertCompile(VMCPU_FF_INTERRUPT_PIC == RT_BIT_32(1)); + + AssertReleaseMsg(g_fPatmInterruptFlag == (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST), + ("Interrupt flags out of sync!! g_fPatmInterruptFlag=%#x expected %#x. 
broken assembler?\n", g_fPatmInterruptFlag, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST)); + + /* Allocate patch memory and GC patch state memory. */ + pVM->patm.s.cbPatchMem = PATCH_MEMORY_SIZE; + /* Add another page in case the generated code is much larger than expected. */ + /** @todo bad safety precaution */ + rc = MMR3HyperAllocOnceNoRel(pVM, PATCH_MEMORY_SIZE + PAGE_SIZE + PATM_STACK_TOTAL_SIZE + PAGE_SIZE + PATM_STAT_MEMSIZE, PAGE_SIZE, MM_TAG_PATM, (void **)&pVM->patm.s.pPatchMemHC); + if (RT_FAILURE(rc)) + { + Log(("MMHyperAlloc failed with %Rrc\n", rc)); + return rc; + } + pVM->patm.s.pPatchMemGC = MMHyperR3ToRC(pVM, pVM->patm.s.pPatchMemHC); + + /* PATM stack page for call instruction execution. (2 parts: one for our private stack and one to store the original return address */ + pVM->patm.s.pGCStackHC = (RTRCPTR *)(pVM->patm.s.pPatchMemHC + PATCH_MEMORY_SIZE + PAGE_SIZE); + pVM->patm.s.pGCStackGC = MMHyperR3ToRC(pVM, pVM->patm.s.pGCStackHC); + + patmR3DbgInit(pVM); + + /* + * Hypervisor memory for GC status data (read/write) + * + * Note1: This is non-critical data; if trashed by the guest, then it will only cause problems for itself + * Note2: This doesn't really belong here, but we need access to it for relocation purposes + * + */ + Assert(sizeof(PATMGCSTATE) < PAGE_SIZE); /* Note: hardcoded dependencies on this exist. */ + pVM->patm.s.pGCStateHC = (PPATMGCSTATE)((uint8_t *)pVM->patm.s.pGCStackHC + PATM_STACK_TOTAL_SIZE); + pVM->patm.s.pGCStateGC = MMHyperR3ToRC(pVM, pVM->patm.s.pGCStateHC); + + /* Hypervisor memory for patch statistics */ + pVM->patm.s.pStatsHC = (PSTAMRATIOU32)((uint8_t *)pVM->patm.s.pGCStateHC + PAGE_SIZE); + pVM->patm.s.pStatsGC = MMHyperR3ToRC(pVM, pVM->patm.s.pStatsHC); + + /* Memory for patch lookup trees. */ + rc = MMHyperAlloc(pVM, sizeof(*pVM->patm.s.PatchLookupTreeHC), 0, MM_TAG_PATM, (void **)&pVM->patm.s.PatchLookupTreeHC); + AssertRCReturn(rc, rc); + pVM->patm.s.PatchLookupTreeGC = MMHyperR3ToRC(pVM, pVM->patm.s.PatchLookupTreeHC); + +#ifdef RT_ARCH_AMD64 /* see patmReinit(). */ + /* Check CFGM option. */ + rc = CFGMR3QueryBool(CFGMR3GetRoot(pVM), "PATMEnabled", &pVM->fPATMEnabled); + if (RT_FAILURE(rc)) +# ifdef PATM_DISABLE_ALL + pVM->fPATMEnabled = false; +# else + pVM->fPATMEnabled = true; +# endif +#endif + + rc = patmReinit(pVM); + AssertRC(rc); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register the virtual page access handler type. + */ + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_ALL, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, + patmVirtPageHandler, + "patmVirtPageHandler", "patmRCVirtPagePfHandler", + "PATMMonitorPatchJump", &pVM->patm.s.hMonitorPageType); + AssertRCReturn(rc, rc); + + /* + * Register save and load state notifiers. + */ + rc = SSMR3RegisterInternal(pVM, "PATM", 0, PATM_SAVED_STATE_VERSION, sizeof(pVM->patm.s) + PATCH_MEMORY_SIZE + PAGE_SIZE + PATM_STACK_TOTAL_SIZE + PAGE_SIZE, + NULL, NULL, NULL, + NULL, patmR3Save, NULL, + NULL, patmR3Load, NULL); + AssertRCReturn(rc, rc); + +#ifdef VBOX_WITH_DEBUGGER + /* + * Debugger commands. 
+ */ + static bool s_fRegisteredCmds = false; + if (!s_fRegisteredCmds) + { + int rc2 = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + if (RT_SUCCESS(rc2)) + s_fRegisteredCmds = true; + } +#endif + +#ifdef VBOX_WITH_STATISTICS + STAM_REG(pVM, &pVM->patm.s.StatNrOpcodeRead, STAMTYPE_COUNTER, "/PATM/OpcodeBytesRead", STAMUNIT_OCCURENCES, "The number of opcode bytes read by the recompiler."); + STAM_REG(pVM, &pVM->patm.s.StatPATMMemoryUsed,STAMTYPE_COUNTER, "/PATM/MemoryUsed", STAMUNIT_OCCURENCES, "The amount of hypervisor heap used for patches."); + STAM_REG(pVM, &pVM->patm.s.StatDisabled, STAMTYPE_COUNTER, "/PATM/Patch/Disabled", STAMUNIT_OCCURENCES, "Number of times patches were disabled."); + STAM_REG(pVM, &pVM->patm.s.StatEnabled, STAMTYPE_COUNTER, "/PATM/Patch/Enabled", STAMUNIT_OCCURENCES, "Number of times patches were enabled."); + STAM_REG(pVM, &pVM->patm.s.StatDirty, STAMTYPE_COUNTER, "/PATM/Patch/Dirty", STAMUNIT_OCCURENCES, "Number of times patches were marked dirty."); + STAM_REG(pVM, &pVM->patm.s.StatUnusable, STAMTYPE_COUNTER, "/PATM/Patch/Unusable", STAMUNIT_OCCURENCES, "Number of unusable patches (conflicts)."); + STAM_REG(pVM, &pVM->patm.s.StatInstalled, STAMTYPE_COUNTER, "/PATM/Patch/Installed", STAMUNIT_OCCURENCES, "Number of installed patches."); + STAM_REG(pVM, &pVM->patm.s.StatInt3Callable, STAMTYPE_COUNTER, "/PATM/Patch/Int3Callable", STAMUNIT_OCCURENCES, "Number of cli patches turned into int3 patches."); + + STAM_REG(pVM, &pVM->patm.s.StatInt3BlockRun, STAMTYPE_COUNTER, "/PATM/Patch/Run/Int3", STAMUNIT_OCCURENCES, "Number of times an int3 block patch was executed."); + STAMR3RegisterF(pVM, &pVM->patm.s.pGCStateHC->uPatchCalls, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/Patch/Run/Normal"); + + STAM_REG(pVM, &pVM->patm.s.StatInstalledFunctionPatches, STAMTYPE_COUNTER, "/PATM/Patch/Installed/Function", STAMUNIT_OCCURENCES, "Number of installed function duplication patches."); + STAM_REG(pVM, &pVM->patm.s.StatInstalledTrampoline, STAMTYPE_COUNTER, "/PATM/Patch/Installed/Trampoline", STAMUNIT_OCCURENCES, "Number of installed trampoline patches."); + STAM_REG(pVM, &pVM->patm.s.StatInstalledJump, STAMTYPE_COUNTER, "/PATM/Patch/Installed/Jump", STAMUNIT_OCCURENCES, "Number of installed jump patches."); + + STAM_REG(pVM, &pVM->patm.s.StatOverwritten, STAMTYPE_COUNTER, "/PATM/Patch/Overwritten", STAMUNIT_OCCURENCES, "Number of overwritten patches."); + STAM_REG(pVM, &pVM->patm.s.StatFixedConflicts,STAMTYPE_COUNTER, "/PATM/Patch/ConflictFixed", STAMUNIT_OCCURENCES, "Number of fixed conflicts."); + STAM_REG(pVM, &pVM->patm.s.StatFlushed, STAMTYPE_COUNTER, "/PATM/Patch/Flushed", STAMUNIT_OCCURENCES, "Number of flushes of pages with patch jumps."); + STAM_REG(pVM, &pVM->patm.s.StatMonitored, STAMTYPE_COUNTER, "/PATM/Patch/Monitored", STAMUNIT_OCCURENCES, "Number of patches in monitored patch pages."); + STAM_REG(pVM, &pVM->patm.s.StatPageBoundaryCrossed, STAMTYPE_COUNTER, "/PATM/Patch/BoundaryCross", STAMUNIT_OCCURENCES, "Number of refused patches due to patch jump crossing page boundary."); + + STAM_REG(pVM, &pVM->patm.s.StatHandleTrap, STAMTYPE_PROFILE, "/PATM/HandleTrap", STAMUNIT_TICKS_PER_CALL, "Profiling of PATMR3HandleTrap"); + STAM_REG(pVM, &pVM->patm.s.StatPushTrap, STAMTYPE_COUNTER, "/PATM/HandleTrap/PushWP", STAMUNIT_OCCURENCES, "Number of traps due to monitored stack pages."); + + STAM_REG(pVM, &pVM->patm.s.StatSwitchBack, STAMTYPE_COUNTER, "/PATM/SwitchBack", STAMUNIT_OCCURENCES, "Switch back to original guest code when IF=1 & 
executing PATM instructions"); + STAM_REG(pVM, &pVM->patm.s.StatSwitchBackFail,STAMTYPE_COUNTER, "/PATM/SwitchBackFail", STAMUNIT_OCCURENCES, "Failed switch back to original guest code when IF=1 & executing PATM instructions"); + + STAM_REG(pVM, &pVM->patm.s.StatDuplicateREQFailed, STAMTYPE_COUNTER, "/PATM/Function/DupREQ/Failed", STAMUNIT_OCCURENCES, "Nr of failed PATMR3DuplicateFunctionRequest calls"); + STAM_REG(pVM, &pVM->patm.s.StatDuplicateREQSuccess, STAMTYPE_COUNTER, "/PATM/Function/DupREQ/Success", STAMUNIT_OCCURENCES, "Nr of successful PATMR3DuplicateFunctionRequest calls"); + STAM_REG(pVM, &pVM->patm.s.StatDuplicateUseExisting,STAMTYPE_COUNTER, "/PATM/Function/DupREQ/UseExist", STAMUNIT_OCCURENCES, "Nr of successful PATMR3DuplicateFunctionRequest calls when using an existing patch"); + + STAM_REG(pVM, &pVM->patm.s.StatFunctionLookupInsert, STAMTYPE_COUNTER, "/PATM/Function/Lookup/Insert", STAMUNIT_OCCURENCES, "Nr of successful function address insertions"); + STAM_REG(pVM, &pVM->patm.s.StatFunctionLookupReplace, STAMTYPE_COUNTER, "/PATM/Function/Lookup/Replace", STAMUNIT_OCCURENCES, "Nr of successful function address replacements"); + STAM_REG(pVM, &pVM->patm.s.StatU32FunctionMaxSlotsUsed, STAMTYPE_U32_RESET,"/PATM/Function/Lookup/MaxSlots", STAMUNIT_OCCURENCES, "Maximum nr of lookup slots used in all call patches"); + + STAM_REG(pVM, &pVM->patm.s.StatFunctionFound, STAMTYPE_COUNTER, "/PATM/Function/Found", STAMUNIT_OCCURENCES, "Nr of successful function patch lookups in GC"); + STAM_REG(pVM, &pVM->patm.s.StatFunctionNotFound, STAMTYPE_COUNTER, "/PATM/Function/NotFound", STAMUNIT_OCCURENCES, "Nr of failed function patch lookups in GC"); + + STAM_REG(pVM, &pVM->patm.s.StatPatchWrite, STAMTYPE_PROFILE, "/PATM/Write/Handle", STAMUNIT_TICKS_PER_CALL, "Profiling of PATMR3PatchWrite"); + STAM_REG(pVM, &pVM->patm.s.StatPatchWriteDetect, STAMTYPE_PROFILE, "/PATM/Write/Detect", STAMUNIT_TICKS_PER_CALL, "Profiling of PATMIsWriteToPatchPage"); + STAM_REG(pVM, &pVM->patm.s.StatPatchWriteInterpreted, STAMTYPE_COUNTER, "/PATM/Write/Interpreted/Success", STAMUNIT_OCCURENCES, "Nr of interpreted patch writes."); + STAM_REG(pVM, &pVM->patm.s.StatPatchWriteInterpretedFailed, STAMTYPE_COUNTER, "/PATM/Write/Interpreted/Failed", STAMUNIT_OCCURENCES, "Nr of failed interpreted patch writes."); + + STAM_REG(pVM, &pVM->patm.s.StatPatchRefreshSuccess, STAMTYPE_COUNTER, "/PATM/Refresh/Success", STAMUNIT_OCCURENCES, "Successful patch refreshes"); + STAM_REG(pVM, &pVM->patm.s.StatPatchRefreshFailed, STAMTYPE_COUNTER, "/PATM/Refresh/Failure", STAMUNIT_OCCURENCES, "Failed patch refreshes"); + + STAM_REG(pVM, &pVM->patm.s.StatPatchPageInserted, STAMTYPE_COUNTER, "/PATM/Page/Inserted", STAMUNIT_OCCURENCES, "Nr of inserted guest pages that were patched"); + STAM_REG(pVM, &pVM->patm.s.StatPatchPageRemoved, STAMTYPE_COUNTER, "/PATM/Page/Removed", STAMUNIT_OCCURENCES, "Nr of removed guest pages that were patched"); + + STAM_REG(pVM, &pVM->patm.s.StatInstrDirty, STAMTYPE_COUNTER, "/PATM/Instr/Dirty/Detected", STAMUNIT_OCCURENCES, "Number of times instructions were marked dirty."); + STAM_REG(pVM, &pVM->patm.s.StatInstrDirtyGood, STAMTYPE_COUNTER, "/PATM/Instr/Dirty/Corrected", STAMUNIT_OCCURENCES, "Number of times instructions were marked dirty and corrected later on."); + STAM_REG(pVM, &pVM->patm.s.StatInstrDirtyBad, STAMTYPE_COUNTER, "/PATM/Instr/Dirty/Failed", STAMUNIT_OCCURENCES, "Number of times instructions were marked dirty and we were not able to correct them."); + + STAM_REG(pVM, &pVM->patm.s.StatSysEnter, 
STAMTYPE_COUNTER, "/PATM/Emul/SysEnter", STAMUNIT_OCCURENCES, "Number of times sysenter was emulated."); + STAM_REG(pVM, &pVM->patm.s.StatSysExit, STAMTYPE_COUNTER, "/PATM/Emul/SysExit" , STAMUNIT_OCCURENCES, "Number of times sysexit was emulated."); + STAM_REG(pVM, &pVM->patm.s.StatEmulIret, STAMTYPE_COUNTER, "/PATM/Emul/Iret/Success", STAMUNIT_OCCURENCES, "Number of times iret was emulated."); + STAM_REG(pVM, &pVM->patm.s.StatEmulIretFailed, STAMTYPE_COUNTER, "/PATM/Emul/Iret/Failed", STAMUNIT_OCCURENCES, "Number of times iret was emulated."); + + STAM_REG(pVM, &pVM->patm.s.StatGenRet, STAMTYPE_COUNTER, "/PATM/Gen/Ret" , STAMUNIT_OCCURENCES, "Number of generated ret instructions."); + STAM_REG(pVM, &pVM->patm.s.StatGenRetReused, STAMTYPE_COUNTER, "/PATM/Gen/RetReused" , STAMUNIT_OCCURENCES, "Number of reused ret instructions."); + STAM_REG(pVM, &pVM->patm.s.StatGenCall, STAMTYPE_COUNTER, "/PATM/Gen/Call", STAMUNIT_OCCURENCES, "Number of generated call instructions."); + STAM_REG(pVM, &pVM->patm.s.StatGenJump, STAMTYPE_COUNTER, "/PATM/Gen/Jmp" , STAMUNIT_OCCURENCES, "Number of generated indirect jump instructions."); + STAM_REG(pVM, &pVM->patm.s.StatGenPopf, STAMTYPE_COUNTER, "/PATM/Gen/Popf" , STAMUNIT_OCCURENCES, "Number of generated popf instructions."); + + STAM_REG(pVM, &pVM->patm.s.StatCheckPendingIRQ, STAMTYPE_COUNTER, "/PATM/GC/CheckIRQ" , STAMUNIT_OCCURENCES, "Number of traps that ask to check for pending irqs."); +#endif /* VBOX_WITH_STATISTICS */ + + Log(("g_patmCallRecord.cbFunction %u\n", g_patmCallRecord.cbFunction)); + Log(("g_patmCallIndirectRecord.cbFunction %u\n", g_patmCallIndirectRecord.cbFunction)); + Log(("g_patmRetRecord.cbFunction %u\n", g_patmRetRecord.cbFunction)); + Log(("g_patmJumpIndirectRecord.cbFunction %u\n", g_patmJumpIndirectRecord.cbFunction)); + Log(("g_patmPopf32Record.cbFunction %u\n", g_patmPopf32Record.cbFunction)); + Log(("g_patmIretRecord.cbFunction %u\n", g_patmIretRecord.cbFunction)); + Log(("g_patmStiRecord.cbFunction %u\n", g_patmStiRecord.cbFunction)); + Log(("g_patmCheckIFRecord.cbFunction %u\n", g_patmCheckIFRecord.cbFunction)); + + return rc; +} + +/** + * Finalizes HMA page attributes. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PATMR3InitFinalize(PVM pVM) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return VINF_SUCCESS; + + /* + * The GC state, stack and statistics must be read/write for the guest + * (supervisor only of course). + * + * Remember, we run guest code at ring-1 and ring-2 levels, which are + * considered supervisor levels by the paging structures. We run the VMM + * in ring-0 with CR0.WP=0 and mapping all VMM structures as read-only + * pages. The following structures are exceptions and must be mapped with + * write access so the ring-1 and ring-2 code can modify them. 
+ */ + int rc = PGMMapSetPage(pVM, pVM->patm.s.pGCStateGC, PAGE_SIZE, X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW); + AssertLogRelMsgReturn(RT_SUCCESS(rc), ("Failed to make the GCState accessible to ring-1 and ring-2 code: %Rrc\n", rc), rc); + + rc = PGMMapSetPage(pVM, pVM->patm.s.pGCStackGC, PATM_STACK_TOTAL_SIZE, X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW); + AssertLogRelMsgReturn(RT_SUCCESS(rc), ("Failed to make the GCStack accessible to ring-1 and ring-2 code: %Rrc\n", rc), rc); + + rc = PGMMapSetPage(pVM, pVM->patm.s.pStatsGC, PATM_STAT_MEMSIZE, X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW); + AssertLogRelMsgReturn(RT_SUCCESS(rc), ("Failed to make the stats struct accessible to ring-1 and ring-2 code: %Rrc\n", rc), rc); + + /* + * Find the patch helper segment so we can identify code running there as patch code. + */ + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_PatchHlpBegin", &pVM->patm.s.pbPatchHelpersRC); + AssertLogRelMsgReturn(RT_SUCCESS(rc), ("Failed to resolve g_PatchHlpBegin: %Rrc\n", rc), rc); + pVM->patm.s.pbPatchHelpersR3 = (uint8_t *)MMHyperRCToR3(pVM, pVM->patm.s.pbPatchHelpersRC); + AssertLogRelReturn(pVM->patm.s.pbPatchHelpersR3 != NULL, VERR_INTERNAL_ERROR_3); + + RTRCPTR RCPtrEnd; + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_PatchHlpEnd", &RCPtrEnd); + AssertLogRelMsgReturn(RT_SUCCESS(rc), ("Failed to resolve g_PatchHlpEnd: %Rrc\n", rc), rc); + + pVM->patm.s.cbPatchHelpers = RCPtrEnd - pVM->patm.s.pbPatchHelpersRC; + AssertLogRelMsgReturn(pVM->patm.s.cbPatchHelpers < _128K, + ("%RRv-%RRv => %#x\n", pVM->patm.s.pbPatchHelpersRC, RCPtrEnd, pVM->patm.s.cbPatchHelpers), + VERR_INTERNAL_ERROR_4); + + + return VINF_SUCCESS; +} + +/** + * (Re)initializes PATM + * + * @param pVM The cross context VM structure. + */ +static int patmReinit(PVM pVM) +{ + int rc; + + /* + * Assert alignment and sizes. + */ + AssertRelease(!(RT_UOFFSETOF(VM, patm.s) & 31)); + AssertRelease(sizeof(pVM->patm.s) <= sizeof(pVM->patm.padding)); + + /* + * Setup any fixed pointers and offsets. + */ + pVM->patm.s.offVM = RT_UOFFSETOF(VM, patm); + +#ifndef RT_ARCH_AMD64 /* would be nice if this was changed everywhere. was driving me crazy on AMD64. */ +#ifndef PATM_DISABLE_ALL + pVM->fPATMEnabled = true; +#endif +#endif + + Assert(pVM->patm.s.pGCStateHC); + memset(pVM->patm.s.pGCStateHC, 0, PAGE_SIZE); + AssertReleaseMsg(pVM->patm.s.pGCStateGC, ("Impossible! MMHyperHC2GC(%p) failed!\n", pVM->patm.s.pGCStateGC)); + + Log(("Patch memory allocated at %p - %RRv\n", pVM->patm.s.pPatchMemHC, pVM->patm.s.pPatchMemGC)); + pVM->patm.s.pGCStateHC->uVMFlags = X86_EFL_IF; + + Assert(pVM->patm.s.pGCStackHC); + memset(pVM->patm.s.pGCStackHC, 0, PAGE_SIZE); + AssertReleaseMsg(pVM->patm.s.pGCStackGC, ("Impossible! MMHyperHC2GC(%p) failed!\n", pVM->patm.s.pGCStackGC)); + pVM->patm.s.pGCStateHC->Psp = PATM_STACK_SIZE; + pVM->patm.s.pGCStateHC->fPIF = 1; /* PATM Interrupt Flag */ + + Assert(pVM->patm.s.pStatsHC); + memset(pVM->patm.s.pStatsHC, 0, PATM_STAT_MEMSIZE); + AssertReleaseMsg(pVM->patm.s.pStatsGC, ("Impossible! MMHyperHC2GC(%p) failed!\n", pVM->patm.s.pStatsGC)); + + Assert(pVM->patm.s.pPatchMemHC); + Assert(pVM->patm.s.pPatchMemGC == MMHyperR3ToRC(pVM, pVM->patm.s.pPatchMemHC)); + memset(pVM->patm.s.pPatchMemHC, 0, PATCH_MEMORY_SIZE); + AssertReleaseMsg(pVM->patm.s.pPatchMemGC, ("Impossible! MMHyperHC2GC(%p) failed!\n", pVM->patm.s.pPatchMemHC)); + + /* Needed for future patching of sldt/sgdt/sidt/str etc. 
*/ + pVM->patm.s.pCPUMCtxGC = VM_RC_ADDR(pVM, CPUMQueryGuestCtxPtr(VMMGetCpu(pVM))); + + Assert(pVM->patm.s.PatchLookupTreeHC); + Assert(pVM->patm.s.PatchLookupTreeGC == MMHyperR3ToRC(pVM, pVM->patm.s.PatchLookupTreeHC)); + + /* + * (Re)Initialize PATM structure + */ + Assert(!pVM->patm.s.PatchLookupTreeHC->PatchTree); + Assert(!pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr); + Assert(!pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage); + pVM->patm.s.offPatchMem = 16; /* don't start with zero here */ + pVM->patm.s.uCurrentPatchIdx = 1; /* Index zero is a dummy */ + pVM->patm.s.pvFaultMonitor = 0; + pVM->patm.s.deltaReloc = 0; + + /* Lowest and highest patched instruction */ + pVM->patm.s.pPatchedInstrGCLowest = RTRCPTR_MAX; + pVM->patm.s.pPatchedInstrGCHighest = 0; + + pVM->patm.s.PatchLookupTreeHC->PatchTree = 0; + pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr = 0; + pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage = 0; + + pVM->patm.s.pfnSysEnterPatchGC = 0; + pVM->patm.s.pfnSysEnterGC = 0; + + pVM->patm.s.fOutOfMemory = false; + + pVM->patm.s.pfnHelperCallGC = 0; + patmR3DbgReset(pVM); + + /* Generate all global functions to be used by future patches. */ + /* We generate a fake patch in order to use the existing code for relocation. */ + rc = MMHyperAlloc(pVM, sizeof(PATMPATCHREC), 0, MM_TAG_PATM_PATCH, (void **)&pVM->patm.s.pGlobalPatchRec); + if (RT_FAILURE(rc)) + { + Log(("Out of memory!!!!\n")); + return VERR_NO_MEMORY; + } + pVM->patm.s.pGlobalPatchRec->patch.flags = PATMFL_GLOBAL_FUNCTIONS; + pVM->patm.s.pGlobalPatchRec->patch.uState = PATCH_ENABLED; + pVM->patm.s.pGlobalPatchRec->patch.pPatchBlockOffset = pVM->patm.s.offPatchMem; + + rc = patmPatchGenGlobalFunctions(pVM, &pVM->patm.s.pGlobalPatchRec->patch); + AssertRC(rc); + + /* Update free pointer in patch memory. */ + pVM->patm.s.offPatchMem += pVM->patm.s.pGlobalPatchRec->patch.uCurPatchOffset; + /* Round to next 8 byte boundary. */ + pVM->patm.s.offPatchMem = RT_ALIGN_32(pVM->patm.s.offPatchMem, 8); + + + return rc; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * The PATM will update the addresses used by the switcher. + * + * @param pVM The cross context VM structure. + * @param offDelta The relocation delta. + */ +VMMR3_INT_DECL(void) PATMR3Relocate(PVM pVM, RTRCINTPTR offDelta) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return; + + RTRCPTR GCPtrNew = MMHyperR3ToRC(pVM, pVM->patm.s.pGCStateHC); + Assert((RTRCINTPTR)(GCPtrNew - pVM->patm.s.pGCStateGC) == offDelta); + + Log(("PATMR3Relocate from %RRv to %RRv - delta %08X\n", pVM->patm.s.pGCStateGC, GCPtrNew, offDelta)); + if (offDelta) + { + PCPUMCTX pCtx; + + /* Update CPUMCTX guest context pointer. */ + pVM->patm.s.pCPUMCtxGC += offDelta; + + pVM->patm.s.deltaReloc = offDelta; + RTAvloU32DoWithAll(&pVM->patm.s.PatchLookupTreeHC->PatchTree, true, patmR3RelocatePatches, (void *)pVM); + + pVM->patm.s.pGCStateGC = GCPtrNew; + pVM->patm.s.pPatchMemGC = MMHyperR3ToRC(pVM, pVM->patm.s.pPatchMemHC); + pVM->patm.s.pGCStackGC = MMHyperR3ToRC(pVM, pVM->patm.s.pGCStackHC); + pVM->patm.s.pStatsGC = MMHyperR3ToRC(pVM, pVM->patm.s.pStatsHC); + pVM->patm.s.PatchLookupTreeGC = MMHyperR3ToRC(pVM, pVM->patm.s.PatchLookupTreeHC); + + if (pVM->patm.s.pfnSysEnterPatchGC) + pVM->patm.s.pfnSysEnterPatchGC += offDelta; + + /* If we are running patch code right now, then also adjust EIP. 
*/ + pCtx = CPUMQueryGuestCtxPtr(VMMGetCpu(pVM)); + if (PATMIsPatchGCAddr(pVM, pCtx->eip)) + pCtx->eip += offDelta; + + /* Deal with the global patch functions. */ + pVM->patm.s.pfnHelperCallGC += offDelta; + pVM->patm.s.pfnHelperRetGC += offDelta; + pVM->patm.s.pfnHelperIretGC += offDelta; + pVM->patm.s.pfnHelperJumpGC += offDelta; + + pVM->patm.s.pbPatchHelpersRC += offDelta; + + patmR3RelocatePatches(&pVM->patm.s.pGlobalPatchRec->Core, (void *)pVM); + } +} + + +/** + * Terminates the PATM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PATMR3Term(PVM pVM) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return VINF_SUCCESS; + + patmR3DbgTerm(pVM); + + /* Memory was all allocated from the two MM heaps and requires no freeing. */ + return VINF_SUCCESS; +} + + +/** + * PATM reset callback. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PATMR3Reset(PVM pVM) +{ + Log(("PATMR3Reset\n")); + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return VINF_SUCCESS; + + /* Free all patches. */ + for (;;) + { + PPATMPATCHREC pPatchRec = (PPATMPATCHREC)RTAvloU32RemoveBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, 0, true); + if (pPatchRec) + patmR3RemovePatch(pVM, pPatchRec, true); + else + break; + } + Assert(!pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage); + Assert(!pVM->patm.s.PatchLookupTreeHC->PatchTree); + pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr = 0; + pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage = 0; + + int rc = patmReinit(pVM); + if (RT_SUCCESS(rc)) + rc = PATMR3InitFinalize(pVM); /* paranoia */ + + return rc; +} + +/** + * @callback_method_impl{FNDISREADBYTES} + */ +static DECLCALLBACK(int) patmReadBytes(PDISCPUSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead) +{ + PATMDISASM *pDisInfo = (PATMDISASM *)pDis->pvUser; + +/** @todo change this to read more! */ + /* + * Trap/interrupt handler typically call common code on entry. Which might already have patches inserted. + * As we currently don't support calling patch code from patch code, we'll let it read the original opcode bytes instead. + */ + /** @todo could change in the future! 
*/ + if (pDisInfo->fReadFlags & PATMREAD_ORGCODE) + { + size_t cbRead = cbMaxRead; + RTUINTPTR uSrcAddr = pDis->uInstrAddr + offInstr; + int rc = PATMR3ReadOrgInstr(pDisInfo->pVM, pDis->uInstrAddr + offInstr, &pDis->abInstr[offInstr], cbRead, &cbRead); + if (RT_SUCCESS(rc)) + { + if (cbRead >= cbMinRead) + { + pDis->cbCachedInstr = offInstr + (uint8_t)cbRead; + return VINF_SUCCESS; + } + + cbMinRead -= (uint8_t)cbRead; + cbMaxRead -= (uint8_t)cbRead; + offInstr += (uint8_t)cbRead; + uSrcAddr += cbRead; + } + +#ifdef VBOX_STRICT + if ( !(pDisInfo->pPatchInfo->flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_IDTHANDLER)) + && !(pDisInfo->fReadFlags & PATMREAD_NOCHECK)) + { + Assert(PATMR3IsInsidePatchJump(pDisInfo->pVM, pDis->uInstrAddr + offInstr, NULL) == false); + Assert(PATMR3IsInsidePatchJump(pDisInfo->pVM, pDis->uInstrAddr + offInstr + cbMinRead-1, NULL) == false); + } +#endif + } + + int rc = VINF_SUCCESS; + RTGCPTR32 uSrcAddr = (RTGCPTR32)pDis->uInstrAddr + offInstr; + if ( !pDisInfo->pbInstrHC + || ( PAGE_ADDRESS(pDisInfo->pInstrGC) != PAGE_ADDRESS(uSrcAddr + cbMinRead - 1) + && !PATMIsPatchGCAddr(pDisInfo->pVM, uSrcAddr))) + { + Assert(!PATMIsPatchGCAddr(pDisInfo->pVM, uSrcAddr)); + rc = PGMPhysSimpleReadGCPtr(&pDisInfo->pVM->aCpus[0], &pDis->abInstr[offInstr], uSrcAddr, cbMinRead); + offInstr += cbMinRead; + } + else + { + /* + * pbInstrHC is the base address; adjust according to the GC pointer. + * + * Try read the max number of bytes here. Since the disassembler only + * ever uses these bytes for the current instruction, it doesn't matter + * much if we accidentally read the start of the next instruction even + * if it happens to be a patch jump or int3. + */ + uint8_t const *pbInstrHC = pDisInfo->pbInstrHC; AssertPtr(pbInstrHC); + pbInstrHC += uSrcAddr - pDisInfo->pInstrGC; + + size_t cbMaxRead1 = PAGE_SIZE - (uSrcAddr & PAGE_OFFSET_MASK); + size_t cbMaxRead2 = PAGE_SIZE - ((uintptr_t)pbInstrHC & PAGE_OFFSET_MASK); + size_t cbToRead = RT_MIN(cbMaxRead1, RT_MAX(cbMaxRead2, cbMinRead)); + if (cbToRead > cbMaxRead) + cbToRead = cbMaxRead; + + memcpy(&pDis->abInstr[offInstr], pbInstrHC, cbToRead); + offInstr += (uint8_t)cbToRead; + } + + pDis->cbCachedInstr = offInstr; + return rc; +} + + +DECLINLINE(bool) patmR3DisInstrToStr(PVM pVM, PPATCHINFO pPatch, RTGCPTR32 InstrGCPtr32, uint8_t *pbInstrHC, uint32_t fReadFlags, + PDISCPUSTATE pCpu, uint32_t *pcbInstr, char *pszOutput, size_t cbOutput) +{ + PATMDISASM disinfo; + disinfo.pVM = pVM; + disinfo.pPatchInfo = pPatch; + disinfo.pbInstrHC = pbInstrHC; + disinfo.pInstrGC = InstrGCPtr32; + disinfo.fReadFlags = fReadFlags; + return RT_SUCCESS(DISInstrToStrWithReader(InstrGCPtr32, + (pPatch->flags & PATMFL_CODE32) ? DISCPUMODE_32BIT : DISCPUMODE_16BIT, + patmReadBytes, &disinfo, + pCpu, pcbInstr, pszOutput, cbOutput)); +} + + +DECLINLINE(bool) patmR3DisInstr(PVM pVM, PPATCHINFO pPatch, RTGCPTR32 InstrGCPtr32, uint8_t *pbInstrHC, uint32_t fReadFlags, + PDISCPUSTATE pCpu, uint32_t *pcbInstr) +{ + PATMDISASM disinfo; + disinfo.pVM = pVM; + disinfo.pPatchInfo = pPatch; + disinfo.pbInstrHC = pbInstrHC; + disinfo.pInstrGC = InstrGCPtr32; + disinfo.fReadFlags = fReadFlags; + return RT_SUCCESS(DISInstrWithReader(InstrGCPtr32, + (pPatch->flags & PATMFL_CODE32) ? 
DISCPUMODE_32BIT : DISCPUMODE_16BIT, + patmReadBytes, &disinfo, + pCpu, pcbInstr)); +} + + +DECLINLINE(bool) patmR3DisInstrNoStrOpMode(PVM pVM, PPATCHINFO pPatch, RTGCPTR32 InstrGCPtr32, uint8_t *pbInstrHC, + uint32_t fReadFlags, + PDISCPUSTATE pCpu, uint32_t *pcbInstr) +{ + PATMDISASM disinfo; + disinfo.pVM = pVM; + disinfo.pPatchInfo = pPatch; + disinfo.pbInstrHC = pbInstrHC; + disinfo.pInstrGC = InstrGCPtr32; + disinfo.fReadFlags = fReadFlags; + return RT_SUCCESS(DISInstrWithReader(InstrGCPtr32, pPatch->uOpMode, patmReadBytes, &disinfo, + pCpu, pcbInstr)); +} + +#ifdef LOG_ENABLED +# define PATM_LOG_ORG_PATCH_INSTR(a_pVM, a_pPatch, a_szComment) \ + PATM_LOG_PATCH_INSTR(a_pVM, a_pPatch, PATMREAD_ORGCODE, a_szComment, " patch:") +# define PATM_LOG_RAW_PATCH_INSTR(a_pVM, a_pPatch, a_szComment) \ + PATM_LOG_PATCH_INSTR(a_pVM, a_pPatch, PATMREAD_RAWCODE, a_szComment, " patch:") + +# define PATM_LOG_PATCH_INSTR(a_pVM, a_pPatch, a_fFlags, a_szComment1, a_szComment2) \ + do { \ + if (LogIsEnabled()) \ + patmLogRawPatchInstr(a_pVM, a_pPatch, a_fFlags, a_szComment1, a_szComment2); \ + } while (0) + +static void patmLogRawPatchInstr(PVM pVM, PPATCHINFO pPatch, uint32_t fFlags, + const char *pszComment1, const char *pszComment2) +{ + DISCPUSTATE DisState; + char szOutput[128]; + szOutput[0] = '\0'; + patmR3DisInstrToStr(pVM, pPatch, pPatch->pPrivInstrGC, NULL, fFlags, + &DisState, NULL, szOutput, sizeof(szOutput)); + Log(("%s%s %s", pszComment1, pszComment2, szOutput)); +} + +#else +# define PATM_LOG_ORG_PATCH_INSTR(a_pVM, a_pPatch, a_szComment) do { } while (0) +# define PATM_LOG_RAW_PATCH_INSTR(a_pVM, a_pPatch, a_szComment) do { } while (0) +# define PATM_LOG_PATCH_INSTR(a_pVM, a_pPatch, a_fFlags, a_szComment1, a_szComment2) do { } while (0) +#endif + + +/** + * Callback function for RTAvloU32DoWithAll + * + * Updates all fixups in the patches + * + * @returns VBox status code. + * @param pNode Current node + * @param pParam Pointer to the VM. + */ +static DECLCALLBACK(int) patmR3RelocatePatches(PAVLOU32NODECORE pNode, void *pParam) +{ + PPATMPATCHREC pPatch = (PPATMPATCHREC)pNode; + PVM pVM = (PVM)pParam; + RTRCINTPTR delta; + int rc; + + /* Nothing to do if the patch is not active. */ + if (pPatch->patch.uState == PATCH_REFUSED) + return 0; + + if (pPatch->patch.flags & PATMFL_PATCHED_GUEST_CODE) + PATM_LOG_PATCH_INSTR(pVM, &pPatch->patch, PATMREAD_RAWCODE, "Org patch jump:", ""); + + Log(("Nr of fixups %d\n", pPatch->patch.nrFixups)); + delta = (RTRCINTPTR)pVM->patm.s.deltaReloc; + + /* + * Apply fixups. + */ + AVLPVKEY key = NULL; + for (;;) + { + /* Get the record that's closest from above (after or equal to key). */ + PRELOCREC pRec = (PRELOCREC)RTAvlPVGetBestFit(&pPatch->patch.FixupTree, key, true); + if (!pRec) + break; + + key = (uint8_t *)pRec->Core.Key + 1; /* search for the next record during the next round. 
*/ + + switch (pRec->uType) + { + case FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL: + Assert(pRec->pDest == pRec->pSource); Assert(PATM_IS_ASMFIX(pRec->pSource)); + Log(("Absolute patch template fixup type %#x at %RHv -> %RHv at %RRv\n", pRec->pSource, *(RTRCUINTPTR *)pRec->pRelocPos, *(RTRCINTPTR*)pRec->pRelocPos + delta, pRec->pRelocPos)); + *(RTRCUINTPTR *)pRec->pRelocPos += delta; + break; + + case FIXUP_ABSOLUTE: + Log(("Absolute fixup at %RRv %RHv -> %RHv at %RRv\n", pRec->pSource, *(RTRCUINTPTR *)pRec->pRelocPos, *(RTRCINTPTR*)pRec->pRelocPos + delta, pRec->pRelocPos)); + if ( !pRec->pSource + || PATMIsPatchGCAddr(pVM, pRec->pSource)) + { + *(RTRCUINTPTR *)pRec->pRelocPos += delta; + } + else + { + uint8_t curInstr[15]; + uint8_t oldInstr[15]; + Assert(pRec->pSource && pPatch->patch.cbPrivInstr <= 15); + + Assert(!(pPatch->patch.flags & PATMFL_GLOBAL_FUNCTIONS)); + + memcpy(oldInstr, pPatch->patch.aPrivInstr, pPatch->patch.cbPrivInstr); + *(RTRCPTR *)&oldInstr[pPatch->patch.cbPrivInstr - sizeof(RTRCPTR)] = pRec->pDest; + + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), curInstr, pPatch->patch.pPrivInstrGC, pPatch->patch.cbPrivInstr); + Assert(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + + pRec->pDest = (RTRCPTR)((RTRCUINTPTR)pRec->pDest + delta); + + if ( rc == VERR_PAGE_NOT_PRESENT + || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + RTRCPTR pPage = pPatch->patch.pPrivInstrGC & PAGE_BASE_GC_MASK; + + Log(("PATM: Patch page not present -> check later!\n")); + rc = PGMR3HandlerVirtualRegister(pVM, VMMGetCpu(pVM), pVM->patm.s.hMonitorPageType, + pPage, + pPage + (PAGE_SIZE - 1) /* inclusive! */, + (void *)(uintptr_t)pPage, NIL_RTRCPTR /*pvUserRC*/, NULL /*pszDesc*/); + Assert(RT_SUCCESS(rc) || rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT); + } + else + if (memcmp(curInstr, oldInstr, pPatch->patch.cbPrivInstr)) + { + Log(("PATM: Patch was overwritten -> disabling patch!!\n")); + /* + * Disable patch; this is not a good solution + */ + /** @todo hopefully it was completely overwritten (if the read was successful)!!!! */ + pPatch->patch.uState = PATCH_DISABLED; + } + else + if (RT_SUCCESS(rc)) + { + *(RTRCPTR *)&curInstr[pPatch->patch.cbPrivInstr - sizeof(RTRCPTR)] = pRec->pDest; + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pRec->pSource, curInstr, pPatch->patch.cbPrivInstr); + AssertRC(rc); + } + } + break; + + case FIXUP_REL_JMPTOPATCH: + { + RTRCPTR pTarget = (RTRCPTR)((RTRCINTPTR)pRec->pDest + delta); + + if ( pPatch->patch.uState == PATCH_ENABLED + && (pPatch->patch.flags & PATMFL_PATCHED_GUEST_CODE)) + { + uint8_t oldJump[SIZEOF_NEAR_COND_JUMP32]; + uint8_t temp[SIZEOF_NEAR_COND_JUMP32]; + RTRCPTR pJumpOffGC; + RTRCINTPTR displ = (RTRCINTPTR)pTarget - (RTRCINTPTR)pRec->pSource; + RTRCINTPTR displOld= (RTRCINTPTR)pRec->pDest - (RTRCINTPTR)pRec->pSource; + +#if 0 /** @todo '*(int32_t*)pRec->pRelocPos' crashes on restore of an XP VM here. pRelocPos=0x8000dbe2180a (bird) */ + Log(("Relative fixup (g2p) %08X -> %08X at %08X (source=%08x, target=%08x)\n", *(int32_t*)pRec->pRelocPos, displ, pRec->pRelocPos, pRec->pSource, pRec->pDest)); +#else + Log(("Relative fixup (g2p) ???????? 
-> %08X at %08X (source=%08x, target=%08x)\n", displ, pRec->pRelocPos, pRec->pSource, pRec->pDest)); +#endif + + Assert(pRec->pSource - pPatch->patch.cbPatchJump == pPatch->patch.pPrivInstrGC); +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES + if (pPatch->patch.cbPatchJump == SIZEOF_NEAR_COND_JUMP32) + { + Assert(pPatch->patch.flags & PATMFL_JUMP_CONFLICT); + + pJumpOffGC = pPatch->patch.pPrivInstrGC + 2; //two byte opcode + oldJump[0] = pPatch->patch.aPrivInstr[0]; + oldJump[1] = pPatch->patch.aPrivInstr[1]; + *(RTRCUINTPTR *)&oldJump[2] = displOld; + } + else +#endif + if (pPatch->patch.cbPatchJump == SIZEOF_NEARJUMP32) + { + pJumpOffGC = pPatch->patch.pPrivInstrGC + 1; //one byte opcode + oldJump[0] = 0xE9; + *(RTRCUINTPTR *)&oldJump[1] = displOld; + } + else + { + AssertMsgFailed(("Invalid patch jump size %d\n", pPatch->patch.cbPatchJump)); + continue; //this should never happen!! + } + Assert(pPatch->patch.cbPatchJump <= sizeof(temp)); + + /* + * Read old patch jump and compare it to the one we previously installed + */ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), temp, pPatch->patch.pPrivInstrGC, pPatch->patch.cbPatchJump); + Assert(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + + if ( rc == VERR_PAGE_NOT_PRESENT + || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + RTRCPTR pPage = pPatch->patch.pPrivInstrGC & PAGE_BASE_GC_MASK; + Log(("PATM: Patch page not present -> check later!\n")); + rc = PGMR3HandlerVirtualRegister(pVM, VMMGetCpu(pVM), pVM->patm.s.hMonitorPageType, + pPage, + pPage + (PAGE_SIZE - 1) /* inclusive! */, + (void *)(uintptr_t)pPage, NIL_RTRCPTR /*pvUserRC*/, NULL /*pszDesc*/); + Assert(RT_SUCCESS(rc) || rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT); + } + else + if (memcmp(temp, oldJump, pPatch->patch.cbPatchJump)) + { + Log(("PATM: Patch jump was overwritten -> disabling patch!!\n")); + /* + * Disable patch; this is not a good solution + */ + /** @todo hopefully it was completely overwritten (if the read was successful)!!!! */ + pPatch->patch.uState = PATCH_DISABLED; + } + else + if (RT_SUCCESS(rc)) + { + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pJumpOffGC, &displ, sizeof(displ)); + AssertRC(rc); + } + else + AssertMsgFailed(("Unexpected error %d from MMR3PhysReadGCVirt\n", rc)); + } + else + Log(("Skip the guest jump to patch code for this disabled patch %RGv - %08X\n", pPatch->patch.pPrivInstrGC, pRec->pRelocPos)); + + pRec->pDest = pTarget; + break; + } + + case FIXUP_REL_JMPTOGUEST: + { + RTRCPTR pSource = (RTRCPTR)((RTRCINTPTR)pRec->pSource + delta); + RTRCINTPTR displ = (RTRCINTPTR)pRec->pDest - (RTRCINTPTR)pSource; + + Assert(!(pPatch->patch.flags & PATMFL_GLOBAL_FUNCTIONS)); + Log(("Relative fixup (p2g) %08X -> %08X at %08X (source=%08x, target=%08x)\n", *(int32_t*)pRec->pRelocPos, displ, pRec->pRelocPos, pRec->pSource, pRec->pDest)); + *(RTRCUINTPTR *)pRec->pRelocPos = displ; + pRec->pSource = pSource; + break; + } + + case FIXUP_REL_HELPER_IN_PATCH_ASM_TMPL: + case FIXUP_CONSTANT_IN_PATCH_ASM_TMPL: + /* Only applicable when loading state. 
*/ + Assert(pRec->pDest == pRec->pSource); + Assert(PATM_IS_ASMFIX(pRec->pSource)); + break; + + default: + AssertMsg(0, ("Invalid fixup type!!\n")); + return VERR_INVALID_PARAMETER; + } + } + + if (pPatch->patch.flags & PATMFL_PATCHED_GUEST_CODE) + PATM_LOG_PATCH_INSTR(pVM, &pPatch->patch, PATMREAD_RAWCODE, "Rel patch jump:", ""); + return 0; +} + +#ifdef VBOX_WITH_DEBUGGER + +/** + * Callback function for RTAvloU32DoWithAll + * + * Enables the patch that's being enumerated + * + * @returns 0 (continue enumeration). + * @param pNode Current node + * @param pVM The cross context VM structure. + */ +static DECLCALLBACK(int) EnableAllPatches(PAVLOU32NODECORE pNode, void *pVM) +{ + PPATMPATCHREC pPatch = (PPATMPATCHREC)pNode; + + PATMR3EnablePatch((PVM)pVM, (RTRCPTR)pPatch->Core.Key); + return 0; +} + + +/** + * Callback function for RTAvloU32DoWithAll + * + * Disables the patch that's being enumerated + * + * @returns 0 (continue enumeration). + * @param pNode Current node + * @param pVM The cross context VM structure. + */ +static DECLCALLBACK(int) DisableAllPatches(PAVLOU32NODECORE pNode, void *pVM) +{ + PPATMPATCHREC pPatch = (PPATMPATCHREC)pNode; + + PATMR3DisablePatch((PVM)pVM, (RTRCPTR)pPatch->Core.Key); + return 0; +} + +#endif /* VBOX_WITH_DEBUGGER */ + +/** + * Returns the host context pointer of the GC context structure + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(PPATMGCSTATE) PATMR3QueryGCStateHC(PVM pVM) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), NULL); + return pVM->patm.s.pGCStateHC; +} + + +/** + * Allows or disallow patching of privileged instructions executed by the guest OS + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param fAllowPatching Allow/disallow patching + */ +VMMR3DECL(int) PATMR3AllowPatching(PUVM pUVM, bool fAllowPatching) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + if (VM_IS_RAW_MODE_ENABLED(pVM)) + pVM->fPATMEnabled = fAllowPatching; + else + Assert(!pVM->fPATMEnabled); + return VINF_SUCCESS; +} + + +/** + * Checks if the patch manager is enabled or not. + * + * @returns true if enabled, false if not (or if invalid handle). + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) PATMR3IsEnabled(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return PATMIsEnabled(pVM); +} + + +/** + * Convert a GC patch block pointer to a HC patch pointer + * + * @returns HC pointer or NULL if it's not a GC patch pointer + * @param pVM The cross context VM structure. + * @param pAddrGC GC pointer + */ +VMMR3_INT_DECL(void *) PATMR3GCPtrToHCPtr(PVM pVM, RTRCPTR pAddrGC) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), NULL); + RTRCUINTPTR offPatch = (RTRCUINTPTR)pAddrGC - (RTRCUINTPTR)pVM->patm.s.pPatchMemGC; + if (offPatch >= pVM->patm.s.cbPatchMem) + { + offPatch = (RTRCUINTPTR)pAddrGC - (RTRCUINTPTR)pVM->patm.s.pbPatchHelpersRC; + if (offPatch >= pVM->patm.s.cbPatchHelpers) + return NULL; + return pVM->patm.s.pbPatchHelpersR3 + offPatch; + } + return pVM->patm.s.pPatchMemHC + offPatch; +} + + +/** + * Convert guest context address to host context pointer + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pCacheRec Address conversion cache record + * @param pGCPtr Guest context pointer + * + * @returns Host context pointer or NULL in case of an error + * + */ +R3PTRTYPE(uint8_t *) patmR3GCVirtToHCVirt(PVM pVM, PPATMP2GLOOKUPREC pCacheRec, RCPTRTYPE(uint8_t *) pGCPtr) +{ + int rc; + R3PTRTYPE(uint8_t *) pHCPtr; + uint32_t offset; + + offset = (RTRCUINTPTR)pGCPtr - (RTRCUINTPTR)pVM->patm.s.pPatchMemGC; + if (offset < pVM->patm.s.cbPatchMem) + { +#ifdef VBOX_STRICT + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + Assert(pPatch); Assert(offset - pPatch->pPatchBlockOffset < pPatch->cbPatchBlockSize); +#endif + return pVM->patm.s.pPatchMemHC + offset; + } + /* Note! We're _not_ including the patch helpers here. */ + + offset = pGCPtr & PAGE_OFFSET_MASK; + if (pCacheRec->pGuestLoc == (pGCPtr & PAGE_BASE_GC_MASK)) + return pCacheRec->pPageLocStartHC + offset; + + /* Release previous lock if any. */ + if (pCacheRec->Lock.pvMap) + { + PGMPhysReleasePageMappingLock(pVM, &pCacheRec->Lock); + pCacheRec->Lock.pvMap = NULL; + } + + rc = PGMPhysGCPtr2CCPtrReadOnly(VMMGetCpu(pVM), pGCPtr, (const void **)&pHCPtr, &pCacheRec->Lock); + if (rc != VINF_SUCCESS) + { + AssertMsg(rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("MMR3PhysGCVirt2HCVirtEx failed for %08X\n", pGCPtr)); + return NULL; + } + pCacheRec->pPageLocStartHC = (R3PTRTYPE(uint8_t*))((RTHCUINTPTR)pHCPtr & PAGE_BASE_HC_MASK); + pCacheRec->pGuestLoc = pGCPtr & PAGE_BASE_GC_MASK; + return pHCPtr; +} + + +/** + * Calculates and fills in all branch targets + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Current patch block pointer + * + */ +static int patmr3SetBranchTargets(PVM pVM, PPATCHINFO pPatch) +{ + int32_t displ; + + PJUMPREC pRec = 0; + unsigned nrJumpRecs = 0; + + /* + * Set all branch targets inside the patch block. + * We remove all jump records as they are no longer needed afterwards. + */ + while (true) + { + RCPTRTYPE(uint8_t *) pInstrGC; + RCPTRTYPE(uint8_t *) pBranchTargetGC = 0; + + pRec = (PJUMPREC)RTAvlPVRemoveBestFit(&pPatch->JumpTree, 0, true); + if (pRec == 0) + break; + + nrJumpRecs++; + + /* HC in patch block to GC in patch block. */ + pInstrGC = patmPatchHCPtr2PatchGCPtr(pVM, pRec->pJumpHC); + + if (pRec->opcode == OP_CALL) + { + /* Special case: call function replacement patch from this patch block. + */ + PPATMPATCHREC pFunctionRec = patmQueryFunctionPatch(pVM, pRec->pTargetGC); + if (!pFunctionRec) + { + int rc; + + if (PATMR3HasBeenPatched(pVM, pRec->pTargetGC) == false) + rc = PATMR3InstallPatch(pVM, pRec->pTargetGC, PATMFL_CODE32 | PATMFL_DUPLICATE_FUNCTION); + else + rc = VERR_PATCHING_REFUSED; /* exists as a normal patch; can't use it */ + + if (RT_FAILURE(rc)) + { + uint8_t *pPatchHC; + RTRCPTR pPatchGC; + RTRCPTR pOrgInstrGC; + + pOrgInstrGC = PATMR3PatchToGCPtr(pVM, pInstrGC, 0); + Assert(pOrgInstrGC); + + /* Failure for some reason -> mark exit point with int 3. 
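+ Writing the single 0xCC byte below turns just this call site into a breakpoint, while the
+ rest of the patch block stays usable (hence the 'continue' further down instead of failing
+ the whole patch).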
*/ + Log(("Failed to install function replacement patch (at %x) for reason %Rrc\n", pOrgInstrGC, rc)); + + pPatchGC = patmGuestGCPtrToPatchGCPtr(pVM, pPatch, pOrgInstrGC); + Assert(pPatchGC); + + pPatchHC = pVM->patm.s.pPatchMemHC + (pPatchGC - pVM->patm.s.pPatchMemGC); + + /* Set a breakpoint at the very beginning of the recompiled instruction */ + *pPatchHC = 0xCC; + + continue; + } + } + else + { + Log(("Patch block %RRv called as function\n", pFunctionRec->patch.pPrivInstrGC)); + pFunctionRec->patch.flags |= PATMFL_CODE_REFERENCED; + } + + pBranchTargetGC = PATMR3QueryPatchGCPtr(pVM, pRec->pTargetGC); + } + else + pBranchTargetGC = patmGuestGCPtrToPatchGCPtr(pVM, pPatch, pRec->pTargetGC); + + if (pBranchTargetGC == 0) + { + AssertMsgFailed(("patmr3SetBranchTargets: patmGuestGCPtrToPatchGCPtr failed for %08X\n", pRec->pTargetGC)); + return VERR_PATCHING_REFUSED; + } + /* Our jumps *always* have a dword displacement (to make things easier). */ + Assert(sizeof(uint32_t) == sizeof(RTRCPTR)); + displ = pBranchTargetGC - (pInstrGC + pRec->offDispl + sizeof(RTRCPTR)); + *(RTRCPTR *)(pRec->pJumpHC + pRec->offDispl) = displ; + Log(("Set branch target %d to %08X : %08x - (%08x + %d + %d)\n", nrJumpRecs, displ, pBranchTargetGC, pInstrGC, pRec->offDispl, sizeof(RTRCPTR))); + } + Assert(nrJumpRecs == pPatch->nrJumpRecs); + Assert(pPatch->JumpTree == 0); + return VINF_SUCCESS; +} + +/** + * Add an illegal instruction record + * + * @param pVM The cross context VM structure. + * @param pPatch Patch structure ptr + * @param pInstrGC Guest context pointer to privileged instruction + * + */ +static void patmAddIllegalInstrRecord(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + PAVLPVNODECORE pRec; + + pRec = (PAVLPVNODECORE)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(*pRec)); + Assert(pRec); + pRec->Key = (AVLPVKEY)(uintptr_t)pInstrGC; + + bool ret = RTAvlPVInsert(&pPatch->pTempInfo->IllegalInstrTree, pRec); + Assert(ret); NOREF(ret); + pPatch->pTempInfo->nrIllegalInstr++; +} + +static bool patmIsIllegalInstr(PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + PAVLPVNODECORE pRec; + + pRec = RTAvlPVGet(&pPatch->pTempInfo->IllegalInstrTree, (AVLPVKEY)(uintptr_t)pInstrGC); + if (pRec) + return true; + else + return false; +} + +/** + * Add a patch to guest lookup record + * + * @param pVM The cross context VM structure. + * @param pPatch Patch structure ptr + * @param pPatchInstrHC Guest context pointer to patch block + * @param pInstrGC Guest context pointer to privileged instruction + * @param enmType Lookup type + * @param fDirty Dirty flag + * + * @note Be extremely careful with this function. Make absolutely sure the guest + * address is correct! (to avoid executing instructions twice!) + */ +void patmR3AddP2GLookupRecord(PVM pVM, PPATCHINFO pPatch, uint8_t *pPatchInstrHC, RTRCPTR pInstrGC, PATM_LOOKUP_TYPE enmType, bool fDirty) +{ + bool ret; + PRECPATCHTOGUEST pPatchToGuestRec; + PRECGUESTTOPATCH pGuestToPatchRec; + uint32_t PatchOffset = pPatchInstrHC - pVM->patm.s.pPatchMemHC; /* Offset in memory reserved for PATM. 
*/ + + LogFlowFunc(("pVM=%#p pPatch=%#p pPatchInstrHC=%#p pInstrGC=%#x enmType=%d fDirty=%RTbool\n", + pVM, pPatch, pPatchInstrHC, pInstrGC, enmType, fDirty)); + + if (enmType == PATM_LOOKUP_PATCH2GUEST) + { + pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32Get(&pPatch->Patch2GuestAddrTree, PatchOffset); + if (pPatchToGuestRec && pPatchToGuestRec->Core.Key == PatchOffset) + return; /* already there */ + + Assert(!pPatchToGuestRec); + } +#ifdef VBOX_STRICT + else + { + pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32Get(&pPatch->Patch2GuestAddrTree, PatchOffset); + Assert(!pPatchToGuestRec); + } +#endif + + pPatchToGuestRec = (PRECPATCHTOGUEST)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(RECPATCHTOGUEST) + sizeof(RECGUESTTOPATCH)); + Assert(pPatchToGuestRec); + pPatchToGuestRec->Core.Key = PatchOffset; + pPatchToGuestRec->pOrgInstrGC = pInstrGC; + pPatchToGuestRec->enmType = enmType; + pPatchToGuestRec->fDirty = fDirty; + + ret = RTAvlU32Insert(&pPatch->Patch2GuestAddrTree, &pPatchToGuestRec->Core); + Assert(ret); + + /* GC to patch address */ + if (enmType == PATM_LOOKUP_BOTHDIR) + { + pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32Get(&pPatch->Guest2PatchAddrTree, pInstrGC); + if (!pGuestToPatchRec) + { + pGuestToPatchRec = (PRECGUESTTOPATCH)(pPatchToGuestRec+1); + pGuestToPatchRec->Core.Key = pInstrGC; + pGuestToPatchRec->PatchOffset = PatchOffset; + + ret = RTAvlU32Insert(&pPatch->Guest2PatchAddrTree, &pGuestToPatchRec->Core); + Assert(ret); + } + } + + pPatch->nrPatch2GuestRecs++; +} + + +/** + * Removes a patch to guest lookup record + * + * @param pVM The cross context VM structure. + * @param pPatch Patch structure ptr + * @param pPatchInstrGC Guest context pointer to patch block + */ +void patmr3RemoveP2GLookupRecord(PVM pVM, PPATCHINFO pPatch, RTRCPTR pPatchInstrGC) +{ + PAVLU32NODECORE pNode; + PAVLU32NODECORE pNode2; + PRECPATCHTOGUEST pPatchToGuestRec; + uint32_t PatchOffset = pPatchInstrGC - pVM->patm.s.pPatchMemGC; /* Offset in memory reserved for PATM. */ + + pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32Get(&pPatch->Patch2GuestAddrTree, PatchOffset); + Assert(pPatchToGuestRec); + if (pPatchToGuestRec) + { + if (pPatchToGuestRec->enmType == PATM_LOOKUP_BOTHDIR) + { + PRECGUESTTOPATCH pGuestToPatchRec = (PRECGUESTTOPATCH)(pPatchToGuestRec+1); + + Assert(pGuestToPatchRec->Core.Key); + pNode2 = RTAvlU32Remove(&pPatch->Guest2PatchAddrTree, pGuestToPatchRec->Core.Key); + Assert(pNode2); + } + pNode = RTAvlU32Remove(&pPatch->Patch2GuestAddrTree, pPatchToGuestRec->Core.Key); + Assert(pNode); + + MMR3HeapFree(pPatchToGuestRec); + pPatch->nrPatch2GuestRecs--; + } +} + + +/** + * RTAvlPVDestroy callback. + */ +static DECLCALLBACK(int) patmEmptyTreePVCallback(PAVLPVNODECORE pNode, void *) +{ + MMR3HeapFree(pNode); + return 0; +} + +/** + * Empty the specified tree (PV tree, MMR3 heap) + * + * @param pVM The cross context VM structure. + * @param ppTree Tree to empty + */ +static void patmEmptyTree(PVM pVM, PAVLPVNODECORE *ppTree) +{ + NOREF(pVM); + RTAvlPVDestroy(ppTree, patmEmptyTreePVCallback, NULL); +} + + +/** + * RTAvlU32Destroy callback. + */ +static DECLCALLBACK(int) patmEmptyTreeU32Callback(PAVLU32NODECORE pNode, void *) +{ + MMR3HeapFree(pNode); + return 0; +} + +/** + * Empty the specified tree (U32 tree, MMR3 heap) + * + * @param pVM The cross context VM structure. 
+ * @param ppTree Tree to empty + */ +static void patmEmptyTreeU32(PVM pVM, PPAVLU32NODECORE ppTree) +{ + NOREF(pVM); + RTAvlU32Destroy(ppTree, patmEmptyTreeU32Callback, NULL); +} + + +/** + * Analyses the instructions following the cli for compliance with our heuristics for cli & pushf + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCpu CPU disassembly state + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pCacheRec Cache record ptr + * + */ +static int patmAnalyseBlockCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + bool fIllegalInstr = false; + + /* + * Preliminary heuristics: + *- no call instructions without a fixed displacement between cli and sti/popf + *- no jumps in the instructions following cli (4+ bytes; enough for the replacement jump (5 bytes)) + *- no nested pushf/cli + *- sti/popf should be the (eventual) target of all branches + *- no near or far returns; no int xx, no into + * + * Note: Later on we can impose less stricter guidelines if the need arises + */ + + /* Bail out if the patch gets too big. */ + if (pPatch->cbPatchBlockSize >= MAX_PATCH_SIZE) + { + Log(("Code block too big (%x) for patch at %RRv!!\n", pPatch->cbPatchBlockSize, pCurInstrGC)); + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + else + { + /* No unconditional jumps or calls without fixed displacements. */ + if ( (pCpu->pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + && (pCpu->pCurInstr->uOpcode == OP_JMP || pCpu->pCurInstr->uOpcode == OP_CALL) + ) + { + Assert(pCpu->Param1.cb <= 4 || pCpu->Param1.cb == 6); + if ( pCpu->Param1.cb == 6 /* far call/jmp */ + || (pCpu->pCurInstr->uOpcode == OP_CALL && !(pPatch->flags & PATMFL_SUPPORT_CALLS)) + || (OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) != OP_PARM_J && !(pPatch->flags & PATMFL_SUPPORT_INDIRECT_CALLS)) + ) + { + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + } + + /* An unconditional (short) jump right after a cli is a potential problem; we will overwrite whichever function comes afterwards */ + if (pPatch->opcode == OP_CLI && pCpu->pCurInstr->uOpcode == OP_JMP) + { + if ( pCurInstrGC > pPatch->pPrivInstrGC + && pCurInstrGC + pCpu->cbInstr < pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32) /* hardcoded patch jump size; cbPatchJump is still zero */ + { + Log(("Dangerous unconditional jump ends in our generated patch jump!! (%x vs %x)\n", pCurInstrGC, pPatch->pPrivInstrGC)); + /* We turn this one into a int 3 callable patch. */ + pPatch->flags |= PATMFL_INT3_REPLACEMENT_BLOCK; + } + } + else + /* no nested pushfs just yet; nested cli is allowed for cli patches though. */ + if (pPatch->opcode == OP_PUSHF) + { + if (pCurInstrGC != pInstrGC && pCpu->pCurInstr->uOpcode == OP_PUSHF) + { + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + } + + /* no far returns */ + if (pCpu->pCurInstr->uOpcode == OP_RETF) + { + pPatch->pTempInfo->nrRetInstr++; + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + else if ( pCpu->pCurInstr->uOpcode == OP_INT3 + || pCpu->pCurInstr->uOpcode == OP_INT + || pCpu->pCurInstr->uOpcode == OP_INTO) + { + /* No int xx or into either. 
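+ (Software interrupts would leave the analysed block through an interrupt gate, so they are
+ treated much like far returns here.)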
*/ + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + } + + pPatch->cbPatchBlockSize += pCpu->cbInstr; + + /* Illegal instruction -> end of analysis phase for this code block */ + if (fIllegalInstr || patmIsIllegalInstr(pPatch, pCurInstrGC)) + return VINF_SUCCESS; + + /* Check for exit points. */ + switch (pCpu->pCurInstr->uOpcode) + { + case OP_SYSEXIT: + return VINF_SUCCESS; /* duplicate it; will fault or emulated in GC. */ + + case OP_SYSENTER: + case OP_ILLUD2: + /* This appears to be some kind of kernel panic in Linux 2.4; no point to analyse more. */ + Log(("Illegal opcode (0xf 0xb) -> return here\n")); + return VINF_SUCCESS; + + case OP_STI: + case OP_POPF: + Assert(!(pPatch->flags & (PATMFL_DUPLICATE_FUNCTION))); + /* If out exit point lies within the generated patch jump, then we have to refuse!! */ + if (pCurInstrGC > pPatch->pPrivInstrGC && pCurInstrGC < pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32) /* hardcoded patch jump size; cbPatchJump is still zero */ + { + Log(("Exit point within patch jump itself!! (%x vs %x)\n", pCurInstrGC, pPatch->pPrivInstrGC)); + return VERR_PATCHING_REFUSED; + } + if (pPatch->opcode == OP_PUSHF) + { + if (pCpu->pCurInstr->uOpcode == OP_POPF) + { + if (pPatch->cbPatchBlockSize >= SIZEOF_NEARJUMP32) + return VINF_SUCCESS; + + /* Or else we need to duplicate more instructions, because we can't jump back yet! */ + Log(("WARNING: End of block reached, but we need to duplicate some extra instruction to avoid a conflict with the patch jump\n")); + pPatch->flags |= PATMFL_CHECK_SIZE; + } + break; /* sti doesn't mark the end of a pushf block; only popf does. */ + } + RT_FALL_THRU(); + case OP_RETN: /* exit point for function replacement */ + return VINF_SUCCESS; + + case OP_IRET: + return VINF_SUCCESS; /* exitpoint */ + + case OP_CPUID: + case OP_CALL: + case OP_JMP: + break; + +#ifdef VBOX_WITH_SAFE_STR /** @todo remove DISOPTYPE_PRIVILEGED_NOTRAP from disasm table */ + case OP_STR: + break; +#endif + + default: + if (pCpu->pCurInstr->fOpType & (DISOPTYPE_PRIVILEGED_NOTRAP)) + { + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + return VINF_SUCCESS; /* exit point */ + } + break; + } + + /* If single instruction patch, we've copied enough instructions *and* the current instruction is not a relative jump. */ + if ((pPatch->flags & PATMFL_CHECK_SIZE) && pPatch->cbPatchBlockSize > SIZEOF_NEARJUMP32 && !(pCpu->pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW)) + { + /* The end marker for this kind of patch is any instruction at a location outside our patch jump. */ + Log(("End of block at %RRv size %d\n", pCurInstrGC, pCpu->cbInstr)); + return VINF_SUCCESS; + } + + return VWRN_CONTINUE_ANALYSIS; +} + +/** + * Analyses the instructions inside a function for compliance + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pCpu CPU disassembly state + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pCacheRec Cache record ptr + * + */ +static int patmAnalyseFunctionCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + bool fIllegalInstr = false; + NOREF(pInstrGC); + + //Preliminary heuristics: + //- no call instructions + //- ret ends a block + + Assert(pPatch->flags & (PATMFL_DUPLICATE_FUNCTION)); + + // bail out if the patch gets too big + if (pPatch->cbPatchBlockSize >= MAX_PATCH_SIZE) + { + Log(("Code block too big (%x) for function patch at %RRv!!\n", pPatch->cbPatchBlockSize, pCurInstrGC)); + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + else + { + // no unconditional jumps or calls without fixed displacements + if ( (pCpu->pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + && (pCpu->pCurInstr->uOpcode == OP_JMP || pCpu->pCurInstr->uOpcode == OP_CALL) + ) + { + Assert(pCpu->Param1.cb <= 4 || pCpu->Param1.cb == 6); + if ( pCpu->Param1.cb == 6 /* far call/jmp */ + || (pCpu->pCurInstr->uOpcode == OP_CALL && !(pPatch->flags & PATMFL_SUPPORT_CALLS)) + || (OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) != OP_PARM_J && !(pPatch->flags & PATMFL_SUPPORT_INDIRECT_CALLS)) + ) + { + fIllegalInstr = true; + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + } + } + else /* no far returns */ + if (pCpu->pCurInstr->uOpcode == OP_RETF) + { + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + fIllegalInstr = true; + } + else /* no int xx or into either */ + if (pCpu->pCurInstr->uOpcode == OP_INT3 || pCpu->pCurInstr->uOpcode == OP_INT || pCpu->pCurInstr->uOpcode == OP_INTO) + { + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + fIllegalInstr = true; + } + + #if 0 + /// @todo we can handle certain in/out and privileged instructions in the guest context + if (pCpu->pCurInstr->fOpType & DISOPTYPE_PRIVILEGED && pCpu->pCurInstr->uOpcode != OP_STI) + { + Log(("Illegal instructions for function patch!!\n")); + return VERR_PATCHING_REFUSED; + } + #endif + } + + pPatch->cbPatchBlockSize += pCpu->cbInstr; + + /* Illegal instruction -> end of analysis phase for this code block */ + if (fIllegalInstr || patmIsIllegalInstr(pPatch, pCurInstrGC)) + { + return VINF_SUCCESS; + } + + // Check for exit points + switch (pCpu->pCurInstr->uOpcode) + { + case OP_ILLUD2: + //This appears to be some kind of kernel panic in Linux 2.4; no point to analyse more + Log(("Illegal opcode (0xf 0xb) -> return here\n")); + return VINF_SUCCESS; + + case OP_IRET: + case OP_SYSEXIT: /* will fault or emulated in GC */ + case OP_RETN: + return VINF_SUCCESS; + +#ifdef VBOX_WITH_SAFE_STR /** @todo remove DISOPTYPE_PRIVILEGED_NOTRAP from disasm table */ + case OP_STR: + break; +#endif + + case OP_POPF: + case OP_STI: + return VWRN_CONTINUE_ANALYSIS; + default: + if (pCpu->pCurInstr->fOpType & (DISOPTYPE_PRIVILEGED_NOTRAP)) + { + patmAddIllegalInstrRecord(pVM, pPatch, pCurInstrGC); + return VINF_SUCCESS; /* exit point */ + } + return VWRN_CONTINUE_ANALYSIS; + } + + return VWRN_CONTINUE_ANALYSIS; +} + +/** + * Recompiles the instructions in a code block + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pCpu CPU disassembly state + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pCacheRec Cache record ptr + * + */ +static DECLCALLBACK(int) patmRecompileCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + int rc = VINF_SUCCESS; + bool fInhibitIRQInstr = false; /* did the instruction cause PATMFL_INHIBITIRQS to be set? */ + + LogFlow(("patmRecompileCallback %RRv %RRv\n", pInstrGC, pCurInstrGC)); + + if ( patmGuestGCPtrToPatchGCPtr(pVM, pPatch, pCurInstrGC) != 0 + && !(pPatch->flags & PATMFL_RECOMPILE_NEXT)) /* do not do this when the next instruction *must* be executed! */ + { + /* + * Been there, done that; so insert a jump (we don't want to duplicate code) + * no need to record this instruction as it's glue code that never crashes (it had better not!) + */ + Log(("patmRecompileCallback: jump to code we've recompiled before %RRv!\n", pCurInstrGC)); + return patmPatchGenRelJump(pVM, pPatch, pCurInstrGC, OP_JMP, !!(pCpu->fPrefix & DISPREFIX_OPSIZE)); + } + + if (pPatch->flags & (PATMFL_DUPLICATE_FUNCTION)) + { + rc = patmAnalyseFunctionCallback(pVM, pCpu, pInstrGC, pCurInstrGC, pCacheRec); + } + else + rc = patmAnalyseBlockCallback(pVM, pCpu, pInstrGC, pCurInstrGC, pCacheRec); + + if (RT_FAILURE(rc)) + return rc; + + /* Note: Never do a direct return unless a failure is encountered! */ + + /* Clear recompilation of next instruction flag; we are doing that right here. */ + if (pPatch->flags & PATMFL_RECOMPILE_NEXT) + pPatch->flags &= ~PATMFL_RECOMPILE_NEXT; + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, PATCHCODE_PTR_HC(pPatch) + pPatch->uCurPatchOffset, pCurInstrGC, PATM_LOOKUP_BOTHDIR); + + /* Update lowest and highest instruction address for this patch */ + if (pCurInstrGC < pPatch->pInstrGCLowest) + pPatch->pInstrGCLowest = pCurInstrGC; + else + if (pCurInstrGC > pPatch->pInstrGCHighest) + pPatch->pInstrGCHighest = pCurInstrGC + pCpu->cbInstr; + + /* Illegal instruction -> end of recompile phase for this code block. */ + if (patmIsIllegalInstr(pPatch, pCurInstrGC)) + { + Log(("Illegal instruction at %RRv -> mark with int 3\n", pCurInstrGC)); + rc = patmPatchGenIllegalInstr(pVM, pPatch); + goto end; + } + + /* For our first attempt, we'll handle only simple relative jumps (immediate offset coded in instruction). + * Indirect calls are handled below. + */ + if ( (pCpu->pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + && (pCpu->pCurInstr->uOpcode != OP_CALL || (pPatch->flags & PATMFL_SUPPORT_CALLS)) + && (OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) == OP_PARM_J)) + { + RCPTRTYPE(uint8_t *) pTargetGC = PATMResolveBranch(pCpu, pCurInstrGC); + if (pTargetGC == 0) + { + Log(("We don't support far jumps here!! 
(%08X)\n", pCpu->Param1.fUse)); + return VERR_PATCHING_REFUSED; + } + + if (pCpu->pCurInstr->uOpcode == OP_CALL) + { + Assert(!PATMIsPatchGCAddr(pVM, pTargetGC)); + rc = patmPatchGenCall(pVM, pPatch, pCpu, pCurInstrGC, pTargetGC, false); + if (RT_FAILURE(rc)) + goto end; + } + else + rc = patmPatchGenRelJump(pVM, pPatch, pTargetGC, pCpu->pCurInstr->uOpcode, !!(pCpu->fPrefix & DISPREFIX_OPSIZE)); + + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + + goto end; + } + + switch (pCpu->pCurInstr->uOpcode) + { + case OP_CLI: + { + /* If a cli is found while duplicating instructions for another patch, then it's of vital importance to continue + * until we've found the proper exit point(s). + */ + if ( pCurInstrGC != pInstrGC + && !(pPatch->flags & (PATMFL_DUPLICATE_FUNCTION)) + ) + { + Log(("cli instruction found in other instruction patch block; force it to continue & find an exit point\n")); + pPatch->flags &= ~(PATMFL_CHECK_SIZE | PATMFL_SINGLE_INSTRUCTION); + } + /* Set by irq inhibition; no longer valid now. */ + pPatch->flags &= ~PATMFL_GENERATE_JUMPTOGUEST; + + rc = patmPatchGenCli(pVM, pPatch); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + } + + case OP_MOV: + if (pCpu->pCurInstr->fOpType & DISOPTYPE_POTENTIALLY_DANGEROUS) + { + /* mov ss, src? */ + if ( (pCpu->Param1.fUse & DISUSE_REG_SEG) + && (pCpu->Param1.Base.idxSegReg == DISSELREG_SS)) + { + Log(("Force recompilation of next instruction for OP_MOV at %RRv\n", pCurInstrGC)); + pPatch->flags |= PATMFL_RECOMPILE_NEXT; + /** @todo this could cause a fault (ring 0 selector being loaded in ring 1) */ + } +#if 0 /* necessary for Haiku */ + else + if ( (pCpu->Param2.fUse & DISUSE_REG_SEG) + && (pCpu->Param2.Base.idxSegReg == USE_REG_SS) + && (pCpu->Param1.fUse & (DISUSE_REG_GEN32|DISUSE_REG_GEN16))) /** @todo memory operand must in theory be handled too */ + { + /* mov GPR, ss */ + rc = patmPatchGenMovFromSS(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + } +#endif + } + goto duplicate_instr; + + case OP_POP: + /** @todo broken comparison!! should be if ((pCpu->Param1.fUse & DISUSE_REG_SEG) && (pCpu->Param1.Base.idxSegReg == DISSELREG_SS)) */ + if (pCpu->pCurInstr->fParam1 == OP_PARM_REG_SS) + { + Assert(pCpu->pCurInstr->fOpType & DISOPTYPE_INHIBIT_IRQS); + + Log(("Force recompilation of next instruction for OP_MOV at %RRv\n", pCurInstrGC)); + pPatch->flags |= PATMFL_RECOMPILE_NEXT; + } + goto duplicate_instr; + + case OP_STI: + { + RTRCPTR pNextInstrGC = 0; /* by default no inhibit irq */ + + /* In a sequence of instructions that inhibit irqs, only the first one actually inhibits irqs. 
*/ + if (!(pPatch->flags & PATMFL_INHIBIT_IRQS)) + { + pPatch->flags |= PATMFL_INHIBIT_IRQS | PATMFL_GENERATE_JUMPTOGUEST; + fInhibitIRQInstr = true; + pNextInstrGC = pCurInstrGC + pCpu->cbInstr; + Log(("Inhibit irqs for instruction OP_STI at %RRv\n", pCurInstrGC)); + } + rc = patmPatchGenSti(pVM, pPatch, pCurInstrGC, pNextInstrGC); + + if (RT_SUCCESS(rc)) + { + DISCPUSTATE cpu = *pCpu; + unsigned cbInstr; + int disret; + RCPTRTYPE(uint8_t *) pReturnInstrGC; + + pPatch->flags |= PATMFL_FOUND_PATCHEND; + + pNextInstrGC = pCurInstrGC + pCpu->cbInstr; + { /* Force pNextInstrHC out of scope after using it */ + uint8_t *pNextInstrHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pNextInstrGC); + if (pNextInstrHC == NULL) + { + AssertFailed(); + return VERR_PATCHING_REFUSED; + } + + // Disassemble the next instruction + disret = patmR3DisInstr(pVM, pPatch, pNextInstrGC, pNextInstrHC, PATMREAD_ORGCODE, &cpu, &cbInstr); + } + if (disret == false) + { + AssertMsgFailed(("STI: Disassembly failed (probably page not present) -> return to caller\n")); + return VERR_PATCHING_REFUSED; + } + pReturnInstrGC = pNextInstrGC + cbInstr; + + if ( (pPatch->flags & (PATMFL_DUPLICATE_FUNCTION)) + || pReturnInstrGC <= pInstrGC + || pReturnInstrGC - pInstrGC >= SIZEOF_NEARJUMP32 + ) + { + /* Not an exit point for function duplication patches */ + if ( (pPatch->flags & PATMFL_DUPLICATE_FUNCTION) + && RT_SUCCESS(rc)) + { + pPatch->flags &= ~PATMFL_GENERATE_JUMPTOGUEST; /* Don't generate a jump back */ + rc = VWRN_CONTINUE_RECOMPILE; + } + else + rc = VINF_SUCCESS; //exit point + } + else { + Log(("PATM: sti occurred too soon; refusing patch!\n")); + rc = VERR_PATCHING_REFUSED; //not allowed!! + } + } + break; + } + + case OP_POPF: + { + bool fGenerateJmpBack = (pCurInstrGC + pCpu->cbInstr - pInstrGC >= SIZEOF_NEARJUMP32); + + /* Not an exit point for IDT handler or function replacement patches */ + /* Note: keep IOPL in mind when changing any of this!! (see comments in PATMA.asm, PATMPopf32Replacement) */ + if (pPatch->flags & (PATMFL_IDTHANDLER|PATMFL_DUPLICATE_FUNCTION)) + fGenerateJmpBack = false; + + rc = patmPatchGenPopf(pVM, pPatch, pCurInstrGC + pCpu->cbInstr, !!(pCpu->fPrefix & DISPREFIX_OPSIZE), fGenerateJmpBack); + if (RT_SUCCESS(rc)) + { + if (fGenerateJmpBack == false) + { + /* Not an exit point for IDT handler or function replacement patches */ + rc = VWRN_CONTINUE_RECOMPILE; + } + else + { + pPatch->flags |= PATMFL_FOUND_PATCHEND; + rc = VINF_SUCCESS; /* exit point! */ + } + } + break; + } + + case OP_PUSHF: + rc = patmPatchGenPushf(pVM, pPatch, !!(pCpu->fPrefix & DISPREFIX_OPSIZE)); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + + case OP_PUSH: + /** @todo broken comparison!! 
should be if ((pCpu->Param1.fUse & DISUSE_REG_SEG) && (pCpu->Param1.Base.idxSegReg == DISSELREG_SS)) */ + if (pCpu->pCurInstr->fParam1 == OP_PARM_REG_CS) + { + rc = patmPatchGenPushCS(pVM, pPatch); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + } + goto duplicate_instr; + + case OP_IRET: + Log(("IRET at %RRv\n", pCurInstrGC)); + rc = patmPatchGenIret(pVM, pPatch, pCurInstrGC, !!(pCpu->fPrefix & DISPREFIX_OPSIZE)); + if (RT_SUCCESS(rc)) + { + pPatch->flags |= PATMFL_FOUND_PATCHEND; + rc = VINF_SUCCESS; /* exit point by definition */ + } + break; + + case OP_ILLUD2: + /* This appears to be some kind of kernel panic in Linux 2.4; no point to continue */ + rc = patmPatchGenIllegalInstr(pVM, pPatch); + if (RT_SUCCESS(rc)) + rc = VINF_SUCCESS; /* exit point by definition */ + Log(("Illegal opcode (0xf 0xb)\n")); + break; + + case OP_CPUID: + rc = patmPatchGenCpuid(pVM, pPatch, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + + case OP_STR: +#ifdef VBOX_WITH_SAFE_STR /** @todo remove DISOPTYPE_PRIVILEGED_NOTRAP from disasm table and move OP_STR into ifndef */ + /* Now safe because our shadow TR entry is identical to the guest's. */ + goto duplicate_instr; +#endif + case OP_SLDT: + rc = patmPatchGenSldtStr(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + + case OP_SGDT: + case OP_SIDT: + rc = patmPatchGenSxDT(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + + case OP_RETN: + /* retn is an exit point for function patches */ + rc = patmPatchGenRet(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VINF_SUCCESS; /* exit point by definition */ + break; + + case OP_SYSEXIT: + /* Duplicate it, so it can be emulated in GC (or fault). */ + rc = patmPatchGenDuplicate(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VINF_SUCCESS; /* exit point by definition */ + break; + + case OP_CALL: + Assert(pPatch->flags & PATMFL_SUPPORT_INDIRECT_CALLS); + /* In interrupt gate handlers it's possible to encounter jumps or calls when IF has been enabled again. + * In that case we'll jump to the original instruction and continue from there. Otherwise an int 3 is executed. + */ + Assert(pCpu->Param1.cb == 4 || pCpu->Param1.cb == 6); + if (pPatch->flags & PATMFL_SUPPORT_INDIRECT_CALLS && pCpu->Param1.cb == 4 /* no far calls! */) + { + rc = patmPatchGenCall(pVM, pPatch, pCpu, pCurInstrGC, (RTRCPTR)0xDEADBEEF, true); + if (RT_SUCCESS(rc)) + { + rc = VWRN_CONTINUE_RECOMPILE; + } + break; + } + goto gen_illegal_instr; + + case OP_JMP: + Assert(pPatch->flags & PATMFL_SUPPORT_INDIRECT_CALLS); + /* In interrupt gate handlers it's possible to encounter jumps or calls when IF has been enabled again. + * In that case we'll jump to the original instruction and continue from there. Otherwise an int 3 is executed. + */ + Assert(pCpu->Param1.cb == 4 || pCpu->Param1.cb == 6); + if (pPatch->flags & PATMFL_SUPPORT_INDIRECT_CALLS && pCpu->Param1.cb == 4 /* no far jumps! */) + { + rc = patmPatchGenJump(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VINF_SUCCESS; /* end of branch */ + break; + } + goto gen_illegal_instr; + + case OP_INT3: + case OP_INT: + case OP_INTO: + goto gen_illegal_instr; + + case OP_MOV_DR: + /* Note: currently we let DRx writes cause a trap d; our trap handler will decide to interpret it or not. 
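+ ('trap d' being vector 0x0d, the general protection fault that a mov to/from DRx raises
+ when executed outside ring 0.)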
*/ + if (pCpu->pCurInstr->fParam2 == OP_PARM_Dd) + { + rc = patmPatchGenMovDebug(pVM, pPatch, pCpu); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + } + goto duplicate_instr; + + case OP_MOV_CR: + /* Note: currently we let CRx writes cause a trap d; our trap handler will decide to interpret it or not. */ + if (pCpu->pCurInstr->fParam2 == OP_PARM_Cd) + { + rc = patmPatchGenMovControl(pVM, pPatch, pCpu); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + break; + } + goto duplicate_instr; + + default: + if (pCpu->pCurInstr->fOpType & (DISOPTYPE_CONTROLFLOW | DISOPTYPE_PRIVILEGED_NOTRAP)) + { +gen_illegal_instr: + rc = patmPatchGenIllegalInstr(pVM, pPatch); + if (RT_SUCCESS(rc)) + rc = VINF_SUCCESS; /* exit point by definition */ + } + else + { +duplicate_instr: + Log(("patmPatchGenDuplicate\n")); + rc = patmPatchGenDuplicate(pVM, pPatch, pCpu, pCurInstrGC); + if (RT_SUCCESS(rc)) + rc = VWRN_CONTINUE_RECOMPILE; + } + break; + } + +end: + + if ( !fInhibitIRQInstr + && (pPatch->flags & PATMFL_INHIBIT_IRQS)) + { + int rc2; + RTRCPTR pNextInstrGC = pCurInstrGC + pCpu->cbInstr; + + pPatch->flags &= ~PATMFL_INHIBIT_IRQS; + Log(("Clear inhibit IRQ flag at %RRv\n", pCurInstrGC)); + if (pPatch->flags & PATMFL_GENERATE_JUMPTOGUEST) + { + Log(("patmRecompileCallback: generate jump back to guest (%RRv) after fused instruction\n", pNextInstrGC)); + + rc2 = patmPatchGenJumpToGuest(pVM, pPatch, pNextInstrGC, true /* clear inhibit irq flag */); + pPatch->flags &= ~PATMFL_GENERATE_JUMPTOGUEST; + rc = VINF_SUCCESS; /* end of the line */ + } + else + { + rc2 = patmPatchGenClearInhibitIRQ(pVM, pPatch, pNextInstrGC); + } + if (RT_FAILURE(rc2)) + rc = rc2; + } + + if (RT_SUCCESS(rc)) + { + // If single instruction patch, we've copied enough instructions *and* the current instruction is not a relative jump + if ( (pPatch->flags & PATMFL_CHECK_SIZE) + && pCurInstrGC + pCpu->cbInstr - pInstrGC >= SIZEOF_NEARJUMP32 + && !(pCpu->pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW) + && !(pPatch->flags & PATMFL_RECOMPILE_NEXT) /* do not do this when the next instruction *must* be executed! */ + ) + { + RTRCPTR pNextInstrGC = pCurInstrGC + pCpu->cbInstr; + + // The end marker for this kind of patch is any instruction at a location outside our patch jump + Log(("patmRecompileCallback: end found for single instruction patch at %RRv cbInstr %d\n", pNextInstrGC, pCpu->cbInstr)); + + rc = patmPatchGenJumpToGuest(pVM, pPatch, pNextInstrGC); + AssertRC(rc); + } + } + return rc; +} + + +#ifdef LOG_ENABLED + +/** + * Add a disasm jump record (temporary for prevent duplicate analysis) + * + * @param pVM The cross context VM structure. + * @param pPatch Patch structure ptr + * @param pInstrGC Guest context pointer to privileged instruction + * + */ +static void patmPatchAddDisasmJump(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + PAVLPVNODECORE pRec; + + pRec = (PAVLPVNODECORE)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(*pRec)); + Assert(pRec); + pRec->Key = (AVLPVKEY)(uintptr_t)pInstrGC; + + int ret = RTAvlPVInsert(&pPatch->pTempInfo->DisasmJumpTree, pRec); + Assert(ret); +} + +/** + * Checks if jump target has been analysed before. + * + * @returns VBox status code. 
+ * @param pPatch Patch struct + * @param pInstrGC Jump target + * + */ +static bool patmIsKnownDisasmJump(PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + PAVLPVNODECORE pRec; + + pRec = RTAvlPVGet(&pPatch->pTempInfo->DisasmJumpTree, (AVLPVKEY)(uintptr_t)pInstrGC); + if (pRec) + return true; + return false; +} + +/** + * For proper disassembly of the final patch block + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCpu CPU disassembly state + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pCacheRec Cache record ptr + * + */ +DECLCALLBACK(int) patmR3DisasmCallback(PVM pVM, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pInstrGC, + RCPTRTYPE(uint8_t *) pCurInstrGC, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + NOREF(pInstrGC); + + if (pCpu->pCurInstr->uOpcode == OP_INT3) + { + /* Could be an int3 inserted in a call patch. Check to be sure */ + DISCPUSTATE cpu; + RTRCPTR pOrgJumpGC; + + pOrgJumpGC = patmPatchGCPtr2GuestGCPtr(pVM, pPatch, pCurInstrGC); + + { /* Force pOrgJumpHC out of scope after using it */ + uint8_t *pOrgJumpHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pOrgJumpGC); + + bool disret = patmR3DisInstr(pVM, pPatch, pOrgJumpGC, pOrgJumpHC, PATMREAD_ORGCODE, &cpu, NULL); + if (!disret || cpu.pCurInstr->uOpcode != OP_CALL || cpu.Param1.cb != 4 /* only near calls */) + return VINF_SUCCESS; + } + return VWRN_CONTINUE_ANALYSIS; + } + + if ( pCpu->pCurInstr->uOpcode == OP_ILLUD2 + && PATMIsPatchGCAddr(pVM, pCurInstrGC)) + { + /* the indirect call patch contains an 0xF/0xB illegal instr to call for assistance; check for this and continue */ + return VWRN_CONTINUE_ANALYSIS; + } + + if ( (pCpu->pCurInstr->uOpcode == OP_CALL && !(pPatch->flags & PATMFL_SUPPORT_CALLS)) + || pCpu->pCurInstr->uOpcode == OP_INT + || pCpu->pCurInstr->uOpcode == OP_IRET + || pCpu->pCurInstr->uOpcode == OP_RETN + || pCpu->pCurInstr->uOpcode == OP_RETF + ) + { + return VINF_SUCCESS; + } + + if (pCpu->pCurInstr->uOpcode == OP_ILLUD2) + return VINF_SUCCESS; + + return VWRN_CONTINUE_ANALYSIS; +} + + +/** + * Disassembles the code stream until the callback function detects a failure or decides everything is acceptable + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to the initial privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pfnPATMR3Disasm Callback for testing the disassembled instruction + * @param pCacheRec Cache record ptr + * + */ +int patmr3DisasmCode(PVM pVM, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, PFN_PATMR3ANALYSE pfnPATMR3Disasm, PPATMP2GLOOKUPREC pCacheRec) +{ + DISCPUSTATE cpu; + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + int rc = VWRN_CONTINUE_ANALYSIS; + uint32_t cbInstr, delta; + R3PTRTYPE(uint8_t *) pCurInstrHC = 0; + bool disret; + char szOutput[256]; + + Assert(pCurInstrHC != PATCHCODE_PTR_HC(pPatch) || pPatch->pTempInfo->DisasmJumpTree == 0); + + /* We need this to determine branch targets (and for disassembling). 
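+ delta is simply pPatchMemGC minus pPatchMemHC, i.e. the constant to add to a host-side
+ pointer into the patch block to recover the guest-context address of the same byte
+ (pGC = pHC + delta).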
*/ + delta = pVM->patm.s.pPatchMemGC - (uintptr_t)pVM->patm.s.pPatchMemHC; + + while (rc == VWRN_CONTINUE_ANALYSIS) + { + pCurInstrHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pCurInstrGC); + if (pCurInstrHC == NULL) + { + rc = VERR_PATCHING_REFUSED; + goto end; + } + + disret = patmR3DisInstrToStr(pVM, pPatch, pCurInstrGC, pCurInstrHC, PATMREAD_RAWCODE, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + if (PATMIsPatchGCAddr(pVM, pCurInstrGC)) + { + RTRCPTR pOrgInstrGC = patmPatchGCPtr2GuestGCPtr(pVM, pPatch, pCurInstrGC); + + if (pOrgInstrGC != pPatch->pTempInfo->pLastDisasmInstrGC) + Log(("DIS %RRv<-%s", pOrgInstrGC, szOutput)); + else + Log(("DIS %s", szOutput)); + + pPatch->pTempInfo->pLastDisasmInstrGC = pOrgInstrGC; + if (patmIsIllegalInstr(pPatch, pOrgInstrGC)) + { + rc = VINF_SUCCESS; + goto end; + } + } + else + Log(("DIS: %s", szOutput)); + + if (disret == false) + { + Log(("Disassembly failed (probably page not present) -> return to caller\n")); + rc = VINF_SUCCESS; + goto end; + } + + rc = pfnPATMR3Disasm(pVM, &cpu, pInstrGC, pCurInstrGC, pCacheRec); + if (rc != VWRN_CONTINUE_ANALYSIS) { + break; //done! + } + + /* For our first attempt, we'll handle only simple relative jumps and calls (immediate offset coded in instruction) */ + if ( (cpu.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + && (OP_PARM_VTYPE(cpu.pCurInstr->fParam1) == OP_PARM_J) + && cpu.pCurInstr->uOpcode != OP_CALL /* complete functions are replaced; don't bother here. */ + ) + { + RTRCPTR pTargetGC = PATMResolveBranch(&cpu, pCurInstrGC); + RTRCPTR pOrgTargetGC; + + if (pTargetGC == 0) + { + Log(("We don't support far jumps here!! (%08X)\n", cpu.Param1.fUse)); + rc = VERR_PATCHING_REFUSED; + break; + } + + if (!PATMIsPatchGCAddr(pVM, pTargetGC)) + { + //jump back to guest code + rc = VINF_SUCCESS; + goto end; + } + pOrgTargetGC = PATMR3PatchToGCPtr(pVM, pTargetGC, 0); + + if (patmIsCommonIDTHandlerPatch(pVM, pOrgTargetGC)) + { + rc = VINF_SUCCESS; + goto end; + } + + if (patmIsKnownDisasmJump(pPatch, pTargetGC) == false) + { + /* New jump, let's check it. */ + patmPatchAddDisasmJump(pVM, pPatch, pTargetGC); + + if (cpu.pCurInstr->uOpcode == OP_CALL) pPatch->pTempInfo->nrCalls++; + rc = patmr3DisasmCode(pVM, pInstrGC, pTargetGC, pfnPATMR3Disasm, pCacheRec); + if (cpu.pCurInstr->uOpcode == OP_CALL) pPatch->pTempInfo->nrCalls--; + + if (rc != VINF_SUCCESS) { + break; //done! + } + } + if (cpu.pCurInstr->uOpcode == OP_JMP) + { + /* Unconditional jump; return to caller. */ + rc = VINF_SUCCESS; + goto end; + } + + rc = VWRN_CONTINUE_ANALYSIS; + } + pCurInstrGC += cbInstr; + } +end: + return rc; +} + +/** + * Disassembles the code stream until the callback function detects a failure or decides everything is acceptable + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to the initial privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pfnPATMR3Disasm Callback for testing the disassembled instruction + * @param pCacheRec Cache record ptr + * + */ +int patmr3DisasmCodeStream(PVM pVM, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, PFN_PATMR3ANALYSE pfnPATMR3Disasm, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + + int rc = patmr3DisasmCode(pVM, pInstrGC, pCurInstrGC, pfnPATMR3Disasm, pCacheRec); + /* Free all disasm jump records. 
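+ The DisasmJumpTree only exists to keep the recursive walk in patmr3DisasmCode from
+ disassembling the same jump target twice; once the stream has been logged it can be
+ discarded.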
*/ + patmEmptyTree(pVM, &pPatch->pTempInfo->DisasmJumpTree); + return rc; +} + +#endif /* LOG_ENABLED */ + +/** + * Detects it the specified address falls within a 5 byte jump generated for an active patch. + * If so, this patch is permanently disabled. + * + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to instruction + * @param pConflictGC Guest context pointer to check + * + * @note also checks for patch hints to make sure they can never be enabled if a conflict is present. + * + */ +VMMR3_INT_DECL(int) PATMR3DetectConflict(PVM pVM, RTRCPTR pInstrGC, RTRCPTR pConflictGC) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATCH_NO_CONFLICT); + PPATCHINFO pTargetPatch = patmFindActivePatchByEntrypoint(pVM, pConflictGC, true /* include patch hints */); + if (pTargetPatch) + return patmDisableUnusablePatch(pVM, pInstrGC, pConflictGC, pTargetPatch); + return VERR_PATCH_NO_CONFLICT; +} + +/** + * Recompile the code stream until the callback function detects a failure or decides everything is acceptable + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to privileged instruction + * @param pCurInstrGC Guest context pointer to the current instruction + * @param pfnPATMR3Recompile Callback for testing the disassembled instruction + * @param pCacheRec Cache record ptr + * + */ +static int patmRecompileCodeStream(PVM pVM, RCPTRTYPE(uint8_t *) pInstrGC, RCPTRTYPE(uint8_t *) pCurInstrGC, PFN_PATMR3ANALYSE pfnPATMR3Recompile, PPATMP2GLOOKUPREC pCacheRec) +{ + DISCPUSTATE cpu; + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + int rc = VWRN_CONTINUE_ANALYSIS; + uint32_t cbInstr; + R3PTRTYPE(uint8_t *) pCurInstrHC = 0; + bool disret; +#ifdef LOG_ENABLED + char szOutput[256]; +#endif + + while (rc == VWRN_CONTINUE_RECOMPILE) + { + pCurInstrHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pCurInstrGC); + if (pCurInstrHC == NULL) + { + rc = VERR_PATCHING_REFUSED; /* fatal in this case */ + goto end; + } +#ifdef LOG_ENABLED + disret = patmR3DisInstrToStr(pVM, pPatch, pCurInstrGC, pCurInstrHC, PATMREAD_ORGCODE, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + Log(("Recompile: %s", szOutput)); +#else + disret = patmR3DisInstr(pVM, pPatch, pCurInstrGC, pCurInstrHC, PATMREAD_ORGCODE, &cpu, &cbInstr); +#endif + if (disret == false) + { + Log(("Disassembly failed (probably page not present) -> return to caller\n")); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, PATCHCODE_PTR_HC(pPatch) + pPatch->uCurPatchOffset, pCurInstrGC, PATM_LOOKUP_BOTHDIR); + patmPatchGenIllegalInstr(pVM, pPatch); + rc = VINF_SUCCESS; /* Note: don't fail here; we might refuse an important patch!! */ + goto end; + } + + rc = pfnPATMR3Recompile(pVM, &cpu, pInstrGC, pCurInstrGC, pCacheRec); + if (rc != VWRN_CONTINUE_RECOMPILE) + { + /* If irqs are inhibited because of the current instruction, then we must make sure the next one is executed! */ + if ( rc == VINF_SUCCESS + && (pPatch->flags & PATMFL_INHIBIT_IRQS)) + { + DISCPUSTATE cpunext; + uint32_t opsizenext; + uint8_t *pNextInstrHC; + RTRCPTR pNextInstrGC = pCurInstrGC + cbInstr; + + Log(("patmRecompileCodeStream: irqs inhibited by instruction %RRv\n", pNextInstrGC)); + + /* Certain instructions (e.g. sti) force the next instruction to be executed before any interrupts can occur. 
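+ * (sti, mov ss and pop ss are the classic cases; the interrupt shadow lasts for exactly
+ * one more instruction.)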
+ * Recompile the next instruction as well + */ + pNextInstrHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pNextInstrGC); + if (pNextInstrHC == NULL) + { + rc = VERR_PATCHING_REFUSED; /* fatal in this case */ + goto end; + } + disret = patmR3DisInstr(pVM, pPatch, pNextInstrGC, pNextInstrHC, PATMREAD_ORGCODE, &cpunext, &opsizenext); + if (disret == false) + { + rc = VERR_PATCHING_REFUSED; /* fatal in this case */ + goto end; + } + switch(cpunext.pCurInstr->uOpcode) + { + case OP_IRET: /* inhibit cleared in generated code */ + case OP_SYSEXIT: /* faults; inhibit should be cleared in HC handling */ + case OP_HLT: + break; /* recompile these */ + + default: + if (cpunext.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + { + Log(("Unexpected control flow instruction after inhibit irq instruction\n")); + + rc = patmPatchGenJumpToGuest(pVM, pPatch, pNextInstrGC, true /* clear inhibit irq flag */); + AssertRC(rc); + pPatch->flags &= ~PATMFL_INHIBIT_IRQS; + goto end; /** @todo should be ok to ignore instruction fusing in this case */ + } + break; + } + + /* Note: after a cli we must continue to a proper exit point */ + if (cpunext.pCurInstr->uOpcode != OP_CLI) + { + rc = pfnPATMR3Recompile(pVM, &cpunext, pInstrGC, pNextInstrGC, pCacheRec); + if (RT_SUCCESS(rc)) + { + rc = VINF_SUCCESS; + goto end; + } + break; + } + else + rc = VWRN_CONTINUE_RECOMPILE; + } + else + break; /* done! */ + } + + /** @todo continue with the instructions following the jump and then recompile the jump target code */ + + + /* For our first attempt, we'll handle only simple relative jumps and calls (immediate offset coded in instruction). */ + if ( (cpu.pCurInstr->fOpType & DISOPTYPE_CONTROLFLOW) + && (OP_PARM_VTYPE(cpu.pCurInstr->fParam1) == OP_PARM_J) + && cpu.pCurInstr->uOpcode != OP_CALL /* complete functions are replaced; don't bother here. */ + ) + { + RCPTRTYPE(uint8_t *) addr = PATMResolveBranch(&cpu, pCurInstrGC); + if (addr == 0) + { + Log(("We don't support far jumps here!! (%08X)\n", cpu.Param1.fUse)); + rc = VERR_PATCHING_REFUSED; + break; + } + + Log(("Jump encountered target %RRv\n", addr)); + + /* We don't check if the branch target lies in a valid page as we've already done that in the analysis phase. */ + if (!(cpu.pCurInstr->fOpType & DISOPTYPE_UNCOND_CONTROLFLOW)) + { + Log(("patmRecompileCodeStream continue passed conditional jump\n")); + /* First we need to finish this linear code stream until the next exit point. */ + rc = patmRecompileCodeStream(pVM, pInstrGC, pCurInstrGC+cbInstr, pfnPATMR3Recompile, pCacheRec); + if (RT_FAILURE(rc)) + { + Log(("patmRecompileCodeStream fatal error %d\n", rc)); + break; //fatal error + } + } + + if (patmGuestGCPtrToPatchGCPtr(pVM, pPatch, addr) == 0) + { + /* New code; let's recompile it. 
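+ For a conditional branch the fall-through path has already been handled by the recursive
+ call above; this block now follows the taken branch target.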
*/ + Log(("patmRecompileCodeStream continue with jump\n")); + + /* + * If we are jumping to an existing patch (or within 5 bytes of the entrypoint), then we must temporarily disable + * this patch so we can continue our analysis + * + * We rely on CSAM to detect and resolve conflicts + */ + PPATCHINFO pTargetPatch = patmFindActivePatchByEntrypoint(pVM, addr); + if(pTargetPatch) + { + Log(("Found active patch at target %RRv (%RRv) -> temporarily disabling it!!\n", addr, pTargetPatch->pPrivInstrGC)); + PATMR3DisablePatch(pVM, pTargetPatch->pPrivInstrGC); + } + + if (cpu.pCurInstr->uOpcode == OP_CALL) pPatch->pTempInfo->nrCalls++; + rc = patmRecompileCodeStream(pVM, pInstrGC, addr, pfnPATMR3Recompile, pCacheRec); + if (cpu.pCurInstr->uOpcode == OP_CALL) pPatch->pTempInfo->nrCalls--; + + if(pTargetPatch) + PATMR3EnablePatch(pVM, pTargetPatch->pPrivInstrGC); + + if (RT_FAILURE(rc)) + { + Log(("patmRecompileCodeStream fatal error %d\n", rc)); + break; //done! + } + } + /* Always return to caller here; we're done! */ + rc = VINF_SUCCESS; + goto end; + } + else + if (cpu.pCurInstr->fOpType & DISOPTYPE_UNCOND_CONTROLFLOW) + { + rc = VINF_SUCCESS; + goto end; + } + pCurInstrGC += cbInstr; + } +end: + Assert(!(pPatch->flags & PATMFL_RECOMPILE_NEXT)); + return rc; +} + + +/** + * Generate the jump from guest to patch code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pCacheRec Guest translation lookup cache record + * @param fAddFixup Whether to add a fixup record. + */ +static int patmGenJumpToPatch(PVM pVM, PPATCHINFO pPatch, PPATMP2GLOOKUPREC pCacheRec, bool fAddFixup = true) +{ + uint8_t temp[8]; + uint8_t *pPB; + int rc; + + Assert(pPatch->cbPatchJump <= sizeof(temp)); + Assert(!(pPatch->flags & PATMFL_PATCHED_GUEST_CODE)); + + pPB = patmR3GCVirtToHCVirt(pVM, pCacheRec, pPatch->pPrivInstrGC); + Assert(pPB); + +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES + if (pPatch->flags & PATMFL_JUMP_CONFLICT) + { + Assert(pPatch->pPatchJumpDestGC); + + if (pPatch->cbPatchJump == SIZEOF_NEARJUMP32) + { + // jmp [PatchCode] + if (fAddFixup) + { + if (patmPatchAddReloc32(pVM, pPatch, &pPB[1], FIXUP_REL_JMPTOPATCH, pPatch->pPrivInstrGC + pPatch->cbPatchJump, + pPatch->pPatchJumpDestGC) != VINF_SUCCESS) + { + Log(("Relocation failed for the jump in the guest code!!\n")); + return VERR_PATCHING_REFUSED; + } + } + + temp[0] = pPatch->aPrivInstr[0]; //jump opcode copied from original instruction + *(uint32_t *)&temp[1] = (uint32_t)pPatch->pPatchJumpDestGC - ((uint32_t)pPatch->pPrivInstrGC + pPatch->cbPatchJump); //return address + } + else + if (pPatch->cbPatchJump == SIZEOF_NEAR_COND_JUMP32) + { + // jmp [PatchCode] + if (fAddFixup) + { + if (patmPatchAddReloc32(pVM, pPatch, &pPB[2], FIXUP_REL_JMPTOPATCH, pPatch->pPrivInstrGC + pPatch->cbPatchJump, + pPatch->pPatchJumpDestGC) != VINF_SUCCESS) + { + Log(("Relocation failed for the jump in the guest code!!\n")); + return VERR_PATCHING_REFUSED; + } + } + + temp[0] = pPatch->aPrivInstr[0]; //jump opcode copied from original instruction + temp[1] = pPatch->aPrivInstr[1]; //jump opcode copied from original instruction + *(uint32_t *)&temp[2] = (uint32_t)pPatch->pPatchJumpDestGC - ((uint32_t)pPatch->pPrivInstrGC + pPatch->cbPatchJump); //return address + } + else + { + Assert(0); + return VERR_PATCHING_REFUSED; + } + } + else +#endif + { + Assert(pPatch->cbPatchJump == SIZEOF_NEARJUMP32); + + // jmp [PatchCode] + if (fAddFixup) + { + if (patmPatchAddReloc32(pVM, pPatch, &pPB[1], 
FIXUP_REL_JMPTOPATCH, pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32, + PATCHCODE_PTR_GC(pPatch)) != VINF_SUCCESS) + { + Log(("Relocation failed for the jump in the guest code!!\n")); + return VERR_PATCHING_REFUSED; + } + } + temp[0] = 0xE9; //jmp + *(uint32_t *)&temp[1] = (RTRCUINTPTR)PATCHCODE_PTR_GC(pPatch) - ((RTRCUINTPTR)pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32); //return address + } + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pPatch->pPrivInstrGC, temp, pPatch->cbPatchJump); + AssertRC(rc); + + if (rc == VINF_SUCCESS) + pPatch->flags |= PATMFL_PATCHED_GUEST_CODE; + + return rc; +} + +/** + * Remove the jump from guest to patch code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + */ +static int patmRemoveJumpToPatch(PVM pVM, PPATCHINFO pPatch) +{ +#ifdef DEBUG + DISCPUSTATE cpu; + char szOutput[256]; + uint32_t cbInstr, i = 0; + bool disret; + + while (i < pPatch->cbPrivInstr) + { + disret = patmR3DisInstrToStr(pVM, pPatch, pPatch->pPrivInstrGC + i, NULL, PATMREAD_ORGCODE, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + if (disret == false) + break; + + Log(("Org patch jump: %s", szOutput)); + Assert(cbInstr); + i += cbInstr; + } +#endif + + /* Restore original code (privileged instruction + following instructions that were overwritten because of the 5/6 byte jmp). */ + int rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pPatch->pPrivInstrGC, pPatch->aPrivInstr, pPatch->cbPatchJump); +#ifdef DEBUG + if (rc == VINF_SUCCESS) + { + i = 0; + while (i < pPatch->cbPrivInstr) + { + disret = patmR3DisInstrToStr(pVM, pPatch, pPatch->pPrivInstrGC + i, NULL, PATMREAD_ORGCODE, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + if (disret == false) + break; + + Log(("Org instr: %s", szOutput)); + Assert(cbInstr); + i += cbInstr; + } + } +#endif + pPatch->flags &= ~PATMFL_PATCHED_GUEST_CODE; + return rc; +} + +/** + * Generate the call from guest to patch code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pTargetGC The target of the fixup (i.e. the patch code we're + * calling into). + * @param pCacheRec Guest translation cache record + * @param fAddFixup Whether to add a fixup record. + */ +static int patmGenCallToPatch(PVM pVM, PPATCHINFO pPatch, RTRCPTR pTargetGC, PPATMP2GLOOKUPREC pCacheRec, bool fAddFixup = true) +{ + uint8_t temp[8]; + uint8_t *pPB; + int rc; + + Assert(pPatch->cbPatchJump <= sizeof(temp)); + + pPB = patmR3GCVirtToHCVirt(pVM, pCacheRec, pPatch->pPrivInstrGC); + Assert(pPB); + + Assert(pPatch->cbPatchJump == SIZEOF_NEARJUMP32); + + // jmp [PatchCode] + if (fAddFixup) + { + if (patmPatchAddReloc32(pVM, pPatch, &pPB[1], FIXUP_REL_JMPTOPATCH, + pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32, pTargetGC) != VINF_SUCCESS) + { + Log(("Relocation failed for the jump in the guest code!!\n")); + return VERR_PATCHING_REFUSED; + } + } + + Assert(pPatch->aPrivInstr[0] == 0xE8 || pPatch->aPrivInstr[0] == 0xE9); /* call or jmp */ + temp[0] = pPatch->aPrivInstr[0]; + *(uint32_t *)&temp[1] = (uint32_t)pTargetGC - ((uint32_t)pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32); //return address + + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pPatch->pPrivInstrGC, temp, pPatch->cbPatchJump); + AssertRC(rc); + + return rc; +} + + +/** + * Patch cli/sti pushf/popf instruction block at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pInstrGC Guest context point to privileged instruction + * @param pInstrHC Host context point to privileged instruction + * @param uOpcode Instruction opcode + * @param uOpSize Size of starting instruction + * @param pPatchRec Patch record + * + * @note returns failure if patching is not allowed or possible + * + */ +static int patmR3PatchBlock(PVM pVM, RTRCPTR pInstrGC, R3PTRTYPE(uint8_t *) pInstrHC, + uint32_t uOpcode, uint32_t uOpSize, PPATMPATCHREC pPatchRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + int rc = VERR_PATCHING_REFUSED; + uint32_t orgOffsetPatchMem = UINT32_MAX; + RTRCPTR pInstrStart; + bool fInserted; + NOREF(pInstrHC); NOREF(uOpSize); + + /* Save original offset (in case of failures later on) */ + /** @todo use the hypervisor heap (that has quite a few consequences for save/restore though) */ + orgOffsetPatchMem = pVM->patm.s.offPatchMem; + + Assert(!(pPatch->flags & (PATMFL_GUEST_SPECIFIC|PATMFL_USER_MODE|PATMFL_TRAPHANDLER))); + switch (uOpcode) + { + case OP_MOV: + break; + + case OP_CLI: + case OP_PUSHF: + /* We can 'call' a cli or pushf patch. It will either return to the original guest code when IF is set again, or fault. */ + /* Note: special precautions are taken when disabling and enabling such patches. */ + pPatch->flags |= PATMFL_CALLABLE_AS_FUNCTION; + break; + + default: + if (!(pPatch->flags & PATMFL_IDTHANDLER)) + { + AssertMsg(0, ("patmR3PatchBlock: Invalid opcode %x\n", uOpcode)); + return VERR_INVALID_PARAMETER; + } + } + + if (!(pPatch->flags & (PATMFL_IDTHANDLER|PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT|PATMFL_SYSENTER|PATMFL_INT3_REPLACEMENT_BLOCK))) + pPatch->flags |= PATMFL_MUST_INSTALL_PATCHJMP; + + /* If we're going to insert a patch jump, then the jump itself is not allowed to cross a page boundary. */ + if ( (pPatch->flags & PATMFL_MUST_INSTALL_PATCHJMP) + && PAGE_ADDRESS(pInstrGC) != PAGE_ADDRESS(pInstrGC + SIZEOF_NEARJUMP32) + ) + { + STAM_COUNTER_INC(&pVM->patm.s.StatPageBoundaryCrossed); + Log(("Patch jump would cross page boundary -> refuse!!\n")); + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + pPatch->nrPatch2GuestRecs = 0; + pInstrStart = pInstrGC; + +#ifdef PATM_ENABLE_CALL + pPatch->flags |= PATMFL_SUPPORT_CALLS | PATMFL_SUPPORT_INDIRECT_CALLS; +#endif + + pPatch->pPatchBlockOffset = pVM->patm.s.offPatchMem; + pPatch->uCurPatchOffset = 0; + + if ((pPatch->flags & (PATMFL_IDTHANDLER|PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT|PATMFL_SYSENTER)) == PATMFL_IDTHANDLER) + { + Assert(pPatch->flags & PATMFL_INTHANDLER); + + /* Install fake cli patch (to clear the virtual IF and check int xx parameters) */ + rc = patmPatchGenIntEntry(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + } + + /***************************************************************************************************************************/ + /* Note: We can't insert *any* code before a sysenter handler; some linux guests have an invalid stack at this point!!!!! */ + /***************************************************************************************************************************/ +#ifdef VBOX_WITH_STATISTICS + if (!(pPatch->flags & PATMFL_SYSENTER)) + { + rc = patmPatchGenStats(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + } +#endif + + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = pPatch; + + rc = patmRecompileCodeStream(pVM, pInstrGC, pInstrGC, patmRecompileCallback, &cacheRec); + /* Free leftover lock if any. 
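+ patmR3GCVirtToHCVirt keeps the most recently mapped guest page locked in the cache record
+ as an optimisation, so the caller has to drop that last mapping itself.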
*/ + if (cacheRec.Lock.pvMap) + { + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + cacheRec.Lock.pvMap = NULL; + } + if (rc != VINF_SUCCESS) + { + Log(("PATMR3PatchCli: patmRecompileCodeStream failed with %d\n", rc)); + goto failure; + } + + /* Calculated during analysis. */ + if (pPatch->cbPatchBlockSize < SIZEOF_NEARJUMP32) + { + /* Most likely cause: we encountered an illegal instruction very early on. */ + /** @todo could turn it into an int3 callable patch. */ + Log(("patmR3PatchBlock: patch block too small -> refuse\n")); + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + /* size of patch block */ + pPatch->cbPatchBlockSize = pPatch->uCurPatchOffset; + + + /* Update free pointer in patch memory. */ + pVM->patm.s.offPatchMem += pPatch->cbPatchBlockSize; + /* Round to next 8 byte boundary. */ + pVM->patm.s.offPatchMem = RT_ALIGN_32(pVM->patm.s.offPatchMem, 8); + + /* + * Insert into patch to guest lookup tree + */ + LogFlow(("Insert %RRv patch offset %RRv\n", pPatchRec->patch.pPrivInstrGC, pPatch->pPatchBlockOffset)); + pPatchRec->CoreOffset.Key = pPatch->pPatchBlockOffset; + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, &pPatchRec->CoreOffset); + AssertMsg(fInserted, ("RTAvlULInsert failed for %x\n", pPatchRec->CoreOffset.Key)); + if (!fInserted) + { + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + /* Note that patmr3SetBranchTargets can install additional patches!! */ + rc = patmr3SetBranchTargets(pVM, pPatch); + if (rc != VINF_SUCCESS) + { + Log(("PATMR3PatchCli: patmr3SetBranchTargets failed with %d\n", rc)); + goto failure; + } + +#ifdef LOG_ENABLED + Log(("Patch code ----------------------------------------------------------\n")); + patmr3DisasmCodeStream(pVM, PATCHCODE_PTR_GC(pPatch), PATCHCODE_PTR_GC(pPatch), patmR3DisasmCallback, &cacheRec); + /* Free leftover lock if any. */ + if (cacheRec.Lock.pvMap) + { + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + cacheRec.Lock.pvMap = NULL; + } + Log(("Patch code ends -----------------------------------------------------\n")); +#endif + + /* make a copy of the guest code bytes that will be overwritten */ + pPatch->cbPatchJump = SIZEOF_NEARJUMP32; + + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), pPatch->aPrivInstr, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + AssertRC(rc); + + if (pPatch->flags & PATMFL_INT3_REPLACEMENT_BLOCK) + { + /*uint8_t bASMInt3 = 0xCC; - unused */ + + Log(("patmR3PatchBlock %RRv -> int 3 callable patch.\n", pPatch->pPrivInstrGC)); + /* Replace first opcode byte with 'int 3'. */ + rc = patmActivateInt3Patch(pVM, pPatch); + if (RT_FAILURE(rc)) + goto failure; + + /* normal patch can be turned into an int3 patch -> clear patch jump installation flag. 
*/ + pPatch->flags &= ~PATMFL_MUST_INSTALL_PATCHJMP; + + pPatch->flags &= ~PATMFL_INSTR_HINT; + STAM_COUNTER_INC(&pVM->patm.s.StatInt3Callable); + } + else + if (pPatch->flags & PATMFL_MUST_INSTALL_PATCHJMP) + { + Assert(!(pPatch->flags & (PATMFL_IDTHANDLER|PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT|PATMFL_SYSENTER|PATMFL_INT3_REPLACEMENT_BLOCK))); + /* now insert a jump in the guest code */ + rc = patmGenJumpToPatch(pVM, pPatch, &cacheRec, true); + AssertRC(rc); + if (RT_FAILURE(rc)) + goto failure; + + } + + patmR3DbgAddPatch(pVM, pPatchRec); + + PATM_LOG_RAW_PATCH_INSTR(pVM, pPatch, patmGetInstructionString(pPatch->opcode, pPatch->flags)); + + patmEmptyTree(pVM, &pPatch->pTempInfo->IllegalInstrTree); + pPatch->pTempInfo->nrIllegalInstr = 0; + + Log(("Successfully installed %s patch at %RRv\n", patmGetInstructionString(pPatch->opcode, pPatch->flags), pInstrGC)); + + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; + +failure: + if (pPatchRec->CoreOffset.Key) + RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, pPatchRec->CoreOffset.Key); + + patmEmptyTree(pVM, &pPatch->FixupTree); + pPatch->nrFixups = 0; + + patmEmptyTree(pVM, &pPatch->JumpTree); + pPatch->nrJumpRecs = 0; + + patmEmptyTree(pVM, &pPatch->pTempInfo->IllegalInstrTree); + pPatch->pTempInfo->nrIllegalInstr = 0; + + /* Turn this cli patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + pPatch->pPatchBlockOffset = 0; + + // Give back the patch memory we no longer need + Assert(orgOffsetPatchMem != (uint32_t)~0); + pVM->patm.s.offPatchMem = orgOffsetPatchMem; + + return rc; +} + +/** + * Patch IDT handler + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * @param uOpSize Size of starting instruction + * @param pPatchRec Patch record + * @param pCacheRec Cache record ptr + * + * @note returns failure if patching is not allowed or possible + * + */ +static int patmIdtHandler(PVM pVM, RTRCPTR pInstrGC, uint32_t uOpSize, PPATMPATCHREC pPatchRec, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + bool disret; + DISCPUSTATE cpuPush, cpuJmp; + uint32_t cbInstr; + RTRCPTR pCurInstrGC = pInstrGC; + uint8_t *pCurInstrHC, *pInstrHC; + uint32_t orgOffsetPatchMem = UINT32_MAX; + + pInstrHC = pCurInstrHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pCurInstrGC); + AssertReturn(pCurInstrHC, VERR_PAGE_NOT_PRESENT); + + /* + * In Linux it's often the case that many interrupt handlers push a predefined value onto the stack + * and then jump to a common entrypoint. In order not to waste a lot of memory, we will check for this + * condition here and only patch the common entypoint once. + */ + disret = patmR3DisInstr(pVM, pPatch, pCurInstrGC, pCurInstrHC, PATMREAD_ORGCODE, &cpuPush, &cbInstr); + Assert(disret); + if (disret && cpuPush.pCurInstr->uOpcode == OP_PUSH) + { + RTRCPTR pJmpInstrGC; + int rc; + pCurInstrGC += cbInstr; + + disret = patmR3DisInstr(pVM, pPatch, pCurInstrGC, pCurInstrHC, PATMREAD_ORGCODE, &cpuJmp, &cbInstr); + if ( disret + && cpuJmp.pCurInstr->uOpcode == OP_JMP + && (pJmpInstrGC = PATMResolveBranch(&cpuJmp, pCurInstrGC)) + ) + { + bool fInserted; + PPATMPATCHREC pJmpPatch = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pJmpInstrGC); + if (pJmpPatch == 0) + { + /* Patch it first! 
*/ + rc = PATMR3InstallPatch(pVM, pJmpInstrGC, pPatch->flags | PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT); + if (rc != VINF_SUCCESS) + goto failure; + pJmpPatch = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pJmpInstrGC); + Assert(pJmpPatch); + } + if (pJmpPatch->patch.uState != PATCH_ENABLED) + goto failure; + + /* save original offset (in case of failures later on) */ + orgOffsetPatchMem = pVM->patm.s.offPatchMem; + + pPatch->pPatchBlockOffset = pVM->patm.s.offPatchMem; + pPatch->uCurPatchOffset = 0; + pPatch->nrPatch2GuestRecs = 0; + +#ifdef VBOX_WITH_STATISTICS + rc = patmPatchGenStats(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; +#endif + + /* Install fake cli patch (to clear the virtual IF) */ + rc = patmPatchGenIntEntry(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + + /* Add lookup record for patch to guest address translation (for the push) */ + patmR3AddP2GLookupRecord(pVM, pPatch, PATCHCODE_PTR_HC(pPatch) + pPatch->uCurPatchOffset, pInstrGC, PATM_LOOKUP_BOTHDIR); + + /* Duplicate push. */ + rc = patmPatchGenDuplicate(pVM, pPatch, &cpuPush, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + + /* Generate jump to common entrypoint. */ + rc = patmPatchGenPatchJump(pVM, pPatch, pCurInstrGC, PATCHCODE_PTR_GC(&pJmpPatch->patch)); + if (RT_FAILURE(rc)) + goto failure; + + /* size of patch block */ + pPatch->cbPatchBlockSize = pPatch->uCurPatchOffset; + + /* Update free pointer in patch memory. */ + pVM->patm.s.offPatchMem += pPatch->cbPatchBlockSize; + /* Round to next 8 byte boundary */ + pVM->patm.s.offPatchMem = RT_ALIGN_32(pVM->patm.s.offPatchMem, 8); + + /* There's no jump from guest to patch code. */ + pPatch->cbPatchJump = 0; + + +#ifdef LOG_ENABLED + Log(("Patch code ----------------------------------------------------------\n")); + patmr3DisasmCodeStream(pVM, PATCHCODE_PTR_GC(pPatch), PATCHCODE_PTR_GC(pPatch), patmR3DisasmCallback, pCacheRec); + Log(("Patch code ends -----------------------------------------------------\n")); +#endif + Log(("Successfully installed IDT handler patch at %RRv\n", pInstrGC)); + + /* + * Insert into patch to guest lookup tree + */ + LogFlow(("Insert %RRv patch offset %RRv\n", pPatchRec->patch.pPrivInstrGC, pPatch->pPatchBlockOffset)); + pPatchRec->CoreOffset.Key = pPatch->pPatchBlockOffset; + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, &pPatchRec->CoreOffset); + AssertMsg(fInserted, ("RTAvlULInsert failed for %x\n", pPatchRec->CoreOffset.Key)); + patmR3DbgAddPatch(pVM, pPatchRec); + + pPatch->uState = PATCH_ENABLED; + + return VINF_SUCCESS; + } + } +failure: + /* Give back the patch memory we no longer need */ + if (orgOffsetPatchMem != (uint32_t)~0) + pVM->patm.s.offPatchMem = orgOffsetPatchMem; + + return patmR3PatchBlock(pVM, pInstrGC, pInstrHC, OP_CLI, uOpSize, pPatchRec); +} + +/** + * Install a trampoline to call a guest trap handler directly + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pInstrGC Guest context point to privileged instruction + * @param pPatchRec Patch record + * @param pCacheRec Cache record ptr + * + */ +static int patmInstallTrapTrampoline(PVM pVM, RTRCPTR pInstrGC, PPATMPATCHREC pPatchRec, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + int rc = VERR_PATCHING_REFUSED; + uint32_t orgOffsetPatchMem = UINT32_MAX; + bool fInserted; + + // save original offset (in case of failures later on) + orgOffsetPatchMem = pVM->patm.s.offPatchMem; + + pPatch->pPatchBlockOffset = pVM->patm.s.offPatchMem; + pPatch->uCurPatchOffset = 0; + pPatch->nrPatch2GuestRecs = 0; + +#ifdef VBOX_WITH_STATISTICS + rc = patmPatchGenStats(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; +#endif + + rc = patmPatchGenTrapEntry(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + + /* size of patch block */ + pPatch->cbPatchBlockSize = pPatch->uCurPatchOffset; + + /* Update free pointer in patch memory. */ + pVM->patm.s.offPatchMem += pPatch->cbPatchBlockSize; + /* Round to next 8 byte boundary */ + pVM->patm.s.offPatchMem = RT_ALIGN_32(pVM->patm.s.offPatchMem, 8); + + /* There's no jump from guest to patch code. */ + pPatch->cbPatchJump = 0; + +#ifdef LOG_ENABLED + Log(("Patch code ----------------------------------------------------------\n")); + patmr3DisasmCodeStream(pVM, PATCHCODE_PTR_GC(pPatch), PATCHCODE_PTR_GC(pPatch), patmR3DisasmCallback, pCacheRec); + Log(("Patch code ends -----------------------------------------------------\n")); +#else + RT_NOREF_PV(pCacheRec); +#endif + PATM_LOG_ORG_PATCH_INSTR(pVM, pPatch, "TRAP handler"); + Log(("Successfully installed Trap Trampoline patch at %RRv\n", pInstrGC)); + + /* + * Insert into patch to guest lookup tree + */ + LogFlow(("Insert %RRv patch offset %RRv\n", pPatchRec->patch.pPrivInstrGC, pPatch->pPatchBlockOffset)); + pPatchRec->CoreOffset.Key = pPatch->pPatchBlockOffset; + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, &pPatchRec->CoreOffset); + AssertMsg(fInserted, ("RTAvlULInsert failed for %x\n", pPatchRec->CoreOffset.Key)); + patmR3DbgAddPatch(pVM, pPatchRec); + + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; + +failure: + AssertMsgFailed(("Failed to install trap handler trampoline!!\n")); + + /* Turn this cli patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + pPatch->pPatchBlockOffset = 0; + + /* Give back the patch memory we no longer need */ + Assert(orgOffsetPatchMem != (uint32_t)~0); + pVM->patm.s.offPatchMem = orgOffsetPatchMem; + + return rc; +} + + +#ifdef LOG_ENABLED +/** + * Check if the instruction is patched as a common idt handler + * + * @returns true or false + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to the instruction + * + */ +static bool patmIsCommonIDTHandlerPatch(PVM pVM, RTRCPTR pInstrGC) +{ + PPATMPATCHREC pRec; + + pRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + if (pRec && pRec->patch.flags & PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT) + return true; + return false; +} +#endif //DEBUG + + +/** + * Duplicates a complete function + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pInstrGC Guest context point to privileged instruction + * @param pPatchRec Patch record + * @param pCacheRec Cache record ptr + * + */ +static int patmDuplicateFunction(PVM pVM, RTRCPTR pInstrGC, PPATMPATCHREC pPatchRec, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + int rc = VERR_PATCHING_REFUSED; + uint32_t orgOffsetPatchMem = UINT32_MAX; + bool fInserted; + + Log(("patmDuplicateFunction %RRv\n", pInstrGC)); + /* Save original offset (in case of failures later on). */ + orgOffsetPatchMem = pVM->patm.s.offPatchMem; + + /* We will not go on indefinitely with call instruction handling. */ + if (pVM->patm.s.ulCallDepth > PATM_MAX_CALL_DEPTH) + { + Log(("patmDuplicateFunction: maximum callback depth reached!!\n")); + return VERR_PATCHING_REFUSED; + } + + pVM->patm.s.ulCallDepth++; + +#ifdef PATM_ENABLE_CALL + pPatch->flags |= PATMFL_SUPPORT_CALLS | PATMFL_SUPPORT_INDIRECT_CALLS; +#endif + + Assert(pPatch->flags & (PATMFL_DUPLICATE_FUNCTION)); + + pPatch->nrPatch2GuestRecs = 0; + pPatch->pPatchBlockOffset = pVM->patm.s.offPatchMem; + pPatch->uCurPatchOffset = 0; + + /* Note: Set the PATM interrupt flag here; it was cleared before the patched call. (!!!) */ + rc = patmPatchGenSetPIF(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + +#ifdef VBOX_WITH_STATISTICS + rc = patmPatchGenStats(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; +#endif + + rc = patmRecompileCodeStream(pVM, pInstrGC, pInstrGC, patmRecompileCallback, pCacheRec); + if (rc != VINF_SUCCESS) + { + Log(("PATMR3PatchCli: patmRecompileCodeStream failed with %d\n", rc)); + goto failure; + } + + //size of patch block + pPatch->cbPatchBlockSize = pPatch->uCurPatchOffset; + + //update free pointer in patch memory + pVM->patm.s.offPatchMem += pPatch->cbPatchBlockSize; + /* Round to next 8 byte boundary. */ + pVM->patm.s.offPatchMem = RT_ALIGN_32(pVM->patm.s.offPatchMem, 8); + + pPatch->uState = PATCH_ENABLED; + + /* + * Insert into patch to guest lookup tree + */ + LogFlow(("Insert %RRv patch offset %RRv\n", pPatchRec->patch.pPrivInstrGC, pPatch->pPatchBlockOffset)); + pPatchRec->CoreOffset.Key = pPatch->pPatchBlockOffset; + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, &pPatchRec->CoreOffset); + AssertMsg(fInserted, ("RTAvloU32Insert failed for %x\n", pPatchRec->CoreOffset.Key)); + if (!fInserted) + { + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + /* Note that patmr3SetBranchTargets can install additional patches!! 
*/ + rc = patmr3SetBranchTargets(pVM, pPatch); + if (rc != VINF_SUCCESS) + { + Log(("PATMR3PatchCli: patmr3SetBranchTargets failed with %d\n", rc)); + goto failure; + } + + patmR3DbgAddPatch(pVM, pPatchRec); + +#ifdef LOG_ENABLED + Log(("Patch code ----------------------------------------------------------\n")); + patmr3DisasmCodeStream(pVM, PATCHCODE_PTR_GC(pPatch), PATCHCODE_PTR_GC(pPatch), patmR3DisasmCallback, pCacheRec); + Log(("Patch code ends -----------------------------------------------------\n")); +#endif + + Log(("Successfully installed function duplication patch at %RRv\n", pInstrGC)); + + patmEmptyTree(pVM, &pPatch->pTempInfo->IllegalInstrTree); + pPatch->pTempInfo->nrIllegalInstr = 0; + + pVM->patm.s.ulCallDepth--; + STAM_COUNTER_INC(&pVM->patm.s.StatInstalledFunctionPatches); + return VINF_SUCCESS; + +failure: + if (pPatchRec->CoreOffset.Key) + RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, pPatchRec->CoreOffset.Key); + + patmEmptyTree(pVM, &pPatch->FixupTree); + pPatch->nrFixups = 0; + + patmEmptyTree(pVM, &pPatch->JumpTree); + pPatch->nrJumpRecs = 0; + + patmEmptyTree(pVM, &pPatch->pTempInfo->IllegalInstrTree); + pPatch->pTempInfo->nrIllegalInstr = 0; + + /* Turn this cli patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + pPatch->pPatchBlockOffset = 0; + + // Give back the patch memory we no longer need + Assert(orgOffsetPatchMem != (uint32_t)~0); + pVM->patm.s.offPatchMem = orgOffsetPatchMem; + + pVM->patm.s.ulCallDepth--; + Log(("patmDupicateFunction %RRv failed!!\n", pInstrGC)); + return rc; +} + +/** + * Creates trampoline code to jump inside an existing patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * @param pPatchRec Patch record + * + */ +static int patmCreateTrampoline(PVM pVM, RTRCPTR pInstrGC, PPATMPATCHREC pPatchRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + RTRCPTR pPage, pPatchTargetGC = 0; + uint32_t orgOffsetPatchMem = UINT32_MAX; + int rc = VERR_PATCHING_REFUSED; + PPATCHINFO pPatchToJmp = NULL; /**< Patch the trampoline jumps to. */ + PTRAMPREC pTrampRec = NULL; /**< Trampoline record used to find the patch. */ + bool fInserted = false; + + Log(("patmCreateTrampoline %RRv\n", pInstrGC)); + /* Save original offset (in case of failures later on). */ + orgOffsetPatchMem = pVM->patm.s.offPatchMem; + + /* First we check if the duplicate function target lies in some existing function patch already. Will save some space. 
*/ + /** @todo we already checked this before */ + pPage = pInstrGC & PAGE_BASE_GC_MASK; + + PPATMPATCHPAGE pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, (RTRCPTR)pPage); + if (pPatchPage) + { + uint32_t i; + + for (i=0;i<pPatchPage->cCount;i++) + { + if (pPatchPage->papPatch[i]) + { + pPatchToJmp = pPatchPage->papPatch[i]; + + if ( (pPatchToJmp->flags & PATMFL_DUPLICATE_FUNCTION) + && pPatchToJmp->uState == PATCH_ENABLED) + { + pPatchTargetGC = patmGuestGCPtrToPatchGCPtr(pVM, pPatchToJmp, pInstrGC); + if (pPatchTargetGC) + { + uint32_t offsetPatch = pPatchTargetGC - pVM->patm.s.pPatchMemGC; + PRECPATCHTOGUEST pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32GetBestFit(&pPatchToJmp->Patch2GuestAddrTree, offsetPatch, false); + Assert(pPatchToGuestRec); + + pPatchToGuestRec->fJumpTarget = true; + Assert(pPatchTargetGC != pPatchToJmp->pPrivInstrGC); + Log(("patmCreateTrampoline: generating jump to code inside patch at %RRv (patch target %RRv)\n", pPatchToJmp->pPrivInstrGC, pPatchTargetGC)); + break; + } + } + } + } + } + AssertReturn(pPatchPage && pPatchTargetGC && pPatchToJmp, VERR_PATCHING_REFUSED); + + /* + * Only record the trampoline patch if this is the first patch to the target + * or we recorded other patches already. + * The goal is to refuse refreshing function duplicates if the guest + * modifies code after a saved state was loaded because it is not possible + * to save the relation between trampoline and target without changing the + * saved state version. + */ + if ( !(pPatchToJmp->flags & PATMFL_EXTERNAL_JUMP_INSIDE) + || pPatchToJmp->pTrampolinePatchesHead) + { + pPatchToJmp->flags |= PATMFL_EXTERNAL_JUMP_INSIDE; + pTrampRec = (PTRAMPREC)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(*pTrampRec)); + if (!pTrampRec) + return VERR_NO_MEMORY; /* or better return VERR_PATCHING_REFUSED to let the VM continue? */ + + pTrampRec->pPatchTrampoline = pPatchRec; + } + + pPatch->nrPatch2GuestRecs = 0; + pPatch->pPatchBlockOffset = pVM->patm.s.offPatchMem; + pPatch->uCurPatchOffset = 0; + + /* Note: Set the PATM interrupt flag here; it was cleared before the patched call. (!!!) */ + rc = patmPatchGenSetPIF(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; + +#ifdef VBOX_WITH_STATISTICS + rc = patmPatchGenStats(pVM, pPatch, pInstrGC); + if (RT_FAILURE(rc)) + goto failure; +#endif + + rc = patmPatchGenPatchJump(pVM, pPatch, pInstrGC, pPatchTargetGC); + if (RT_FAILURE(rc)) + goto failure; + + /* + * Insert into patch to guest lookup tree + */ + LogFlow(("Insert %RRv patch offset %RRv\n", pPatchRec->patch.pPrivInstrGC, pPatch->pPatchBlockOffset)); + pPatchRec->CoreOffset.Key = pPatch->pPatchBlockOffset; + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, &pPatchRec->CoreOffset); + AssertMsg(fInserted, ("RTAvloU32Insert failed for %x\n", pPatchRec->CoreOffset.Key)); + if (!fInserted) + { + rc = VERR_PATCHING_REFUSED; + goto failure; + } + patmR3DbgAddPatch(pVM, pPatchRec); + + /* size of patch block */ + pPatch->cbPatchBlockSize = pPatch->uCurPatchOffset; + + /* Update free pointer in patch memory. */ + pVM->patm.s.offPatchMem += pPatch->cbPatchBlockSize; + /* Round to next 8 byte boundary */ + pVM->patm.s.offPatchMem = RT_ALIGN_32(pVM->patm.s.offPatchMem, 8); + + /* There's no jump from guest to patch code. */ + pPatch->cbPatchJump = 0; + + /* Enable the patch. */ + pPatch->uState = PATCH_ENABLED; + /* We allow this patch to be called as a function.
*/ + pPatch->flags |= PATMFL_CALLABLE_AS_FUNCTION; + + if (pTrampRec) + { + pTrampRec->pNext = pPatchToJmp->pTrampolinePatchesHead; + pPatchToJmp->pTrampolinePatchesHead = pTrampRec; + } + STAM_COUNTER_INC(&pVM->patm.s.StatInstalledTrampoline); + return VINF_SUCCESS; + +failure: + if (pPatchRec->CoreOffset.Key) + RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, pPatchRec->CoreOffset.Key); + + patmEmptyTree(pVM, &pPatch->FixupTree); + pPatch->nrFixups = 0; + + patmEmptyTree(pVM, &pPatch->JumpTree); + pPatch->nrJumpRecs = 0; + + patmEmptyTree(pVM, &pPatch->pTempInfo->IllegalInstrTree); + pPatch->pTempInfo->nrIllegalInstr = 0; + + /* Turn this cli patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + pPatch->pPatchBlockOffset = 0; + + // Give back the patch memory we no longer need + Assert(orgOffsetPatchMem != (uint32_t)~0); + pVM->patm.s.offPatchMem = orgOffsetPatchMem; + + if (pTrampRec) + MMR3HeapFree(pTrampRec); + + return rc; +} + + +/** + * Patch branch target function for call/jump at specified location. + * (in response to a VINF_PATM_DUPLICATE_FUNCTION GC exit reason) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCtx Pointer to the guest CPU context. + * + */ +VMMR3_INT_DECL(int) PATMR3DuplicateFunctionRequest(PVM pVM, PCPUMCTX pCtx) +{ + RTRCPTR pBranchTarget, pPage; + int rc; + RTRCPTR pPatchTargetGC = 0; + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + + pBranchTarget = pCtx->edx; + pBranchTarget = SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), pBranchTarget); + + /* First we check if the duplicate function target lies in some existing function patch already. Will save some space. */ + pPage = pBranchTarget & PAGE_BASE_GC_MASK; + + PPATMPATCHPAGE pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, (RTRCPTR)pPage); + if (pPatchPage) + { + uint32_t i; + + for (i=0;i<pPatchPage->cCount;i++) + { + if (pPatchPage->papPatch[i]) + { + PPATCHINFO pPatch = pPatchPage->papPatch[i]; + + if ( (pPatch->flags & PATMFL_DUPLICATE_FUNCTION) + && pPatch->uState == PATCH_ENABLED) + { + pPatchTargetGC = patmGuestGCPtrToPatchGCPtr(pVM, pPatch, pBranchTarget); + if (pPatchTargetGC) + { + STAM_COUNTER_INC(&pVM->patm.s.StatDuplicateUseExisting); + break; + } + } + } + } + } + + if (pPatchTargetGC) + { + /* Create a trampoline that also sets PATM_ASMFIX_INTERRUPTFLAG. */ + rc = PATMR3InstallPatch(pVM, pBranchTarget, PATMFL_CODE32 | PATMFL_TRAMPOLINE); + } + else + { + rc = PATMR3InstallPatch(pVM, pBranchTarget, PATMFL_CODE32 | PATMFL_DUPLICATE_FUNCTION); + } + + if (rc == VINF_SUCCESS) + { + pPatchTargetGC = PATMR3QueryPatchGCPtr(pVM, pBranchTarget); + Assert(pPatchTargetGC); + } + + if (pPatchTargetGC) + { + pCtx->eax = pPatchTargetGC; + pCtx->eax = pCtx->eax - (RTRCUINTPTR)pVM->patm.s.pPatchMemGC; /* make it relative */ + } + else + { + /* We add a dummy entry into the lookup cache so we won't get bombarded with the same requests over and over again. */ + pCtx->eax = 0; + STAM_COUNTER_INC(&pVM->patm.s.StatDuplicateREQFailed); + } + Assert(PATMIsPatchGCAddr(pVM, pCtx->edi)); + rc = patmAddBranchToLookupCache(pVM, pCtx->edi, pBranchTarget, pCtx->eax); + AssertRC(rc); + + pCtx->eip += PATM_ILLEGAL_INSTR_SIZE; + STAM_COUNTER_INC(&pVM->patm.s.StatDuplicateREQSuccess); + return VINF_SUCCESS; +} + +/** + * Replaces a function call by a call to an existing function duplicate (or jmp -> jmp) + * + * @returns VBox status code. + * @param pVM The cross context VM structure.
+ * @param pCpu Disassembly CPU structure ptr + * @param pInstrGC Guest context point to privileged instruction + * @param pCacheRec Cache record ptr + * + */ +static int patmReplaceFunctionCall(PVM pVM, DISCPUSTATE *pCpu, RTRCPTR pInstrGC, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + int rc = VERR_PATCHING_REFUSED; + DISCPUSTATE cpu; + RTRCPTR pTargetGC; + PPATMPATCHREC pPatchFunction; + uint32_t cbInstr; + bool disret; + + Assert(pPatch->flags & PATMFL_REPLACE_FUNCTION_CALL); + Assert((pCpu->pCurInstr->uOpcode == OP_CALL || pCpu->pCurInstr->uOpcode == OP_JMP) && pCpu->cbInstr == SIZEOF_NEARJUMP32); + + if ((pCpu->pCurInstr->uOpcode != OP_CALL && pCpu->pCurInstr->uOpcode != OP_JMP) || pCpu->cbInstr != SIZEOF_NEARJUMP32) + { + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + pTargetGC = PATMResolveBranch(pCpu, pInstrGC); + if (pTargetGC == 0) + { + Log(("We don't support far jumps here!! (%08X)\n", pCpu->Param1.fUse)); + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + pPatchFunction = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pTargetGC); + if (pPatchFunction == NULL) + { + for(;;) + { + /* It could be an indirect call (call -> jmp dest). + * Note that it's dangerous to assume the jump will never change... + */ + uint8_t *pTmpInstrHC; + + pTmpInstrHC = patmR3GCVirtToHCVirt(pVM, pCacheRec, pTargetGC); + Assert(pTmpInstrHC); + if (pTmpInstrHC == 0) + break; + + disret = patmR3DisInstr(pVM, pPatch, pTargetGC, pTmpInstrHC, PATMREAD_ORGCODE, &cpu, &cbInstr); + if (disret == false || cpu.pCurInstr->uOpcode != OP_JMP) + break; + + pTargetGC = PATMResolveBranch(&cpu, pTargetGC); + if (pTargetGC == 0) + { + break; + } + + pPatchFunction = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pTargetGC); + break; + } + if (pPatchFunction == 0) + { + AssertMsgFailed(("Unable to find duplicate function %RRv\n", pTargetGC)); + rc = VERR_PATCHING_REFUSED; + goto failure; + } + } + + // make a copy of the guest code bytes that will be overwritten + pPatch->cbPatchJump = SIZEOF_NEARJUMP32; + + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), pPatch->aPrivInstr, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + AssertRC(rc); + + /* Now replace the original call in the guest code */ + rc = patmGenCallToPatch(pVM, pPatch, PATCHCODE_PTR_GC(&pPatchFunction->patch), pCacheRec, true); + AssertRC(rc); + if (RT_FAILURE(rc)) + goto failure; + + /* Lowest and highest address for write monitoring. */ + pPatch->pInstrGCLowest = pInstrGC; + pPatch->pInstrGCHighest = pInstrGC + pCpu->cbInstr; + PATM_LOG_ORG_PATCH_INSTR(pVM, pPatch, "Call"); + + Log(("Successfully installed function replacement patch at %RRv\n", pInstrGC)); + + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; + +failure: + /* Turn this patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + + return rc; +} + +/** + * Replace the address in an MMIO instruction with the cached version. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pInstrGC Guest context point to privileged instruction + * @param pCpu Disassembly CPU structure ptr + * @param pCacheRec Cache record ptr + * + * @note returns failure if patching is not allowed or possible + * + */ +static int patmPatchMMIOInstr(PVM pVM, RTRCPTR pInstrGC, DISCPUSTATE *pCpu, PPATMP2GLOOKUPREC pCacheRec) +{ + PPATCHINFO pPatch = (PPATCHINFO)pCacheRec->pPatch; + uint8_t *pPB; + int rc = VERR_PATCHING_REFUSED; + + Assert(pVM->patm.s.mmio.pCachedData); + if (!pVM->patm.s.mmio.pCachedData) + goto failure; + + if (pCpu->Param2.fUse != DISUSE_DISPLACEMENT32) + goto failure; + + pPB = patmR3GCVirtToHCVirt(pVM, pCacheRec, pPatch->pPrivInstrGC); + if (pPB == 0) + goto failure; + + /* Add relocation record for cached data access. */ + if (patmPatchAddReloc32(pVM, pPatch, &pPB[pCpu->cbInstr - sizeof(RTRCPTR)], FIXUP_ABSOLUTE, pPatch->pPrivInstrGC, + pVM->patm.s.mmio.pCachedData) != VINF_SUCCESS) + { + Log(("Relocation failed for cached mmio address!!\n")); + return VERR_PATCHING_REFUSED; + } + PATM_LOG_PATCH_INSTR(pVM, pPatch, PATMREAD_ORGCODE, "MMIO patch old instruction:", ""); + + /* Save original instruction. */ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), pPatch->aPrivInstr, pPatch->pPrivInstrGC, pPatch->cbPrivInstr); + AssertRC(rc); + + pPatch->cbPatchJump = pPatch->cbPrivInstr; /* bit of a misnomer in this case; size of replacement instruction. */ + + /* Replace address with that of the cached item. */ + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pInstrGC + pCpu->cbInstr - sizeof(RTRCPTR), + &pVM->patm.s.mmio.pCachedData, sizeof(RTRCPTR)); + AssertRC(rc); + if (RT_FAILURE(rc)) + { + goto failure; + } + + PATM_LOG_ORG_PATCH_INSTR(pVM, pPatch, "MMIO"); + pVM->patm.s.mmio.pCachedData = 0; + pVM->patm.s.mmio.GCPhys = 0; + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; + +failure: + /* Turn this patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + + return rc; +} + + +/** + * Replace the address in an MMIO instruction with the cached version. (instruction is part of an existing patch) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * @param pPatch Patch record + * + * @note returns failure if patching is not allowed or possible + * + */ +static int patmPatchPATMMMIOInstr(PVM pVM, RTRCPTR pInstrGC, PPATCHINFO pPatch) +{ + DISCPUSTATE cpu; + uint32_t cbInstr; + bool disret; + uint8_t *pInstrHC; + + AssertReturn(pVM->patm.s.mmio.pCachedData, VERR_INVALID_PARAMETER); + + /* Convert GC to HC address. */ + pInstrHC = patmPatchGCPtr2PatchHCPtr(pVM, pInstrGC); + AssertReturn(pInstrHC, VERR_PATCHING_REFUSED); + + /* Disassemble mmio instruction. */ + disret = patmR3DisInstrNoStrOpMode(pVM, pPatch, pInstrGC, pInstrHC, PATMREAD_ORGCODE, + &cpu, &cbInstr); + if (disret == false) + { + Log(("Disassembly failed (probably page not present) -> return to caller\n")); + return VERR_PATCHING_REFUSED; + } + + AssertMsg(cbInstr <= MAX_INSTR_SIZE, ("privileged instruction too big %d!!\n", cbInstr)); + if (cbInstr > MAX_INSTR_SIZE) + return VERR_PATCHING_REFUSED; + if (cpu.Param2.fUse != DISUSE_DISPLACEMENT32) + return VERR_PATCHING_REFUSED; + + /* Add relocation record for cached data access. */ + if (patmPatchAddReloc32(pVM, pPatch, &pInstrHC[cpu.cbInstr - sizeof(RTRCPTR)], FIXUP_ABSOLUTE) != VINF_SUCCESS) + { + Log(("Relocation failed for cached mmio address!!\n")); + return VERR_PATCHING_REFUSED; + } + /* Replace address with that of the cached item. 
*/ + *(RTRCPTR *)&pInstrHC[cpu.cbInstr - sizeof(RTRCPTR)] = pVM->patm.s.mmio.pCachedData; + + /* Lowest and highest address for write monitoring. */ + pPatch->pInstrGCLowest = pInstrGC; + pPatch->pInstrGCHighest = pInstrGC + cpu.cbInstr; + + PATM_LOG_ORG_PATCH_INSTR(pVM, pPatch, "MMIO"); + pVM->patm.s.mmio.pCachedData = 0; + pVM->patm.s.mmio.GCPhys = 0; + return VINF_SUCCESS; +} + +/** + * Activates an int3 patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + */ +static int patmActivateInt3Patch(PVM pVM, PPATCHINFO pPatch) +{ + uint8_t bASMInt3 = 0xCC; + int rc; + + Assert(pPatch->flags & (PATMFL_INT3_REPLACEMENT|PATMFL_INT3_REPLACEMENT_BLOCK)); + Assert(pPatch->uState != PATCH_ENABLED); + + /* Replace first opcode byte with 'int 3'. */ + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pPatch->pPrivInstrGC, &bASMInt3, sizeof(bASMInt3)); + AssertRC(rc); + + pPatch->cbPatchJump = sizeof(bASMInt3); + + return rc; +} + +/** + * Deactivates an int3 patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + */ +static int patmDeactivateInt3Patch(PVM pVM, PPATCHINFO pPatch) +{ + uint8_t cbASMInt3 = 1; + int rc; + + Assert(pPatch->flags & (PATMFL_INT3_REPLACEMENT|PATMFL_INT3_REPLACEMENT_BLOCK)); + Assert(pPatch->uState == PATCH_ENABLED || pPatch->uState == PATCH_DIRTY); + + /* Restore first opcode byte. */ + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pPatch->pPrivInstrGC, pPatch->aPrivInstr, cbASMInt3); + AssertRC(rc); + return rc; +} + +/** + * Replace an instruction with a breakpoint (0xCC), that is handled dynamically + * in the raw-mode context. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * @param pInstrHC Host context point to privileged instruction + * @param pCpu Disassembly CPU structure ptr + * @param pPatch Patch record + * + * @note returns failure if patching is not allowed or possible + * + */ +int patmR3PatchInstrInt3(PVM pVM, RTRCPTR pInstrGC, R3PTRTYPE(uint8_t *) pInstrHC, DISCPUSTATE *pCpu, PPATCHINFO pPatch) +{ + uint8_t cbASMInt3 = 1; + int rc; + RT_NOREF_PV(pInstrHC); + + /* Note: Do not use patch memory here! It might called during patch installation too. */ + PATM_LOG_PATCH_INSTR(pVM, pPatch, PATMREAD_ORGCODE, "patmR3PatchInstrInt3:", ""); + + /* Save the original instruction. */ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), pPatch->aPrivInstr, pPatch->pPrivInstrGC, pPatch->cbPrivInstr); + AssertRC(rc); + pPatch->cbPatchJump = cbASMInt3; /* bit of a misnomer in this case; size of replacement instruction. */ + + pPatch->flags |= PATMFL_INT3_REPLACEMENT; + + /* Replace first opcode byte with 'int 3'. */ + rc = patmActivateInt3Patch(pVM, pPatch); + if (RT_FAILURE(rc)) + goto failure; + + /* Lowest and highest address for write monitoring. */ + pPatch->pInstrGCLowest = pInstrGC; + pPatch->pInstrGCHighest = pInstrGC + pCpu->cbInstr; + + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; + +failure: + /* Turn this patch into a dummy. */ + return VERR_PATCHING_REFUSED; +} + +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES +/** + * Patch a jump instruction at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pInstrGC Guest context point to privileged instruction + * @param pInstrHC Host context point to privileged instruction + * @param pCpu Disassembly CPU structure ptr + * @param pPatchRec Patch record + * + * @note returns failure if patching is not allowed or possible + * + */ +int patmPatchJump(PVM pVM, RTRCPTR pInstrGC, R3PTRTYPE(uint8_t *) pInstrHC, DISCPUSTATE *pCpu, PPATMPATCHREC pPatchRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + int rc = VERR_PATCHING_REFUSED; + + pPatch->pPatchBlockOffset = 0; /* doesn't use patch memory */ + pPatch->uCurPatchOffset = 0; + pPatch->cbPatchBlockSize = 0; + pPatch->flags |= PATMFL_SINGLE_INSTRUCTION; + + /* + * Instruction replacements such as these should never be interrupted. I've added code to EM.cpp to + * make sure this never happens. (unless a trap is triggered (intentionally or not)) + */ + switch (pCpu->pCurInstr->uOpcode) + { + case OP_JO: + case OP_JNO: + case OP_JC: + case OP_JNC: + case OP_JE: + case OP_JNE: + case OP_JBE: + case OP_JNBE: + case OP_JS: + case OP_JNS: + case OP_JP: + case OP_JNP: + case OP_JL: + case OP_JNL: + case OP_JLE: + case OP_JNLE: + case OP_JMP: + Assert(pPatch->flags & PATMFL_JUMP_CONFLICT); + Assert(pCpu->Param1.fUse & DISUSE_IMMEDIATE32_REL); + if (!(pCpu->Param1.fUse & DISUSE_IMMEDIATE32_REL)) + goto failure; + + Assert(pCpu->cbInstr == SIZEOF_NEARJUMP32 || pCpu->cbInstr == SIZEOF_NEAR_COND_JUMP32); + if (pCpu->cbInstr != SIZEOF_NEARJUMP32 && pCpu->cbInstr != SIZEOF_NEAR_COND_JUMP32) + goto failure; + + if (PAGE_ADDRESS(pInstrGC) != PAGE_ADDRESS(pInstrGC + pCpu->cbInstr)) + { + STAM_COUNTER_INC(&pVM->patm.s.StatPageBoundaryCrossed); + AssertMsgFailed(("Patch jump would cross page boundary -> refuse!!\n")); + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + break; + + default: + goto failure; + } + + // make a copy of the guest code bytes that will be overwritten + Assert(pCpu->cbInstr <= sizeof(pPatch->aPrivInstr)); + Assert(pCpu->cbInstr >= SIZEOF_NEARJUMP32); + pPatch->cbPatchJump = pCpu->cbInstr; + + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), pPatch->aPrivInstr, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + AssertRC(rc); + + /* Now insert a jump in the guest code. */ + /* + * A conflict jump patch needs to be treated differently; we'll just replace the relative jump address with one that + * references the target instruction in the conflict patch. + */ + RTRCPTR pJmpDest = patmR3GuestGCPtrToPatchGCPtrSimple(pVM, pInstrGC + pCpu->cbInstr + (int32_t)pCpu->Param1.uValue); + + AssertMsg(pJmpDest, ("patmR3GuestGCPtrToPatchGCPtrSimple failed for %RRv\n", pInstrGC + pCpu->cbInstr + (int32_t)pCpu->Param1.uValue)); + pPatch->pPatchJumpDestGC = pJmpDest; + + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = pPatch; + + rc = patmGenJumpToPatch(pVM, pPatch, &cacheRec, true); + /* Free leftover lock if any. */ + if (cacheRec.Lock.pvMap) + { + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + cacheRec.Lock.pvMap = NULL; + } + AssertRC(rc); + if (RT_FAILURE(rc)) + goto failure; + + pPatch->flags |= PATMFL_MUST_INSTALL_PATCHJMP; + + PATM_LOG_ORG_PATCH_INSTR(pVM, pPatch, patmGetInstructionString(pPatch->opcode, pPatch->flags)); + Log(("Successfully installed %s patch at %RRv\n", patmGetInstructionString(pPatch->opcode, pPatch->flags), pInstrGC)); + + STAM_COUNTER_INC(&pVM->patm.s.StatInstalledJump); + + /* Lowest and highest address for write monitoring.
*/ + pPatch->pInstrGCLowest = pInstrGC; + pPatch->pInstrGCHighest = pInstrGC + pPatch->cbPatchJump; + + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; + +failure: + /* Turn this cli patch into a dummy. */ + pPatch->uState = PATCH_REFUSED; + + return rc; +} +#endif /* PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES */ + + +/** + * Gives hint to PATM about supervisor guest instructions + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * @param flags Patch flags + */ +VMMR3_INT_DECL(int) PATMR3AddHint(PVM pVM, RTRCPTR pInstrGC, uint32_t flags) +{ + Assert(pInstrGC); + Assert(flags == PATMFL_CODE32); RT_NOREF_PV(flags); + + Log(("PATMR3AddHint %RRv\n", pInstrGC)); + return PATMR3InstallPatch(pVM, pInstrGC, PATMFL_CODE32 | PATMFL_INSTR_HINT); +} + +/** + * Patch privileged instruction at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction (0:32 flat + * address) + * @param flags Patch flags + * + * @note returns failure if patching is not allowed or possible + */ +VMMR3_INT_DECL(int) PATMR3InstallPatch(PVM pVM, RTRCPTR pInstrGC, uint64_t flags) +{ + DISCPUSTATE cpu; + R3PTRTYPE(uint8_t *) pInstrHC; + uint32_t cbInstr; + PPATMPATCHREC pPatchRec; + PCPUMCTX pCtx = 0; + bool disret; + int rc; + PVMCPU pVCpu = VMMGetCpu0(pVM); + LogFlow(("PATMR3InstallPatch: %08x (%#llx)\n", pInstrGC, flags)); + + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + + if ( !pVM + || pInstrGC == 0 + || (flags & ~(PATMFL_CODE32|PATMFL_IDTHANDLER|PATMFL_INTHANDLER|PATMFL_SYSENTER|PATMFL_TRAPHANDLER|PATMFL_DUPLICATE_FUNCTION|PATMFL_REPLACE_FUNCTION_CALL|PATMFL_GUEST_SPECIFIC|PATMFL_INT3_REPLACEMENT|PATMFL_TRAPHANDLER_WITH_ERRORCODE|PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT|PATMFL_MMIO_ACCESS|PATMFL_TRAMPOLINE|PATMFL_INSTR_HINT|PATMFL_JUMP_CONFLICT))) + { + AssertFailed(); + return VERR_INVALID_PARAMETER; + } + + if (PATMIsEnabled(pVM) == false) + return VERR_PATCHING_REFUSED; + + /* Test for patch conflict only with patches that actually change guest code. */ + if (!(flags & (PATMFL_GUEST_SPECIFIC|PATMFL_IDTHANDLER|PATMFL_INTHANDLER|PATMFL_TRAMPOLINE))) + { + PPATCHINFO pConflictPatch = patmFindActivePatchByEntrypoint(pVM, pInstrGC); + AssertReleaseMsg(pConflictPatch == 0, ("Unable to patch overwritten instruction at %RRv (%RRv)\n", pInstrGC, pConflictPatch->pPrivInstrGC)); + if (pConflictPatch != 0) + return VERR_PATCHING_REFUSED; + } + + if (!(flags & PATMFL_CODE32)) + { + /** @todo Only 32 bits code right now */ + AssertMsgFailed(("PATMR3InstallPatch: We don't support 16 bits code at this moment!!\n")); + return VERR_NOT_IMPLEMENTED; + } + + /* We ran out of patch memory; don't bother anymore. */ + if (pVM->patm.s.fOutOfMemory == true) + return VERR_PATCHING_REFUSED; + +#if 1 /* DONT COMMIT ENABLED! 
*/ + /* Blacklisted NT4SP1 areas - debugging why we sometimes crash early on, */ + if ( 0 + //|| (pInstrGC - 0x80010000U) < 0x10000U // NT4SP1 HAL + //|| (pInstrGC - 0x80010000U) < 0x5000U // NT4SP1 HAL + //|| (pInstrGC - 0x80013000U) < 0x2000U // NT4SP1 HAL + //|| (pInstrGC - 0x80014000U) < 0x1000U // NT4SP1 HAL + //|| (pInstrGC - 0x80014000U) < 0x800U // NT4SP1 HAL + //|| (pInstrGC - 0x80014400U) < 0x400U // NT4SP1 HAL + //|| (pInstrGC - 0x80014400U) < 0x200U // NT4SP1 HAL + //|| (pInstrGC - 0x80014400U) < 0x100U // NT4SP1 HAL + //|| (pInstrGC - 0x80014500U) < 0x100U // NT4SP1 HAL - negative + //|| (pInstrGC - 0x80014400U) < 0x80U // NT4SP1 HAL + //|| (pInstrGC - 0x80014400U) < 0x80U // NT4SP1 HAL + //|| (pInstrGC - 0x80014440U) < 0x40U // NT4SP1 HAL + //|| (pInstrGC - 0x80014440U) < 0x20U // NT4SP1 HAL + || pInstrGC == 0x80014447 /* KfLowerIrql */ + || 0) + { + Log(("PATMR3InstallPatch: %08x is blacklisted\n", pInstrGC)); + return VERR_PATCHING_REFUSED; + } +#endif + + /* Make sure the code selector is wide open; otherwise refuse. */ + pCtx = CPUMQueryGuestCtxPtr(pVCpu); + if (CPUMGetGuestCPL(pVCpu) == 0) + { + RTRCPTR pInstrGCFlat = SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), pInstrGC); + if (pInstrGCFlat != pInstrGC) + { + Log(("PATMR3InstallPatch: code selector not wide open: %04x:%RRv != %RRv eflags=%08x\n", pCtx->cs.Sel, pInstrGCFlat, pInstrGC, pCtx->eflags.u32)); + return VERR_PATCHING_REFUSED; + } + } + + /* Note: the OpenBSD specific check will break if we allow additional patches to be installed (int 3)) */ + if (!(flags & PATMFL_GUEST_SPECIFIC)) + { + /* New code. Make sure CSAM has a go at it first. */ + CSAMR3CheckCode(pVM, pInstrGC); + } + + /* Note: obsolete */ + if ( PATMIsPatchGCAddr(pVM, pInstrGC) + && (flags & PATMFL_MMIO_ACCESS)) + { + RTRCUINTPTR offset; + void *pvPatchCoreOffset; + + /* Find the patch record. */ + offset = pInstrGC - pVM->patm.s.pPatchMemGC; + pvPatchCoreOffset = RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, offset, false); + if (pvPatchCoreOffset == NULL) + { + AssertMsgFailed(("PATMR3InstallPatch: patch not found at address %RRv!!\n", pInstrGC)); + return VERR_PATCH_NOT_FOUND; //fatal error + } + pPatchRec = PATM_PATCHREC_FROM_COREOFFSET(pvPatchCoreOffset); + + return patmPatchPATMMMIOInstr(pVM, pInstrGC, &pPatchRec->patch); + } + + AssertReturn(!PATMIsPatchGCAddr(pVM, pInstrGC), VERR_PATCHING_REFUSED); + + pPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + if (pPatchRec) + { + Assert(!(flags & PATMFL_TRAMPOLINE)); + + /* Hints about existing patches are ignored. */ + if (flags & PATMFL_INSTR_HINT) + return VERR_PATCHING_REFUSED; + + if (pPatchRec->patch.uState == PATCH_DISABLE_PENDING) + { + Log(("PATMR3InstallPatch: disable operation is pending for patch at %RRv\n", pPatchRec->patch.pPrivInstrGC)); + PATMR3DisablePatch(pVM, pPatchRec->patch.pPrivInstrGC); + Assert(pPatchRec->patch.uState == PATCH_DISABLED); + } + + if (pPatchRec->patch.uState == PATCH_DISABLED) + { + /* A patch, for which we previously received a hint, will be enabled and turned into a normal patch. 
*/ + if (pPatchRec->patch.flags & PATMFL_INSTR_HINT) + { + Log(("Enabling HINTED patch %RRv\n", pInstrGC)); + pPatchRec->patch.flags &= ~PATMFL_INSTR_HINT; + } + else + Log(("Enabling patch %RRv again\n", pInstrGC)); + + /** @todo we shouldn't disable and enable patches too often (it's relatively cheap, but pointless if it always happens) */ + rc = PATMR3EnablePatch(pVM, pInstrGC); + if (RT_SUCCESS(rc)) + return VWRN_PATCH_ENABLED; + + return rc; + } + if ( pPatchRec->patch.uState == PATCH_ENABLED + || pPatchRec->patch.uState == PATCH_DIRTY) + { + /* + * The patch might have been overwritten. + */ + STAM_COUNTER_INC(&pVM->patm.s.StatOverwritten); + if (pPatchRec->patch.uState != PATCH_REFUSED && pPatchRec->patch.uState != PATCH_UNUSABLE) + { + /* Patch must have been overwritten; remove it and pretend nothing happened. */ + Log(("Patch an existing patched instruction?!? (%RRv)\n", pInstrGC)); + if (pPatchRec->patch.flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_IDTHANDLER|PATMFL_MMIO_ACCESS|PATMFL_INT3_REPLACEMENT|PATMFL_INT3_REPLACEMENT_BLOCK)) + { + if (flags & PATMFL_IDTHANDLER) + pPatchRec->patch.flags |= (flags & (PATMFL_IDTHANDLER|PATMFL_TRAPHANDLER|PATMFL_INTHANDLER)); /* update the type */ + + return VERR_PATM_ALREADY_PATCHED; /* already done once */ + } + } + rc = PATMR3RemovePatch(pVM, pInstrGC); + if (RT_FAILURE(rc)) + return VERR_PATCHING_REFUSED; + } + else + { + AssertMsg(pPatchRec->patch.uState == PATCH_REFUSED || pPatchRec->patch.uState == PATCH_UNUSABLE, ("Patch an existing patched instruction?!? (%RRv, state=%d)\n", pInstrGC, pPatchRec->patch.uState)); + /* already tried it once! */ + return VERR_PATCHING_REFUSED; + } + } + + RTGCPHYS GCPhys; + rc = PGMGstGetPage(pVCpu, pInstrGC, NULL, &GCPhys); + if (rc != VINF_SUCCESS) + { + Log(("PGMGstGetPage failed with %Rrc\n", rc)); + return rc; + } + /* Disallow patching instructions inside ROM code; complete function duplication is allowed though. */ + if ( !(flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_TRAMPOLINE)) + && !PGMPhysIsGCPhysNormal(pVM, GCPhys)) + { + Log(("Code at %RGv (phys %RGp) is in a ROM, MMIO or invalid page - refused\n", pInstrGC, GCPhys)); + return VERR_PATCHING_REFUSED; + } + + /* Initialize cache record for guest address translations. */ + bool fInserted; + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + + pInstrHC = patmR3GCVirtToHCVirt(pVM, &cacheRec, pInstrGC); + AssertReturn(pInstrHC, VERR_PATCHING_REFUSED); + + /* Allocate patch record. */ + rc = MMHyperAlloc(pVM, sizeof(PATMPATCHREC), 0, MM_TAG_PATM_PATCH, (void **)&pPatchRec); + if (RT_FAILURE(rc)) + { + Log(("Out of memory!!!!\n")); + return VERR_NO_MEMORY; + } + pPatchRec->Core.Key = pInstrGC; + pPatchRec->patch.uState = PATCH_REFUSED; /* default value */ + /* Insert patch record into the lookup tree. */ + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTree, &pPatchRec->Core); + Assert(fInserted); + + pPatchRec->patch.pPrivInstrGC = pInstrGC; + pPatchRec->patch.flags = flags; + pPatchRec->patch.uOpMode = (flags & PATMFL_CODE32) ? DISCPUMODE_32BIT : DISCPUMODE_16BIT; + pPatchRec->patch.pTrampolinePatchesHead = NULL; + + pPatchRec->patch.pInstrGCLowest = pInstrGC; + pPatchRec->patch.pInstrGCHighest = pInstrGC; + + if (!(pPatchRec->patch.flags & (PATMFL_DUPLICATE_FUNCTION | PATMFL_IDTHANDLER | PATMFL_SYSENTER | PATMFL_TRAMPOLINE))) + { + /* + * Close proximity to an unusable patch is a possible hint that this patch would turn out to be dangerous too! 
+ */ + PPATMPATCHREC pPatchNear = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, (pInstrGC + SIZEOF_NEARJUMP32 - 1), false); + if (pPatchNear) + { + if (pPatchNear->patch.uState == PATCH_UNUSABLE && pInstrGC < pPatchNear->patch.pPrivInstrGC && pInstrGC + SIZEOF_NEARJUMP32 > pPatchNear->patch.pPrivInstrGC) + { + Log(("Dangerous patch; would overwrite the unusable patch at %RRv\n", pPatchNear->patch.pPrivInstrGC)); + + pPatchRec->patch.uState = PATCH_UNUSABLE; + /* + * Leave the new patch active as it's marked unusable; to prevent us from checking it over and over again + */ + return VERR_PATCHING_REFUSED; + } + } + } + + pPatchRec->patch.pTempInfo = (PPATCHINFOTEMP)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(PATCHINFOTEMP)); + if (pPatchRec->patch.pTempInfo == 0) + { + Log(("Out of memory!!!!\n")); + return VERR_NO_MEMORY; + } + + disret = patmR3DisInstrNoStrOpMode(pVM, &pPatchRec->patch, pInstrGC, NULL, PATMREAD_ORGCODE, &cpu, &cbInstr); + if (disret == false) + { + Log(("Disassembly failed (probably page not present) -> return to caller\n")); + return VERR_PATCHING_REFUSED; + } + + AssertMsg(cbInstr <= MAX_INSTR_SIZE, ("privileged instruction too big %d!!\n", cbInstr)); + if (cbInstr > MAX_INSTR_SIZE) + return VERR_PATCHING_REFUSED; + + pPatchRec->patch.cbPrivInstr = cbInstr; + pPatchRec->patch.opcode = cpu.pCurInstr->uOpcode; + + /* Restricted hinting for now. */ + Assert(!(flags & PATMFL_INSTR_HINT) || cpu.pCurInstr->uOpcode == OP_CLI); + + /* Initialize cache record patch pointer. */ + cacheRec.pPatch = &pPatchRec->patch; + + /* Allocate statistics slot */ + if (pVM->patm.s.uCurrentPatchIdx < PATM_STAT_MAX_COUNTERS) + { + pPatchRec->patch.uPatchIdx = pVM->patm.s.uCurrentPatchIdx++; + } + else + { + Log(("WARNING: Patch index wrap around!!\n")); + pPatchRec->patch.uPatchIdx = PATM_STAT_INDEX_DUMMY; + } + + if (pPatchRec->patch.flags & PATMFL_TRAPHANDLER) + { + rc = patmInstallTrapTrampoline(pVM, pInstrGC, pPatchRec, &cacheRec); + } + else + if (pPatchRec->patch.flags & (PATMFL_DUPLICATE_FUNCTION )) + { + rc = patmDuplicateFunction(pVM, pInstrGC, pPatchRec, &cacheRec); + } + else + if (pPatchRec->patch.flags & PATMFL_TRAMPOLINE) + { + rc = patmCreateTrampoline(pVM, pInstrGC, pPatchRec); + } + else + if (pPatchRec->patch.flags & PATMFL_REPLACE_FUNCTION_CALL) + { + rc = patmReplaceFunctionCall(pVM, &cpu, pInstrGC, &cacheRec); + } + else + if (pPatchRec->patch.flags & PATMFL_INT3_REPLACEMENT) + { + rc = patmR3PatchInstrInt3(pVM, pInstrGC, pInstrHC, &cpu, &pPatchRec->patch); + } + else + if (pPatchRec->patch.flags & PATMFL_MMIO_ACCESS) + { + rc = patmPatchMMIOInstr(pVM, pInstrGC, &cpu, &cacheRec); + } + else + if (pPatchRec->patch.flags & (PATMFL_IDTHANDLER|PATMFL_SYSENTER)) + { + if (pPatchRec->patch.flags & PATMFL_SYSENTER) + pPatchRec->patch.flags |= PATMFL_IDTHANDLER; /* we treat a sysenter handler as an IDT handler */ + + rc = patmIdtHandler(pVM, pInstrGC, cbInstr, pPatchRec, &cacheRec); +#ifdef VBOX_WITH_STATISTICS + if ( rc == VINF_SUCCESS + && (pPatchRec->patch.flags & PATMFL_SYSENTER)) + { + pVM->patm.s.uSysEnterPatchIdx = pPatchRec->patch.uPatchIdx; + } +#endif + } + else + if (pPatchRec->patch.flags & PATMFL_GUEST_SPECIFIC) + { + switch (cpu.pCurInstr->uOpcode) + { + case OP_SYSENTER: + case OP_PUSH: + rc = patmR3InstallGuestSpecificPatch(pVM, &cpu, pInstrGC, pInstrHC, pPatchRec); + if (rc == VINF_SUCCESS) + { + if (rc == VINF_SUCCESS) + Log(("PATMR3InstallPatch GUEST: %s %RRv code32=%d\n", patmGetInstructionString(pPatchRec->patch.opcode, 
pPatchRec->patch.flags), pInstrGC, (flags & PATMFL_CODE32) ? 1 : 0)); + return rc; + } + break; + + default: + rc = VERR_NOT_IMPLEMENTED; + break; + } + } + else + { + switch (cpu.pCurInstr->uOpcode) + { + case OP_SYSENTER: + rc = patmR3InstallGuestSpecificPatch(pVM, &cpu, pInstrGC, pInstrHC, pPatchRec); + if (rc == VINF_SUCCESS) + { + Log(("PATMR3InstallPatch GUEST: %s %RRv code32=%d\n", patmGetInstructionString(pPatchRec->patch.opcode, pPatchRec->patch.flags), pInstrGC, (flags & PATMFL_CODE32) ? 1 : 0)); + return VINF_SUCCESS; + } + break; + +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES + case OP_JO: + case OP_JNO: + case OP_JC: + case OP_JNC: + case OP_JE: + case OP_JNE: + case OP_JBE: + case OP_JNBE: + case OP_JS: + case OP_JNS: + case OP_JP: + case OP_JNP: + case OP_JL: + case OP_JNL: + case OP_JLE: + case OP_JNLE: + case OP_JECXZ: + case OP_LOOP: + case OP_LOOPNE: + case OP_LOOPE: + case OP_JMP: + if (pPatchRec->patch.flags & PATMFL_JUMP_CONFLICT) + { + rc = patmPatchJump(pVM, pInstrGC, pInstrHC, &cpu, pPatchRec); + break; + } + return VERR_NOT_IMPLEMENTED; +#endif + + case OP_PUSHF: + case OP_CLI: + Log(("PATMR3InstallPatch %s %RRv code32=%d\n", patmGetInstructionString(pPatchRec->patch.opcode, pPatchRec->patch.flags), pInstrGC, (flags & PATMFL_CODE32) ? 1 : 0)); + rc = patmR3PatchBlock(pVM, pInstrGC, pInstrHC, cpu.pCurInstr->uOpcode, cbInstr, pPatchRec); + break; + +#ifndef VBOX_WITH_SAFE_STR + case OP_STR: +#endif + case OP_SGDT: + case OP_SLDT: + case OP_SIDT: + case OP_CPUID: + case OP_LSL: + case OP_LAR: + case OP_SMSW: + case OP_VERW: + case OP_VERR: + case OP_IRET: +#ifdef VBOX_WITH_RAW_RING1 + case OP_MOV: +#endif + rc = patmR3PatchInstrInt3(pVM, pInstrGC, pInstrHC, &cpu, &pPatchRec->patch); + break; + + default: + return VERR_NOT_IMPLEMENTED; + } + } + + if (rc != VINF_SUCCESS) + { + if (pPatchRec && pPatchRec->patch.nrPatch2GuestRecs) + { + patmEmptyTreeU32(pVM, &pPatchRec->patch.Patch2GuestAddrTree); + pPatchRec->patch.nrPatch2GuestRecs = 0; + } + pVM->patm.s.uCurrentPatchIdx--; + } + else + { + rc = patmInsertPatchPages(pVM, &pPatchRec->patch); + AssertRCReturn(rc, rc); + + /* Keep track upper and lower boundaries of patched instructions */ + if (pPatchRec->patch.pInstrGCLowest < pVM->patm.s.pPatchedInstrGCLowest) + pVM->patm.s.pPatchedInstrGCLowest = pPatchRec->patch.pInstrGCLowest; + if (pPatchRec->patch.pInstrGCHighest > pVM->patm.s.pPatchedInstrGCHighest) + pVM->patm.s.pPatchedInstrGCHighest = pPatchRec->patch.pInstrGCHighest; + + Log(("Patch lowest %RRv highest %RRv\n", pPatchRec->patch.pInstrGCLowest, pPatchRec->patch.pInstrGCHighest)); + Log(("Global lowest %RRv highest %RRv\n", pVM->patm.s.pPatchedInstrGCLowest, pVM->patm.s.pPatchedInstrGCHighest)); + + STAM_COUNTER_ADD(&pVM->patm.s.StatInstalled, 1); + STAM_COUNTER_ADD(&pVM->patm.s.StatPATMMemoryUsed, pPatchRec->patch.cbPatchBlockSize); + + rc = VINF_SUCCESS; + + /* Patch hints are not enabled by default. Only when the are actually encountered. */ + if (pPatchRec->patch.flags & PATMFL_INSTR_HINT) + { + rc = PATMR3DisablePatch(pVM, pInstrGC); + AssertRCReturn(rc, rc); + } + +#ifdef VBOX_WITH_STATISTICS + /* Register statistics counter */ + if (PATM_STAT_INDEX_IS_VALID(pPatchRec->patch.uPatchIdx)) + { + STAMR3RegisterCallback(pVM, &pPatchRec->patch, STAMVISIBILITY_NOT_GUI, STAMUNIT_GOOD_BAD, patmResetStat, patmPrintStat, "Patch statistics", + "/PATM/Stats/Patch/0x%RRv", pPatchRec->patch.pPrivInstrGC); +#ifndef DEBUG_sandervl + /* Full breakdown for the GUI. 
*/ + STAMR3RegisterF(pVM, &pVM->patm.s.pStatsHC[pPatchRec->patch.uPatchIdx], STAMTYPE_RATIO_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_GOOD_BAD, PATMPatchType(pVM, &pPatchRec->patch), + "/PATM/PatchBD/0x%RRv", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.pPatchBlockOffset,STAMTYPE_X32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, NULL, "/PATM/PatchBD/0x%RRv/offPatchBlock", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.cbPatchBlockSize,STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, NULL, "/PATM/PatchBD/0x%RRv/cbPatchBlockSize", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.cbPatchJump, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, NULL, "/PATM/PatchBD/0x%RRv/cbPatchJump", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.cbPrivInstr, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, NULL, "/PATM/PatchBD/0x%RRv/cbPrivInstr", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.cCodeWrites, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/PatchBD/0x%RRv/cCodeWrites", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.cInvalidWrites, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/PatchBD/0x%RRv/cInvalidWrites", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.cTraps, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/PatchBD/0x%RRv/cTraps", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.flags, STAMTYPE_X64, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, NULL, "/PATM/PatchBD/0x%RRv/flags", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.nrJumpRecs, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/PatchBD/0x%RRv/nrJumpRecs", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.nrFixups, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/PatchBD/0x%RRv/nrFixups", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.opcode, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PATM/PatchBD/0x%RRv/opcode", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.uOldState, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, NULL, "/PATM/PatchBD/0x%RRv/uOldState", pPatchRec->patch.pPrivInstrGC); + STAMR3RegisterF(pVM, &pPatchRec->patch.uOpMode, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, NULL, "/PATM/PatchBD/0x%RRv/uOpMode", pPatchRec->patch.pPrivInstrGC); + /// @todo change the state to be a callback so we can get a state mnemonic instead. + STAMR3RegisterF(pVM, &pPatchRec->patch.uState, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE, NULL, "/PATM/PatchBD/0x%RRv/uState", pPatchRec->patch.pPrivInstrGC); +#endif + } +#endif + + /* Add debug symbol. */ + patmR3DbgAddPatch(pVM, pPatchRec); + } + /* Free leftover lock if any. */ + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + return rc; +} + +/** + * Query instruction size + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pPatch Patch record + * @param pInstrGC Instruction address + */ +static uint32_t patmGetInstrSize(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + uint8_t *pInstrHC; + PGMPAGEMAPLOCK Lock; + + int rc = PGMPhysGCPtr2CCPtrReadOnly(VMMGetCpu(pVM), pInstrGC, (const void **)&pInstrHC, &Lock); + if (rc == VINF_SUCCESS) + { + DISCPUSTATE cpu; + bool disret; + uint32_t cbInstr; + + disret = patmR3DisInstr(pVM, pPatch, pInstrGC, pInstrHC, PATMREAD_ORGCODE | PATMREAD_NOCHECK, &cpu, &cbInstr); + PGMPhysReleasePageMappingLock(pVM, &Lock); + if (disret) + return cbInstr; + } + return 0; +} + +/** + * Add patch to page record + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPage Page address + * @param pPatch Patch record + */ +int patmAddPatchToPage(PVM pVM, RTRCUINTPTR pPage, PPATCHINFO pPatch) +{ + PPATMPATCHPAGE pPatchPage; + int rc; + + Log(("patmAddPatchToPage: insert patch %RHv to page %RRv\n", pPatch, pPage)); + + pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, pPage); + if (pPatchPage) + { + Assert(pPatchPage->cCount <= pPatchPage->cMaxPatches); + if (pPatchPage->cCount == pPatchPage->cMaxPatches) + { + uint32_t cMaxPatchesOld = pPatchPage->cMaxPatches; + PPATCHINFO *papPatchOld = pPatchPage->papPatch; + + pPatchPage->cMaxPatches += PATMPATCHPAGE_PREALLOC_INCREMENT; + rc = MMHyperAlloc(pVM, sizeof(pPatchPage->papPatch[0]) * pPatchPage->cMaxPatches, 0, MM_TAG_PATM_PATCH, + (void **)&pPatchPage->papPatch); + if (RT_FAILURE(rc)) + { + Log(("Out of memory!!!!\n")); + return VERR_NO_MEMORY; + } + memcpy(pPatchPage->papPatch, papPatchOld, cMaxPatchesOld * sizeof(pPatchPage->papPatch[0])); + MMHyperFree(pVM, papPatchOld); + } + pPatchPage->papPatch[pPatchPage->cCount] = pPatch; + pPatchPage->cCount++; + } + else + { + bool fInserted; + + rc = MMHyperAlloc(pVM, sizeof(PATMPATCHPAGE), 0, MM_TAG_PATM_PATCH, (void **)&pPatchPage); + if (RT_FAILURE(rc)) + { + Log(("Out of memory!!!!\n")); + return VERR_NO_MEMORY; + } + pPatchPage->Core.Key = pPage; + pPatchPage->cCount = 1; + pPatchPage->cMaxPatches = PATMPATCHPAGE_PREALLOC_INCREMENT; + + rc = MMHyperAlloc(pVM, sizeof(pPatchPage->papPatch[0]) * PATMPATCHPAGE_PREALLOC_INCREMENT, 0, MM_TAG_PATM_PATCH, + (void **)&pPatchPage->papPatch); + if (RT_FAILURE(rc)) + { + Log(("Out of memory!!!!\n")); + MMHyperFree(pVM, pPatchPage); + return VERR_NO_MEMORY; + } + pPatchPage->papPatch[0] = pPatch; + + fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, &pPatchPage->Core); + Assert(fInserted); + pVM->patm.s.cPageRecords++; + + STAM_COUNTER_INC(&pVM->patm.s.StatPatchPageInserted); + } + CSAMR3MonitorPage(pVM, pPage, CSAM_TAG_PATM); + + /* Get the closest guest instruction (from below) */ + PRECGUESTTOPATCH pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32GetBestFit(&pPatch->Guest2PatchAddrTree, pPage, true); + Assert(pGuestToPatchRec); + if (pGuestToPatchRec) + { + LogFlow(("patmAddPatchToPage: lowest patch page address %RRv current lowest %RRv\n", pGuestToPatchRec->Core.Key, pPatchPage->pLowestAddrGC)); + if ( pPatchPage->pLowestAddrGC == 0 + || pPatchPage->pLowestAddrGC > (RTRCPTR)pGuestToPatchRec->Core.Key) + { + RTRCUINTPTR offset; + + pPatchPage->pLowestAddrGC = (RTRCPTR)pGuestToPatchRec->Core.Key; + + offset = pPatchPage->pLowestAddrGC & PAGE_OFFSET_MASK; + /* If we're too close to the page boundary, then make sure an + instruction from the previous page doesn't cross the + boundary itself. 
*/ + if (offset && offset < MAX_INSTR_SIZE) + { + /* Get the closest guest instruction (from above) */ + pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32GetBestFit(&pPatch->Guest2PatchAddrTree, pPage-1, false); + + if (pGuestToPatchRec) + { + uint32_t size = patmGetInstrSize(pVM, pPatch, (RTRCPTR)pGuestToPatchRec->Core.Key); + if ((RTRCUINTPTR)pGuestToPatchRec->Core.Key + size > pPage) + { + pPatchPage->pLowestAddrGC = pPage; + LogFlow(("patmAddPatchToPage: new lowest %RRv\n", pPatchPage->pLowestAddrGC)); + } + } + } + } + } + + /* Get the closest guest instruction (from above) */ + pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32GetBestFit(&pPatch->Guest2PatchAddrTree, pPage+PAGE_SIZE-1, false); + Assert(pGuestToPatchRec); + if (pGuestToPatchRec) + { + LogFlow(("patmAddPatchToPage: highest patch page address %RRv current highest %RRv\n", pGuestToPatchRec->Core.Key, pPatchPage->pHighestAddrGC)); + if ( pPatchPage->pHighestAddrGC == 0 + || pPatchPage->pHighestAddrGC <= (RTRCPTR)pGuestToPatchRec->Core.Key) + { + pPatchPage->pHighestAddrGC = (RTRCPTR)pGuestToPatchRec->Core.Key; + /* Increase by instruction size. */ + uint32_t size = patmGetInstrSize(pVM, pPatch, pPatchPage->pHighestAddrGC); +//// Assert(size); + pPatchPage->pHighestAddrGC += size; + LogFlow(("patmAddPatchToPage: new highest %RRv\n", pPatchPage->pHighestAddrGC)); + } + } + + return VINF_SUCCESS; +} + +/** + * Remove patch from page record + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPage Page address + * @param pPatch Patch record + */ +int patmRemovePatchFromPage(PVM pVM, RTRCUINTPTR pPage, PPATCHINFO pPatch) +{ + PPATMPATCHPAGE pPatchPage; + int rc; + + pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, pPage); + Assert(pPatchPage); + + if (!pPatchPage) + return VERR_INVALID_PARAMETER; + + Assert(pPatchPage->cCount <= pPatchPage->cMaxPatches); + + Log(("patmRemovePatchPage: remove patch %RHv from page %RRv\n", pPatch, pPage)); + if (pPatchPage->cCount > 1) + { + uint32_t i; + + /* Used by multiple patches */ + for (i = 0; i < pPatchPage->cCount; i++) + { + if (pPatchPage->papPatch[i] == pPatch) + { + /* close the gap between the remaining pointers. */ + uint32_t cNew = --pPatchPage->cCount; + if (i < cNew) + pPatchPage->papPatch[i] = pPatchPage->papPatch[cNew]; + pPatchPage->papPatch[cNew] = NULL; + return VINF_SUCCESS; + } + } + AssertMsgFailed(("Unable to find patch %RHv in page %RRv\n", pPatch, pPage)); + } + else + { + PPATMPATCHPAGE pPatchNode; + + Log(("patmRemovePatchFromPage %RRv\n", pPage)); + + STAM_COUNTER_INC(&pVM->patm.s.StatPatchPageRemoved); + pPatchNode = (PPATMPATCHPAGE)RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, pPage); + Assert(pPatchNode && pPatchNode == pPatchPage); + + Assert(pPatchPage->papPatch); + rc = MMHyperFree(pVM, pPatchPage->papPatch); + AssertRC(rc); + rc = MMHyperFree(pVM, pPatchPage); + AssertRC(rc); + pVM->patm.s.cPageRecords--; + } + return VINF_SUCCESS; +} + +/** + * Insert page records for all guest pages that contain instructions that were recompiled for this patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + */ +int patmInsertPatchPages(PVM pVM, PPATCHINFO pPatch) +{ + int rc; + RTRCUINTPTR pPatchPageStart, pPatchPageEnd, pPage; + + /* Insert the pages that contain patched instructions into a lookup tree for detecting self-modifying code. 
*/ + pPatchPageStart = (RTRCUINTPTR)pPatch->pInstrGCLowest & PAGE_BASE_GC_MASK; + pPatchPageEnd = (RTRCUINTPTR)pPatch->pInstrGCHighest & PAGE_BASE_GC_MASK; + + /** @todo optimize better (large gaps between current and next used page) */ + for(pPage = pPatchPageStart; pPage <= pPatchPageEnd; pPage += PAGE_SIZE) + { + /* Get the closest guest instruction (from above) */ + PRECGUESTTOPATCH pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32GetBestFit(&pPatch->Guest2PatchAddrTree, pPage, true); + if ( pGuestToPatchRec + && PAGE_ADDRESS(pGuestToPatchRec->Core.Key) == PAGE_ADDRESS(pPage) + ) + { + /* Code in page really patched -> add record */ + rc = patmAddPatchToPage(pVM, pPage, pPatch); + AssertRC(rc); + } + } + pPatch->flags |= PATMFL_CODE_MONITORED; + return VINF_SUCCESS; +} + +/** + * Remove page records for all guest pages that contain instructions that were recompiled for this patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + */ +static int patmRemovePatchPages(PVM pVM, PPATCHINFO pPatch) +{ + int rc; + RTRCUINTPTR pPatchPageStart, pPatchPageEnd, pPage; + + /* Insert the pages that contain patched instructions into a lookup tree for detecting self-modifying code. */ + pPatchPageStart = (RTRCUINTPTR)pPatch->pInstrGCLowest & PAGE_BASE_GC_MASK; + pPatchPageEnd = (RTRCUINTPTR)pPatch->pInstrGCHighest & PAGE_BASE_GC_MASK; + + for(pPage = pPatchPageStart; pPage <= pPatchPageEnd; pPage += PAGE_SIZE) + { + /* Get the closest guest instruction (from above) */ + PRECGUESTTOPATCH pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32GetBestFit(&pPatch->Guest2PatchAddrTree, pPage, true); + if ( pGuestToPatchRec + && PAGE_ADDRESS(pGuestToPatchRec->Core.Key) == PAGE_ADDRESS(pPage) /** @todo bird: PAGE_ADDRESS is for the current context really. check out these. */ + ) + { + /* Code in page really patched -> remove record */ + rc = patmRemovePatchFromPage(pVM, pPage, pPatch); + AssertRC(rc); + } + } + pPatch->flags &= ~PATMFL_CODE_MONITORED; + return VINF_SUCCESS; +} + +/** + * Notifies PATM about a (potential) write to code that has been patched. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param GCPtr GC pointer to write address + * @param cbWrite Nr of bytes to write + * + */ +VMMR3_INT_DECL(int) PATMR3PatchWrite(PVM pVM, RTRCPTR GCPtr, uint32_t cbWrite) +{ + RTRCUINTPTR pWritePageStart, pWritePageEnd, pPage; + + Log(("PATMR3PatchWrite %RRv %x\n", GCPtr, cbWrite)); + + Assert(VM_IS_EMT(pVM)); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + + /* Quick boundary check */ + if ( GCPtr < pVM->patm.s.pPatchedInstrGCLowest + || GCPtr > pVM->patm.s.pPatchedInstrGCHighest + ) + return VINF_SUCCESS; + + STAM_PROFILE_ADV_START(&pVM->patm.s.StatPatchWrite, a); + + pWritePageStart = (RTRCUINTPTR)GCPtr & PAGE_BASE_GC_MASK; + pWritePageEnd = ((RTRCUINTPTR)GCPtr + cbWrite - 1) & PAGE_BASE_GC_MASK; + + for (pPage = pWritePageStart; pPage <= pWritePageEnd; pPage += PAGE_SIZE) + { +loop_start: + PPATMPATCHPAGE pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, (RTRCPTR)pPage); + if (pPatchPage) + { + uint32_t i; + bool fValidPatchWrite = false; + + /* Quick check to see if the write is in the patched part of the page */ + if ( pPatchPage->pLowestAddrGC > (RTRCPTR)((RTRCUINTPTR)GCPtr + cbWrite - 1) + || pPatchPage->pHighestAddrGC < GCPtr) + { + break; + } + + for (i=0;i<pPatchPage->cCount;i++) + { + if (pPatchPage->papPatch[i]) + { + PPATCHINFO pPatch = pPatchPage->papPatch[i]; + RTRCPTR pPatchInstrGC; + //unused: bool fForceBreak = false; + + Assert(pPatchPage->papPatch[i]->flags & PATMFL_CODE_MONITORED); + /** @todo inefficient and includes redundant checks for multiple pages. */ + for (uint32_t j=0; j<cbWrite; j++) + { + RTRCPTR pGuestPtrGC = (RTRCPTR)((RTRCUINTPTR)GCPtr + j); + + if ( pPatch->cbPatchJump + && pGuestPtrGC >= pPatch->pPrivInstrGC + && pGuestPtrGC < pPatch->pPrivInstrGC + pPatch->cbPatchJump) + { + /* The guest is about to overwrite the 5 byte jump to patch code. Remove the patch. */ + Log(("PATMR3PatchWrite: overwriting jump to patch code -> remove patch.\n")); + int rc = PATMR3RemovePatch(pVM, pPatch->pPrivInstrGC); + if (rc == VINF_SUCCESS) + /* Note: jump back to the start as the pPatchPage has been deleted or changed */ + goto loop_start; + + continue; + } + + /* Find the closest instruction from below; the above quick check ensured that we are indeed in patched code */ + pPatchInstrGC = patmGuestGCPtrToPatchGCPtr(pVM, pPatch, pGuestPtrGC); + if (!pPatchInstrGC) + { + RTRCPTR pClosestInstrGC; + uint32_t size; + + pPatchInstrGC = patmGuestGCPtrToClosestPatchGCPtr(pVM, pPatch, pGuestPtrGC); + if (pPatchInstrGC) + { + pClosestInstrGC = patmPatchGCPtr2GuestGCPtr(pVM, pPatch, pPatchInstrGC); + Assert(pClosestInstrGC <= pGuestPtrGC); + size = patmGetInstrSize(pVM, pPatch, pClosestInstrGC); + /* Check if this is not a write into a gap between two patches */ + if (pClosestInstrGC + size - 1 < pGuestPtrGC) + pPatchInstrGC = 0; + } + } + if (pPatchInstrGC) + { + uint32_t PatchOffset = pPatchInstrGC - pVM->patm.s.pPatchMemGC; /* Offset in memory reserved for PATM.
*/ + + fValidPatchWrite = true; + + PRECPATCHTOGUEST pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32Get(&pPatch->Patch2GuestAddrTree, PatchOffset); + Assert(pPatchToGuestRec); + if (pPatchToGuestRec && !pPatchToGuestRec->fDirty) + { + Log(("PATMR3PatchWrite: Found patched instruction %RRv -> %RRv\n", pGuestPtrGC, pPatchInstrGC)); + + if (++pPatch->cCodeWrites > PATM_MAX_CODE_WRITES) + { + LogRel(("PATM: Disable block at %RRv - write %RRv-%RRv\n", pPatch->pPrivInstrGC, pGuestPtrGC, pGuestPtrGC+cbWrite)); + + patmR3MarkDirtyPatch(pVM, pPatch); + + /* Note: jump back to the start as the pPatchPage has been deleted or changed */ + goto loop_start; + } + else + { + /* Replace the patch instruction with a breakpoint; when it's hit, then we'll attempt to recompile the instruction again. */ + uint8_t *pInstrHC = patmPatchGCPtr2PatchHCPtr(pVM, pPatchInstrGC); + + pPatchToGuestRec->u8DirtyOpcode = *pInstrHC; + pPatchToGuestRec->fDirty = true; + + *pInstrHC = 0xCC; + + STAM_COUNTER_INC(&pVM->patm.s.StatInstrDirty); + } + } + /* else already marked dirty */ + } + } + } + } /* for each patch */ + + if (fValidPatchWrite == false) + { + /* Write to a part of the page that either: + * - doesn't contain any code (shared code/data); rather unlikely + * - old code page that's no longer in active use. + */ +invalid_write_loop_start: + pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, (RTRCPTR)pPage); + + if (pPatchPage) + { + for (i=0;i<pPatchPage->cCount;i++) + { + PPATCHINFO pPatch = pPatchPage->papPatch[i]; + + if (pPatch->cInvalidWrites > PATM_MAX_INVALID_WRITES) + { + /* Note: possibly dangerous assumption that all future writes will be harmless. */ + if (pPatch->flags & PATMFL_IDTHANDLER) + { + LogRel(("PATM: Stop monitoring IDT handler pages at %RRv - invalid write %RRv-%RRv (this is not a fatal error)\n", pPatch->pPrivInstrGC, GCPtr, GCPtr+cbWrite)); + + Assert(pPatch->flags & PATMFL_CODE_MONITORED); + int rc = patmRemovePatchPages(pVM, pPatch); + AssertRC(rc); + } + else + { + LogRel(("PATM: Disable block at %RRv - invalid write %RRv-%RRv \n", pPatch->pPrivInstrGC, GCPtr, GCPtr+cbWrite)); + patmR3MarkDirtyPatch(pVM, pPatch); + } + /* Note: jump back to the start as the pPatchPage has been deleted or changed */ + goto invalid_write_loop_start; + } + } /* for */ + } + } + } + } + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatPatchWrite, a); + return VINF_SUCCESS; + +} + +/** + * Disable all patches in a flushed page + * + * @returns VBox status code + * @param pVM The cross context VM structure. + * @param addr GC address of the page to flush + * @note Currently only called by CSAMR3FlushPage; optimization to avoid + * having to double check if the physical address has changed + */ +VMMR3_INT_DECL(int) PATMR3FlushPage(PVM pVM, RTRCPTR addr) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + + addr &= PAGE_BASE_GC_MASK; + + PPATMPATCHPAGE pPatchPage = (PPATMPATCHPAGE)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPage, addr); + if (pPatchPage) + { + int i; + + /* From top to bottom as the array is modified by PATMR3MarkDirtyPatch. */ + for (i=(int)pPatchPage->cCount-1;i>=0;i--) + { + if (pPatchPage->papPatch[i]) + { + PPATCHINFO pPatch = pPatchPage->papPatch[i]; + + Log(("PATMR3FlushPage %RRv remove patch at %RRv\n", addr, pPatch->pPrivInstrGC)); + patmR3MarkDirtyPatch(pVM, pPatch); + } + } + STAM_COUNTER_INC(&pVM->patm.s.StatFlushed); + } + return VINF_SUCCESS; +} + +/** + * Checks if the instructions at the specified address has been patched already.
+ * + * @returns boolean, patched or not + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to instruction + */ +VMMR3_INT_DECL(bool) PATMR3HasBeenPatched(PVM pVM, RTRCPTR pInstrGC) +{ + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + PPATMPATCHREC pPatchRec; + pPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + if (pPatchRec && pPatchRec->patch.uState == PATCH_ENABLED) + return true; + return false; +} + +/** + * Query the opcode of the original code that was overwritten by the 5 bytes patch jump + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC GC address of instr + * @param pByte opcode byte pointer (OUT) + * + */ +VMMR3DECL(int) PATMR3QueryOpcode(PVM pVM, RTRCPTR pInstrGC, uint8_t *pByte) +{ + PPATMPATCHREC pPatchRec; + + /** @todo this will not work for aliased pages! (never has, but so far not a problem for us) */ + + /* Shortcut. */ + if (!PATMIsEnabled(pVM)) + return VERR_PATCH_NOT_FOUND; + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + if ( pInstrGC < pVM->patm.s.pPatchedInstrGCLowest + || pInstrGC > pVM->patm.s.pPatchedInstrGCHighest) + return VERR_PATCH_NOT_FOUND; + + pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC, false); + // if the patch is enabled and the pointer lies within 5 bytes of this priv instr ptr, then we've got a hit! + if ( pPatchRec + && pPatchRec->patch.uState == PATCH_ENABLED + && pInstrGC >= pPatchRec->patch.pPrivInstrGC + && pInstrGC < pPatchRec->patch.pPrivInstrGC + pPatchRec->patch.cbPatchJump) + { + RTRCPTR offset = pInstrGC - pPatchRec->patch.pPrivInstrGC; + *pByte = pPatchRec->patch.aPrivInstr[offset]; + + if (pPatchRec->patch.cbPatchJump == 1) + { + Log(("PATMR3QueryOpcode: returning opcode %2X for instruction at %RRv\n", *pByte, pInstrGC)); + } + STAM_COUNTER_ADD(&pVM->patm.s.StatNrOpcodeRead, 1); + return VINF_SUCCESS; + } + return VERR_PATCH_NOT_FOUND; +} + +/** + * Read instruction bytes of the original code that was overwritten by the 5 + * bytes patch jump. + * + * @returns VINF_SUCCESS or VERR_PATCH_NOT_FOUND. + * @param pVM The cross context VM structure. + * @param GCPtrInstr GC address of instr + * @param pbDst The output buffer. + * @param cbToRead The maximum number bytes to read. + * @param pcbRead Where to return the acutal number of bytes read. + */ +VMMR3_INT_DECL(int) PATMR3ReadOrgInstr(PVM pVM, RTGCPTR32 GCPtrInstr, uint8_t *pbDst, size_t cbToRead, size_t *pcbRead) +{ + /* Shortcut. */ + if (!PATMIsEnabled(pVM)) + return VERR_PATCH_NOT_FOUND; + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + if ( GCPtrInstr < pVM->patm.s.pPatchedInstrGCLowest + || GCPtrInstr > pVM->patm.s.pPatchedInstrGCHighest) + return VERR_PATCH_NOT_FOUND; + + /** @todo this will not work for aliased pages! (never has, but so far not a problem for us) */ + + /* + * If the patch is enabled and the pointer lies within 5 bytes of this + * priv instr ptr, then we've got a hit! 
+ */ + RTGCPTR32 off; + PPATMPATCHREC pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, + GCPtrInstr, false /*fAbove*/); + if ( pPatchRec + && pPatchRec->patch.uState == PATCH_ENABLED + && (off = GCPtrInstr - pPatchRec->patch.pPrivInstrGC) < pPatchRec->patch.cbPatchJump) + { + uint8_t const *pbSrc = &pPatchRec->patch.aPrivInstr[off]; + uint32_t const cbMax = pPatchRec->patch.cbPatchJump - off; + if (cbToRead > cbMax) + cbToRead = cbMax; + switch (cbToRead) + { + case 5: pbDst[4] = pbSrc[4]; RT_FALL_THRU(); + case 4: pbDst[3] = pbSrc[3]; RT_FALL_THRU(); + case 3: pbDst[2] = pbSrc[2]; RT_FALL_THRU(); + case 2: pbDst[1] = pbSrc[1]; RT_FALL_THRU(); + case 1: pbDst[0] = pbSrc[0]; + break; + default: + memcpy(pbDst, pbSrc, cbToRead); + } + *pcbRead = cbToRead; + + if (pPatchRec->patch.cbPatchJump == 1) + Log(("PATMR3ReadOrgInstr: returning opcode %.*Rhxs for instruction at %RX32\n", cbToRead, pbSrc, GCPtrInstr)); + STAM_COUNTER_ADD(&pVM->patm.s.StatNrOpcodeRead, 1); + return VINF_SUCCESS; + } + + return VERR_PATCH_NOT_FOUND; +} + +/** + * Disable patch for privileged instruction at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * + * @note returns failure if patching is not allowed or possible + * + */ +VMMR3_INT_DECL(int) PATMR3DisablePatch(PVM pVM, RTRCPTR pInstrGC) +{ + PPATMPATCHREC pPatchRec; + PPATCHINFO pPatch; + + Log(("PATMR3DisablePatch: %RRv\n", pInstrGC)); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + pPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + if (pPatchRec) + { + int rc = VINF_SUCCESS; + + pPatch = &pPatchRec->patch; + + /* Already disabled? */ + if (pPatch->uState == PATCH_DISABLED) + return VINF_SUCCESS; + + /* Clear the IDT entries for the patch we're disabling. */ + /* Note: very important as we clear IF in the patch itself */ + /** @todo this needs to be changed */ + if (pPatch->flags & PATMFL_IDTHANDLER) + { + uint32_t iGate; + + iGate = TRPMR3QueryGateByHandler(pVM, PATCHCODE_PTR_GC(pPatch)); + if (iGate != (uint32_t)~0) + { + TRPMR3SetGuestTrapHandler(pVM, iGate, TRPM_INVALID_HANDLER); + if (++cIDTHandlersDisabled < 256) + LogRel(("PATM: Disabling IDT %x patch handler %RRv\n", iGate, pInstrGC)); + } + } + + /* Mark the entry with a breakpoint in case somebody else calls it later on (cli patch used as a function, function, trampoline or idt patches) */ + if ( pPatch->pPatchBlockOffset + && pPatch->uState == PATCH_ENABLED) + { + Log(("Invalidate patch at %RRv (HC=%RRv)\n", PATCHCODE_PTR_GC(pPatch), PATCHCODE_PTR_HC(pPatch))); + pPatch->bDirtyOpcode = *PATCHCODE_PTR_HC(pPatch); + *PATCHCODE_PTR_HC(pPatch) = 0xCC; + } + + /* IDT or function patches haven't changed any guest code. */ + if (pPatch->flags & PATMFL_PATCHED_GUEST_CODE) + { + Assert(pPatch->flags & PATMFL_MUST_INSTALL_PATCHJMP); + Assert(!(pPatch->flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_IDTHANDLER|PATMFL_TRAMPOLINE|PATMFL_INT3_REPLACEMENT|PATMFL_INT3_REPLACEMENT_BLOCK))); + + if (pPatch->uState != PATCH_REFUSED) + { + uint8_t temp[16]; + + Assert(pPatch->cbPatchJump < sizeof(temp)); + + /* Let's first check if the guest code is still the same. 
*/ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), temp, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_TABLE_NOT_PRESENT || rc == VERR_PAGE_NOT_PRESENT); + if (rc == VINF_SUCCESS) + { + RTRCINTPTR displ = (RTRCUINTPTR)PATCHCODE_PTR_GC(pPatch) - ((RTRCUINTPTR)pPatch->pPrivInstrGC + SIZEOF_NEARJUMP32); + + if ( temp[0] != 0xE9 /* jmp opcode */ + || *(RTRCINTPTR *)(&temp[1]) != displ + ) + { + Log(("PATMR3DisablePatch: Can't disable a patch who's guest code has changed!!\n")); + STAM_COUNTER_INC(&pVM->patm.s.StatOverwritten); + /* Remove it completely */ + pPatch->uState = PATCH_DISABLED; /* don't call PATMR3DisablePatch again */ + rc = PATMR3RemovePatch(pVM, pInstrGC); + AssertRC(rc); + return VWRN_PATCH_REMOVED; + } + patmRemoveJumpToPatch(pVM, pPatch); + } + else + { + Log(("PATMR3DisablePatch: unable to disable patch -> mark PATCH_DISABLE_PENDING\n")); + pPatch->uState = PATCH_DISABLE_PENDING; + } + } + else + { + AssertMsgFailed(("Patch was refused!\n")); + return VERR_PATCH_ALREADY_DISABLED; + } + } + else + if (pPatch->flags & (PATMFL_INT3_REPLACEMENT|PATMFL_INT3_REPLACEMENT_BLOCK)) + { + uint8_t temp[16]; + + Assert(pPatch->cbPatchJump < sizeof(temp)); + + /* Let's first check if the guest code is still the same. */ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), temp, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + Assert(rc == VINF_SUCCESS || rc == VERR_PAGE_TABLE_NOT_PRESENT || rc == VERR_PAGE_NOT_PRESENT); + if (rc == VINF_SUCCESS) + { + if (temp[0] != 0xCC) + { + Log(("PATMR3DisablePatch: Can't disable a patch who's guest code has changed!!\n")); + STAM_COUNTER_INC(&pVM->patm.s.StatOverwritten); + /* Remove it completely */ + pPatch->uState = PATCH_DISABLED; /* don't call PATMR3DisablePatch again */ + rc = PATMR3RemovePatch(pVM, pInstrGC); + AssertRC(rc); + return VWRN_PATCH_REMOVED; + } + patmDeactivateInt3Patch(pVM, pPatch); + } + } + + if (rc == VINF_SUCCESS) + { + /* Save old state and mark this one as disabled (so it can be enabled later on). */ + if (pPatch->uState == PATCH_DISABLE_PENDING) + { + /* Just to be safe, let's make sure this one can never be reused; the patch might be marked dirty already (int3 at start) */ + pPatch->uState = PATCH_UNUSABLE; + } + else + if (pPatch->uState != PATCH_DIRTY) + { + pPatch->uOldState = pPatch->uState; + pPatch->uState = PATCH_DISABLED; + } + STAM_COUNTER_ADD(&pVM->patm.s.StatDisabled, 1); + } + + Log(("PATMR3DisablePatch: disabled patch at %RRv\n", pInstrGC)); + return VINF_SUCCESS; + } + Log(("Patch not found!\n")); + return VERR_PATCH_NOT_FOUND; +} + +/** + * Permanently disable patch for privileged instruction at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pInstrGC Guest context instruction pointer + * @param pConflictAddr Guest context pointer which conflicts with specified patch + * @param pConflictPatch Conflicting patch + * + */ +static int patmDisableUnusablePatch(PVM pVM, RTRCPTR pInstrGC, RTRCPTR pConflictAddr, PPATCHINFO pConflictPatch) +{ + NOREF(pConflictAddr); +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES + PATCHINFO patch; + DISCPUSTATE cpu; + R3PTRTYPE(uint8_t *) pInstrHC; + uint32_t cbInstr; + bool disret; + int rc; + + RT_ZERO(patch); + pInstrHC = patmR3GCVirtToHCVirt(pVM, &patch, pInstrGC); + disret = patmR3DisInstr(pVM, &patch, pInstrGC, pInstrHC, PATMREAD_ORGCODE, &cpu, &cbInstr); + /* + * If it's a 5 byte relative jump, then we can work around the problem by replacing the 32 bits relative offset + * with one that jumps right into the conflict patch. + * Otherwise we must disable the conflicting patch to avoid serious problems. + */ + if ( disret == true + && (pConflictPatch->flags & PATMFL_CODE32) + && (cpu.pCurInstr->uOpcode == OP_JMP || (cpu.pCurInstr->fOpType & DISOPTYPE_COND_CONTROLFLOW)) + && (cpu.Param1.fUse & DISUSE_IMMEDIATE32_REL)) + { + /* Hint patches must be enabled first. */ + if (pConflictPatch->flags & PATMFL_INSTR_HINT) + { + Log(("Enabling HINTED patch %RRv\n", pConflictPatch->pPrivInstrGC)); + pConflictPatch->flags &= ~PATMFL_INSTR_HINT; + rc = PATMR3EnablePatch(pVM, pConflictPatch->pPrivInstrGC); + Assert(rc == VINF_SUCCESS || rc == VERR_PATCH_NOT_FOUND); + /* Enabling might fail if the patched code has changed in the meantime. */ + if (rc != VINF_SUCCESS) + return rc; + } + + rc = PATMR3InstallPatch(pVM, pInstrGC, PATMFL_CODE32 | PATMFL_JUMP_CONFLICT); + if (RT_SUCCESS(rc)) + { + Log(("PATM -> CONFLICT: Installed JMP patch for patch conflict at %RRv\n", pInstrGC)); + STAM_COUNTER_INC(&pVM->patm.s.StatFixedConflicts); + return VINF_SUCCESS; + } + } +#else + RT_NOREF_PV(pInstrGC); +#endif + + if (pConflictPatch->opcode == OP_CLI) + { + /* Turn it into an int3 patch; our GC trap handler will call the generated code manually. */ + Log(("PATM -> CONFLICT: Found active patch at instruction %RRv with target %RRv -> turn into int 3 patch!!\n", pInstrGC, pConflictPatch->pPrivInstrGC)); + int rc = PATMR3DisablePatch(pVM, pConflictPatch->pPrivInstrGC); + if (rc == VWRN_PATCH_REMOVED) + return VINF_SUCCESS; + if (RT_SUCCESS(rc)) + { + pConflictPatch->flags &= ~(PATMFL_MUST_INSTALL_PATCHJMP|PATMFL_INSTR_HINT); + pConflictPatch->flags |= PATMFL_INT3_REPLACEMENT_BLOCK; + rc = PATMR3EnablePatch(pVM, pConflictPatch->pPrivInstrGC); + if (rc == VERR_PATCH_NOT_FOUND) + return VINF_SUCCESS; /* removed already */ + + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + STAM_COUNTER_INC(&pVM->patm.s.StatInt3Callable); + return VINF_SUCCESS; + } + } + /* else turned into unusable patch (see below) */ + } + else + { + Log(("PATM -> CONFLICT: Found active patch at instruction %RRv with target %RRv -> DISABLING it!!\n", pInstrGC, pConflictPatch->pPrivInstrGC)); + int rc = PATMR3DisablePatch(pVM, pConflictPatch->pPrivInstrGC); + if (rc == VWRN_PATCH_REMOVED) + return VINF_SUCCESS; + } + + /* No need to monitor the code anymore. */ + if (pConflictPatch->flags & PATMFL_CODE_MONITORED) + { + int rc = patmRemovePatchPages(pVM, pConflictPatch); + AssertRC(rc); + } + pConflictPatch->uState = PATCH_UNUSABLE; + STAM_COUNTER_INC(&pVM->patm.s.StatUnusable); + return VERR_PATCH_DISABLED; +} + +/** + * Enable patch for privileged instruction at specified location + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * + * @note returns failure if patching is not allowed or possible + * + */ +VMMR3_INT_DECL(int) PATMR3EnablePatch(PVM pVM, RTRCPTR pInstrGC) +{ + PPATMPATCHREC pPatchRec; + PPATCHINFO pPatch; + + Log(("PATMR3EnablePatch %RRv\n", pInstrGC)); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + pPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + if (pPatchRec) + { + int rc = VINF_SUCCESS; + + pPatch = &pPatchRec->patch; + + if (pPatch->uState == PATCH_DISABLED) + { + if (pPatch->flags & PATMFL_MUST_INSTALL_PATCHJMP) + { + Assert(!(pPatch->flags & PATMFL_PATCHED_GUEST_CODE)); + uint8_t temp[16]; + + Assert(pPatch->cbPatchJump < sizeof(temp)); + + /* Let's first check if the guest code is still the same. */ + int rc2 = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), temp, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + AssertRC(rc2); + if (rc2 == VINF_SUCCESS) + { + if (memcmp(temp, pPatch->aPrivInstr, pPatch->cbPatchJump)) + { + Log(("PATMR3EnablePatch: Can't enable a patch who's guest code has changed!!\n")); + STAM_COUNTER_INC(&pVM->patm.s.StatOverwritten); + /* Remove it completely */ + rc = PATMR3RemovePatch(pVM, pInstrGC); + AssertRC(rc); + return VERR_PATCH_NOT_FOUND; + } + + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = pPatch; + + rc2 = patmGenJumpToPatch(pVM, pPatch, &cacheRec, false); + /* Free leftover lock if any. */ + if (cacheRec.Lock.pvMap) + { + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + cacheRec.Lock.pvMap = NULL; + } + AssertRC(rc2); + if (RT_FAILURE(rc2)) + return rc2; + +#ifdef DEBUG + { + DISCPUSTATE cpu; + char szOutput[256]; + uint32_t cbInstr; + uint32_t i = 0; + bool disret; + while(i < pPatch->cbPatchJump) + { + disret = patmR3DisInstrToStr(pVM, pPatch, pPatch->pPrivInstrGC + i, NULL, PATMREAD_ORGCODE, + &cpu, &cbInstr, szOutput, sizeof(szOutput)); + Log(("Renewed patch instr: %s", szOutput)); + i += cbInstr; + } + } +#endif + } + } + else + if (pPatch->flags & (PATMFL_INT3_REPLACEMENT|PATMFL_INT3_REPLACEMENT_BLOCK)) + { + uint8_t temp[16]; + + Assert(pPatch->cbPatchJump < sizeof(temp)); + + /* Let's first check if the guest code is still the same. */ + int rc2 = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), temp, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + AssertRC(rc2); + + if (memcmp(temp, pPatch->aPrivInstr, pPatch->cbPatchJump)) + { + Log(("PATMR3EnablePatch: Can't enable a patch who's guest code has changed!!\n")); + STAM_COUNTER_INC(&pVM->patm.s.StatOverwritten); + rc = PATMR3RemovePatch(pVM, pInstrGC); + AssertRC(rc); + return VERR_PATCH_NOT_FOUND; + } + + rc2 = patmActivateInt3Patch(pVM, pPatch); + if (RT_FAILURE(rc2)) + return rc2; + } + + pPatch->uState = pPatch->uOldState; //restore state + + /* Restore the entry breakpoint with the original opcode (see PATMR3DisablePatch). */ + if (pPatch->pPatchBlockOffset) + *PATCHCODE_PTR_HC(pPatch) = pPatch->bDirtyOpcode; + + STAM_COUNTER_ADD(&pVM->patm.s.StatEnabled, 1); + } + else + Log(("PATMR3EnablePatch: Unable to enable patch %RRv with state %d\n", pInstrGC, pPatch->uState)); + + return rc; + } + return VERR_PATCH_NOT_FOUND; +} + +/** + * Remove patch for privileged instruction at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pPatchRec Patch record + * @param fForceRemove Remove *all* patches + */ +int patmR3RemovePatch(PVM pVM, PPATMPATCHREC pPatchRec, bool fForceRemove) +{ + PPATCHINFO pPatch; + + pPatch = &pPatchRec->patch; + + /* Strictly forbidden to remove such patches. There can be dependencies!! */ + if (!fForceRemove && (pPatch->flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_CODE_REFERENCED))) + { + Log(("PATMRemovePatch %RRv REFUSED!\n", pPatch->pPrivInstrGC)); + return VERR_ACCESS_DENIED; + } + Log(("PATMRemovePatch %RRv\n", pPatch->pPrivInstrGC)); + + /* Note: NEVER EVER REUSE PATCH MEMORY */ + /* Note: PATMR3DisablePatch puts a breakpoint (0xCC) at the entry of this patch */ + + if (pPatchRec->patch.pPatchBlockOffset) + { + PAVLOU32NODECORE pNode; + + pNode = RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, pPatchRec->patch.pPatchBlockOffset); + Assert(pNode); + } + + if (pPatchRec->patch.flags & PATMFL_CODE_MONITORED) + { + int rc = patmRemovePatchPages(pVM, &pPatchRec->patch); + AssertRC(rc); + } + +#ifdef VBOX_WITH_STATISTICS + if (PATM_STAT_INDEX_IS_VALID(pPatchRec->patch.uPatchIdx)) + { + STAMR3DeregisterF(pVM->pUVM, "/PATM/Stats/Patch/0x%RRv", pPatchRec->patch.pPrivInstrGC); + STAMR3DeregisterF(pVM->pUVM, "/PATM/PatchBD/0x%RRv*", pPatchRec->patch.pPrivInstrGC); + } +#endif + + /* Note: no need to free Guest2PatchAddrTree as those records share memory with Patch2GuestAddrTree records. */ + patmEmptyTreeU32(pVM, &pPatch->Patch2GuestAddrTree); + pPatch->nrPatch2GuestRecs = 0; + Assert(pPatch->Patch2GuestAddrTree == 0); + + patmEmptyTree(pVM, &pPatch->FixupTree); + pPatch->nrFixups = 0; + Assert(pPatch->FixupTree == 0); + + if (pPatchRec->patch.pTempInfo) + MMR3HeapFree(pPatchRec->patch.pTempInfo); + + /* Note: might fail, because it has already been removed (e.g. during reset). */ + RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pPatchRec->Core.Key); + + /* Free the patch record */ + MMHyperFree(pVM, pPatchRec); + return VINF_SUCCESS; +} + +/** + * RTAvlU32DoWithAll() worker. + * Checks whether the current trampoline instruction is the jump to the target patch + * and updates the displacement to jump to the new target. + * + * @returns VBox status code. + * @retval VERR_ALREADY_EXISTS if the jump was found. + * @param pNode The current patch to guest record to check. + * @param pvUser The refresh state. + */ +static DECLCALLBACK(int) patmR3PatchRefreshFindTrampolinePatch(PAVLU32NODECORE pNode, void *pvUser) +{ + PRECPATCHTOGUEST pPatch2GuestRec = (PRECPATCHTOGUEST)pNode; + PPATMREFRESHPATCH pRefreshPatchState = (PPATMREFRESHPATCH)pvUser; + PVM pVM = pRefreshPatchState->pVM; + + uint8_t *pPatchInstr = (uint8_t *)(pVM->patm.s.pPatchMemHC + pPatch2GuestRec->Core.Key); + + /* + * Check if the patch instruction starts with a jump. + * ASSUMES that there is no other patch to guest record that starts + * with a jump. + */ + if (*pPatchInstr == 0xE9) + { + /* Jump found, update the displacement. 
*/ + RTRCPTR pPatchTargetGC = patmGuestGCPtrToPatchGCPtr(pVM, pRefreshPatchState->pPatchRec, + pRefreshPatchState->pPatchTrampoline->pPrivInstrGC); + int32_t displ = pPatchTargetGC - (pVM->patm.s.pPatchMemGC + pPatch2GuestRec->Core.Key + SIZEOF_NEARJUMP32); + + LogFlow(("Updating trampoline patch new patch target %RRv, new displacment %d (old was %d)\n", + pPatchTargetGC, displ, *(uint32_t *)&pPatchInstr[1])); + + *(uint32_t *)&pPatchInstr[1] = displ; + return VERR_ALREADY_EXISTS; /** @todo better return code */ + } + + return VINF_SUCCESS; +} + +/** + * Attempt to refresh the patch by recompiling its entire code block + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatchRec Patch record + */ +int patmR3RefreshPatch(PVM pVM, PPATMPATCHREC pPatchRec) +{ + PPATCHINFO pPatch; + int rc; + RTRCPTR pInstrGC = pPatchRec->patch.pPrivInstrGC; + PTRAMPREC pTrampolinePatchesHead = NULL; + + Log(("patmR3RefreshPatch: attempt to refresh patch at %RRv\n", pInstrGC)); + + pPatch = &pPatchRec->patch; + AssertReturn(pPatch->flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_IDTHANDLER|PATMFL_TRAPHANDLER), VERR_PATCHING_REFUSED); + if (pPatch->flags & PATMFL_EXTERNAL_JUMP_INSIDE) + { + if (!pPatch->pTrampolinePatchesHead) + { + /* + * It is sometimes possible that there are trampoline patches to this patch + * but they are not recorded (after a saved state load for example). + * Refuse to refresh those patches. + * Can hurt performance in theory if the patched code is modified by the guest + * and is executed often. However most of the time states are saved after the guest + * code was modified and is not updated anymore afterwards so this shouldn't be a + * big problem. + */ + Log(("patmR3RefreshPatch: refused because external jumps to this patch exist but the jumps are not recorded\n")); + return VERR_PATCHING_REFUSED; + } + Log(("patmR3RefreshPatch: external jumps to this patch exist, updating\n")); + pTrampolinePatchesHead = pPatch->pTrampolinePatchesHead; + } + + /* Note: quite ugly to enable/disable/remove/insert old and new patches, but there's no easy way around it. */ + + rc = PATMR3DisablePatch(pVM, pInstrGC); + AssertRC(rc); + + /* Kick it out of the lookup tree to make sure PATMR3InstallPatch doesn't fail (hack alert) */ + RTAvloU32Remove(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pPatchRec->Core.Key); +#ifdef VBOX_WITH_STATISTICS + if (PATM_STAT_INDEX_IS_VALID(pPatchRec->patch.uPatchIdx)) + { + STAMR3DeregisterF(pVM->pUVM, "/PATM/Stats/Patch/0x%RRv", pPatchRec->patch.pPrivInstrGC); + STAMR3DeregisterF(pVM->pUVM, "/PATM/PatchBD/0x%RRv*", pPatchRec->patch.pPrivInstrGC); + } +#endif + + /** Note: We don't attempt to reuse patch memory here as it's quite common that the new code block requires more memory. */ + + /* Attempt to install a new patch. */ + rc = PATMR3InstallPatch(pVM, pInstrGC, pPatch->flags & (PATMFL_CODE32|PATMFL_IDTHANDLER|PATMFL_INTHANDLER|PATMFL_TRAPHANDLER|PATMFL_DUPLICATE_FUNCTION|PATMFL_TRAPHANDLER_WITH_ERRORCODE|PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT)); + if (RT_SUCCESS(rc)) + { + RTRCPTR pPatchTargetGC; + PPATMPATCHREC pNewPatchRec; + + /* Determine target address in new patch */ + pPatchTargetGC = PATMR3QueryPatchGCPtr(pVM, pInstrGC); + Assert(pPatchTargetGC); + if (!pPatchTargetGC) + { + rc = VERR_PATCHING_REFUSED; + goto failure; + } + + /* Reset offset into patch memory to put the next code blocks right at the beginning. 
*/ + pPatch->uCurPatchOffset = 0; + + /* insert jump to new patch in old patch block */ + rc = patmPatchGenPatchJump(pVM, pPatch, pInstrGC, pPatchTargetGC, false /* no lookup record */); + if (RT_FAILURE(rc)) + goto failure; + + pNewPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + Assert(pNewPatchRec); /* can't fail */ + + /* Remove old patch (only do that when everything is finished) */ + int rc2 = patmR3RemovePatch(pVM, pPatchRec, true /* force removal */); + AssertRC(rc2); + + /* Put the new patch back into the tree, because removing the old one kicked this one out. (hack alert) */ + bool fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTree, &pNewPatchRec->Core); + Assert(fInserted); NOREF(fInserted); + + Log(("PATM: patmR3RefreshPatch: succeeded to refresh patch at %RRv \n", pInstrGC)); + STAM_COUNTER_INC(&pVM->patm.s.StatPatchRefreshSuccess); + + /* Used by another patch, so don't remove it! */ + pNewPatchRec->patch.flags |= PATMFL_CODE_REFERENCED; + + if (pTrampolinePatchesHead) + { + /* Update all trampoline patches to jump to the new patch. */ + PTRAMPREC pTrampRec = NULL; + PATMREFRESHPATCH RefreshPatch; + + RefreshPatch.pVM = pVM; + RefreshPatch.pPatchRec = &pNewPatchRec->patch; + + pTrampRec = pTrampolinePatchesHead; + + while (pTrampRec) + { + PPATCHINFO pPatchTrampoline = &pTrampRec->pPatchTrampoline->patch; + + RefreshPatch.pPatchTrampoline = pPatchTrampoline; + /* + * We have to find the right patch2guest record because there might be others + * for statistics. + */ + rc = RTAvlU32DoWithAll(&pPatchTrampoline->Patch2GuestAddrTree, true, + patmR3PatchRefreshFindTrampolinePatch, &RefreshPatch); + Assert(rc == VERR_ALREADY_EXISTS); + rc = VINF_SUCCESS; + pTrampRec = pTrampRec->pNext; + } + pNewPatchRec->patch.pTrampolinePatchesHead = pTrampolinePatchesHead; + pNewPatchRec->patch.flags |= PATMFL_EXTERNAL_JUMP_INSIDE; + /* Clear the list of trampoline patches for the old patch (safety precaution). */ + pPatchRec->patch.pTrampolinePatchesHead = NULL; + } + } + +failure: + if (RT_FAILURE(rc)) + { + LogRel(("PATM: patmR3RefreshPatch: failed to refresh patch at %RRv. Reactiving old one. \n", pInstrGC)); + + /* Remove the new inactive patch */ + rc = PATMR3RemovePatch(pVM, pInstrGC); + AssertRC(rc); + + /* Put the old patch back into the tree (or else it won't be saved) (hack alert) */ + bool fInserted = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTree, &pPatchRec->Core); + Assert(fInserted); NOREF(fInserted); + + /* Enable again in case the dirty instruction is near the end and there are safe code paths. */ + int rc2 = PATMR3EnablePatch(pVM, pInstrGC); + AssertRC(rc2); + + STAM_COUNTER_INC(&pVM->patm.s.StatPatchRefreshFailed); + } + return rc; +} + +/** + * Find patch for privileged instruction at specified location + * + * @returns Patch structure pointer if found; else NULL + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to instruction that might lie + * within 5 bytes of an existing patch jump + * @param fIncludeHints Include hinted patches or not + */ +PPATCHINFO patmFindActivePatchByEntrypoint(PVM pVM, RTRCPTR pInstrGC, bool fIncludeHints) +{ + PPATMPATCHREC pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC, false); + /* if the patch is enabled, the pointer is not identical to the privileged patch ptr and it lies within 5 bytes of this priv instr ptr, then we've got a hit! 
*/ + if (pPatchRec) + { + if ( pPatchRec->patch.uState == PATCH_ENABLED + && (pPatchRec->patch.flags & PATMFL_PATCHED_GUEST_CODE) + && pInstrGC > pPatchRec->patch.pPrivInstrGC + && pInstrGC < pPatchRec->patch.pPrivInstrGC + pPatchRec->patch.cbPatchJump) + { + Log(("Found active patch at %RRv (org %RRv)\n", pInstrGC, pPatchRec->patch.pPrivInstrGC)); + return &pPatchRec->patch; + } + else + if ( fIncludeHints + && pPatchRec->patch.uState == PATCH_DISABLED + && (pPatchRec->patch.flags & PATMFL_INSTR_HINT) + && pInstrGC > pPatchRec->patch.pPrivInstrGC + && pInstrGC < pPatchRec->patch.pPrivInstrGC + pPatchRec->patch.cbPatchJump) + { + Log(("Found HINT patch at %RRv (org %RRv)\n", pInstrGC, pPatchRec->patch.pPrivInstrGC)); + return &pPatchRec->patch; + } + } + return NULL; +} + +/** + * Checks whether the GC address is inside a generated patch jump + * + * @returns true -> yes, false -> no + * @param pVM The cross context VM structure. + * @param pAddr Guest context address. + * @param pPatchAddr Guest context patch address (if true). + */ +VMMR3_INT_DECL(bool) PATMR3IsInsidePatchJump(PVM pVM, RTRCPTR pAddr, PRTGCPTR32 pPatchAddr) +{ + RTRCPTR addr; + PPATCHINFO pPatch; + + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + if (PATMIsEnabled(pVM) == false) + return false; + + if (pPatchAddr == NULL) + pPatchAddr = &addr; + + *pPatchAddr = 0; + + pPatch = patmFindActivePatchByEntrypoint(pVM, pAddr); + if (pPatch) + *pPatchAddr = pPatch->pPrivInstrGC; + + return *pPatchAddr == 0 ? false : true; +} + +/** + * Remove patch for privileged instruction at specified location + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context point to privileged instruction + * + * @note returns failure if patching is not allowed or possible + * + */ +VMMR3_INT_DECL(int) PATMR3RemovePatch(PVM pVM, RTRCPTR pInstrGC) +{ + PPATMPATCHREC pPatchRec; + + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + pPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC); + if (pPatchRec) + { + int rc = PATMR3DisablePatch(pVM, pInstrGC); + if (rc == VWRN_PATCH_REMOVED) + return VINF_SUCCESS; + + return patmR3RemovePatch(pVM, pPatchRec, false); + } + AssertFailed(); + return VERR_PATCH_NOT_FOUND; +} + +/** + * Mark patch as dirty + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * + * @note returns failure if patching is not allowed or possible + * + */ +static int patmR3MarkDirtyPatch(PVM pVM, PPATCHINFO pPatch) +{ + if (pPatch->pPatchBlockOffset) + { + Log(("Invalidate patch at %RRv (HC=%RRv)\n", PATCHCODE_PTR_GC(pPatch), PATCHCODE_PTR_HC(pPatch))); + pPatch->bDirtyOpcode = *PATCHCODE_PTR_HC(pPatch); + *PATCHCODE_PTR_HC(pPatch) = 0xCC; + } + + STAM_COUNTER_INC(&pVM->patm.s.StatDirty); + /* Put back the replaced instruction. */ + int rc = PATMR3DisablePatch(pVM, pPatch->pPrivInstrGC); + if (rc == VWRN_PATCH_REMOVED) + return VINF_SUCCESS; + + /* Note: we don't restore patch pages for patches that are not enabled! */ + /* Note: be careful when changing this behaviour!! */ + + /* The patch pages are no longer marked for self-modifying code detection */ + if (pPatch->flags & PATMFL_CODE_MONITORED) + { + rc = patmRemovePatchPages(pVM, pPatch); + AssertRCReturn(rc, rc); + } + pPatch->uState = PATCH_DIRTY; + + /* Paranoia; make sure this patch is not somewhere in the callchain, so prevent ret instructions from succeeding. 
*/ + CTXSUFF(pVM->patm.s.pGCState)->Psp = PATM_STACK_SIZE; + + return VINF_SUCCESS; +} + +/** + * Query the corresponding GC instruction pointer from a pointer inside the patch block itself + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch block structure pointer + * @param pPatchGC GC address in patch block + */ +RTRCPTR patmPatchGCPtr2GuestGCPtr(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pPatchGC) +{ + Assert(pPatch->Patch2GuestAddrTree); + /* Get the closest record from below. */ + PRECPATCHTOGUEST pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32GetBestFit(&pPatch->Patch2GuestAddrTree, pPatchGC - pVM->patm.s.pPatchMemGC, false); + if (pPatchToGuestRec) + return pPatchToGuestRec->pOrgInstrGC; + + return 0; +} + +/** + * Converts Guest code GC ptr to Patch code GC ptr (if found) + * + * @returns corresponding GC pointer in patch block + * @param pVM The cross context VM structure. + * @param pPatch Current patch block pointer + * @param pInstrGC Guest context pointer to privileged instruction + * + */ +RTRCPTR patmGuestGCPtrToPatchGCPtr(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t*) pInstrGC) +{ + if (pPatch->Guest2PatchAddrTree) + { + PRECGUESTTOPATCH pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32Get(&pPatch->Guest2PatchAddrTree, pInstrGC); + if (pGuestToPatchRec) + return pVM->patm.s.pPatchMemGC + pGuestToPatchRec->PatchOffset; + } + + return 0; +} + +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES +/** + * Converts Guest code GC ptr to Patch code GC ptr (if found) + * + * @returns corresponding GC pointer in patch block + * @param pVM The cross context VM structure. + * @param pInstrGC Guest context pointer to privileged instruction + */ +static RTRCPTR patmR3GuestGCPtrToPatchGCPtrSimple(PVM pVM, RCPTRTYPE(uint8_t*) pInstrGC) +{ + PPATMPATCHREC pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pInstrGC, false); + if (pPatchRec && pPatchRec->patch.uState == PATCH_ENABLED && pInstrGC >= pPatchRec->patch.pPrivInstrGC) + return patmGuestGCPtrToPatchGCPtr(pVM, &pPatchRec->patch, pInstrGC); + return NIL_RTRCPTR; +} +#endif + +/** + * Converts Guest code GC ptr to Patch code GC ptr (or nearest from below if no + * identical match) + * + * @returns corresponding GC pointer in patch block + * @param pVM The cross context VM structure. + * @param pPatch Current patch block pointer + * @param pInstrGC Guest context pointer to privileged instruction + * + */ +RTRCPTR patmGuestGCPtrToClosestPatchGCPtr(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t*) pInstrGC) +{ + PRECGUESTTOPATCH pGuestToPatchRec = (PRECGUESTTOPATCH)RTAvlU32GetBestFit(&pPatch->Guest2PatchAddrTree, pInstrGC, false); + if (pGuestToPatchRec) + return pVM->patm.s.pPatchMemGC + pGuestToPatchRec->PatchOffset; + return NIL_RTRCPTR; +} + +/** + * Query the corresponding GC instruction pointer from a pointer inside the patch block itself + * + * @returns original GC instruction pointer or 0 if not found + * @param pVM The cross context VM structure. 
+ * @param pPatchGC GC address in patch block + * @param pEnmState State of the translated address (out) + * + */ +VMMR3_INT_DECL(RTRCPTR) PATMR3PatchToGCPtr(PVM pVM, RTRCPTR pPatchGC, PATMTRANSSTATE *pEnmState) +{ + PPATMPATCHREC pPatchRec; + void *pvPatchCoreOffset; + RTRCPTR pPrivInstrGC; + + Assert(PATMIsPatchGCAddr(pVM, pPatchGC)); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + pvPatchCoreOffset = RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, pPatchGC - pVM->patm.s.pPatchMemGC, false); + if (pvPatchCoreOffset == 0) + { + Log(("PATMR3PatchToGCPtr failed for %RRv offset %x\n", pPatchGC, pPatchGC - pVM->patm.s.pPatchMemGC)); + return 0; + } + pPatchRec = PATM_PATCHREC_FROM_COREOFFSET(pvPatchCoreOffset); + pPrivInstrGC = patmPatchGCPtr2GuestGCPtr(pVM, &pPatchRec->patch, pPatchGC); + if (pEnmState) + { + AssertMsg(pPrivInstrGC && ( pPatchRec->patch.uState == PATCH_ENABLED + || pPatchRec->patch.uState == PATCH_DIRTY + || pPatchRec->patch.uState == PATCH_DISABLE_PENDING + || pPatchRec->patch.uState == PATCH_UNUSABLE), + ("pPrivInstrGC=%RRv uState=%d\n", pPrivInstrGC, pPatchRec->patch.uState)); + + if ( !pPrivInstrGC + || pPatchRec->patch.uState == PATCH_UNUSABLE + || pPatchRec->patch.uState == PATCH_REFUSED) + { + pPrivInstrGC = 0; + *pEnmState = PATMTRANS_FAILED; + } + else + if (pVM->patm.s.pGCStateHC->GCPtrInhibitInterrupts == pPrivInstrGC) + { + *pEnmState = PATMTRANS_INHIBITIRQ; + } + else + if ( pPatchRec->patch.uState == PATCH_ENABLED + && !(pPatchRec->patch.flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_IDTHANDLER|PATMFL_TRAMPOLINE)) + && pPrivInstrGC > pPatchRec->patch.pPrivInstrGC + && pPrivInstrGC < pPatchRec->patch.pPrivInstrGC + pPatchRec->patch.cbPatchJump) + { + *pEnmState = PATMTRANS_OVERWRITTEN; + } + else + if (patmFindActivePatchByEntrypoint(pVM, pPrivInstrGC)) + { + *pEnmState = PATMTRANS_OVERWRITTEN; + } + else + if (pPrivInstrGC == pPatchRec->patch.pPrivInstrGC) + { + *pEnmState = PATMTRANS_PATCHSTART; + } + else + *pEnmState = PATMTRANS_SAFE; + } + return pPrivInstrGC; +} + +/** + * Returns the GC pointer of the patch for the specified GC address + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pAddrGC Guest context address + */ +VMMR3_INT_DECL(RTRCPTR) PATMR3QueryPatchGCPtr(PVM pVM, RTRCPTR pAddrGC) +{ + PPATMPATCHREC pPatchRec; + + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* Find the patch record. */ + pPatchRec = (PPATMPATCHREC)RTAvloU32Get(&pVM->patm.s.PatchLookupTreeHC->PatchTree, pAddrGC); + /** @todo we should only use patches that are enabled! always did this, but it's incorrect! */ + if (pPatchRec && (pPatchRec->patch.uState == PATCH_ENABLED || pPatchRec->patch.uState == PATCH_DIRTY)) + return PATCHCODE_PTR_GC(&pPatchRec->patch); + return NIL_RTRCPTR; +} + +/** + * Attempt to recover dirty instructions + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCtx Pointer to the guest CPU context. + * @param pPatch Patch record. + * @param pPatchToGuestRec Patch to guest address record. + * @param pEip GC pointer of trapping instruction. 
+ */ +static int patmR3HandleDirtyInstr(PVM pVM, PCPUMCTX pCtx, PPATMPATCHREC pPatch, PRECPATCHTOGUEST pPatchToGuestRec, RTRCPTR pEip) +{ + DISCPUSTATE CpuOld, CpuNew; + uint8_t *pPatchInstrHC, *pCurPatchInstrHC; + int rc; + RTRCPTR pCurInstrGC, pCurPatchInstrGC; + uint32_t cbDirty; + PRECPATCHTOGUEST pRec; + RTRCPTR const pOrgInstrGC = pPatchToGuestRec->pOrgInstrGC; + PVMCPU pVCpu = VMMGetCpu0(pVM); + Log(("patmR3HandleDirtyInstr: dirty instruction at %RRv (%RRv)\n", pEip, pOrgInstrGC)); + + pRec = pPatchToGuestRec; + pCurInstrGC = pOrgInstrGC; + pCurPatchInstrGC = pEip; + cbDirty = 0; + pPatchInstrHC = patmPatchGCPtr2PatchHCPtr(pVM, pCurPatchInstrGC); + + /* Find all adjacent dirty instructions */ + while (true) + { + if (pRec->fJumpTarget) + { + LogRel(("PATM: patmR3HandleDirtyInstr: dirty instruction at %RRv (%RRv) ignored, because instruction in function was reused as target of jump\n", pEip, pOrgInstrGC)); + pRec->fDirty = false; + return VERR_PATCHING_REFUSED; + } + + /* Restore original instruction opcode byte so we can check if the write was indeed safe. */ + pCurPatchInstrHC = patmPatchGCPtr2PatchHCPtr(pVM, pCurPatchInstrGC); + *pCurPatchInstrHC = pRec->u8DirtyOpcode; + + /* Only harmless instructions are acceptable. */ + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, pCtx, pCurPatchInstrGC, &CpuOld, 0); + if ( RT_FAILURE(rc) + || !(CpuOld.pCurInstr->fOpType & DISOPTYPE_HARMLESS)) + { + if (RT_SUCCESS(rc)) + cbDirty += CpuOld.cbInstr; + else + if (!cbDirty) + cbDirty = 1; + break; + } + +#ifdef DEBUG + char szBuf[256]; + DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, pCurPatchInstrGC, DBGF_DISAS_FLAGS_DEFAULT_MODE, + szBuf, sizeof(szBuf), NULL); + Log(("DIRTY: %s\n", szBuf)); +#endif + /* Mark as clean; if we fail we'll let it always fault. */ + pRec->fDirty = false; + + /* Remove old lookup record. */ + patmr3RemoveP2GLookupRecord(pVM, &pPatch->patch, pCurPatchInstrGC); + pPatchToGuestRec = NULL; + + pCurPatchInstrGC += CpuOld.cbInstr; + cbDirty += CpuOld.cbInstr; + + /* Let's see if there's another dirty instruction right after. */ + pRec = (PRECPATCHTOGUEST)RTAvlU32GetBestFit(&pPatch->patch.Patch2GuestAddrTree, pCurPatchInstrGC - pVM->patm.s.pPatchMemGC, true); + if (!pRec || !pRec->fDirty) + break; /* no more dirty instructions */ + + /* In case of complex instructions the next guest instruction could be quite far off. */ + pCurPatchInstrGC = pRec->Core.Key + pVM->patm.s.pPatchMemGC; + } + + if ( RT_SUCCESS(rc) + && (CpuOld.pCurInstr->fOpType & DISOPTYPE_HARMLESS) + ) + { + uint32_t cbLeft; + + pCurPatchInstrHC = pPatchInstrHC; + pCurPatchInstrGC = pEip; + cbLeft = cbDirty; + + while (cbLeft && RT_SUCCESS(rc)) + { + bool fValidInstr; + + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, pCtx, pCurInstrGC, &CpuNew, 0); + + fValidInstr = !!(CpuNew.pCurInstr->fOpType & DISOPTYPE_HARMLESS); + if ( !fValidInstr + && (CpuNew.pCurInstr->fOpType & DISOPTYPE_RELATIVE_CONTROLFLOW) + ) + { + RTRCPTR pTargetGC = PATMResolveBranch(&CpuNew, pCurInstrGC); + + if ( pTargetGC >= pOrgInstrGC + && pTargetGC <= pOrgInstrGC + cbDirty + ) + { + /* A relative jump to an instruction inside or to the end of the dirty block is acceptable. */ + fValidInstr = true; + } + } + + /* If the instruction is completely harmless (which implies a 1:1 patch copy). 
*/ + if ( rc == VINF_SUCCESS + && CpuNew.cbInstr <= cbLeft /* must still fit */ + && fValidInstr + ) + { +#ifdef DEBUG + char szBuf[256]; + DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, pCurInstrGC, DBGF_DISAS_FLAGS_DEFAULT_MODE, + szBuf, sizeof(szBuf), NULL); + Log(("NEW: %s\n", szBuf)); +#endif + + /* Copy the new instruction. */ + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), pCurPatchInstrHC, pCurInstrGC, CpuNew.cbInstr); + AssertRC(rc); + + /* Add a new lookup record for the duplicated instruction. */ + patmR3AddP2GLookupRecord(pVM, &pPatch->patch, pCurPatchInstrHC, pCurInstrGC, PATM_LOOKUP_BOTHDIR); + } + else + { +#ifdef DEBUG + char szBuf[256]; + DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, pCurInstrGC, DBGF_DISAS_FLAGS_DEFAULT_MODE, + szBuf, sizeof(szBuf), NULL); + Log(("NEW: %s (FAILED)\n", szBuf)); +#endif + /* Restore the old lookup record for the duplicated instruction. */ + patmR3AddP2GLookupRecord(pVM, &pPatch->patch, pCurPatchInstrHC, pCurInstrGC, PATM_LOOKUP_BOTHDIR); + + /** @todo in theory we need to restore the lookup records for the remaining dirty instructions too! */ + rc = VERR_PATCHING_REFUSED; + break; + } + pCurInstrGC += CpuNew.cbInstr; + pCurPatchInstrHC += CpuNew.cbInstr; + pCurPatchInstrGC += CpuNew.cbInstr; + cbLeft -= CpuNew.cbInstr; + + /* Check if we expanded a complex guest instruction into a patch stream (e.g. call) */ + if (!cbLeft) + { + /* If the next patch instruction doesn't correspond to the next guest instruction, then we have some extra room to fill. */ + if (RTAvlU32Get(&pPatch->patch.Patch2GuestAddrTree, pCurPatchInstrGC - pVM->patm.s.pPatchMemGC) == NULL) + { + pRec = (PRECPATCHTOGUEST)RTAvlU32GetBestFit(&pPatch->patch.Patch2GuestAddrTree, pCurPatchInstrGC - pVM->patm.s.pPatchMemGC, true); + if (pRec) + { + unsigned cbFiller = pRec->Core.Key + pVM->patm.s.pPatchMemGC - pCurPatchInstrGC; + uint8_t *pPatchFillHC = patmPatchGCPtr2PatchHCPtr(pVM, pCurPatchInstrGC); + + Assert(!pRec->fDirty); + + Log(("Room left in patched instruction stream (%d bytes)\n", cbFiller)); + if (cbFiller >= SIZEOF_NEARJUMP32) + { + pPatchFillHC[0] = 0xE9; + *(uint32_t *)&pPatchFillHC[1] = cbFiller - SIZEOF_NEARJUMP32; +#ifdef DEBUG + char szBuf[256]; + DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, pCurPatchInstrGC, + DBGF_DISAS_FLAGS_DEFAULT_MODE, szBuf, sizeof(szBuf), NULL); + Log(("FILL: %s\n", szBuf)); +#endif + } + else + { + for (unsigned i = 0; i < cbFiller; i++) + { + pPatchFillHC[i] = 0x90; /* NOP */ +#ifdef DEBUG + char szBuf[256]; + DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, pCurPatchInstrGC + i, + DBGF_DISAS_FLAGS_DEFAULT_MODE, szBuf, sizeof(szBuf), NULL); + Log(("FILL: %s\n", szBuf)); +#endif + } + } + } + } + } + } + } + else + rc = VERR_PATCHING_REFUSED; + + if (RT_SUCCESS(rc)) + { + STAM_COUNTER_INC(&pVM->patm.s.StatInstrDirtyGood); + } + else + { + STAM_COUNTER_INC(&pVM->patm.s.StatInstrDirtyBad); + Assert(cbDirty); + + /* Mark the whole instruction stream with breakpoints. */ + if (cbDirty) + memset(pPatchInstrHC, 0xCC, cbDirty); + + if ( pVM->patm.s.fOutOfMemory == false + && (pPatch->patch.flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_IDTHANDLER|PATMFL_TRAPHANDLER))) + { + rc = patmR3RefreshPatch(pVM, pPatch); + if (RT_FAILURE(rc)) + { + LogRel(("PATM: Failed to refresh dirty patch at %RRv. Disabling it.\n", pPatch->patch.pPrivInstrGC)); + } + /* Even if we succeed, we must go back to the original instruction as the patched one could be invalid. 
*/ + rc = VERR_PATCHING_REFUSED; + } + } + return rc; +} + +/** + * Handle trap inside patch code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCtx Pointer to the guest CPU context. + * @param pEip GC pointer of trapping instruction. + * @param ppNewEip GC pointer to new instruction. + */ +VMMR3_INT_DECL(int) PATMR3HandleTrap(PVM pVM, PCPUMCTX pCtx, RTRCPTR pEip, RTGCPTR *ppNewEip) +{ + PPATMPATCHREC pPatch = 0; + void *pvPatchCoreOffset; + RTRCUINTPTR offset; + RTRCPTR pNewEip; + int rc ; + PRECPATCHTOGUEST pPatchToGuestRec = 0; + PVMCPU pVCpu = VMMGetCpu0(pVM); + + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + Assert(pVM->cCpus == 1); + + pNewEip = 0; + *ppNewEip = 0; + + STAM_PROFILE_ADV_START(&pVM->patm.s.StatHandleTrap, a); + + /* Find the patch record. */ + /* Note: there might not be a patch to guest translation record (global function) */ + offset = pEip - pVM->patm.s.pPatchMemGC; + pvPatchCoreOffset = RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, offset, false); + if (pvPatchCoreOffset) + { + pPatch = PATM_PATCHREC_FROM_COREOFFSET(pvPatchCoreOffset); + + Assert(offset >= pPatch->patch.pPatchBlockOffset && offset < pPatch->patch.pPatchBlockOffset + pPatch->patch.cbPatchBlockSize); + + if (pPatch->patch.uState == PATCH_DIRTY) + { + Log(("PATMR3HandleTrap: trap in dirty patch at %RRv\n", pEip)); + if (pPatch->patch.flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_CODE_REFERENCED)) + { + /* Function duplication patches set fPIF to 1 on entry */ + pVM->patm.s.pGCStateHC->fPIF = 1; + } + } + else + if (pPatch->patch.uState == PATCH_DISABLED) + { + Log(("PATMR3HandleTrap: trap in disabled patch at %RRv\n", pEip)); + if (pPatch->patch.flags & (PATMFL_DUPLICATE_FUNCTION|PATMFL_CODE_REFERENCED)) + { + /* Function duplication patches set fPIF to 1 on entry */ + pVM->patm.s.pGCStateHC->fPIF = 1; + } + } + else + if (pPatch->patch.uState == PATCH_DISABLE_PENDING) + { + RTRCPTR pPrivInstrGC = pPatch->patch.pPrivInstrGC; + + Log(("PATMR3HandleTrap: disable operation is pending for patch at %RRv\n", pPatch->patch.pPrivInstrGC)); + rc = PATMR3DisablePatch(pVM, pPatch->patch.pPrivInstrGC); + AssertReleaseMsg(rc != VWRN_PATCH_REMOVED, ("PATMR3DisablePatch removed patch at %RRv\n", pPrivInstrGC)); + AssertMsg(pPatch->patch.uState == PATCH_DISABLED || pPatch->patch.uState == PATCH_UNUSABLE, ("Unexpected failure to disable patch state=%d rc=%Rrc\n", pPatch->patch.uState, rc)); + } + + pPatchToGuestRec = (PRECPATCHTOGUEST)RTAvlU32GetBestFit(&pPatch->patch.Patch2GuestAddrTree, offset, false); + AssertReleaseMsg(pPatchToGuestRec, ("PATMR3HandleTrap: Unable to find corresponding guest address for %RRv (offset %x)\n", pEip, offset)); + + pNewEip = pPatchToGuestRec->pOrgInstrGC; + pPatch->patch.cTraps++; + PATM_STAT_FAULT_INC(&pPatch->patch); + } + else + AssertReleaseMsg(pVM->patm.s.pGCStateHC->fPIF == 0, ("PATMR3HandleTrap: Unable to find translation record for %RRv (PIF=0)\n", pEip)); + + /* Check if we were interrupted in PATM generated instruction code. 
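// Editor's note: fPIF lives in the shared PATM guest-context state.  Generated patch
// code writes 0 there at the start of a critical instruction sequence and 1 again once
// the sequence has completed (the PATM_ASMFIX_INTERRUPTFLAG stores in PATMA.asm, added
// later in this patch, appear to target this field), so fPIF == 0 at trap time means the
// fault interrupted the middle of such a sequence and the guest state may be only
// partially updated.  That is why the code below either emulates or retries the faulting
// pushf/push/call, or treats the situation as fatal, instead of simply restarting.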
*/ + if (pVM->patm.s.pGCStateHC->fPIF == 0) + { + DISCPUSTATE Cpu; + rc = CPUMR3DisasmInstrCPU(pVM, pVCpu, pCtx, pEip, &Cpu, "PIF Trap: "); + AssertRC(rc); + + if ( rc == VINF_SUCCESS + && ( Cpu.pCurInstr->uOpcode == OP_PUSHF + || Cpu.pCurInstr->uOpcode == OP_PUSH + || Cpu.pCurInstr->uOpcode == OP_CALL) + ) + { + uint64_t fFlags; + + STAM_COUNTER_INC(&pVM->patm.s.StatPushTrap); + + if (Cpu.pCurInstr->uOpcode == OP_PUSH) + { + rc = PGMShwGetPage(pVCpu, pCtx->esp, &fFlags, NULL); + if ( rc == VINF_SUCCESS + && ((fFlags & (X86_PTE_P|X86_PTE_RW)) == (X86_PTE_P|X86_PTE_RW)) ) + { + /* The stack address is fine, so the push argument is a pointer -> emulate this instruction */ + + /* Reset the PATM stack. */ + CTXSUFF(pVM->patm.s.pGCState)->Psp = PATM_STACK_SIZE; + + pVM->patm.s.pGCStateHC->fPIF = 1; + + Log(("Faulting push -> go back to the original instruction\n")); + + /* continue at the original instruction */ + *ppNewEip = pNewEip - SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), 0); + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return VINF_SUCCESS; + } + } + + /* Typical pushf (most patches)/push (call patch) trap because of a monitored page. */ + rc = PGMShwMakePageWritable(pVCpu, pCtx->esp, 0 /*fFlags*/); + AssertMsgRC(rc, ("PGMShwModifyPage -> rc=%Rrc\n", rc)); + if (rc == VINF_SUCCESS) + { + /* The guest page *must* be present. */ + rc = PGMGstGetPage(pVCpu, pCtx->esp, &fFlags, NULL); + if ( rc == VINF_SUCCESS + && (fFlags & X86_PTE_P)) + { + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return VINF_PATCH_CONTINUE; + } + } + } + else + if (pPatch->patch.pPrivInstrGC == pNewEip) + { + /* Invalidated patch or first instruction overwritten. + * We can ignore the fPIF state in this case. + */ + /* Reset the PATM stack. */ + CTXSUFF(pVM->patm.s.pGCState)->Psp = PATM_STACK_SIZE; + + Log(("Call to invalidated patch -> go back to the original instruction\n")); + + pVM->patm.s.pGCStateHC->fPIF = 1; + + /* continue at the original instruction */ + *ppNewEip = pNewEip - SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), 0); + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return VINF_SUCCESS; + } + + char szBuf[256]; + DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, pCtx->cs.Sel, pEip, DBGF_DISAS_FLAGS_DEFAULT_MODE, szBuf, sizeof(szBuf), NULL); + + /* Very bad. We crashed in emitted code. Probably stack? */ + if (pPatch) + { + AssertLogRelMsg(pVM->patm.s.pGCStateHC->fPIF == 1, + ("Crash in patch code %RRv (%RRv) esp=%RX32\nPatch state=%x flags=%RX64 fDirty=%d\n%s\n", + pEip, pNewEip, CPUMGetGuestESP(pVCpu), pPatch->patch.uState, pPatch->patch.flags, + pPatchToGuestRec->fDirty, szBuf)); + } + else + AssertLogRelMsg(pVM->patm.s.pGCStateHC->fPIF == 1, + ("Crash in patch code %RRv (%RRv) esp=%RX32\n%s\n", pEip, pNewEip, CPUMGetGuestESP(pVCpu), szBuf)); + EMR3FatalError(pVCpu, VERR_PATM_IPE_TRAP_IN_PATCH_CODE); + } + + /* From here on, we must have a valid patch to guest translation. */ + if (pvPatchCoreOffset == 0) + { + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + AssertMsgFailed(("PATMR3HandleTrap: patch not found at address %RRv!!\n", pEip)); + return VERR_PATCH_NOT_FOUND; + } + + /* Take care of dirty/changed instructions. */ + if (pPatchToGuestRec->fDirty) + { + Assert(pPatchToGuestRec->Core.Key == offset); + Assert(pVM->patm.s.pGCStateHC->fPIF == 1); + + rc = patmR3HandleDirtyInstr(pVM, pCtx, pPatch, pPatchToGuestRec, pEip); + if (RT_SUCCESS(rc)) + { + /* Retry the current instruction. 
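// Editor's note (illustration only): each exit of PATMR3HandleTrap returns a CS-relative
// EIP, while pNewEip is a flat guest address.  The conversion used here and at the other
// exits of this function,
//     *ppNewEip = pNewEip - SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), 0);
// subtracts the flat address of CS:0, i.e. the CS base, and therefore reduces to
// *ppNewEip = pNewEip for a flat, zero-based guest code segment.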
*/ + pNewEip = pEip; + rc = VINF_PATCH_CONTINUE; /* Continue at current patch instruction. */ + } + else + { + /* Reset the PATM stack. */ + CTXSUFF(pVM->patm.s.pGCState)->Psp = PATM_STACK_SIZE; + + rc = VINF_SUCCESS; /* Continue at original instruction. */ + } + + *ppNewEip = pNewEip - SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), 0); + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return rc; + } + +#ifdef VBOX_STRICT + if (pPatch->patch.flags & PATMFL_DUPLICATE_FUNCTION) + { + DISCPUSTATE cpu; + bool disret; + uint32_t cbInstr; + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = &pPatch->patch; + + disret = patmR3DisInstr(pVM, &pPatch->patch, pNewEip, patmR3GCVirtToHCVirt(pVM, &cacheRec, pNewEip), PATMREAD_RAWCODE, + &cpu, &cbInstr); + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + if (disret && cpu.pCurInstr->uOpcode == OP_RETN) + { + RTRCPTR retaddr; + PCPUMCTX pCtx2; + + pCtx2 = CPUMQueryGuestCtxPtr(pVCpu); + + rc = PGMPhysSimpleReadGCPtr(pVCpu, &retaddr, pCtx2->esp, sizeof(retaddr)); + AssertRC(rc); + + Log(("Return failed at %RRv (%RRv)\n", pEip, pNewEip)); + Log(("Expected return address %RRv found address %RRv Psp=%x\n", pVM->patm.s.pGCStackHC[(pVM->patm.s.pGCStateHC->Psp+PATM_STACK_SIZE)/sizeof(RTRCPTR)], retaddr, pVM->patm.s.pGCStateHC->Psp)); + } + } +#endif + + /* Return original address, correct by subtracting the CS base address. */ + *ppNewEip = pNewEip - SELMToFlat(pVM, DISSELREG_CS, CPUMCTX2CORE(pCtx), 0); + + /* Reset the PATM stack. */ + CTXSUFF(pVM->patm.s.pGCState)->Psp = PATM_STACK_SIZE; + + if (pVM->patm.s.pGCStateHC->GCPtrInhibitInterrupts == pNewEip) + { + /* Must be a faulting instruction after sti; currently only sysexit, hlt or iret */ + Log(("PATMR3HandleTrap %RRv -> inhibit irqs set!\n", pEip)); +#ifdef VBOX_STRICT + DISCPUSTATE cpu; + bool disret; + uint32_t cbInstr; + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = &pPatch->patch; + + disret = patmR3DisInstr(pVM, &pPatch->patch, pNewEip, patmR3GCVirtToHCVirt(pVM, &cacheRec, pNewEip), PATMREAD_ORGCODE, + &cpu, &cbInstr); + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + if (disret && (cpu.pCurInstr->uOpcode == OP_SYSEXIT || cpu.pCurInstr->uOpcode == OP_HLT || cpu.pCurInstr->uOpcode == OP_INT3)) + { + disret = patmR3DisInstr(pVM, &pPatch->patch, pNewEip, patmR3GCVirtToHCVirt(pVM, &cacheRec, pNewEip), PATMREAD_RAWCODE, + &cpu, &cbInstr); + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + Assert(cpu.pCurInstr->uOpcode == OP_SYSEXIT || cpu.pCurInstr->uOpcode == OP_HLT || cpu.pCurInstr->uOpcode == OP_IRET); + } +#endif + EMSetInhibitInterruptsPC(pVCpu, pNewEip); + pVM->patm.s.pGCStateHC->GCPtrInhibitInterrupts = 0; + } + + Log2(("pPatchBlockGC %RRv - pEip %RRv corresponding GC address %RRv\n", PATCHCODE_PTR_GC(&pPatch->patch), pEip, pNewEip)); + DBGFR3_DISAS_INSTR_LOG(pVCpu, pCtx->cs.Sel, pNewEip, "PATCHRET: "); + if (pNewEip >= pPatch->patch.pPrivInstrGC && pNewEip < pPatch->patch.pPrivInstrGC + pPatch->patch.cbPatchJump) + { + /* We can't jump back to code that we've overwritten with a 5 byte jump! 
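// Editor's note: the range check above compares the translated guest address against
// [pPrivInstrGC, pPrivInstrGC + cbPatchJump).  Installing the patch overwrote exactly
// those guest bytes with the 5-byte jump into patch memory, so an EIP inside that range
// no longer corresponds to executable guest instructions; the handling that follows
// therefore disables the patch and returns VERR_PATCH_DISABLED.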
*/ + Log(("Disabling patch at location %RRv due to trap too close to the privileged instruction \n", pPatch->patch.pPrivInstrGC)); + PATMR3DisablePatch(pVM, pPatch->patch.pPrivInstrGC); + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return VERR_PATCH_DISABLED; + } + +#ifdef PATM_REMOVE_PATCH_ON_TOO_MANY_TRAPS + /** @todo compare to nr of successful runs. add some aging algorithm and determine the best time to disable the patch */ + if (pPatch->patch.cTraps > MAX_PATCH_TRAPS) + { + Log(("Disabling patch at location %RRv due to too many traps inside patch code\n", pPatch->patch.pPrivInstrGC)); + //we are only wasting time, back out the patch + PATMR3DisablePatch(pVM, pPatch->patch.pPrivInstrGC); + pTrapRec->pNextPatchInstr = 0; + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return VERR_PATCH_DISABLED; + } +#endif + + STAM_PROFILE_ADV_STOP(&pVM->patm.s.StatHandleTrap, a); + return VINF_SUCCESS; +} + + +/** + * Handle page-fault in monitored page + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PATMR3HandleMonitoredPage(PVM pVM) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PATM_HM_IPE); + PVMCPU pVCpu = VMMGetCpu0(pVM); + + RTRCPTR addr = pVM->patm.s.pvFaultMonitor; + addr &= PAGE_BASE_GC_MASK; + + int rc = PGMHandlerVirtualDeregister(pVM, pVCpu, addr, false /*fHypervisor*/); + AssertRC(rc); NOREF(rc); + + PPATMPATCHREC pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, addr, false); + if (pPatchRec && pPatchRec->patch.uState == PATCH_ENABLED && PAGE_ADDRESS(pPatchRec->patch.pPrivInstrGC) == PAGE_ADDRESS(addr)) + { + STAM_COUNTER_INC(&pVM->patm.s.StatMonitored); + Log(("Renewing patch at %RRv\n", pPatchRec->patch.pPrivInstrGC)); + rc = PATMR3DisablePatch(pVM, pPatchRec->patch.pPrivInstrGC); + if (rc == VWRN_PATCH_REMOVED) + return VINF_SUCCESS; + + PATMR3EnablePatch(pVM, pPatchRec->patch.pPrivInstrGC); + + if (addr == pPatchRec->patch.pPrivInstrGC) + addr++; + } + + for(;;) + { + pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, addr, true); + + if (!pPatchRec || PAGE_ADDRESS(pPatchRec->patch.pPrivInstrGC) != PAGE_ADDRESS(addr)) + break; + + if (pPatchRec && pPatchRec->patch.uState == PATCH_ENABLED) + { + STAM_COUNTER_INC(&pVM->patm.s.StatMonitored); + Log(("Renewing patch at %RRv\n", pPatchRec->patch.pPrivInstrGC)); + PATMR3DisablePatch(pVM, pPatchRec->patch.pPrivInstrGC); + PATMR3EnablePatch(pVM, pPatchRec->patch.pPrivInstrGC); + } + addr = pPatchRec->patch.pPrivInstrGC + 1; + } + + pVM->patm.s.pvFaultMonitor = 0; + return VINF_SUCCESS; +} + + +#ifdef VBOX_WITH_STATISTICS + +static const char *PATMPatchType(PVM pVM, PPATCHINFO pPatch) +{ + if (pPatch->flags & PATMFL_SYSENTER) + { + return "SYSENT"; + } + else + if (pPatch->flags & (PATMFL_TRAPHANDLER|PATMFL_INTHANDLER)) + { + static char szTrap[16]; + uint32_t iGate; + + iGate = TRPMR3QueryGateByHandler(pVM, PATCHCODE_PTR_GC(pPatch)); + if (iGate < 256) + RTStrPrintf(szTrap, sizeof(szTrap), (pPatch->flags & PATMFL_INTHANDLER) ? "INT-%2X" : "TRAP-%2X", iGate); + else + RTStrPrintf(szTrap, sizeof(szTrap), (pPatch->flags & PATMFL_INTHANDLER) ? "INT-??" 
: "TRAP-??"); + return szTrap; + } + else + if (pPatch->flags & (PATMFL_DUPLICATE_FUNCTION)) + return "DUPFUNC"; + else + if (pPatch->flags & PATMFL_REPLACE_FUNCTION_CALL) + return "FUNCCALL"; + else + if (pPatch->flags & PATMFL_TRAMPOLINE) + return "TRAMP"; + else + return patmGetInstructionString(pPatch->opcode, pPatch->flags); +} + +static const char *PATMPatchState(PVM pVM, PPATCHINFO pPatch) +{ + NOREF(pVM); + switch(pPatch->uState) + { + case PATCH_ENABLED: + return "ENA"; + case PATCH_DISABLED: + return "DIS"; + case PATCH_DIRTY: + return "DIR"; + case PATCH_UNUSABLE: + return "UNU"; + case PATCH_REFUSED: + return "REF"; + case PATCH_DISABLE_PENDING: + return "DIP"; + default: + AssertFailed(); + return " "; + } +} + +/** + * Resets the sample. + * @param pVM The cross context VM structure. + * @param pvSample The sample registered using STAMR3RegisterCallback. + */ +static void patmResetStat(PVM pVM, void *pvSample) +{ + PPATCHINFO pPatch = (PPATCHINFO)pvSample; + Assert(pPatch); + + pVM->patm.s.pStatsHC[pPatch->uPatchIdx].u32A = 0; + pVM->patm.s.pStatsHC[pPatch->uPatchIdx].u32B = 0; +} + +/** + * Prints the sample into the buffer. + * + * @param pVM The cross context VM structure. + * @param pvSample The sample registered using STAMR3RegisterCallback. + * @param pszBuf The buffer to print into. + * @param cchBuf The size of the buffer. + */ +static void patmPrintStat(PVM pVM, void *pvSample, char *pszBuf, size_t cchBuf) +{ + PPATCHINFO pPatch = (PPATCHINFO)pvSample; + Assert(pPatch); + + Assert(pPatch->uState != PATCH_REFUSED); + Assert(!(pPatch->flags & (PATMFL_REPLACE_FUNCTION_CALL|PATMFL_MMIO_ACCESS))); + + RTStrPrintf(pszBuf, cchBuf, "size %04x ->%3s %8s - %08d - %08d", + pPatch->cbPatchBlockSize, PATMPatchState(pVM, pPatch), PATMPatchType(pVM, pPatch), + pVM->patm.s.pStatsHC[pPatch->uPatchIdx].u32A, pVM->patm.s.pStatsHC[pPatch->uPatchIdx].u32B); +} + +/** + * Returns the GC address of the corresponding patch statistics counter + * + * @returns Stat address + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + */ +RTRCPTR patmPatchQueryStatAddress(PVM pVM, PPATCHINFO pPatch) +{ + Assert(pPatch->uPatchIdx != PATM_STAT_INDEX_NONE); + return pVM->patm.s.pStatsGC + sizeof(STAMRATIOU32) * pPatch->uPatchIdx + RT_UOFFSETOF(STAMRATIOU32, u32A); +} + +#endif /* VBOX_WITH_STATISTICS */ +#ifdef VBOX_WITH_DEBUGGER + +/** + * @callback_method_impl{FNDBGCCMD, The '.patmoff' command.} + */ +static DECLCALLBACK(int) patmr3CmdOff(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + NOREF(cArgs); NOREF(paArgs); + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return DBGCCmdHlpPrintf(pCmdHlp, "PATM is permanently disabled by HM/NEM.\n"); + + RTAvloU32DoWithAll(&pVM->patm.s.PatchLookupTreeHC->PatchTree, true, DisableAllPatches, pVM); + PATMR3AllowPatching(pVM->pUVM, false); + return pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Patching disabled\n"); +} + +/** + * @callback_method_impl{FNDBGCCMD, The '.patmon' command.} + */ +static DECLCALLBACK(int) patmr3CmdOn(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. 
+ */ + NOREF(cArgs); NOREF(paArgs); + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return DBGCCmdHlpPrintf(pCmdHlp, "PATM is permanently disabled by HM/NEM.\n"); + + PATMR3AllowPatching(pVM->pUVM, true); + RTAvloU32DoWithAll(&pVM->patm.s.PatchLookupTreeHC->PatchTree, true, EnableAllPatches, pVM); + return pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Patching enabled\n"); +} + +#endif /* VBOX_WITH_DEBUGGER */ + diff --git a/src/VBox/VMM/VMMR3/PATMA.asm b/src/VBox/VMM/VMMR3/PATMA.asm new file mode 100644 index 00000000..527c3fa1 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMA.asm @@ -0,0 +1,2600 @@ +; $Id: PATMA.asm $ +;; @file +; PATM Assembly Routines. +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +;; +; @note This method has problems in theory. If we fault for any reason, then we won't be able to restore +; the guest's context properly!! +; E.g if one of the push instructions causes a fault or SS isn't wide open and our patch GC state accesses aren't valid. +; @assumptions +; - Enough stack for a few pushes +; - The SS selector has base 0 and limit 0xffffffff +; +; @todo stack probing is currently hardcoded and not present everywhere (search for 'probe stack') + + +;******************************************************************************* +;* Header Files * +;******************************************************************************* +%include "VBox/asmdefs.mac" +%include "VBox/err.mac" +%include "iprt/x86.mac" +%include "VBox/vmm/cpum.mac" +%include "VBox/vmm/vm.mac" +%include "PATMA.mac" + + +;******************************************************************************* +;* Defined Constants And Macros * +;******************************************************************************* +%ifdef DEBUG +; Noisy, but useful for debugging certain problems +;;;%define PATM_LOG_PATCHINSTR +;;%define PATM_LOG_PATCHIRET +%endif + +;; +; Simple PATCHASMRECORD initializer +; @param %1 The patch function name. +; @param %2 The number of fixups. +; +%macro PATCHASMRECORD_INIT 2 +istruc PATCHASMRECORD + at PATCHASMRECORD.pbFunction, RTCCPTR_DEF NAME(%1) + at PATCHASMRECORD.offJump, DD 0 + at PATCHASMRECORD.offRelJump, DD 0 + at PATCHASMRECORD.offSizeOverride,DD 0 + at PATCHASMRECORD.cbFunction, DD NAME(%1 %+ _EndProc) - NAME(%1) + at PATCHASMRECORD.cRelocs, DD %2 +iend +%endmacro + +;; +; Simple PATCHASMRECORD initializer +; @param %1 The patch function name. +; @param %2 Jump lable. +; @param %3 The number of fixups. +; +%macro PATCHASMRECORD_INIT_JUMP 3 +istruc PATCHASMRECORD + at PATCHASMRECORD.pbFunction, RTCCPTR_DEF NAME(%1) + at PATCHASMRECORD.offJump, DD %2 - NAME(%1) + at PATCHASMRECORD.offRelJump, DD 0 + at PATCHASMRECORD.offSizeOverride,DD 0 + at PATCHASMRECORD.cbFunction, DD NAME(%1 %+ _EndProc) - NAME(%1) + at PATCHASMRECORD.cRelocs, DD %3 +iend +%endmacro + +;; +; Simple PATCHASMRECORD initializer +; @param %1 The patch function name. +; @param %2 Jump lable (or nothing). 
+; @param %3 Relative jump label (or nothing). +; @param %4 Size override label (or nothing). +; @param %5 The number of fixups. +; +%macro PATCHASMRECORD_INIT_EX 5 +istruc PATCHASMRECORD + at PATCHASMRECORD.pbFunction, RTCCPTR_DEF NAME(%1) +%ifid %2 + at PATCHASMRECORD.offJump, DD %2 - NAME(%1) +%else + at PATCHASMRECORD.offJump, DD 0 +%endif +%ifid %3 + at PATCHASMRECORD.offRelJump, DD %3 - NAME(%1) +%else + at PATCHASMRECORD.offRelJump, DD 0 +%endif +%ifid %4 + at PATCHASMRECORD.offSizeOverride,DD %4 - NAME(%1) +%else + at PATCHASMRECORD.offSizeOverride,DD 0 +%endif + at PATCHASMRECORD.cbFunction, DD NAME(%1 %+ _EndProc) - NAME(%1) + at PATCHASMRECORD.cRelocs, DD %5 +iend +%endmacro + +;; +; Switches to the code section and aligns the function. +; +; @remarks This section must be different from the patch readonly data section! +; +%macro BEGIN_PATCH_CODE_SECTION 0 +BEGINCODE +align 32 +%endmacro +%macro BEGIN_PATCH_CODE_SECTION_NO_ALIGN 0 +BEGINCODE +%endmacro + +;; +; Switches to the data section for the read-only patch descriptor data and +; aligns it appropriately. +; +; @remarks This section must be different from the patch code section! +; +%macro BEGIN_PATCH_RODATA_SECTION 0 +BEGINDATA +align 16 +%endmacro +%macro BEGIN_PATCH_RODATA_SECTION_NO_ALIGN 0 +BEGINDATA +%endmacro + + +;; +; Starts a patch. +; +; @param %1 The patch record name (externally visible). +; @param %2 The patch function name (considered internal). +; +%macro BEGIN_PATCH 2 +; The patch record. +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME %1 +PATCHASMRECORD_INIT PATMCpuidReplacement, (RT_CONCAT(%1,_FixupEnd) - RT_CONCAT(%1,_FixupStart)) / 8 +RT_CONCAT(%1,_FixupStart): + +; The patch code. +BEGIN_PATCH_CODE_SECTION +BEGINPROC %2 +%endmacro + +;; +; Emit a fixup. +; @param %1 The fixup type. +%macro PATCH_FIXUP 1 +BEGIN_PATCH_RODATA_SECTION_NO_ALIGN + dd %1, 0 +BEGIN_PATCH_CODE_SECTION_NO_ALIGN +%endmacro + +;; +; Emit a fixup with extra info. +; @param %1 The fixup type. +; @param %2 The extra fixup info. +%macro PATCH_FIXUP_2 2 +BEGIN_PATCH_RODATA_SECTION_NO_ALIGN + dd %1, %2 +BEGIN_PATCH_CODE_SECTION_NO_ALIGN +%endmacro + +;; +; Ends a patch. +; +; This terminates the function and fixup array. +; +; @param %1 The patch record name (externally visible). +; @param %2 The patch function name (considered internal). +; +%macro END_PATCH 2 +ENDPROC %2 + +; Terminate the fixup array. +BEGIN_PATCH_RODATA_SECTION_NO_ALIGN +RT_CONCAT(%1,_FixupEnd): + dd 0ffffffffh, 0ffffffffh +BEGIN_PATCH_CODE_SECTION_NO_ALIGN +%endmacro + + +; +; Switch to 32-bit mode (x86). 
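; Editor's note on the macros above: every GLOBALNAME g_patm*Record emitted with a
; PATCHASMRECORD_INIT* macro is a PATCHASMRECORD header (pbFunction, offJump, offRelJump,
; offSizeOverride, cbFunction, cRelocs) followed directly by the fixup array: one pair of
; dwords per relocation, the PATM_ASMFIX_* type plus an extra info dword, terminated by
; the sentinel pair 0ffffffffh, 0ffffffffh.  That is why BEGIN_PATCH derives the count as
; (FixupEnd - FixupStart) / 8: each entry is exactly 8 bytes.  The C side of PATM walks
; these records when it copies a patch template into patch memory and resolves the
; PATM_ASMFIX_* references.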
+; +%ifdef RT_ARCH_AMD64 + BITS 32 +%endif + + +%ifdef VBOX_WITH_STATISTICS +; +; Patch call statistics +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMStats + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + inc dword [ss:PATM_ASMFIX_ALLPATCHCALLS] + inc dword [ss:PATM_ASMFIX_PERPATCHCALLS] + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMStats + +; Patch record for statistics +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmStatsRecord + PATCHASMRECORD_INIT PATMStats, 4 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_ALLPATCHCALLS, 0 + DD PATM_ASMFIX_PERPATCHCALLS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh +%endif ; VBOX_WITH_STATISTICS + + +; +; Set PATM_ASMFIX_INTERRUPTFLAG +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMSetPIF + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMSetPIF + +; Patch record for setting PATM_ASMFIX_INTERRUPTFLAG +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmSetPIFRecord + PATCHASMRECORD_INIT PATMSetPIF, 1 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + +; +; Clear PATM_ASMFIX_INTERRUPTFLAG +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMClearPIF + ; probe stack here as we can't recover from page faults later on + not dword [esp-64] + not dword [esp-64] + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 +ENDPROC PATMClearPIF + +; Patch record for clearing PATM_ASMFIX_INTERRUPTFLAG +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmClearPIFRecord + PATCHASMRECORD_INIT PATMClearPIF, 1 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + +; +; Clear PATM_ASMFIX_INHIBITIRQADDR and fault if IF=0 +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMClearInhibitIRQFaultIF0 + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + mov dword [ss:PATM_ASMFIX_INHIBITIRQADDR], 0 + pushf + + test dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + jz PATMClearInhibitIRQFaultIF0_Fault + + ; if interrupts are pending, then we must go back to the host context to handle them! 
+ test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + jz PATMClearInhibitIRQFaultIF0_Continue + + ; Go to our hypervisor trap handler to dispatch the pending irq + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_EDI], edi + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX | PATM_RESTORE_EDI + mov eax, PATM_ACTION_DISPATCH_PENDING_IRQ + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + mov edi, PATM_ASMFIX_NEXTINSTRADDR + popfd ; restore flags we pushed above (the or instruction changes the flags as well) + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + +PATMClearInhibitIRQFaultIF0_Fault: + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMClearInhibitIRQFaultIF0_Continue: + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMClearInhibitIRQFaultIF0 + +; Patch record for clearing PATM_ASMFIX_INHIBITIRQADDR +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmClearInhibitIRQFaultIF0Record + PATCHASMRECORD_INIT PATMClearInhibitIRQFaultIF0, 12 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INHIBITIRQADDR, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_EDI, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_NEXTINSTRADDR, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Clear PATM_ASMFIX_INHIBITIRQADDR and continue if IF=0 (duplicated function only; never jump back to guest code afterwards!!) +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMClearInhibitIRQContIF0 + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + mov dword [ss:PATM_ASMFIX_INHIBITIRQADDR], 0 + pushf + + test dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + jz PATMClearInhibitIRQContIF0_Continue + + ; if interrupts are pending, then we must go back to the host context to handle them! 
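; Editor's note: the dispatch sequence used in PATMClearInhibitIRQFaultIF0 above, and
; again right below, is PATM's standard way of bailing out to the hypervisor: registers
; about to be clobbered are parked in the PATM_ASMFIX_TEMP_* slots together with a
; PATM_ASMFIX_TEMP_RESTORE_FLAGS mask saying which of them to restore, the requested
; PATM_ACTION_* code is OR'ed into PATM_ASMFIX_PENDINGACTION, ecx is loaded with
; PATM_ACTION_MAGIC, and the illegal opcode bytes 0Fh 0Bh raise a trap that
; PATMHandleIllegalInstrTrap (per the comments) recognises and dispatches; execution
; never falls through to the instruction after the illegal opcode.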
+ test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + jz PATMClearInhibitIRQContIF0_Continue + + ; Go to our hypervisor trap handler to dispatch the pending irq + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_EDI], edi + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX | PATM_RESTORE_EDI + mov eax, PATM_ACTION_DISPATCH_PENDING_IRQ + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + mov edi, PATM_ASMFIX_NEXTINSTRADDR + popfd ; restore flags we pushed above (the or instruction changes the flags as well) + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + +PATMClearInhibitIRQContIF0_Continue: + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMClearInhibitIRQContIF0 + +; Patch record for clearing PATM_ASMFIX_INHIBITIRQADDR +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmClearInhibitIRQContIF0Record + PATCHASMRECORD_INIT PATMClearInhibitIRQContIF0, 11 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INHIBITIRQADDR, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_EDI, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_NEXTINSTRADDR, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMCliReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + mov eax, PATM_ACTION_LOG_CLI + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop ecx + pop eax +%endif + + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IF + popf + + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMCliJump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMCliReplacement + +; Patch record for 'cli' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmCliRecord +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT_JUMP PATMCliReplacement, PATMCliJump, 4 +%else + PATCHASMRECORD_INIT_JUMP PATMCliReplacement, PATMCliJump, 3 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMStiReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + mov dword [ss:PATM_ASMFIX_INHIBITIRQADDR], PATM_ASMFIX_NEXTINSTRADDR + pushf +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + mov eax, PATM_ACTION_LOG_STI + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop ecx + pop eax +%endif + or dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMStiReplacement + +; Patch record for 'sti' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmStiRecord +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT PATMStiReplacement, 6 +%else + PATCHASMRECORD_INIT PATMStiReplacement, 5 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INHIBITIRQADDR, 0 + DD PATM_ASMFIX_NEXTINSTRADDR, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD 
PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Trampoline code for trap entry (without error code on the stack) +; +; esp + 32 - GS (V86 only) +; esp + 28 - FS (V86 only) +; esp + 24 - DS (V86 only) +; esp + 20 - ES (V86 only) +; esp + 16 - SS (if transfer to inner ring) +; esp + 12 - ESP (if transfer to inner ring) +; esp + 8 - EFLAGS +; esp + 4 - CS +; esp - EIP +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMTrapEntry + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + +%ifdef PATM_LOG_PATCHIRET + push eax + push ecx + push edx + lea edx, dword [ss:esp+12+4] ;3 dwords + pushed flags -> iret eip + mov eax, PATM_ACTION_LOG_GATE_ENTRY + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + test dword [esp+12], X86_EFL_VM + jnz PATMTrapNoRing1 + + ; make sure the saved CS selector for ring 1 is made 0 + test dword [esp+8], 2 + jnz PATMTrapNoRing1 + test dword [esp+8], 1 + jz PATMTrapNoRing1 + and dword [esp+8], dword ~1 ; yasm / nasm dword +PATMTrapNoRing1: + + ; correct EFLAGS on the stack to include the current IOPL + push eax + mov eax, dword [ss:PATM_ASMFIX_VMFLAGS] + and eax, X86_EFL_IOPL + and dword [esp+16], ~X86_EFL_IOPL ; esp+16 = eflags = esp+8+4(efl)+4(eax) + or dword [esp+16], eax + pop eax + + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMTrapEntryJump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMTrapEntry + +; Patch record for trap gate entrypoint +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmTrapEntryRecord +%ifdef PATM_LOG_PATCHIRET + PATCHASMRECORD_INIT_JUMP PATMTrapEntry, PATMTrapEntryJump, 4 +%else + PATCHASMRECORD_INIT_JUMP PATMTrapEntry, PATMTrapEntryJump, 3 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHIRET + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Trampoline code for trap entry (with error code on the stack) +; +; esp + 36 - GS (V86 only) +; esp + 32 - FS (V86 only) +; esp + 28 - DS (V86 only) +; esp + 24 - ES (V86 only) +; esp + 20 - SS (if transfer to inner ring) +; esp + 16 - ESP (if transfer to inner ring) +; esp + 12 - EFLAGS +; esp + 8 - CS +; esp + 4 - EIP +; esp - error code +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMTrapEntryErrorCode +PATMTrapErrorCodeEntryStart: + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + +%ifdef PATM_LOG_PATCHIRET + push eax + push ecx + push edx + lea edx, dword [ss:esp+12+4+4] ;3 dwords + pushed flags + error code -> iret eip + mov eax, PATM_ACTION_LOG_GATE_ENTRY + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + test dword [esp+16], X86_EFL_VM + jnz PATMTrapErrorCodeNoRing1 + + ; make sure the saved CS selector for ring 1 is made 0 + test dword [esp+12], 2 + jnz PATMTrapErrorCodeNoRing1 + test dword [esp+12], 1 + jz PATMTrapErrorCodeNoRing1 + and dword [esp+12], dword ~1 ; yasm / nasm dword +PATMTrapErrorCodeNoRing1: + + ; correct EFLAGS on the stack to include the current IOPL + push eax + mov eax, dword [ss:PATM_ASMFIX_VMFLAGS] + and eax, X86_EFL_IOPL + and dword [esp+20], ~X86_EFL_IOPL ; esp+20 = eflags = esp+8+4(efl)+4(error code)+4(eax) + or dword [esp+20], eax + pop eax + + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 
+PATMTrapErrorCodeEntryJump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMTrapEntryErrorCode + +; Patch record for trap gate entrypoint +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmTrapEntryRecordErrorCode +%ifdef PATM_LOG_PATCHIRET + PATCHASMRECORD_INIT_JUMP PATMTrapEntryErrorCode, PATMTrapErrorCodeEntryJump, 4 +%else + PATCHASMRECORD_INIT_JUMP PATMTrapEntryErrorCode, PATMTrapErrorCodeEntryJump, 3 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHIRET + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Trampoline code for interrupt gate entry (without error code on the stack) +; +; esp + 32 - GS (V86 only) +; esp + 28 - FS (V86 only) +; esp + 24 - DS (V86 only) +; esp + 20 - ES (V86 only) +; esp + 16 - SS (if transfer to inner ring) +; esp + 12 - ESP (if transfer to inner ring) +; esp + 8 - EFLAGS +; esp + 4 - CS +; esp - EIP +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMIntEntry + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + +%ifdef PATM_LOG_PATCHIRET + push eax + push ecx + push edx + lea edx, dword [ss:esp+12+4] ;3 dwords + pushed flags -> iret eip + mov eax, PATM_ACTION_LOG_GATE_ENTRY + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + test dword [esp+12], X86_EFL_VM + jnz PATMIntNoRing1 + + ; make sure the saved CS selector for ring 1 is made 0 + test dword [esp+8], 2 + jnz PATMIntNoRing1 + test dword [esp+8], 1 + jz PATMIntNoRing1 + and dword [esp+8], dword ~1 ; yasm / nasm dword +PATMIntNoRing1: + + ; correct EFLAGS on the stack to include the current IOPL + push eax + mov eax, dword [ss:PATM_ASMFIX_VMFLAGS] + and eax, X86_EFL_IOPL + and dword [esp+16], ~X86_EFL_IOPL ; esp+16 = eflags = esp+8+4(efl)+4(eax) + or dword [esp+16], eax + pop eax + + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMIntEntry + +; Patch record for interrupt gate entrypoint +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmIntEntryRecord +%ifdef PATM_LOG_PATCHIRET + PATCHASMRECORD_INIT PATMIntEntry, 4 +%else + PATCHASMRECORD_INIT PATMIntEntry, 3 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHIRET + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Trampoline code for interrupt gate entry (*with* error code on the stack) +; +; esp + 36 - GS (V86 only) +; esp + 32 - FS (V86 only) +; esp + 28 - DS (V86 only) +; esp + 24 - ES (V86 only) +; esp + 20 - SS (if transfer to inner ring) +; esp + 16 - ESP (if transfer to inner ring) +; esp + 12 - EFLAGS +; esp + 8 - CS +; esp + 4 - EIP +; esp - error code +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMIntEntryErrorCode + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + +%ifdef PATM_LOG_PATCHIRET + push eax + push ecx + push edx + lea edx, dword [ss:esp+12+4+4] ;3 dwords + pushed flags + error code -> iret eip + mov eax, PATM_ACTION_LOG_GATE_ENTRY + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + test dword [esp+16], X86_EFL_VM + jnz PATMIntNoRing1_ErrorCode + + ; make sure the saved CS selector for ring 1 is made 0 + test dword [esp+12], 2 + jnz PATMIntNoRing1_ErrorCode + test dword [esp+12], 1 + jz PATMIntNoRing1_ErrorCode + and dword 
[esp+12], dword ~1 ; yasm / nasm dword +PATMIntNoRing1_ErrorCode: + + ; correct EFLAGS on the stack to include the current IOPL + push eax + mov eax, dword [ss:PATM_ASMFIX_VMFLAGS] + and eax, X86_EFL_IOPL + and dword [esp+20], ~X86_EFL_IOPL ; esp+20 = eflags = esp+8+4(efl)+4(eax)+4(error code) + or dword [esp+20], eax + pop eax + + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMIntEntryErrorCode + +; Patch record for interrupt gate entrypoint +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmIntEntryRecordErrorCode +%ifdef PATM_LOG_PATCHIRET + PATCHASMRECORD_INIT PATMIntEntryErrorCode, 4 +%else + PATCHASMRECORD_INIT PATMIntEntryErrorCode, 3 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHIRET + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; 32 bits Popf replacement that faults when IF remains 0 +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPopf32Replacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + mov eax, PATM_ACTION_LOG_POPF_IF1 + test dword [esp+8], X86_EFL_IF + jnz PATMPopf32_Log + mov eax, PATM_ACTION_LOG_POPF_IF0 + +PATMPopf32_Log: + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop ecx + pop eax +%endif + + test dword [esp], X86_EFL_IF + jnz PATMPopf32_Ok + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMPopf32_Ok: + ; Note: we don't allow popf instructions to change the current IOPL; we simply ignore such changes (!!!) + ; In this particular patch it's rather unlikely the pushf was included, so we have no way to check if the flags on the stack were correctly synced + ; PATMPopf32Replacement_NoExit is different, because it's only used in IDT and function patches + or dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + + ; if interrupts are pending, then we must go back to the host context to handle them! 
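; Editor's note: PATM_ASMFIX_VMFLAGS is the guest's virtualised EFLAGS image maintained
; by PATM.  Since raw-mode execution keeps the real IF under hypervisor control, the
; cli/sti replacements above clear and set X86_EFL_IF only in this copy, the pushf
; replacements further down merge the virtual bits into the pushed value
; (PATM_FLAGS_MASK), and the popf and iret replacements consult it, together with the
; VMCPU_FF_* force-action flags, to decide whether pending interrupts must be dispatched
; before the guest may continue with interrupts virtually enabled.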
+ test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + jz PATMPopf32_Continue + + ; Go to our hypervisor trap handler to dispatch the pending irq + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_EDI], edi + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX | PATM_RESTORE_EDI + mov eax, PATM_ACTION_DISPATCH_PENDING_IRQ + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + mov edi, PATM_ASMFIX_NEXTINSTRADDR + + popfd ; restore flags we pushed above (the or instruction changes the flags as well) + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + +PATMPopf32_Continue: + popfd ; restore flags we pushed above + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMPopf32Jump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMPopf32Replacement + +; Patch record for 'popfd' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPopf32Record +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT_JUMP PATMPopf32Replacement, PATMPopf32Jump, 12 +%else + PATCHASMRECORD_INIT_JUMP PATMPopf32Replacement, PATMPopf32Jump, 11 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_EDI, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_NEXTINSTRADDR, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; no need to check the IF flag when popf isn't an exit point of a patch (e.g. function duplication) +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPopf32Replacement_NoExit + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + mov eax, PATM_ACTION_LOG_POPF_IF1 + test dword [esp+8], X86_EFL_IF + jnz PATMPopf32_NoExitLog + mov eax, PATM_ACTION_LOG_POPF_IF0 + +PATMPopf32_NoExitLog: + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop ecx + pop eax +%endif + test dword [esp], X86_EFL_IF + jz PATMPopf32_NoExit_Continue + + ; if interrupts are pending, then we must go back to the host context to handle them! 
+ test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + jz PATMPopf32_NoExit_Continue + + ; Go to our hypervisor trap handler to dispatch the pending irq + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_EDI], edi + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX | PATM_RESTORE_EDI + mov eax, PATM_ACTION_DISPATCH_PENDING_IRQ + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + mov edi, PATM_ASMFIX_NEXTINSTRADDR + + pop dword [ss:PATM_ASMFIX_VMFLAGS] ; restore flags now (the or instruction changes the flags as well) + push dword [ss:PATM_ASMFIX_VMFLAGS] + popfd + + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + +PATMPopf32_NoExit_Continue: + pop dword [ss:PATM_ASMFIX_VMFLAGS] + push dword [ss:PATM_ASMFIX_VMFLAGS] + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMPopf32Replacement_NoExit + +; Patch record for 'popfd' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPopf32Record_NoExit +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT PATMPopf32Replacement_NoExit, 14 +%else + PATCHASMRECORD_INIT PATMPopf32Replacement_NoExit, 13 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_EDI, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_NEXTINSTRADDR, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; 16 bits Popf replacement that faults when IF remains 0 +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPopf16Replacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + test word [esp], X86_EFL_IF + jnz PATMPopf16_Ok + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMPopf16_Ok: + ; if interrupts are pending, then we must go back to the host context to handle them! + ; @note we destroy the flags here, but that should really not matter (PATM_INT3 case) + test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + jz PATMPopf16_Continue + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMPopf16_Continue: + + pop word [ss:PATM_ASMFIX_VMFLAGS] + push word [ss:PATM_ASMFIX_VMFLAGS] + and dword [ss:PATM_ASMFIX_VMFLAGS], PATM_VIRTUAL_FLAGS_MASK + or dword [ss:PATM_ASMFIX_VMFLAGS], PATM_VIRTUAL_FLAGS_MASK + + DB 0x66 ; size override + popf ;after the and and or operations!! 
(flags must be preserved) + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + + DB 0xE9 +PATMPopf16Jump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMPopf16Replacement + +; Patch record for 'popf' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPopf16Record + PATCHASMRECORD_INIT_JUMP PATMPopf16Replacement, PATMPopf16Jump, 9 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; 16 bits Popf replacement that faults when IF remains 0 +; @todo not necessary to fault in that case (see 32 bits version) +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPopf16Replacement_NoExit + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + test word [esp], X86_EFL_IF + jnz PATMPopf16_Ok_NoExit + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMPopf16_Ok_NoExit: + ; if interrupts are pending, then we must go back to the host context to handle them! + ; @note we destroy the flags here, but that should really not matter (PATM_INT3 case) + test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + jz PATMPopf16_Continue_NoExit + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMPopf16_Continue_NoExit: + + pop word [ss:PATM_ASMFIX_VMFLAGS] + push word [ss:PATM_ASMFIX_VMFLAGS] + and dword [ss:PATM_ASMFIX_VMFLAGS], PATM_VIRTUAL_FLAGS_MASK + or dword [ss:PATM_ASMFIX_VMFLAGS], PATM_VIRTUAL_FLAGS_MASK + + DB 0x66 ; size override + popf ;after the and and or operations!! (flags must be preserved) + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMPopf16Replacement_NoExit + +; Patch record for 'popf' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPopf16Record_NoExit + PATCHASMRECORD_INIT PATMPopf16Replacement_NoExit, 9 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPushf32Replacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushfd +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + mov eax, PATM_ACTION_LOG_PUSHF + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop ecx + pop eax +%endif + + pushfd + push eax + mov eax, dword [esp+8] + and eax, PATM_FLAGS_MASK + or eax, dword [ss:PATM_ASMFIX_VMFLAGS] + mov dword [esp+8], eax + pop eax + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMPushf32Replacement + +; Patch record for 'pushfd' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPushf32Record +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT PATMPushf32Replacement, 4 +%else + PATCHASMRECORD_INIT PATMPushf32Replacement, 3 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPushf16Replacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + DB 0x66 ; size override + pushf + DB 0x66 ; size override + pushf + push eax + xor 
eax, eax + mov ax, word [esp+6] + and eax, PATM_FLAGS_MASK + or eax, dword [ss:PATM_ASMFIX_VMFLAGS] + mov word [esp+6], ax + pop eax + + DB 0x66 ; size override + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMPushf16Replacement + +; Patch record for 'pushf' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPushf16Record + PATCHASMRECORD_INIT PATMPushf16Replacement, 3 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMPushCSReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + push cs + pushfd + + test dword [esp+4], 2 + jnz pushcs_notring1 + + ; change dpl from 1 to 0 + and dword [esp+4], dword ~1 ; yasm / nasm dword + +pushcs_notring1: + popfd + + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMPushCSJump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMPushCSReplacement + +; Patch record for 'push cs' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmPushCSRecord + PATCHASMRECORD_INIT_JUMP PATMPushCSReplacement, PATMPushCSJump, 2 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +;**************************************************** +; Abstract: +; +; if eflags.NT==0 && iretstack.eflags.VM==0 && iretstack.eflags.IOPL==0 +; then +; if return to ring 0 (iretstack.new_cs & 3 == 0) +; then +; if iretstack.new_eflags.IF == 1 && iretstack.new_eflags.IOPL == 0 +; then +; iretstack.new_cs |= 1 +; else +; int 3 +; endif +; uVMFlags &= ~X86_EFL_IF +; iret +; else +; int 3 +;**************************************************** +; +; Stack: +; +; esp + 32 - GS (V86 only) +; esp + 28 - FS (V86 only) +; esp + 24 - DS (V86 only) +; esp + 20 - ES (V86 only) +; esp + 16 - SS (if transfer to outer ring) +; esp + 12 - ESP (if transfer to outer ring) +; esp + 8 - EFLAGS +; esp + 4 - CS +; esp - EIP +;; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMIretReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushfd + +%ifdef PATM_LOG_PATCHIRET + push eax + push ecx + push edx + lea edx, dword [ss:esp+12+4] ;3 dwords + pushed flags -> iret eip + mov eax, PATM_ACTION_LOG_IRET + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + test dword [esp], X86_EFL_NT + jnz near iret_fault1 + + ; we can't do an iret to v86 code, as we run with CPL=1. The iret would attempt a protected mode iret and (most likely) fault. + test dword [esp+12], X86_EFL_VM + jnz near iret_return_to_v86 + + ;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ;;@todo: not correct for iret back to ring 2!!!!! + ;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + test dword [esp+8], 2 + jnz iret_notring0 + + test dword [esp+12], X86_EFL_IF + jz near iret_clearIF + + ; force ring 1 CS RPL + or dword [esp+8], 1 ;-> @todo we leave traces or raw mode if we jump back to the host context to handle pending interrupts! (below) +iret_notring0: + +; if interrupts are pending, then we must go back to the host context to handle them! 
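; Editor's note: under raw mode the guest kernel does not really run in ring 0, so the
; iret replacements rewrite the privilege level stored in the iret frame before executing
; the real iretd: a return the guest intends for ring 0 has its CS RPL forced to 1 (as
; done just above), while the ring-1 flavour further down converts genuine ring-1 returns
; to ring 2 by adjusting both the CS and SS images.  The IOPL bits of the returned EFLAGS
; are captured into PATM_ASMFIX_VMFLAGS and cleared on the real frame, keeping IOPL
; fully virtualised as well.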
+; Note: This is very important as pending pic interrupts can be overridden by apic interrupts if we don't check early enough (Fedora 5 boot) +; @@todo fix this properly, so we can dispatch pending interrupts in GC + test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC + jz iret_continue + +; Go to our hypervisor trap handler to dispatch the pending irq + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_EDI], edi + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX | PATM_RESTORE_EDI + mov eax, PATM_ACTION_PENDING_IRQ_AFTER_IRET + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + mov edi, PATM_ASMFIX_CURINSTRADDR + + popfd + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + +iret_continue : + ; This section must *always* be executed (!!) + ; Extract the IOPL from the return flags, save them to our virtual flags and + ; put them back to zero + ; @note we assume iretd doesn't fault!!! + push eax + mov eax, dword [esp+16] + and eax, X86_EFL_IOPL + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IOPL + or dword [ss:PATM_ASMFIX_VMFLAGS], eax + pop eax + and dword [esp+12], ~X86_EFL_IOPL + + ; Set IF again; below we make sure this won't cause problems. + or dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + + ; make sure iret is executed fully (including the iret below; cli ... iret can otherwise be interrupted) + mov dword [ss:PATM_ASMFIX_INHIBITIRQADDR], PATM_ASMFIX_CURINSTRADDR + + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + iretd + PATM_INT3 + +iret_fault: + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +iret_fault1: + nop + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +iret_clearIF: + push dword [esp+4] ; eip to return to + pushfd + push eax + push PATM_ASMFIX_FIXUP + DB 0E8h ; call + DD PATM_ASMFIX_IRET_FUNCTION + add esp, 4 ; pushed address of jump table + + cmp eax, 0 + je near iret_fault3 + + mov dword [esp+12+4], eax ; stored eip in iret frame + pop eax + popfd + add esp, 4 ; pushed eip + + ; always ring 0 return -> change to ring 1 (CS in iret frame) + or dword [esp+8], 1 + + ; This section must *always* be executed (!!) + ; Extract the IOPL from the return flags, save them to our virtual flags and + ; put them back to zero + push eax + mov eax, dword [esp+16] + and eax, X86_EFL_IOPL + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IOPL + or dword [ss:PATM_ASMFIX_VMFLAGS], eax + pop eax + and dword [esp+12], ~X86_EFL_IOPL + + ; Clear IF + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IF + popfd + + ; the patched destination code will set PATM_ASMFIX_INTERRUPTFLAG after the return! 
+ iretd + +iret_return_to_v86: + test dword [esp+12], X86_EFL_IF + jz iret_fault + + ; Go to our hypervisor trap handler to perform the iret to v86 code + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX + mov eax, PATM_ACTION_DO_V86_IRET + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + + popfd + + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + + +iret_fault3: + pop eax + popfd + add esp, 4 ; pushed eip + jmp iret_fault + +align 4 +PATMIretTable: + DW PATM_MAX_JUMPTABLE_ENTRIES ; nrSlots + DW 0 ; ulInsertPos + DD 0 ; cAddresses + TIMES PATCHJUMPTABLE_SIZE DB 0 ; lookup slots + +ENDPROC PATMIretReplacement + +; Patch record for 'iretd' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmIretRecord +%ifdef PATM_LOG_PATCHIRET + PATCHASMRECORD_INIT PATMIretReplacement, 26 +%else + PATCHASMRECORD_INIT PATMIretReplacement, 25 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHIRET + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_EDI, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_CURINSTRADDR, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INHIBITIRQADDR, 0 + DD PATM_ASMFIX_CURINSTRADDR, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_FIXUP, PATMIretTable - NAME(PATMIretReplacement) + DD PATM_ASMFIX_IRET_FUNCTION, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +;**************************************************** +; Abstract: +; +; if eflags.NT==0 && iretstack.eflags.VM==0 && iretstack.eflags.IOPL==0 +; then +; if return to ring 0 (iretstack.new_cs & 3 == 0) +; then +; if iretstack.new_eflags.IF == 1 && iretstack.new_eflags.IOPL == 0 +; then +; iretstack.new_cs |= 1 +; else +; int 3 +; endif +; uVMFlags &= ~X86_EFL_IF +; iret +; else +; int 3 +;**************************************************** +; +; Stack: +; +; esp + 32 - GS (V86 only) +; esp + 28 - FS (V86 only) +; esp + 24 - DS (V86 only) +; esp + 20 - ES (V86 only) +; esp + 16 - SS (if transfer to outer ring) +; esp + 12 - ESP (if transfer to outer ring) +; esp + 8 - EFLAGS +; esp + 4 - CS +; esp - EIP +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMIretRing1Replacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushfd + +%ifdef PATM_LOG_PATCHIRET + push eax + push ecx + push edx + lea edx, dword [ss:esp+12+4] ;3 dwords + pushed flags -> iret eip + mov eax, PATM_ACTION_LOG_IRET + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + test dword [esp], X86_EFL_NT + jnz near iretring1_fault1 + + ; we can't do an iret to v86 code, as we run with CPL=1. The iret would attempt a protected mode iret and (most likely) fault. + test dword [esp+12], X86_EFL_VM + jnz near iretring1_return_to_v86 + + ;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ;;@todo: not correct for iret back to ring 2!!!!! 
+ ;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + test dword [esp+8], 2 + jnz iretring1_checkpendingirq + + test dword [esp+12], X86_EFL_IF + jz near iretring1_clearIF + +iretring1_checkpendingirq: + +; if interrupts are pending, then we must go back to the host context to handle them! +; Note: This is very important as pending pic interrupts can be overridden by apic interrupts if we don't check early enough (Fedora 5 boot) +; @@todo fix this properly, so we can dispatch pending interrupts in GC + test dword [ss:PATM_ASMFIX_VM_FORCEDACTIONS], VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC + jz iretring1_continue + +; Go to our hypervisor trap handler to dispatch the pending irq + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_EDI], edi + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX | PATM_RESTORE_EDI + mov eax, PATM_ACTION_PENDING_IRQ_AFTER_IRET + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + mov edi, PATM_ASMFIX_CURINSTRADDR + + popfd + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + +iretring1_continue: + + test dword [esp+8], 2 + jnz iretring1_notring01 + + test dword [esp+8], 1 + jz iretring1_ring0 + + ; ring 1 return change CS & SS RPL to 2 from 1 + and dword [esp+8], ~1 ; CS + or dword [esp+8], 2 + + and dword [esp+20], ~1 ; SS + or dword [esp+20], 2 + + jmp short iretring1_notring01 +iretring1_ring0: + ; force ring 1 CS RPL + or dword [esp+8], 1 + +iretring1_notring01: + ; This section must *always* be executed (!!) + ; Extract the IOPL from the return flags, save them to our virtual flags and + ; put them back to zero + ; @note we assume iretd doesn't fault!!! + push eax + mov eax, dword [esp+16] + and eax, X86_EFL_IOPL + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IOPL + or dword [ss:PATM_ASMFIX_VMFLAGS], eax + pop eax + and dword [esp+12], ~X86_EFL_IOPL + + ; Set IF again; below we make sure this won't cause problems. + or dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + + ; make sure iret is executed fully (including the iret below; cli ... iret can otherwise be interrupted) + mov dword [ss:PATM_ASMFIX_INHIBITIRQADDR], PATM_ASMFIX_CURINSTRADDR + + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + iretd + PATM_INT3 + +iretring1_fault: + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +iretring1_fault1: + nop + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +iretring1_clearIF: + push dword [esp+4] ; eip to return to + pushfd + push eax + push PATM_ASMFIX_FIXUP + DB 0E8h ; call + DD PATM_ASMFIX_IRET_FUNCTION + add esp, 4 ; pushed address of jump table + + cmp eax, 0 + je near iretring1_fault3 + + mov dword [esp+12+4], eax ; stored eip in iret frame + pop eax + popfd + add esp, 4 ; pushed eip + + ; This section must *always* be executed (!!) 
+ ; Extract the IOPL from the return flags, save them to our virtual flags and + ; put them back to zero + push eax + mov eax, dword [esp+16] + and eax, X86_EFL_IOPL + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IOPL + or dword [ss:PATM_ASMFIX_VMFLAGS], eax + pop eax + and dword [esp+12], ~X86_EFL_IOPL + + ; Clear IF + and dword [ss:PATM_ASMFIX_VMFLAGS], ~X86_EFL_IF + popfd + + test dword [esp+8], 1 + jz iretring1_clearIF_ring0 + + ; ring 1 return change CS & SS RPL to 2 from 1 + and dword [esp+8], ~1 ; CS + or dword [esp+8], 2 + + and dword [esp+20], ~1 ; SS + or dword [esp+20], 2 + ; the patched destination code will set PATM_ASMFIX_INTERRUPTFLAG after the return! + iretd + +iretring1_clearIF_ring0: + ; force ring 1 CS RPL + or dword [esp+8], 1 + ; the patched destination code will set PATM_ASMFIX_INTERRUPTFLAG after the return! + iretd + +iretring1_return_to_v86: + test dword [esp+12], X86_EFL_IF + jz iretring1_fault + + ; Go to our hypervisor trap handler to perform the iret to v86 code + mov dword [ss:PATM_ASMFIX_TEMP_EAX], eax + mov dword [ss:PATM_ASMFIX_TEMP_ECX], ecx + mov dword [ss:PATM_ASMFIX_TEMP_RESTORE_FLAGS], PATM_RESTORE_EAX | PATM_RESTORE_ECX + mov eax, PATM_ACTION_DO_V86_IRET + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], eax + mov ecx, PATM_ACTION_MAGIC + + popfd + + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + ; does not return + + +iretring1_fault3: + pop eax + popfd + add esp, 4 ; pushed eip + jmp iretring1_fault + +align 4 +PATMIretRing1Table: + DW PATM_MAX_JUMPTABLE_ENTRIES ; nrSlots + DW 0 ; ulInsertPos + DD 0 ; cAddresses + TIMES PATCHJUMPTABLE_SIZE DB 0 ; lookup slots + +ENDPROC PATMIretRing1Replacement + +; Patch record for 'iretd' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmIretRing1Record +%ifdef PATM_LOG_PATCHIRET + PATCHASMRECORD_INIT PATMIretRing1Replacement, 26 +%else + PATCHASMRECORD_INIT PATMIretRing1Replacement, 25 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 +%ifdef PATM_LOG_PATCHIRET + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_VM_FORCEDACTIONS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_EDI, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_CURINSTRADDR, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INHIBITIRQADDR, 0 + DD PATM_ASMFIX_CURINSTRADDR, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_FIXUP, PATMIretRing1Table - NAME(PATMIretRing1Replacement) + DD PATM_ASMFIX_IRET_FUNCTION, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_ECX, 0 + DD PATM_ASMFIX_TEMP_RESTORE_FLAGS, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; global function for implementing 'iret' to code with IF cleared +; +; Caller is responsible for right stack layout +; + 16 original return address +; + 12 eflags +; + 8 eax +; + 4 Jump table address +;( + 0 return address ) +; +; @note assumes PATM_ASMFIX_INTERRUPTFLAG is zero +; @note assumes it can trash eax and eflags +; +; @returns eax=0 on failure +; otherwise return address in eax +; +; @note NEVER change this without bumping the SSM version +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMIretFunction + push ecx + push edx + push edi + + ; Event order: + ; 1) Check if the return patch address can be found in the lookup table + ; 2) Query return patch address from 
the hypervisor + + ; 1) Check if the return patch address can be found in the lookup table + mov edx, dword [esp+12+16] ; pushed target address + + xor eax, eax ; default result -> nothing found + mov edi, dword [esp+12+4] ; jump table + mov ecx, [ss:edi + PATCHJUMPTABLE.cAddresses] + cmp ecx, 0 + je near PATMIretFunction_AskHypervisor + +PATMIretFunction_SearchStart: + cmp [ss:edi + PATCHJUMPTABLE.Slot_pInstrGC + eax*8], edx ; edx = GC address to search for + je near PATMIretFunction_SearchHit + inc eax + cmp eax, ecx + jl near PATMIretFunction_SearchStart + +PATMIretFunction_AskHypervisor: + ; 2) Query return patch address from the hypervisor + ; @todo private ugly interface, since we have nothing generic at the moment + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOOKUP_ADDRESS + mov eax, PATM_ACTION_LOOKUP_ADDRESS + mov ecx, PATM_ACTION_MAGIC + mov edi, dword [esp+12+4] ; jump table address + mov edx, dword [esp+12+16] ; original return address + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + jmp near PATMIretFunction_SearchEnd + +PATMIretFunction_SearchHit: + mov eax, [ss:edi + PATCHJUMPTABLE.Slot_pRelPatchGC + eax*8] ; found a match! + ;@note can be zero, so the next check is required!! + +PATMIretFunction_SearchEnd: + cmp eax, 0 + jz PATMIretFunction_Failure + + add eax, PATM_ASMFIX_PATCHBASE + + pop edi + pop edx + pop ecx + ret + +PATMIretFunction_Failure: + ;signal error + xor eax, eax + pop edi + pop edx + pop ecx + ret +ENDPROC PATMIretFunction + +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmIretFunctionRecord + PATCHASMRECORD_INIT PATMIretFunction, 2 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_PATCHBASE, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; PATMCpuidReplacement +; +; Calls a helper function that does the job. +; +; This way we can change the CPUID structures and how we organize them without +; breaking patches. It also saves a bit of memory for patch code and fixups. 
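+; Apart from the stack probe and the PATM_ASMFIX_INTERRUPTFLAG updates, the body below is just a
+; pushf/popf pair around a near call; the call's rel32 operand is the PATM_ASMFIX_PATCH_HLP_CPUM_CPUID
+; fixup, which the patch generator resolves to the CPUM CPUID helper (cf. the helper-call fixup
+; handling in PATMPatch.cpp) and corrects again when a saved state is loaded.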
+; +BEGIN_PATCH g_patmCpuidRecord, PATMCpuidReplacement + not dword [esp-32] ; probe stack before starting + not dword [esp-32] + + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 +PATCH_FIXUP PATM_ASMFIX_INTERRUPTFLAG + pushf + + db 0e8h ; call + dd PATM_ASMFIX_PATCH_HLP_CPUM_CPUID +PATCH_FIXUP PATM_ASMFIX_PATCH_HLP_CPUM_CPUID + + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +PATCH_FIXUP PATM_ASMFIX_INTERRUPTFLAG +END_PATCH g_patmCpuidRecord, PATMCpuidReplacement + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMJEcxReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushfd +PATMJEcxSizeOverride: + DB 0x90 ; nop + cmp ecx, dword 0 ; yasm / nasm dword + jnz PATMJEcxContinue + + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMJEcxJump: + DD PATM_ASMFIX_JUMPDELTA + +PATMJEcxContinue: + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMJEcxReplacement + +; Patch record for 'JEcx' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmJEcxRecord + PATCHASMRECORD_INIT_EX PATMJEcxReplacement, , PATMJEcxJump, PATMJEcxSizeOverride, 3 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMLoopReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushfd +PATMLoopSizeOverride: + DB 0x90 ; nop + dec ecx + jz PATMLoopContinue + + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMLoopJump: + DD PATM_ASMFIX_JUMPDELTA + +PATMLoopContinue: + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMLoopReplacement + +; Patch record for 'Loop' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmLoopRecord + PATCHASMRECORD_INIT_EX PATMLoopReplacement, , PATMLoopJump, PATMLoopSizeOverride, 3 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; jump if ZF=1 AND (E)CX != 0 +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMLoopZReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + jnz NAME(PATMLoopZReplacement_EndProc) + pushfd +PATMLoopZSizeOverride: + DB 0x90 ; nop + dec ecx + jz PATMLoopZContinue + + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMLoopZJump: + DD PATM_ASMFIX_JUMPDELTA + +PATMLoopZContinue: + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMLoopZReplacement + +; Patch record for 'Loopz' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmLoopZRecord + PATCHASMRECORD_INIT_EX PATMLoopZReplacement, , PATMLoopZJump, PATMLoopZSizeOverride, 3 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; jump if ZF=0 AND (E)CX != 0 +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMLoopNZReplacement + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + jz NAME(PATMLoopNZReplacement_EndProc) + pushfd +PATMLoopNZSizeOverride: + DB 0x90 ; nop + dec ecx + jz PATMLoopNZContinue + + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMLoopNZJump: + DD PATM_ASMFIX_JUMPDELTA + +PATMLoopNZContinue: + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 +ENDPROC PATMLoopNZReplacement + +; Patch record for 'LoopNZ' +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmLoopNZRecord + PATCHASMRECORD_INIT_EX PATMLoopNZReplacement, , PATMLoopNZJump, PATMLoopNZSizeOverride, 3 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Global patch function for 
indirect calls +; Caller is responsible for clearing PATM_ASMFIX_INTERRUPTFLAG and doing: +; + 20 push [pTargetGC] +; + 16 pushfd +; + 12 push [JumpTableAddress] +; + 8 push [PATMRelReturnAddress] +; + 4 push [GuestReturnAddress] +;( + 0 return address ) +; +; @note NEVER change this without bumping the SSM version +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMLookupAndCall + push eax + push edx + push edi + push ecx + + mov eax, dword [esp+16+4] ; guest return address + mov dword [ss:PATM_ASMFIX_CALL_RETURN_ADDR], eax ; temporary storage + + mov edx, dword [esp+16+20] ; pushed target address + + xor eax, eax ; default result -> nothing found + mov edi, dword [esp+16+12] ; jump table + mov ecx, [ss:edi + PATCHJUMPTABLE.cAddresses] + cmp ecx, 0 + je near PATMLookupAndCall_QueryPATM + +PATMLookupAndCall_SearchStart: + cmp [ss:edi + PATCHJUMPTABLE.Slot_pInstrGC + eax*8], edx ; edx = GC address to search for + je near PATMLookupAndCall_SearchHit + inc eax + cmp eax, ecx + jl near PATMLookupAndCall_SearchStart + +PATMLookupAndCall_QueryPATM: + ; nothing found -> let our trap handler try to find it + ; @todo private ugly interface, since we have nothing generic at the moment + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOOKUP_ADDRESS + mov eax, PATM_ACTION_LOOKUP_ADDRESS + mov ecx, PATM_ACTION_MAGIC + ; edx = GC address to find + ; edi = jump table address + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + + jmp near PATMLookupAndCall_SearchEnd + +PATMLookupAndCall_Failure: + ; return to caller; it must raise an error, due to patch to guest address translation (remember that there's only one copy of this code block). + pop ecx + pop edi + pop edx + pop eax + ret + +PATMLookupAndCall_SearchHit: + mov eax, [ss:edi + PATCHJUMPTABLE.Slot_pRelPatchGC + eax*8] ; found a match! + + ;@note can be zero, so the next check is required!! + +PATMLookupAndCall_SearchEnd: + cmp eax, 0 + je near PATMLookupAndCall_Failure + + mov ecx, eax ; ECX = target address (relative!) + add ecx, PATM_ASMFIX_PATCHBASE ; Make it absolute + + mov edx, dword PATM_ASMFIX_STACKPTR + cmp dword [ss:edx], PATM_STACK_SIZE + ja near PATMLookupAndCall_Failure ; should never happen actually!!! 
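+    ; The PATM stack pointer counts down from PATM_STACK_SIZE (empty) towards 0 (full); each
+    ; call reserves 4 bytes in two mirrored stacks: PATM_ASMFIX_STACKBASE holds the relative
+    ; patch return address and PATM_ASMFIX_STACKBASE_GUEST the original guest return address.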
+ cmp dword [ss:edx], 0 + je near PATMLookupAndCall_Failure ; no more room + + ; save the patch return address on our private stack + sub dword [ss:edx], 4 ; sizeof(RTGCPTR) + mov eax, dword PATM_ASMFIX_STACKBASE + add eax, dword [ss:edx] ; stack base + stack position + mov edi, dword [esp+16+8] ; PATM return address + mov dword [ss:eax], edi ; relative address of patch return (instruction following this block) + + ; save the original return address as well (checked by ret to make sure the guest hasn't messed around with the stack) + mov edi, dword PATM_ASMFIX_STACKBASE_GUEST + add edi, dword [ss:edx] ; stack base (guest) + stack position + mov eax, dword [esp+16+4] ; guest return address + mov dword [ss:edi], eax + + mov dword [ss:PATM_ASMFIX_CALL_PATCH_TARGET_ADDR], ecx ; temporarily store the target address + pop ecx + pop edi + pop edx + pop eax + add esp, 24 ; parameters + return address pushed by caller (changes the flags, but that shouldn't matter) + +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + push edx + lea edx, [esp + 12 - 4] ; stack address to store return address + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOG_CALL + mov eax, PATM_ACTION_LOG_CALL + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop eax +%endif + + push dword [ss:PATM_ASMFIX_CALL_RETURN_ADDR] ; push original guest return address + + ; the called function will set PATM_ASMFIX_INTERRUPTFLAG (!!) + jmp dword [ss:PATM_ASMFIX_CALL_PATCH_TARGET_ADDR] + ; returning here -> do not add code here or after the jmp!!!!! +ENDPROC PATMLookupAndCall + +; Patch record for indirect calls and jumps +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmLookupAndCallRecord +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT PATMLookupAndCall, 10 +%else + PATCHASMRECORD_INIT PATMLookupAndCall, 9 +%endif + DD PATM_ASMFIX_CALL_RETURN_ADDR, 0 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_PATCHBASE, 0 + DD PATM_ASMFIX_STACKPTR, 0 + DD PATM_ASMFIX_STACKBASE, 0 + DD PATM_ASMFIX_STACKBASE_GUEST, 0 + DD PATM_ASMFIX_CALL_PATCH_TARGET_ADDR, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_CALL_RETURN_ADDR, 0 + DD PATM_ASMFIX_CALL_PATCH_TARGET_ADDR, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Global patch function for indirect jumps +; Caller is responsible for clearing PATM_ASMFIX_INTERRUPTFLAG and doing: +; + 8 push [pTargetGC] +; + 4 push [JumpTableAddress] +;( + 0 return address ) +; And saving eflags in PATM_ASMFIX_TEMP_EFLAGS +; +; @note NEVER change this without bumping the SSM version +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMLookupAndJump + push eax + push edx + push edi + push ecx + + mov edx, dword [esp+16+8] ; pushed target address + + xor eax, eax ; default result -> nothing found + mov edi, dword [esp+16+4] ; jump table + mov ecx, [ss:edi + PATCHJUMPTABLE.cAddresses] + cmp ecx, 0 + je near PATMLookupAndJump_QueryPATM + +PATMLookupAndJump_SearchStart: + cmp [ss:edi + PATCHJUMPTABLE.Slot_pInstrGC + eax*8], edx ; edx = GC address to search for + je near PATMLookupAndJump_SearchHit + inc eax + cmp eax, ecx + jl near PATMLookupAndJump_SearchStart + +PATMLookupAndJump_QueryPATM: + ; nothing found -> let our trap handler try to find it + ; @todo private ugly interface, since we have nothing generic at the moment + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOOKUP_ADDRESS + mov eax, PATM_ACTION_LOOKUP_ADDRESS + mov ecx, PATM_ACTION_MAGIC + ; edx = GC address to find + ; edi = jump 
table address + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + + jmp near PATMLookupAndJump_SearchEnd + +PATMLookupAndJump_Failure: + ; return to caller; it must raise an error, due to patch to guest address translation (remember that there's only one copy of this code block). + pop ecx + pop edi + pop edx + pop eax + ret + +PATMLookupAndJump_SearchHit: + mov eax, [ss:edi + PATCHJUMPTABLE.Slot_pRelPatchGC + eax*8] ; found a match! + + ;@note can be zero, so the next check is required!! + +PATMLookupAndJump_SearchEnd: + cmp eax, 0 + je near PATMLookupAndJump_Failure + + mov ecx, eax ; ECX = target address (relative!) + add ecx, PATM_ASMFIX_PATCHBASE ; Make it absolute + + ; save jump patch target + mov dword [ss:PATM_ASMFIX_TEMP_EAX], ecx + pop ecx + pop edi + pop edx + pop eax + add esp, 12 ; parameters + return address pushed by caller + ; restore flags (just to be sure) + push dword [ss:PATM_ASMFIX_TEMP_EFLAGS] + popfd + + ; the jump destination will set PATM_ASMFIX_INTERRUPTFLAG (!!) + jmp dword [ss:PATM_ASMFIX_TEMP_EAX] ; call duplicated patch destination address +ENDPROC PATMLookupAndJump + +; Patch record for indirect calls and jumps +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmLookupAndJumpRecord + PATCHASMRECORD_INIT PATMLookupAndJump, 5 + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_PATCHBASE, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD PATM_ASMFIX_TEMP_EFLAGS, 0 + DD PATM_ASMFIX_TEMP_EAX, 0 + DD 0ffffffffh, 0ffffffffh + + +; Patch function for static calls +; @note static calls have only one lookup slot! +; Caller is responsible for clearing PATM_ASMFIX_INTERRUPTFLAG and adding: +; push [pTargetGC] +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMCall + pushfd + push PATM_ASMFIX_FIXUP ; fixup for jump table below + push PATM_ASMFIX_PATCHNEXTBLOCK + push PATM_ASMFIX_RETURNADDR + DB 0E8h ; call + DD PATM_ASMFIX_LOOKUP_AND_CALL_FUNCTION + ; we only return in case of a failure + add esp, 12 ; pushed address of jump table + popfd + add esp, 4 ; pushed by caller (changes the flags, but that shouldn't matter (@todo)) + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 +%ifdef DEBUG + ; for disassembly + jmp NAME(PATMCall_EndProc) +%endif + +align 4 +PATMCallTable: + DW 1 ; nrSlots + DW 0 ; ulInsertPos + DD 0 ; cAddresses + TIMES PATCHDIRECTJUMPTABLE_SIZE DB 0 ; only one lookup slot + + ; returning here -> do not add code here or after the jmp!!!!! 
+ENDPROC PATMCall + +; Patch record for direct calls +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmCallRecord + PATCHASMRECORD_INIT PATMCall, 5 + DD PATM_ASMFIX_FIXUP, PATMCallTable - NAME(PATMCall) + DD PATM_ASMFIX_PATCHNEXTBLOCK, 0 + DD PATM_ASMFIX_RETURNADDR, 0 + DD PATM_ASMFIX_LOOKUP_AND_CALL_FUNCTION, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; Patch function for indirect calls +; Caller is responsible for clearing PATM_ASMFIX_INTERRUPTFLAG and adding: +; push [pTargetGC] +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMCallIndirect + pushfd + push PATM_ASMFIX_FIXUP ; fixup for jump table below + push PATM_ASMFIX_PATCHNEXTBLOCK + push PATM_ASMFIX_RETURNADDR + DB 0E8h ; call + DD PATM_ASMFIX_LOOKUP_AND_CALL_FUNCTION + ; we only return in case of a failure + add esp, 12 ; pushed address of jump table + popfd + add esp, 4 ; pushed by caller (changes the flags, but that shouldn't matter (@todo)) + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 +%ifdef DEBUG + ; for disassembly + jmp NAME(PATMCallIndirect_EndProc) +%endif + +align 4 +PATMCallIndirectTable: + DW PATM_MAX_JUMPTABLE_ENTRIES ; nrSlots + DW 0 ; ulInsertPos + DD 0 ; cAddresses + TIMES PATCHJUMPTABLE_SIZE DB 0 ; lookup slots + + ; returning here -> do not add code here or after the jmp!!!!! +ENDPROC PATMCallIndirect + +; Patch record for indirect calls +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmCallIndirectRecord + PATCHASMRECORD_INIT PATMCallIndirect, 5 + DD PATM_ASMFIX_FIXUP, PATMCallIndirectTable - NAME(PATMCallIndirect) + DD PATM_ASMFIX_PATCHNEXTBLOCK, 0 + DD PATM_ASMFIX_RETURNADDR, 0 + DD PATM_ASMFIX_LOOKUP_AND_CALL_FUNCTION, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Patch function for indirect jumps +; Caller is responsible for clearing PATM_ASMFIX_INTERRUPTFLAG and adding: +; push [pTargetGC] +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMJumpIndirect + ; save flags (just to be sure) + pushfd + pop dword [ss:PATM_ASMFIX_TEMP_EFLAGS] + + push PATM_ASMFIX_FIXUP ; fixup for jump table below + DB 0E8h ; call + DD PATM_ASMFIX_LOOKUP_AND_JUMP_FUNCTION + ; we only return in case of a failure + add esp, 8 ; pushed address of jump table + pushed target address + + ; restore flags (just to be sure) + push dword [ss:PATM_ASMFIX_TEMP_EFLAGS] + popfd + + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +%ifdef DEBUG + ; for disassembly + jmp NAME(PATMJumpIndirect_EndProc) +%endif + +align 4 +PATMJumpIndirectTable: + DW PATM_MAX_JUMPTABLE_ENTRIES ; nrSlots + DW 0 ; ulInsertPos + DD 0 ; cAddresses + TIMES PATCHJUMPTABLE_SIZE DB 0 ; lookup slots + + ; returning here -> do not add code here or after the jmp!!!!! 
+ENDPROC PATMJumpIndirect + +; Patch record for indirect jumps +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmJumpIndirectRecord + PATCHASMRECORD_INIT PATMJumpIndirect, 5 + DD PATM_ASMFIX_TEMP_EFLAGS, 0 + DD PATM_ASMFIX_FIXUP, PATMJumpIndirectTable - NAME(PATMJumpIndirect) + DD PATM_ASMFIX_LOOKUP_AND_JUMP_FUNCTION, 0 + DD PATM_ASMFIX_TEMP_EFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; return from duplicated function +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMRet + ; probe stack here as we can't recover from page faults later on + not dword [esp-32] + not dword [esp-32] + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushfd + push eax + push PATM_ASMFIX_FIXUP + DB 0E8h ; call + DD PATM_ASMFIX_RETURN_FUNCTION + add esp, 4 ; pushed address of jump table + + cmp eax, 0 + jne near PATMRet_Success + + pop eax + popfd + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +%ifdef DEBUG + ; for disassembly + jmp PATMRet_Success +%endif +align 4 +PATMRetTable: + DW PATM_MAX_JUMPTABLE_ENTRIES ; nrSlots + DW 0 ; ulInsertPos + DD 0 ; cAddresses + TIMES PATCHJUMPTABLE_SIZE DB 0 ; lookup slots + +PATMRet_Success: + mov dword [esp+8], eax ; overwrite the saved return address + pop eax + popf + ; caller will duplicate the ret or ret n instruction + ; the patched call will set PATM_ASMFIX_INTERRUPTFLAG after the return! +ENDPROC PATMRet + +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmRetRecord + PATCHASMRECORD_INIT PATMRet, 4 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_FIXUP, PATMRetTable - NAME(PATMRet) + DD PATM_ASMFIX_RETURN_FUNCTION, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; global function for implementing 'retn' +; +; Caller is responsible for right stack layout +; + 16 original return address +; + 12 eflags +; + 8 eax +; + 4 Jump table address +;( + 0 return address ) +; +; @note assumes PATM_ASMFIX_INTERRUPTFLAG is zero +; @note assumes it can trash eax and eflags +; +; @returns eax=0 on failure +; otherwise return address in eax +; +; @note NEVER change this without bumping the SSM version +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMRetFunction + push ecx + push edx + push edi + + ; Event order: + ; (@todo figure out which path is taken most often (1 or 2)) + ; 1) Check if the return patch address was pushed onto the PATM stack + ; 2) Check if the return patch address can be found in the lookup table + ; 3) Query return patch address from the hypervisor + + + ; 1) Check if the return patch address was pushed on the PATM stack + cmp dword [ss:PATM_ASMFIX_STACKPTR], PATM_STACK_SIZE + jae near PATMRetFunction_FindReturnAddress + + mov edx, dword PATM_ASMFIX_STACKPTR + + ; check if the return address is what we expect it to be + mov eax, dword PATM_ASMFIX_STACKBASE_GUEST + add eax, dword [ss:edx] ; stack base + stack position + mov eax, dword [ss:eax] ; original return address + cmp eax, dword [esp+12+16] ; pushed return address + + ; the return address was changed -> let our trap handler try to find it + ; (can happen when the guest messes with the stack (seen it) or when we didn't call this function ourselves) + jne near PATMRetFunction_FindReturnAddress + + ; found it, convert relative to absolute patch address and return the result to the caller + mov eax, dword PATM_ASMFIX_STACKBASE + add eax, dword [ss:edx] ; stack base + stack position + mov eax, dword [ss:eax] ; relative patm return address + add eax, PATM_ASMFIX_PATCHBASE + +%ifdef PATM_LOG_PATCHINSTR + push eax + push ebx + push ecx + push edx + mov edx, eax ; 
return address + lea ebx, [esp+16+12+16] ; stack address containing the return address + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOG_RET + mov eax, PATM_ACTION_LOG_RET + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop ebx + pop eax +%endif + + add dword [ss:edx], 4 ; pop return address from the PATM stack (sizeof(RTGCPTR); @note hardcoded assumption!) + + pop edi + pop edx + pop ecx + ret + +PATMRetFunction_FindReturnAddress: + ; 2) Check if the return patch address can be found in the lookup table + mov edx, dword [esp+12+16] ; pushed target address + + xor eax, eax ; default result -> nothing found + mov edi, dword [esp+12+4] ; jump table + mov ecx, [ss:edi + PATCHJUMPTABLE.cAddresses] + cmp ecx, 0 + je near PATMRetFunction_AskHypervisor + +PATMRetFunction_SearchStart: + cmp [ss:edi + PATCHJUMPTABLE.Slot_pInstrGC + eax*8], edx ; edx = GC address to search for + je near PATMRetFunction_SearchHit + inc eax + cmp eax, ecx + jl near PATMRetFunction_SearchStart + +PATMRetFunction_AskHypervisor: + ; 3) Query return patch address from the hypervisor + ; @todo private ugly interface, since we have nothing generic at the moment + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOOKUP_ADDRESS + mov eax, PATM_ACTION_LOOKUP_ADDRESS + mov ecx, PATM_ACTION_MAGIC + mov edi, dword [esp+12+4] ; jump table address + mov edx, dword [esp+12+16] ; original return address + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + jmp near PATMRetFunction_SearchEnd + +PATMRetFunction_SearchHit: + mov eax, [ss:edi + PATCHJUMPTABLE.Slot_pRelPatchGC + eax*8] ; found a match! + ;@note can be zero, so the next check is required!! + +PATMRetFunction_SearchEnd: + cmp eax, 0 + jz PATMRetFunction_Failure + + add eax, PATM_ASMFIX_PATCHBASE + +%ifdef PATM_LOG_PATCHINSTR + push eax + push ebx + push ecx + push edx + mov edx, eax ; return address + lea ebx, [esp+16+12+16] ; stack address containing the return address + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOG_RET + mov eax, PATM_ACTION_LOG_RET + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop edx + pop ecx + pop ebx + pop eax +%endif + + pop edi + pop edx + pop ecx + ret + +PATMRetFunction_Failure: + ;signal error + xor eax, eax + pop edi + pop edx + pop ecx + ret +ENDPROC PATMRetFunction + +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmRetFunctionRecord +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT PATMRetFunction, 9 +%else + PATCHASMRECORD_INIT PATMRetFunction, 7 +%endif + DD PATM_ASMFIX_STACKPTR, 0 + DD PATM_ASMFIX_STACKPTR, 0 + DD PATM_ASMFIX_STACKBASE_GUEST, 0 + DD PATM_ASMFIX_STACKBASE, 0 + DD PATM_ASMFIX_PATCHBASE, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_PENDINGACTION, 0 + DD PATM_ASMFIX_PATCHBASE, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD 0ffffffffh, 0ffffffffh + + +; +; Jump to original instruction if IF=1 +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMCheckIF + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + test dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + jnz PATMCheckIF_Safe + nop + + ; IF=0 -> unsafe, so we must call the duplicated function (which we don't do here) + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + jmp NAME(PATMCheckIF_EndProc) + +PATMCheckIF_Safe: + ; invalidate the PATM stack as we'll jump back to guest code + mov dword 
[ss:PATM_ASMFIX_STACKPTR], PATM_STACK_SIZE + +%ifdef PATM_LOG_PATCHINSTR + push eax + push ecx + lock or dword [ss:PATM_ASMFIX_PENDINGACTION], PATM_ACTION_LOG_IF1 + mov eax, PATM_ACTION_LOG_IF1 + mov ecx, PATM_ACTION_MAGIC + db 0fh, 0bh ; illegal instr (hardcoded assumption in PATMHandleIllegalInstrTrap) + pop ecx + pop eax +%endif + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + ; IF=1 -> we can safely jump back to the original instruction + DB 0xE9 +PATMCheckIF_Jump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMCheckIF + +; Patch record for call instructions +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmCheckIFRecord +%ifdef PATM_LOG_PATCHINSTR + PATCHASMRECORD_INIT_JUMP PATMCheckIF, PATMCheckIF_Jump, 6 +%else + PATCHASMRECORD_INIT_JUMP PATMCheckIF, PATMCheckIF_Jump, 5 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_STACKPTR, 0 +%ifdef PATM_LOG_PATCHINSTR + DD PATM_ASMFIX_PENDINGACTION, 0 +%endif + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Jump back to guest if IF=1, else fault +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMJumpToGuest_IF1 + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 0 + pushf + test dword [ss:PATM_ASMFIX_VMFLAGS], X86_EFL_IF + jnz PATMJumpToGuest_IF1_Safe + nop + + ; IF=0 -> unsafe, so fault + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + PATM_INT3 + +PATMJumpToGuest_IF1_Safe: + ; IF=1 -> we can safely jump back to the original instruction + popf + mov dword [ss:PATM_ASMFIX_INTERRUPTFLAG], 1 + DB 0xE9 +PATMJumpToGuest_IF1_Jump: + DD PATM_ASMFIX_JUMPDELTA +ENDPROC PATMJumpToGuest_IF1 + +; Patch record for call instructions +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME PATMJumpToGuest_IF1Record + PATCHASMRECORD_INIT_JUMP PATMJumpToGuest_IF1, PATMJumpToGuest_IF1_Jump, 4 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_VMFLAGS, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD PATM_ASMFIX_INTERRUPTFLAG, 0 + DD 0ffffffffh, 0ffffffffh + + +; +; Check and correct RPL of pushed ss. +; +BEGIN_PATCH_CODE_SECTION +BEGINPROC PATMMovFromSS + push eax + pushfd + mov ax, ss + and ax, 3 + cmp ax, 1 + jne near PATMMovFromSS_Continue + + and dword [esp+8], ~3 ; clear RPL 1 +PATMMovFromSS_Continue: + popfd + pop eax +ENDPROC PATMMovFromSS + +BEGIN_PATCH_RODATA_SECTION +GLOBALNAME g_patmMovFromSSRecord + PATCHASMRECORD_INIT PATMMovFromSS, 0 + DD 0ffffffffh, 0ffffffffh + + + + +;; For assertion during init (to make absolutely sure the flags are in sync in vm.mac & vm.h) +BEGINCONST +GLOBALNAME g_fPatmInterruptFlag + DD VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_TIMER | VMCPU_FF_REQUEST + diff --git a/src/VBox/VMM/VMMR3/PATMA.mac b/src/VBox/VMM/VMMR3/PATMA.mac new file mode 100644 index 00000000..b0423502 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMA.mac @@ -0,0 +1,164 @@ +; $Id: PATMA.mac $ +;; @file +; PATM macros & definitions (identical to PATMA.h!). +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+; + +; hardcoded dependency on single byte int 3 +%define PATM_INT3 int3 + +;; @name Patch Fixup Types +; @remarks These fixups types are part of the saved state. +; @{ +%define PATM_ASMFIX_VMFLAGS 0xF1ABCD00 +%ifdef VBOX_WITH_STATISTICS + %define PATM_ASMFIX_ALLPATCHCALLS 0xF1ABCD01 + %define PATM_ASMFIX_PERPATCHCALLS 0xF1ABCD02 +%endif +%define PATM_ASMFIX_JUMPDELTA 0xF1ABCD03 +%ifdef VBOX_WITH_STATISTICS + %define PATM_ASMFIX_IRETEFLAGS 0xF1ABCD04 + %define PATM_ASMFIX_IRETCS 0xF1ABCD05 + %define PATM_ASMFIX_IRETEIP 0xF1ABCD06 +%endif +%define PATM_ASMFIX_FIXUP 0xF1ABCD07 +%define PATM_ASMFIX_PENDINGACTION 0xF1ABCD08 +%define PATM_ASMFIX_CPUID_STD_PTR 0xF1ABCD09 +%define PATM_ASMFIX_CPUID_EXT_PTR 0xF1ABCD0a +%define PATM_ASMFIX_CPUID_DEF_PTR 0xF1ABCD0b +%define PATM_ASMFIX_STACKBASE 0xF1ABCD0c ;;< Stack to store our private patch return addresses +%define PATM_ASMFIX_STACKBASE_GUEST 0xF1ABCD0d ;;< Stack to store guest return addresses +%define PATM_ASMFIX_STACKPTR 0xF1ABCD0e +%define PATM_ASMFIX_PATCHBASE 0xF1ABCD0f +%define PATM_ASMFIX_INTERRUPTFLAG 0xF1ABCD10 +%define PATM_ASMFIX_INHIBITIRQADDR 0xF1ABCD11 +%define PATM_ASMFIX_VM_FORCEDACTIONS 0xF1ABCD12 +%define PATM_ASMFIX_TEMP_EAX 0xF1ABCD13 ;;< Location for original EAX register +%define PATM_ASMFIX_TEMP_ECX 0xF1ABCD14 ;;< Location for original ECX register +%define PATM_ASMFIX_TEMP_EDI 0xF1ABCD15 ;;< Location for original EDI register +%define PATM_ASMFIX_TEMP_EFLAGS 0xF1ABCD16 ;;< Location for original eflags +%define PATM_ASMFIX_TEMP_RESTORE_FLAGS 0xF1ABCD17 ;;< Which registers to restore +%define PATM_ASMFIX_CALL_PATCH_TARGET_ADDR 0xF1ABCD18 +%define PATM_ASMFIX_CALL_RETURN_ADDR 0xF1ABCD19 +%define PATM_ASMFIX_CPUID_CENTAUR_PTR 0xF1ABCD1a +%define PATM_ASMFIX_REUSE_LATER_0 0xF1ABCD1b +%define PATM_ASMFIX_REUSE_LATER_1 0xF1ABCD1c +%define PATM_ASMFIX_REUSE_LATER_2 0xF1ABCD1d +%define PATM_ASMFIX_REUSE_LATER_3 0xF1ABCD1e +%define PATM_ASMFIX_PATCH_HLP_CPUM_CPUID 0xF1ABCD1f + +;/* Anything larger doesn't require a fixup */ +%define PATM_ASMFIX_NO_FIXUP 0xF1ABCE00 +%define PATM_ASMFIX_CPUID_STD_MAX 0xF1ABCE00 +%define PATM_ASMFIX_CPUID_EXT_MAX 0xF1ABCE01 +%define PATM_ASMFIX_RETURNADDR 0xF1ABCE02 +%define PATM_ASMFIX_PATCHNEXTBLOCK 0xF1ABCE03 +%define PATM_ASMFIX_CALLTARGET 0xF1ABCE04 ;;< relative call target +%define PATM_ASMFIX_NEXTINSTRADDR 0xF1ABCE05 ;;< absolute guest address of the next instruction +%define PATM_ASMFIX_CURINSTRADDR 0xF1ABCE06 ;;< absolute guest address of the current instruction +%define PATM_ASMFIX_LOOKUP_AND_CALL_FUNCTION 0xF1ABCE07 ;;< Relative address of global PATM lookup and call function. +%define PATM_ASMFIX_RETURN_FUNCTION 0xF1ABCE08 ;;< Relative address of global PATM return function. +%define PATM_ASMFIX_LOOKUP_AND_JUMP_FUNCTION 0xF1ABCE09 ;;< Relative address of global PATM lookup and jump function. +%define PATM_ASMFIX_IRET_FUNCTION 0xF1ABCE0A ;;< Relative address of global PATM iret function. +%define PATM_ASMFIX_CPUID_CENTAUR_MAX 0xF1ABCE0B +;; @} + + +;; Everything except IOPL, NT, IF, VM, VIF, VIP and RF +%define PATM_FLAGS_MASK (X86_EFL_CF|X86_EFL_PF|X86_EFL_AF|X86_EFL_ZF|X86_EFL_SF|X86_EFL_TF|X86_EFL_DF|X86_EFL_OF|X86_EFL_AC|X86_EFL_ID) + +; currently only IF & IOPL +%define PATM_VIRTUAL_FLAGS_MASK (X86_EFL_IF|X86_EFL_IOPL) + +; PATM stack size (identical in PATMA.h!!) +%define PATM_STACK_SIZE (4096) +%define PATM_STACK_TOTAL_SIZE (2 * PATM_STACK_SIZE) +%define PATM_MAX_STACK (PATM_STACK_SIZE / RTRCPTR_CB) + +;; @name Patch Manager pending actions (in GCSTATE). 
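+;; These codes are or'ed into PATM_ASMFIX_PENDINGACTION (uPendingAction in the GC state) with
+;; PATM_ACTION_MAGIC loaded into ecx before the patch code executes the illegal 0Fh 0Bh opcode
+;; that traps into PATMHandleIllegalInstrTrap, which then performs the requested action.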
+;; @{ +%define PATM_ACTION_LOOKUP_ADDRESS 1 +%define PATM_ACTION_DISPATCH_PENDING_IRQ 2 +%define PATM_ACTION_PENDING_IRQ_AFTER_IRET 3 +%define PATM_ACTION_DO_V86_IRET 4 +%define PATM_ACTION_LOG_IF1 5 +%define PATM_ACTION_LOG_CLI 6 +%define PATM_ACTION_LOG_STI 7 +%define PATM_ACTION_LOG_POPF_IF1 8 +%define PATM_ACTION_LOG_POPF_IF0 9 +%define PATM_ACTION_LOG_PUSHF 10 +%define PATM_ACTION_LOG_IRET 11 +%define PATM_ACTION_LOG_RET 12 +%define PATM_ACTION_LOG_CALL 13 +%define PATM_ACTION_LOG_GATE_ENTRY 14 +;; @} + +;; Magic dword found in ecx for patm pending actions. +%define PATM_ACTION_MAGIC 0xABCD4321 + +;; @name PATM_ASMFIX_TEMP_RESTORE_FLAGS +;; @{ +%define PATM_RESTORE_EAX RT_BIT(0) +%define PATM_RESTORE_ECX RT_BIT(1) +%define PATM_RESTORE_EDI RT_BIT(2) +;; @} + + +;; +; Relocation entry for PATCHASMRECORD. +; +struc PATCHASMRELOC + ;; The relocation type. + .uType resd 1 + ;; Additional information specific to the relocation type. + .uCode resd 1 +endstruc + +;; +; Assembly patch descriptor record. +; +struc PATCHASMRECORD + ;; Pointer to the patch code. + .pbFunction RTCCPTR_RES 1 + ;; Offset of the jump table? + .offJump resd 1 + ;; Used only by loop/loopz/loopnz. + .offRelJump resd 1 + ;; Size override byte position. + .offSizeOverride resd 1 + ;; The size of the patch function. + .cbFunction resd 1 + ;; The number of relocations in aRelocs. + .cRelocs resd 1 + ;; Variable sized relocation table. (put after the iend, so no included) + ;.aRelocs resb PATCHASMRELOC_size +endstruc + +;/* For indirect calls/jump (identical in PATMA.h & PATMA.mac!) */ +%define PATM_MAX_JUMPTABLE_ENTRIES 16 + +struc PATCHJUMPTABLE + .nrSlots resw 1 + .ulInsertPos resw 1 + .cAddresses resd 1 + ; array of pInstrGC and pRelPatchGC pairs (nrSlot times) + .Slot_pInstrGC resd 1 + .Slot_pRelPatchGC resd 1 +endstruc + +;/* Must match structure size!! +%define PATCHJUMPTABLE_SIZE (PATM_MAX_JUMPTABLE_ENTRIES*2*4) +;/* Direct calls need only one lookup slot */ +%define PATCHDIRECTJUMPTABLE_SIZE (2*4) + diff --git a/src/VBox/VMM/VMMR3/PATMGuest.cpp b/src/VBox/VMM/VMMR3/PATMGuest.cpp new file mode 100644 index 00000000..948d4b06 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMGuest.cpp @@ -0,0 +1,247 @@ +/* $Id: PATMGuest.cpp $ */ +/** @file + * PATMGuest - Guest OS Patching Manager (non-generic) + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PATM +#include +#include +#include +#include +#include +#include "PATMInternal.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * ntdll!KiFastSystemCall: + * 7c90eb8b 8bd4 mov edx,esp + * 7c90eb8d 0f34 sysenter + * 7c90eb8f 90 nop + * 7c90eb90 90 nop + * 7c90eb91 90 nop + * 7c90eb92 90 nop + * 7c90eb93 90 nop + * ntdll!KiFastSystemCallRet: + * 7c90eb94 c3 ret + * + * ntdll!KiIntSystemCall: + * 7c90eba5 8d542408 lea edx,[esp+0x8] + * 7c90eba9 cd2e int 2e + * 7c90ebab c3 ret + * + */ +static uint8_t uFnKiFastSystemCall[7] = {0x8b, 0xd4, 0x0f, 0x34, 0x90, 0x90, 0x90}; +static uint8_t uFnKiIntSystemCall[7] = {0x8d, 0x54, 0x24, 0x08, 0xcd, 0x2e, 0xc3}; + +/* + * OpenBSD 3.7 & 3.8: + * + * D0101B6D: push CS [0E] + * D0101B6E: push ESI [56] + * D0101B6F: cli [FA] + */ +static uint8_t uFnOpenBSDHandlerPrefix1[3] = { 0x0E, 0x56, 0xFA }; +/* + * OpenBSD 3.9 & 4.0 + * + * D0101BD1: push CS [0E] + * D0101BD2: push ESI [56] + * D0101BD3: push 0x00 [6A 00] + * D0101BD4: push 0x03 [6A 03] + */ +static uint8_t uFnOpenBSDHandlerPrefix2[6] = { 0x0E, 0x56, 0x6A, 0x00, 0x6A, 0x03 }; + + +/** + * Check Windows XP sysenter heuristics and install patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pInstrGC GC Instruction pointer for sysenter + * @param pPatchRec Patch structure + * + */ +int PATMPatchSysenterXP(PVM pVM, RTGCPTR32 pInstrGC, PPATMPATCHREC pPatchRec) +{ + PPATCHINFO pPatch = &pPatchRec->patch; + uint8_t uTemp[16]; + RTGCPTR32 lpfnKiFastSystemCall, lpfnKiIntSystemCall = 0; /* (initializing it to shut up warning.) */ + int rc, i; + PVMCPU pVCpu = VMMGetCpu0(pVM); + + Assert(sizeof(uTemp) > sizeof(uFnKiIntSystemCall)); + Assert(sizeof(uTemp) > sizeof(uFnKiFastSystemCall)); + + /* Guest OS specific patch; check heuristics first */ + + /* check the epilog of KiFastSystemCall */ + lpfnKiFastSystemCall = pInstrGC - 2; + rc = PGMPhysSimpleReadGCPtr(pVCpu, uTemp, lpfnKiFastSystemCall, sizeof(uFnKiFastSystemCall)); + if ( RT_FAILURE(rc) + || memcmp(uFnKiFastSystemCall, uTemp, sizeof(uFnKiFastSystemCall))) + { + return VERR_PATCHING_REFUSED; + } + + /* Now search for KiIntSystemCall */ + for (i=0;i<64;i++) + { + rc = PGMPhysSimpleReadGCPtr(pVCpu, uTemp, pInstrGC + i, sizeof(uFnKiIntSystemCall)); + if(RT_FAILURE(rc)) + { + break; + } + if(!memcmp(uFnKiIntSystemCall, uTemp, sizeof(uFnKiIntSystemCall))) + { + lpfnKiIntSystemCall = pInstrGC + i; + /* Found it! 
*/ + break; + } + } + if (i == 64) + { + Log(("KiIntSystemCall not found!!\n")); + return VERR_PATCHING_REFUSED; + } + + if (PAGE_ADDRESS(lpfnKiFastSystemCall) != PAGE_ADDRESS(lpfnKiIntSystemCall)) + { + Log(("KiFastSystemCall and KiIntSystemCall not in the same page!!\n")); + return VERR_PATCHING_REFUSED; + } + + // make a copy of the guest code bytes that will be overwritten + rc = PGMPhysSimpleReadGCPtr(pVCpu, pPatch->aPrivInstr, pPatch->pPrivInstrGC, SIZEOF_NEARJUMP32); + AssertRC(rc); + + /* Now we simply jump from the fast version to the 'old and slow' system call */ + uTemp[0] = 0xE9; + *(RTGCPTR32 *)&uTemp[1] = lpfnKiIntSystemCall - (pInstrGC + SIZEOF_NEARJUMP32); + rc = PGMPhysSimpleDirtyWriteGCPtr(pVCpu, pInstrGC, uTemp, SIZEOF_NEARJUMP32); + if (RT_FAILURE(rc)) + { + Log(("PGMPhysSimpleDirtyWriteGCPtr failed with rc=%Rrc!!\n", rc)); + return VERR_PATCHING_REFUSED; + } + +#ifdef LOG_ENABLED + Log(("Sysenter Patch code ----------------------------------------------------------\n")); + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = pPatch; + + patmr3DisasmCodeStream(pVM, pInstrGC, pInstrGC, patmR3DisasmCallback, &cacheRec); + /* Free leftover lock if any. */ + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + Log(("Sysenter Patch code ends -----------------------------------------------------\n")); +#endif + + pPatch->uState = PATCH_ENABLED; + return VINF_SUCCESS; +} + +/** + * Patch OpenBSD interrupt handler prefix + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCpu Disassembly state of instruction. + * @param pInstrGC GC Instruction pointer for instruction + * @param pInstrHC GC Instruction pointer for instruction + * @param pPatchRec Patch structure + * + */ +int PATMPatchOpenBSDHandlerPrefix(PVM pVM, PDISCPUSTATE pCpu, RTGCPTR32 pInstrGC, uint8_t *pInstrHC, PPATMPATCHREC pPatchRec) +{ + uint8_t uTemp[16]; + int rc; + + Assert(sizeof(uTemp) > RT_MAX(sizeof(uFnOpenBSDHandlerPrefix1), sizeof(uFnOpenBSDHandlerPrefix2))); + + /* Guest OS specific patch; check heuristics first */ + + rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), uTemp, pInstrGC, RT_MAX(sizeof(uFnOpenBSDHandlerPrefix1), sizeof(uFnOpenBSDHandlerPrefix2))); + if ( RT_FAILURE(rc) + || ( memcmp(uFnOpenBSDHandlerPrefix1, uTemp, sizeof(uFnOpenBSDHandlerPrefix1)) + && memcmp(uFnOpenBSDHandlerPrefix2, uTemp, sizeof(uFnOpenBSDHandlerPrefix2)))) + { + return VERR_PATCHING_REFUSED; + } + /* Found it; patch the push cs */ + pPatchRec->patch.flags &= ~(PATMFL_GUEST_SPECIFIC); /* prevent a breakpoint from being triggered */ + return patmR3PatchInstrInt3(pVM, pInstrGC, pInstrHC, pCpu, &pPatchRec->patch); +} + +/** + * Install guest OS specific patch + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCpu Disassembly state of instruction. + * @param pInstrGC GC Instruction pointer for instruction + * @param pInstrHC GC Instruction pointer for instruction + * @param pPatchRec Patch structure + * + */ +int patmR3InstallGuestSpecificPatch(PVM pVM, PDISCPUSTATE pCpu, RTGCPTR32 pInstrGC, uint8_t *pInstrHC, PPATMPATCHREC pPatchRec) +{ + int rc; + + /** @todo might have to check if the patch crosses a page boundary. Currently not necessary, but that might change in the future!! 
*/ + switch (pCpu->pCurInstr->uOpcode) + { + case OP_SYSENTER: + pPatchRec->patch.flags |= PATMFL_SYSENTER_XP | PATMFL_USER_MODE | PATMFL_GUEST_SPECIFIC; + + rc = PATMPatchSysenterXP(pVM, pInstrGC, pPatchRec); + if (RT_FAILURE(rc)) + { + return VERR_PATCHING_REFUSED; + } + return VINF_SUCCESS; + + case OP_PUSH: + /* OpenBSD guest specific patch for the following code block: + * + * pushf + * push cs <- dangerous because of DPL 0 tests + * push esi + * cli + */ + if (pCpu->pCurInstr->fParam1 == OP_PARM_REG_CS) + return PATMPatchOpenBSDHandlerPrefix(pVM, pCpu, pInstrGC, pInstrHC, pPatchRec); + + return VERR_PATCHING_REFUSED; + + default: + AssertMsgFailed(("PATMInstallGuestSpecificPatch: unknown opcode %d\n", pCpu->pCurInstr->uOpcode)); + return VERR_PATCHING_REFUSED; + } +} + diff --git a/src/VBox/VMM/VMMR3/PATMPatch.cpp b/src/VBox/VMM/VMMR3/PATMPatch.cpp new file mode 100644 index 00000000..6156bdd7 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMPatch.cpp @@ -0,0 +1,1627 @@ +/* $Id: PATMPatch.cpp $ */ +/** @file + * PATMPatch - Dynamic Guest OS Instruction patches + * + * NOTE: CSAM assumes patch memory is never reused!! + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PATM +#include +#include +#include +#include +#include +#include +#include +#include +#include "PATMInternal.h" +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include "PATMA.h" +#include "PATMPatch.h" + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Internal structure for passing more information about call fixups to + * patmPatchGenCode. + */ +typedef struct +{ + RTRCPTR pTargetGC; + RTRCPTR pCurInstrGC; + RTRCPTR pNextInstrGC; + RTRCPTR pReturnGC; +} PATMCALLINFO, *PPATMCALLINFO; + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Value to use when not sure about the patch size. 
*/ +#define PATCHGEN_DEF_SIZE 256 + +#define PATCHGEN_PROLOG_NODEF(pVM, pPatch, a_cbMaxEmit) \ + do { \ + cbGivenPatchSize = (a_cbMaxEmit) + 16U /*jmp++*/; \ + if (RT_LIKELY((pPatch)->pPatchBlockOffset + pPatch->uCurPatchOffset + cbGivenPatchSize < pVM->patm.s.cbPatchMem)) \ + pPB = PATCHCODE_PTR_HC(pPatch) + pPatch->uCurPatchOffset; \ + else \ + { \ + pVM->patm.s.fOutOfMemory = true; \ + AssertMsgFailed(("offPatch=%#x + offEmit=%#x + a_cbMaxEmit=%#x + jmp --> cbTotalWithFudge=%#x >= cbPatchMem=%#x", \ + (pPatch)->pPatchBlockOffset, pPatch->uCurPatchOffset, a_cbMaxEmit, \ + (pPatch)->pPatchBlockOffset + pPatch->uCurPatchOffset + cbGivenPatchSize, pVM->patm.s.cbPatchMem)); \ + return VERR_NO_MEMORY; \ + } \ + } while (0) + +#define PATCHGEN_PROLOG(pVM, pPatch, a_cbMaxEmit) \ + uint8_t *pPB; \ + uint32_t cbGivenPatchSize; \ + PATCHGEN_PROLOG_NODEF(pVM, pPatch, a_cbMaxEmit) + +#define PATCHGEN_EPILOG(pPatch, a_cbActual) \ + do { \ + AssertMsg((a_cbActual) <= cbGivenPatchSize, ("a_cbActual=%#x cbGivenPatchSize=%#x\n", a_cbActual, cbGivenPatchSize)); \ + Assert((a_cbActual) <= 640); \ + pPatch->uCurPatchOffset += (a_cbActual); \ + } while (0) + + + + +int patmPatchAddReloc32(PVM pVM, PPATCHINFO pPatch, uint8_t *pRelocHC, uint32_t uType, + RTRCPTR pSource /*= 0*/, RTRCPTR pDest /*= 0*/) +{ + PRELOCREC pRec; + + Assert( uType == FIXUP_ABSOLUTE + || ( ( uType == FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL + || uType == FIXUP_CONSTANT_IN_PATCH_ASM_TMPL + || uType == FIXUP_REL_HELPER_IN_PATCH_ASM_TMPL) + && pSource == pDest + && PATM_IS_ASMFIX(pSource)) + || ((uType == FIXUP_REL_JMPTOPATCH || uType == FIXUP_REL_JMPTOGUEST) && pSource && pDest)); + + LogFlow(("patmPatchAddReloc32 type=%d pRelocGC=%RRv source=%RRv dest=%RRv\n", uType, pRelocHC - pVM->patm.s.pPatchMemGC + pVM->patm.s.pPatchMemGC , pSource, pDest)); + + pRec = (PRELOCREC)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(*pRec)); + Assert(pRec); + pRec->Core.Key = (AVLPVKEY)pRelocHC; + pRec->pRelocPos = pRelocHC; /** @todo redundant. */ + pRec->pSource = pSource; + pRec->pDest = pDest; + pRec->uType = uType; + + bool ret = RTAvlPVInsert(&pPatch->FixupTree, &pRec->Core); + Assert(ret); NOREF(ret); + pPatch->nrFixups++; + + return VINF_SUCCESS; +} + +int patmPatchAddJump(PVM pVM, PPATCHINFO pPatch, uint8_t *pJumpHC, uint32_t offset, RTRCPTR pTargetGC, uint32_t opcode) +{ + PJUMPREC pRec; + + pRec = (PJUMPREC)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(*pRec)); + Assert(pRec); + + pRec->Core.Key = (AVLPVKEY)pJumpHC; + pRec->pJumpHC = pJumpHC; /** @todo redundant. 
*/ + pRec->offDispl = offset; + pRec->pTargetGC = pTargetGC; + pRec->opcode = opcode; + + bool ret = RTAvlPVInsert(&pPatch->JumpTree, &pRec->Core); + Assert(ret); NOREF(ret); + pPatch->nrJumpRecs++; + + return VINF_SUCCESS; +} + +static uint32_t patmPatchGenCode(PVM pVM, PPATCHINFO pPatch, uint8_t *pPB, PCPATCHASMRECORD pAsmRecord, + RCPTRTYPE(uint8_t *) pReturnAddrGC, bool fGenJump, + PPATMCALLINFO pCallInfo = 0) +{ + Assert(fGenJump == false || pReturnAddrGC); + Assert(fGenJump == false || pAsmRecord->offJump); + Assert(pAsmRecord); + Assert(pAsmRecord->cbFunction > sizeof(pAsmRecord->aRelocs[0].uType) * pAsmRecord->cRelocs); + + // Copy the code block + memcpy(pPB, pAsmRecord->pbFunction, pAsmRecord->cbFunction); + + // Process all fixups + uint32_t i, j; + for (j = 0, i = 0; i < pAsmRecord->cRelocs; i++) + { + for (; j < pAsmRecord->cbFunction; j++) + { + if (*(uint32_t*)&pPB[j] == pAsmRecord->aRelocs[i].uType) + { + RCPTRTYPE(uint32_t *) dest; + +#ifdef VBOX_STRICT + if (pAsmRecord->aRelocs[i].uType == PATM_ASMFIX_FIXUP) + Assert(pAsmRecord->aRelocs[i].uInfo != 0); + else + Assert(pAsmRecord->aRelocs[i].uInfo == 0); +#endif + + /* + * BE VERY CAREFUL WITH THESE FIXUPS. TAKE INTO ACCOUNT THAT PROBLEMS MAY ARISE WHEN RESTORING + * A SAVED STATE WITH A DIFFERENT HYPERVISOR LAYOUT. + */ + uint32_t uRelocType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + switch (pAsmRecord->aRelocs[i].uType) + { + /* + * PATMGCSTATE member fixups. + */ + case PATM_ASMFIX_VMFLAGS: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, uVMFlags); + break; + case PATM_ASMFIX_PENDINGACTION: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, uPendingAction); + break; + case PATM_ASMFIX_STACKPTR: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, Psp); + break; + case PATM_ASMFIX_INTERRUPTFLAG: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, fPIF); + break; + case PATM_ASMFIX_INHIBITIRQADDR: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, GCPtrInhibitInterrupts); + break; + case PATM_ASMFIX_TEMP_EAX: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, Restore.uEAX); + break; + case PATM_ASMFIX_TEMP_ECX: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, Restore.uECX); + break; + case PATM_ASMFIX_TEMP_EDI: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, Restore.uEDI); + break; + case PATM_ASMFIX_TEMP_EFLAGS: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, Restore.eFlags); + break; + case PATM_ASMFIX_TEMP_RESTORE_FLAGS: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, Restore.uFlags); + break; + case PATM_ASMFIX_CALL_PATCH_TARGET_ADDR: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, GCCallPatchTargetAddr); + break; + case PATM_ASMFIX_CALL_RETURN_ADDR: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, GCCallReturnAddr); + break; +#ifdef VBOX_WITH_STATISTICS + case PATM_ASMFIX_ALLPATCHCALLS: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, uPatchCalls); + break; + case PATM_ASMFIX_IRETEFLAGS: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, uIretEFlags); + break; + case PATM_ASMFIX_IRETCS: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, uIretCS); + break; + case PATM_ASMFIX_IRETEIP: + dest = pVM->patm.s.pGCStateGC + RT_OFFSETOF(PATMGCSTATE, uIretEIP); + break; +#endif + + + case PATM_ASMFIX_FIXUP: + /* Offset in aRelocs[i].uInfo is from the base of the function. 
*/ + dest = (RTGCUINTPTR32)pVM->patm.s.pPatchMemGC + pAsmRecord->aRelocs[i].uInfo + + (RTGCUINTPTR32)(pPB - pVM->patm.s.pPatchMemHC); + break; + +#ifdef VBOX_WITH_STATISTICS + case PATM_ASMFIX_PERPATCHCALLS: + dest = patmPatchQueryStatAddress(pVM, pPatch); + break; +#endif + + /* The first part of our PATM stack is used to store offsets of patch return addresses; the 2nd + * part to store the original return addresses. + */ + case PATM_ASMFIX_STACKBASE: + dest = pVM->patm.s.pGCStackGC; + break; + + case PATM_ASMFIX_STACKBASE_GUEST: + dest = pVM->patm.s.pGCStackGC + PATM_STACK_SIZE; + break; + + case PATM_ASMFIX_RETURNADDR: /* absolute guest address; no fixup required */ + Assert(pCallInfo && pAsmRecord->aRelocs[i].uType >= PATM_ASMFIX_NO_FIXUP); + dest = pCallInfo->pReturnGC; + break; + + case PATM_ASMFIX_PATCHNEXTBLOCK: /* relative address of instruction following this block */ + Assert(pCallInfo && pAsmRecord->aRelocs[i].uType >= PATM_ASMFIX_NO_FIXUP); + + /** @note hardcoded assumption that we must return to the instruction following this block */ + dest = (uintptr_t)pPB - (uintptr_t)pVM->patm.s.pPatchMemHC + pAsmRecord->cbFunction; + break; + + case PATM_ASMFIX_CALLTARGET: /* relative to patch address; no fixup required */ + Assert(pCallInfo && pAsmRecord->aRelocs[i].uType >= PATM_ASMFIX_NO_FIXUP); + + /* Address must be filled in later. (see patmr3SetBranchTargets) */ + patmPatchAddJump(pVM, pPatch, &pPB[j-1], 1, pCallInfo->pTargetGC, OP_CALL); + dest = PATM_ILLEGAL_DESTINATION; + break; + + case PATM_ASMFIX_PATCHBASE: /* Patch GC base address */ + dest = pVM->patm.s.pPatchMemGC; + break; + + case PATM_ASMFIX_NEXTINSTRADDR: + Assert(pCallInfo); + /* pNextInstrGC can be 0 if several instructions, that inhibit irqs, follow each other */ + dest = pCallInfo->pNextInstrGC; + break; + + case PATM_ASMFIX_CURINSTRADDR: + Assert(pCallInfo); + dest = pCallInfo->pCurInstrGC; + break; + + /* Relative address of global patm lookup and call function. */ + case PATM_ASMFIX_LOOKUP_AND_CALL_FUNCTION: + { + RTRCPTR pInstrAfterCall = pVM->patm.s.pPatchMemGC + + (RTGCUINTPTR32)(&pPB[j] + sizeof(RTRCPTR) - pVM->patm.s.pPatchMemHC); + Assert(pVM->patm.s.pfnHelperCallGC); + Assert(sizeof(uint32_t) == sizeof(RTRCPTR)); + + /* Relative value is target minus address of instruction after the actual call instruction. */ + dest = pVM->patm.s.pfnHelperCallGC - pInstrAfterCall; + break; + } + + case PATM_ASMFIX_RETURN_FUNCTION: + { + RTRCPTR pInstrAfterCall = pVM->patm.s.pPatchMemGC + + (RTGCUINTPTR32)(&pPB[j] + sizeof(RTRCPTR) - pVM->patm.s.pPatchMemHC); + Assert(pVM->patm.s.pfnHelperRetGC); + Assert(sizeof(uint32_t) == sizeof(RTRCPTR)); + + /* Relative value is target minus address of instruction after the actual call instruction. */ + dest = pVM->patm.s.pfnHelperRetGC - pInstrAfterCall; + break; + } + + case PATM_ASMFIX_IRET_FUNCTION: + { + RTRCPTR pInstrAfterCall = pVM->patm.s.pPatchMemGC + + (RTGCUINTPTR32)(&pPB[j] + sizeof(RTRCPTR) - pVM->patm.s.pPatchMemHC); + Assert(pVM->patm.s.pfnHelperIretGC); + Assert(sizeof(uint32_t) == sizeof(RTRCPTR)); + + /* Relative value is target minus address of instruction after the actual call instruction. 
*/ + dest = pVM->patm.s.pfnHelperIretGC - pInstrAfterCall; + break; + } + + case PATM_ASMFIX_LOOKUP_AND_JUMP_FUNCTION: + { + RTRCPTR pInstrAfterCall = pVM->patm.s.pPatchMemGC + + (RTGCUINTPTR32)(&pPB[j] + sizeof(RTRCPTR) - pVM->patm.s.pPatchMemHC); + Assert(pVM->patm.s.pfnHelperJumpGC); + Assert(sizeof(uint32_t) == sizeof(RTRCPTR)); + + /* Relative value is target minus address of instruction after the actual call instruction. */ + dest = pVM->patm.s.pfnHelperJumpGC - pInstrAfterCall; + break; + } + + case PATM_ASMFIX_CPUID_STD_MAX: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmStdMax(pVM); + break; + case PATM_ASMFIX_CPUID_EXT_MAX: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmExtMax(pVM); + break; + case PATM_ASMFIX_CPUID_CENTAUR_MAX: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmCentaurMax(pVM); + break; + + /* + * The following fixups needs to be recalculated when loading saved state + * Note! Earlier saved state versions had different hacks for detecting some of these. + */ + case PATM_ASMFIX_VM_FORCEDACTIONS: + dest = pVM->pVMRC + RT_OFFSETOF(VM, aCpus[0].fLocalForcedActions); + break; + + case PATM_ASMFIX_CPUID_DEF_PTR: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmDefRCPtr(pVM); + break; + case PATM_ASMFIX_CPUID_STD_PTR: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmStdRCPtr(pVM); + break; + case PATM_ASMFIX_CPUID_EXT_PTR: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmExtRCPtr(pVM); + break; + case PATM_ASMFIX_CPUID_CENTAUR_PTR: /* saved state only */ + dest = CPUMR3GetGuestCpuIdPatmCentaurRCPtr(pVM); + break; + + /* + * The following fixups are constants and helper code calls that only + * needs to be corrected when loading saved state. + */ + case PATM_ASMFIX_HELPER_CPUM_CPUID: + { + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "CPUMPatchHlpCpuId", &dest); + AssertReleaseRCBreakStmt(rc, dest = PATM_ILLEGAL_DESTINATION); + uRelocType = FIXUP_REL_HELPER_IN_PATCH_ASM_TMPL; + break; + } + + /* + * Unknown fixup. 
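+ * Handled with a release assertion below; the destination falls back to PATM_ILLEGAL_DESTINATION.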
+ */ + case PATM_ASMFIX_REUSE_LATER_0: + case PATM_ASMFIX_REUSE_LATER_1: + case PATM_ASMFIX_REUSE_LATER_2: + case PATM_ASMFIX_REUSE_LATER_3: + default: + AssertReleaseMsgFailed(("Unknown fixup: %#x\n", pAsmRecord->aRelocs[i].uType)); + dest = PATM_ILLEGAL_DESTINATION; + break; + } + + if (uRelocType == FIXUP_REL_HELPER_IN_PATCH_ASM_TMPL) + { + RTRCUINTPTR RCPtrAfter = pVM->patm.s.pPatchMemGC + + (RTRCUINTPTR)(&pPB[j + sizeof(RTRCPTR)] - pVM->patm.s.pPatchMemHC); + dest -= RCPtrAfter; + } + + *(PRTRCPTR)&pPB[j] = dest; + + if (pAsmRecord->aRelocs[i].uType < PATM_ASMFIX_NO_FIXUP) + { + patmPatchAddReloc32(pVM, pPatch, &pPB[j], uRelocType, + pAsmRecord->aRelocs[i].uType /*pSources*/, pAsmRecord->aRelocs[i].uType /*pDest*/); + } + break; + } + } + Assert(j < pAsmRecord->cbFunction); + } + Assert(pAsmRecord->aRelocs[i].uInfo == 0xffffffff); + + /* Add the jump back to guest code (if required) */ + if (fGenJump) + { + int32_t displ = pReturnAddrGC - (PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset + pAsmRecord->offJump - 1 + SIZEOF_NEARJUMP32); + + /* Add lookup record for patch to guest address translation */ + Assert(pPB[pAsmRecord->offJump - 1] == 0xE9); + patmR3AddP2GLookupRecord(pVM, pPatch, &pPB[pAsmRecord->offJump - 1], pReturnAddrGC, PATM_LOOKUP_PATCH2GUEST); + + *(uint32_t *)&pPB[pAsmRecord->offJump] = displ; + patmPatchAddReloc32(pVM, pPatch, &pPB[pAsmRecord->offJump], FIXUP_REL_JMPTOGUEST, + PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset + pAsmRecord->offJump - 1 + SIZEOF_NEARJUMP32, + pReturnAddrGC); + } + + // Calculate the right size of this patch block + if ((fGenJump && pAsmRecord->offJump) || (!fGenJump && !pAsmRecord->offJump)) + return pAsmRecord->cbFunction; + // if a jump instruction is present and we don't want one, then subtract SIZEOF_NEARJUMP32 + return pAsmRecord->cbFunction - SIZEOF_NEARJUMP32; +} + +/* Read bytes and check for overwritten instructions. */ +static int patmPatchReadBytes(PVM pVM, uint8_t *pDest, RTRCPTR pSrc, uint32_t cb) +{ + int rc = PGMPhysSimpleReadGCPtr(&pVM->aCpus[0], pDest, pSrc, cb); + AssertRCReturn(rc, rc); + /* + * Could be patched already; make sure this is checked! + */ + for (uint32_t i=0;icbInstr; + PATCHGEN_PROLOG(pVM, pPatch, cbInstrShutUpGcc); + + int rc = patmPatchReadBytes(pVM, pPB, pCurInstrGC, cbInstrShutUpGcc); + AssertRC(rc); + PATCHGEN_EPILOG(pPatch, cbInstrShutUpGcc); + return rc; +} + +int patmPatchGenIret(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC, bool fSizeOverride) +{ + uint32_t size; + PATMCALLINFO callInfo; + PCPATCHASMRECORD pPatchAsmRec = EMIsRawRing1Enabled(pVM) ? 
&g_patmIretRing1Record : &g_patmIretRecord; + + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + + AssertMsg(fSizeOverride == false, ("operand size override!!\n")); RT_NOREF_PV(fSizeOverride); + callInfo.pCurInstrGC = pCurInstrGC; + + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, 0, false, &callInfo); + + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +int patmPatchGenCli(PVM pVM, PPATCHINFO pPatch) +{ + uint32_t size; + PATCHGEN_PROLOG(pVM, pPatch, g_patmCliRecord.cbFunction); + + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmCliRecord, 0, false); + + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +/* + * Generate an STI patch + */ +int patmPatchGenSti(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC, RTRCPTR pNextInstrGC) +{ + PATMCALLINFO callInfo; + uint32_t size; + + Log(("patmPatchGenSti at %RRv; next %RRv\n", pCurInstrGC, pNextInstrGC)); RT_NOREF_PV(pCurInstrGC); + PATCHGEN_PROLOG(pVM, pPatch, g_patmStiRecord.cbFunction); + callInfo.pNextInstrGC = pNextInstrGC; + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmStiRecord, 0, false, &callInfo); + PATCHGEN_EPILOG(pPatch, size); + + return VINF_SUCCESS; +} + + +int patmPatchGenPopf(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pReturnAddrGC, bool fSizeOverride, bool fGenJumpBack) +{ + uint32_t size; + PATMCALLINFO callInfo; + PCPATCHASMRECORD pPatchAsmRec; + if (fSizeOverride == true) + pPatchAsmRec = fGenJumpBack ? &g_patmPopf16Record : &g_patmPopf16Record_NoExit; + else + pPatchAsmRec = fGenJumpBack ? &g_patmPopf32Record : &g_patmPopf32Record_NoExit; + + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + + callInfo.pNextInstrGC = pReturnAddrGC; + + Log(("patmPatchGenPopf at %RRv\n", pReturnAddrGC)); + + /* Note: keep IOPL in mind when changing any of this!! (see comments in PATMA.asm, PATMPopf32Replacement) */ + if (fSizeOverride == true) + Log(("operand size override!!\n")); + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, pReturnAddrGC, fGenJumpBack, &callInfo); + + PATCHGEN_EPILOG(pPatch, size); + STAM_COUNTER_INC(&pVM->patm.s.StatGenPopf); + return VINF_SUCCESS; +} + +int patmPatchGenPushf(PVM pVM, PPATCHINFO pPatch, bool fSizeOverride) +{ + uint32_t size; + PCPATCHASMRECORD pPatchAsmRec = fSizeOverride == true ? 
&g_patmPushf16Record : &g_patmPushf32Record; + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, 0, false); + + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +int patmPatchGenPushCS(PVM pVM, PPATCHINFO pPatch) +{ + uint32_t size; + PATCHGEN_PROLOG(pVM, pPatch, g_patmPushCSRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmPushCSRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +int patmPatchGenLoop(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pTargetGC, uint32_t opcode, bool fSizeOverride) +{ + uint32_t size = 0; + PCPATCHASMRECORD pPatchAsmRec; + switch (opcode) + { + case OP_LOOP: + pPatchAsmRec = &g_patmLoopRecord; + break; + case OP_LOOPNE: + pPatchAsmRec = &g_patmLoopNZRecord; + break; + case OP_LOOPE: + pPatchAsmRec = &g_patmLoopZRecord; + break; + case OP_JECXZ: + pPatchAsmRec = &g_patmJEcxRecord; + break; + default: + AssertMsgFailed(("PatchGenLoop: invalid opcode %d\n", opcode)); + return VERR_INVALID_PARAMETER; + } + Assert(pPatchAsmRec->offSizeOverride && pPatchAsmRec->offRelJump); + + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + Log(("PatchGenLoop %d jump %d to %08x offrel=%d\n", opcode, pPatch->nrJumpRecs, pTargetGC, pPatchAsmRec->offRelJump)); + + // Generate the patch code + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, 0, false); + + if (fSizeOverride) + { + pPB[pPatchAsmRec->offSizeOverride] = 0x66; // ecx -> cx or vice versa + } + + *(RTRCPTR *)&pPB[pPatchAsmRec->offRelJump] = 0xDEADBEEF; + + patmPatchAddJump(pVM, pPatch, &pPB[pPatchAsmRec->offRelJump - 1], 1, pTargetGC, opcode); + + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +int patmPatchGenRelJump(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pTargetGC, uint32_t opcode, bool fSizeOverride) +{ + uint32_t offset = 0; + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + + // internal relative jumps from patch code to patch code; no relocation record required + + Assert(PATMIsPatchGCAddr(pVM, pTargetGC) == false); + + switch (opcode) + { + case OP_JO: + pPB[1] = 0x80; + break; + case OP_JNO: + pPB[1] = 0x81; + break; + case OP_JC: + pPB[1] = 0x82; + break; + case OP_JNC: + pPB[1] = 0x83; + break; + case OP_JE: + pPB[1] = 0x84; + break; + case OP_JNE: + pPB[1] = 0x85; + break; + case OP_JBE: + pPB[1] = 0x86; + break; + case OP_JNBE: + pPB[1] = 0x87; + break; + case OP_JS: + pPB[1] = 0x88; + break; + case OP_JNS: + pPB[1] = 0x89; + break; + case OP_JP: + pPB[1] = 0x8A; + break; + case OP_JNP: + pPB[1] = 0x8B; + break; + case OP_JL: + pPB[1] = 0x8C; + break; + case OP_JNL: + pPB[1] = 0x8D; + break; + case OP_JLE: + pPB[1] = 0x8E; + break; + case OP_JNLE: + pPB[1] = 0x8F; + break; + + case OP_JMP: + /* If interrupted here, then jump to the target instruction. Used by PATM.cpp for jumping to known instructions. 
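An unconditional jmp is emitted as opcode 0xE9 with a 4-byte displacement, whereas the conditional cases above use the two-byte 0x0F 0x8x encodings.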
*/ + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pTargetGC, PATM_LOOKUP_PATCH2GUEST); + + pPB[0] = 0xE9; + break; + + case OP_JECXZ: + case OP_LOOP: + case OP_LOOPNE: + case OP_LOOPE: + return patmPatchGenLoop(pVM, pPatch, pTargetGC, opcode, fSizeOverride); + + default: + AssertMsg(0, ("Invalid jump opcode %d\n", opcode)); + return VERR_PATCHING_REFUSED; + } + if (opcode != OP_JMP) + { + pPB[0] = 0xF; + offset += 2; + } + else offset++; + + *(RTRCPTR *)&pPB[offset] = 0xDEADBEEF; + + patmPatchAddJump(pVM, pPatch, pPB, offset, pTargetGC, opcode); + + offset += sizeof(RTRCPTR); + + PATCHGEN_EPILOG(pPatch, offset); + return VINF_SUCCESS; +} + +/* + * Rewrite call to dynamic or currently unknown function (on-demand patching of function) + */ +int patmPatchGenCall(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC, RTRCPTR pTargetGC, bool fIndirect) +{ + PATMCALLINFO callInfo; + uint32_t offset; + uint32_t i, size; + int rc; + + /** @note Don't check for IF=1 here. The ret instruction will do this. */ + /** @note It's dangerous to do this for 'normal' patches. the jump target might be inside the generated patch jump. (seen this!) */ + + /* 1: Clear PATM interrupt flag on entry. */ + rc = patmPatchGenClearPIF(pVM, pPatch, pCurInstrGC); + if (rc == VERR_NO_MEMORY) + return rc; + AssertRCReturn(rc, rc); + + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + /* 2: We must push the target address onto the stack before appending the indirect call code. */ + + if (fIndirect) + { + Log(("patmPatchGenIndirectCall\n")); + Assert(pCpu->Param1.cb == 4); + Assert(OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) != OP_PARM_J); + + /* We push it onto the stack here, so the guest's context isn't ruined when this happens to cause + * a page fault. The assembly code restores the stack afterwards. + */ + offset = 0; + /* include prefix byte to make sure we don't use the incorrect selector register. */ + if (pCpu->fPrefix & DISPREFIX_SEG) + pPB[offset++] = DISQuerySegPrefixByte(pCpu); + pPB[offset++] = 0xFF; // push r/m32 + pPB[offset++] = MAKE_MODRM(pCpu->ModRM.Bits.Mod, 6 /* group 5 */, pCpu->ModRM.Bits.Rm); + i = 2; /* standard offset of modrm bytes */ + if (pCpu->fPrefix & DISPREFIX_OPSIZE) + i++; //skip operand prefix + if (pCpu->fPrefix & DISPREFIX_SEG) + i++; //skip segment prefix + + rc = patmPatchReadBytes(pVM, &pPB[offset], (RTRCPTR)((RTGCUINTPTR32)pCurInstrGC + i), pCpu->cbInstr - i); + AssertRCReturn(rc, rc); + offset += (pCpu->cbInstr - i); + } + else + { + AssertMsg(PATMIsPatchGCAddr(pVM, pTargetGC) == false, ("Target is already a patch address (%RRv)?!?\n", pTargetGC)); + Assert(pTargetGC); + Assert(OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) == OP_PARM_J); + + /** @todo wasting memory as the complex search is overkill and we need only one lookup slot... */ + + /* Relative call to patch code (patch to patch -> no fixup). */ + Log(("PatchGenCall from %RRv (next=%RRv) to %RRv\n", pCurInstrGC, pCurInstrGC + pCpu->cbInstr, pTargetGC)); + + /* We push it onto the stack here, so the guest's context isn't ruined when this happens to cause + * a page fault. The assembly code restores the stack afterwards. + */ + offset = 0; + pPB[offset++] = 0x68; // push %Iv + *(RTRCPTR *)&pPB[offset] = pTargetGC; + offset += sizeof(RTRCPTR); + } + + /* align this block properly to make sure the jump table will not be misaligned. 
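The statements that follow pad with NOP (0x90) bytes up to the next 4-byte boundary: (address & 3) gives the current misalignment, and 4 minus that is the number of filler bytes needed.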
*/ + size = (RTHCUINTPTR)&pPB[offset] & 3; + if (size) + size = 4 - size; + + for (i=0;icbFunction); + callInfo.pReturnGC = pCurInstrGC + pCpu->cbInstr; + callInfo.pTargetGC = (fIndirect) ? 0xDEADBEEF : pTargetGC; + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, 0, false, &callInfo); + PATCHGEN_EPILOG(pPatch, size); + + /* Need to set PATM_ASMFIX_INTERRUPTFLAG after the patched ret returns here. */ + rc = patmPatchGenSetPIF(pVM, pPatch, pCurInstrGC); + if (rc == VERR_NO_MEMORY) + return rc; + AssertRCReturn(rc, rc); + + STAM_COUNTER_INC(&pVM->patm.s.StatGenCall); + return VINF_SUCCESS; +} + +/** + * Generate indirect jump to unknown destination + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pCpu Disassembly state + * @param pCurInstrGC Current instruction address + */ +int patmPatchGenJump(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC) +{ + PATMCALLINFO callInfo; + uint32_t offset; + uint32_t i, size; + int rc; + + /* 1: Clear PATM interrupt flag on entry. */ + rc = patmPatchGenClearPIF(pVM, pPatch, pCurInstrGC); + if (rc == VERR_NO_MEMORY) + return rc; + AssertRCReturn(rc, rc); + + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + /* 2: We must push the target address onto the stack before appending the indirect call code. */ + + Log(("patmPatchGenIndirectJump\n")); + Assert(pCpu->Param1.cb == 4); + Assert(OP_PARM_VTYPE(pCpu->pCurInstr->fParam1) != OP_PARM_J); + + /* We push it onto the stack here, so the guest's context isn't ruined when this happens to cause + * a page fault. The assembly code restores the stack afterwards. + */ + offset = 0; + /* include prefix byte to make sure we don't use the incorrect selector register. */ + if (pCpu->fPrefix & DISPREFIX_SEG) + pPB[offset++] = DISQuerySegPrefixByte(pCpu); + + pPB[offset++] = 0xFF; // push r/m32 + pPB[offset++] = MAKE_MODRM(pCpu->ModRM.Bits.Mod, 6 /* group 5 */, pCpu->ModRM.Bits.Rm); + i = 2; /* standard offset of modrm bytes */ + if (pCpu->fPrefix & DISPREFIX_OPSIZE) + i++; //skip operand prefix + if (pCpu->fPrefix & DISPREFIX_SEG) + i++; //skip segment prefix + + rc = patmPatchReadBytes(pVM, &pPB[offset], (RTRCPTR)((RTGCUINTPTR32)pCurInstrGC + i), pCpu->cbInstr - i); + AssertRCReturn(rc, rc); + offset += (pCpu->cbInstr - i); + + /* align this block properly to make sure the jump table will not be misaligned. */ + size = (RTHCUINTPTR)&pPB[offset] & 3; + if (size) + size = 4 - size; + + for (i=0;icbInstr; + callInfo.pTargetGC = 0xDEADBEEF; + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmJumpIndirectRecord, 0, false, &callInfo); + PATCHGEN_EPILOG(pPatch, size); + + STAM_COUNTER_INC(&pVM->patm.s.StatGenJump); + return VINF_SUCCESS; +} + +/** + * Generate return instruction + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pCpu Disassembly struct + * @param pCurInstrGC Current instruction pointer + * + */ +int patmPatchGenRet(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pCurInstrGC) +{ + RTRCPTR pPatchRetInstrGC; + + /* Remember start of this patch for below. */ + pPatchRetInstrGC = PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset; + + Log(("patmPatchGenRet %RRv\n", pCurInstrGC)); + + /** @note optimization: multiple identical ret instruction in a single patch can share a single patched ret. 
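Reuse only happens when a patched ret already exists and its immediate operand (the number of bytes popped) matches, which is what the uPatchRetParam1 check below verifies.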
*/ + if ( pPatch->pTempInfo->pPatchRetInstrGC + && pPatch->pTempInfo->uPatchRetParam1 == (uint32_t)pCpu->Param1.uValue) /* nr of bytes popped off the stack should be identical of course! */ + { + Assert(pCpu->pCurInstr->uOpcode == OP_RETN); + STAM_COUNTER_INC(&pVM->patm.s.StatGenRetReused); + + return patmPatchGenPatchJump(pVM, pPatch, pCurInstrGC, pPatch->pTempInfo->pPatchRetInstrGC); + } + + /* Jump back to the original instruction if IF is set again. */ + Assert(!patmFindActivePatchByEntrypoint(pVM, pCurInstrGC)); + int rc = patmPatchGenCheckIF(pVM, pPatch, pCurInstrGC); + AssertRCReturn(rc, rc); + + /* align this block properly to make sure the jump table will not be misaligned. */ + PATCHGEN_PROLOG(pVM, pPatch, 4); + uint32_t size = (RTHCUINTPTR)pPB & 3; + if (size) + size = 4 - size; + + for (uint32_t i = 0; i < size; i++) + pPB[i] = 0x90; /* nop */ + PATCHGEN_EPILOG(pPatch, size); + + PATCHGEN_PROLOG_NODEF(pVM, pPatch, g_patmRetRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmRetRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + STAM_COUNTER_INC(&pVM->patm.s.StatGenRet); + /* Duplicate the ret or ret n instruction; it will use the PATM return address */ + rc = patmPatchGenDuplicate(pVM, pPatch, pCpu, pCurInstrGC); + + if (rc == VINF_SUCCESS) + { + pPatch->pTempInfo->pPatchRetInstrGC = pPatchRetInstrGC; + pPatch->pTempInfo->uPatchRetParam1 = pCpu->Param1.uValue; + } + return rc; +} + +/** + * Generate all global patm functions + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * + */ +int patmPatchGenGlobalFunctions(PVM pVM, PPATCHINFO pPatch) +{ + pVM->patm.s.pfnHelperCallGC = PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset; + PATCHGEN_PROLOG(pVM, pPatch, g_patmLookupAndCallRecord.cbFunction); + uint32_t size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmLookupAndCallRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + /* Round to next 8 byte boundary. */ + pPatch->uCurPatchOffset = RT_ALIGN_32(pPatch->uCurPatchOffset, 8); + + pVM->patm.s.pfnHelperRetGC = PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset; + PATCHGEN_PROLOG_NODEF(pVM, pPatch, g_patmRetFunctionRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmRetFunctionRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + /* Round to next 8 byte boundary. */ + pPatch->uCurPatchOffset = RT_ALIGN_32(pPatch->uCurPatchOffset, 8); + + pVM->patm.s.pfnHelperJumpGC = PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset; + PATCHGEN_PROLOG_NODEF(pVM, pPatch, g_patmLookupAndJumpRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmLookupAndJumpRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + /* Round to next 8 byte boundary. */ + pPatch->uCurPatchOffset = RT_ALIGN_32(pPatch->uCurPatchOffset, 8); + + pVM->patm.s.pfnHelperIretGC = PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset; + PATCHGEN_PROLOG_NODEF(pVM, pPatch, g_patmIretFunctionRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmIretFunctionRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + Log(("pfnHelperCallGC %RRv\n", pVM->patm.s.pfnHelperCallGC)); + Log(("pfnHelperRetGC %RRv\n", pVM->patm.s.pfnHelperRetGC)); + Log(("pfnHelperJumpGC %RRv\n", pVM->patm.s.pfnHelperJumpGC)); + Log(("pfnHelperIretGC %RRv\n", pVM->patm.s.pfnHelperIretGC)); + + return VINF_SUCCESS; +} + +/** + * Generate illegal instruction (int 3) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pPatch Patch structure + * + */ +int patmPatchGenIllegalInstr(PVM pVM, PPATCHINFO pPatch) +{ + PATCHGEN_PROLOG(pVM, pPatch, 1); + + pPB[0] = 0xCC; + + PATCHGEN_EPILOG(pPatch, 1); + return VINF_SUCCESS; +} + +/** + * Check virtual IF flag and jump back to original guest code if set + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pCurInstrGC Guest context pointer to the current instruction + * + */ +int patmPatchGenCheckIF(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC) +{ + uint32_t size; + + PATCHGEN_PROLOG(pVM, pPatch, g_patmCheckIFRecord.cbFunction); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pCurInstrGC, PATM_LOOKUP_PATCH2GUEST); + + /* Generate code to check for IF=1 before executing the call to the duplicated function. */ + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmCheckIFRecord, pCurInstrGC, true); + + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +/** + * Set PATM interrupt flag + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pInstrGC Corresponding guest instruction + * + */ +int patmPatchGenSetPIF(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + PATCHGEN_PROLOG(pVM, pPatch, g_patmSetPIFRecord.cbFunction); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pInstrGC, PATM_LOOKUP_PATCH2GUEST); + + uint32_t size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmSetPIFRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +/** + * Clear PATM interrupt flag + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pInstrGC Corresponding guest instruction + * + */ +int patmPatchGenClearPIF(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + PATCHGEN_PROLOG(pVM, pPatch, g_patmSetPIFRecord.cbFunction); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pInstrGC, PATM_LOOKUP_PATCH2GUEST); + + uint32_t size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmClearPIFRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + + +/** + * Clear PATM inhibit irq flag + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pNextInstrGC Next guest instruction + */ +int patmPatchGenClearInhibitIRQ(PVM pVM, PPATCHINFO pPatch, RTRCPTR pNextInstrGC) +{ + PATMCALLINFO callInfo; + PCPATCHASMRECORD pPatchAsmRec = pPatch->flags & PATMFL_DUPLICATE_FUNCTION + ? &g_patmClearInhibitIRQContIF0Record : &g_patmClearInhibitIRQFaultIF0Record; + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + + Assert((pPatch->flags & (PATMFL_GENERATE_JUMPTOGUEST|PATMFL_DUPLICATE_FUNCTION)) != (PATMFL_GENERATE_JUMPTOGUEST|PATMFL_DUPLICATE_FUNCTION)); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pNextInstrGC, PATM_LOOKUP_PATCH2GUEST); + + callInfo.pNextInstrGC = pNextInstrGC; + + uint32_t size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, 0, false, &callInfo); + + PATCHGEN_EPILOG(pPatch, size); + return VINF_SUCCESS; +} + +/** + * Generate an interrupt handler entrypoint + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pPatch Patch record + * @param pIntHandlerGC IDT handler address + * + ** @todo must check if virtual IF is already cleared on entry!!!!!!!!!!!!!!!!!!!!!!! + */ +int patmPatchGenIntEntry(PVM pVM, PPATCHINFO pPatch, RTRCPTR pIntHandlerGC) +{ + int rc = VINF_SUCCESS; + + if (!EMIsRawRing1Enabled(pVM)) /* direct passthru of interrupts is not allowed in the ring-1 support case as we can't + deal with the ring-1/2 ambiguity in the patm asm code and we don't need it either as + TRPMForwardTrap takes care of the details. */ + { + uint32_t size; + PCPATCHASMRECORD pPatchAsmRec = pPatch->flags & PATMFL_INTHANDLER_WITH_ERRORCODE + ? &g_patmIntEntryRecordErrorCode : &g_patmIntEntryRecord; + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pIntHandlerGC, PATM_LOOKUP_PATCH2GUEST); + + /* Generate entrypoint for the interrupt handler (correcting CS in the interrupt stack frame) */ + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, 0, false); + + PATCHGEN_EPILOG(pPatch, size); + } + + // Interrupt gates set IF to 0 + rc = patmPatchGenCli(pVM, pPatch); + AssertRCReturn(rc, rc); + + return rc; +} + +/** + * Generate a trap handler entrypoint + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pTrapHandlerGC IDT handler address + */ +int patmPatchGenTrapEntry(PVM pVM, PPATCHINFO pPatch, RTRCPTR pTrapHandlerGC) +{ + uint32_t size; + PCPATCHASMRECORD pPatchAsmRec = (pPatch->flags & PATMFL_TRAPHANDLER_WITH_ERRORCODE) + ? &g_patmTrapEntryRecordErrorCode : &g_patmTrapEntryRecord; + + Assert(!EMIsRawRing1Enabled(pVM)); + + PATCHGEN_PROLOG(pVM, pPatch, pPatchAsmRec->cbFunction); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pTrapHandlerGC, PATM_LOOKUP_PATCH2GUEST); + + /* Generate entrypoint for the trap handler (correcting CS in the interrupt stack frame) */ + size = patmPatchGenCode(pVM, pPatch, pPB, pPatchAsmRec, pTrapHandlerGC, true); + PATCHGEN_EPILOG(pPatch, size); + + return VINF_SUCCESS; +} + +#ifdef VBOX_WITH_STATISTICS +int patmPatchGenStats(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC) +{ + uint32_t size; + + PATCHGEN_PROLOG(pVM, pPatch, g_patmStatsRecord.cbFunction); + + /* Add lookup record for stats code -> guest handler. */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pInstrGC, PATM_LOOKUP_PATCH2GUEST); + + /* Generate code to keep calling statistics for this patch */ + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmStatsRecord, pInstrGC, false); + PATCHGEN_EPILOG(pPatch, size); + + return VINF_SUCCESS; +} +#endif + +/** + * Debug register moves to or from general purpose registers + * mov GPR, DRx + * mov DRx, GPR + * + * @todo: if we ever want to support hardware debug registers natively, then + * this will need to be changed! + */ +int patmPatchGenMovDebug(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu) +{ + int rc = VINF_SUCCESS; + unsigned reg, mod, rm, dbgreg; + uint32_t offset; + + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + + mod = 0; //effective address (only) + rm = 5; //disp32 + if (pCpu->pCurInstr->fParam1 == OP_PARM_Dd) + { + Assert(0); // You not come here. Illegal! 
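+ // (The mov DRx,GPR direction is not expected in patched code; the assertion above guards against it.)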
+ + // mov DRx, GPR + pPB[0] = 0x89; //mov disp32, GPR + Assert(pCpu->Param1.fUse & DISUSE_REG_DBG); + Assert(pCpu->Param2.fUse & DISUSE_REG_GEN32); + + dbgreg = pCpu->Param1.Base.idxDbgReg; + reg = pCpu->Param2.Base.idxGenReg; + } + else + { + // mov GPR, DRx + Assert(pCpu->Param1.fUse & DISUSE_REG_GEN32); + Assert(pCpu->Param2.fUse & DISUSE_REG_DBG); + + pPB[0] = 0x8B; // mov GPR, disp32 + reg = pCpu->Param1.Base.idxGenReg; + dbgreg = pCpu->Param2.Base.idxDbgReg; + } + + pPB[1] = MAKE_MODRM(mod, reg, rm); + + AssertReturn(dbgreg <= DISDREG_DR7, VERR_INVALID_PARAMETER); + offset = RT_UOFFSETOF_DYN(CPUMCTX, dr[dbgreg]); + + *(RTRCPTR *)&pPB[2] = pVM->patm.s.pCPUMCtxGC + offset; + patmPatchAddReloc32(pVM, pPatch, &pPB[2], FIXUP_ABSOLUTE); + + PATCHGEN_EPILOG(pPatch, 2 + sizeof(RTRCPTR)); + return rc; +} + +/* + * Control register moves to or from general purpose registers + * mov GPR, CRx + * mov CRx, GPR + */ +int patmPatchGenMovControl(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu) +{ + int rc = VINF_SUCCESS; + int reg, mod, rm, ctrlreg; + uint32_t offset; + + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + + mod = 0; //effective address (only) + rm = 5; //disp32 + if (pCpu->pCurInstr->fParam1 == OP_PARM_Cd) + { + Assert(0); // You not come here. Illegal! + + // mov CRx, GPR + pPB[0] = 0x89; //mov disp32, GPR + ctrlreg = pCpu->Param1.Base.idxCtrlReg; + reg = pCpu->Param2.Base.idxGenReg; + Assert(pCpu->Param1.fUse & DISUSE_REG_CR); + Assert(pCpu->Param2.fUse & DISUSE_REG_GEN32); + } + else + { + // mov GPR, CRx + Assert(pCpu->Param1.fUse & DISUSE_REG_GEN32); + Assert(pCpu->Param2.fUse & DISUSE_REG_CR); + + pPB[0] = 0x8B; // mov GPR, disp32 + reg = pCpu->Param1.Base.idxGenReg; + ctrlreg = pCpu->Param2.Base.idxCtrlReg; + } + + pPB[1] = MAKE_MODRM(mod, reg, rm); + + /// @todo make this an array in the context structure + switch (ctrlreg) + { + case DISCREG_CR0: + offset = RT_OFFSETOF(CPUMCTX, cr0); + break; + case DISCREG_CR2: + offset = RT_OFFSETOF(CPUMCTX, cr2); + break; + case DISCREG_CR3: + offset = RT_OFFSETOF(CPUMCTX, cr3); + break; + case DISCREG_CR4: + offset = RT_OFFSETOF(CPUMCTX, cr4); + break; + default: /* Shut up compiler warning. 
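Only CR0, CR2, CR3 and CR4 are handled above; anything else trips the assertion below.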
*/ + AssertFailed(); + offset = 0; + break; + } + *(RTRCPTR *)&pPB[2] = pVM->patm.s.pCPUMCtxGC + offset; + patmPatchAddReloc32(pVM, pPatch, &pPB[2], FIXUP_ABSOLUTE); + + PATCHGEN_EPILOG(pPatch, 2 + sizeof(RTRCPTR)); + return rc; +} + +/* + * mov GPR, SS + */ +int patmPatchGenMovFromSS(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC) +{ + uint32_t size, offset; + + Log(("patmPatchGenMovFromSS %RRv\n", pCurInstrGC)); RT_NOREF_PV(pCurInstrGC); + + Assert(pPatch->flags & PATMFL_CODE32); + + PATCHGEN_PROLOG(pVM, pPatch, g_patmClearPIFRecord.cbFunction + 2 + g_patmMovFromSSRecord.cbFunction + 2 + g_patmSetPIFRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmClearPIFRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + /* push ss */ + PATCHGEN_PROLOG_NODEF(pVM, pPatch, 2); + offset = 0; + if (pCpu->fPrefix & DISPREFIX_OPSIZE) + pPB[offset++] = 0x66; /* size override -> 16 bits push */ + pPB[offset++] = 0x16; + PATCHGEN_EPILOG(pPatch, offset); + + /* checks and corrects RPL of pushed ss*/ + PATCHGEN_PROLOG_NODEF(pVM, pPatch, g_patmMovFromSSRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmMovFromSSRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + /* pop general purpose register */ + PATCHGEN_PROLOG_NODEF(pVM, pPatch, 2); + offset = 0; + if (pCpu->fPrefix & DISPREFIX_OPSIZE) + pPB[offset++] = 0x66; /* size override -> 16 bits pop */ + pPB[offset++] = 0x58 + pCpu->Param1.Base.idxGenReg; + PATCHGEN_EPILOG(pPatch, offset); + + + PATCHGEN_PROLOG_NODEF(pVM, pPatch, g_patmSetPIFRecord.cbFunction); + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmSetPIFRecord, 0, false); + PATCHGEN_EPILOG(pPatch, size); + + return VINF_SUCCESS; +} + + +/** + * Generate an sldt or str patch instruction + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pPatch Patch record + * @param pCpu Disassembly state + * @param pCurInstrGC Guest instruction address + */ +int patmPatchGenSldtStr(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC) +{ + // sldt %Ew + int rc = VINF_SUCCESS; + uint32_t offset = 0; + uint32_t i; + + /** @todo segment prefix (untested) */ + Assert(pCpu->fPrefix == DISPREFIX_NONE || pCpu->fPrefix == DISPREFIX_OPSIZE); + + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + + if (pCpu->Param1.fUse == DISUSE_REG_GEN32 || pCpu->Param1.fUse == DISUSE_REG_GEN16) + { + /* Register operand */ + // 8B 15 [32 bits addr] mov edx, CPUMCTX.tr/ldtr + + if (pCpu->fPrefix == DISPREFIX_OPSIZE) + pPB[offset++] = 0x66; + + pPB[offset++] = 0x8B; // mov destreg, CPUMCTX.tr/ldtr + /* Modify REG part according to destination of original instruction */ + pPB[offset++] = MAKE_MODRM(0, pCpu->Param1.Base.idxGenReg, 5); + if (pCpu->pCurInstr->uOpcode == OP_STR) + { + *(RTRCPTR *)&pPB[offset] = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, tr); + } + else + { + *(RTRCPTR *)&pPB[offset] = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, ldtr); + } + patmPatchAddReloc32(pVM, pPatch, &pPB[offset], FIXUP_ABSOLUTE); + offset += sizeof(RTRCPTR); + } + else + { + /* Memory operand */ + //50 push eax + //52 push edx + //8D 15 48 7C 42 00 lea edx, dword ptr [dest] + //66 A1 48 7C 42 00 mov ax, CPUMCTX.tr/ldtr + //66 89 02 mov word ptr [edx],ax + //5A pop edx + //58 pop eax + + pPB[offset++] = 0x50; // push eax + pPB[offset++] = 0x52; // push edx + + if (pCpu->fPrefix == DISPREFIX_SEG) + { + pPB[offset++] = DISQuerySegPrefixByte(pCpu); + } + pPB[offset++] = 0x8D; // lea edx, dword ptr [dest] + // duplicate and modify modrm byte and additional bytes if present (e.g. direct address) + pPB[offset++] = MAKE_MODRM(pCpu->ModRM.Bits.Mod, DISGREG_EDX , pCpu->ModRM.Bits.Rm); + + i = 3; /* standard offset of modrm bytes */ + if (pCpu->fPrefix == DISPREFIX_OPSIZE) + i++; //skip operand prefix + if (pCpu->fPrefix == DISPREFIX_SEG) + i++; //skip segment prefix + + rc = patmPatchReadBytes(pVM, &pPB[offset], (RTRCPTR)((RTGCUINTPTR32)pCurInstrGC + i), pCpu->cbInstr - i); + AssertRCReturn(rc, rc); + offset += (pCpu->cbInstr - i); + + pPB[offset++] = 0x66; // mov ax, CPUMCTX.tr/ldtr + pPB[offset++] = 0xA1; + if (pCpu->pCurInstr->uOpcode == OP_STR) + { + *(RTRCPTR *)&pPB[offset] = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, tr); + } + else + { + *(RTRCPTR *)&pPB[offset] = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, ldtr); + } + patmPatchAddReloc32(pVM, pPatch, &pPB[offset], FIXUP_ABSOLUTE); + offset += sizeof(RTRCPTR); + + pPB[offset++] = 0x66; // mov word ptr [edx],ax + pPB[offset++] = 0x89; + pPB[offset++] = 0x02; + + pPB[offset++] = 0x5A; // pop edx + pPB[offset++] = 0x58; // pop eax + } + + PATCHGEN_EPILOG(pPatch, offset); + + return rc; +} + +/** + * Generate an sgdt or sidt patch instruction + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param pPatch Patch record + * @param pCpu Disassembly state + * @param pCurInstrGC Guest instruction address + */ +int patmPatchGenSxDT(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC) +{ + int rc = VINF_SUCCESS; + uint32_t offset = 0, offset_base, offset_limit; + uint32_t i; + + /** @todo segment prefix (untested) */ + Assert(pCpu->fPrefix == DISPREFIX_NONE); + + // sgdt %Ms + // sidt %Ms + + switch (pCpu->pCurInstr->uOpcode) + { + case OP_SGDT: + offset_base = RT_OFFSETOF(CPUMCTX, gdtr.pGdt); + offset_limit = RT_OFFSETOF(CPUMCTX, gdtr.cbGdt); + break; + + case OP_SIDT: + offset_base = RT_OFFSETOF(CPUMCTX, idtr.pIdt); + offset_limit = RT_OFFSETOF(CPUMCTX, idtr.cbIdt); + break; + + default: + return VERR_INVALID_PARAMETER; + } + +//50 push eax +//52 push edx +//8D 15 48 7C 42 00 lea edx, dword ptr [dest] +//66 A1 48 7C 42 00 mov ax, CPUMCTX.gdtr.limit +//66 89 02 mov word ptr [edx],ax +//A1 48 7C 42 00 mov eax, CPUMCTX.gdtr.base +//89 42 02 mov dword ptr [edx+2],eax +//5A pop edx +//58 pop eax + + PATCHGEN_PROLOG(pVM, pPatch, PATCHGEN_DEF_SIZE); + pPB[offset++] = 0x50; // push eax + pPB[offset++] = 0x52; // push edx + + if (pCpu->fPrefix == DISPREFIX_SEG) + { + pPB[offset++] = DISQuerySegPrefixByte(pCpu); + } + pPB[offset++] = 0x8D; // lea edx, dword ptr [dest] + // duplicate and modify modrm byte and additional bytes if present (e.g. direct address) + pPB[offset++] = MAKE_MODRM(pCpu->ModRM.Bits.Mod, DISGREG_EDX , pCpu->ModRM.Bits.Rm); + + i = 3; /* standard offset of modrm bytes */ + if (pCpu->fPrefix == DISPREFIX_OPSIZE) + i++; //skip operand prefix + if (pCpu->fPrefix == DISPREFIX_SEG) + i++; //skip segment prefix + rc = patmPatchReadBytes(pVM, &pPB[offset], (RTRCPTR)((RTGCUINTPTR32)pCurInstrGC + i), pCpu->cbInstr - i); + AssertRCReturn(rc, rc); + offset += (pCpu->cbInstr - i); + + pPB[offset++] = 0x66; // mov ax, CPUMCTX.gdtr.limit + pPB[offset++] = 0xA1; + *(RTRCPTR *)&pPB[offset] = pVM->patm.s.pCPUMCtxGC + offset_limit; + patmPatchAddReloc32(pVM, pPatch, &pPB[offset], FIXUP_ABSOLUTE); + offset += sizeof(RTRCPTR); + + pPB[offset++] = 0x66; // mov word ptr [edx],ax + pPB[offset++] = 0x89; + pPB[offset++] = 0x02; + + pPB[offset++] = 0xA1; // mov eax, CPUMCTX.gdtr.base + *(RTRCPTR *)&pPB[offset] = pVM->patm.s.pCPUMCtxGC + offset_base; + patmPatchAddReloc32(pVM, pPatch, &pPB[offset], FIXUP_ABSOLUTE); + offset += sizeof(RTRCPTR); + + pPB[offset++] = 0x89; // mov dword ptr [edx+2],eax + pPB[offset++] = 0x42; + pPB[offset++] = 0x02; + + pPB[offset++] = 0x5A; // pop edx + pPB[offset++] = 0x58; // pop eax + + PATCHGEN_EPILOG(pPatch, offset); + + return rc; +} + +/** + * Generate a cpuid patch instruction + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pCurInstrGC Guest instruction address + */ +int patmPatchGenCpuid(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC) +{ + uint32_t size; + PATCHGEN_PROLOG(pVM, pPatch, g_patmCpuidRecord.cbFunction); + + size = patmPatchGenCode(pVM, pPatch, pPB, &g_patmCpuidRecord, 0, false); + + PATCHGEN_EPILOG(pPatch, size); + NOREF(pCurInstrGC); + return VINF_SUCCESS; +} + +/** + * Generate the jump from guest to patch code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pReturnAddrGC Guest code target of the jump. 
+ * @param fClearInhibitIRQs Clear inhibit irq flag + */ +int patmPatchGenJumpToGuest(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pReturnAddrGC, bool fClearInhibitIRQs) +{ + int rc = VINF_SUCCESS; + uint32_t size; + + if (fClearInhibitIRQs) + { + rc = patmPatchGenClearInhibitIRQ(pVM, pPatch, pReturnAddrGC); + if (rc == VERR_NO_MEMORY) + return rc; + AssertRCReturn(rc, rc); + } + + PATCHGEN_PROLOG(pVM, pPatch, PATMJumpToGuest_IF1Record.cbFunction); + + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pReturnAddrGC, PATM_LOOKUP_PATCH2GUEST); + + /* Generate code to jump to guest code if IF=1, else fault. */ + size = patmPatchGenCode(pVM, pPatch, pPB, &PATMJumpToGuest_IF1Record, pReturnAddrGC, true); + PATCHGEN_EPILOG(pPatch, size); + + return rc; +} + +/* + * Relative jump from patch code to patch code (no fixup required) + */ +int patmPatchGenPatchJump(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC, RCPTRTYPE(uint8_t *) pPatchAddrGC, bool fAddLookupRecord) +{ + int32_t displ; + int rc = VINF_SUCCESS; + + Assert(PATMIsPatchGCAddr(pVM, pPatchAddrGC)); + PATCHGEN_PROLOG(pVM, pPatch, SIZEOF_NEARJUMP32); + + if (fAddLookupRecord) + { + /* Add lookup record for patch to guest address translation */ + patmR3AddP2GLookupRecord(pVM, pPatch, pPB, pCurInstrGC, PATM_LOOKUP_PATCH2GUEST); + } + + pPB[0] = 0xE9; //JMP + + displ = pPatchAddrGC - (PATCHCODE_PTR_GC(pPatch) + pPatch->uCurPatchOffset + SIZEOF_NEARJUMP32); + + *(uint32_t *)&pPB[1] = displ; + + PATCHGEN_EPILOG(pPatch, SIZEOF_NEARJUMP32); + + return rc; +} diff --git a/src/VBox/VMM/VMMR3/PATMPatch.h b/src/VBox/VMM/VMMR3/PATMPatch.h new file mode 100644 index 00000000..d5f9c2d7 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMPatch.h @@ -0,0 +1,156 @@ +/* $Id: PATMPatch.h $ */ +/** @file + * PATMPatch - Internal header file. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VMM_INCLUDED_SRC_VMMR3_PATMPatch_h +#define VMM_INCLUDED_SRC_VMMR3_PATMPatch_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +int patmPatchAddReloc32(PVM pVM, PPATCHINFO pPatch, uint8_t *pRelocHC, uint32_t uType, RTRCPTR pSource = 0, RTRCPTR pDest = 0); +int patmPatchAddJump(PVM pVM, PPATCHINFO pPatch, uint8_t *pJumpHC, uint32_t offset, RTRCPTR pTargetGC, uint32_t opcode); + +int patmPatchGenCpuid(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC); +int patmPatchGenSxDT(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC); +int patmPatchGenSldtStr(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC); +int patmPatchGenMovControl(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu); +int patmPatchGenMovDebug(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu); +int patmPatchGenMovFromSS(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC); +int patmPatchGenRelJump(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pTargetGC, uint32_t opcode, bool fSizeOverride); +int patmPatchGenLoop(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pTargetGC, uint32_t opcode, bool fSizeOverride); +int patmPatchGenPushf(PVM pVM, PPATCHINFO pPatch, bool fSizeOverride); +int patmPatchGenPopf(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pReturnAddrGC, bool fSizeOverride, bool fGenJumpBack); +int patmPatchGenSti(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC, RTRCPTR pNextInstrGC); + +int patmPatchGenCli(PVM pVM, PPATCHINFO pPatch); +int patmPatchGenIret(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC, bool fSizeOverride); +int patmPatchGenDuplicate(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pCurInstrGC); +int patmPatchGenPushCS(PVM pVM, PPATCHINFO pPatch); + +int patmPatchGenStats(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC); + +int patmPatchGenCall(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pInstrGC, RTRCPTR pTargetGC, bool fIndirect); +int patmPatchGenRet(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RCPTRTYPE(uint8_t *) pCurInstrGC); + +int patmPatchGenPatchJump(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC, RCPTRTYPE(uint8_t *) pPatchAddrGC, bool fAddLookupRecord = true); + +/** + * Generate indirect jump to unknown destination + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pCpu Disassembly state + * @param pCurInstrGC Current instruction address + */ +int patmPatchGenJump(PVM pVM, PPATCHINFO pPatch, DISCPUSTATE *pCpu, RTRCPTR pCurInstrGC); + +/** + * Generate a trap handler entrypoint + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pTrapHandlerGC IDT handler address + */ +int patmPatchGenTrapEntry(PVM pVM, PPATCHINFO pPatch, RTRCPTR pTrapHandlerGC); + +/** + * Generate an interrupt handler entrypoint + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record + * @param pIntHandlerGC IDT handler address + */ +int patmPatchGenIntEntry(PVM pVM, PPATCHINFO pPatch, RTRCPTR pIntHandlerGC); + +/** + * Generate the jump from guest to patch code + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch record. + * @param pReturnAddrGC Guest code target of the jump. + * @param fClearInhibitIRQs Clear inhibit irq flag. 
+ */ +int patmPatchGenJumpToGuest(PVM pVM, PPATCHINFO pPatch, RCPTRTYPE(uint8_t *) pReturnAddrGC, bool fClearInhibitIRQs = false); + +/** + * Generate illegal instruction (int 3) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * + */ +int patmPatchGenIllegalInstr(PVM pVM, PPATCHINFO pPatch); + +/** + * Set PATM interrupt flag + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pInstrGC Corresponding guest instruction + * + */ +int patmPatchGenSetPIF(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC); + +/** + * Clear PATM interrupt flag + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pInstrGC Corresponding guest instruction + * + */ +int patmPatchGenClearPIF(PVM pVM, PPATCHINFO pPatch, RTRCPTR pInstrGC); + +/** + * Clear PATM inhibit irq flag + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pNextInstrGC Next guest instruction + */ +int patmPatchGenClearInhibitIRQ(PVM pVM, PPATCHINFO pPatch, RTRCPTR pNextInstrGC); + +/** + * Check virtual IF flag and jump back to original guest code if set + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * @param pCurInstrGC Guest context pointer to the current instruction + * + */ +int patmPatchGenCheckIF(PVM pVM, PPATCHINFO pPatch, RTRCPTR pCurInstrGC); + +/** + * Generate all global patm functions + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatch Patch structure + * + */ +int patmPatchGenGlobalFunctions(PVM pVM, PPATCHINFO pPatch); + +#endif /* !VMM_INCLUDED_SRC_VMMR3_PATMPatch_h */ diff --git a/src/VBox/VMM/VMMR3/PATMR3Dbg.cpp b/src/VBox/VMM/VMMR3/PATMR3Dbg.cpp new file mode 100644 index 00000000..a96f5d6e --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMR3Dbg.cpp @@ -0,0 +1,404 @@ +/* $Id: PATMR3Dbg.cpp $ */ +/** @file + * PATM - Dynamic Guest OS Patching Manager, Debugger Related Parts. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PATM +#include +#include +#include +#include "PATMInternal.h" +#include "PATMA.h" +#include +#include +#include + +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Adds a structure member to a debug (pseudo) module as a symbol. 
*/ +#define ADD_MEMBER(a_hDbgMod, a_Struct, a_Member, a_pszName) \ + do { \ + rc = RTDbgModSymbolAdd(hDbgMod, a_pszName, 0 /*iSeg*/, RT_OFFSETOF(a_Struct, a_Member), \ + RT_SIZEOFMEMB(a_Struct, a_Member), 0 /*fFlags*/, NULL /*piOrdinal*/); \ + AssertRC(rc); \ + } while (0) + +/** Adds a structure member to a debug (pseudo) module as a symbol. */ +#define ADD_FUNC(a_hDbgMod, a_BaseRCPtr, a_FuncRCPtr, a_cbFunc, a_pszName) \ + do { \ + int rcAddFunc = RTDbgModSymbolAdd(hDbgMod, a_pszName, 0 /*iSeg*/, \ + (RTRCUINTPTR)a_FuncRCPtr - (RTRCUINTPTR)(a_BaseRCPtr), \ + a_cbFunc, 0 /*fFlags*/, NULL /*piOrdinal*/); \ + AssertRC(rcAddFunc); \ + } while (0) + + + +/** + * Called by PATMR3Init. + * + * @param pVM The cross context VM structure. + */ +void patmR3DbgInit(PVM pVM) +{ + pVM->patm.s.hDbgModPatchMem = NIL_RTDBGMOD; +} + + +/** + * Called by PATMR3Term. + * + * @param pVM The cross context VM structure. + */ +void patmR3DbgTerm(PVM pVM) +{ + if (pVM->patm.s.hDbgModPatchMem != NIL_RTDBGMOD) + { + RTDbgModRelease(pVM->patm.s.hDbgModPatchMem); + pVM->patm.s.hDbgModPatchMem = NIL_RTDBGMOD; + } +} + + +/** + * Called by when the patch memory is reinitialized. + * + * @param pVM The cross context VM structure. + */ +void patmR3DbgReset(PVM pVM) +{ + if (pVM->patm.s.hDbgModPatchMem != NIL_RTDBGMOD) + { + RTDbgModRemoveAll(pVM->patm.s.hDbgModPatchMem, true); + } +} + + +static size_t patmR3DbgDescribePatchAsSymbol(PPATMPATCHREC pPatchRec, char *pszName, size_t cbLeft) +{ + char * const pszNameStart = pszName; +#define ADD_SZ(a_sz) \ + do { \ + if (cbLeft >= sizeof(a_sz)) \ + { \ + memcpy(pszName, a_sz, sizeof(a_sz)); \ + pszName += sizeof(a_sz) - 1; \ + cbLeft -= sizeof(a_sz) - 1;\ + }\ + } while (0) + + /* Start the name off with the address of the guest code. */ + size_t cch = RTStrPrintf(pszName, cbLeft, "Patch_%#08x", pPatchRec->patch.pPrivInstrGC); + cbLeft -= cch; + pszName += cch; + + /* Append flags. 
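Each PATMFL_* flag that is set appends a short suffix to the symbol name, so the patch type is visible wherever the symbol shows up.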
*/ + uint64_t fFlags = pPatchRec->patch.flags; + if (fFlags & PATMFL_INTHANDLER) + ADD_SZ("_IntHandler"); + if (fFlags & PATMFL_SYSENTER) + ADD_SZ("_SysEnter"); + if (fFlags & PATMFL_GUEST_SPECIFIC) + ADD_SZ("_GuestSpecific"); + if (fFlags & PATMFL_USER_MODE) + ADD_SZ("_UserMode"); + if (fFlags & PATMFL_IDTHANDLER) + ADD_SZ("_IdtHnd"); + if (fFlags & PATMFL_TRAPHANDLER) + ADD_SZ("_TrapHnd"); + if (fFlags & PATMFL_DUPLICATE_FUNCTION) + ADD_SZ("_DupFunc"); + if (fFlags & PATMFL_REPLACE_FUNCTION_CALL) + ADD_SZ("_ReplFunc"); + if (fFlags & PATMFL_TRAPHANDLER_WITH_ERRORCODE) + ADD_SZ("_TrapHndErrCd"); + if (fFlags & PATMFL_MMIO_ACCESS) + ADD_SZ("_MmioAccess"); + if (fFlags & PATMFL_SYSENTER_XP) + ADD_SZ("_SysEnterXP"); + if (fFlags & PATMFL_INT3_REPLACEMENT) + ADD_SZ("_Int3Repl"); + if (fFlags & PATMFL_SUPPORT_CALLS) + ADD_SZ("_SupCalls"); + if (fFlags & PATMFL_SUPPORT_INDIRECT_CALLS) + ADD_SZ("_SupIndirCalls"); + if (fFlags & PATMFL_IDTHANDLER_WITHOUT_ENTRYPOINT) + ADD_SZ("_IdtHandlerWE"); + if (fFlags & PATMFL_INHIBIT_IRQS) + ADD_SZ("_InhibitIrqs"); + if (fFlags & PATMFL_RECOMPILE_NEXT) + ADD_SZ("_RecompileNext"); + if (fFlags & PATMFL_CALLABLE_AS_FUNCTION) + ADD_SZ("_Callable"); + if (fFlags & PATMFL_TRAMPOLINE) + ADD_SZ("_Trampoline"); + if (fFlags & PATMFL_PATCHED_GUEST_CODE) + ADD_SZ("_PatchedGuestCode"); + if (fFlags & PATMFL_MUST_INSTALL_PATCHJMP) + ADD_SZ("_MustInstallPatchJmp"); + if (fFlags & PATMFL_INT3_REPLACEMENT_BLOCK) + ADD_SZ("_Int3ReplBlock"); + if (fFlags & PATMFL_EXTERNAL_JUMP_INSIDE) + ADD_SZ("_ExtJmp"); + if (fFlags & PATMFL_CODE_REFERENCED) + ADD_SZ("_CodeRefed"); + + return pszName - pszNameStart; +} + + +/** + * Called when a new patch is added or when first populating the address space. + * + * @param pVM The cross context VM structure. + * @param pPatchRec The patch record. + */ +void patmR3DbgAddPatch(PVM pVM, PPATMPATCHREC pPatchRec) +{ + if ( pVM->patm.s.hDbgModPatchMem != NIL_RTDBGMOD + && pPatchRec->patch.pPatchBlockOffset > 0 + && !(pPatchRec->patch.flags & PATMFL_GLOBAL_FUNCTIONS)) + { + /** @todo find a cheap way of checking whether we've already added the patch. + * Using a flag would be nice, except I don't want to consider saved + * state considerations right now (I don't recall if we're still + * depending on structure layout there or not). */ + char szName[256]; + size_t off = patmR3DbgDescribePatchAsSymbol(pPatchRec, szName, sizeof(szName)); + + /* If we have a symbol near the guest address, append that. */ + if (off + 8 <= sizeof(szName)) + { + RTDBGSYMBOL Symbol; + RTGCINTPTR offDisp; + DBGFADDRESS Addr; + + int rc = DBGFR3AsSymbolByAddr(pVM->pUVM, DBGF_AS_GLOBAL, + DBGFR3AddrFromFlat(pVM->pUVM, &Addr, pPatchRec->patch.pPrivInstrGC), + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + &offDisp, &Symbol, NULL /*phMod*/); + if (RT_SUCCESS(rc)) + { + szName[off++] = '_'; + szName[off++] = '_'; + RTStrCopy(&szName[off], sizeof(szName) - off, Symbol.szName); + } + } + + /* Add it (may fail due to enable/disable patches). */ + RTDbgModSymbolAdd(pVM->patm.s.hDbgModPatchMem, szName, 0 /*iSeg*/, + pPatchRec->patch.pPatchBlockOffset, + pPatchRec->patch.cbPatchBlockSize, + 0 /*fFlags*/, NULL /*piOrdinal*/); + } +} + + +/** + * Enumeration callback used by patmR3DbgAddPatches + * + * @returns 0 (continue enum) + * @param pNode The patch record node. + * @param pvUser The cross context VM structure. 
+ */ +static DECLCALLBACK(int) patmR3DbgAddPatchCallback(PAVLOU32NODECORE pNode, void *pvUser) +{ + patmR3DbgAddPatch((PVM)pvUser, (PPATMPATCHREC)pNode); + return 0; +} + + +/** + * Populates an empty "patches" (hDbgModPatchMem) module with patch symbols. + * + * @param pVM The cross context VM structure. + * @param hDbgMod The debug module handle. + */ +static void patmR3DbgAddPatches(PVM pVM, RTDBGMOD hDbgMod) +{ + /* + * Global functions and a start marker. + */ + ADD_FUNC(hDbgMod, pVM->patm.s.pPatchMemGC, pVM->patm.s.pfnHelperCallGC, g_patmLookupAndCallRecord.cbFunction, "PATMLookupAndCall"); + ADD_FUNC(hDbgMod, pVM->patm.s.pPatchMemGC, pVM->patm.s.pfnHelperRetGC, g_patmRetFunctionRecord.cbFunction, "PATMRetFunction"); + ADD_FUNC(hDbgMod, pVM->patm.s.pPatchMemGC, pVM->patm.s.pfnHelperJumpGC, g_patmLookupAndJumpRecord.cbFunction, "PATMLookupAndJump"); + ADD_FUNC(hDbgMod, pVM->patm.s.pPatchMemGC, pVM->patm.s.pfnHelperIretGC, g_patmIretFunctionRecord.cbFunction, "PATMIretFunction"); + + ADD_FUNC(hDbgMod, pVM->patm.s.pPatchMemGC, pVM->patm.s.pPatchMemGC, 0, "PatchMemStart"); + ADD_FUNC(hDbgMod, pVM->patm.s.pPatchMemGC, pVM->patm.s.pGCStackGC, PATM_STACK_TOTAL_SIZE, "PATMStack"); + + /* + * The patches. + */ + RTAvloU32DoWithAll(&pVM->patm.s.PatchLookupTreeHC->PatchTree, true /*fFromLeft*/, patmR3DbgAddPatchCallback, pVM); +} + + +/** + * Populate DBGF_AS_RC with PATM symbols. + * + * Called by dbgfR3AsLazyPopulate when DBGF_AS_RC or DBGF_AS_RC_AND_GC_GLOBAL is + * accessed for the first time. + * + * @param pVM The cross context VM structure. + * @param hDbgAs The DBGF_AS_RC address space handle. + */ +VMMR3_INT_DECL(void) PATMR3DbgPopulateAddrSpace(PVM pVM, RTDBGAS hDbgAs) +{ + AssertReturnVoid(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* + * Add a fake debug module for the PATMGCSTATE structure. + */ + RTDBGMOD hDbgMod; + int rc = RTDbgModCreate(&hDbgMod, "patmgcstate", sizeof(PATMGCSTATE), 0 /*fFlags*/); + if (RT_SUCCESS(rc)) + { + ADD_MEMBER(hDbgMod, PATMGCSTATE, uVMFlags, "uVMFlags"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, uPendingAction, "uPendingAction"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, uPatchCalls, "uPatchCalls"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, uScratch, "uScratch"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, uIretEFlags, "uIretEFlags"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, uIretCS, "uIretCS"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, uIretEIP, "uIretEIP"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, Psp, "Psp"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, fPIF, "fPIF"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, GCPtrInhibitInterrupts, "GCPtrInhibitInterrupts"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, GCCallPatchTargetAddr, "GCCallPatchTargetAddr"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, GCCallReturnAddr, "GCCallReturnAddr"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, Restore.uEAX, "Restore.uEAX"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, Restore.uECX, "Restore.uECX"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, Restore.uEDI, "Restore.uEDI"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, Restore.eFlags, "Restore.eFlags"); + ADD_MEMBER(hDbgMod, PATMGCSTATE, Restore.uFlags, "Restore.uFlags"); + + rc = RTDbgAsModuleLink(hDbgAs, hDbgMod, pVM->patm.s.pGCStateGC, 0 /*fFlags*/); + AssertLogRelRC(rc); + RTDbgModRelease(hDbgMod); + } + + /* + * Add something for the stats so we get some kind of symbols for + * references to them while disassembling patches. 
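+ * (A single PATMMemStatsStart symbol covering the whole PATM_STAT_MEMSIZE area is sufficient for that.)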
+ */ + rc = RTDbgModCreate(&hDbgMod, "patmstats", PATM_STAT_MEMSIZE, 0 /*fFlags*/); + if (RT_SUCCESS(rc)) + { + ADD_FUNC(hDbgMod, pVM->patm.s.pStatsGC, pVM->patm.s.pStatsGC, PATM_STAT_MEMSIZE, "PATMMemStatsStart"); + + rc = RTDbgAsModuleLink(hDbgAs, hDbgMod, pVM->patm.s.pStatsGC, 0 /*fFlags*/); + AssertLogRelRC(rc); + RTDbgModRelease(hDbgMod); + } + + /* + * Add a fake debug module for the patches and stack. + */ + rc = RTDbgModCreate(&hDbgMod, "patches", pVM->patm.s.cbPatchMem + PATM_STACK_TOTAL_SIZE + PAGE_SIZE, 0 /*fFlags*/); + if (RT_SUCCESS(rc)) + { + pVM->patm.s.hDbgModPatchMem = hDbgMod; + patmR3DbgAddPatches(pVM, hDbgMod); + + rc = RTDbgAsModuleLink(hDbgAs, hDbgMod, pVM->patm.s.pPatchMemGC, 0 /*fFlags*/); + AssertLogRelRC(rc); + } +} + + +/** + * Annotates an instruction if patched. + * + * @param pVM The cross context VM structure. + * @param RCPtr The instruction address. + * @param cbInstr The instruction length. + * @param pszBuf The output buffer. This will be an empty string if the + * instruction wasn't patched. If it's patched, it will + * hold a symbol-like string describing the patch. + * @param cbBuf The size of the output buffer. + */ +VMMR3_INT_DECL(void) PATMR3DbgAnnotatePatchedInstruction(PVM pVM, RTRCPTR RCPtr, uint8_t cbInstr, char *pszBuf, size_t cbBuf) +{ + /* + * Always zero the buffer. + */ + AssertReturnVoid(cbBuf > 0); + *pszBuf = '\0'; + + /* + * Drop out immediately if it cannot be a patched instruction. + */ + if (!PATMIsEnabled(pVM)) + return; + if ( RCPtr < pVM->patm.s.pPatchedInstrGCLowest + || RCPtr > pVM->patm.s.pPatchedInstrGCHighest) + return; + + /* + * Look for a patch record covering any part of the instruction. + * + * The first query results in a patched less or equal to RCPtr. While the + * second results in one that's greater than RCPtr. + */ + PPATMPATCHREC pPatchRec; + pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, RCPtr, false /*fFromAbove*/); + if ( !pPatchRec + || RCPtr - pPatchRec->patch.pPrivInstrGC > pPatchRec->patch.cbPrivInstr) + { + pPatchRec = (PPATMPATCHREC)RTAvloU32GetBestFit(&pVM->patm.s.PatchLookupTreeHC->PatchTree, RCPtr, true /*fFromAbove*/); + if ( !pPatchRec + || (RTRCPTR)(RCPtr + cbInstr) < pPatchRec->patch.pPrivInstrGC ) + return; + } + + /* + * Lazy bird uses the symbol name generation code for describing the patch. 
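+ * The patch state, the patch jump size and, when a patch block exists, its address range are then appended below.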
+ */ + size_t off = patmR3DbgDescribePatchAsSymbol(pPatchRec, pszBuf, cbBuf); + if (off + 1 < cbBuf) + { + const char *pszState; + switch (pPatchRec->patch.uState) + { + case PATCH_REFUSED: pszState = "Refused"; break; + case PATCH_DISABLED: pszState = "Disabled"; break; + case PATCH_ENABLED: pszState = "Enabled"; break; + case PATCH_UNUSABLE: pszState = "Unusable"; break; + case PATCH_DIRTY: pszState = "Dirty"; break; + case PATCH_DISABLE_PENDING: pszState = "DisablePending"; break; + default: pszState = "State???"; AssertFailed(); break; + } + + if (pPatchRec->patch.cbPatchBlockSize > 0) + off += RTStrPrintf(&pszBuf[off], cbBuf - off, " - %s (%u b) - %#x LB %#x", + pszState, pPatchRec->patch.cbPatchJump, + pPatchRec->patch.pPatchBlockOffset + pVM->patm.s.pPatchMemGC, + pPatchRec->patch.cbPatchBlockSize); + else + off += RTStrPrintf(&pszBuf[off], cbBuf - off, " - %s (%u b)", pszState, pPatchRec->patch.cbPatchJump); + } + +} + diff --git a/src/VBox/VMM/VMMR3/PATMSSM.cpp b/src/VBox/VMM/VMMR3/PATMSSM.cpp new file mode 100644 index 00000000..32aa5044 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PATMSSM.cpp @@ -0,0 +1,1549 @@ +/* $Id: PATMSSM.cpp $ */ +/** @file + * PATMSSM - Dynamic Guest OS Patching Manager; Save and load state + * + * NOTE: CSAM assumes patch memory is never reused!! + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PATM +#include +#include +#include +#include +#include +#include +#include +#include +#include "PATMInternal.h" +#include "PATMPatch.h" +#include "PATMA.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Patch information - SSM version. + * + * the difference is the missing pTrampolinePatchesHead member + * to avoid changing the saved state version for now (will come later). + */ +typedef struct PATCHINFOSSM +{ + uint32_t uState; + uint32_t uOldState; + DISCPUMODE uOpMode; + + /* GC pointer of privileged instruction */ + RCPTRTYPE(uint8_t *) pPrivInstrGC; + R3PTRTYPE(uint8_t *) unusedHC; /** @todo Can't remove due to structure size dependencies in saved states. */ + uint8_t aPrivInstr[MAX_INSTR_SIZE]; + uint32_t cbPrivInstr; + uint32_t opcode; //opcode for priv instr (OP_*) + uint32_t cbPatchJump; //patch jump size + + /* Only valid for PATMFL_JUMP_CONFLICT patches */ + RTRCPTR pPatchJumpDestGC; + + RTGCUINTPTR32 pPatchBlockOffset; + uint32_t cbPatchBlockSize; + uint32_t uCurPatchOffset; +#if HC_ARCH_BITS == 64 + uint32_t Alignment0; /**< Align flags correctly. */ +#endif + + uint64_t flags; + + /** + * Lowest and highest patched GC instruction address. To optimize searches. + */ + RTRCPTR pInstrGCLowest; + RTRCPTR pInstrGCHighest; + + /* Tree of fixup records for the patch. 
*/ + R3PTRTYPE(PAVLPVNODECORE) FixupTree; + uint32_t nrFixups; + + /* Tree of jumps inside the generated patch code. */ + uint32_t nrJumpRecs; + R3PTRTYPE(PAVLPVNODECORE) JumpTree; + + /** + * Lookup trees for determining the corresponding guest address of an + * instruction in the patch block. + */ + R3PTRTYPE(PAVLU32NODECORE) Patch2GuestAddrTree; + R3PTRTYPE(PAVLU32NODECORE) Guest2PatchAddrTree; + uint32_t nrPatch2GuestRecs; +#if HC_ARCH_BITS == 64 + uint32_t Alignment1; +#endif + + /* Unused, but can't remove due to structure size dependencies in the saved state. */ + PATMP2GLOOKUPREC_OBSOLETE unused; + + /* Temporary information during patch creation. Don't waste hypervisor memory for this. */ + R3PTRTYPE(PPATCHINFOTEMP) pTempInfo; + + /* Count the number of writes to the corresponding guest code. */ + uint32_t cCodeWrites; + + /* Count the number of invalid writes to pages monitored for the patch. */ + //some statistics to determine if we should keep this patch activated + uint32_t cTraps; + + uint32_t cInvalidWrites; + + // Index into the uPatchRun and uPatchTrap arrays (0..MAX_PATCHES-1) + uint32_t uPatchIdx; + + /* First opcode byte, that's overwritten when a patch is marked dirty. */ + uint8_t bDirtyOpcode; + uint8_t Alignment2[7]; /**< Align the structure size on a 8-byte boundary. */ +} PATCHINFOSSM, *PPATCHINFOSSM; + +/** + * Lookup record for patches - SSM version. + */ +typedef struct PATMPATCHRECSSM +{ + /** The key is a GC virtual address. */ + AVLOU32NODECORE Core; + /** The key is a patch offset. */ + AVLOU32NODECORE CoreOffset; + + PATCHINFOSSM patch; +} PATMPATCHRECSSM, *PPATMPATCHRECSSM; + + +/** + * Callback arguments. + */ +typedef struct PATMCALLBACKARGS +{ + PVM pVM; + PSSMHANDLE pSSM; + PPATMPATCHREC pPatchRec; +} PATMCALLBACKARGS; +typedef PATMCALLBACKARGS *PPATMCALLBACKARGS; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int patmCorrectFixup(PVM pVM, unsigned ulSSMVersion, PATM &patmInfo, PPATCHINFO pPatch, PRELOCREC pRec, + int32_t offset, RTRCPTR *pFixup); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** + * SSM descriptor table for the PATM structure. + */ +static SSMFIELD const g_aPatmFields[] = +{ + /** @todo there are a bunch more fields here which can be marked as ignored. 
*/ + SSMFIELD_ENTRY_IGNORE( PATM, offVM), + SSMFIELD_ENTRY_RCPTR( PATM, pPatchMemGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pPatchMemHC), + SSMFIELD_ENTRY( PATM, cbPatchMem), + SSMFIELD_ENTRY( PATM, offPatchMem), + SSMFIELD_ENTRY( PATM, fOutOfMemory), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 3), + SSMFIELD_ENTRY( PATM, deltaReloc), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pGCStateHC), + SSMFIELD_ENTRY_RCPTR( PATM, pGCStateGC), + SSMFIELD_ENTRY_RCPTR( PATM, pGCStackGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pGCStackHC), + SSMFIELD_ENTRY_RCPTR( PATM, pCPUMCtxGC), + SSMFIELD_ENTRY_RCPTR( PATM, pStatsGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pStatsHC), + SSMFIELD_ENTRY( PATM, uCurrentPatchIdx), + SSMFIELD_ENTRY( PATM, ulCallDepth), + SSMFIELD_ENTRY( PATM, cPageRecords), + SSMFIELD_ENTRY_RCPTR( PATM, pPatchedInstrGCLowest), + SSMFIELD_ENTRY_RCPTR( PATM, pPatchedInstrGCHighest), + SSMFIELD_ENTRY_RCPTR( PATM, PatchLookupTreeGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, PatchLookupTreeHC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperCallGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperRetGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperJumpGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperIretGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pGlobalPatchRec), + SSMFIELD_ENTRY_RCPTR( PATM, pfnSysEnterGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnSysEnterPatchGC), + SSMFIELD_ENTRY( PATM, uSysEnterPatchIdx), + SSMFIELD_ENTRY_RCPTR( PATM, pvFaultMonitor), + SSMFIELD_ENTRY_GCPHYS( PATM, mmio.GCPhys), + SSMFIELD_ENTRY_RCPTR( PATM, mmio.pCachedData), + SSMFIELD_ENTRY_IGN_RCPTR( PATM, mmio.Alignment0), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, savedstate.pSSM), + SSMFIELD_ENTRY( PATM, savedstate.cPatches), + SSMFIELD_ENTRY_PAD_HC64( PATM, savedstate.Alignment0, sizeof(uint32_t)), + SSMFIELD_ENTRY_IGNORE( PATM, StatNrOpcodeRead), + SSMFIELD_ENTRY_IGNORE( PATM, StatDisabled), + SSMFIELD_ENTRY_IGNORE( PATM, StatUnusable), + SSMFIELD_ENTRY_IGNORE( PATM, StatEnabled), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalled), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalledFunctionPatches), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalledTrampoline), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalledJump), + SSMFIELD_ENTRY_IGNORE( PATM, StatInt3Callable), + SSMFIELD_ENTRY_IGNORE( PATM, StatInt3BlockRun), + SSMFIELD_ENTRY_IGNORE( PATM, StatOverwritten), + SSMFIELD_ENTRY_IGNORE( PATM, StatFixedConflicts), + SSMFIELD_ENTRY_IGNORE( PATM, StatFlushed), + SSMFIELD_ENTRY_IGNORE( PATM, StatPageBoundaryCrossed), + SSMFIELD_ENTRY_IGNORE( PATM, StatMonitored), + SSMFIELD_ENTRY_IGNORE( PATM, StatHandleTrap), + SSMFIELD_ENTRY_IGNORE( PATM, StatSwitchBack), + SSMFIELD_ENTRY_IGNORE( PATM, StatSwitchBackFail), + SSMFIELD_ENTRY_IGNORE( PATM, StatPATMMemoryUsed), + SSMFIELD_ENTRY_IGNORE( PATM, StatDuplicateREQSuccess), + SSMFIELD_ENTRY_IGNORE( PATM, StatDuplicateREQFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatDuplicateUseExisting), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionFound), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionNotFound), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWrite), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWriteDetect), + SSMFIELD_ENTRY_IGNORE( PATM, StatDirty), + SSMFIELD_ENTRY_IGNORE( PATM, StatPushTrap), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWriteInterpreted), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWriteInterpretedFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatSysEnter), + SSMFIELD_ENTRY_IGNORE( PATM, StatSysExit), + SSMFIELD_ENTRY_IGNORE( PATM, StatEmulIret), + SSMFIELD_ENTRY_IGNORE( PATM, StatEmulIretFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstrDirty), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstrDirtyGood), + 
SSMFIELD_ENTRY_IGNORE( PATM, StatInstrDirtyBad), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchPageInserted), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchPageRemoved), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchRefreshSuccess), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchRefreshFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenRet), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenRetReused), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenJump), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenCall), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenPopf), + SSMFIELD_ENTRY_IGNORE( PATM, StatCheckPendingIRQ), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionLookupReplace), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionLookupInsert), + SSMFIELD_ENTRY_IGNORE( PATM, StatU32FunctionMaxSlotsUsed), + SSMFIELD_ENTRY_IGNORE( PATM, Alignment0), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the PATM structure starting with r86139. + */ +static SSMFIELD const g_aPatmFields86139[] = +{ + /** @todo there are a bunch more fields here which can be marked as ignored. */ + SSMFIELD_ENTRY_IGNORE( PATM, offVM), + SSMFIELD_ENTRY_RCPTR( PATM, pPatchMemGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pPatchMemHC), + SSMFIELD_ENTRY( PATM, cbPatchMem), + SSMFIELD_ENTRY( PATM, offPatchMem), + SSMFIELD_ENTRY( PATM, fOutOfMemory), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 3), + SSMFIELD_ENTRY( PATM, deltaReloc), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pGCStateHC), + SSMFIELD_ENTRY_RCPTR( PATM, pGCStateGC), + SSMFIELD_ENTRY_RCPTR( PATM, pGCStackGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pGCStackHC), + SSMFIELD_ENTRY_RCPTR( PATM, pCPUMCtxGC), + SSMFIELD_ENTRY_RCPTR( PATM, pStatsGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pStatsHC), + SSMFIELD_ENTRY( PATM, uCurrentPatchIdx), + SSMFIELD_ENTRY( PATM, ulCallDepth), + SSMFIELD_ENTRY( PATM, cPageRecords), + SSMFIELD_ENTRY_RCPTR( PATM, pPatchedInstrGCLowest), + SSMFIELD_ENTRY_RCPTR( PATM, pPatchedInstrGCHighest), + SSMFIELD_ENTRY_RCPTR( PATM, PatchLookupTreeGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, PatchLookupTreeHC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperCallGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperRetGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperJumpGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnHelperIretGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, pGlobalPatchRec), + SSMFIELD_ENTRY_RCPTR( PATM, pfnSysEnterGC), + SSMFIELD_ENTRY_RCPTR( PATM, pfnSysEnterPatchGC), + SSMFIELD_ENTRY( PATM, uSysEnterPatchIdx), + SSMFIELD_ENTRY_RCPTR( PATM, pvFaultMonitor), + SSMFIELD_ENTRY_GCPHYS( PATM, mmio.GCPhys), + SSMFIELD_ENTRY_RCPTR( PATM, mmio.pCachedData), + SSMFIELD_ENTRY_IGN_RCPTR( PATM, mmio.Alignment0), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, savedstate.pSSM), + SSMFIELD_ENTRY( PATM, savedstate.cPatches), + SSMFIELD_ENTRY_PAD_HC64( PATM, savedstate.Alignment0, sizeof(uint32_t)), + SSMFIELD_ENTRY_IGN_HCPTR( PATM, hDbgModPatchMem), + SSMFIELD_ENTRY_PAD_HC32( PATM, Alignment0, sizeof(uint32_t)), + SSMFIELD_ENTRY_IGNORE( PATM, StatNrOpcodeRead), + SSMFIELD_ENTRY_IGNORE( PATM, StatDisabled), + SSMFIELD_ENTRY_IGNORE( PATM, StatUnusable), + SSMFIELD_ENTRY_IGNORE( PATM, StatEnabled), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalled), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalledFunctionPatches), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalledTrampoline), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstalledJump), + SSMFIELD_ENTRY_IGNORE( PATM, StatInt3Callable), + SSMFIELD_ENTRY_IGNORE( PATM, StatInt3BlockRun), + SSMFIELD_ENTRY_IGNORE( PATM, StatOverwritten), + SSMFIELD_ENTRY_IGNORE( PATM, StatFixedConflicts), + SSMFIELD_ENTRY_IGNORE( PATM, StatFlushed), + SSMFIELD_ENTRY_IGNORE( PATM, StatPageBoundaryCrossed), + 
SSMFIELD_ENTRY_IGNORE( PATM, StatMonitored), + SSMFIELD_ENTRY_IGNORE( PATM, StatHandleTrap), + SSMFIELD_ENTRY_IGNORE( PATM, StatSwitchBack), + SSMFIELD_ENTRY_IGNORE( PATM, StatSwitchBackFail), + SSMFIELD_ENTRY_IGNORE( PATM, StatPATMMemoryUsed), + SSMFIELD_ENTRY_IGNORE( PATM, StatDuplicateREQSuccess), + SSMFIELD_ENTRY_IGNORE( PATM, StatDuplicateREQFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatDuplicateUseExisting), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionFound), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionNotFound), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWrite), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWriteDetect), + SSMFIELD_ENTRY_IGNORE( PATM, StatDirty), + SSMFIELD_ENTRY_IGNORE( PATM, StatPushTrap), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWriteInterpreted), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchWriteInterpretedFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatSysEnter), + SSMFIELD_ENTRY_IGNORE( PATM, StatSysExit), + SSMFIELD_ENTRY_IGNORE( PATM, StatEmulIret), + SSMFIELD_ENTRY_IGNORE( PATM, StatEmulIretFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstrDirty), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstrDirtyGood), + SSMFIELD_ENTRY_IGNORE( PATM, StatInstrDirtyBad), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchPageInserted), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchPageRemoved), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchRefreshSuccess), + SSMFIELD_ENTRY_IGNORE( PATM, StatPatchRefreshFailed), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenRet), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenRetReused), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenJump), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenCall), + SSMFIELD_ENTRY_IGNORE( PATM, StatGenPopf), + SSMFIELD_ENTRY_IGNORE( PATM, StatCheckPendingIRQ), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionLookupReplace), + SSMFIELD_ENTRY_IGNORE( PATM, StatFunctionLookupInsert), + SSMFIELD_ENTRY_IGNORE( PATM, StatU32FunctionMaxSlotsUsed), + SSMFIELD_ENTRY_IGNORE( PATM, Alignment0), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the PATMGCSTATE structure. + */ +static SSMFIELD const g_aPatmGCStateFields[] = +{ + SSMFIELD_ENTRY( PATMGCSTATE, uVMFlags), + SSMFIELD_ENTRY( PATMGCSTATE, uPendingAction), + SSMFIELD_ENTRY( PATMGCSTATE, uPatchCalls), + SSMFIELD_ENTRY( PATMGCSTATE, uScratch), + SSMFIELD_ENTRY( PATMGCSTATE, uIretEFlags), + SSMFIELD_ENTRY( PATMGCSTATE, uIretCS), + SSMFIELD_ENTRY( PATMGCSTATE, uIretEIP), + SSMFIELD_ENTRY( PATMGCSTATE, Psp), + SSMFIELD_ENTRY( PATMGCSTATE, fPIF), + SSMFIELD_ENTRY_RCPTR( PATMGCSTATE, GCPtrInhibitInterrupts), + SSMFIELD_ENTRY_RCPTR( PATMGCSTATE, GCCallPatchTargetAddr), + SSMFIELD_ENTRY_RCPTR( PATMGCSTATE, GCCallReturnAddr), + SSMFIELD_ENTRY( PATMGCSTATE, Restore.uEAX), + SSMFIELD_ENTRY( PATMGCSTATE, Restore.uECX), + SSMFIELD_ENTRY( PATMGCSTATE, Restore.uEDI), + SSMFIELD_ENTRY( PATMGCSTATE, Restore.eFlags), + SSMFIELD_ENTRY( PATMGCSTATE, Restore.uFlags), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the PATMPATCHREC structure. 
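
The SSMFIELD descriptor tables in this file drive a generic structure marshaller: each entry names one member and says whether it is stored, ignored, converted (RCPTR/GCPHYS) or padded. A much reduced sketch of the underlying idea, using made-up types rather than the real SSM API:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    /* Hypothetical field descriptor, loosely mirroring SSMFIELD_ENTRY vs.
       SSMFIELD_ENTRY_IGNORE: offset and size of a member plus a skip flag. */
    struct FieldDesc
    {
        size_t off;
        size_t cb;
        bool   fIgnore;
    };

    /* Serialize only the described, non-ignored members of a structure. */
    static void putStructFields(std::vector<uint8_t> &stream, const void *pvStruct,
                                const FieldDesc *paFields, size_t cFields)
    {
        const uint8_t *pb = static_cast<const uint8_t *>(pvStruct);
        for (size_t i = 0; i < cFields; i++)
            if (!paFields[i].fIgnore)
                stream.insert(stream.end(), pb + paFields[i].off, pb + paFields[i].off + paFields[i].cb);
    }

    /* Example: a two-member structure where the second member is host-only. */
    struct Example { uint32_t uSaved; void *pvHostOnly; };
    static const FieldDesc g_aExampleFields[] =
    {
        { offsetof(Example, uSaved),     sizeof(uint32_t), false },
        { offsetof(Example, pvHostOnly), sizeof(void *),   true  },
    };

The real tables additionally encode pointer conversion and host-dependent padding, which is what the RCPTR and PAD entries above and below take care of.
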
+ */ +static SSMFIELD const g_aPatmPatchRecFields[] = +{ + SSMFIELD_ENTRY( PATMPATCHRECSSM, Core.Key), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, Core.pLeft), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, Core.pRight), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, Core.uchHeight), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 3), + SSMFIELD_ENTRY( PATMPATCHRECSSM, CoreOffset.Key), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, CoreOffset.pLeft), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, CoreOffset.pRight), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, CoreOffset.uchHeight), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 3), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.uState), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.uOldState), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.uOpMode), + SSMFIELD_ENTRY_RCPTR( PATMPATCHRECSSM, patch.pPrivInstrGC), + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.unusedHC), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.aPrivInstr), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.cbPrivInstr), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.opcode), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.cbPatchJump), + SSMFIELD_ENTRY_RCPTR( PATMPATCHRECSSM, patch.pPatchJumpDestGC), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.pPatchBlockOffset), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.cbPatchBlockSize), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.uCurPatchOffset), + SSMFIELD_ENTRY_PAD_HC64( PATMPATCHRECSSM, patch.Alignment0, sizeof(uint32_t)), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.flags), + SSMFIELD_ENTRY_RCPTR( PATMPATCHRECSSM, patch.pInstrGCLowest), + SSMFIELD_ENTRY_RCPTR( PATMPATCHRECSSM, patch.pInstrGCHighest), + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.FixupTree), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.nrFixups), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.nrJumpRecs), // should be zero? + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.JumpTree), + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.Patch2GuestAddrTree), + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.Guest2PatchAddrTree), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.nrPatch2GuestRecs), + SSMFIELD_ENTRY_PAD_HC64( PATMPATCHRECSSM, patch.Alignment1, sizeof(uint32_t)), + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.unused.pPatchLocStartHC), // saved as zero + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.unused.pPatchLocEndHC), // ditto + SSMFIELD_ENTRY_IGN_RCPTR( PATMPATCHRECSSM, patch.unused.pGuestLoc), // ditto + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, patch.unused.opsize), // ditto + SSMFIELD_ENTRY_IGN_HCPTR( PATMPATCHRECSSM, patch.pTempInfo), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.cCodeWrites), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.cTraps), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.cInvalidWrites), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.uPatchIdx), + SSMFIELD_ENTRY( PATMPATCHRECSSM, patch.bDirtyOpcode), + SSMFIELD_ENTRY_IGNORE( PATMPATCHRECSSM, patch.Alignment2), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the RELOCREC structure. + */ +static SSMFIELD const g_aPatmRelocRec[] = +{ + SSMFIELD_ENTRY_HCPTR_HACK_U32( RELOCREC, Core.Key), // Used to store the relocation type + SSMFIELD_ENTRY_IGN_HCPTR( RELOCREC, Core.pLeft), + SSMFIELD_ENTRY_IGN_HCPTR( RELOCREC, Core.pRight), + SSMFIELD_ENTRY_IGNORE( RELOCREC, Core.uchHeight), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 7), + SSMFIELD_ENTRY( RELOCREC, uType), + SSMFIELD_ENTRY_PAD_HC_AUTO( 0, 4), + SSMFIELD_ENTRY_HCPTR_HACK_U32( RELOCREC, pRelocPos), // converted to a patch member offset. 
+ SSMFIELD_ENTRY_RCPTR( RELOCREC, pSource), + SSMFIELD_ENTRY_RCPTR( RELOCREC, pDest), + SSMFIELD_ENTRY_TERM() +}; + +/** + * SSM descriptor table for the RECPATCHTOGUEST structure. + */ +static SSMFIELD const g_aPatmRecPatchToGuest[] = +{ + SSMFIELD_ENTRY( RECPATCHTOGUEST, Core.Key), + SSMFIELD_ENTRY_PAD_HC_AUTO( 0, 4), + SSMFIELD_ENTRY_IGN_HCPTR( RECPATCHTOGUEST, Core.pLeft), + SSMFIELD_ENTRY_IGN_HCPTR( RECPATCHTOGUEST, Core.pRight), + SSMFIELD_ENTRY_IGNORE( RECPATCHTOGUEST, Core.uchHeight), + SSMFIELD_ENTRY_PAD_HC_AUTO( 3, 7), + SSMFIELD_ENTRY_RCPTR( RECPATCHTOGUEST, pOrgInstrGC), + SSMFIELD_ENTRY( RECPATCHTOGUEST, enmType), + SSMFIELD_ENTRY( RECPATCHTOGUEST, fDirty), + SSMFIELD_ENTRY( RECPATCHTOGUEST, fJumpTarget), + SSMFIELD_ENTRY( RECPATCHTOGUEST, u8DirtyOpcode), + SSMFIELD_ENTRY_PAD_HC_AUTO( 1, 5), + SSMFIELD_ENTRY_TERM() +}; + +#ifdef VBOX_STRICT + +/** + * Callback function for RTAvlPVDoWithAll + * + * Counts the number of patches in the tree + * + * @returns VBox status code. + * @param pNode Current node + * @param pcPatches Pointer to patch counter (uint32_t) + */ +static DECLCALLBACK(int) patmCountLeafPV(PAVLPVNODECORE pNode, void *pcPatches) +{ + NOREF(pNode); + *(uint32_t *)pcPatches = *(uint32_t *)pcPatches + 1; + return VINF_SUCCESS; +} + +/** + * Callback function for RTAvlU32DoWithAll + * + * Counts the number of patches in the tree + * + * @returns VBox status code. + * @param pNode Current node + * @param pcPatches Pointer to patch counter (uint32_t) + */ +static DECLCALLBACK(int) patmCountLeaf(PAVLU32NODECORE pNode, void *pcPatches) +{ + NOREF(pNode); + *(uint32_t *)pcPatches = *(uint32_t *)pcPatches + 1; + return VINF_SUCCESS; +} + +#endif /* VBOX_STRICT */ + +/** + * Callback function for RTAvloU32DoWithAll + * + * Counts the number of patches in the tree + * + * @returns VBox status code. + * @param pNode Current node + * @param pcPatches Pointer to patch counter + */ +static DECLCALLBACK(int) patmCountPatch(PAVLOU32NODECORE pNode, void *pcPatches) +{ + NOREF(pNode); + *(uint32_t *)pcPatches = *(uint32_t *)pcPatches + 1; + return VINF_SUCCESS; +} + +/** + * Callback function for RTAvlU32DoWithAll + * + * Saves all patch to guest lookup records. + * + * @returns VBox status code. + * @param pNode Current node + * @param pvUser Pointer to PATMCALLBACKARGS. + */ +static DECLCALLBACK(int) patmSaveP2GLookupRecords(PAVLU32NODECORE pNode, void *pvUser) +{ + PPATMCALLBACKARGS pArgs = (PPATMCALLBACKARGS)pvUser; + PRECPATCHTOGUEST pPatchToGuestRec = (PRECPATCHTOGUEST)pNode; + + /* Save the lookup record. */ + int rc = SSMR3PutStructEx(pArgs->pSSM, pPatchToGuestRec, sizeof(RECPATCHTOGUEST), 0 /*fFlags*/, + &g_aPatmRecPatchToGuest[0], NULL); + AssertRCReturn(rc, rc); + + return VINF_SUCCESS; +} + +/** + * Callback function for RTAvlPVDoWithAll + * + * Saves all patch to guest lookup records. + * + * @returns VBox status code. + * @param pNode Current node + * @param pvUser Pointer to PATMCALLBACKARGS. + */ +static DECLCALLBACK(int) patmSaveFixupRecords(PAVLPVNODECORE pNode, void *pvUser) +{ + PPATMCALLBACKARGS pArgs = (PPATMCALLBACKARGS)pvUser; + RELOCREC rec = *(PRELOCREC)pNode; + + /* Convert pointer to an offset into patch memory. May not be applicable + to all fixup types, thus the UINT32_MAX. 
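
patmCountLeafPV, patmCountLeaf and patmCountPatch above are the same idiom three times over: an enumeration callback that treats its void *pvUser argument as a counter. A generic sketch of that callback-plus-user-pointer pattern (the node type and enumerator are invented stand-ins for the RTAvl*DoWithAll routines):

    #include <cstdint>

    /* Hypothetical node and in-order enumerator; only the callback shape matters. */
    struct Node { Node *pLeft; Node *pRight; };
    typedef int (*PFNENUMCALLBACK)(Node *pNode, void *pvUser);

    static int enumInOrder(Node *pRoot, PFNENUMCALLBACK pfnCallback, void *pvUser)
    {
        if (!pRoot)
            return 0;
        int rc = enumInOrder(pRoot->pLeft, pfnCallback, pvUser);
        if (rc == 0)
            rc = pfnCallback(pRoot, pvUser);
        if (rc == 0)
            rc = enumInOrder(pRoot->pRight, pfnCallback, pvUser);
        return rc;
    }

    /* Counter callback: ignores the node, bumps the uint32_t the caller passed in. */
    static int countLeafCallback(Node * /*pNode*/, void *pvUser)
    {
        *(uint32_t *)pvUser += 1;
        return 0; /* keep enumerating */
    }

    /* Usage: uint32_t cNodes = 0; enumInOrder(pTree, countLeafCallback, &cNodes); */
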
*/ + AssertMsg( rec.pRelocPos + || ( rec.uType == FIXUP_REL_JMPTOPATCH + && !(pArgs->pPatchRec->patch.flags & PATMFL_PATCHED_GUEST_CODE)), + ("uState=%#x uType=%#x flags=%#RX64\n", pArgs->pPatchRec->patch.uState, rec.uType, pArgs->pPatchRec->patch.flags)); + uintptr_t offRelocPos = (uintptr_t)rec.pRelocPos - (uintptr_t)pArgs->pVM->patm.s.pPatchMemHC; + if (offRelocPos > pArgs->pVM->patm.s.cbPatchMem) + offRelocPos = UINT32_MAX; + rec.pRelocPos = (uint8_t *)offRelocPos; + + /* Zero rec.Core.Key since it's unused and may trigger SSM check due to the hack below. */ + rec.Core.Key = 0; + + /* Save the lookup record. */ + int rc = SSMR3PutStructEx(pArgs->pSSM, &rec, sizeof(rec), 0 /*fFlags*/, &g_aPatmRelocRec[0], NULL); + AssertRCReturn(rc, rc); + + return VINF_SUCCESS; +} + +/** + * Converts a saved state patch record to the memory record. + * + * @returns nothing. + * @param pPatch The memory record. + * @param pPatchSSM The SSM version of the patch record. + */ +static void patmR3PatchConvertSSM2Mem(PPATMPATCHREC pPatch, PPATMPATCHRECSSM pPatchSSM) +{ + /* + * Only restore the patch part of the tree record; not the internal data (except the key of course) + */ + pPatch->Core.Key = pPatchSSM->Core.Key; + pPatch->CoreOffset.Key = pPatchSSM->CoreOffset.Key; + pPatch->patch.uState = pPatchSSM->patch.uState; + pPatch->patch.uOldState = pPatchSSM->patch.uOldState; + pPatch->patch.uOpMode = pPatchSSM->patch.uOpMode; + pPatch->patch.pPrivInstrGC = pPatchSSM->patch.pPrivInstrGC; + pPatch->patch.unusedHC = pPatchSSM->patch.unusedHC; + memcpy(&pPatch->patch.aPrivInstr[0], &pPatchSSM->patch.aPrivInstr[0], MAX_INSTR_SIZE); + pPatch->patch.cbPrivInstr = pPatchSSM->patch.cbPrivInstr; + pPatch->patch.opcode = pPatchSSM->patch.opcode; + pPatch->patch.cbPatchJump = pPatchSSM->patch.cbPatchJump; + pPatch->patch.pPatchJumpDestGC = pPatchSSM->patch.pPatchJumpDestGC; + pPatch->patch.pPatchBlockOffset = pPatchSSM->patch.pPatchBlockOffset; + pPatch->patch.cbPatchBlockSize = pPatchSSM->patch.cbPatchBlockSize; + pPatch->patch.uCurPatchOffset = pPatchSSM->patch.uCurPatchOffset; + pPatch->patch.flags = pPatchSSM->patch.flags; + pPatch->patch.pInstrGCLowest = pPatchSSM->patch.pInstrGCLowest; + pPatch->patch.pInstrGCHighest = pPatchSSM->patch.pInstrGCHighest; + pPatch->patch.FixupTree = pPatchSSM->patch.FixupTree; + pPatch->patch.nrFixups = pPatchSSM->patch.nrFixups; + pPatch->patch.nrJumpRecs = pPatchSSM->patch.nrJumpRecs; + pPatch->patch.JumpTree = pPatchSSM->patch.JumpTree; + pPatch->patch.Patch2GuestAddrTree = pPatchSSM->patch.Patch2GuestAddrTree; + pPatch->patch.Guest2PatchAddrTree = pPatchSSM->patch.Guest2PatchAddrTree; + pPatch->patch.nrPatch2GuestRecs = pPatchSSM->patch.nrPatch2GuestRecs; + pPatch->patch.unused = pPatchSSM->patch.unused; + pPatch->patch.pTempInfo = pPatchSSM->patch.pTempInfo; + pPatch->patch.cCodeWrites = pPatchSSM->patch.cCodeWrites; + pPatch->patch.cTraps = pPatchSSM->patch.cTraps; + pPatch->patch.cInvalidWrites = pPatchSSM->patch.cInvalidWrites; + pPatch->patch.uPatchIdx = pPatchSSM->patch.uPatchIdx; + pPatch->patch.bDirtyOpcode = pPatchSSM->patch.bDirtyOpcode; + pPatch->patch.pTrampolinePatchesHead = NULL; +} + +/** + * Converts a memory patch record to the saved state version. + * + * @returns nothing. + * @param pPatchSSM The saved state record. + * @param pPatch The memory version to save. 
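
patmSaveFixupRecords above stores pRelocPos as an offset into patch memory (or UINT32_MAX when no position applies), and the load side later adds the new patch memory base back on. A minimal sketch of that pointer/offset round trip, with hypothetical helper names:

    #include <cstdint>

    /* Save side: host pointer -> offset into patch memory, UINT32_MAX if the
       pointer does not fall inside the patch memory block. */
    static uint32_t relocPosToOffset(const uint8_t *pbRelocPos, const uint8_t *pbPatchMem, uint32_t cbPatchMem)
    {
        uintptr_t off = (uintptr_t)pbRelocPos - (uintptr_t)pbPatchMem;
        return off < cbPatchMem ? (uint32_t)off : UINT32_MAX;
    }

    /* Load side: offset -> pointer into the (possibly relocated) patch memory
       of the restoring VM; out-of-range offsets become NULL. */
    static uint8_t *offsetToRelocPos(uint32_t off, uint8_t *pbPatchMem, uint32_t cbPatchMem)
    {
        return off < cbPatchMem ? pbPatchMem + off : nullptr;
    }

Storing an offset rather than a pointer is what makes the record survive the patch memory being mapped at a different host address after restore.
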
+ */ +static void patmR3PatchConvertMem2SSM(PPATMPATCHRECSSM pPatchSSM, PPATMPATCHREC pPatch) +{ + pPatchSSM->Core = pPatch->Core; + pPatchSSM->CoreOffset = pPatch->CoreOffset; + pPatchSSM->patch.uState = pPatch->patch.uState; + pPatchSSM->patch.uOldState = pPatch->patch.uOldState; + pPatchSSM->patch.uOpMode = pPatch->patch.uOpMode; + pPatchSSM->patch.pPrivInstrGC = pPatch->patch.pPrivInstrGC; + pPatchSSM->patch.unusedHC = pPatch->patch.unusedHC; + memcpy(&pPatchSSM->patch.aPrivInstr[0], &pPatch->patch.aPrivInstr[0], MAX_INSTR_SIZE); + pPatchSSM->patch.cbPrivInstr = pPatch->patch.cbPrivInstr; + pPatchSSM->patch.opcode = pPatch->patch.opcode; + pPatchSSM->patch.cbPatchJump = pPatch->patch.cbPatchJump; + pPatchSSM->patch.pPatchJumpDestGC = pPatch->patch.pPatchJumpDestGC; + pPatchSSM->patch.pPatchBlockOffset = pPatch->patch.pPatchBlockOffset; + pPatchSSM->patch.cbPatchBlockSize = pPatch->patch.cbPatchBlockSize; + pPatchSSM->patch.uCurPatchOffset = pPatch->patch.uCurPatchOffset; + pPatchSSM->patch.flags = pPatch->patch.flags; + pPatchSSM->patch.pInstrGCLowest = pPatch->patch.pInstrGCLowest; + pPatchSSM->patch.pInstrGCHighest = pPatch->patch.pInstrGCHighest; + pPatchSSM->patch.FixupTree = pPatch->patch.FixupTree; + pPatchSSM->patch.nrFixups = pPatch->patch.nrFixups; + pPatchSSM->patch.nrJumpRecs = pPatch->patch.nrJumpRecs; + pPatchSSM->patch.JumpTree = pPatch->patch.JumpTree; + pPatchSSM->patch.Patch2GuestAddrTree = pPatch->patch.Patch2GuestAddrTree; + pPatchSSM->patch.Guest2PatchAddrTree = pPatch->patch.Guest2PatchAddrTree; + pPatchSSM->patch.nrPatch2GuestRecs = pPatch->patch.nrPatch2GuestRecs; + pPatchSSM->patch.unused = pPatch->patch.unused; + pPatchSSM->patch.pTempInfo = pPatch->patch.pTempInfo; + pPatchSSM->patch.cCodeWrites = pPatch->patch.cCodeWrites; + pPatchSSM->patch.cTraps = pPatch->patch.cTraps; + pPatchSSM->patch.cInvalidWrites = pPatch->patch.cInvalidWrites; + pPatchSSM->patch.uPatchIdx = pPatch->patch.uPatchIdx; + pPatchSSM->patch.bDirtyOpcode = pPatch->patch.bDirtyOpcode; +} + +/** + * Callback function for RTAvloU32DoWithAll + * + * Saves the state of the patch that's being enumerated + * + * @returns VBox status code. + * @param pNode Current node + * @param pvUser Pointer to PATMCALLBACKARGS. + */ +static DECLCALLBACK(int) patmSavePatchState(PAVLOU32NODECORE pNode, void *pvUser) +{ + PPATMCALLBACKARGS pArgs = (PPATMCALLBACKARGS)pvUser; + PPATMPATCHREC pPatch = (PPATMPATCHREC)pNode; + PATMPATCHRECSSM patch; + int rc; + + pArgs->pPatchRec = pPatch; + Assert(!(pPatch->patch.flags & PATMFL_GLOBAL_FUNCTIONS)); + + patmR3PatchConvertMem2SSM(&patch, pPatch); + Log4(("patmSavePatchState: cbPatchJump=%u uCurPathOffset=%#x pInstrGCLowest/Higest=%#x/%#x nrFixups=%#x nrJumpRecs=%#x\n", + patch.patch.cbPatchJump, patch.patch.uCurPatchOffset, patch.patch.pInstrGCLowest, patch.patch.pInstrGCHighest, + patch.patch.nrFixups, patch.patch.nrJumpRecs)); + + /* + * Reset HC pointers that need to be recalculated when loading the state + */ +#ifdef VBOX_STRICT + PVM pVM = pArgs->pVM; /* For PATCHCODE_PTR_HC. 
*/ + AssertMsg(patch.patch.uState == PATCH_REFUSED || (patch.patch.pPatchBlockOffset || (patch.patch.flags & (PATMFL_SYSENTER_XP|PATMFL_INT3_REPLACEMENT))), + ("State = %x pPatchBlockHC=%08x flags=%x\n", patch.patch.uState, PATCHCODE_PTR_HC(&patch.patch), patch.patch.flags)); +#endif + Assert(pPatch->patch.JumpTree == 0); + Assert(!pPatch->patch.pTempInfo || pPatch->patch.pTempInfo->DisasmJumpTree == 0); + Assert(!pPatch->patch.pTempInfo || pPatch->patch.pTempInfo->IllegalInstrTree == 0); + + /* Save the patch record itself */ + rc = SSMR3PutStructEx(pArgs->pSSM, &patch, sizeof(patch), 0 /*fFlags*/, &g_aPatmPatchRecFields[0], NULL); + AssertRCReturn(rc, rc); + + /* + * Reset HC pointers in fixup records and save them. + */ +#ifdef VBOX_STRICT + uint32_t nrFixupRecs = 0; + RTAvlPVDoWithAll(&pPatch->patch.FixupTree, true, patmCountLeafPV, &nrFixupRecs); + AssertMsg(nrFixupRecs == pPatch->patch.nrFixups, ("Fixup inconsistency! counted %d vs %d\n", nrFixupRecs, pPatch->patch.nrFixups)); +#endif + rc = RTAvlPVDoWithAll(&pPatch->patch.FixupTree, true, patmSaveFixupRecords, pArgs); + AssertRCReturn(rc, rc); + +#ifdef VBOX_STRICT + uint32_t nrLookupRecords = 0; + RTAvlU32DoWithAll(&pPatch->patch.Patch2GuestAddrTree, true, patmCountLeaf, &nrLookupRecords); + Assert(nrLookupRecords == pPatch->patch.nrPatch2GuestRecs); +#endif + + rc = RTAvlU32DoWithAll(&pPatch->patch.Patch2GuestAddrTree, true, patmSaveP2GLookupRecords, pArgs); + AssertRCReturn(rc, rc); + + pArgs->pPatchRec = NULL; + return VINF_SUCCESS; +} + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +DECLCALLBACK(int) patmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + PATM patmInfo = pVM->patm.s; + int rc; + + pVM->patm.s.savedstate.pSSM = pSSM; + + /* + * Reset HC pointers that need to be recalculated when loading the state + */ + patmInfo.pPatchMemHC = NULL; + patmInfo.pGCStateHC = 0; + patmInfo.pvFaultMonitor = 0; + + Assert(patmInfo.ulCallDepth == 0); + + /* + * Count the number of patches in the tree (feeling lazy) + */ + patmInfo.savedstate.cPatches = 0; + RTAvloU32DoWithAll(&pVM->patm.s.PatchLookupTreeHC->PatchTree, true, patmCountPatch, &patmInfo.savedstate.cPatches); + + /* + * Save PATM structure + */ + rc = SSMR3PutStructEx(pSSM, &patmInfo, sizeof(patmInfo), 0 /*fFlags*/, &g_aPatmFields[0], NULL); + AssertRCReturn(rc, rc); + + /* + * Save patch memory contents + */ + rc = SSMR3PutMem(pSSM, pVM->patm.s.pPatchMemHC, pVM->patm.s.cbPatchMem); + AssertRCReturn(rc, rc); + + /* + * Save GC state memory + */ + rc = SSMR3PutStructEx(pSSM, pVM->patm.s.pGCStateHC, sizeof(PATMGCSTATE), 0 /*fFlags*/, &g_aPatmGCStateFields[0], NULL); + AssertRCReturn(rc, rc); + + /* + * Save PATM stack page + */ + SSMR3PutU32(pSSM, PATM_STACK_TOTAL_SIZE); + rc = SSMR3PutMem(pSSM, pVM->patm.s.pGCStackHC, PATM_STACK_TOTAL_SIZE); + AssertRCReturn(rc, rc); + + /* + * Save all patches + */ + PATMCALLBACKARGS Args; + Args.pVM = pVM; + Args.pSSM = pSSM; + rc = RTAvloU32DoWithAll(&pVM->patm.s.PatchLookupTreeHC->PatchTree, true, patmSavePatchState, &Args); + AssertRCReturn(rc, rc); + + /* Note! Patch statistics are not saved. */ + + return VINF_SUCCESS; +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. 
+ */ +DECLCALLBACK(int) patmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PATM patmInfo; + int rc; + + if ( uVersion != PATM_SAVED_STATE_VERSION + && uVersion != PATM_SAVED_STATE_VERSION_NO_RAW_MEM + && uVersion != PATM_SAVED_STATE_VERSION_MEM + && uVersion != PATM_SAVED_STATE_VERSION_FIXUP_HACK + && uVersion != PATM_SAVED_STATE_VERSION_VER16 + ) + { + AssertMsgFailed(("patmR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + uint32_t const fStructRestoreFlags = uVersion <= PATM_SAVED_STATE_VERSION_MEM ? SSMSTRUCT_FLAGS_MEM_BAND_AID_RELAXED : 0; + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + pVM->patm.s.savedstate.pSSM = pSSM; + + /* + * Restore PATM structure + */ + RT_ZERO(patmInfo); + if ( uVersion == PATM_SAVED_STATE_VERSION_MEM + && SSMR3HandleRevision(pSSM) >= 86139 + && SSMR3HandleVersion(pSSM) >= VBOX_FULL_VERSION_MAKE(4, 2, 51)) + rc = SSMR3GetStructEx(pSSM, &patmInfo, sizeof(patmInfo), SSMSTRUCT_FLAGS_MEM_BAND_AID_RELAXED, + &g_aPatmFields86139[0], NULL); + else + rc = SSMR3GetStructEx(pSSM, &patmInfo, sizeof(patmInfo), fStructRestoreFlags, &g_aPatmFields[0], NULL); + AssertRCReturn(rc, rc); + + /* Relative calls are made to the helper functions. Therefor their relative location must not change! */ + /* Note: we reuse the saved global helpers and assume they are identical, which is kind of dangerous. */ + AssertLogRelReturn((pVM->patm.s.pfnHelperCallGC - pVM->patm.s.pPatchMemGC) == (patmInfo.pfnHelperCallGC - patmInfo.pPatchMemGC), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelReturn((pVM->patm.s.pfnHelperRetGC - pVM->patm.s.pPatchMemGC) == (patmInfo.pfnHelperRetGC - patmInfo.pPatchMemGC), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelReturn((pVM->patm.s.pfnHelperJumpGC - pVM->patm.s.pPatchMemGC) == (patmInfo.pfnHelperJumpGC - patmInfo.pPatchMemGC), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelReturn((pVM->patm.s.pfnHelperIretGC - pVM->patm.s.pPatchMemGC) == (patmInfo.pfnHelperIretGC - patmInfo.pPatchMemGC), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelReturn(pVM->patm.s.cbPatchMem == patmInfo.cbPatchMem, VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + pVM->patm.s.offPatchMem = patmInfo.offPatchMem; + pVM->patm.s.deltaReloc = patmInfo.deltaReloc; + pVM->patm.s.uCurrentPatchIdx = patmInfo.uCurrentPatchIdx; + pVM->patm.s.fOutOfMemory = patmInfo.fOutOfMemory; + + /* Lowest and highest patched instruction */ + pVM->patm.s.pPatchedInstrGCLowest = patmInfo.pPatchedInstrGCLowest; + pVM->patm.s.pPatchedInstrGCHighest = patmInfo.pPatchedInstrGCHighest; + + /* Sysenter handlers */ + pVM->patm.s.pfnSysEnterGC = patmInfo.pfnSysEnterGC; + pVM->patm.s.pfnSysEnterPatchGC = patmInfo.pfnSysEnterPatchGC; + pVM->patm.s.uSysEnterPatchIdx = patmInfo.uSysEnterPatchIdx; + + Assert(patmInfo.ulCallDepth == 0 && pVM->patm.s.ulCallDepth == 0); + + Log(("pPatchMemGC %RRv vs old %RRv\n", pVM->patm.s.pPatchMemGC, patmInfo.pPatchMemGC)); + Log(("pGCStateGC %RRv vs old %RRv\n", pVM->patm.s.pGCStateGC, patmInfo.pGCStateGC)); + Log(("pGCStackGC %RRv vs old %RRv\n", pVM->patm.s.pGCStackGC, patmInfo.pGCStackGC)); + Log(("pCPUMCtxGC %RRv vs old %RRv\n", pVM->patm.s.pCPUMCtxGC, patmInfo.pCPUMCtxGC)); + + + /** @note patch statistics are not restored. 
*/ + + /* + * Restore patch memory contents + */ + Log(("Restore patch memory: new %RRv old %RRv\n", pVM->patm.s.pPatchMemGC, patmInfo.pPatchMemGC)); + rc = SSMR3GetMem(pSSM, pVM->patm.s.pPatchMemHC, pVM->patm.s.cbPatchMem); + AssertRCReturn(rc, rc); + + /* + * Restore GC state memory + */ + RT_BZERO(pVM->patm.s.pGCStateHC, sizeof(PATMGCSTATE)); + rc = SSMR3GetStructEx(pSSM, pVM->patm.s.pGCStateHC, sizeof(PATMGCSTATE), fStructRestoreFlags, &g_aPatmGCStateFields[0], NULL); + AssertRCReturn(rc, rc); + + /* + * Restore PATM stack page + */ + uint32_t cbStack = PATM_STACK_TOTAL_SIZE; + if (uVersion > PATM_SAVED_STATE_VERSION_MEM) + { + rc = SSMR3GetU32(pSSM, &cbStack); + AssertRCReturn(rc, rc); + } + AssertCompile(!(PATM_STACK_TOTAL_SIZE & 31)); + AssertLogRelMsgReturn(cbStack > 0 && cbStack <= PATM_STACK_TOTAL_SIZE && !(cbStack & 31), + ("cbStack=%#x vs %#x", cbStack, PATM_STACK_TOTAL_SIZE), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + rc = SSMR3GetMem(pSSM, pVM->patm.s.pGCStackHC, cbStack); + AssertRCReturn(rc, rc); + if (cbStack < PATM_STACK_TOTAL_SIZE) + memset((uint8_t *)pVM->patm.s.pGCStackHC + cbStack, 0, PATM_STACK_TOTAL_SIZE - cbStack); + + /* + * Load all patches + */ + for (unsigned i = 0; i < patmInfo.savedstate.cPatches; i++) + { + PATMPATCHRECSSM patch; + PATMPATCHREC *pPatchRec; + + RT_ZERO(patch); + rc = SSMR3GetStructEx(pSSM, &patch, sizeof(patch), fStructRestoreFlags, &g_aPatmPatchRecFields[0], NULL); + AssertRCReturn(rc, rc); + Log4(("patmR3Load: cbPatchJump=%u uCurPathOffset=%#x pInstrGCLowest/Higest=%#x/%#x nrFixups=%#x nrJumpRecs=%#x\n", + patch.patch.cbPatchJump, patch.patch.uCurPatchOffset, patch.patch.pInstrGCLowest, patch.patch.pInstrGCHighest, + patch.patch.nrFixups, patch.patch.nrJumpRecs)); + + Assert(!(patch.patch.flags & PATMFL_GLOBAL_FUNCTIONS)); + + rc = MMHyperAlloc(pVM, sizeof(PATMPATCHREC), 0, MM_TAG_PATM_PATCH, (void **)&pPatchRec); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Out of memory!!!!\n")); + return VERR_NO_MEMORY; + } + + /* Convert SSM version to memory. */ + patmR3PatchConvertSSM2Mem(pPatchRec, &patch); + + Log(("Restoring patch %RRv -> %RRv state %x\n", pPatchRec->patch.pPrivInstrGC, patmInfo.pPatchMemGC + pPatchRec->patch.pPatchBlockOffset, pPatchRec->patch.uState)); + bool ret = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTree, &pPatchRec->Core); + Assert(ret); + if (pPatchRec->patch.uState != PATCH_REFUSED) + { + if (pPatchRec->patch.pPatchBlockOffset) + { + /* We actually generated code for this patch. */ + ret = RTAvloU32Insert(&pVM->patm.s.PatchLookupTreeHC->PatchTreeByPatchAddr, &pPatchRec->CoreOffset); + AssertMsg(ret, ("Inserting patch %RRv offset %08RX32 failed!!\n", pPatchRec->patch.pPrivInstrGC, pPatchRec->CoreOffset.Key)); + } + } + /* Set to zero as we don't need it anymore. */ + pPatchRec->patch.pTempInfo = 0; + + PATMP2GLOOKUPREC cacheRec; + RT_ZERO(cacheRec); + cacheRec.pPatch = &pPatchRec->patch; + + uint8_t *pPrivInstrHC = patmR3GCVirtToHCVirt(pVM, &cacheRec, pPatchRec->patch.pPrivInstrGC); + /* Can fail due to page or page table not present. 
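
The stack restore above shows the usual forward-compatibility shape: newer saved states carry an explicit size, older ones imply the full size, and whatever was not saved is zeroed. A self-contained sketch of the same shape, with an invented in-memory stream standing in for the SSM handle and invented version constants:

    #include <cstdint>
    #include <cstring>

    /* Tiny in-memory stream; illustration only. */
    struct Stream
    {
        const uint8_t *pb;
        size_t         cb;
        size_t         off;

        bool read(void *pv, size_t cbRead)
        {
            if (off + cbRead > cb)
                return false;
            memcpy(pv, pb + off, cbRead);
            off += cbRead;
            return true;
        }
    };

    enum : uint32_t { MY_VERSION_OLD = 1, MY_VERSION_WITH_SIZE = 2 };
    static const uint32_t MY_STACK_TOTAL_SIZE = 4096;

    /* Newer saved states record how much stack was written; older ones imply
       the full size. The unread tail is zeroed so the layout stays defined. */
    static int loadStack(Stream &ssm, uint32_t uVersion, uint8_t *pbStack)
    {
        uint32_t cbStack = MY_STACK_TOTAL_SIZE;
        if (uVersion >= MY_VERSION_WITH_SIZE && !ssm.read(&cbStack, sizeof(cbStack)))
            return -1;
        if (cbStack == 0 || cbStack > MY_STACK_TOTAL_SIZE)
            return -1;                                 /* layout changed: refuse the unit */
        if (!ssm.read(pbStack, cbStack))
            return -1;
        if (cbStack < MY_STACK_TOTAL_SIZE)
            memset(pbStack + cbStack, 0, MY_STACK_TOTAL_SIZE - cbStack);
        return 0;
    }
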
*/ + + /* + * Restore fixup records and correct HC pointers in fixup records + */ + pPatchRec->patch.FixupTree = 0; + pPatchRec->patch.nrFixups = 0; /* increased by patmPatchAddReloc32 */ + for (unsigned j = 0; j < patch.patch.nrFixups; j++) + { + RELOCREC rec; + int32_t offset; + RTRCPTR *pFixup; + + RT_ZERO(rec); + rc = SSMR3GetStructEx(pSSM, &rec, sizeof(rec), fStructRestoreFlags, &g_aPatmRelocRec[0], NULL); + AssertRCReturn(rc, rc); + + if (pPrivInstrHC) + { + /* rec.pRelocPos now contains the relative position inside the hypervisor area. */ + offset = (int32_t)(intptr_t)rec.pRelocPos; + /* Convert to HC pointer again. */ + if ((uintptr_t)rec.pRelocPos < pVM->patm.s.cbPatchMem) + rec.pRelocPos = pVM->patm.s.pPatchMemHC + (uintptr_t)rec.pRelocPos; + else + rec.pRelocPos = NULL; + pFixup = (RTRCPTR *)rec.pRelocPos; + + if (pPatchRec->patch.uState != PATCH_REFUSED) + { + if ( rec.uType == FIXUP_REL_JMPTOPATCH + && (pPatchRec->patch.flags & PATMFL_PATCHED_GUEST_CODE)) + { + Assert(pPatchRec->patch.cbPatchJump == SIZEOF_NEARJUMP32 || pPatchRec->patch.cbPatchJump == SIZEOF_NEAR_COND_JUMP32); + unsigned offset2 = (pPatchRec->patch.cbPatchJump == SIZEOF_NEARJUMP32) ? 1 : 2; + + rec.pRelocPos = pPrivInstrHC + offset2; + pFixup = (RTRCPTR *)rec.pRelocPos; + } + + rc = patmCorrectFixup(pVM, uVersion, patmInfo, &pPatchRec->patch, &rec, offset, pFixup); + AssertRCReturn(rc, rc); + } + + rc = patmPatchAddReloc32(pVM, &pPatchRec->patch, rec.pRelocPos, rec.uType, rec.pSource, rec.pDest); + AssertRCReturn(rc, rc); + } + } + /* Release previous lock if any. */ + if (cacheRec.Lock.pvMap) + PGMPhysReleasePageMappingLock(pVM, &cacheRec.Lock); + + /* And all patch to guest lookup records */ + Assert(pPatchRec->patch.nrPatch2GuestRecs || pPatchRec->patch.uState == PATCH_REFUSED || (pPatchRec->patch.flags & (PATMFL_SYSENTER_XP | PATMFL_IDTHANDLER | PATMFL_TRAPHANDLER | PATMFL_INT3_REPLACEMENT))); + + pPatchRec->patch.Patch2GuestAddrTree = 0; + pPatchRec->patch.Guest2PatchAddrTree = 0; + if (pPatchRec->patch.nrPatch2GuestRecs) + { + RECPATCHTOGUEST rec; + uint32_t nrPatch2GuestRecs = pPatchRec->patch.nrPatch2GuestRecs; + + pPatchRec->patch.nrPatch2GuestRecs = 0; /* incremented by patmr3AddP2GLookupRecord */ + for (uint32_t j=0;jpatch, (uintptr_t)rec.Core.Key + pVM->patm.s.pPatchMemHC, rec.pOrgInstrGC, rec.enmType, rec.fDirty); + } + Assert(pPatchRec->patch.Patch2GuestAddrTree); + } + + if (pPatchRec->patch.flags & PATMFL_CODE_MONITORED) + { + /* Insert the guest page lookup records (for detection self-modifying code) */ + rc = patmInsertPatchPages(pVM, &pPatchRec->patch); + AssertRCReturn(rc, rc); + } + +#if 0 /* can fail def LOG_ENABLED */ + if ( pPatchRec->patch.uState != PATCH_REFUSED + && !(pPatchRec->patch.flags & PATMFL_INT3_REPLACEMENT)) + { + pPatchRec->patch.pTempInfo = (PPATCHINFOTEMP)MMR3HeapAllocZ(pVM, MM_TAG_PATM_PATCH, sizeof(PATCHINFOTEMP)); + Log(("Patch code ----------------------------------------------------------\n")); + patmr3DisasmCodeStream(pVM, PATCHCODE_PTR_GC(&pPatchRec->patch), PATCHCODE_PTR_GC(&pPatchRec->patch), patmr3DisasmCallback, &pPatchRec->patch); + Log(("Patch code ends -----------------------------------------------------\n")); + MMR3HeapFree(pPatchRec->patch.pTempInfo); + pPatchRec->patch.pTempInfo = NULL; + } +#endif + /* Remove the patch in case the gc mapping is not present. 
*/ + if ( !pPrivInstrHC + && pPatchRec->patch.uState == PATCH_ENABLED) + { + Log(("Remove patch %RGv due to failed HC address translation\n", pPatchRec->patch.pPrivInstrGC)); + PATMR3RemovePatch(pVM, pPatchRec->patch.pPrivInstrGC); + } + } + + /* + * Correct absolute fixups in the global patch. (helper functions) + * Bit of a mess. Uses the new patch record, but restored patch functions. + */ + PRELOCREC pRec = 0; + AVLPVKEY key = 0; + + Log(("Correct fixups in global helper functions\n")); + while (true) + { + int32_t offset; + RTRCPTR *pFixup; + + /* Get the record that's closest from above */ + pRec = (PRELOCREC)RTAvlPVGetBestFit(&pVM->patm.s.pGlobalPatchRec->patch.FixupTree, key, true); + if (pRec == 0) + break; + + key = (AVLPVKEY)(pRec->pRelocPos + 1); /* search for the next record during the next round. */ + + /* rec.pRelocPos now contains the relative position inside the hypervisor area. */ + offset = (int32_t)(pRec->pRelocPos - pVM->patm.s.pPatchMemHC); + pFixup = (RTRCPTR *)pRec->pRelocPos; + + /* Correct fixups that refer to PATM structures in the hypervisor region (their addresses might have changed). */ + rc = patmCorrectFixup(pVM, uVersion, patmInfo, &pVM->patm.s.pGlobalPatchRec->patch, pRec, offset, pFixup); + AssertRCReturn(rc, rc); + } + +#ifdef VBOX_WITH_STATISTICS + /* + * Restore relevant old statistics + */ + pVM->patm.s.StatDisabled = patmInfo.StatDisabled; + pVM->patm.s.StatUnusable = patmInfo.StatUnusable; + pVM->patm.s.StatEnabled = patmInfo.StatEnabled; + pVM->patm.s.StatInstalled = patmInfo.StatInstalled; +#endif + + return VINF_SUCCESS; +} + +/** + * Correct fixups to predefined hypervisor PATM regions. (their addresses might have changed) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param uVersion Saved state version. + * @param patmInfo Saved PATM structure + * @param pPatch Patch record + * @param pRec Relocation record + * @param offset Offset of referenced data/code + * @param pFixup Fixup address + */ +static int patmCorrectFixup(PVM pVM, unsigned uVersion, PATM &patmInfo, PPATCHINFO pPatch, PRELOCREC pRec, + int32_t offset, RTRCPTR *pFixup) +{ + int32_t delta = pVM->patm.s.pPatchMemGC - patmInfo.pPatchMemGC; + RT_NOREF1(offset); + + switch (pRec->uType) + { + case FIXUP_ABSOLUTE: + case FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL: + { + Assert( pRec->uType != PATM_SAVED_STATE_VERSION_NO_RAW_MEM + || (pRec->pSource == pRec->pDest && PATM_IS_ASMFIX(pRec->pSource)) ); + + /* bird: What is this for exactly? Only the MMIO fixups used to have pSource set. */ + if ( pRec->pSource + && !PATMIsPatchGCAddr(pVM, (RTRCUINTPTR)pRec->pSource) + && pRec->uType != FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL) + break; + + RTRCPTR const uFixup = *pFixup; + if ( uFixup >= patmInfo.pGCStateGC + && uFixup < patmInfo.pGCStateGC + sizeof(PATMGCSTATE)) + { + LogFlow(("Changing absolute GCState at %RRv from %RRv to %RRv\n", patmInfo.pPatchMemGC + offset, uFixup, (uFixup - patmInfo.pGCStateGC) + pVM->patm.s.pGCStateGC)); + *pFixup = (uFixup - patmInfo.pGCStateGC) + pVM->patm.s.pGCStateGC; + } + else if ( uFixup >= patmInfo.pCPUMCtxGC + && uFixup < patmInfo.pCPUMCtxGC + sizeof(CPUMCTX)) + { + LogFlow(("Changing absolute CPUMCTX at %RRv from %RRv to %RRv\n", patmInfo.pPatchMemGC + offset, uFixup, (uFixup - patmInfo.pCPUMCtxGC) + pVM->patm.s.pCPUMCtxGC)); + + /* The CPUMCTX structure has completely changed, so correct the offsets too. 
*/ + if (uVersion == PATM_SAVED_STATE_VERSION_VER16) + { + unsigned offCpumCtx = uFixup - patmInfo.pCPUMCtxGC; + + /* ''case RT_OFFSETOF()'' does not work as gcc refuses to use & as a constant expression. + * Defining RT_OFFSETOF as __builtin_offsetof for gcc would make this possible. But this + * function is not available in older gcc versions, at least not in gcc-3.3 */ + if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr0)) + { + LogFlow(("Changing dr[0] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[0]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[0]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr1)) + { + LogFlow(("Changing dr[1] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[1]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[1]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr2)) + { + LogFlow(("Changing dr[2] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[2]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[2]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr3)) + { + LogFlow(("Changing dr[3] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[3]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[3]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr4)) + { + LogFlow(("Changing dr[4] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[4]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[4]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr5)) + { + LogFlow(("Changing dr[5] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[5]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[5]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr6)) + { + LogFlow(("Changing dr[6] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[6]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[6]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, dr7)) + { + LogFlow(("Changing dr[7] offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, dr[7]))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, dr[7]); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, cr0)) + { + LogFlow(("Changing cr0 offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, cr0))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, cr0); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, cr2)) + { + LogFlow(("Changing cr2 offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, cr2))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, cr2); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, cr3)) + { + LogFlow(("Changing cr3 offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, cr3))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, cr3); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, cr4)) + { + LogFlow(("Changing cr4 offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, cr4))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, cr4); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, tr)) + { + LogFlow(("Changing tr offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, tr))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, tr); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, ldtr)) + { + LogFlow(("Changing ldtr offset from %x to %x\n", 
offCpumCtx, RT_OFFSETOF(CPUMCTX, ldtr))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, ldtr); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, gdtr.pGdt)) + { + LogFlow(("Changing pGdt offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, gdtr.pGdt))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, gdtr.pGdt); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, gdtr.cbGdt)) + { + LogFlow(("Changing cbGdt offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, gdtr.cbGdt))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, gdtr.cbGdt); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, idtr.pIdt)) + { + LogFlow(("Changing pIdt offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, idtr.pIdt))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, idtr.pIdt); + } + else if (offCpumCtx == (unsigned)RT_OFFSETOF(CPUMCTX_VER1_6, idtr.cbIdt)) + { + LogFlow(("Changing cbIdt offset from %x to %x\n", offCpumCtx, RT_OFFSETOF(CPUMCTX, idtr.cbIdt))); + *pFixup = pVM->patm.s.pCPUMCtxGC + RT_OFFSETOF(CPUMCTX, idtr.cbIdt); + } + else + AssertMsgFailed(("Unexpected CPUMCTX offset %x\n", offCpumCtx)); + } + else + *pFixup = (uFixup - patmInfo.pCPUMCtxGC) + pVM->patm.s.pCPUMCtxGC; + } + else if ( uFixup >= patmInfo.pStatsGC + && uFixup < patmInfo.pStatsGC + PATM_STAT_MEMSIZE) + { + LogFlow(("Changing absolute Stats at %RRv from %RRv to %RRv\n", patmInfo.pPatchMemGC + offset, uFixup, (uFixup - patmInfo.pStatsGC) + pVM->patm.s.pStatsGC)); + *pFixup = (uFixup - patmInfo.pStatsGC) + pVM->patm.s.pStatsGC; + } + else if ( uFixup >= patmInfo.pGCStackGC + && uFixup < patmInfo.pGCStackGC + PATM_STACK_TOTAL_SIZE) + { + LogFlow(("Changing absolute Stack at %RRv from %RRv to %RRv\n", patmInfo.pPatchMemGC + offset, uFixup, (uFixup - patmInfo.pGCStackGC) + pVM->patm.s.pGCStackGC)); + *pFixup = (uFixup - patmInfo.pGCStackGC) + pVM->patm.s.pGCStackGC; + } + else if ( uFixup >= patmInfo.pPatchMemGC + && uFixup < patmInfo.pPatchMemGC + patmInfo.cbPatchMem) + { + LogFlow(("Changing absolute PatchMem at %RRv from %RRv to %RRv\n", patmInfo.pPatchMemGC + offset, uFixup, (uFixup - patmInfo.pPatchMemGC) + pVM->patm.s.pPatchMemGC)); + *pFixup = (uFixup - patmInfo.pPatchMemGC) + pVM->patm.s.pPatchMemGC; + } + /* + * For PATM_SAVED_STATE_VERSION_FIXUP_HACK and earlier boldly ASSUME: + * 1. That pCPUMCtxGC is in the VM structure and that its location is + * at the first page of the same 4 MB chunk. + * 2. That the forced actions were in the first 32 bytes of the VM + * structure. + * 3. That the CPUM leaves are less than 8KB into the structure. + */ + else if ( uVersion <= PATM_SAVED_STATE_VERSION_FIXUP_HACK + && uFixup - (patmInfo.pCPUMCtxGC & UINT32_C(0xffc00000)) < UINT32_C(32)) + { + LogFlow(("Changing fLocalForcedActions fixup from %RRv to %RRv\n", uFixup, pVM->pVMRC + RT_OFFSETOF(VM, aCpus[0].fLocalForcedActions))); + *pFixup = pVM->pVMRC + RT_OFFSETOF(VM, aCpus[0].fLocalForcedActions); + pRec->pSource = pRec->pDest = PATM_ASMFIX_VM_FORCEDACTIONS; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + } + else if ( uVersion <= PATM_SAVED_STATE_VERSION_FIXUP_HACK + && uFixup - (patmInfo.pCPUMCtxGC & UINT32_C(0xffc00000)) < UINT32_C(8192)) + { + static int cCpuidFixup = 0; + + /* Very dirty assumptions about the cpuid patch and cpuid ordering. 
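
The FIXUP_ABSOLUTE handling above rebases a saved guest-context address by finding which PATM region it pointed into (GC state, CPUMCTX, statistics, stack or patch memory) and re-applying the same offset against that region's new base. A compact illustration of that region-delta idea (names invented):

    #include <cstddef>
    #include <cstdint>

    /* One relocatable region: where it lived when the state was saved and
       where it lives now. */
    struct Region
    {
        uint32_t uOldBase;
        uint32_t uNewBase;
        uint32_t cb;
    };

    /* Rebase an absolute address if it falls inside any known region; returns
       true and updates *puAddr on success, false if the address is foreign. */
    static bool rebaseAbsolute(uint32_t *puAddr, const Region *paRegions, size_t cRegions)
    {
        for (size_t i = 0; i < cRegions; i++)
        {
            uint32_t const off = *puAddr - paRegions[i].uOldBase;
            if (off < paRegions[i].cb)
            {
                *puAddr = paRegions[i].uNewBase + off;   /* same offset, new base */
                return true;
            }
        }
        return false;
    }
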
*/ + switch (cCpuidFixup & 3) + { + case 0: + *pFixup = CPUMR3GetGuestCpuIdPatmDefRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_DEF_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + break; + case 1: + *pFixup = CPUMR3GetGuestCpuIdPatmStdRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_STD_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + break; + case 2: + *pFixup = CPUMR3GetGuestCpuIdPatmExtRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_EXT_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + break; + case 3: + *pFixup = CPUMR3GetGuestCpuIdPatmCentaurRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_CENTAUR_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + break; + } + LogFlow(("Changing cpuid fixup %d from %RRv to %RRv\n", cCpuidFixup, uFixup, *pFixup)); + cCpuidFixup++; + } + /* + * For PATM_SAVED_STATE_VERSION_MEM thru PATM_SAVED_STATE_VERSION_NO_RAW_MEM + * we abused Core.Key to store the type for fixups needing correcting on load. + */ + else if ( uVersion >= PATM_SAVED_STATE_VERSION_MEM + && uVersion <= PATM_SAVED_STATE_VERSION_NO_RAW_MEM) + { + /* Core.Key abused to store the type of fixup. */ + switch ((uintptr_t)pRec->Core.Key) + { + case PATM_FIXUP_CPU_FF_ACTION: + *pFixup = pVM->pVMRC + RT_OFFSETOF(VM, aCpus[0].fLocalForcedActions); + pRec->pSource = pRec->pDest = PATM_ASMFIX_VM_FORCEDACTIONS; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + LogFlow(("Changing cpu ff action fixup from %x to %x\n", uFixup, *pFixup)); + break; + case PATM_FIXUP_CPUID_DEFAULT: + *pFixup = CPUMR3GetGuestCpuIdPatmDefRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_DEF_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + LogFlow(("Changing cpuid def fixup from %x to %x\n", uFixup, *pFixup)); + break; + case PATM_FIXUP_CPUID_STANDARD: + *pFixup = CPUMR3GetGuestCpuIdPatmStdRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_STD_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + LogFlow(("Changing cpuid std fixup from %x to %x\n", uFixup, *pFixup)); + break; + case PATM_FIXUP_CPUID_EXTENDED: + *pFixup = CPUMR3GetGuestCpuIdPatmExtRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_EXT_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + LogFlow(("Changing cpuid ext fixup from %x to %x\n", uFixup, *pFixup)); + break; + case PATM_FIXUP_CPUID_CENTAUR: + *pFixup = CPUMR3GetGuestCpuIdPatmCentaurRCPtr(pVM); + pRec->pSource = pRec->pDest = PATM_ASMFIX_CPUID_CENTAUR_PTR; + pRec->uType = FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL; + LogFlow(("Changing cpuid centaur fixup from %x to %x\n", uFixup, *pFixup)); + break; + default: + AssertMsgFailed(("Unexpected fixup value %p\n", (uintptr_t)pRec->Core.Key)); + break; + } + } + /* + * After PATM_SAVED_STATE_VERSION_NO_RAW_MEM we changed the fixup type + * and instead put the patch fixup code in the source and target addresses. + */ + else if ( uVersion > PATM_SAVED_STATE_VERSION_NO_RAW_MEM + && pRec->uType == FIXUP_ABSOLUTE_IN_PATCH_ASM_TMPL) + { + Assert(pRec->pSource == pRec->pDest); Assert(PATM_IS_ASMFIX(pRec->pSource)); + switch (pRec->pSource) + { + case PATM_ASMFIX_VM_FORCEDACTIONS: + *pFixup = pVM->pVMRC + RT_OFFSETOF(VM, aCpus[0].fLocalForcedActions); + break; + case PATM_ASMFIX_CPUID_DEF_PTR: + *pFixup = CPUMR3GetGuestCpuIdPatmDefRCPtr(pVM); + break; + case PATM_ASMFIX_CPUID_STD_PTR: /* Saved again patches only. */ + *pFixup = CPUMR3GetGuestCpuIdPatmStdRCPtr(pVM); + break; + case PATM_ASMFIX_CPUID_EXT_PTR: /* Saved again patches only. 
*/ + *pFixup = CPUMR3GetGuestCpuIdPatmExtRCPtr(pVM); + break; + case PATM_ASMFIX_CPUID_CENTAUR_PTR: /* Saved again patches only. */ + *pFixup = CPUMR3GetGuestCpuIdPatmCentaurRCPtr(pVM); + break; + case PATM_ASMFIX_REUSE_LATER_0: /* Was only used for a few days. Don't want to keep this legacy around. */ + case PATM_ASMFIX_REUSE_LATER_1: + AssertLogRelMsgFailedReturn(("Unsupported PATM fixup. You have to discard this saved state or snapshot."), + VERR_INTERNAL_ERROR); + break; + } + } + /* + * Constant that may change between VM version needs fixing up. + */ + else if (pRec->uType == FIXUP_CONSTANT_IN_PATCH_ASM_TMPL) + { + AssertLogRelReturn(uVersion > PATM_SAVED_STATE_VERSION_NO_RAW_MEM, VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + Assert(pRec->pSource == pRec->pDest); Assert(PATM_IS_ASMFIX(pRec->pSource)); + switch (pRec->pSource) + { + case PATM_ASMFIX_REUSE_LATER_2: /* Was only used for a few days. Don't want to keep this legacy around. */ + case PATM_ASMFIX_REUSE_LATER_3: + AssertLogRelMsgFailedReturn(("Unsupported PATM fixup. You have to discard this saved state or snapshot."), + VERR_INTERNAL_ERROR); + break; + default: + AssertLogRelMsgFailed(("Unknown FIXUP_CONSTANT_IN_PATCH_ASM_TMPL fixup: %#x\n", pRec->pSource)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + } + /* + * Relative fixups for calling or jumping to helper functions inside VMMRC. + * (The distance between the helper function and the patch is subject to + * new code being added to VMMRC as well as VM configurations influencing + * heap allocations and so on and so forth.) + */ + else if (pRec->uType == FIXUP_REL_HELPER_IN_PATCH_ASM_TMPL) + { + AssertLogRelReturn(uVersion > PATM_SAVED_STATE_VERSION_NO_RAW_MEM, VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + Assert(pRec->pSource == pRec->pDest); Assert(PATM_IS_ASMFIX(pRec->pSource)); + int rc; + RTRCPTR uRCPtrDest; + switch (pRec->pSource) + { + case PATM_ASMFIX_HELPER_CPUM_CPUID: + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "CPUMPatchHlpCpuId", &uRCPtrDest); + AssertLogRelRCReturn(rc, rc); + break; + default: + AssertLogRelMsgFailed(("Unknown FIXUP_REL_HLP_CALL_IN_PATCH_ASM_TMPL fixup: %#x\n", pRec->pSource)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + RTRCPTR uRCPtrAfter = pVM->patm.s.pPatchMemGC + ((uintptr_t)&pFixup[1] - (uintptr_t)pVM->patm.s.pPatchMemHC); + *pFixup = uRCPtrDest - uRCPtrAfter; + } + +#ifdef RT_OS_WINDOWS + AssertCompile(RT_OFFSETOF(VM, fGlobalForcedActions) < 32); +#endif + break; + } + + case FIXUP_REL_JMPTOPATCH: + { + RTRCPTR pTarget = (RTRCPTR)((RTRCINTPTR)pRec->pDest + delta); + + if ( pPatch->uState == PATCH_ENABLED + && (pPatch->flags & PATMFL_PATCHED_GUEST_CODE)) + { + uint8_t oldJump[SIZEOF_NEAR_COND_JUMP32]; + uint8_t temp[SIZEOF_NEAR_COND_JUMP32]; + RTRCPTR pJumpOffGC; + RTRCINTPTR displ = (RTRCINTPTR)pTarget - (RTRCINTPTR)pRec->pSource; + RTRCINTPTR displOld= (RTRCINTPTR)pRec->pDest - (RTRCINTPTR)pRec->pSource; + + Log(("Relative fixup (g2p) %08X -> %08X at %08X (source=%08x, target=%08x)\n", *(int32_t*)pRec->pRelocPos, displ, pRec->pRelocPos, pRec->pSource, pRec->pDest)); + + Assert(pRec->pSource - pPatch->cbPatchJump == pPatch->pPrivInstrGC); +#ifdef PATM_RESOLVE_CONFLICTS_WITH_JUMP_PATCHES + if (pPatch->cbPatchJump == SIZEOF_NEAR_COND_JUMP32) + { + Assert(pPatch->flags & PATMFL_JUMP_CONFLICT); + + pJumpOffGC = pPatch->pPrivInstrGC + 2; //two byte opcode + oldJump[0] = pPatch->aPrivInstr[0]; + oldJump[1] = pPatch->aPrivInstr[1]; + *(RTRCUINTPTR *)&oldJump[2] = displOld; + } + else +#endif + if (pPatch->cbPatchJump == SIZEOF_NEARJUMP32) + { + 
pJumpOffGC = pPatch->pPrivInstrGC + 1; //one byte opcode + oldJump[0] = 0xE9; + *(RTRCUINTPTR *)&oldJump[1] = displOld; + } + else + { + AssertMsgFailed(("Invalid patch jump size %d\n", pPatch->cbPatchJump)); + break; + } + Assert(pPatch->cbPatchJump <= sizeof(temp)); + + /* + * Read old patch jump and compare it to the one we previously installed + */ + int rc = PGMPhysSimpleReadGCPtr(VMMGetCpu0(pVM), temp, pPatch->pPrivInstrGC, pPatch->cbPatchJump); + Assert(RT_SUCCESS(rc) || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT); + + if (rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + RTRCPTR pPage = pPatch->pPrivInstrGC & PAGE_BASE_GC_MASK; + rc = PGMR3HandlerVirtualRegister(pVM, VMMGetCpu(pVM), pVM->patm.s.hMonitorPageType, + pPage, + pPage + (PAGE_SIZE - 1) /* inclusive! */, + (void *)(uintptr_t)pPage, NIL_RTRCPTR /*pvUserRC*/, NULL /*pszDesc*/); + Assert(RT_SUCCESS(rc) || rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT); + } + else + if (memcmp(temp, oldJump, pPatch->cbPatchJump)) + { + Log(("PATM: Patch jump was overwritten -> disabling patch!!\n")); + /* + * Disable patch; this is not a good solution + */ + /** @todo hopefully it was completely overwritten (if the read was successful)!!!! */ + pPatch->uState = PATCH_DISABLED; + } + else + if (RT_SUCCESS(rc)) + { + rc = PGMPhysSimpleDirtyWriteGCPtr(VMMGetCpu0(pVM), pJumpOffGC, &displ, sizeof(displ)); + AssertRC(rc); + } + else + AssertMsgFailed(("Unexpected error %d from MMR3PhysReadGCVirt\n", rc)); + } + else + Log(("Skip the guest jump to patch code for this disabled patch %08X\n", pRec->pRelocPos)); + + pRec->pDest = pTarget; + break; + } + + case FIXUP_REL_JMPTOGUEST: + { + RTRCPTR pSource = (RTRCPTR)((RTRCINTPTR)pRec->pSource + delta); + RTRCINTPTR displ = (RTRCINTPTR)pRec->pDest - (RTRCINTPTR)pSource; + + Assert(!(pPatch->flags & PATMFL_GLOBAL_FUNCTIONS)); + Log(("Relative fixup (p2g) %08X -> %08X at %08X (source=%08x, target=%08x)\n", *(int32_t*)pRec->pRelocPos, displ, pRec->pRelocPos, pRec->pSource, pRec->pDest)); + *(RTRCUINTPTR *)pRec->pRelocPos = displ; + pRec->pSource = pSource; + break; + + } + } + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/PDM.cpp b/src/VBox/VMM/VMMR3/PDM.cpp new file mode 100644 index 00000000..8dda83b0 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDM.cpp @@ -0,0 +1,2972 @@ +/* $Id: PDM.cpp $ */ +/** @file + * PDM - Pluggable Device Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_pdm PDM - The Pluggable Device & Driver Manager + * + * The PDM handles devices and their drivers in a flexible and dynamic manner. + * + * VirtualBox is designed to be very configurable, i.e. the ability to select + * virtual devices and configure them uniquely for a VM. For this reason + * virtual devices are not statically linked with the VMM but loaded, linked and + * instantiated at runtime by PDM using the information found in the + * Configuration Manager (CFGM). 
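+ *
+ * To illustrate the runtime loading described above: each device module is
+ * expected to export a registration entry point which PDM calls right after
+ * loading the module (the entry point is discussed further in the next
+ * section).  The sketch below is illustrative only; the descriptor name
+ * 'g_DeviceMyDev' is an assumption and the version check shown is merely the
+ * usual pattern, not code taken from this file:
+ * @code
+ *  // Hypothetical device module registration entry point (sketch only).
+ *  extern "C" DECLEXPORT(int) VBoxDevicesRegister(PPDMDEVREGCB pCallbacks, uint32_t u32Version)
+ *  {
+ *      // Refuse to register against an incompatible VMM (assumed version constant).
+ *      AssertLogRelMsgReturn(u32Version >= VBOX_VERSION,
+ *                            ("u32Version=%#x\n", u32Version), VERR_VERSION_MISMATCH);
+ *      // Hand this module's static PDMDEVREG descriptor to PDM.
+ *      return pCallbacks->pfnRegister(pCallbacks, &g_DeviceMyDev);
+ *  }
+ * @endcode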
+ * + * While the chief purpose of PDM is to manage devices and their drivers, it + * also serves as somewhere to put useful things like cross context queues, cross + * context synchronization (like critsect), VM centric thread management, + * asynchronous I/O framework, and so on. + * + * @sa @ref grp_pdm + * @subpage pg_pdm_block_cache + * + * + * @section sec_pdm_dev The Pluggable Devices + * + * Devices register themselves when the module containing them is loaded. PDM + * will call the entry point 'VBoxDevicesRegister' when loading a device module. + * The device module will then use the supplied callback table to check the VMM + * version and to register its devices. Each device has a unique name (within + * the VM configuration anyway). The name is not only used in PDM, but also in + * CFGM to organize device and device instance settings, and by anyone who wants + * to talk to a specific device instance. + * + * When all device modules have been successfully loaded PDM will instantiate + * those devices which are configured for the VM. Note that a device may have + * more than one instance, take network adaptors as an example. When + * instantiating a device PDM provides device instance memory and a callback + * table (aka Device Helpers / DevHlp) with the VM APIs which the device + * instance is trusted with. + * + * Some devices are trusted devices, most are not. The trusted devices are an + * integrated part of the VM and can obtain the VM handle, thus enabling them to + * call any VM API. Untrusted devices can only use the callbacks provided + * during device instantiation. + * + * The main purpose in having DevHlps rather than just giving all the devices + * the VM handle and letting them call the internal VM APIs directly is both to + * create a binary interface that can be supported across releases and to + * create a barrier between devices and the VM. (The trusted / untrusted bit + * hasn't turned out to be of much use btw., but it's easy to maintain so there + * isn't any point in removing it.) + * + * A device can provide a ring-0 and/or a raw-mode context extension to improve + * the VM performance by handling exits and traps (respectively) without + * requiring context switches (to ring-3). Callbacks for MMIO and I/O ports + * need to be registered specifically for the additional contexts for this to + * make sense. Also, the device has to be trusted to be loaded into R0/RC + * because of the extra privilege it entails. Note that raw-mode code and data + * will be subject to relocation. + * + * + * @subsection sec_pdm_dev_pci PCI Devices + * + * A PDM device usually registers one PCI device during its instantiation, + * legacy devices may register zero, while a few (currently none) more + * complicated devices may register multiple PCI functions or devices. + * + * The bus, device and function assignments can either be done explicitly via the + * configuration or the registration call, or it can be left up to the PCI bus. + * The typical VBox configuration construct (ConsoleImpl2.cpp) will do explicit + * assignments for all devices its BusAssignmentManager class knows about. + * + * For explicit CFGM style configuration, the "PCIBusNo", "PCIDeviceNo", and + * "PCIFunctionNo" values in the PDM device instance configuration (not the + * "config" subkey, but the top level one) will be picked up for the primary PCI + * device.
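+ *
+ * As an illustration of the above (the device name "e1000" and the numbers are
+ * merely examples, not taken from any actual configuration), such an explicit
+ * placement sits directly under the device instance key, next to the "Config"
+ * subkey rather than inside it:
+ * @verbatim
+ *  /Devices/e1000/0/
+ *      PCIBusNo       <integer> = 0
+ *      PCIDeviceNo    <integer> = 3
+ *      PCIFunctionNo  <integer> = 0
+ *      Config/
+ *          ...            (device specific settings)
+ * @endverbatim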
+ * The primary PCI configuration is by default the first one, but this + * can be controlled using the @a idxDevCfg parameter of the + * PDMDEVHLPR3::pfnPCIRegister method. For subsequent configuration (@a + * idxDevCfg > 0) the values are taken from the "PciDevNN" subkey, where "NN" is + * replaced by the @a idxDevCfg value. + * + * There's currently a limit of 256 PCI devices per PDM device. + * + * + * @section sec_pdm_special_devs Special Devices + * + * Several kinds of devices interact with the VMM and/or other devices, and PDM + * will work like a mediator for these. The typical pattern is that the device + * calls a special registration device helper with a set of callbacks, PDM + * responds by copying this and providing a pointer to a set of helper callbacks + * for that particular kind of device. Unlike interfaces where the callback + * table pointer is used as a 'this' pointer, these arrangements will use the + * device instance pointer (PPDMDEVINS) as a kind of 'this' pointer. + * + * For an example of this kind of setup, see the PIC. The PIC registers itself + * by calling PDMDEVHLPR3::pfnPICRegister. PDM saves the device instance, + * copies the callback tables (PDMPICREG), resolving the ring-0 and raw-mode + * addresses in the process, and hands back the pointer to a set of helper + * methods (PDMPICHLPR3). The PIC device then queries the ring-0 and raw-mode + * helpers using PDMPICHLPR3::pfnGetR0Helpers and PDMPICHLPR3::pfnGetRCHelpers. + * The PIC device repeats this pfnGetRCHelpers call in its relocation method + * since the address changes when RC is relocated. + * + * @see grp_pdm_device + * + * + * @section sec_pdm_usbdev The Pluggable USB Devices + * + * USB devices are handled a little bit differently than other devices. The + * general concepts wrt. pluggability are mostly the same, but the details + * vary. The registration entry point is 'VBoxUsbRegister', the device + * instance is PDMUSBINS and the callback helpers are different. Also, USB + * devices are restricted to ring-3 and cannot have any ring-0 or raw-mode + * extensions (at least not yet). + * + * The way USB devices work differs greatly from other devices though since they + * aren't attached directly to the PCI/ISA/whatever system buses but via a + * USB host controller (OHCI, UHCI or EHCI). USB devices handle USB requests + * (URBs) and do not register I/O ports, MMIO ranges or PCI bus + * devices/functions. + * + * @see grp_pdm_usbdev + * + * + * @section sec_pdm_drv The Pluggable Drivers + * + * The VM devices often access host hardware or OS facilities. For most + * devices these facilities can be abstracted in one or more levels. These + * abstractions are called drivers. + * + * For instance take a DVD/CD drive. This can be connected to a SCSI + * controller, an ATA controller or a SATA controller. The basics of the DVD/CD + * drive implementation remain the same - eject, insert, read, seek, and such. + * (For the SCSI case, you might want to speak SCSI directly to the drive, but that can of + * course be fixed - see SCSI passthru.) So, it + * makes much sense to have a generic CD/DVD driver which implements this. + * + * Then the media 'inserted' into the DVD/CD drive can be an ISO image, or it can + * be read from a real CD or DVD drive (there are probably other custom formats + * someone could desire to read or construct too). So, it would make sense to + * have abstracted interfaces for dealing with this in a generic way so the + * cdrom unit doesn't have to implement it all.
Thus we have created the + * CDROM/DVD media driver family. + * + * So, for this example the IDE controller #1 (i.e. secondary) will have + * the DVD/CD Driver attached to its LUN #0 (master). When a medium is mounted + * the DVD/CD Driver will have an ISO, HostDVD or RAW (media) Driver attached. + * + * It is possible to configure many levels of drivers, inserting filters, loggers, + * or whatever you desire into the chain. We're using this for network sniffing, + * for instance. + * + * The drivers are loaded in a similar manner to that of a device, namely by + * iterating a keyspace in CFGM, loading the modules listed there and calling + * 'VBoxDriversRegister' with a callback table. + * + * @see grp_pdm_driver + * + * + * @section sec_pdm_ifs Interfaces + * + * The pluggable drivers and devices expose one standard interface (callback + * table) which is used to construct, destruct, attach, detach,( ++,) and query + * other interfaces. A device will query the interfaces required for its + * operation during init and hot-plug. PDM may query some interfaces during + * runtime mounting too. + * + * An interface here means a function table contained within the device or + * driver instance data. Its methods are invoked with the function table pointer + * as the first argument and they will calculate the address of the device or + * driver instance data from it. (This is one of the aspects which *might* have + * been better done in C++.) + * + * @see grp_pdm_interfaces + * + * + * @section sec_pdm_utils Utilities + * + * As mentioned earlier, PDM is the location of any useful constructs that don't + * quite fit into IPRT. The next subsections will discuss these. + * + * One thing these APIs all have in common is that resources will be associated + * with a device / driver and automatically freed after it has been destroyed if + * the destructor didn't do this. + * + * + * @subsection sec_pdm_async_completion Async I/O + * + * The PDM Async I/O API provides a somewhat platform agnostic interface for + * asynchronous I/O. For reasons of performance and complexity this does not + * build upon any IPRT API. + * + * @todo more details. + * + * @see grp_pdm_async_completion + * + * + * @subsection sec_pdm_async_task Async Task - not implemented + * + * @todo implement and describe + * + * @see grp_pdm_async_task + * + * + * @subsection sec_pdm_critsect Critical Section + * + * The PDM Critical Section API currently builds on the IPRT API with the + * same name. It adds the possibility to use critical sections in ring-0 and + * raw-mode as well as in ring-3. There are certain restrictions on the RC and + * R0 usage though since we're not able to wait on it, nor wake up anyone that + * is waiting on it. These restrictions originate with the use of a ring-3 event + * semaphore. In a later incarnation we plan to replace the ring-3 event + * semaphore with a ring-0 one, thus enabling us to wake up waiters while + * executing in ring-0 and making the hardware assisted execution mode more + * efficient. (Raw-mode won't benefit much from this, naturally.) + * + * @see grp_pdm_critsect + * + * + * @subsection sec_pdm_queue Queue + * + * The PDM Queue API is for queuing one or more tasks for later consumption in + * ring-3 by EMT, and optionally forcing a delayed or ASAP return to ring-3. The + * queues can also be run on a timer basis as an alternative to the ASAP thing. + * The queue will be flushed at forced action time.
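+ *
+ * A minimal sketch of the usage pattern follows; the item layout, the consumer
+ * name and the exact allocation/insertion signatures are assumptions made for
+ * illustration and are not definitions from this file:
+ * @code
+ *  // Every queue item must lead with the core structure PDM expects.
+ *  typedef struct MYDEVQUEUEITEM
+ *  {
+ *      PDMQUEUEITEMCORE    Core;       // mandatory header used by PDM
+ *      uint32_t            uWhat;      // device specific payload (example)
+ *  } MYDEVQUEUEITEM;
+ *
+ *  // Consumer callback, run on the EMT when the queue is flushed.
+ *  // Returning false asks PDM to retry the item later.
+ *  static DECLCALLBACK(bool) mydevQueueConsumer(PPDMDEVINS pDevIns, PPDMQUEUEITEMCORE pItem)
+ *  {
+ *      RT_NOREF(pDevIns);
+ *      MYDEVQUEUEITEM *pMyItem = (MYDEVQUEUEITEM *)pItem;
+ *      // ... process pMyItem->uWhat ...
+ *      return true;
+ *  }
+ *
+ *  // Producer side (pQueue is a PPDMQUEUE created earlier; creation call omitted):
+ *  MYDEVQUEUEITEM *pMyItem = (MYDEVQUEUEITEM *)PDMQueueAlloc(pQueue);
+ *  if (pMyItem)
+ *  {
+ *      pMyItem->uWhat = 42;
+ *      PDMQueueInsert(pQueue, &pMyItem->Core);
+ *  }
+ * @endcode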
+ * + * A queue can also be used by another thread (a I/O worker for instance) to + * send work / events over to the EMT. + * + * @see grp_pdm_queue + * + * + * @subsection sec_pdm_task Task - not implemented yet + * + * The PDM Task API is for flagging a task for execution at a later point when + * we're back in ring-3, optionally forcing the ring-3 return to happen ASAP. + * As you can see the concept is similar to queues only simpler. + * + * A task can also be scheduled by another thread (a I/O worker for instance) as + * a mean of getting something done in EMT. + * + * @see grp_pdm_task + * + * + * @subsection sec_pdm_thread Thread + * + * The PDM Thread API is there to help devices and drivers manage their threads + * correctly wrt. power on, suspend, resume, power off and destruction. + * + * The general usage pattern for threads in the employ of devices and drivers is + * that they shuffle data or requests while the VM is running and stop doing + * this when the VM is paused or powered down. Rogue threads running while the + * VM is paused can cause the state to change during saving or have other + * unwanted side effects. The PDM Threads API ensures that this won't happen. + * + * @see grp_pdm_thread + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM +#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The PDM saved state version. */ +#define PDM_SAVED_STATE_VERSION 5 +/** Before the PDM audio architecture was introduced there was an "AudioSniffer" + * device which took care of multiplexing input/output audio data from/to various places. + * Thus this device is not needed/used anymore. */ +#define PDM_SAVED_STATE_VERSION_PRE_PDM_AUDIO 4 +#define PDM_SAVED_STATE_VERSION_PRE_NMI_FF 3 + +/** The number of nanoseconds a suspend callback needs to take before + * PDMR3Suspend warns about it taking too long. */ +#define PDMSUSPEND_WARN_AT_NS UINT64_C(1200000000) + +/** The number of nanoseconds a suspend callback needs to take before + * PDMR3PowerOff warns about it taking too long. */ +#define PDMPOWEROFF_WARN_AT_NS UINT64_C( 900000000) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Statistics of asynchronous notification tasks - used by reset, suspend and + * power off. + */ +typedef struct PDMNOTIFYASYNCSTATS +{ + /** The start timestamp. */ + uint64_t uStartNsTs; + /** When to log the next time. */ + uint64_t cNsElapsedNextLog; + /** The loop counter. 
*/ + uint32_t cLoops; + /** The number of pending asynchronous notification tasks. */ + uint32_t cAsync; + /** The name of the operation (log prefix). */ + const char *pszOp; + /** The current list buffer position. */ + size_t offList; + /** String containing a list of the pending tasks. */ + char szList[1024]; +} PDMNOTIFYASYNCSTATS; +/** Pointer to the stats of pending asynchronous notification tasks. */ +typedef PDMNOTIFYASYNCSTATS *PPDMNOTIFYASYNCSTATS; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) pdmR3LiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass); +static DECLCALLBACK(int) pdmR3SaveExec(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) pdmR3LoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(int) pdmR3LoadPrep(PVM pVM, PSSMHANDLE pSSM); + +static FNDBGFHANDLERINT pdmR3InfoTracingIds; + + +/** + * Initializes the PDM part of the UVM. + * + * This doesn't really do much right now but has to be here for the sake + * of completeness. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + */ +VMMR3_INT_DECL(int) PDMR3InitUVM(PUVM pUVM) +{ + AssertCompile(sizeof(pUVM->pdm.s) <= sizeof(pUVM->pdm.padding)); + AssertRelease(sizeof(pUVM->pdm.s) <= sizeof(pUVM->pdm.padding)); + pUVM->pdm.s.pModules = NULL; + pUVM->pdm.s.pCritSects = NULL; + pUVM->pdm.s.pRwCritSects = NULL; + return RTCritSectInit(&pUVM->pdm.s.ListCritSect); +} + + +/** + * Initializes the PDM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PDMR3Init(PVM pVM) +{ + LogFlow(("PDMR3Init\n")); + + /* + * Assert alignment and sizes. + */ + AssertRelease(!(RT_UOFFSETOF(VM, pdm.s) & 31)); + AssertRelease(sizeof(pVM->pdm.s) <= sizeof(pVM->pdm.padding)); + AssertCompileMemberAlignment(PDM, CritSect, sizeof(uintptr_t)); + + /* + * Init the structure. + */ + pVM->pdm.s.GCPhysVMMDevHeap = NIL_RTGCPHYS; + //pVM->pdm.s.idTracingDev = 0; + pVM->pdm.s.idTracingOther = 1024; + + /* + * Initialize critical sections first. + */ + int rc = pdmR3CritSectBothInitStats(pVM); + if (RT_SUCCESS(rc)) + rc = PDMR3CritSectInit(pVM, &pVM->pdm.s.CritSect, RT_SRC_POS, "PDM"); + if (RT_SUCCESS(rc)) + { + rc = PDMR3CritSectInit(pVM, &pVM->pdm.s.NopCritSect, RT_SRC_POS, "NOP"); + if (RT_SUCCESS(rc)) + pVM->pdm.s.NopCritSect.s.Core.fFlags |= RTCRITSECT_FLAGS_NOP; + } + + /* + * Initialize sub components. + */ + if (RT_SUCCESS(rc)) + rc = pdmR3LdrInitU(pVM->pUVM); +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + if (RT_SUCCESS(rc)) + rc = pdmR3AsyncCompletionInit(pVM); +#endif +#ifdef VBOX_WITH_NETSHAPER + if (RT_SUCCESS(rc)) + rc = pdmR3NetShaperInit(pVM); +#endif + if (RT_SUCCESS(rc)) + rc = pdmR3BlkCacheInit(pVM); + if (RT_SUCCESS(rc)) + rc = pdmR3DrvInit(pVM); + if (RT_SUCCESS(rc)) + rc = pdmR3DevInit(pVM); + if (RT_SUCCESS(rc)) + { + /* + * Register the saved state data unit. + */ + rc = SSMR3RegisterInternal(pVM, "pdm", 1, PDM_SAVED_STATE_VERSION, 128, + NULL, pdmR3LiveExec, NULL, + NULL, pdmR3SaveExec, NULL, + pdmR3LoadPrep, pdmR3LoadExec, NULL); + if (RT_SUCCESS(rc)) + { + /* + * Register the info handlers. 
+ */ + DBGFR3InfoRegisterInternal(pVM, "pdmtracingids", + "Displays the tracing IDs assigned by PDM to devices, USB device, drivers and more.", + pdmR3InfoTracingIds); + + LogFlow(("PDM: Successfully initialized\n")); + return rc; + } + } + + /* + * Cleanup and return failure. + */ + PDMR3Term(pVM); + LogFlow(("PDMR3Init: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Init phase completed callback. + * + * We use this for calling PDMDEVREG::pfnInitComplete callback after everything + * else has been initialized. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat The phase that was completed. + */ +VMMR3_INT_DECL(int) PDMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ +#ifdef VBOX_WITH_RAW_MODE + if (enmWhat == VMINITCOMPLETED_RC) +#else + if (enmWhat == VMINITCOMPLETED_RING0) +#endif + return pdmR3DevInitComplete(pVM); + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + * @remark The loader subcomponent is relocated by PDMR3LdrRelocate() very + * early in the relocation phase. + */ +VMMR3_INT_DECL(void) PDMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + LogFlow(("PDMR3Relocate\n")); + + /* + * Queues. + */ + pdmR3QueueRelocate(pVM, offDelta); + pVM->pdm.s.pDevHlpQueueRC = PDMQueueRCPtr(pVM->pdm.s.pDevHlpQueueR3); + + /* + * Critical sections. + */ + pdmR3CritSectBothRelocate(pVM); + + /* + * The registered PIC. + */ + if (pVM->pdm.s.Pic.pDevInsRC) + { + pVM->pdm.s.Pic.pDevInsRC += offDelta; + pVM->pdm.s.Pic.pfnSetIrqRC += offDelta; + pVM->pdm.s.Pic.pfnGetInterruptRC += offDelta; + } + + /* + * The registered APIC. + */ + if (pVM->pdm.s.Apic.pDevInsRC) + pVM->pdm.s.Apic.pDevInsRC += offDelta; + + /* + * The registered I/O APIC. + */ + if (pVM->pdm.s.IoApic.pDevInsRC) + { + pVM->pdm.s.IoApic.pDevInsRC += offDelta; + pVM->pdm.s.IoApic.pfnSetIrqRC += offDelta; + if (pVM->pdm.s.IoApic.pfnSendMsiRC) + pVM->pdm.s.IoApic.pfnSendMsiRC += offDelta; + if (pVM->pdm.s.IoApic.pfnSetEoiRC) + pVM->pdm.s.IoApic.pfnSetEoiRC += offDelta; + } + + /* + * The register PCI Buses. + */ + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pdm.s.aPciBuses); i++) + { + if (pVM->pdm.s.aPciBuses[i].pDevInsRC) + { + pVM->pdm.s.aPciBuses[i].pDevInsRC += offDelta; + pVM->pdm.s.aPciBuses[i].pfnSetIrqRC += offDelta; + } + } + + /* + * Devices & Drivers. 
+ */ + int rc; + PCPDMDEVHLPRC pDevHlpRC = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCDevHlp", &pDevHlpRC); + AssertReleaseMsgRC(rc, ("rc=%Rrc when resolving g_pdmRCDevHlp\n", rc)); + } + + PCPDMDRVHLPRC pDrvHlpRC = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCDevHlp", &pDrvHlpRC); + AssertReleaseMsgRC(rc, ("rc=%Rrc when resolving g_pdmRCDevHlp\n", rc)); + } + + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + { + pDevIns->pHlpRC = pDevHlpRC; + pDevIns->pvInstanceDataRC = MMHyperR3ToRC(pVM, pDevIns->pvInstanceDataR3); + if (pDevIns->pCritSectRoR3) + pDevIns->pCritSectRoRC = MMHyperR3ToRC(pVM, pDevIns->pCritSectRoR3); + pDevIns->Internal.s.pVMRC = pVM->pVMRC; + + PPDMPCIDEV pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + if (pPciDev) + { + pDevIns->Internal.s.pHeadPciDevRC = MMHyperR3ToRC(pVM, pPciDev); + do + { + pPciDev->Int.s.pDevInsRC = MMHyperR3ToRC(pVM, pPciDev->Int.s.pDevInsR3); + pPciDev->Int.s.pPdmBusRC = MMHyperR3ToRC(pVM, pPciDev->Int.s.pPdmBusR3); + if (pPciDev->Int.s.pNextR3) + pPciDev->Int.s.pNextRC = MMHyperR3ToRC(pVM, pPciDev->Int.s.pNextR3); + pPciDev = pPciDev->Int.s.pNextR3; + } while (pPciDev); + } + + if (pDevIns->pReg->pfnRelocate) + { + LogFlow(("PDMR3Relocate: Relocating device '%s'/%d\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->pReg->pfnRelocate(pDevIns, offDelta); + } + } + + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + { + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + { + if (pDrvIns->pReg->fFlags & PDM_DRVREG_FLAGS_RC) + { + pDrvIns->pHlpRC = pDrvHlpRC; + pDrvIns->pvInstanceDataRC = MMHyperR3ToRC(pVM, pDrvIns->pvInstanceDataR3); + pDrvIns->Internal.s.pVMRC = pVM->pVMRC; + if (pDrvIns->pReg->pfnRelocate) + { + LogFlow(("PDMR3Relocate: Relocating driver '%s'/%u attached to '%s'/%d/%u\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, + pDevIns->pReg->szName, pDevIns->iInstance, pLun->iLun)); + pDrvIns->pReg->pfnRelocate(pDrvIns, offDelta); + } + } + } + } + + } +} + + +/** + * Worker for pdmR3Term that terminates a LUN chain. + * + * @param pVM The cross context VM structure. + * @param pLun The head of the chain. + * @param pszDevice The name of the device (for logging). + * @param iInstance The device instance number (for logging). + */ +static void pdmR3TermLuns(PVM pVM, PPDMLUN pLun, const char *pszDevice, unsigned iInstance) +{ + RT_NOREF2(pszDevice, iInstance); + + for (; pLun; pLun = pLun->pNext) + { + /* + * Destroy them one at a time from the bottom up. + * (The serial device/drivers depends on this - bad.) + */ + PPDMDRVINS pDrvIns = pLun->pBottom; + pLun->pBottom = pLun->pTop = NULL; + while (pDrvIns) + { + PPDMDRVINS pDrvNext = pDrvIns->Internal.s.pUp; + + if (pDrvIns->pReg->pfnDestruct) + { + LogFlow(("pdmR3DevTerm: Destroying - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pLun->iLun, pszDevice, iInstance)); + pDrvIns->pReg->pfnDestruct(pDrvIns); + } + pDrvIns->Internal.s.pDrv->cInstances--; + + /* Order of resource freeing like in pdmR3DrvDestroyChain, but + * not all need to be done as they are done globally later. 
*/ + //PDMR3QueueDestroyDriver(pVM, pDrvIns); + TMR3TimerDestroyDriver(pVM, pDrvIns); + SSMR3DeregisterDriver(pVM, pDrvIns, NULL, 0); + //pdmR3ThreadDestroyDriver(pVM, pDrvIns); + //DBGFR3InfoDeregisterDriver(pVM, pDrvIns, NULL); + //pdmR3CritSectBothDeleteDriver(pVM, pDrvIns); + //PDMR3BlkCacheReleaseDriver(pVM, pDrvIns); +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + //pdmR3AsyncCompletionTemplateDestroyDriver(pVM, pDrvIns); +#endif + + /* Clear the driver struture to catch sloppy code. */ + ASMMemFill32(pDrvIns, RT_UOFFSETOF_DYN(PDMDRVINS, achInstanceData[pDrvIns->pReg->cbInstance]), 0xdeadd0d0); + + pDrvIns = pDrvNext; + } + } +} + + +/** + * Terminates the PDM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PDMR3Term(PVM pVM) +{ + LogFlow(("PDMR3Term:\n")); + AssertMsg(PDMCritSectIsInitialized(&pVM->pdm.s.CritSect), ("bad init order!\n")); + + /* + * Iterate the device instances and attach drivers, doing + * relevant destruction processing. + * + * N.B. There is no need to mess around freeing memory allocated + * from any MM heap since MM will do that in its Term function. + */ + /* usb ones first. */ + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + pdmR3TermLuns(pVM, pUsbIns->Internal.s.pLuns, pUsbIns->pReg->szName, pUsbIns->iInstance); + + /* + * Detach it from the HUB (if it's actually attached to one) so the HUB has + * a chance to stop accessing any data. + */ + PPDMUSBHUB pHub = pUsbIns->Internal.s.pHub; + if (pHub) + { + int rc = pHub->Reg.pfnDetachDevice(pHub->pDrvIns, pUsbIns, pUsbIns->Internal.s.iPort); + if (RT_FAILURE(rc)) + { + LogRel(("PDM: Failed to detach USB device '%s' instance %d from %p: %Rrc\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, pHub, rc)); + } + else + { + pHub->cAvailablePorts++; + Assert(pHub->cAvailablePorts > 0 && pHub->cAvailablePorts <= pHub->cPorts); + pUsbIns->Internal.s.pHub = NULL; + } + } + + if (pUsbIns->pReg->pfnDestruct) + { + LogFlow(("pdmR3DevTerm: Destroying - device '%s'/%d\n", + pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->pReg->pfnDestruct(pUsbIns); + } + + //TMR3TimerDestroyUsb(pVM, pUsbIns); + //SSMR3DeregisterUsb(pVM, pUsbIns, NULL, 0); + pdmR3ThreadDestroyUsb(pVM, pUsbIns); + } + + /* then the 'normal' ones. */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + pdmR3TermLuns(pVM, pDevIns->Internal.s.pLunsR3, pDevIns->pReg->szName, pDevIns->iInstance); + + if (pDevIns->pReg->pfnDestruct) + { + LogFlow(("pdmR3DevTerm: Destroying - device '%s'/%d\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->pReg->pfnDestruct(pDevIns); + } + + TMR3TimerDestroyDevice(pVM, pDevIns); + SSMR3DeregisterDevice(pVM, pDevIns, NULL, 0); + pdmR3CritSectBothDeleteDevice(pVM, pDevIns); + pdmR3ThreadDestroyDevice(pVM, pDevIns); + PDMR3QueueDestroyDevice(pVM, pDevIns); + PGMR3PhysMMIOExDeregister(pVM, pDevIns, UINT32_MAX, UINT32_MAX); +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + pdmR3AsyncCompletionTemplateDestroyDevice(pVM, pDevIns); +#endif + DBGFR3InfoDeregisterDevice(pVM, pDevIns, NULL); + } + + /* + * Destroy all threads. + */ + pdmR3ThreadDestroyAll(pVM); + + /* + * Destroy the block cache. + */ + pdmR3BlkCacheTerm(pVM); + +#ifdef VBOX_WITH_NETSHAPER + /* + * Destroy network bandwidth groups. 
+ */ + pdmR3NetShaperTerm(pVM); +#endif +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + /* + * Free async completion managers. + */ + pdmR3AsyncCompletionTerm(pVM); +#endif + + /* + * Free modules. + */ + pdmR3LdrTermU(pVM->pUVM); + + /* + * Destroy the PDM lock. + */ + PDMR3CritSectDelete(&pVM->pdm.s.CritSect); + /* The MiscCritSect is deleted by PDMR3CritSectBothTerm later. */ + + LogFlow(("PDMR3Term: returns %Rrc\n", VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** + * Terminates the PDM part of the UVM. + * + * This will unload any modules left behind. + * + * @param pUVM Pointer to the user mode VM structure. + */ +VMMR3_INT_DECL(void) PDMR3TermUVM(PUVM pUVM) +{ + /* + * In the normal cause of events we will now call pdmR3LdrTermU for + * the second time. In the case of init failure however, this might + * the first time, which is why we do it. + */ + pdmR3LdrTermU(pUVM); + + Assert(pUVM->pdm.s.pCritSects == NULL); + Assert(pUVM->pdm.s.pRwCritSects == NULL); + RTCritSectDelete(&pUVM->pdm.s.ListCritSect); +} + + +/** + * Bits that are saved in pass 0 and in the final pass. + * + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static void pdmR3SaveBoth(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Save the list of device instances so we can check that they're all still + * there when we load the state and that nothing new has been added. + */ + uint32_t i = 0; + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3, i++) + { + SSMR3PutU32(pSSM, i); + SSMR3PutStrZ(pSSM, pDevIns->pReg->szName); + SSMR3PutU32(pSSM, pDevIns->iInstance); + } + SSMR3PutU32(pSSM, UINT32_MAX); /* terminator */ +} + + +/** + * Live save. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uPass The pass. + */ +static DECLCALLBACK(int) pdmR3LiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + LogFlow(("pdmR3LiveExec:\n")); + AssertReturn(uPass == 0, VERR_SSM_UNEXPECTED_PASS); + pdmR3SaveBoth(pVM, pSSM); + return VINF_SSM_DONT_CALL_AGAIN; +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static DECLCALLBACK(int) pdmR3SaveExec(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("pdmR3SaveExec:\n")); + + /* + * Save interrupt and DMA states. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + SSMR3PutU32(pSSM, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC)); + SSMR3PutU32(pSSM, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC)); + SSMR3PutU32(pSSM, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)); + SSMR3PutU32(pSSM, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI)); + } + SSMR3PutU32(pSSM, VM_FF_IS_SET(pVM, VM_FF_PDM_DMA)); + + pdmR3SaveBoth(pVM, pSSM); + return VINF_SUCCESS; +} + + +/** + * Prepare state load operation. + * + * This will dispatch pending operations and clear the FFs governed by PDM and its devices. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + */ +static DECLCALLBACK(int) pdmR3LoadPrep(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("pdmR3LoadPrep: %s%s\n", + VM_FF_IS_SET(pVM, VM_FF_PDM_QUEUES) ? " VM_FF_PDM_QUEUES" : "", + VM_FF_IS_SET(pVM, VM_FF_PDM_DMA) ? 
" VM_FF_PDM_DMA" : "")); +#ifdef LOG_ENABLED + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + LogFlow(("pdmR3LoadPrep: VCPU %u %s%s\n", idCpu, + VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC) ? " VMCPU_FF_INTERRUPT_APIC" : "", + VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC) ? " VMCPU_FF_INTERRUPT_PIC" : "")); + } +#endif + NOREF(pSSM); + + /* + * In case there is work pending that will raise an interrupt, + * start a DMA transfer, or release a lock. (unlikely) + */ + if (VM_FF_IS_SET(pVM, VM_FF_PDM_QUEUES)) + PDMR3QueueFlushAll(pVM); + + /* Clear the FFs. */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_APIC); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_PIC); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_SMI); + } + VM_FF_CLEAR(pVM, VM_FF_PDM_DMA); + + return VINF_SUCCESS; +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) pdmR3LoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + int rc; + + LogFlow(("pdmR3LoadExec: uPass=%#x\n", uPass)); + + /* + * Validate version. + */ + if ( uVersion != PDM_SAVED_STATE_VERSION + && uVersion != PDM_SAVED_STATE_VERSION_PRE_NMI_FF + && uVersion != PDM_SAVED_STATE_VERSION_PRE_PDM_AUDIO) + { + AssertMsgFailed(("Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + if (uPass == SSM_PASS_FINAL) + { + /* + * Load the interrupt and DMA states. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + + /* APIC interrupt */ + uint32_t fInterruptPending = 0; + rc = SSMR3GetU32(pSSM, &fInterruptPending); + if (RT_FAILURE(rc)) + return rc; + if (fInterruptPending & ~1) + { + AssertMsgFailed(("fInterruptPending=%#x (APIC)\n", fInterruptPending)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + AssertRelease(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC)); + if (fInterruptPending) + VMCPU_FF_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC); + + /* PIC interrupt */ + fInterruptPending = 0; + rc = SSMR3GetU32(pSSM, &fInterruptPending); + if (RT_FAILURE(rc)) + return rc; + if (fInterruptPending & ~1) + { + AssertMsgFailed(("fInterruptPending=%#x (PIC)\n", fInterruptPending)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + AssertRelease(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC)); + if (fInterruptPending) + VMCPU_FF_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC); + + if (uVersion > PDM_SAVED_STATE_VERSION_PRE_NMI_FF) + { + /* NMI interrupt */ + fInterruptPending = 0; + rc = SSMR3GetU32(pSSM, &fInterruptPending); + if (RT_FAILURE(rc)) + return rc; + if (fInterruptPending & ~1) + { + AssertMsgFailed(("fInterruptPending=%#x (NMI)\n", fInterruptPending)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + AssertRelease(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)); + if (fInterruptPending) + VMCPU_FF_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI); + + /* SMI interrupt */ + fInterruptPending = 0; + rc = SSMR3GetU32(pSSM, &fInterruptPending); + if (RT_FAILURE(rc)) + return rc; + if (fInterruptPending & ~1) + { + AssertMsgFailed(("fInterruptPending=%#x (SMI)\n", fInterruptPending)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + AssertRelease(!VMCPU_FF_IS_SET(pVCpu, 
VMCPU_FF_INTERRUPT_SMI)); + if (fInterruptPending) + VMCPU_FF_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI); + } + } + + /* DMA pending */ + uint32_t fDMAPending = 0; + rc = SSMR3GetU32(pSSM, &fDMAPending); + if (RT_FAILURE(rc)) + return rc; + if (fDMAPending & ~1) + { + AssertMsgFailed(("fDMAPending=%#x\n", fDMAPending)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + if (fDMAPending) + VM_FF_SET(pVM, VM_FF_PDM_DMA); + Log(("pdmR3LoadExec: VM_FF_PDM_DMA=%RTbool\n", VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))); + } + + /* + * Load the list of devices and verify that they are all there. + */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_FOUND; + + for (uint32_t i = 0; ; i++) + { + /* Get the sequence number / terminator. */ + uint32_t u32Sep; + rc = SSMR3GetU32(pSSM, &u32Sep); + if (RT_FAILURE(rc)) + return rc; + if (u32Sep == UINT32_MAX) + break; + if (u32Sep != i) + AssertMsgFailedReturn(("Out of sequence. u32Sep=%#x i=%#x\n", u32Sep, i), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + /* Get the name and instance number. */ + char szName[RT_SIZEOFMEMB(PDMDEVREG, szName)]; + rc = SSMR3GetStrZ(pSSM, szName, sizeof(szName)); + if (RT_FAILURE(rc)) + return rc; + uint32_t iInstance; + rc = SSMR3GetU32(pSSM, &iInstance); + if (RT_FAILURE(rc)) + return rc; + + /* Try locate it. */ + PPDMDEVINS pDevIns; + for (pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + if ( !RTStrCmp(szName, pDevIns->pReg->szName) + && pDevIns->iInstance == iInstance) + { + AssertLogRelMsgReturn(!(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_FOUND), + ("%s/#%u\n", pDevIns->pReg->szName, pDevIns->iInstance), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + pDevIns->Internal.s.fIntFlags |= PDMDEVINSINT_FLAGS_FOUND; + break; + } + + if (!pDevIns) + { + bool fSkip = false; + + /* Skip the non-existing (deprecated) "AudioSniffer" device stored in the saved state. */ + if ( uVersion <= PDM_SAVED_STATE_VERSION_PRE_PDM_AUDIO + && !RTStrCmp(szName, "AudioSniffer")) + fSkip = true; + + if (!fSkip) + { + LogRel(("Device '%s'/%d not found in current config\n", szName, iInstance)); + if (SSMR3HandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Device '%s'/%d not found in current config"), szName, iInstance); + } + } + } + + /* + * Check that no additional devices were configured. + */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + if (!(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_FOUND)) + { + LogRel(("Device '%s'/%d not found in the saved state\n", pDevIns->pReg->szName, pDevIns->iInstance)); + if (SSMR3HandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Device '%s'/%d not found in the saved state"), + pDevIns->pReg->szName, pDevIns->iInstance); + } + + return VINF_SUCCESS; +} + + +/** + * Worker for PDMR3PowerOn that deals with one driver. + * + * @param pDrvIns The driver instance. + * @param pszDevName The parent device name. + * @param iDevInstance The parent device instance number. + * @param iLun The parent LUN number. 
+ */ +DECLINLINE(int) pdmR3PowerOnDrv(PPDMDRVINS pDrvIns, const char *pszDevName, uint32_t iDevInstance, uint32_t iLun) +{ + Assert(pDrvIns->Internal.s.fVMSuspended); + if (pDrvIns->pReg->pfnPowerOn) + { + LogFlow(("PDMR3PowerOn: Notifying - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + int rc = VINF_SUCCESS; pDrvIns->pReg->pfnPowerOn(pDrvIns); + if (RT_FAILURE(rc)) + { + LogRel(("PDMR3PowerOn: Driver '%s'/%d on LUN#%d of device '%s'/%d -> %Rrc\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance, rc)); + return rc; + } + } + pDrvIns->Internal.s.fVMSuspended = false; + return VINF_SUCCESS; +} + + +/** + * Worker for PDMR3PowerOn that deals with one USB device instance. + * + * @returns VBox status code. + * @param pUsbIns The USB device instance. + */ +DECLINLINE(int) pdmR3PowerOnUsb(PPDMUSBINS pUsbIns) +{ + Assert(pUsbIns->Internal.s.fVMSuspended); + if (pUsbIns->pReg->pfnVMPowerOn) + { + LogFlow(("PDMR3PowerOn: Notifying - device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + int rc = VINF_SUCCESS; pUsbIns->pReg->pfnVMPowerOn(pUsbIns); + if (RT_FAILURE(rc)) + { + LogRel(("PDMR3PowerOn: Device '%s'/%d -> %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; + } + } + pUsbIns->Internal.s.fVMSuspended = false; + return VINF_SUCCESS; +} + + +/** + * Worker for PDMR3PowerOn that deals with one device instance. + * + * @returns VBox status code. + * @param pDevIns The device instance. + */ +DECLINLINE(int) pdmR3PowerOnDev(PPDMDEVINS pDevIns) +{ + Assert(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_SUSPENDED); + if (pDevIns->pReg->pfnPowerOn) + { + LogFlow(("PDMR3PowerOn: Notifying - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + int rc = VINF_SUCCESS; pDevIns->pReg->pfnPowerOn(pDevIns); + PDMCritSectLeave(pDevIns->pCritSectRoR3); + if (RT_FAILURE(rc)) + { + LogRel(("PDMR3PowerOn: Device '%s'/%d -> %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_SUSPENDED; + return VINF_SUCCESS; +} + + +/** + * This function will notify all the devices and their + * attached drivers about the VM now being powered on. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) PDMR3PowerOn(PVM pVM) +{ + LogFlow(("PDMR3PowerOn:\n")); + + /* + * Iterate thru the device instances and USB device instances, + * processing the drivers associated with those. 
+ */ + int rc = VINF_SUCCESS; + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns && RT_SUCCESS(rc); pDevIns = pDevIns->Internal.s.pNextR3) + { + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun && RT_SUCCESS(rc); pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns && RT_SUCCESS(rc); pDrvIns = pDrvIns->Internal.s.pDown) + rc = pdmR3PowerOnDrv(pDrvIns, pDevIns->pReg->szName, pDevIns->iInstance, pLun->iLun); + if (RT_SUCCESS(rc)) + rc = pdmR3PowerOnDev(pDevIns); + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns && RT_SUCCESS(rc); pUsbIns = pUsbIns->Internal.s.pNext) + { + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun && RT_SUCCESS(rc); pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns && RT_SUCCESS(rc); pDrvIns = pDrvIns->Internal.s.pDown) + rc = pdmR3PowerOnDrv(pDrvIns, pUsbIns->pReg->szName, pUsbIns->iInstance, pLun->iLun); + if (RT_SUCCESS(rc)) + rc = pdmR3PowerOnUsb(pUsbIns); + } +#endif + +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + pdmR3AsyncCompletionResume(pVM); +#endif + + /* + * Resume all threads. + */ + if (RT_SUCCESS(rc)) + pdmR3ThreadResumeAll(pVM); + + /* + * On failure, clean up via PDMR3Suspend. + */ + if (RT_FAILURE(rc)) + PDMR3Suspend(pVM); + + LogFlow(("PDMR3PowerOn: returns %Rrc\n", rc)); + return /*rc*/; +} + + +/** + * Initializes the asynchronous notifi stats structure. + * + * @param pThis The asynchronous notifification stats. + * @param pszOp The name of the operation. + */ +static void pdmR3NotifyAsyncInit(PPDMNOTIFYASYNCSTATS pThis, const char *pszOp) +{ + pThis->uStartNsTs = RTTimeNanoTS(); + pThis->cNsElapsedNextLog = 0; + pThis->cLoops = 0; + pThis->cAsync = 0; + pThis->pszOp = pszOp; + pThis->offList = 0; + pThis->szList[0] = '\0'; +} + + +/** + * Begin a new loop, prepares to gather new stats. + * + * @param pThis The asynchronous notifification stats. + */ +static void pdmR3NotifyAsyncBeginLoop(PPDMNOTIFYASYNCSTATS pThis) +{ + pThis->cLoops++; + pThis->cAsync = 0; + pThis->offList = 0; + pThis->szList[0] = '\0'; +} + + +/** + * Records a device or USB device with a pending asynchronous notification. + * + * @param pThis The asynchronous notifification stats. + * @param pszName The name of the thing. + * @param iInstance The instance number. + */ +static void pdmR3NotifyAsyncAdd(PPDMNOTIFYASYNCSTATS pThis, const char *pszName, uint32_t iInstance) +{ + pThis->cAsync++; + if (pThis->offList < sizeof(pThis->szList) - 4) + pThis->offList += RTStrPrintf(&pThis->szList[pThis->offList], sizeof(pThis->szList) - pThis->offList, + pThis->offList == 0 ? "%s/%u" : ", %s/%u", + pszName, iInstance); +} + + +/** + * Records the asynchronous completition of a reset, suspend or power off. + * + * @param pThis The asynchronous notifification stats. + * @param pszDrvName The driver name. + * @param iDrvInstance The driver instance number. + * @param pszDevName The device or USB device name. + * @param iDevInstance The device or USB device instance number. + * @param iLun The LUN. + */ +static void pdmR3NotifyAsyncAddDrv(PPDMNOTIFYASYNCSTATS pThis, const char *pszDrvName, uint32_t iDrvInstance, + const char *pszDevName, uint32_t iDevInstance, uint32_t iLun) +{ + pThis->cAsync++; + if (pThis->offList < sizeof(pThis->szList) - 8) + pThis->offList += RTStrPrintf(&pThis->szList[pThis->offList], sizeof(pThis->szList) - pThis->offList, + pThis->offList == 0 ? "%s/%u/%u/%s/%u" : ", %s/%u/%u/%s/%u", + pszDevName, iDevInstance, iLun, pszDrvName, iDrvInstance); +} + + +/** + * Log the stats. 
+ * + * @param pThis The asynchronous notifification stats. + */ +static void pdmR3NotifyAsyncLog(PPDMNOTIFYASYNCSTATS pThis) +{ + /* + * Return if we shouldn't log at this point. + * We log with an internval increasing from 0 sec to 60 sec. + */ + if (!pThis->cAsync) + return; + + uint64_t cNsElapsed = RTTimeNanoTS() - pThis->uStartNsTs; + if (cNsElapsed < pThis->cNsElapsedNextLog) + return; + + if (pThis->cNsElapsedNextLog == 0) + pThis->cNsElapsedNextLog = RT_NS_1SEC; + else if (pThis->cNsElapsedNextLog >= RT_NS_1MIN / 2) + pThis->cNsElapsedNextLog = RT_NS_1MIN; + else + pThis->cNsElapsedNextLog *= 2; + + /* + * Do the logging. + */ + LogRel(("%s: after %5llu ms, %u loops: %u async tasks - %s\n", + pThis->pszOp, cNsElapsed / RT_NS_1MS, pThis->cLoops, pThis->cAsync, pThis->szList)); +} + + +/** + * Wait for events and process pending requests. + * + * @param pThis The asynchronous notifification stats. + * @param pVM The cross context VM structure. + */ +static void pdmR3NotifyAsyncWaitAndProcessRequests(PPDMNOTIFYASYNCSTATS pThis, PVM pVM) +{ + VM_ASSERT_EMT0(pVM); + int rc = VMR3AsyncPdmNotificationWaitU(&pVM->pUVM->aCpus[0]); + AssertReleaseMsg(rc == VINF_SUCCESS, ("%Rrc - %s - %s\n", rc, pThis->pszOp, pThis->szList)); + + rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/); + AssertReleaseMsg(rc == VINF_SUCCESS, ("%Rrc - %s - %s\n", rc, pThis->pszOp, pThis->szList)); + rc = VMR3ReqProcessU(pVM->pUVM, 0/*idDstCpu*/, true /*fPriorityOnly*/); + AssertReleaseMsg(rc == VINF_SUCCESS, ("%Rrc - %s - %s\n", rc, pThis->pszOp, pThis->szList)); +} + + +/** + * Worker for PDMR3Reset that deals with one driver. + * + * @param pDrvIns The driver instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. + * @param pszDevName The parent device name. + * @param iDevInstance The parent device instance number. + * @param iLun The parent LUN number. + */ +DECLINLINE(bool) pdmR3ResetDrv(PPDMDRVINS pDrvIns, PPDMNOTIFYASYNCSTATS pAsync, + const char *pszDevName, uint32_t iDevInstance, uint32_t iLun) +{ + if (!pDrvIns->Internal.s.fVMReset) + { + pDrvIns->Internal.s.fVMReset = true; + if (pDrvIns->pReg->pfnReset) + { + if (!pDrvIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3Reset: Notifying - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + pDrvIns->pReg->pfnReset(pDrvIns); + if (pDrvIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3Reset: Async notification started - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + } + else if (pDrvIns->Internal.s.pfnAsyncNotify(pDrvIns)) + { + LogFlow(("PDMR3Reset: Async notification completed - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + pDrvIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pDrvIns->Internal.s.pfnAsyncNotify) + { + pDrvIns->Internal.s.fVMReset = false; + pdmR3NotifyAsyncAddDrv(pAsync, pDrvIns->Internal.s.pDrv->pReg->szName, pDrvIns->iInstance, + pszDevName, iDevInstance, iLun); + return false; + } + } + } + return true; +} + + +/** + * Worker for PDMR3Reset that deals with one USB device instance. + * + * @param pUsbIns The USB device instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. 
+ */ +DECLINLINE(void) pdmR3ResetUsb(PPDMUSBINS pUsbIns, PPDMNOTIFYASYNCSTATS pAsync) +{ + if (!pUsbIns->Internal.s.fVMReset) + { + pUsbIns->Internal.s.fVMReset = true; + if (pUsbIns->pReg->pfnVMReset) + { + if (!pUsbIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3Reset: Notifying - device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->pReg->pfnVMReset(pUsbIns); + if (pUsbIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3Reset: Async notification started - device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + } + else if (pUsbIns->Internal.s.pfnAsyncNotify(pUsbIns)) + { + LogFlow(("PDMR3Reset: Async notification completed - device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pUsbIns->Internal.s.pfnAsyncNotify) + { + pUsbIns->Internal.s.fVMReset = false; + pdmR3NotifyAsyncAdd(pAsync, pUsbIns->Internal.s.pUsbDev->pReg->szName, pUsbIns->iInstance); + } + } + } +} + + +/** + * Worker for PDMR3Reset that deals with one device instance. + * + * @param pDevIns The device instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. + */ +DECLINLINE(void) pdmR3ResetDev(PPDMDEVINS pDevIns, PPDMNOTIFYASYNCSTATS pAsync) +{ + if (!(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_RESET)) + { + pDevIns->Internal.s.fIntFlags |= PDMDEVINSINT_FLAGS_RESET; + if (pDevIns->pReg->pfnReset) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + + if (!pDevIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3Reset: Notifying - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->pReg->pfnReset(pDevIns); + if (pDevIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3Reset: Async notification started - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + } + else if (pDevIns->Internal.s.pfnAsyncNotify(pDevIns)) + { + LogFlow(("PDMR3Reset: Async notification completed - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pDevIns->Internal.s.pfnAsyncNotify) + { + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_RESET; + pdmR3NotifyAsyncAdd(pAsync, pDevIns->Internal.s.pDevR3->pReg->szName, pDevIns->iInstance); + } + + PDMCritSectLeave(pDevIns->pCritSectRoR3); + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMSUSPEND_WARN_AT_NS) + LogRel(("PDMR3Reset: Device '%s'/%d took %'llu ns to reset\n", + pDevIns->pReg->szName, pDevIns->iInstance, cNsElapsed)); + } + } +} + + +/** + * Resets a virtual CPU. + * + * Used by PDMR3Reset and CPU hot plugging. + * + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(void) PDMR3ResetCpu(PVMCPU pVCpu) +{ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_APIC); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_PIC); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_SMI); +} + + +/** + * This function will notify all the devices and their attached drivers about + * the VM now being reset. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) PDMR3Reset(PVM pVM) +{ + LogFlow(("PDMR3Reset:\n")); + + /* + * Clear all the reset flags. 
+ */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_RESET; + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + pDrvIns->Internal.s.fVMReset = false; + } +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + pUsbIns->Internal.s.fVMReset = false; + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + pDrvIns->Internal.s.fVMReset = false; + } +#endif + + /* + * The outer loop repeats until there are no more async requests. + */ + PDMNOTIFYASYNCSTATS Async; + pdmR3NotifyAsyncInit(&Async, "PDMR3Reset"); + for (;;) + { + pdmR3NotifyAsyncBeginLoop(&Async); + + /* + * Iterate thru the device instances and USB device instances, + * processing the drivers associated with those. + */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + unsigned const cAsyncStart = Async.cAsync; + + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_FIRST_RESET_NOTIFICATION) + pdmR3ResetDev(pDevIns, &Async); + + if (Async.cAsync == cAsyncStart) + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!pdmR3ResetDrv(pDrvIns, &Async, pDevIns->pReg->szName, pDevIns->iInstance, pLun->iLun)) + break; + + if ( Async.cAsync == cAsyncStart + && !(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_FIRST_RESET_NOTIFICATION)) + pdmR3ResetDev(pDevIns, &Async); + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + unsigned const cAsyncStart = Async.cAsync; + + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!pdmR3ResetDrv(pDrvIns, &Async, pUsbIns->pReg->szName, pUsbIns->iInstance, pLun->iLun)) + break; + + if (Async.cAsync == cAsyncStart) + pdmR3ResetUsb(pUsbIns, &Async); + } +#endif + if (!Async.cAsync) + break; + pdmR3NotifyAsyncLog(&Async); + pdmR3NotifyAsyncWaitAndProcessRequests(&Async, pVM); + } + + /* + * Clear all pending interrupts and DMA operations. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + PDMR3ResetCpu(&pVM->aCpus[idCpu]); + VM_FF_CLEAR(pVM, VM_FF_PDM_DMA); + + LogFlow(("PDMR3Reset: returns void\n")); +} + + +/** + * This function will tell all the devices to setup up their memory structures + * after VM construction and after VM reset. + * + * @param pVM The cross context VM structure. + * @param fAtReset Indicates the context, after reset if @c true or after + * construction if @c false. + */ +VMMR3_INT_DECL(void) PDMR3MemSetup(PVM pVM, bool fAtReset) +{ + LogFlow(("PDMR3MemSetup: fAtReset=%RTbool\n", fAtReset)); + PDMDEVMEMSETUPCTX const enmCtx = fAtReset ? PDMDEVMEMSETUPCTX_AFTER_RESET : PDMDEVMEMSETUPCTX_AFTER_CONSTRUCTION; + + /* + * Iterate thru the device instances and work the callback. 
+ */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + if (pDevIns->pReg->pfnMemSetup) + { + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + pDevIns->pReg->pfnMemSetup(pDevIns, enmCtx); + PDMCritSectLeave(pDevIns->pCritSectRoR3); + } + + LogFlow(("PDMR3MemSetup: returns void\n")); +} + + +/** + * Retrieves and resets the info left behind by PDMDevHlpVMReset. + * + * @returns True if hard reset, false if soft reset. + * @param pVM The cross context VM structure. + * @param fOverride If non-zero, the override flags will be used instead + * of the reset flags kept by PDM. (For triple faults.) + * @param pfResetFlags Where to return the reset flags (PDMVMRESET_F_XXX). + * @thread EMT + */ +VMMR3_INT_DECL(bool) PDMR3GetResetInfo(PVM pVM, uint32_t fOverride, uint32_t *pfResetFlags) +{ + VM_ASSERT_EMT(pVM); + + /* + * Get the reset flags. + */ + uint32_t fResetFlags; + fResetFlags = ASMAtomicXchgU32(&pVM->pdm.s.fResetFlags, 0); + if (fOverride) + fResetFlags = fOverride; + *pfResetFlags = fResetFlags; + + /* + * To try avoid trouble, we never ever do soft/warm resets on SMP systems + * with more than CPU #0 active. However, if only one CPU is active we + * will ask the firmware what it wants us to do (because the firmware may + * depend on the VMM doing a lot of what is normally its responsibility, + * like clearing memory). + */ + bool fOtherCpusActive = false; + VMCPUID iCpu = pVM->cCpus; + while (iCpu-- > 1) + { + EMSTATE enmState = EMGetState(&pVM->aCpus[iCpu]); + if ( enmState != EMSTATE_WAIT_SIPI + && enmState != EMSTATE_NONE) + { + fOtherCpusActive = true; + break; + } + } + + bool fHardReset = fOtherCpusActive + || (fResetFlags & PDMVMRESET_F_SRC_MASK) < PDMVMRESET_F_LAST_ALWAYS_HARD + || !pVM->pdm.s.pFirmware + || pVM->pdm.s.pFirmware->Reg.pfnIsHardReset(pVM->pdm.s.pFirmware->pDevIns, fResetFlags); + + Log(("PDMR3GetResetInfo: returns fHardReset=%RTbool fResetFlags=%#x\n", fHardReset, fResetFlags)); + return fHardReset; +} + + +/** + * Performs a soft reset of devices. + * + * @param pVM The cross context VM structure. + * @param fResetFlags PDMVMRESET_F_XXX. + */ +VMMR3_INT_DECL(void) PDMR3SoftReset(PVM pVM, uint32_t fResetFlags) +{ + LogFlow(("PDMR3SoftReset: fResetFlags=%#x\n", fResetFlags)); + + /* + * Iterate thru the device instances and work the callback. + */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + if (pDevIns->pReg->pfnSoftReset) + { + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + pDevIns->pReg->pfnSoftReset(pDevIns, fResetFlags); + PDMCritSectLeave(pDevIns->pCritSectRoR3); + } + + LogFlow(("PDMR3SoftReset: returns void\n")); +} + + +/** + * Worker for PDMR3Suspend that deals with one driver. + * + * @param pDrvIns The driver instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. + * @param pszDevName The parent device name. + * @param iDevInstance The parent device instance number. + * @param iLun The parent LUN number. 
+ */ +DECLINLINE(bool) pdmR3SuspendDrv(PPDMDRVINS pDrvIns, PPDMNOTIFYASYNCSTATS pAsync, + const char *pszDevName, uint32_t iDevInstance, uint32_t iLun) +{ + if (!pDrvIns->Internal.s.fVMSuspended) + { + pDrvIns->Internal.s.fVMSuspended = true; + if (pDrvIns->pReg->pfnSuspend) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + + if (!pDrvIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3Suspend: Notifying - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + pDrvIns->pReg->pfnSuspend(pDrvIns); + if (pDrvIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3Suspend: Async notification started - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + } + else if (pDrvIns->Internal.s.pfnAsyncNotify(pDrvIns)) + { + LogFlow(("PDMR3Suspend: Async notification completed - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + pDrvIns->Internal.s.pfnAsyncNotify = NULL; + } + + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMSUSPEND_WARN_AT_NS) + LogRel(("PDMR3Suspend: Driver '%s'/%d on LUN#%d of device '%s'/%d took %'llu ns to suspend\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance, cNsElapsed)); + + if (pDrvIns->Internal.s.pfnAsyncNotify) + { + pDrvIns->Internal.s.fVMSuspended = false; + pdmR3NotifyAsyncAddDrv(pAsync, pDrvIns->Internal.s.pDrv->pReg->szName, pDrvIns->iInstance, pszDevName, iDevInstance, iLun); + return false; + } + } + } + return true; +} + + +/** + * Worker for PDMR3Suspend that deals with one USB device instance. + * + * @param pUsbIns The USB device instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. + */ +DECLINLINE(void) pdmR3SuspendUsb(PPDMUSBINS pUsbIns, PPDMNOTIFYASYNCSTATS pAsync) +{ + if (!pUsbIns->Internal.s.fVMSuspended) + { + pUsbIns->Internal.s.fVMSuspended = true; + if (pUsbIns->pReg->pfnVMSuspend) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + + if (!pUsbIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3Suspend: Notifying - USB device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->pReg->pfnVMSuspend(pUsbIns); + if (pUsbIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3Suspend: Async notification started - USB device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + } + else if (pUsbIns->Internal.s.pfnAsyncNotify(pUsbIns)) + { + LogFlow(("PDMR3Suspend: Async notification completed - USB device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pUsbIns->Internal.s.pfnAsyncNotify) + { + pUsbIns->Internal.s.fVMSuspended = false; + pdmR3NotifyAsyncAdd(pAsync, pUsbIns->Internal.s.pUsbDev->pReg->szName, pUsbIns->iInstance); + } + + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMSUSPEND_WARN_AT_NS) + LogRel(("PDMR3Suspend: USB device '%s'/%d took %'llu ns to suspend\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, cNsElapsed)); + } + } +} + + +/** + * Worker for PDMR3Suspend that deals with one device instance. + * + * @param pDevIns The device instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. 
+ */ +DECLINLINE(void) pdmR3SuspendDev(PPDMDEVINS pDevIns, PPDMNOTIFYASYNCSTATS pAsync) +{ + if (!(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_SUSPENDED)) + { + pDevIns->Internal.s.fIntFlags |= PDMDEVINSINT_FLAGS_SUSPENDED; + if (pDevIns->pReg->pfnSuspend) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + + if (!pDevIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3Suspend: Notifying - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->pReg->pfnSuspend(pDevIns); + if (pDevIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3Suspend: Async notification started - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + } + else if (pDevIns->Internal.s.pfnAsyncNotify(pDevIns)) + { + LogFlow(("PDMR3Suspend: Async notification completed - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pDevIns->Internal.s.pfnAsyncNotify) + { + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_SUSPENDED; + pdmR3NotifyAsyncAdd(pAsync, pDevIns->Internal.s.pDevR3->pReg->szName, pDevIns->iInstance); + } + + PDMCritSectLeave(pDevIns->pCritSectRoR3); + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMSUSPEND_WARN_AT_NS) + LogRel(("PDMR3Suspend: Device '%s'/%d took %'llu ns to suspend\n", + pDevIns->pReg->szName, pDevIns->iInstance, cNsElapsed)); + } + } +} + + +/** + * This function will notify all the devices and their attached drivers about + * the VM now being suspended. + * + * @param pVM The cross context VM structure. + * @thread EMT(0) + */ +VMMR3_INT_DECL(void) PDMR3Suspend(PVM pVM) +{ + LogFlow(("PDMR3Suspend:\n")); + VM_ASSERT_EMT0(pVM); + uint64_t cNsElapsed = RTTimeNanoTS(); + + /* + * The outer loop repeats until there are no more async requests. + * + * Note! We depend on the suspended indicators to be in the desired state + * and we do not reset them before starting because this allows + * PDMR3PowerOn and PDMR3Resume to use PDMR3Suspend for cleaning up + * on failure. + */ + PDMNOTIFYASYNCSTATS Async; + pdmR3NotifyAsyncInit(&Async, "PDMR3Suspend"); + for (;;) + { + pdmR3NotifyAsyncBeginLoop(&Async); + + /* + * Iterate thru the device instances and USB device instances, + * processing the drivers associated with those. + * + * The attached drivers are normally processed first. Some devices + * (like DevAHCI) though needs to be notified before the drivers so + * that it doesn't kick off any new requests after the drivers stopped + * taking any. (DrvVD changes to read-only in this particular case.) 
+ */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + unsigned const cAsyncStart = Async.cAsync; + + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_FIRST_SUSPEND_NOTIFICATION) + pdmR3SuspendDev(pDevIns, &Async); + + if (Async.cAsync == cAsyncStart) + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!pdmR3SuspendDrv(pDrvIns, &Async, pDevIns->pReg->szName, pDevIns->iInstance, pLun->iLun)) + break; + + if ( Async.cAsync == cAsyncStart + && !(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_FIRST_SUSPEND_NOTIFICATION)) + pdmR3SuspendDev(pDevIns, &Async); + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + unsigned const cAsyncStart = Async.cAsync; + + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!pdmR3SuspendDrv(pDrvIns, &Async, pUsbIns->pReg->szName, pUsbIns->iInstance, pLun->iLun)) + break; + + if (Async.cAsync == cAsyncStart) + pdmR3SuspendUsb(pUsbIns, &Async); + } +#endif + if (!Async.cAsync) + break; + pdmR3NotifyAsyncLog(&Async); + pdmR3NotifyAsyncWaitAndProcessRequests(&Async, pVM); + } + + /* + * Suspend all threads. + */ + pdmR3ThreadSuspendAll(pVM); + + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + LogRel(("PDMR3Suspend: %'llu ns run time\n", cNsElapsed)); +} + + +/** + * Worker for PDMR3Resume that deals with one driver. + * + * @param pDrvIns The driver instance. + * @param pszDevName The parent device name. + * @param iDevInstance The parent device instance number. + * @param iLun The parent LUN number. + */ +DECLINLINE(int) pdmR3ResumeDrv(PPDMDRVINS pDrvIns, const char *pszDevName, uint32_t iDevInstance, uint32_t iLun) +{ + Assert(pDrvIns->Internal.s.fVMSuspended); + if (pDrvIns->pReg->pfnResume) + { + LogFlow(("PDMR3Resume: Notifying - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + int rc = VINF_SUCCESS; pDrvIns->pReg->pfnResume(pDrvIns); + if (RT_FAILURE(rc)) + { + LogRel(("PDMR3Resume: Driver '%s'/%d on LUN#%d of device '%s'/%d -> %Rrc\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance, rc)); + return rc; + } + } + pDrvIns->Internal.s.fVMSuspended = false; + return VINF_SUCCESS; +} + + +/** + * Worker for PDMR3Resume that deals with one USB device instance. + * + * @returns VBox status code. + * @param pUsbIns The USB device instance. + */ +DECLINLINE(int) pdmR3ResumeUsb(PPDMUSBINS pUsbIns) +{ + Assert(pUsbIns->Internal.s.fVMSuspended); + if (pUsbIns->pReg->pfnVMResume) + { + LogFlow(("PDMR3Resume: Notifying - device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + int rc = VINF_SUCCESS; pUsbIns->pReg->pfnVMResume(pUsbIns); + if (RT_FAILURE(rc)) + { + LogRel(("PDMR3Resume: Device '%s'/%d -> %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; + } + } + pUsbIns->Internal.s.fVMSuspended = false; + return VINF_SUCCESS; +} + + +/** + * Worker for PDMR3Resume that deals with one device instance. + * + * @returns VBox status code. + * @param pDevIns The device instance. 
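/*
 * Illustrative sketch (not from the original sources): the asynchronous
 * notification contract shared by the reset/suspend/power-off workers above,
 * boiled down to a self-contained toy.  A unit is notified once; if it cannot
 * finish synchronously it exposes a poll callback which the outer loop keeps
 * calling, with request processing in between, until it reports completion.
 * All names here are made up for the example.
 */
#include <stdbool.h>
#include <stddef.h>

typedef struct SKETCHUNIT
{
    bool fDone;                                   /* plays the role of fVMSuspended / the SUSPENDED flag */
    bool (*pfnAsyncNotify)(struct SKETCHUNIT *);  /* non-NULL while async work is outstanding */
    int  cPollsLeft;                              /* toy state: polls remaining until "done" */
} SKETCHUNIT;

/* Toy poll callback: reports completion after a few polls. */
static bool sketchPoll(struct SKETCHUNIT *pUnit)
{
    return --pUnit->cPollsLeft <= 0;
}

/* One visit to a unit; returns 1 while an async operation is still outstanding. */
static unsigned sketchNotifyOne(SKETCHUNIT *pUnit)
{
    if (pUnit->fDone)
        return 0;
    if (!pUnit->pfnAsyncNotify)
        pUnit->pfnAsyncNotify = sketchPoll;       /* first visit: the notification kicks off async work */
    if (pUnit->pfnAsyncNotify(pUnit))
    {
        pUnit->pfnAsyncNotify = NULL;             /* async work finished */
        pUnit->fDone = true;
        return 0;
    }
    return 1;
}

/* The outer loop keeps going until no unit has outstanding work. */
static void sketchNotifyAll(SKETCHUNIT *paUnits, size_t cUnits)
{
    for (;;)
    {
        unsigned cAsync = 0;
        for (size_t i = 0; i < cUnits; i++)
            cAsync += sketchNotifyOne(&paUnits[i]);
        if (!cAsync)
            break;
        /* The real loops log progress and service pending EMT requests here
           before going around again. */
    }
}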
+ */ +DECLINLINE(int) pdmR3ResumeDev(PPDMDEVINS pDevIns) +{ + Assert(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_SUSPENDED); + if (pDevIns->pReg->pfnResume) + { + LogFlow(("PDMR3Resume: Notifying - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + int rc = VINF_SUCCESS; pDevIns->pReg->pfnResume(pDevIns); + PDMCritSectLeave(pDevIns->pCritSectRoR3); + if (RT_FAILURE(rc)) + { + LogRel(("PDMR3Resume: Device '%s'/%d -> %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_SUSPENDED; + return VINF_SUCCESS; +} + + +/** + * This function will notify all the devices and their + * attached drivers about the VM now being resumed. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) PDMR3Resume(PVM pVM) +{ + LogFlow(("PDMR3Resume:\n")); + + /* + * Iterate thru the device instances and USB device instances, + * processing the drivers associated with those. + */ + int rc = VINF_SUCCESS; + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns && RT_SUCCESS(rc); pDevIns = pDevIns->Internal.s.pNextR3) + { + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun && RT_SUCCESS(rc); pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns && RT_SUCCESS(rc); pDrvIns = pDrvIns->Internal.s.pDown) + rc = pdmR3ResumeDrv(pDrvIns, pDevIns->pReg->szName, pDevIns->iInstance, pLun->iLun); + if (RT_SUCCESS(rc)) + rc = pdmR3ResumeDev(pDevIns); + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns && RT_SUCCESS(rc); pUsbIns = pUsbIns->Internal.s.pNext) + { + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun && RT_SUCCESS(rc); pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns && RT_SUCCESS(rc); pDrvIns = pDrvIns->Internal.s.pDown) + rc = pdmR3ResumeDrv(pDrvIns, pUsbIns->pReg->szName, pUsbIns->iInstance, pLun->iLun); + if (RT_SUCCESS(rc)) + rc = pdmR3ResumeUsb(pUsbIns); + } +#endif + + /* + * Resume all threads. + */ + if (RT_SUCCESS(rc)) + pdmR3ThreadResumeAll(pVM); + + /* + * Resume the block cache. + */ + if (RT_SUCCESS(rc)) + pdmR3BlkCacheResume(pVM); + + /* + * On failure, clean up via PDMR3Suspend. + */ + if (RT_FAILURE(rc)) + PDMR3Suspend(pVM); + + LogFlow(("PDMR3Resume: returns %Rrc\n", rc)); + return /*rc*/; +} + + +/** + * Worker for PDMR3PowerOff that deals with one driver. + * + * @param pDrvIns The driver instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. + * @param pszDevName The parent device name. + * @param iDevInstance The parent device instance number. + * @param iLun The parent LUN number. 
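/*
 * Illustrative sketch (not from the original sources): PDMR3Resume above stops
 * at the first failing unit and then reuses the suspend path as the rollback,
 * which only works because the suspend code skips units that are still marked
 * suspended.  A stripped-down, self-contained version of that shape:
 */
#include <stddef.h>
#include <stdbool.h>

typedef struct SKETCHRESUMABLE
{
    bool fSuspended;
    int (*pfnResume)(struct SKETCHRESUMABLE *);   /* 0 on success, negative on error */
} SKETCHRESUMABLE;

static int sketchResumeAll(SKETCHRESUMABLE *paUnits, size_t cUnits,
                           void (*pfnSuspendAll)(SKETCHRESUMABLE *, size_t))
{
    int rc = 0;
    for (size_t i = 0; i < cUnits && rc >= 0; i++)
    {
        if (paUnits[i].pfnResume)
            rc = paUnits[i].pfnResume(&paUnits[i]);
        if (rc >= 0)
            paUnits[i].fSuspended = false;        /* only cleared on success */
    }
    if (rc < 0)
        pfnSuspendAll(paUnits, cUnits);           /* re-suspends whatever was resumed so far */
    return rc;
}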
+ */ +DECLINLINE(bool) pdmR3PowerOffDrv(PPDMDRVINS pDrvIns, PPDMNOTIFYASYNCSTATS pAsync, + const char *pszDevName, uint32_t iDevInstance, uint32_t iLun) +{ + if (!pDrvIns->Internal.s.fVMSuspended) + { + pDrvIns->Internal.s.fVMSuspended = true; + if (pDrvIns->pReg->pfnPowerOff) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + + if (!pDrvIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3PowerOff: Notifying - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + pDrvIns->pReg->pfnPowerOff(pDrvIns); + if (pDrvIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3PowerOff: Async notification started - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + } + else if (pDrvIns->Internal.s.pfnAsyncNotify(pDrvIns)) + { + LogFlow(("PDMR3PowerOff: Async notification completed - driver '%s'/%d on LUN#%d of device '%s'/%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance)); + pDrvIns->Internal.s.pfnAsyncNotify = NULL; + } + + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMPOWEROFF_WARN_AT_NS) + LogRel(("PDMR3PowerOff: Driver '%s'/%d on LUN#%d of device '%s'/%d took %'llu ns to power off\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, iLun, pszDevName, iDevInstance, cNsElapsed)); + + if (pDrvIns->Internal.s.pfnAsyncNotify) + { + pDrvIns->Internal.s.fVMSuspended = false; + pdmR3NotifyAsyncAddDrv(pAsync, pDrvIns->Internal.s.pDrv->pReg->szName, pDrvIns->iInstance, + pszDevName, iDevInstance, iLun); + return false; + } + } + } + return true; +} + + +/** + * Worker for PDMR3PowerOff that deals with one USB device instance. + * + * @param pUsbIns The USB device instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. + */ +DECLINLINE(void) pdmR3PowerOffUsb(PPDMUSBINS pUsbIns, PPDMNOTIFYASYNCSTATS pAsync) +{ + if (!pUsbIns->Internal.s.fVMSuspended) + { + pUsbIns->Internal.s.fVMSuspended = true; + if (pUsbIns->pReg->pfnVMPowerOff) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + + if (!pUsbIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3PowerOff: Notifying - USB device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->pReg->pfnVMPowerOff(pUsbIns); + if (pUsbIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3PowerOff: Async notification started - USB device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + } + else if (pUsbIns->Internal.s.pfnAsyncNotify(pUsbIns)) + { + LogFlow(("PDMR3PowerOff: Async notification completed - USB device '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pUsbIns->Internal.s.pfnAsyncNotify) + { + pUsbIns->Internal.s.fVMSuspended = false; + pdmR3NotifyAsyncAdd(pAsync, pUsbIns->Internal.s.pUsbDev->pReg->szName, pUsbIns->iInstance); + } + + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMPOWEROFF_WARN_AT_NS) + LogRel(("PDMR3PowerOff: USB device '%s'/%d took %'llu ns to power off\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, cNsElapsed)); + + } + } +} + + +/** + * Worker for PDMR3PowerOff that deals with one device instance. + * + * @param pDevIns The device instance. + * @param pAsync The structure for recording asynchronous + * notification tasks. 
+ */ +DECLINLINE(void) pdmR3PowerOffDev(PPDMDEVINS pDevIns, PPDMNOTIFYASYNCSTATS pAsync) +{ + if (!(pDevIns->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_SUSPENDED)) + { + pDevIns->Internal.s.fIntFlags |= PDMDEVINSINT_FLAGS_SUSPENDED; + if (pDevIns->pReg->pfnPowerOff) + { + uint64_t cNsElapsed = RTTimeNanoTS(); + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + + if (!pDevIns->Internal.s.pfnAsyncNotify) + { + LogFlow(("PDMR3PowerOff: Notifying - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->pReg->pfnPowerOff(pDevIns); + if (pDevIns->Internal.s.pfnAsyncNotify) + LogFlow(("PDMR3PowerOff: Async notification started - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + } + else if (pDevIns->Internal.s.pfnAsyncNotify(pDevIns)) + { + LogFlow(("PDMR3PowerOff: Async notification completed - device '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pDevIns->Internal.s.pfnAsyncNotify = NULL; + } + if (pDevIns->Internal.s.pfnAsyncNotify) + { + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_SUSPENDED; + pdmR3NotifyAsyncAdd(pAsync, pDevIns->Internal.s.pDevR3->pReg->szName, pDevIns->iInstance); + } + + PDMCritSectLeave(pDevIns->pCritSectRoR3); + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + if (cNsElapsed >= PDMPOWEROFF_WARN_AT_NS) + LogFlow(("PDMR3PowerOff: Device '%s'/%d took %'llu ns to power off\n", + pDevIns->pReg->szName, pDevIns->iInstance, cNsElapsed)); + } + } +} + + +/** + * This function will notify all the devices and their + * attached drivers about the VM being powered off. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) PDMR3PowerOff(PVM pVM) +{ + LogFlow(("PDMR3PowerOff:\n")); + uint64_t cNsElapsed = RTTimeNanoTS(); + + /* + * Clear the suspended flags on all devices and drivers first because they + * might have been set during a suspend but the power off callbacks should + * be called in any case. + */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + pDevIns->Internal.s.fIntFlags &= ~PDMDEVINSINT_FLAGS_SUSPENDED; + + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + pDrvIns->Internal.s.fVMSuspended = false; + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + pUsbIns->Internal.s.fVMSuspended = false; + + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + pDrvIns->Internal.s.fVMSuspended = false; + } +#endif + + /* + * The outer loop repeats until there are no more async requests. + */ + PDMNOTIFYASYNCSTATS Async; + pdmR3NotifyAsyncInit(&Async, "PDMR3PowerOff"); + for (;;) + { + pdmR3NotifyAsyncBeginLoop(&Async); + + /* + * Iterate thru the device instances and USB device instances, + * processing the drivers associated with those. + * + * The attached drivers are normally processed first. Some devices + * (like DevAHCI) though needs to be notified before the drivers so + * that it doesn't kick off any new requests after the drivers stopped + * taking any. (DrvVD changes to read-only in this particular case.) 
+ */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + unsigned const cAsyncStart = Async.cAsync; + + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_FIRST_POWEROFF_NOTIFICATION) + pdmR3PowerOffDev(pDevIns, &Async); + + if (Async.cAsync == cAsyncStart) + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!pdmR3PowerOffDrv(pDrvIns, &Async, pDevIns->pReg->szName, pDevIns->iInstance, pLun->iLun)) + break; + + if ( Async.cAsync == cAsyncStart + && !(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_FIRST_POWEROFF_NOTIFICATION)) + pdmR3PowerOffDev(pDevIns, &Async); + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + unsigned const cAsyncStart = Async.cAsync; + + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!pdmR3PowerOffDrv(pDrvIns, &Async, pUsbIns->pReg->szName, pUsbIns->iInstance, pLun->iLun)) + break; + + if (Async.cAsync == cAsyncStart) + pdmR3PowerOffUsb(pUsbIns, &Async); + } +#endif + if (!Async.cAsync) + break; + pdmR3NotifyAsyncLog(&Async); + pdmR3NotifyAsyncWaitAndProcessRequests(&Async, pVM); + } + + /* + * Suspend all threads. + */ + pdmR3ThreadSuspendAll(pVM); + + cNsElapsed = RTTimeNanoTS() - cNsElapsed; + LogRel(("PDMR3PowerOff: %'llu ns run time\n", cNsElapsed)); +} + + +/** + * Queries the base interface of a device instance. + * + * The caller can use this to query other interfaces the device implements + * and use them to talk to the device. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param ppBase Where to store the pointer to the base device interface on success. + * @remark We're not doing any locking ATM, so don't try call this at times when the + * device chain is known to be updated. + */ +VMMR3DECL(int) PDMR3QueryDevice(PUVM pUVM, const char *pszDevice, unsigned iInstance, PPDMIBASE *ppBase) +{ + LogFlow(("PDMR3DeviceQuery: pszDevice=%p:{%s} iInstance=%u ppBase=%p\n", pszDevice, pszDevice, iInstance, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + /* + * Iterate registered devices looking for the device. + */ + size_t cchDevice = strlen(pszDevice); + for (PPDMDEV pDev = pUVM->pVM->pdm.s.pDevs; pDev; pDev = pDev->pNext) + { + if ( pDev->cchName == cchDevice + && !memcmp(pDev->pReg->szName, pszDevice, cchDevice)) + { + /* + * Iterate device instances. + */ + for (PPDMDEVINS pDevIns = pDev->pInstances; pDevIns; pDevIns = pDevIns->Internal.s.pPerDeviceNextR3) + { + if (pDevIns->iInstance == iInstance) + { + if (pDevIns->IBase.pfnQueryInterface) + { + *ppBase = &pDevIns->IBase; + LogFlow(("PDMR3DeviceQuery: return VINF_SUCCESS and *ppBase=%p\n", *ppBase)); + return VINF_SUCCESS; + } + + LogFlow(("PDMR3DeviceQuery: returns VERR_PDM_DEVICE_INSTANCE_NO_IBASE\n")); + return VERR_PDM_DEVICE_INSTANCE_NO_IBASE; + } + } + + LogFlow(("PDMR3DeviceQuery: returns VERR_PDM_DEVICE_INSTANCE_NOT_FOUND\n")); + return VERR_PDM_DEVICE_INSTANCE_NOT_FOUND; + } + } + + LogFlow(("PDMR3QueryDevice: returns VERR_PDM_DEVICE_NOT_FOUND\n")); + return VERR_PDM_DEVICE_NOT_FOUND; +} + + +/** + * Queries the base interface of a device LUN. 
+ * + * This differs from PDMR3QueryLun by that it returns the interface on the + * device and not the top level driver. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param ppBase Where to store the base interface pointer. + * @remark We're not doing any locking ATM, so don't try call this at times when the + * device chain is known to be updated. + */ +VMMR3DECL(int) PDMR3QueryDeviceLun(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, PPDMIBASE *ppBase) +{ + LogFlow(("PDMR3QueryDeviceLun: pszDevice=%p:{%s} iInstance=%u iLun=%u ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + /* + * Find the LUN. + */ + PPDMLUN pLun; + int rc = pdmR3DevFindLun(pUVM->pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + *ppBase = pLun->pBase; + LogFlow(("PDMR3QueryDeviceLun: return VINF_SUCCESS and *ppBase=%p\n", *ppBase)); + return VINF_SUCCESS; + } + LogFlow(("PDMR3QueryDeviceLun: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Query the interface of the top level driver on a LUN. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param ppBase Where to store the base interface pointer. + * @remark We're not doing any locking ATM, so don't try call this at times when the + * device chain is known to be updated. + */ +VMMR3DECL(int) PDMR3QueryLun(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, PPDMIBASE *ppBase) +{ + LogFlow(("PDMR3QueryLun: pszDevice=%p:{%s} iInstance=%u iLun=%u ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Find the LUN. + */ + PPDMLUN pLun; + int rc = pdmR3DevFindLun(pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + if (pLun->pTop) + { + *ppBase = &pLun->pTop->IBase; + LogFlow(("PDMR3QueryLun: return %Rrc and *ppBase=%p\n", VINF_SUCCESS, *ppBase)); + return VINF_SUCCESS; + } + rc = VERR_PDM_NO_DRIVER_ATTACHED_TO_LUN; + } + LogFlow(("PDMR3QueryLun: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Query the interface of a named driver on a LUN. + * + * If the driver appears more than once in the driver chain, the first instance + * is returned. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param pszDriver The driver name. + * @param ppBase Where to store the base interface pointer. + * + * @remark We're not doing any locking ATM, so don't try call this at times when the + * device chain is known to be updated. 
+ */ +VMMR3DECL(int) PDMR3QueryDriverOnLun(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, const char *pszDriver, PPPDMIBASE ppBase) +{ + LogFlow(("PDMR3QueryDriverOnLun: pszDevice=%p:{%s} iInstance=%u iLun=%u pszDriver=%p:{%s} ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, pszDriver, pszDriver, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + /* + * Find the LUN. + */ + PPDMLUN pLun; + int rc = pdmR3DevFindLun(pUVM->pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + if (pLun->pTop) + { + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!strcmp(pDrvIns->pReg->szName, pszDriver)) + { + *ppBase = &pDrvIns->IBase; + LogFlow(("PDMR3QueryDriverOnLun: return %Rrc and *ppBase=%p\n", VINF_SUCCESS, *ppBase)); + return VINF_SUCCESS; + + } + rc = VERR_PDM_DRIVER_NOT_FOUND; + } + else + rc = VERR_PDM_NO_DRIVER_ATTACHED_TO_LUN; + } + LogFlow(("PDMR3QueryDriverOnLun: returns %Rrc\n", rc)); + return rc; +} + +/** + * Executes pending DMA transfers. + * Forced Action handler. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) PDMR3DmaRun(PVM pVM) +{ + /* Note! Not really SMP safe; restrict it to VCPU 0. */ + if (VMMGetCpuId(pVM) != 0) + return; + + if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_PDM_DMA)) + { + if (pVM->pdm.s.pDmac) + { + bool fMore = pVM->pdm.s.pDmac->Reg.pfnRun(pVM->pdm.s.pDmac->pDevIns); + if (fMore) + VM_FF_SET(pVM, VM_FF_PDM_DMA); + } + } +} + + +/** + * Service a VMMCALLRING3_PDM_LOCK call. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) PDMR3LockCall(PVM pVM) +{ + return PDMR3CritSectEnterEx(&pVM->pdm.s.CritSect, true /* fHostCall */); +} + + +/** + * Allocates memory from the VMM device heap. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cbSize Allocation size. + * @param pfnNotify Mapping/unmapping notification callback. + * @param ppv Ring-3 pointer. (out) + */ +VMMR3_INT_DECL(int) PDMR3VmmDevHeapAlloc(PVM pVM, size_t cbSize, PFNPDMVMMDEVHEAPNOTIFY pfnNotify, RTR3PTR *ppv) +{ +#ifdef DEBUG_bird + if (!cbSize || cbSize > pVM->pdm.s.cbVMMDevHeapLeft) + return VERR_NO_MEMORY; +#else + AssertReturn(cbSize && cbSize <= pVM->pdm.s.cbVMMDevHeapLeft, VERR_NO_MEMORY); +#endif + + Log(("PDMR3VMMDevHeapAlloc: %#zx\n", cbSize)); + + /** @todo Not a real heap as there's currently only one user. */ + *ppv = pVM->pdm.s.pvVMMDevHeap; + pVM->pdm.s.cbVMMDevHeapLeft = 0; + pVM->pdm.s.pfnVMMDevHeapNotify = pfnNotify; + return VINF_SUCCESS; +} + + +/** + * Frees memory from the VMM device heap + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pv Ring-3 pointer. + */ +VMMR3_INT_DECL(int) PDMR3VmmDevHeapFree(PVM pVM, RTR3PTR pv) +{ + Log(("PDMR3VmmDevHeapFree: %RHv\n", pv)); RT_NOREF_PV(pv); + + /** @todo not a real heap as there's currently only one user. */ + pVM->pdm.s.cbVMMDevHeapLeft = pVM->pdm.s.cbVMMDevHeap; + pVM->pdm.s.pfnVMMDevHeapNotify = NULL; + return VINF_SUCCESS; +} + + +/** + * Worker for DBGFR3TraceConfig that checks if the given tracing group name + * matches a device or driver name and applies the tracing config change. + * + * @returns VINF_SUCCESS or VERR_NOT_FOUND. + * @param pVM The cross context VM structure. + * @param pszName The tracing config group name. This is NULL if + * the operation applies to every device and + * driver. 
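/*
 * Illustrative usage sketch (not from the original sources) for the query
 * helpers above.  It assumes the usual VBox/vmm/pdm.h declarations and the
 * PDMIBASE::pfnQueryInterface(pInterface, pszIID) convention; the device name
 * "AHCI", instance 0, LUN 0 and the pszIID value are purely hypothetical.
 */
static void *sketchQueryTopDriverInterface(PUVM pUVM, const char *pszIID)
{
    PPDMIBASE pBase = NULL;
    int rc = PDMR3QueryLun(pUVM, "AHCI", 0 /*iInstance*/, 0 /*iLun*/, &pBase);
    if (RT_FAILURE(rc))
        return NULL;                     /* no such device/LUN, or no driver attached */

    /* Ask the top level driver whether it implements the wanted interface. */
    return pBase->pfnQueryInterface(pBase, pszIID);
}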
+ * @param cchName The length to match. + * @param fEnable Whether to enable or disable the corresponding + * trace points. + * @param fApply Whether to actually apply the changes or just do + * existence checks. + */ +VMMR3_INT_DECL(int) PDMR3TracingConfig(PVM pVM, const char *pszName, size_t cchName, bool fEnable, bool fApply) +{ + /** @todo This code is potentially racing driver attaching and detaching. */ + + /* + * Applies to all. + */ + if (pszName == NULL) + { + AssertReturn(fApply, VINF_SUCCESS); + + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + pDevIns->fTracing = fEnable; + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + pDrvIns->fTracing = fEnable; + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + pUsbIns->fTracing = fEnable; + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + pDrvIns->fTracing = fEnable; + + } +#endif + return VINF_SUCCESS; + } + + /* + * Specific devices, USB devices or drivers. + * Decode prefix to figure which of these it applies to. + */ + if (cchName <= 3) + return VERR_NOT_FOUND; + + uint32_t cMatches = 0; + if (!strncmp("dev", pszName, 3)) + { + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + const char *pszDevName = pDevIns->Internal.s.pDevR3->pReg->szName; + size_t cchDevName = strlen(pszDevName); + if ( ( cchDevName == cchName + && RTStrNICmp(pszName, pszDevName, cchDevName)) + || ( cchDevName == cchName - 3 + && RTStrNICmp(pszName + 3, pszDevName, cchDevName)) ) + { + cMatches++; + if (fApply) + pDevIns->fTracing = fEnable; + } + } + } + else if (!strncmp("usb", pszName, 3)) + { + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + const char *pszUsbName = pUsbIns->Internal.s.pUsbDev->pReg->szName; + size_t cchUsbName = strlen(pszUsbName); + if ( ( cchUsbName == cchName + && RTStrNICmp(pszName, pszUsbName, cchUsbName)) + || ( cchUsbName == cchName - 3 + && RTStrNICmp(pszName + 3, pszUsbName, cchUsbName)) ) + { + cMatches++; + if (fApply) + pUsbIns->fTracing = fEnable; + } + } + } + else if (!strncmp("drv", pszName, 3)) + { + AssertReturn(fApply, VINF_SUCCESS); + + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + { + const char *pszDrvName = pDrvIns->Internal.s.pDrv->pReg->szName; + size_t cchDrvName = strlen(pszDrvName); + if ( ( cchDrvName == cchName + && RTStrNICmp(pszName, pszDrvName, cchDrvName)) + || ( cchDrvName == cchName - 3 + && RTStrNICmp(pszName + 3, pszDrvName, cchDrvName)) ) + { + cMatches++; + if (fApply) + pDrvIns->fTracing = fEnable; + } + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + { + const char *pszDrvName = pDrvIns->Internal.s.pDrv->pReg->szName; + size_t cchDrvName = strlen(pszDrvName); + if ( ( cchDrvName 
== cchName + && RTStrNICmp(pszName, pszDrvName, cchDrvName)) + || ( cchDrvName == cchName - 3 + && RTStrNICmp(pszName + 3, pszDrvName, cchDrvName)) ) + { + cMatches++; + if (fApply) + pDrvIns->fTracing = fEnable; + } + } +#endif + } + else + return VERR_NOT_FOUND; + + return cMatches > 0 ? VINF_SUCCESS : VERR_NOT_FOUND; +} + + +/** + * Worker for DBGFR3TraceQueryConfig that checks whether all drivers, devices, + * and USB device have the same tracing settings. + * + * @returns true / false. + * @param pVM The cross context VM structure. + * @param fEnabled The tracing setting to check for. + */ +VMMR3_INT_DECL(bool) PDMR3TracingAreAll(PVM pVM, bool fEnabled) +{ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + if (pDevIns->fTracing != (uint32_t)fEnabled) + return false; + + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (pDrvIns->fTracing != (uint32_t)fEnabled) + return false; + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + if (pUsbIns->fTracing != (uint32_t)fEnabled) + return false; + + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (pDrvIns->fTracing != (uint32_t)fEnabled) + return false; + } +#endif + + return true; +} + + +/** + * Worker for PDMR3TracingQueryConfig that adds a prefixed name to the output + * string. + * + * @returns VINF_SUCCESS or VERR_BUFFER_OVERFLOW + * @param ppszDst The pointer to the output buffer pointer. + * @param pcbDst The pointer to the output buffer size. + * @param fSpace Whether to add a space before the name. + * @param pszPrefix The name prefix. + * @param pszName The name. + */ +static int pdmR3TracingAdd(char **ppszDst, size_t *pcbDst, bool fSpace, const char *pszPrefix, const char *pszName) +{ + size_t const cchPrefix = strlen(pszPrefix); + if (!RTStrNICmp(pszPrefix, pszName, cchPrefix)) + pszName += cchPrefix; + size_t const cchName = strlen(pszName); + + size_t const cchThis = cchName + cchPrefix + fSpace; + if (cchThis >= *pcbDst) + return VERR_BUFFER_OVERFLOW; + if (fSpace) + { + **ppszDst = ' '; + memcpy(*ppszDst + 1, pszPrefix, cchPrefix); + memcpy(*ppszDst + 1 + cchPrefix, pszName, cchName + 1); + } + else + { + memcpy(*ppszDst, pszPrefix, cchPrefix); + memcpy(*ppszDst + cchPrefix, pszName, cchName + 1); + } + *ppszDst += cchThis; + *pcbDst -= cchThis; + return VINF_SUCCESS; +} + + +/** + * Worker for DBGFR3TraceQueryConfig use when not everything is either enabled + * or disabled. + * + * @returns VINF_SUCCESS or VERR_BUFFER_OVERFLOW + * @param pVM The cross context VM structure. + * @param pszConfig Where to store the config spec. + * @param cbConfig The size of the output buffer. 
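/*
 * Illustrative sketch (not from the original sources): the string building done
 * by pdmR3TracingAdd above, re-expressed with only the C library.  It appends
 * "[space]prefix+name" to a caller supplied buffer, strips the prefix from the
 * name if it is already there, and fails cleanly when the buffer is too small.
 */
#include <string.h>
#include <strings.h>    /* strncasecmp (POSIX) */

/* Returns 0 on success, -1 on buffer overflow.  *ppszDst and *pcbDst are advanced. */
static int sketchTracingAdd(char **ppszDst, size_t *pcbDst, int fSpace,
                            const char *pszPrefix, const char *pszName)
{
    size_t const cchPrefix = strlen(pszPrefix);
    if (!strncasecmp(pszPrefix, pszName, cchPrefix))
        pszName += cchPrefix;                       /* avoid "devdev..." style output */
    size_t const cchName = strlen(pszName);

    size_t const cchThis = cchPrefix + cchName + (fSpace ? 1 : 0);
    if (cchThis >= *pcbDst)                         /* need room for the terminator too */
        return -1;

    char *psz = *ppszDst;
    if (fSpace)
        *psz++ = ' ';
    memcpy(psz, pszPrefix, cchPrefix);
    memcpy(psz + cchPrefix, pszName, cchName + 1);  /* copies the '\0' as well */

    *ppszDst += cchThis;                            /* next append overwrites the terminator */
    *pcbDst  -= cchThis;
    return 0;
}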
+ */ +VMMR3_INT_DECL(int) PDMR3TracingQueryConfig(PVM pVM, char *pszConfig, size_t cbConfig) +{ + int rc; + char *pszDst = pszConfig; + size_t cbDst = cbConfig; + + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + if (pDevIns->fTracing) + { + rc = pdmR3TracingAdd(&pszDst, &cbDst, pszDst != pszConfig, "dev", pDevIns->Internal.s.pDevR3->pReg->szName); + if (RT_FAILURE(rc)) + return rc; + } + + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (pDrvIns->fTracing) + { + rc = pdmR3TracingAdd(&pszDst, &cbDst, pszDst != pszConfig, "drv", pDrvIns->Internal.s.pDrv->pReg->szName); + if (RT_FAILURE(rc)) + return rc; + } + } + +#ifdef VBOX_WITH_USB + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + if (pUsbIns->fTracing) + { + rc = pdmR3TracingAdd(&pszDst, &cbDst, pszDst != pszConfig, "usb", pUsbIns->Internal.s.pUsbDev->pReg->szName); + if (RT_FAILURE(rc)) + return rc; + } + + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (pDrvIns->fTracing) + { + rc = pdmR3TracingAdd(&pszDst, &cbDst, pszDst != pszConfig, "drv", pDrvIns->Internal.s.pDrv->pReg->szName); + if (RT_FAILURE(rc)) + return rc; + } + } +#endif + + return VINF_SUCCESS; +} + + +/** + * Checks that a PDMDRVREG::szName, PDMDEVREG::szName or PDMUSBREG::szName + * field contains only a limited set of ASCII characters. + * + * @returns true / false. + * @param pszName The name to validate. + */ +bool pdmR3IsValidName(const char *pszName) +{ + char ch; + while ( (ch = *pszName) != '\0' + && ( RT_C_IS_ALNUM(ch) + || ch == '-' + || ch == ' ' /** @todo disallow this! */ + || ch == '_') ) + pszName++; + return ch == '\0'; +} + + +/** + * Info handler for 'pdmtracingids'. + * + * @param pVM The cross context VM structure. + * @param pHlp The output helpers. + * @param pszArgs The optional user arguments. + * + * @remarks Can be called on most threads. + */ +static DECLCALLBACK(void) pdmR3InfoTracingIds(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /* + * Parse the argument (optional). + */ + if ( pszArgs + && *pszArgs + && strcmp(pszArgs, "all") + && strcmp(pszArgs, "devices") + && strcmp(pszArgs, "drivers") + && strcmp(pszArgs, "usb")) + { + pHlp->pfnPrintf(pHlp, "Unable to grok '%s'\n", pszArgs); + return; + } + bool fAll = !pszArgs || !*pszArgs || !strcmp(pszArgs, "all"); + bool fDevices = fAll || !strcmp(pszArgs, "devices"); + bool fUsbDevs = fAll || !strcmp(pszArgs, "usb"); + bool fDrivers = fAll || !strcmp(pszArgs, "drivers"); + + /* + * Produce the requested output. + */ +/** @todo lock PDM lists! 
*/ + /* devices */ + if (fDevices) + { + pHlp->pfnPrintf(pHlp, "Device tracing IDs:\n"); + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + pHlp->pfnPrintf(pHlp, "%05u %s\n", pDevIns->idTracing, pDevIns->Internal.s.pDevR3->pReg->szName); + } + + /* USB devices */ + if (fUsbDevs) + { + pHlp->pfnPrintf(pHlp, "USB device tracing IDs:\n"); + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + pHlp->pfnPrintf(pHlp, "%05u %s\n", pUsbIns->idTracing, pUsbIns->Internal.s.pUsbDev->pReg->szName); + } + + /* Drivers */ + if (fDrivers) + { + pHlp->pfnPrintf(pHlp, "Driver tracing IDs:\n"); + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + { + uint32_t iLevel = 0; + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown, iLevel++) + pHlp->pfnPrintf(pHlp, "%05u %s (level %u, lun %u, dev %s)\n", + pDrvIns->idTracing, pDrvIns->Internal.s.pDrv->pReg->szName, + iLevel, pLun->iLun, pDevIns->Internal.s.pDevR3->pReg->szName); + } + } + + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + { + uint32_t iLevel = 0; + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown, iLevel++) + pHlp->pfnPrintf(pHlp, "%05u %s (level %u, lun %u, dev %s)\n", + pDrvIns->idTracing, pDrvIns->Internal.s.pDrv->pReg->szName, + iLevel, pLun->iLun, pUsbIns->Internal.s.pUsbDev->pReg->szName); + } + } + } +} + diff --git a/src/VBox/VMM/VMMR3/PDMAsyncCompletion.cpp b/src/VBox/VMM/VMMR3/PDMAsyncCompletion.cpp new file mode 100644 index 00000000..f3b66f4a --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMAsyncCompletion.cpp @@ -0,0 +1,1808 @@ +/* $Id: PDMAsyncCompletion.cpp $ */ +/** @file + * PDM Async I/O - Transport data asynchronous in R3 using EMT. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION +#include "PDMInternal.h" +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "PDMAsyncCompletionInternal.h" + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Async I/O type. 
+ */ +typedef enum PDMASYNCCOMPLETIONTEMPLATETYPE +{ + /** Device . */ + PDMASYNCCOMPLETIONTEMPLATETYPE_DEV = 1, + /** Driver consumer. */ + PDMASYNCCOMPLETIONTEMPLATETYPE_DRV, + /** Internal consumer. */ + PDMASYNCCOMPLETIONTEMPLATETYPE_INTERNAL, + /** Usb consumer. */ + PDMASYNCCOMPLETIONTEMPLATETYPE_USB +} PDMASYNCTEMPLATETYPE; + +/** + * PDM Async I/O template. + */ +typedef struct PDMASYNCCOMPLETIONTEMPLATE +{ + /** Pointer to the next template in the list. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONTEMPLATE) pNext; + /** Pointer to the previous template in the list. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONTEMPLATE) pPrev; + /** Type specific data. */ + union + { + /** PDMASYNCCOMPLETIONTEMPLATETYPE_DEV */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMASYNCCOMPLETEDEV) pfnCompleted; + /** Pointer to the device instance owning the template. */ + R3PTRTYPE(PPDMDEVINS) pDevIns; + } Dev; + /** PDMASYNCCOMPLETIONTEMPLATETYPE_DRV */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMASYNCCOMPLETEDRV) pfnCompleted; + /** Pointer to the driver instance owning the template. */ + R3PTRTYPE(PPDMDRVINS) pDrvIns; + /** User argument given during template creation. + * This is only here to make things much easier + * for DrVVD. */ + void *pvTemplateUser; + } Drv; + /** PDMASYNCCOMPLETIONTEMPLATETYPE_INTERNAL */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMASYNCCOMPLETEINT) pfnCompleted; + /** Pointer to user data. */ + R3PTRTYPE(void *) pvUser; + } Int; + /** PDMASYNCCOMPLETIONTEMPLATETYPE_USB */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMASYNCCOMPLETEUSB) pfnCompleted; + /** Pointer to the usb instance owning the template. */ + R3PTRTYPE(PPDMUSBINS) pUsbIns; + } Usb; + } u; + /** Template type. */ + PDMASYNCCOMPLETIONTEMPLATETYPE enmType; + /** Pointer to the VM. */ + R3PTRTYPE(PVM) pVM; + /** Use count of the template. */ + volatile uint32_t cUsed; +} PDMASYNCCOMPLETIONTEMPLATE; + +/** + * Bandwidth control manager instance data + */ +typedef struct PDMACBWMGR +{ + /** Pointer to the next manager in the list. */ + struct PDMACBWMGR *pNext; + /** Pointer to the shared UVM structure. */ + PPDMASYNCCOMPLETIONEPCLASS pEpClass; + /** Identifier of the manager. */ + char *pszId; + /** Maximum number of bytes the endpoints are allowed to transfer (Max is 4GB/s currently) */ + volatile uint32_t cbTransferPerSecMax; + /** Number of bytes we start with */ + volatile uint32_t cbTransferPerSecStart; + /** Step after each update */ + volatile uint32_t cbTransferPerSecStep; + /** Number of bytes we are allowed to transfer till the next update. + * Reset by the refresh timer. */ + volatile uint32_t cbTransferAllowed; + /** Timestamp of the last update */ + volatile uint64_t tsUpdatedLast; + /** Reference counter - How many endpoints are associated with this manager. */ + volatile uint32_t cRefs; +} PDMACBWMGR; +/** Pointer to a bandwidth control manager pointer. */ +typedef PPDMACBWMGR *PPPDMACBWMGR; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void pdmR3AsyncCompletionPutTask(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, PPDMASYNCCOMPLETIONTASK pTask); + + +/** + * Internal worker for the creation apis + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param ppTemplate Where to store the template handle. + * @param enmType Async completion template type (dev, drv, usb, int). + */ +static int pdmR3AsyncCompletionTemplateCreate(PVM pVM, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PDMASYNCCOMPLETIONTEMPLATETYPE enmType) +{ + PUVM pUVM = pVM->pUVM; + + AssertPtrReturn(ppTemplate, VERR_INVALID_POINTER); + + PPDMASYNCCOMPLETIONTEMPLATE pTemplate; + int rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMASYNCCOMPLETIONTEMPLATE), (void **)&pTemplate); + if (RT_FAILURE(rc)) + return rc; + + /* + * Initialize fields. + */ + pTemplate->pVM = pVM; + pTemplate->cUsed = 0; + pTemplate->enmType = enmType; + + /* + * Add template to the global VM template list. + */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + pTemplate->pNext = pUVM->pdm.s.pAsyncCompletionTemplates; + if (pUVM->pdm.s.pAsyncCompletionTemplates) + pUVM->pdm.s.pAsyncCompletionTemplates->pPrev = pTemplate; + pUVM->pdm.s.pAsyncCompletionTemplates = pTemplate; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + *ppTemplate = pTemplate; + return VINF_SUCCESS; +} + + +#ifdef SOME_UNUSED_FUNCTION +/** + * Creates a async completion template for a device instance. + * + * The template is used when creating new completion tasks. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance. + * @param ppTemplate Where to store the template pointer on success. + * @param pfnCompleted The completion callback routine. + * @param pszDesc Description. + */ +int pdmR3AsyncCompletionTemplateCreateDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PFNPDMASYNCCOMPLETEDEV pfnCompleted, const char *pszDesc) +{ + LogFlow(("%s: pDevIns=%p ppTemplate=%p pfnCompleted=%p pszDesc=%s\n", + __FUNCTION__, pDevIns, ppTemplate, pfnCompleted, pszDesc)); + + /* + * Validate input. + */ + VM_ASSERT_EMT(pVM); + AssertPtrReturn(pfnCompleted, VERR_INVALID_POINTER); + AssertPtrReturn(ppTemplate, VERR_INVALID_POINTER); + + /* + * Create the template. + */ + PPDMASYNCCOMPLETIONTEMPLATE pTemplate; + int rc = pdmR3AsyncCompletionTemplateCreate(pVM, &pTemplate, PDMASYNCCOMPLETIONTEMPLATETYPE_DEV); + if (RT_SUCCESS(rc)) + { + pTemplate->u.Dev.pDevIns = pDevIns; + pTemplate->u.Dev.pfnCompleted = pfnCompleted; + + *ppTemplate = pTemplate; + Log(("PDM: Created device template %p: pfnCompleted=%p pDevIns=%p\n", + pTemplate, pfnCompleted, pDevIns)); + } + + return rc; +} +#endif /* SOME_UNUSED_FUNCTION */ + + +/** + * Creates a async completion template for a driver instance. + * + * The template is used when creating new completion tasks. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns The driver instance. + * @param ppTemplate Where to store the template pointer on success. + * @param pfnCompleted The completion callback routine. + * @param pvTemplateUser Template user argument + * @param pszDesc Description. + */ +int pdmR3AsyncCompletionTemplateCreateDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PFNPDMASYNCCOMPLETEDRV pfnCompleted, void *pvTemplateUser, + const char *pszDesc) +{ + LogFlow(("PDMR3AsyncCompletionTemplateCreateDriver: pDrvIns=%p ppTemplate=%p pfnCompleted=%p pszDesc=%s\n", + pDrvIns, ppTemplate, pfnCompleted, pszDesc)); + RT_NOREF_PV(pszDesc); /** @todo async template description */ + + /* + * Validate input. 
+ */ + AssertPtrReturn(pfnCompleted, VERR_INVALID_POINTER); + AssertPtrReturn(ppTemplate, VERR_INVALID_POINTER); + + /* + * Create the template. + */ + PPDMASYNCCOMPLETIONTEMPLATE pTemplate; + int rc = pdmR3AsyncCompletionTemplateCreate(pVM, &pTemplate, PDMASYNCCOMPLETIONTEMPLATETYPE_DRV); + if (RT_SUCCESS(rc)) + { + pTemplate->u.Drv.pDrvIns = pDrvIns; + pTemplate->u.Drv.pfnCompleted = pfnCompleted; + pTemplate->u.Drv.pvTemplateUser = pvTemplateUser; + + *ppTemplate = pTemplate; + Log(("PDM: Created driver template %p: pfnCompleted=%p pDrvIns=%p\n", + pTemplate, pfnCompleted, pDrvIns)); + } + + return rc; +} + + +#ifdef SOME_UNUSED_FUNCTION +/** + * Creates a async completion template for a USB device instance. + * + * The template is used when creating new completion tasks. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pUsbIns The USB device instance. + * @param ppTemplate Where to store the template pointer on success. + * @param pfnCompleted The completion callback routine. + * @param pszDesc Description. + */ +int pdmR3AsyncCompletionTemplateCreateUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PFNPDMASYNCCOMPLETEUSB pfnCompleted, const char *pszDesc) +{ + LogFlow(("pdmR3AsyncCompletionTemplateCreateUsb: pUsbIns=%p ppTemplate=%p pfnCompleted=%p pszDesc=%s\n", pUsbIns, ppTemplate, pfnCompleted, pszDesc)); + + /* + * Validate input. + */ + VM_ASSERT_EMT(pVM); + AssertPtrReturn(pfnCompleted, VERR_INVALID_POINTER); + AssertPtrReturn(ppTemplate, VERR_INVALID_POINTER); + + /* + * Create the template. + */ + PPDMASYNCCOMPLETIONTEMPLATE pTemplate; + int rc = pdmR3AsyncCompletionTemplateCreate(pVM, &pTemplate, PDMASYNCCOMPLETIONTEMPLATETYPE_USB); + if (RT_SUCCESS(rc)) + { + pTemplate->u.Usb.pUsbIns = pUsbIns; + pTemplate->u.Usb.pfnCompleted = pfnCompleted; + + *ppTemplate = pTemplate; + Log(("PDM: Created usb template %p: pfnCompleted=%p pDevIns=%p\n", + pTemplate, pfnCompleted, pUsbIns)); + } + + return rc; +} +#endif + + +/** + * Creates a async completion template for internally by the VMM. + * + * The template is used when creating new completion tasks. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param ppTemplate Where to store the template pointer on success. + * @param pfnCompleted The completion callback routine. + * @param pvUser2 The 2nd user argument for the callback. + * @param pszDesc Description. + * @internal + */ +VMMR3DECL(int) PDMR3AsyncCompletionTemplateCreateInternal(PVM pVM, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PFNPDMASYNCCOMPLETEINT pfnCompleted, void *pvUser2, const char *pszDesc) +{ + LogFlow(("PDMR3AsyncCompletionTemplateCreateInternal: ppTemplate=%p pfnCompleted=%p pvUser2=%p pszDesc=%s\n", + ppTemplate, pfnCompleted, pvUser2, pszDesc)); + RT_NOREF_PV(pszDesc); /** @todo async template description */ + + + /* + * Validate input. + */ + VM_ASSERT_EMT(pVM); + AssertPtrReturn(pfnCompleted, VERR_INVALID_POINTER); + AssertPtrReturn(ppTemplate, VERR_INVALID_POINTER); + + /* + * Create the template. 
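/*
 * Illustrative usage sketch (not from the original sources) for
 * PDMR3AsyncCompletionTemplateCreateInternal above.  The callback signature is
 * inferred from the dispatch in pdmR3AsyncCompletionCompleteTask further down
 * (pVM, per-task user pointer, per-template user pointer, status); treat that
 * as an assumption and check VBox/vmm/pdmasynccompletion.h before relying on it.
 */
static DECLCALLBACK(void) sketchAsyncDone(PVM pVM, void *pvTask, void *pvTemplateUser, int rcReq)
{
    RT_NOREF(pVM, pvTemplateUser);
    LogFlow(("sketchAsyncDone: task %p completed with %Rrc\n", pvTask, rcReq));
}

static int sketchCreateTemplate(PVM pVM, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate)
{
    /* pvUser2 is handed back to the callback on every completion. */
    return PDMR3AsyncCompletionTemplateCreateInternal(pVM, ppTemplate, sketchAsyncDone,
                                                      NULL /*pvUser2*/, "sketch");
}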
+ */ + PPDMASYNCCOMPLETIONTEMPLATE pTemplate; + int rc = pdmR3AsyncCompletionTemplateCreate(pVM, &pTemplate, PDMASYNCCOMPLETIONTEMPLATETYPE_INTERNAL); + if (RT_SUCCESS(rc)) + { + pTemplate->u.Int.pvUser = pvUser2; + pTemplate->u.Int.pfnCompleted = pfnCompleted; + + *ppTemplate = pTemplate; + Log(("PDM: Created internal template %p: pfnCompleted=%p pvUser2=%p\n", + pTemplate, pfnCompleted, pvUser2)); + } + + return rc; +} + + +/** + * Destroys the specified async completion template. + * + * @returns VBox status codes: + * @retval VINF_SUCCESS on success. + * @retval VERR_PDM_ASYNC_TEMPLATE_BUSY if the template is still in use. + * + * @param pTemplate The template in question. + */ +VMMR3DECL(int) PDMR3AsyncCompletionTemplateDestroy(PPDMASYNCCOMPLETIONTEMPLATE pTemplate) +{ + LogFlow(("%s: pTemplate=%p\n", __FUNCTION__, pTemplate)); + + if (!pTemplate) + { + AssertMsgFailed(("pTemplate is NULL!\n")); + return VERR_INVALID_PARAMETER; + } + + /* + * Check if the template is still used. + */ + if (pTemplate->cUsed > 0) + { + AssertMsgFailed(("Template is still in use\n")); + return VERR_PDM_ASYNC_TEMPLATE_BUSY; + } + + /* + * Unlink the template from the list. + */ + PUVM pUVM = pTemplate->pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + + PPDMASYNCCOMPLETIONTEMPLATE pPrev = pTemplate->pPrev; + PPDMASYNCCOMPLETIONTEMPLATE pNext = pTemplate->pNext; + + if (pPrev) + pPrev->pNext = pNext; + else + pUVM->pdm.s.pAsyncCompletionTemplates = pNext; + + if (pNext) + pNext->pPrev = pPrev; + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + /* + * Free the template. + */ + MMR3HeapFree(pTemplate); + + return VINF_SUCCESS; +} + + +/** + * Destroys all the specified async completion templates for the given device instance. + * + * @returns VBox status codes: + * @retval VINF_SUCCESS on success. + * @retval VERR_PDM_ASYNC_TEMPLATE_BUSY if one or more of the templates are still in use. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device instance. + */ +int pdmR3AsyncCompletionTemplateDestroyDevice(PVM pVM, PPDMDEVINS pDevIns) +{ + LogFlow(("pdmR3AsyncCompletionTemplateDestroyDevice: pDevIns=%p\n", pDevIns)); + + /* + * Validate input. + */ + if (!pDevIns) + return VERR_INVALID_PARAMETER; + VM_ASSERT_EMT(pVM); + + /* + * Unlink it. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMASYNCCOMPLETIONTEMPLATE pTemplate = pUVM->pdm.s.pAsyncCompletionTemplates; + while (pTemplate) + { + if ( pTemplate->enmType == PDMASYNCCOMPLETIONTEMPLATETYPE_DEV + && pTemplate->u.Dev.pDevIns == pDevIns) + { + PPDMASYNCCOMPLETIONTEMPLATE pTemplateDestroy = pTemplate; + pTemplate = pTemplate->pNext; + int rc = PDMR3AsyncCompletionTemplateDestroy(pTemplateDestroy); + if (RT_FAILURE(rc)) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; + } + } + else + pTemplate = pTemplate->pNext; + } + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return VINF_SUCCESS; +} + + +/** + * Destroys all the specified async completion templates for the given driver instance. + * + * @returns VBox status codes: + * @retval VINF_SUCCESS on success. + * @retval VERR_PDM_ASYNC_TEMPLATE_BUSY if one or more of the templates are still in use. + * + * @param pVM The cross context VM structure. + * @param pDrvIns The driver instance. + */ +int pdmR3AsyncCompletionTemplateDestroyDriver(PVM pVM, PPDMDRVINS pDrvIns) +{ + LogFlow(("pdmR3AsyncCompletionTemplateDestroyDriver: pDevIns=%p\n", pDrvIns)); + + /* + * Validate input. 
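/*
 * Illustrative sketch (not from the original sources): the per-owner destroy
 * helpers above walk the template list and remove matching entries while
 * iterating.  The same pattern in isolation, as a self-contained singly linked
 * list; the real code additionally maintains pPrev links and holds ListCritSect.
 */
#include <stdlib.h>

typedef struct SKETCHNODE
{
    struct SKETCHNODE *pNext;
    void              *pvOwner;
} SKETCHNODE;

/* Frees every node owned by pvOwner; returns the (possibly new) list head. */
static SKETCHNODE *sketchRemoveByOwner(SKETCHNODE *pHead, void *pvOwner)
{
    SKETCHNODE **ppNext = &pHead;          /* points at the link to patch */
    while (*ppNext)
    {
        SKETCHNODE *pCur = *ppNext;
        if (pCur->pvOwner == pvOwner)
        {
            *ppNext = pCur->pNext;         /* unlink first, then free */
            free(pCur);
        }
        else
            ppNext = &pCur->pNext;         /* only advance when nothing was removed */
    }
    return pHead;
}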
+ */ + if (!pDrvIns) + return VERR_INVALID_PARAMETER; + VM_ASSERT_EMT(pVM); + + /* + * Unlink it. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMASYNCCOMPLETIONTEMPLATE pTemplate = pUVM->pdm.s.pAsyncCompletionTemplates; + while (pTemplate) + { + if ( pTemplate->enmType == PDMASYNCCOMPLETIONTEMPLATETYPE_DRV + && pTemplate->u.Drv.pDrvIns == pDrvIns) + { + PPDMASYNCCOMPLETIONTEMPLATE pTemplateDestroy = pTemplate; + pTemplate = pTemplate->pNext; + int rc = PDMR3AsyncCompletionTemplateDestroy(pTemplateDestroy); + if (RT_FAILURE(rc)) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; + } + } + else + pTemplate = pTemplate->pNext; + } + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return VINF_SUCCESS; +} + + +/** + * Destroys all the specified async completion templates for the given USB device instance. + * + * @returns VBox status codes: + * @retval VINF_SUCCESS on success. + * @retval VERR_PDM_ASYNC_TEMPLATE_BUSY if one or more of the templates are still in use. + * + * @param pVM The cross context VM structure. + * @param pUsbIns The USB device instance. + */ +int pdmR3AsyncCompletionTemplateDestroyUsb(PVM pVM, PPDMUSBINS pUsbIns) +{ + LogFlow(("pdmR3AsyncCompletionTemplateDestroyUsb: pUsbIns=%p\n", pUsbIns)); + + /* + * Validate input. + */ + if (!pUsbIns) + return VERR_INVALID_PARAMETER; + VM_ASSERT_EMT(pVM); + + /* + * Unlink it. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMASYNCCOMPLETIONTEMPLATE pTemplate = pUVM->pdm.s.pAsyncCompletionTemplates; + while (pTemplate) + { + if ( pTemplate->enmType == PDMASYNCCOMPLETIONTEMPLATETYPE_USB + && pTemplate->u.Usb.pUsbIns == pUsbIns) + { + PPDMASYNCCOMPLETIONTEMPLATE pTemplateDestroy = pTemplate; + pTemplate = pTemplate->pNext; + int rc = PDMR3AsyncCompletionTemplateDestroy(pTemplateDestroy); + if (RT_FAILURE(rc)) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; + } + } + else + pTemplate = pTemplate->pNext; + } + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return VINF_SUCCESS; +} + + +/** Lazy coder. */ +static PPDMACBWMGR pdmacBwMgrFindById(PPDMASYNCCOMPLETIONEPCLASS pEpClass, const char *pszId) +{ + PPDMACBWMGR pBwMgr = NULL; + + if (pszId) + { + int rc = RTCritSectEnter(&pEpClass->CritSect); AssertRC(rc); + + pBwMgr = pEpClass->pBwMgrsHead; + while ( pBwMgr + && RTStrCmp(pBwMgr->pszId, pszId)) + pBwMgr = pBwMgr->pNext; + + rc = RTCritSectLeave(&pEpClass->CritSect); AssertRC(rc); + } + + return pBwMgr; +} + + +/** Lazy coder. */ +static void pdmacBwMgrLink(PPDMACBWMGR pBwMgr) +{ + PPDMASYNCCOMPLETIONEPCLASS pEpClass = pBwMgr->pEpClass; + int rc = RTCritSectEnter(&pEpClass->CritSect); AssertRC(rc); + + pBwMgr->pNext = pEpClass->pBwMgrsHead; + pEpClass->pBwMgrsHead = pBwMgr; + + rc = RTCritSectLeave(&pEpClass->CritSect); AssertRC(rc); +} + + +#ifdef SOME_UNUSED_FUNCTION +/** Lazy coder. */ +static void pdmacBwMgrUnlink(PPDMACBWMGR pBwMgr) +{ + PPDMASYNCCOMPLETIONEPCLASS pEpClass = pBwMgr->pEpClass; + int rc = RTCritSectEnter(&pEpClass->CritSect); AssertRC(rc); + + if (pBwMgr == pEpClass->pBwMgrsHead) + pEpClass->pBwMgrsHead = pBwMgr->pNext; + else + { + PPDMACBWMGR pPrev = pEpClass->pBwMgrsHead; + while ( pPrev + && pPrev->pNext != pBwMgr) + pPrev = pPrev->pNext; + + AssertPtr(pPrev); + pPrev->pNext = pBwMgr->pNext; + } + + rc = RTCritSectLeave(&pEpClass->CritSect); AssertRC(rc); +} +#endif /* SOME_UNUSED_FUNCTION */ + + +/** Lazy coder. 
*/ +static int pdmacAsyncCompletionBwMgrCreate(PPDMASYNCCOMPLETIONEPCLASS pEpClass, const char *pszBwMgr, uint32_t cbTransferPerSecMax, + uint32_t cbTransferPerSecStart, uint32_t cbTransferPerSecStep) +{ + LogFlowFunc(("pEpClass=%#p pszBwMgr=%#p{%s} cbTransferPerSecMax=%u cbTransferPerSecStart=%u cbTransferPerSecStep=%u\n", + pEpClass, pszBwMgr, pszBwMgr, cbTransferPerSecMax, cbTransferPerSecStart, cbTransferPerSecStep)); + + AssertPtrReturn(pEpClass, VERR_INVALID_POINTER); + AssertPtrReturn(pszBwMgr, VERR_INVALID_POINTER); + AssertReturn(*pszBwMgr != '\0', VERR_INVALID_PARAMETER); + + int rc; + PPDMACBWMGR pBwMgr = pdmacBwMgrFindById(pEpClass, pszBwMgr); + if (!pBwMgr) + { + rc = MMR3HeapAllocZEx(pEpClass->pVM, MM_TAG_PDM_ASYNC_COMPLETION, + sizeof(PDMACBWMGR), + (void **)&pBwMgr); + if (RT_SUCCESS(rc)) + { + pBwMgr->pszId = RTStrDup(pszBwMgr); + if (pBwMgr->pszId) + { + pBwMgr->pEpClass = pEpClass; + pBwMgr->cRefs = 0; + + /* Init I/O flow control. */ + pBwMgr->cbTransferPerSecMax = cbTransferPerSecMax; + pBwMgr->cbTransferPerSecStart = cbTransferPerSecStart; + pBwMgr->cbTransferPerSecStep = cbTransferPerSecStep; + + pBwMgr->cbTransferAllowed = pBwMgr->cbTransferPerSecStart; + pBwMgr->tsUpdatedLast = RTTimeSystemNanoTS(); + + pdmacBwMgrLink(pBwMgr); + rc = VINF_SUCCESS; + } + else + { + rc = VERR_NO_MEMORY; + MMR3HeapFree(pBwMgr); + } + } + } + else + rc = VERR_ALREADY_EXISTS; + + LogFlowFunc(("returns rc=%Rrc\n", rc)); + return rc; +} + + +/** Lazy coder. */ +DECLINLINE(void) pdmacBwMgrRetain(PPDMACBWMGR pBwMgr) +{ + ASMAtomicIncU32(&pBwMgr->cRefs); +} + + +/** Lazy coder. */ +DECLINLINE(void) pdmacBwMgrRelease(PPDMACBWMGR pBwMgr) +{ + Assert(pBwMgr->cRefs > 0); + ASMAtomicDecU32(&pBwMgr->cRefs); +} + + +/** + * Checks if the endpoint is allowed to transfer the given amount of bytes. + * + * @returns true if the endpoint is allowed to transfer the data. + * false otherwise + * @param pEndpoint The endpoint. + * @param cbTransfer The number of bytes to transfer. + * @param pmsWhenNext Where to store the number of milliseconds + * until the bandwidth is refreshed. + * Only set if false is returned. + */ +bool pdmacEpIsTransferAllowed(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint32_t cbTransfer, RTMSINTERVAL *pmsWhenNext) +{ + bool fAllowed = true; + PPDMACBWMGR pBwMgr = ASMAtomicReadPtrT(&pEndpoint->pBwMgr, PPDMACBWMGR); + + LogFlowFunc(("pEndpoint=%p pBwMgr=%p cbTransfer=%u\n", pEndpoint, pBwMgr, cbTransfer)); + + if (pBwMgr) + { + uint32_t cbOld = ASMAtomicSubU32(&pBwMgr->cbTransferAllowed, cbTransfer); + if (RT_LIKELY(cbOld >= cbTransfer)) + fAllowed = true; + else + { + fAllowed = false; + + /* We are out of resources Check if we can update again. */ + uint64_t tsNow = RTTimeSystemNanoTS(); + uint64_t tsUpdatedLast = ASMAtomicUoReadU64(&pBwMgr->tsUpdatedLast); + + if (tsNow - tsUpdatedLast >= (1000*1000*1000)) + { + if (ASMAtomicCmpXchgU64(&pBwMgr->tsUpdatedLast, tsNow, tsUpdatedLast)) + { + if (pBwMgr->cbTransferPerSecStart < pBwMgr->cbTransferPerSecMax) + { + pBwMgr->cbTransferPerSecStart = RT_MIN(pBwMgr->cbTransferPerSecMax, pBwMgr->cbTransferPerSecStart + pBwMgr->cbTransferPerSecStep); + LogFlow(("AIOMgr: Increasing maximum bandwidth to %u bytes/sec\n", pBwMgr->cbTransferPerSecStart)); + } + + /* Update */ + uint32_t cbTransferAllowedNew = pBwMgr->cbTransferPerSecStart > cbTransfer + ? 
pBwMgr->cbTransferPerSecStart - cbTransfer + : 0; + ASMAtomicWriteU32(&pBwMgr->cbTransferAllowed, cbTransferAllowedNew); + fAllowed = true; + LogFlow(("AIOMgr: Refreshed bandwidth\n")); + } + } + else + { + ASMAtomicAddU32(&pBwMgr->cbTransferAllowed, cbTransfer); + *pmsWhenNext = ((1000*1000*1000) - (tsNow - tsUpdatedLast)) / (1000*1000); + } + } + } + + LogFlowFunc(("fAllowed=%RTbool\n", fAllowed)); + return fAllowed; +} + + +/** + * Called by the endpoint if a task has finished. + * + * @returns nothing + * @param pTask Pointer to the finished task. + * @param rc Status code of the completed request. + * @param fCallCompletionHandler Flag whether the completion handler should be called to + * inform the owner of the task that it has completed. + */ +void pdmR3AsyncCompletionCompleteTask(PPDMASYNCCOMPLETIONTASK pTask, int rc, bool fCallCompletionHandler) +{ + LogFlow(("%s: pTask=%#p fCallCompletionHandler=%RTbool\n", __FUNCTION__, pTask, fCallCompletionHandler)); + + if (fCallCompletionHandler) + { + PPDMASYNCCOMPLETIONTEMPLATE pTemplate = pTask->pEndpoint->pTemplate; + + switch (pTemplate->enmType) + { + case PDMASYNCCOMPLETIONTEMPLATETYPE_DEV: + pTemplate->u.Dev.pfnCompleted(pTemplate->u.Dev.pDevIns, pTask->pvUser, rc); + break; + + case PDMASYNCCOMPLETIONTEMPLATETYPE_DRV: + pTemplate->u.Drv.pfnCompleted(pTemplate->u.Drv.pDrvIns, pTemplate->u.Drv.pvTemplateUser, pTask->pvUser, rc); + break; + + case PDMASYNCCOMPLETIONTEMPLATETYPE_USB: + pTemplate->u.Usb.pfnCompleted(pTemplate->u.Usb.pUsbIns, pTask->pvUser, rc); + break; + + case PDMASYNCCOMPLETIONTEMPLATETYPE_INTERNAL: + pTemplate->u.Int.pfnCompleted(pTemplate->pVM, pTask->pvUser, pTemplate->u.Int.pvUser, rc); + break; + + default: + AssertMsgFailed(("Unknown template type!\n")); + } + } + + pdmR3AsyncCompletionPutTask(pTask->pEndpoint, pTask); +} + + +/** + * Worker initializing a endpoint class. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pEpClassOps Pointer to the endpoint class structure. + * @param pCfgHandle Pointer to the CFGM tree. + */ +int pdmR3AsyncCompletionEpClassInit(PVM pVM, PCPDMASYNCCOMPLETIONEPCLASSOPS pEpClassOps, PCFGMNODE pCfgHandle) +{ + /* Validate input. */ + AssertPtrReturn(pEpClassOps, VERR_INVALID_POINTER); + AssertReturn(pEpClassOps->u32Version == PDMAC_EPCLASS_OPS_VERSION, VERR_VERSION_MISMATCH); + AssertReturn(pEpClassOps->u32VersionEnd == PDMAC_EPCLASS_OPS_VERSION, VERR_VERSION_MISMATCH); + + LogFlow(("pdmR3AsyncCompletionEpClassInit: pVM=%p pEpClassOps=%p{%s}\n", pVM, pEpClassOps, pEpClassOps->pszName)); + + /* Allocate global class data. */ + PPDMASYNCCOMPLETIONEPCLASS pEndpointClass = NULL; + + int rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_ASYNC_COMPLETION, + pEpClassOps->cbEndpointClassGlobal, + (void **)&pEndpointClass); + if (RT_SUCCESS(rc)) + { + /* Initialize common data. */ + pEndpointClass->pVM = pVM; + pEndpointClass->pEndpointOps = pEpClassOps; + + rc = RTCritSectInit(&pEndpointClass->CritSect); + if (RT_SUCCESS(rc)) + { + PCFGMNODE pCfgNodeClass = CFGMR3GetChild(pCfgHandle, pEpClassOps->pszName); + + /* Create task cache */ + rc = RTMemCacheCreate(&pEndpointClass->hMemCacheTasks, pEpClassOps->cbTask, + 0, UINT32_MAX, NULL, NULL, NULL, 0); + if (RT_SUCCESS(rc)) + { + /* Call the specific endpoint class initializer. */ + rc = pEpClassOps->pfnInitialize(pEndpointClass, pCfgNodeClass); + if (RT_SUCCESS(rc)) + { + /* Create all bandwidth groups for resource control. 
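+                 * Each child key below "BwGroups" names one bandwidth group; the
+                 * values read for it are "Max" (mandatory), "Start" (defaults to
+                 * Max) and "Step" (defaults to 0), all in bytes per second.  An
+                 * illustrative layout, assuming the file class registers under the
+                 * name "File" (group name and numbers are examples only):
+                 *   PDM/AsyncCompletion/File/BwGroups/Limit1/Max    10485760
+                 *   PDM/AsyncCompletion/File/BwGroups/Limit1/Start   5242880
+                 *   PDM/AsyncCompletion/File/BwGroups/Limit1/Step    1048576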
*/ + PCFGMNODE pCfgBwGrp = CFGMR3GetChild(pCfgNodeClass, "BwGroups"); + if (pCfgBwGrp) + { + for (PCFGMNODE pCur = CFGMR3GetFirstChild(pCfgBwGrp); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + size_t cbName = CFGMR3GetNameLen(pCur) + 1; + char *pszBwGrpId = (char *)RTMemAllocZ(cbName); + if (pszBwGrpId) + { + rc = CFGMR3GetName(pCur, pszBwGrpId, cbName); + if (RT_SUCCESS(rc)) + { + uint32_t cbMax; + rc = CFGMR3QueryU32(pCur, "Max", &cbMax); + if (RT_SUCCESS(rc)) + { + uint32_t cbStart; + rc = CFGMR3QueryU32Def(pCur, "Start", &cbStart, cbMax); + if (RT_SUCCESS(rc)) + { + uint32_t cbStep; + rc = CFGMR3QueryU32Def(pCur, "Step", &cbStep, 0); + if (RT_SUCCESS(rc)) + rc = pdmacAsyncCompletionBwMgrCreate(pEndpointClass, pszBwGrpId, + cbMax, cbStart, cbStep); + } + } + } + RTMemFree(pszBwGrpId); + } + else + rc = VERR_NO_MEMORY; + if (RT_FAILURE(rc)) + break; + } + } + if (RT_SUCCESS(rc)) + { + PUVM pUVM = pVM->pUVM; + AssertMsg(!pUVM->pdm.s.apAsyncCompletionEndpointClass[pEpClassOps->enmClassType], + ("Endpoint class was already initialized\n")); + +#ifdef VBOX_WITH_STATISTICS + CFGMR3QueryBoolDef(pCfgNodeClass, "AdvancedStatistics", &pEndpointClass->fGatherAdvancedStatistics, true); +#else + CFGMR3QueryBoolDef(pCfgNodeClass, "AdvancedStatistics", &pEndpointClass->fGatherAdvancedStatistics, false); +#endif + + pUVM->pdm.s.apAsyncCompletionEndpointClass[pEpClassOps->enmClassType] = pEndpointClass; + LogFlowFunc((": Initialized endpoint class \"%s\" rc=%Rrc\n", pEpClassOps->pszName, rc)); + return VINF_SUCCESS; + } + } + RTMemCacheDestroy(pEndpointClass->hMemCacheTasks); + } + RTCritSectDelete(&pEndpointClass->CritSect); + } + MMR3HeapFree(pEndpointClass); + } + + LogFlowFunc((": Failed to initialize endpoint class rc=%Rrc\n", rc)); + + return rc; +} + + +/** + * Worker terminating all endpoint classes. + * + * @returns nothing + * @param pEndpointClass Pointer to the endpoint class to terminate. + * + * @remarks This method ensures that any still open endpoint is closed. + */ +static void pdmR3AsyncCompletionEpClassTerminate(PPDMASYNCCOMPLETIONEPCLASS pEndpointClass) +{ + PVM pVM = pEndpointClass->pVM; + + /* Close all still open endpoints. */ + while (pEndpointClass->pEndpointsHead) + PDMR3AsyncCompletionEpClose(pEndpointClass->pEndpointsHead); + + /* Destroy the bandwidth managers. */ + PPDMACBWMGR pBwMgr = pEndpointClass->pBwMgrsHead; + while (pBwMgr) + { + PPDMACBWMGR pFree = pBwMgr; + pBwMgr = pBwMgr->pNext; + MMR3HeapFree(pFree); + } + + /* Call the termination callback of the class. */ + pEndpointClass->pEndpointOps->pfnTerminate(pEndpointClass); + + RTMemCacheDestroy(pEndpointClass->hMemCacheTasks); + RTCritSectDelete(&pEndpointClass->CritSect); + + /* Free the memory of the class finally and clear the entry in the class array. */ + pVM->pUVM->pdm.s.apAsyncCompletionEndpointClass[pEndpointClass->pEndpointOps->enmClassType] = NULL; + MMR3HeapFree(pEndpointClass); +} + + +/** + * Records the size of the request in the statistics. + * + * @returns nothing. + * @param pEndpoint The endpoint to register the request size for. + * @param cbReq Size of the request. 
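+ *
+ * The request is counted in one of the size buckets below (from smaller than
+ * 512 bytes up to over 512KB) and, if its size is not a multiple of 512 bytes,
+ * 4KB or 8KB, additionally in the first matching unaligned-size counter.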
+ */ +static void pdmR3AsyncCompletionStatisticsRecordSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, size_t cbReq) +{ + if (cbReq < 512) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSizeSmaller512); + else if (cbReq < _1K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize512To1K); + else if (cbReq < _2K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize1KTo2K); + else if (cbReq < _4K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize2KTo4K); + else if (cbReq < _8K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize4KTo8K); + else if (cbReq < _16K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize8KTo16K); + else if (cbReq < _32K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize16KTo32K); + else if (cbReq < _64K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize32KTo64K); + else if (cbReq < _128K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize64KTo128K); + else if (cbReq < _256K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize128KTo256K); + else if (cbReq < _512K) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSize256KTo512K); + else + STAM_REL_COUNTER_INC(&pEndpoint->StatReqSizeOver512K); + + if (cbReq & ((size_t)512 - 1)) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqsUnaligned512); + else if (cbReq & ((size_t)_4K - 1)) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqsUnaligned4K); + else if (cbReq & ((size_t)_8K - 1)) + STAM_REL_COUNTER_INC(&pEndpoint->StatReqsUnaligned8K); +} + + +/** + * Records the required processing time of a request. + * + * @returns nothing. + * @param pEndpoint The endpoint. + * @param cNsRun The request time in nanoseconds. + */ +static void pdmR3AsyncCompletionStatisticsRecordCompletionTime(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t cNsRun) +{ + PSTAMCOUNTER pStatCounter; + if (cNsRun < RT_NS_1US) + pStatCounter = &pEndpoint->StatTaskRunTimesNs[cNsRun / (RT_NS_1US / 10)]; + else if (cNsRun < RT_NS_1MS) + pStatCounter = &pEndpoint->StatTaskRunTimesUs[cNsRun / (RT_NS_1MS / 10)]; + else if (cNsRun < RT_NS_1SEC) + pStatCounter = &pEndpoint->StatTaskRunTimesMs[cNsRun / (RT_NS_1SEC / 10)]; + else if (cNsRun < RT_NS_1SEC_64*100) + pStatCounter = &pEndpoint->StatTaskRunTimesSec[cNsRun / (RT_NS_1SEC_64*100 / 10)]; + else + pStatCounter = &pEndpoint->StatTaskRunOver100Sec; + STAM_REL_COUNTER_INC(pStatCounter); + + STAM_REL_COUNTER_INC(&pEndpoint->StatIoOpsCompleted); + pEndpoint->cIoOpsCompleted++; + uint64_t tsMsCur = RTTimeMilliTS(); + uint64_t tsInterval = tsMsCur - pEndpoint->tsIntervalStartMs; + if (tsInterval >= 1000) + { + pEndpoint->StatIoOpsPerSec.c = pEndpoint->cIoOpsCompleted / (tsInterval / 1000); + pEndpoint->tsIntervalStartMs = tsMsCur; + pEndpoint->cIoOpsCompleted = 0; + } +} + + +/** + * Registers advanced statistics for the given endpoint. + * + * @returns VBox status code. + * @param pEndpoint The endpoint to register the advanced statistics for. 
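+ *
+ * All counters are registered under "/PDM/AsyncCompletion/File/<filename>/<id>/",
+ * where the numeric id comes from pdmR3AsyncCompletionGetStatId so that several
+ * endpoints opened for files with the same name end up in distinct subtrees.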
+ */ +static int pdmR3AsyncCompletionStatisticsRegister(PPDMASYNCCOMPLETIONENDPOINT pEndpoint) +{ + int rc = VINF_SUCCESS; + PVM pVM = pEndpoint->pEpClass->pVM; + + pEndpoint->tsIntervalStartMs = RTTimeMilliTS(); + + for (unsigned i = 0; i < RT_ELEMENTS(pEndpoint->StatTaskRunTimesNs) && RT_SUCCESS(rc); i++) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatTaskRunTimesNs[i], STAMTYPE_COUNTER, + STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Nanosecond resolution runtime statistics", + "/PDM/AsyncCompletion/File/%s/%d/TaskRun1Ns-%u-%u", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId, i*100, i*100+100-1); + + for (unsigned i = 0; i < RT_ELEMENTS(pEndpoint->StatTaskRunTimesUs) && RT_SUCCESS(rc); i++) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatTaskRunTimesUs[i], STAMTYPE_COUNTER, + STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Microsecond resolution runtime statistics", + "/PDM/AsyncCompletion/File/%s/%d/TaskRun2MicroSec-%u-%u", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId, i*100, i*100+100-1); + + for (unsigned i = 0; i < RT_ELEMENTS(pEndpoint->StatTaskRunTimesMs) && RT_SUCCESS(rc); i++) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatTaskRunTimesMs[i], STAMTYPE_COUNTER, + STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Milliseconds resolution runtime statistics", + "/PDM/AsyncCompletion/File/%s/%d/TaskRun3Ms-%u-%u", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId, i*100, i*100+100-1); + + for (unsigned i = 0; i < RT_ELEMENTS(pEndpoint->StatTaskRunTimesMs) && RT_SUCCESS(rc); i++) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatTaskRunTimesSec[i], STAMTYPE_COUNTER, + STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Second resolution runtime statistics", + "/PDM/AsyncCompletion/File/%s/%d/TaskRun4Sec-%u-%u", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId, i*10, i*10+10-1); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatTaskRunOver100Sec, STAMTYPE_COUNTER, + STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Tasks which ran more than 100sec", + "/PDM/AsyncCompletion/File/%s/%d/TaskRunSecGreater100Sec", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatIoOpsPerSec, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Processed I/O operations per second", + "/PDM/AsyncCompletion/File/%s/%d/IoOpsPerSec", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatIoOpsStarted, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Started I/O operations for this endpoint", + "/PDM/AsyncCompletion/File/%s/%d/IoOpsStarted", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatIoOpsCompleted, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Completed I/O operations for this endpoint", + "/PDM/AsyncCompletion/File/%s/%d/IoOpsCompleted", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSizeSmaller512, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size smaller than 512 bytes", + "/PDM/AsyncCompletion/File/%s/%d/ReqSizeSmaller512", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize512To1K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 512 bytes and 1KB", + 
"/PDM/AsyncCompletion/File/%s/%d/ReqSize512To1K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize1KTo2K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 1KB and 2KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize1KTo2K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize2KTo4K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 2KB and 4KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize2KTo4K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize4KTo8K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 4KB and 8KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize4KTo8K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize8KTo16K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 8KB and 16KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize8KTo16K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize16KTo32K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 16KB and 32KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize16KTo32K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize32KTo64K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 32KB and 64KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize32KTo64K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize64KTo128K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 64KB and 128KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize64KTo128K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize128KTo256K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 128KB and 256KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize128KTo256K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSize256KTo512K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size between 256KB and 512KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSize256KTo512K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqSizeOver512K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests with a size over 512KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqSizeOver512K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqsUnaligned512, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests which size is not aligned to 512 bytes", + "/PDM/AsyncCompletion/File/%s/%d/ReqsUnaligned512", + 
RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqsUnaligned4K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests which size is not aligned to 4KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqsUnaligned4K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + if (RT_SUCCESS(rc)) + rc = STAMR3RegisterF(pVM, &pEndpoint->StatReqsUnaligned8K, STAMTYPE_COUNTER, + STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, + "Number of requests which size is not aligned to 8KB", + "/PDM/AsyncCompletion/File/%s/%d/ReqsUnaligned8K", + RTPathFilename(pEndpoint->pszUri), pEndpoint->iStatId); + + return rc; +} + + +/** + * Deregisters advanced statistics for one endpoint. + * + * @returns nothing. + * @param pEndpoint The endpoint to deregister the advanced statistics for. + */ +static void pdmR3AsyncCompletionStatisticsDeregister(PPDMASYNCCOMPLETIONENDPOINT pEndpoint) +{ + /* I hope this doesn't remove too much... */ + STAMR3DeregisterF(pEndpoint->pEpClass->pVM->pUVM, "/PDM/AsyncCompletion/File/%s/*", RTPathFilename(pEndpoint->pszUri)); +} + + +/** + * Initialize the async completion manager. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + */ +int pdmR3AsyncCompletionInit(PVM pVM) +{ + LogFlowFunc((": pVM=%p\n", pVM)); + + VM_ASSERT_EMT(pVM); + + PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM); + PCFGMNODE pCfgAsyncCompletion = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "AsyncCompletion"); + + int rc = pdmR3AsyncCompletionEpClassInit(pVM, &g_PDMAsyncCompletionEndpointClassFile, pCfgAsyncCompletion); + LogFlowFunc((": pVM=%p rc=%Rrc\n", pVM, rc)); + return rc; +} + + +/** + * Terminates the async completion manager. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + */ +int pdmR3AsyncCompletionTerm(PVM pVM) +{ + LogFlowFunc((": pVM=%p\n", pVM)); + PUVM pUVM = pVM->pUVM; + + for (size_t i = 0; i < RT_ELEMENTS(pUVM->pdm.s.apAsyncCompletionEndpointClass); i++) + if (pUVM->pdm.s.apAsyncCompletionEndpointClass[i]) + pdmR3AsyncCompletionEpClassTerminate(pUVM->pdm.s.apAsyncCompletionEndpointClass[i]); + + return VINF_SUCCESS; +} + + +/** + * Resume worker for the async completion manager. + * + * @returns nothing. + * @param pVM The cross context VM structure. + */ +void pdmR3AsyncCompletionResume(PVM pVM) +{ + LogFlowFunc((": pVM=%p\n", pVM)); + PUVM pUVM = pVM->pUVM; + + /* Log the bandwidth groups and all assigned endpoints. */ + for (size_t i = 0; i < RT_ELEMENTS(pUVM->pdm.s.apAsyncCompletionEndpointClass); i++) + if (pUVM->pdm.s.apAsyncCompletionEndpointClass[i]) + { + PPDMASYNCCOMPLETIONEPCLASS pEpClass = pUVM->pdm.s.apAsyncCompletionEndpointClass[i]; + PPDMACBWMGR pBwMgr = pEpClass->pBwMgrsHead; + PPDMASYNCCOMPLETIONENDPOINT pEp; + + if (pBwMgr) + LogRel(("AIOMgr: Bandwidth groups for class '%s'\n", i == PDMASYNCCOMPLETIONEPCLASSTYPE_FILE + ? "File" : "")); + + while (pBwMgr) + { + LogRel(("AIOMgr: Id: %s\n", pBwMgr->pszId)); + LogRel(("AIOMgr: Max: %u B/s\n", pBwMgr->cbTransferPerSecMax)); + LogRel(("AIOMgr: Start: %u B/s\n", pBwMgr->cbTransferPerSecStart)); + LogRel(("AIOMgr: Step: %u B/s\n", pBwMgr->cbTransferPerSecStep)); + LogRel(("AIOMgr: Endpoints:\n")); + + pEp = pEpClass->pEndpointsHead; + while (pEp) + { + if (pEp->pBwMgr == pBwMgr) + LogRel(("AIOMgr: %s\n", pEp->pszUri)); + + pEp = pEp->pNext; + } + + pBwMgr = pBwMgr->pNext; + } + + /* Print all endpoints without assigned bandwidth groups. 
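+      * They are listed after the per-group output above so that the release log
+      * shows the complete endpoint configuration on resume.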
*/ + pEp = pEpClass->pEndpointsHead; + if (pEp) + LogRel(("AIOMgr: Endpoints without assigned bandwidth groups:\n")); + + while (pEp) + { + if (!pEp->pBwMgr) + LogRel(("AIOMgr: %s\n", pEp->pszUri)); + + pEp = pEp->pNext; + } + } +} + + +/** + * Tries to get a free task from the endpoint or class cache + * allocating the task if it fails. + * + * @returns Pointer to a new and initialized task or NULL + * @param pEndpoint The endpoint the task is for. + * @param pvUser Opaque user data for the task. + */ +static PPDMASYNCCOMPLETIONTASK pdmR3AsyncCompletionGetTask(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, void *pvUser) +{ + PPDMASYNCCOMPLETIONEPCLASS pEndpointClass = pEndpoint->pEpClass; + PPDMASYNCCOMPLETIONTASK pTask = (PPDMASYNCCOMPLETIONTASK)RTMemCacheAlloc(pEndpointClass->hMemCacheTasks); + if (RT_LIKELY(pTask)) + { + /* Initialize common parts. */ + pTask->pvUser = pvUser; + pTask->pEndpoint = pEndpoint; + /* Clear list pointers for safety. */ + pTask->pPrev = NULL; + pTask->pNext = NULL; + pTask->tsNsStart = RTTimeNanoTS(); + STAM_REL_COUNTER_INC(&pEndpoint->StatIoOpsStarted); + } + + return pTask; +} + + +/** + * Puts a task in one of the caches. + * + * @returns nothing. + * @param pEndpoint The endpoint the task belongs to. + * @param pTask The task to cache. + */ +static void pdmR3AsyncCompletionPutTask(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, PPDMASYNCCOMPLETIONTASK pTask) +{ + PPDMASYNCCOMPLETIONEPCLASS pEndpointClass = pEndpoint->pEpClass; + uint64_t cNsRun = RTTimeNanoTS() - pTask->tsNsStart; + + if (RT_UNLIKELY(cNsRun >= RT_NS_10SEC)) + LogRel(("AsyncCompletion: Task %#p completed after %llu seconds\n", pTask, cNsRun / RT_NS_1SEC)); + + if (pEndpointClass->fGatherAdvancedStatistics) + pdmR3AsyncCompletionStatisticsRecordCompletionTime(pEndpoint, cNsRun); + + RTMemCacheFree(pEndpointClass->hMemCacheTasks, pTask); +} + + +static unsigned +pdmR3AsyncCompletionGetStatId(PPDMASYNCCOMPLETIONEPCLASS pEndpointClass, const char *pszUri) +{ + PPDMASYNCCOMPLETIONENDPOINT pEndpoint = pEndpointClass->pEndpointsHead; + const char *pszFilename = RTPathFilename(pszUri); + unsigned iStatId = 0; + + while (pEndpoint) + { + if ( !RTStrCmp(RTPathFilename(pEndpoint->pszUri), pszFilename) + && pEndpoint->iStatId >= iStatId) + iStatId = pEndpoint->iStatId + 1; + + pEndpoint = pEndpoint->pNext; + } + + return iStatId; +} + +/** + * Opens a file as an async completion endpoint. + * + * @returns VBox status code. + * @param ppEndpoint Where to store the opaque endpoint handle on success. + * @param pszFilename Path to the file which is to be opened. (UTF-8) + * @param fFlags Open flags, see grp_pdmacep_file_flags. + * @param pTemplate Handle to the completion callback template to use + * for this end point. + */ +VMMR3DECL(int) PDMR3AsyncCompletionEpCreateForFile(PPPDMASYNCCOMPLETIONENDPOINT ppEndpoint, + const char *pszFilename, uint32_t fFlags, + PPDMASYNCCOMPLETIONTEMPLATE pTemplate) +{ + LogFlowFunc((": ppEndpoint=%p pszFilename=%p{%s} fFlags=%u pTemplate=%p\n", + ppEndpoint, pszFilename, pszFilename, fFlags, pTemplate)); + + /* Sanity checks. */ + AssertPtrReturn(ppEndpoint, VERR_INVALID_POINTER); + AssertPtrReturn(pszFilename, VERR_INVALID_POINTER); + AssertPtrReturn(pTemplate, VERR_INVALID_POINTER); + + /* Check that the flags are valid. 
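+      * Only PDMACEP_FILE_FLAGS_READ_ONLY, PDMACEP_FILE_FLAGS_DONT_LOCK and
+      * PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED are accepted here; any other bit
+      * makes the call fail with VERR_INVALID_PARAMETER.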
*/ + AssertReturn(((~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_DONT_LOCK | PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED) & fFlags) == 0), + VERR_INVALID_PARAMETER); + + PVM pVM = pTemplate->pVM; + PUVM pUVM = pVM->pUVM; + PPDMASYNCCOMPLETIONEPCLASS pEndpointClass = pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE]; + PPDMASYNCCOMPLETIONENDPOINT pEndpoint = NULL; + + AssertMsg(pEndpointClass, ("File endpoint class was not initialized\n")); + + /* Create an endpoint. */ + int rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_ASYNC_COMPLETION, + pEndpointClass->pEndpointOps->cbEndpoint, + (void **)&pEndpoint); + if (RT_SUCCESS(rc)) + { + /* Initialize common parts. */ + pEndpoint->pNext = NULL; + pEndpoint->pPrev = NULL; + pEndpoint->pEpClass = pEndpointClass; + pEndpoint->pTemplate = pTemplate; + pEndpoint->pszUri = RTStrDup(pszFilename); + pEndpoint->iStatId = pdmR3AsyncCompletionGetStatId(pEndpointClass, pszFilename); + pEndpoint->pBwMgr = NULL; + + if ( pEndpoint->pszUri + && RT_SUCCESS(rc)) + { + /* Call the initializer for the endpoint. */ + rc = pEndpointClass->pEndpointOps->pfnEpInitialize(pEndpoint, pszFilename, fFlags); + if (RT_SUCCESS(rc)) + { + if (pEndpointClass->fGatherAdvancedStatistics) + rc = pdmR3AsyncCompletionStatisticsRegister(pEndpoint); + + if (RT_SUCCESS(rc)) + { + /* Link it into the list of endpoints. */ + rc = RTCritSectEnter(&pEndpointClass->CritSect); + AssertMsg(RT_SUCCESS(rc), ("Failed to enter critical section rc=%Rrc\n", rc)); + + pEndpoint->pNext = pEndpointClass->pEndpointsHead; + if (pEndpointClass->pEndpointsHead) + pEndpointClass->pEndpointsHead->pPrev = pEndpoint; + + pEndpointClass->pEndpointsHead = pEndpoint; + pEndpointClass->cEndpoints++; + + rc = RTCritSectLeave(&pEndpointClass->CritSect); + AssertMsg(RT_SUCCESS(rc), ("Failed to enter critical section rc=%Rrc\n", rc)); + + /* Reference the template. */ + ASMAtomicIncU32(&pTemplate->cUsed); + + *ppEndpoint = pEndpoint; + LogFlowFunc((": Created endpoint for %s\n", pszFilename)); + return VINF_SUCCESS; + } + else + pEndpointClass->pEndpointOps->pfnEpClose(pEndpoint); + + if (pEndpointClass->fGatherAdvancedStatistics) + pdmR3AsyncCompletionStatisticsDeregister(pEndpoint); + } + RTStrFree(pEndpoint->pszUri); + } + MMR3HeapFree(pEndpoint); + } + + LogFlowFunc((": Creation of endpoint for %s failed: rc=%Rrc\n", pszFilename, rc)); + return rc; +} + + +/** + * Closes a endpoint waiting for any pending tasks to finish. + * + * @returns nothing. + * @param pEndpoint Handle of the endpoint. + */ +VMMR3DECL(void) PDMR3AsyncCompletionEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint) +{ + LogFlowFunc((": pEndpoint=%p\n", pEndpoint)); + + /* Sanity checks. */ + AssertReturnVoid(VALID_PTR(pEndpoint)); + + PPDMASYNCCOMPLETIONEPCLASS pEndpointClass = pEndpoint->pEpClass; + pEndpointClass->pEndpointOps->pfnEpClose(pEndpoint); + + /* Drop reference from the template. */ + ASMAtomicDecU32(&pEndpoint->pTemplate->cUsed); + + /* Unlink the endpoint from the list. 
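+      * This is done under the class critical section; afterwards the advanced
+      * statistics are deregistered and the URI string and endpoint memory are
+      * freed.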
*/ + int rc = RTCritSectEnter(&pEndpointClass->CritSect); + AssertMsg(RT_SUCCESS(rc), ("Failed to enter critical section rc=%Rrc\n", rc)); + + PPDMASYNCCOMPLETIONENDPOINT pEndpointNext = pEndpoint->pNext; + PPDMASYNCCOMPLETIONENDPOINT pEndpointPrev = pEndpoint->pPrev; + + if (pEndpointPrev) + pEndpointPrev->pNext = pEndpointNext; + else + pEndpointClass->pEndpointsHead = pEndpointNext; + if (pEndpointNext) + pEndpointNext->pPrev = pEndpointPrev; + + pEndpointClass->cEndpoints--; + + rc = RTCritSectLeave(&pEndpointClass->CritSect); + AssertMsg(RT_SUCCESS(rc), ("Failed to enter critical section rc=%Rrc\n", rc)); + + if (pEndpointClass->fGatherAdvancedStatistics) + pdmR3AsyncCompletionStatisticsDeregister(pEndpoint); + + RTStrFree(pEndpoint->pszUri); + MMR3HeapFree(pEndpoint); +} + + +/** + * Creates a read task on the given endpoint. + * + * @returns VBox status code. + * @param pEndpoint The file endpoint to read from. + * @param off Where to start reading from. + * @param paSegments Scatter gather list to store the data in. + * @param cSegments Number of segments in the list. + * @param cbRead The overall number of bytes to read. + * @param pvUser Opaque user data returned in the completion callback + * upon completion of the task. + * @param ppTask Where to store the task handle on success. + */ +VMMR3DECL(int) PDMR3AsyncCompletionEpRead(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, unsigned cSegments, + size_t cbRead, void *pvUser, + PPPDMASYNCCOMPLETIONTASK ppTask) +{ + AssertPtrReturn(pEndpoint, VERR_INVALID_POINTER); + AssertPtrReturn(paSegments, VERR_INVALID_POINTER); + AssertPtrReturn(ppTask, VERR_INVALID_POINTER); + AssertReturn(cSegments > 0, VERR_INVALID_PARAMETER); + AssertReturn(cbRead > 0, VERR_INVALID_PARAMETER); + AssertReturn(off >= 0, VERR_INVALID_PARAMETER); + + PPDMASYNCCOMPLETIONTASK pTask; + + pTask = pdmR3AsyncCompletionGetTask(pEndpoint, pvUser); + if (!pTask) + return VERR_NO_MEMORY; + + int rc = pEndpoint->pEpClass->pEndpointOps->pfnEpRead(pTask, pEndpoint, off, + paSegments, cSegments, cbRead); + if (RT_SUCCESS(rc)) + { + if (pEndpoint->pEpClass->fGatherAdvancedStatistics) + pdmR3AsyncCompletionStatisticsRecordSize(pEndpoint, cbRead); + + *ppTask = pTask; + } + else + pdmR3AsyncCompletionPutTask(pEndpoint, pTask); + + return rc; +} + + +/** + * Creates a write task on the given endpoint. + * + * @returns VBox status code. + * @param pEndpoint The file endpoint to write to. + * @param off Where to start writing at. + * @param paSegments Scatter gather list of the data to write. + * @param cSegments Number of segments in the list. + * @param cbWrite The overall number of bytes to write. + * @param pvUser Opaque user data returned in the completion callback + * upon completion of the task. + * @param ppTask Where to store the task handle on success. 
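+ *
+ * A minimal, purely illustrative call sequence (pvBuf, cbBuf, offStart and
+ * pvUser are placeholder names, not part of this API):
+ * @code
+ *      RTSGSEG Seg;
+ *      Seg.pvSeg = pvBuf;
+ *      Seg.cbSeg = cbBuf;
+ *      PPDMASYNCCOMPLETIONTASK pTask;
+ *      int rc = PDMR3AsyncCompletionEpWrite(pEndpoint, offStart, &Seg, 1 /*cSegments*/,
+ *                                           cbBuf, pvUser, &pTask);
+ * @endcode
+ * The completion callback of the endpoint's template is invoked with pvUser once
+ * the data has been written.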
+ */ +VMMR3DECL(int) PDMR3AsyncCompletionEpWrite(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, unsigned cSegments, + size_t cbWrite, void *pvUser, + PPPDMASYNCCOMPLETIONTASK ppTask) +{ + AssertPtrReturn(pEndpoint, VERR_INVALID_POINTER); + AssertPtrReturn(paSegments, VERR_INVALID_POINTER); + AssertPtrReturn(ppTask, VERR_INVALID_POINTER); + AssertReturn(cSegments > 0, VERR_INVALID_PARAMETER); + AssertReturn(cbWrite > 0, VERR_INVALID_PARAMETER); + AssertReturn(off >= 0, VERR_INVALID_PARAMETER); + + PPDMASYNCCOMPLETIONTASK pTask; + + pTask = pdmR3AsyncCompletionGetTask(pEndpoint, pvUser); + if (!pTask) + return VERR_NO_MEMORY; + + int rc = pEndpoint->pEpClass->pEndpointOps->pfnEpWrite(pTask, pEndpoint, off, + paSegments, cSegments, cbWrite); + if (RT_SUCCESS(rc)) + { + if (pEndpoint->pEpClass->fGatherAdvancedStatistics) + pdmR3AsyncCompletionStatisticsRecordSize(pEndpoint, cbWrite); + + *ppTask = pTask; + } + else + pdmR3AsyncCompletionPutTask(pEndpoint, pTask); + + return rc; +} + + +/** + * Creates a flush task on the given endpoint. + * + * Every read and write task initiated before the flush task is + * finished upon completion of this task. + * + * @returns VBox status code. + * @param pEndpoint The file endpoint to flush. + * @param pvUser Opaque user data returned in the completion callback + * upon completion of the task. + * @param ppTask Where to store the task handle on success. + */ +VMMR3DECL(int) PDMR3AsyncCompletionEpFlush(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, void *pvUser, PPPDMASYNCCOMPLETIONTASK ppTask) +{ + AssertPtrReturn(pEndpoint, VERR_INVALID_POINTER); + AssertPtrReturn(ppTask, VERR_INVALID_POINTER); + + PPDMASYNCCOMPLETIONTASK pTask; + + pTask = pdmR3AsyncCompletionGetTask(pEndpoint, pvUser); + if (!pTask) + return VERR_NO_MEMORY; + + int rc = pEndpoint->pEpClass->pEndpointOps->pfnEpFlush(pTask, pEndpoint); + if (RT_SUCCESS(rc)) + *ppTask = pTask; + else + pdmR3AsyncCompletionPutTask(pEndpoint, pTask); + + return rc; +} + + +/** + * Queries the size of an endpoint. + * + * Not that some endpoints may not support this and will return an error + * (sockets for example). + * + * @returns VBox status code. + * @retval VERR_NOT_SUPPORTED if the endpoint does not support this operation. + * @param pEndpoint The file endpoint. + * @param pcbSize Where to store the size of the endpoint. + */ +VMMR3DECL(int) PDMR3AsyncCompletionEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, + uint64_t *pcbSize) +{ + AssertPtrReturn(pEndpoint, VERR_INVALID_POINTER); + AssertPtrReturn(pcbSize, VERR_INVALID_POINTER); + + if (pEndpoint->pEpClass->pEndpointOps->pfnEpGetSize) + return pEndpoint->pEpClass->pEndpointOps->pfnEpGetSize(pEndpoint, pcbSize); + return VERR_NOT_SUPPORTED; +} + + +/** + * Sets the size of an endpoint. + * + * Not that some endpoints may not support this and will return an error + * (sockets for example). + * + * @returns VBox status code. + * @retval VERR_NOT_SUPPORTED if the endpoint does not support this operation. + * @param pEndpoint The file endpoint. + * @param cbSize The size to set. + * + * @note PDMR3AsyncCompletionEpFlush should be called before this operation is executed. 
+ */ +VMMR3DECL(int) PDMR3AsyncCompletionEpSetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t cbSize) +{ + AssertPtrReturn(pEndpoint, VERR_INVALID_POINTER); + + if (pEndpoint->pEpClass->pEndpointOps->pfnEpSetSize) + return pEndpoint->pEpClass->pEndpointOps->pfnEpSetSize(pEndpoint, cbSize); + return VERR_NOT_SUPPORTED; +} + + +/** + * Assigns or removes a bandwidth control manager to/from the endpoint. + * + * @returns VBox status code. + * @param pEndpoint The endpoint. + * @param pszBwMgr The identifer of the new bandwidth manager to assign + * or NULL to remove the current one. + */ +VMMR3DECL(int) PDMR3AsyncCompletionEpSetBwMgr(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, const char *pszBwMgr) +{ + AssertPtrReturn(pEndpoint, VERR_INVALID_POINTER); + PPDMACBWMGR pBwMgrOld = NULL; + PPDMACBWMGR pBwMgrNew = NULL; + + int rc = VINF_SUCCESS; + if (pszBwMgr) + { + pBwMgrNew = pdmacBwMgrFindById(pEndpoint->pEpClass, pszBwMgr); + if (pBwMgrNew) + pdmacBwMgrRetain(pBwMgrNew); + else + rc = VERR_NOT_FOUND; + } + + if (RT_SUCCESS(rc)) + { + pBwMgrOld = ASMAtomicXchgPtrT(&pEndpoint->pBwMgr, pBwMgrNew, PPDMACBWMGR); + if (pBwMgrOld) + pdmacBwMgrRelease(pBwMgrOld); + } + + return rc; +} + + +/** + * Cancels an async completion task. + * + * If you want to use this method, you have to take great create to make sure + * you will never attempt cancel a task which has been completed. Since there is + * no reference counting or anything on the task it self, you have to serialize + * the cancelation and completion paths such that the aren't racing one another. + * + * @returns VBox status code + * @param pTask The Task to cancel. + */ +VMMR3DECL(int) PDMR3AsyncCompletionTaskCancel(PPDMASYNCCOMPLETIONTASK pTask) +{ + NOREF(pTask); + return VERR_NOT_IMPLEMENTED; +} + + +/** + * Changes the limit of a bandwidth manager for file endpoints to the given value. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszBwMgr The identifer of the bandwidth manager to change. + * @param cbMaxNew The new maximum for the bandwidth manager in bytes/sec. + */ +VMMR3DECL(int) PDMR3AsyncCompletionBwMgrSetMaxForFile(PUVM pUVM, const char *pszBwMgr, uint32_t cbMaxNew) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszBwMgr, VERR_INVALID_POINTER); + + int rc = VINF_SUCCESS; + PPDMASYNCCOMPLETIONEPCLASS pEpClass = pVM->pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE]; + PPDMACBWMGR pBwMgr = pdmacBwMgrFindById(pEpClass, pszBwMgr); + if (pBwMgr) + { + /* + * Set the new value for the start and max value to let the manager pick up + * the new limit immediately. + */ + ASMAtomicWriteU32(&pBwMgr->cbTransferPerSecMax, cbMaxNew); + ASMAtomicWriteU32(&pBwMgr->cbTransferPerSecStart, cbMaxNew); + } + else + rc = VERR_NOT_FOUND; + + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/PDMAsyncCompletionFile.cpp b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFile.cpp new file mode 100644 index 00000000..89c6f3fb --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFile.cpp @@ -0,0 +1,1293 @@ +/* $Id: PDMAsyncCompletionFile.cpp $ */ +/** @file + * PDM Async I/O - Transport data asynchronous in R3 using EMT. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "PDMAsyncCompletionFileInternal.h" + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_DEBUGGER +static FNDBGCCMD pdmacEpFileErrorInject; +# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY +static FNDBGCCMD pdmacEpFileDelayInject; +# endif +#endif + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_DEBUGGER +static const DBGCVARDESC g_aInjectErrorArgs[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 1, 1, DBGCVAR_CAT_STRING, 0, "direction", "write/read." }, + { 1, 1, DBGCVAR_CAT_STRING, 0, "filename", "Filename." }, + { 1, 1, DBGCVAR_CAT_NUMBER, 0, "errcode", "VBox status code." }, +}; + +# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY +static const DBGCVARDESC g_aInjectDelayArgs[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 1, 1, DBGCVAR_CAT_STRING, 0, "direction", "write|read|flush|any." }, + { 1, 1, DBGCVAR_CAT_STRING, 0, "filename", "Filename." }, + { 1, 1, DBGCVAR_CAT_NUMBER, 0, "delay", "Delay in milliseconds." }, + { 1, 1, DBGCVAR_CAT_NUMBER, 0, "jitter", "Jitter of the delay." }, + { 1, 1, DBGCVAR_CAT_NUMBER, 0, "reqs", "Number of requests to delay." } + +}; +# endif + +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax,.pszDescription */ + { "injecterror", 3, 3, &g_aInjectErrorArgs[0], 3, 0, pdmacEpFileErrorInject, "", "Inject error into I/O subsystem." } +# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY + ,{ "injectdelay", 3, 5, &g_aInjectDelayArgs[0], RT_ELEMENTS(g_aInjectDelayArgs), 0, pdmacEpFileDelayInject, "", "Inject a delay of a request." } +# endif +}; +#endif + + +/** + * Frees a task. + * + * @returns nothing. + * @param pEndpoint Pointer to the endpoint the segment was for. + * @param pTask The task to free. 
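+ *
+ * The task is returned to the small per-endpoint cache if there is room
+ * (bounded by cTasksCacheMax of the endpoint class); otherwise its memory is
+ * handed back to the heap.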
+ */ +void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask) +{ + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass; + + LogFlowFunc((": pEndpoint=%p pTask=%p\n", pEndpoint, pTask)); + + /* Try the per endpoint cache first. */ + if (pEndpoint->cTasksCached < pEpClass->cTasksCacheMax) + { + /* Add it to the list. */ + pEndpoint->pTasksFreeTail->pNext = pTask; + pEndpoint->pTasksFreeTail = pTask; + ASMAtomicIncU32(&pEndpoint->cTasksCached); + } + else + { + Log(("Freeing task %p because all caches are full\n", pTask)); + MMR3HeapFree(pTask); + } +} + +/** + * Allocates a task segment + * + * @returns Pointer to the new task segment or NULL + * @param pEndpoint Pointer to the endpoint + */ +PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + PPDMACTASKFILE pTask = NULL; + + /* Try the small per endpoint cache first. */ + if (pEndpoint->pTasksFreeHead == pEndpoint->pTasksFreeTail) + { + /* Try the bigger endpoint class cache. */ + PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass; + + /* + * Allocate completely new. + * If this fails we return NULL. + */ + int rc = MMR3HeapAllocZEx(pEndpointClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, + sizeof(PDMACTASKFILE), + (void **)&pTask); + if (RT_FAILURE(rc)) + pTask = NULL; + + LogFlow(("Allocated task %p\n", pTask)); + } + else + { + /* Grab a free task from the head. */ + AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n")); + + pTask = pEndpoint->pTasksFreeHead; + pEndpoint->pTasksFreeHead = pTask->pNext; + ASMAtomicDecU32(&pEndpoint->cTasksCached); + } + + pTask->pNext = NULL; + + return pTask; +} + +PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + /* + * Get pending tasks. + */ + PPDMACTASKFILE pTasks = ASMAtomicXchgPtrT(&pEndpoint->pTasksNewHead, NULL, PPDMACTASKFILE); + + /* Reverse the list to process in FIFO order. */ + if (pTasks) + { + PPDMACTASKFILE pTask = pTasks; + + pTasks = NULL; + + while (pTask) + { + PPDMACTASKFILE pCur = pTask; + pTask = pTask->pNext; + pCur->pNext = pTasks; + pTasks = pCur; + } + } + + return pTasks; +} + +static void pdmacFileAioMgrWakeup(PPDMACEPFILEMGR pAioMgr) +{ + bool fWokenUp = ASMAtomicXchgBool(&pAioMgr->fWokenUp, true); + if (!fWokenUp) + { + bool fWaitingEventSem = ASMAtomicReadBool(&pAioMgr->fWaitingEventSem); + if (fWaitingEventSem) + { + int rc = RTSemEventSignal(pAioMgr->EventSem); + AssertRC(rc); + } + } +} + +static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent) +{ + ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, enmEvent); + Assert(!pAioMgr->fBlockingEventPending); + ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, true); + + /* Wakeup the async I/O manager */ + pdmacFileAioMgrWakeup(pAioMgr); + + /* Wait for completion. */ + int rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT); + AssertRC(rc); + + ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, false); + ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID); + + return rc; +} + +int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p{%s}\n", pAioMgr, pEndpoint, pEndpoint->Core.pszUri)); + + /* Update the assigned I/O manager. 
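+      * The pointer is published before the blocking ADD_ENDPOINT event is sent,
+      * so tasks queued in the meantime already wake up the new manager.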
*/ + ASMAtomicWritePtr(&pEndpoint->pAioMgr, pAioMgr); + + int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent); + AssertRCReturn(rc, rc); + + ASMAtomicWritePtr(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, pEndpoint); + rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT); + ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint); + + RTCritSectLeave(&pAioMgr->CritSectBlockingEvent); + + return rc; +} + +#ifdef SOME_UNUSED_FUNCTION +static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent); + AssertRCReturn(rc, rc); + + ASMAtomicWritePtr(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, pEndpoint); + rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT); + ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint); + + RTCritSectLeave(&pAioMgr->CritSectBlockingEvent); + + return rc; +} +#endif + +static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent); + AssertRCReturn(rc, rc); + + ASMAtomicWritePtr(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, pEndpoint); + rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT); + ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint); + + RTCritSectLeave(&pAioMgr->CritSectBlockingEvent); + + return rc; +} + +static int pdmacFileAioMgrShutdown(PPDMACEPFILEMGR pAioMgr) +{ + int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent); + AssertRCReturn(rc, rc); + + rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN); + + RTCritSectLeave(&pAioMgr->CritSectBlockingEvent); + + return rc; +} + +int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask) +{ + PPDMACTASKFILE pNext; + do + { + pNext = pEndpoint->pTasksNewHead; + pTask->pNext = pNext; + } while (!ASMAtomicCmpXchgPtr(&pEndpoint->pTasksNewHead, pTask, pNext)); + + pdmacFileAioMgrWakeup(ASMAtomicReadPtrT(&pEndpoint->pAioMgr, PPDMACEPFILEMGR)); + + return VINF_SUCCESS; +} + +static DECLCALLBACK(void) pdmacFileEpTaskCompleted(PPDMACTASKFILE pTask, void *pvUser, int rc) +{ + PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pvUser; + + LogFlowFunc(("pTask=%#p pvUser=%#p rc=%Rrc\n", pTask, pvUser, rc)); + + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH) + pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, rc, true); + else + { + Assert((uint32_t)pTask->DataSeg.cbSeg == pTask->DataSeg.cbSeg && (int32_t)pTask->DataSeg.cbSeg >= 0); + uint32_t uOld = ASMAtomicSubS32(&pTaskFile->cbTransferLeft, (int32_t)pTask->DataSeg.cbSeg); + + /* The first error will be returned. */ + if (RT_FAILURE(rc)) + ASMAtomicCmpXchgS32(&pTaskFile->rc, rc, VINF_SUCCESS); +#ifdef VBOX_WITH_DEBUGGER + else + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint; + + /* Overwrite with injected error code. 
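+          * The status set via the '.injecterror' debugger command is consumed
+          * exactly once: the atomic exchange resets rcReqRead / rcReqWrite back
+          * to VINF_SUCCESS.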
*/ + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ) + rc = ASMAtomicXchgS32(&pEpFile->rcReqRead, VINF_SUCCESS); + else + rc = ASMAtomicXchgS32(&pEpFile->rcReqWrite, VINF_SUCCESS); + + if (RT_FAILURE(rc)) + ASMAtomicCmpXchgS32(&pTaskFile->rc, rc, VINF_SUCCESS); + } +#endif + + if (!(uOld - pTask->DataSeg.cbSeg) + && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true)) + { +#ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint; + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEpFile->Core.pEpClass; + + /* Check if we should delay completion of the request. */ + if ( ASMAtomicReadU32(&pEpFile->msDelay) > 0 + && ASMAtomicReadU32(&pEpFile->cReqsDelay) > 0) + { + uint64_t tsDelay = pEpFile->msDelay; + + if (pEpFile->msJitter) + tsDelay = (RTRandU32() % 100) > 50 ? pEpFile->msDelay + (RTRandU32() % pEpFile->msJitter) + : pEpFile->msDelay - (RTRandU32() % pEpFile->msJitter); + ASMAtomicDecU32(&pEpFile->cReqsDelay); + + /* Arm the delay. */ + pTaskFile->tsDelayEnd = RTTimeProgramMilliTS() + tsDelay; + + /* Append to the list. */ + PPDMASYNCCOMPLETIONTASKFILE pHead = NULL; + do + { + pHead = ASMAtomicReadPtrT(&pEpFile->pDelayedHead, PPDMASYNCCOMPLETIONTASKFILE); + pTaskFile->pDelayedNext = pHead; + } while (!ASMAtomicCmpXchgPtr(&pEpFile->pDelayedHead, pTaskFile, pHead)); + + if (tsDelay < pEpClassFile->cMilliesNext) + { + ASMAtomicWriteU64(&pEpClassFile->cMilliesNext, tsDelay); + TMTimerSetMillies(pEpClassFile->pTimer, tsDelay); + } + + LogRel(("AIOMgr: Delaying request %#p for %u ms\n", pTaskFile, tsDelay)); + } + else +#endif + pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, pTaskFile->rc, true); + } + } +} + +DECLINLINE(void) pdmacFileEpTaskInit(PPDMASYNCCOMPLETIONTASK pTask, size_t cbTransfer) +{ + PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask; + + Assert((uint32_t)cbTransfer == cbTransfer && (int32_t)cbTransfer >= 0); + ASMAtomicWriteS32(&pTaskFile->cbTransferLeft, (int32_t)cbTransfer); + ASMAtomicWriteBool(&pTaskFile->fCompleted, false); + ASMAtomicWriteS32(&pTaskFile->rc, VINF_SUCCESS); +} + +int pdmacFileEpTaskInitiate(PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, size_t cSegments, + size_t cbTransfer, PDMACTASKFILETRANSFER enmTransfer) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask; + + Assert( (enmTransfer == PDMACTASKFILETRANSFER_READ) + || (enmTransfer == PDMACTASKFILETRANSFER_WRITE)); + + for (size_t i = 0; i < cSegments; i++) + { + PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile); + AssertPtr(pIoTask); + + pIoTask->pEndpoint = pEpFile; + pIoTask->enmTransferType = enmTransfer; + pIoTask->Off = off; + pIoTask->DataSeg.cbSeg = paSegments[i].cbSeg; + pIoTask->DataSeg.pvSeg = paSegments[i].pvSeg; + pIoTask->pvUser = pTaskFile; + pIoTask->pfnCompleted = pdmacFileEpTaskCompleted; + + /* Send it off to the I/O manager. */ + pdmacFileEpAddTask(pEpFile, pIoTask); + off += paSegments[i].cbSeg; + cbTransfer -= paSegments[i].cbSeg; + } + + AssertMsg(!cbTransfer, ("Incomplete transfer %u bytes left\n", cbTransfer)); + + return VINF_AIO_TASK_PENDING; +} + +/** + * Creates a new async I/O manager. + * + * @returns VBox status code. + * @param pEpClass Pointer to the endpoint class data. 
+ * @param ppAioMgr Where to store the pointer to the new async I/O manager on success. + * @param enmMgrType Wanted manager type - can be overwritten by the global override. + */ +int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr, + PDMACEPFILEMGRTYPE enmMgrType) +{ + LogFlowFunc((": Entered\n")); + + PPDMACEPFILEMGR pAioMgrNew; + int rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew); + if (RT_SUCCESS(rc)) + { + if (enmMgrType < pEpClass->enmMgrTypeOverride) + pAioMgrNew->enmMgrType = enmMgrType; + else + pAioMgrNew->enmMgrType = pEpClass->enmMgrTypeOverride; + + pAioMgrNew->msBwLimitExpired = RT_INDEFINITE_WAIT; + + rc = RTSemEventCreate(&pAioMgrNew->EventSem); + if (RT_SUCCESS(rc)) + { + rc = RTSemEventCreate(&pAioMgrNew->EventSemBlock); + if (RT_SUCCESS(rc)) + { + rc = RTCritSectInit(&pAioMgrNew->CritSectBlockingEvent); + if (RT_SUCCESS(rc)) + { + /* Init the rest of the manager. */ + if (pAioMgrNew->enmMgrType != PDMACEPFILEMGRTYPE_SIMPLE) + rc = pdmacFileAioMgrNormalInit(pAioMgrNew); + + if (RT_SUCCESS(rc)) + { + pAioMgrNew->enmState = PDMACEPFILEMGRSTATE_RUNNING; + + rc = RTThreadCreateF(&pAioMgrNew->Thread, + pAioMgrNew->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE + ? pdmacFileAioMgrFailsafe + : pdmacFileAioMgrNormal, + pAioMgrNew, + 0, + RTTHREADTYPE_IO, + 0, + "AioMgr%d-%s", pEpClass->cAioMgrs, + pAioMgrNew->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE + ? "F" + : "N"); + if (RT_SUCCESS(rc)) + { + /* Link it into the list. */ + RTCritSectEnter(&pEpClass->CritSect); + pAioMgrNew->pNext = pEpClass->pAioMgrHead; + if (pEpClass->pAioMgrHead) + pEpClass->pAioMgrHead->pPrev = pAioMgrNew; + pEpClass->pAioMgrHead = pAioMgrNew; + pEpClass->cAioMgrs++; + RTCritSectLeave(&pEpClass->CritSect); + + *ppAioMgr = pAioMgrNew; + + Log(("PDMAC: Successfully created new file AIO Mgr {%s}\n", RTThreadGetName(pAioMgrNew->Thread))); + return VINF_SUCCESS; + } + pdmacFileAioMgrNormalDestroy(pAioMgrNew); + } + RTCritSectDelete(&pAioMgrNew->CritSectBlockingEvent); + } + RTSemEventDestroy(pAioMgrNew->EventSem); + } + RTSemEventDestroy(pAioMgrNew->EventSemBlock); + } + MMR3HeapFree(pAioMgrNew); + } + + LogFlowFunc((": Leave rc=%Rrc\n", rc)); + + return rc; +} + +/** + * Destroys a async I/O manager. + * + * @returns nothing. + * @param pEpClassFile Pointer to globals for the file endpoint class. + * @param pAioMgr The async I/O manager to destroy. + */ +static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr) +{ + int rc = pdmacFileAioMgrShutdown(pAioMgr); + AssertRC(rc); + + /* Unlink from the list. */ + rc = RTCritSectEnter(&pEpClassFile->CritSect); + AssertRC(rc); + + PPDMACEPFILEMGR pPrev = pAioMgr->pPrev; + PPDMACEPFILEMGR pNext = pAioMgr->pNext; + + if (pPrev) + pPrev->pNext = pNext; + else + pEpClassFile->pAioMgrHead = pNext; + + if (pNext) + pNext->pPrev = pPrev; + + pEpClassFile->cAioMgrs--; + rc = RTCritSectLeave(&pEpClassFile->CritSect); + AssertRC(rc); + + /* Free the resources. 
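+     * The manager thread was told to terminate by pdmacFileAioMgrShutdown above,
+     * so the blocking event critical section and the event semaphores can be
+     * destroyed safely before the manager structure itself is freed.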
*/ + RTCritSectDelete(&pAioMgr->CritSectBlockingEvent); + RTSemEventDestroy(pAioMgr->EventSem); + RTSemEventDestroy(pAioMgr->EventSemBlock); + if (pAioMgr->enmMgrType != PDMACEPFILEMGRTYPE_SIMPLE) + pdmacFileAioMgrNormalDestroy(pAioMgr); + + MMR3HeapFree(pAioMgr); +} + +static int pdmacFileMgrTypeFromName(const char *pszVal, PPDMACEPFILEMGRTYPE penmMgrType) +{ + int rc = VINF_SUCCESS; + + if (!RTStrCmp(pszVal, "Simple")) + *penmMgrType = PDMACEPFILEMGRTYPE_SIMPLE; + else if (!RTStrCmp(pszVal, "Async")) + *penmMgrType = PDMACEPFILEMGRTYPE_ASYNC; + else + rc = VERR_CFGM_CONFIG_UNKNOWN_VALUE; + + return rc; +} + +static const char *pdmacFileMgrTypeToName(PDMACEPFILEMGRTYPE enmMgrType) +{ + if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE) + return "Simple"; + if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC) + return "Async"; + + return NULL; +} + +static int pdmacFileBackendTypeFromName(const char *pszVal, PPDMACFILEEPBACKEND penmBackendType) +{ + int rc = VINF_SUCCESS; + + if (!RTStrCmp(pszVal, "Buffered")) + *penmBackendType = PDMACFILEEPBACKEND_BUFFERED; + else if (!RTStrCmp(pszVal, "NonBuffered")) + *penmBackendType = PDMACFILEEPBACKEND_NON_BUFFERED; + else + rc = VERR_CFGM_CONFIG_UNKNOWN_VALUE; + + return rc; +} + +static const char *pdmacFileBackendTypeToName(PDMACFILEEPBACKEND enmBackendType) +{ + if (enmBackendType == PDMACFILEEPBACKEND_BUFFERED) + return "Buffered"; + if (enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED) + return "NonBuffered"; + + return NULL; +} + +#ifdef VBOX_WITH_DEBUGGER + +/** + * @callback_method_impl{FNDBGCCMD, The '.injecterror' command.} + */ +static DECLCALLBACK(int) pdmacEpFileErrorInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR pArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, -1, cArgs == 3); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, pArgs[0].enmType == DBGCVAR_TYPE_STRING); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 1, pArgs[1].enmType == DBGCVAR_TYPE_STRING); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 2, pArgs[2].enmType == DBGCVAR_TYPE_NUMBER); + + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile; + pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE]; + + /* Syntax is "read|write " */ + bool fWrite; + if (!RTStrCmp(pArgs[0].u.pszString, "read")) + fWrite = false; + else if (!RTStrCmp(pArgs[0].u.pszString, "write")) + fWrite = true; + else + return DBGCCmdHlpFail(pCmdHlp, pCmd, "invalid transfer direction '%s'", pArgs[0].u.pszString); + + int32_t rcToInject = (int32_t)pArgs[2].u.u64Number; + if ((uint64_t)rcToInject != pArgs[2].u.u64Number) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "The status code '%lld' is out of range", pArgs[0].u.u64Number); + + /* + * Search for the matching endpoint. + */ + RTCritSectEnter(&pEpClassFile->Core.CritSect); + + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead; + while (pEpFile) + { + if (!RTStrCmp(pArgs[1].u.pszString, RTPathFilename(pEpFile->Core.pszUri))) + break; + pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext; + } + + if (pEpFile) + { + /* + * Do the job. 
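+         * Store the status code to inject; it is picked up (and reset to
+         * VINF_SUCCESS again) by pdmacFileEpTaskCompleted on the next matching
+         * read or write request.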
+ */ + if (fWrite) + ASMAtomicXchgS32(&pEpFile->rcReqWrite, rcToInject); + else + ASMAtomicXchgS32(&pEpFile->rcReqRead, rcToInject); + + DBGCCmdHlpPrintf(pCmdHlp, "Injected %Rrc into '%s' for %s\n", + (int)rcToInject, pArgs[1].u.pszString, pArgs[0].u.pszString); + } + + RTCritSectLeave(&pEpClassFile->Core.CritSect); + + if (!pEpFile) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "No file with name '%s' found", pArgs[1].u.pszString); + return VINF_SUCCESS; +} + +# ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY +/** + * @callback_method_impl{FNDBGCCMD, The '.injectdelay' command.} + */ +static DECLCALLBACK(int) pdmacEpFileDelayInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR pArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, -1, cArgs >= 3); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, pArgs[0].enmType == DBGCVAR_TYPE_STRING); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 1, pArgs[1].enmType == DBGCVAR_TYPE_STRING); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 2, pArgs[2].enmType == DBGCVAR_TYPE_NUMBER); + + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile; + pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE]; + + /* Syntax is "read|write|flush|any [reqs]" */ + PDMACFILEREQTYPEDELAY enmDelayType = PDMACFILEREQTYPEDELAY_ANY; + if (!RTStrCmp(pArgs[0].u.pszString, "read")) + enmDelayType = PDMACFILEREQTYPEDELAY_READ; + else if (!RTStrCmp(pArgs[0].u.pszString, "write")) + enmDelayType = PDMACFILEREQTYPEDELAY_WRITE; + else if (!RTStrCmp(pArgs[0].u.pszString, "flush")) + enmDelayType = PDMACFILEREQTYPEDELAY_FLUSH; + else if (!RTStrCmp(pArgs[0].u.pszString, "any")) + enmDelayType = PDMACFILEREQTYPEDELAY_ANY; + else + return DBGCCmdHlpFail(pCmdHlp, pCmd, "invalid transfer direction '%s'", pArgs[0].u.pszString); + + uint32_t msDelay = (uint32_t)pArgs[2].u.u64Number; + if ((uint64_t)msDelay != pArgs[2].u.u64Number) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "The delay '%lld' is out of range", pArgs[0].u.u64Number); + + uint32_t cReqsDelay = 1; + uint32_t msJitter = 0; + if (cArgs >= 4) + msJitter = (uint32_t)pArgs[3].u.u64Number; + if (cArgs == 5) + cReqsDelay = (uint32_t)pArgs[4].u.u64Number; + + /* + * Search for the matching endpoint. 
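+     *
+     * Illustrative console usage (all values are made-up examples):
+     *     .injectdelay write disk.vdi 500 100 10
+     * arms a 500 ms delay with a jitter value of 100 ms for the next 10
+     * write requests of the endpoint backed by "disk.vdi"; the jitter and
+     * request count arguments are optional and default to 0 and 1 as parsed
+     * above.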
+ */ + RTCritSectEnter(&pEpClassFile->Core.CritSect); + + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead; + while (pEpFile) + { + if (!RTStrCmp(pArgs[1].u.pszString, RTPathFilename(pEpFile->Core.pszUri))) + break; + pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext; + } + + if (pEpFile) + { + ASMAtomicWriteSize(&pEpFile->enmTypeDelay, enmDelayType); + ASMAtomicWriteU32(&pEpFile->msDelay, msDelay); + ASMAtomicWriteU32(&pEpFile->msJitter, msJitter); + ASMAtomicWriteU32(&pEpFile->cReqsDelay, cReqsDelay); + + DBGCCmdHlpPrintf(pCmdHlp, "Injected delay for the next %u requests of %u ms into '%s' for %s\n", + cReqsDelay, msDelay, pArgs[1].u.pszString, pArgs[0].u.pszString); + } + + RTCritSectLeave(&pEpClassFile->Core.CritSect); + + if (!pEpFile) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "No file with name '%s' found", pArgs[1].u.pszString); + return VINF_SUCCESS; +} + +static DECLCALLBACK(void) pdmacR3TimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser) +{ + uint64_t tsCur = RTTimeProgramMilliTS(); + uint64_t cMilliesNext = UINT64_MAX; + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pvUser; + + ASMAtomicWriteU64(&pEpClassFile->cMilliesNext, UINT64_MAX); + + /* Go through all endpoints and check for expired requests. */ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead; + + while (pEpFile) + { + /* Check for an expired delay. */ + if (pEpFile->pDelayedHead != NULL) + { + PPDMASYNCCOMPLETIONTASKFILE pTaskFile = ASMAtomicXchgPtrT(&pEpFile->pDelayedHead, NULL, PPDMASYNCCOMPLETIONTASKFILE); + + while (pTaskFile) + { + PPDMASYNCCOMPLETIONTASKFILE pTmp = pTaskFile; + pTaskFile = pTaskFile->pDelayedNext; + + if (tsCur >= pTmp->tsDelayEnd) + { + LogRel(("AIOMgr: Delayed request %#p completed\n", pTmp)); + pdmR3AsyncCompletionCompleteTask(&pTmp->Core, pTmp->rc, true); + } + else + { + /* Prepend to the delayed list again. */ + PPDMASYNCCOMPLETIONTASKFILE pHead = NULL; + + if (pTmp->tsDelayEnd - tsCur < cMilliesNext) + cMilliesNext = pTmp->tsDelayEnd - tsCur; + + do + { + pHead = ASMAtomicReadPtrT(&pEpFile->pDelayedHead, PPDMASYNCCOMPLETIONTASKFILE); + pTmp->pDelayedNext = pHead; + } while (!ASMAtomicCmpXchgPtr(&pEpFile->pDelayedHead, pTmp, pHead)); + } + } + } + + pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext; + } + + if (cMilliesNext < pEpClassFile->cMilliesNext) + { + ASMAtomicWriteU64(&pEpClassFile->cMilliesNext, cMilliesNext); + TMTimerSetMillies(pEpClassFile->pTimer, cMilliesNext); + } +} + +# endif /* PDM_ASYNC_COMPLETION_FILE_WITH_DELAY */ + +#endif /* VBOX_WITH_DEBUGGER */ + +static DECLCALLBACK(int) pdmacFileInitialize(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode) +{ + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals; + RTFILEAIOLIMITS AioLimits; /** < Async I/O limitations. */ + + int rc = RTFileAioGetLimits(&AioLimits); +#ifdef DEBUG + if (RT_SUCCESS(rc) && RTEnvExist("VBOX_ASYNC_IO_FAILBACK")) + rc = VERR_ENV_VAR_NOT_FOUND; +#endif + if (RT_FAILURE(rc)) + { + LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to simple manager\n", rc)); + pEpClassFile->enmMgrTypeOverride = PDMACEPFILEMGRTYPE_SIMPLE; + pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_BUFFERED; + } + else + { + pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? 
~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX; + pEpClassFile->cReqsOutstandingMax = AioLimits.cReqsOutstandingMax; + + if (pCfgNode) + { + /* Query the default manager type */ + char *pszVal = NULL; + rc = CFGMR3QueryStringAllocDef(pCfgNode, "IoMgr", &pszVal, "Async"); + AssertLogRelRCReturn(rc, rc); + + rc = pdmacFileMgrTypeFromName(pszVal, &pEpClassFile->enmMgrTypeOverride); + MMR3HeapFree(pszVal); + if (RT_FAILURE(rc)) + return rc; + + LogRel(("AIOMgr: Default manager type is '%s'\n", pdmacFileMgrTypeToName(pEpClassFile->enmMgrTypeOverride))); + + /* Query default backend type */ + rc = CFGMR3QueryStringAllocDef(pCfgNode, "FileBackend", &pszVal, "NonBuffered"); + AssertLogRelRCReturn(rc, rc); + + rc = pdmacFileBackendTypeFromName(pszVal, &pEpClassFile->enmEpBackendDefault); + MMR3HeapFree(pszVal); + if (RT_FAILURE(rc)) + return rc; + + LogRel(("AIOMgr: Default file backend is '%s'\n", pdmacFileBackendTypeToName(pEpClassFile->enmEpBackendDefault))); + +#ifdef RT_OS_LINUX + if ( pEpClassFile->enmMgrTypeOverride == PDMACEPFILEMGRTYPE_ASYNC + && pEpClassFile->enmEpBackendDefault == PDMACFILEEPBACKEND_BUFFERED) + { + LogRel(("AIOMgr: Linux does not support buffered async I/O, changing to non buffered\n")); + pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_NON_BUFFERED; + } +#endif + } + else + { + /* No configuration supplied, set defaults */ + pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_NON_BUFFERED; + pEpClassFile->enmMgrTypeOverride = PDMACEPFILEMGRTYPE_ASYNC; + } + } + + /* Init critical section. */ + rc = RTCritSectInit(&pEpClassFile->CritSect); + +#ifdef VBOX_WITH_DEBUGGER + /* Install the error injection handler. */ + if (RT_SUCCESS(rc)) + { + rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + AssertRC(rc); + } + +#ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY + rc = TMR3TimerCreateInternal(pEpClassFile->Core.pVM, TMCLOCK_REAL, pdmacR3TimerCallback, pEpClassFile, "AC Delay", &pEpClassFile->pTimer); + AssertRC(rc); + pEpClassFile->cMilliesNext = UINT64_MAX; +#endif +#endif + + return rc; +} + +static DECLCALLBACK(void) pdmacFileTerminate(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals) +{ + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals; + + /* All endpoints should be closed at this point. */ + AssertMsg(!pEpClassFile->Core.pEndpointsHead, ("There are still endpoints left\n")); + + /* Destroy all left async I/O managers. */ + while (pEpClassFile->pAioMgrHead) + pdmacFileAioMgrDestroy(pEpClassFile, pEpClassFile->pAioMgrHead); + + RTCritSectDelete(&pEpClassFile->CritSect); +} + +static DECLCALLBACK(int) pdmacFileEpInitialize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, + const char *pszUri, uint32_t fFlags) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass; + PDMACEPFILEMGRTYPE enmMgrType = pEpClassFile->enmMgrTypeOverride; + PDMACFILEEPBACKEND enmEpBackend = pEpClassFile->enmEpBackendDefault; + + AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_DONT_LOCK | PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED)) == 0, + ("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER); + + unsigned fFileFlags = RTFILE_O_OPEN; + + /* + * Revert to the simple manager and the buffered backend if + * the host cache should be enabled. 
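+     *
+     * In other words: PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED forces the
+     * (simple manager, buffered backend) combination, while without the flag
+     * the class defaults queried from CFGM (typically async + non-buffered)
+     * stay in effect and may still be downgraded further below if the host
+     * cannot do unbuffered or async I/O on this file.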
+ */ + if (fFlags & PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED) + { + enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE; + enmEpBackend = PDMACFILEEPBACKEND_BUFFERED; + } + + if (fFlags & PDMACEP_FILE_FLAGS_READ_ONLY) + fFileFlags |= RTFILE_O_READ | RTFILE_O_DENY_NONE; + else + { + fFileFlags |= RTFILE_O_READWRITE; + + /* + * Opened in read/write mode. Check whether the caller wants to + * avoid the lock. Return an error in case caching is enabled + * because this can lead to data corruption. + */ + if (fFlags & PDMACEP_FILE_FLAGS_DONT_LOCK) + fFileFlags |= RTFILE_O_DENY_NONE; + else + fFileFlags |= RTFILE_O_DENY_WRITE; + } + + if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC) + fFileFlags |= RTFILE_O_ASYNC_IO; + + int rc; + if (enmEpBackend == PDMACFILEEPBACKEND_NON_BUFFERED) + { + /* + * We only disable the cache if the size of the file is a multiple of 512. + * Certain hosts like Windows, Linux and Solaris require that transfer sizes + * are aligned to the volume sector size. + * If not we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH + * which will trash the host cache but ensures that the host cache will not + * contain dirty buffers. + */ + RTFILE hFile; + rc = RTFileOpen(&hFile, pszUri, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE); + if (RT_SUCCESS(rc)) + { + uint64_t cbSize; + + rc = RTFileGetSize(hFile, &cbSize); + + if (RT_SUCCESS(rc) && ((cbSize % 512) == 0)) + fFileFlags |= RTFILE_O_NO_CACHE; + else + { + /* Downgrade to the buffered backend */ + enmEpBackend = PDMACFILEEPBACKEND_BUFFERED; + +#ifdef RT_OS_LINUX + fFileFlags &= ~RTFILE_O_ASYNC_IO; + enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE; +#endif + } + RTFileClose(hFile); + } + } + + /* Open with final flags. */ + rc = RTFileOpen(&pEpFile->hFile, pszUri, fFileFlags); + if ( rc == VERR_INVALID_FUNCTION + || rc == VERR_INVALID_PARAMETER) + { + LogRel(("AIOMgr: pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n", + pszUri, fFileFlags, rc)); + /* + * Solaris doesn't support directio on ZFS so far. :-\ + * Trying to enable it returns VERR_INVALID_FUNCTION + * (ENOTTY). Remove it and hope for the best. + * ZFS supports write throttling in case applications + * write more data than can be synced to the disk + * without blocking the whole application. + * + * On Linux we have the same problem with cifs. + * Have to disable async I/O here too because it requires O_DIRECT. + */ + fFileFlags &= ~RTFILE_O_NO_CACHE; + enmEpBackend = PDMACFILEEPBACKEND_BUFFERED; + +#ifdef RT_OS_LINUX + fFileFlags &= ~RTFILE_O_ASYNC_IO; + enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE; +#endif + + /* Open again. */ + rc = RTFileOpen(&pEpFile->hFile, pszUri, fFileFlags); + + if (RT_FAILURE(rc)) + { + LogRel(("AIOMgr: pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n", + pszUri, fFileFlags, rc)); + } + } + + if (RT_SUCCESS(rc)) + { + pEpFile->fFlags = fFileFlags; + + rc = RTFileGetSize(pEpFile->hFile, (uint64_t *)&pEpFile->cbFile); + if (RT_SUCCESS(rc)) + { + /* Initialize the segment cache */ + rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, + sizeof(PDMACTASKFILE), + (void **)&pEpFile->pTasksFreeHead); + if (RT_SUCCESS(rc)) + { + PPDMACEPFILEMGR pAioMgr = NULL; + + pEpFile->pTasksFreeTail = pEpFile->pTasksFreeHead; + pEpFile->cTasksCached = 0; + pEpFile->enmBackendType = enmEpBackend; + /* + * Disable async flushes on Solaris for now. + * They cause weird hangs which needs more investigations. 
+ */ +#ifndef RT_OS_SOLARIS + pEpFile->fAsyncFlushSupported = true; +#else + pEpFile->fAsyncFlushSupported = false; +#endif + + if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE) + { + /* Simple mode. Every file has its own async I/O manager. */ + rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, PDMACEPFILEMGRTYPE_SIMPLE); + } + else + { + pAioMgr = pEpClassFile->pAioMgrHead; + + /* Check for an idling manager of the same type */ + while (pAioMgr) + { + if (pAioMgr->enmMgrType == enmMgrType) + break; + pAioMgr = pAioMgr->pNext; + } + + if (!pAioMgr) + rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, enmMgrType); + } + + if (RT_SUCCESS(rc)) + { + pEpFile->AioMgr.pTreeRangesLocked = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE)); + if (!pEpFile->AioMgr.pTreeRangesLocked) + rc = VERR_NO_MEMORY; + else + { + pEpFile->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE; + + /* Assign the endpoint to the thread. */ + rc = pdmacFileAioMgrAddEndpoint(pAioMgr, pEpFile); + if (RT_FAILURE(rc)) + { + RTMemFree(pEpFile->AioMgr.pTreeRangesLocked); + MMR3HeapFree(pEpFile->pTasksFreeHead); + } + } + } + else if (rc == VERR_FILE_AIO_INSUFFICIENT_EVENTS) + { + PUVM pUVM = VMR3GetUVM(pEpClassFile->Core.pVM); +#if defined(RT_OS_LINUX) + rc = VMR3SetError(pUVM, rc, RT_SRC_POS, + N_("Failed to create I/O manager for VM due to insufficient resources on the host. " + "Either increase the amount of allowed events in /proc/sys/fs/aio-max-nr or enable " + "the host I/O cache")); +#else + rc = VMR3SetError(pUVM, rc, RT_SRC_POS, + N_("Failed to create I/O manager for VM due to insufficient resources on the host. " + "Enable the host I/O cache")); +#endif + } + else + { + PUVM pUVM = VMR3GetUVM(pEpClassFile->Core.pVM); + rc = VMR3SetError(pUVM, rc, RT_SRC_POS, + N_("Failed to create I/O manager for VM due to an unknown error")); + } + } + } + + if (RT_FAILURE(rc)) + RTFileClose(pEpFile->hFile); + } + +#ifdef VBOX_WITH_STATISTICS + if (RT_SUCCESS(rc)) + { + STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatRead, + STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, + STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint", + "/PDM/AsyncCompletion/File/%s/%d/Read", RTPathFilename(pEpFile->Core.pszUri), pEpFile->Core.iStatId); + + STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatWrite, + STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, + STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint", + "/PDM/AsyncCompletion/File/%s/%d/Write", RTPathFilename(pEpFile->Core.pszUri), pEpFile->Core.iStatId); + } +#endif + + if (RT_SUCCESS(rc)) + LogRel(("AIOMgr: Endpoint for file '%s' (flags %08x) created successfully\n", pszUri, pEpFile->fFlags)); + + return rc; +} + +static DECLCALLBACK(int) pdmacFileEpRangesLockedDestroy(PAVLRFOFFNODECORE pNode, void *pvUser) +{ + NOREF(pNode); NOREF(pvUser); + AssertMsgFailed(("The locked ranges tree should be empty at that point\n")); + return VINF_SUCCESS; +} + +static DECLCALLBACK(int) pdmacFileEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass; + + /* Make sure that all tasks finished for this endpoint. */ + int rc = pdmacFileAioMgrCloseEndpoint(pEpFile->pAioMgr, pEpFile); + AssertRC(rc); + + /* + * If the async I/O manager is in failsafe mode this is the only endpoint + * he processes and thus can be destroyed now. 
+ */ + if (pEpFile->pAioMgr->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE) + pdmacFileAioMgrDestroy(pEpClassFile, pEpFile->pAioMgr); + + /* Free cached tasks. */ + PPDMACTASKFILE pTask = pEpFile->pTasksFreeHead; + + while (pTask) + { + PPDMACTASKFILE pTaskFree = pTask; + pTask = pTask->pNext; + MMR3HeapFree(pTaskFree); + } + + /* Destroy the locked ranges tree now. */ + RTAvlrFileOffsetDestroy(pEpFile->AioMgr.pTreeRangesLocked, pdmacFileEpRangesLockedDestroy, NULL); + RTMemFree(pEpFile->AioMgr.pTreeRangesLocked); + pEpFile->AioMgr.pTreeRangesLocked = NULL; + + RTFileClose(pEpFile->hFile); + +#ifdef VBOX_WITH_STATISTICS + /* Not sure if this might be unnecessary because of similar statement in pdmR3AsyncCompletionStatisticsDeregister? */ + STAMR3DeregisterF(pEpClassFile->Core.pVM->pUVM, "/PDM/AsyncCompletion/File/%s/*", RTPathFilename(pEpFile->Core.pszUri)); +#endif + + return VINF_SUCCESS; +} + +static DECLCALLBACK(int) pdmacFileEpRead(PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, size_t cSegments, + size_t cbRead) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + + LogFlowFunc(("pTask=%#p pEndpoint=%#p off=%RTfoff paSegments=%#p cSegments=%zu cbRead=%zu\n", + pTask, pEndpoint, off, paSegments, cSegments, cbRead)); + + if (RT_UNLIKELY((uint64_t)off + cbRead > pEpFile->cbFile)) + return VERR_EOF; + + STAM_PROFILE_ADV_START(&pEpFile->StatRead, Read); + pdmacFileEpTaskInit(pTask, cbRead); + int rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbRead, + PDMACTASKFILETRANSFER_READ); + STAM_PROFILE_ADV_STOP(&pEpFile->StatRead, Read); + + return rc; +} + +static DECLCALLBACK(int) pdmacFileEpWrite(PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, size_t cSegments, + size_t cbWrite) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + + if (RT_UNLIKELY(pEpFile->fReadonly)) + return VERR_NOT_SUPPORTED; + + STAM_PROFILE_ADV_START(&pEpFile->StatWrite, Write); + + pdmacFileEpTaskInit(pTask, cbWrite); + + int rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbWrite, + PDMACTASKFILETRANSFER_WRITE); + + STAM_PROFILE_ADV_STOP(&pEpFile->StatWrite, Write); + + return rc; +} + +static DECLCALLBACK(int) pdmacFileEpFlush(PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask; + + if (RT_UNLIKELY(pEpFile->fReadonly)) + return VERR_NOT_SUPPORTED; + + pdmacFileEpTaskInit(pTask, 0); + + PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile); + if (RT_UNLIKELY(!pIoTask)) + return VERR_NO_MEMORY; + + pIoTask->pEndpoint = pEpFile; + pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH; + pIoTask->pvUser = pTaskFile; + pIoTask->pfnCompleted = pdmacFileEpTaskCompleted; + pdmacFileEpAddTask(pEpFile, pIoTask); + + return VINF_AIO_TASK_PENDING; +} + +static DECLCALLBACK(int) pdmacFileEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t *pcbSize) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + + *pcbSize = ASMAtomicReadU64(&pEpFile->cbFile); + + return VINF_SUCCESS; +} + +static DECLCALLBACK(int) pdmacFileEpSetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t cbSize) +{ + int rc; + PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = 
(PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint; + + rc = RTFileSetSize(pEpFile->hFile, cbSize); + if (RT_SUCCESS(rc)) + ASMAtomicWriteU64(&pEpFile->cbFile, cbSize); + + return rc; +} + +const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile = +{ + /* u32Version */ + PDMAC_EPCLASS_OPS_VERSION, + /* pcszName */ + "File", + /* enmClassType */ + PDMASYNCCOMPLETIONEPCLASSTYPE_FILE, + /* cbEndpointClassGlobal */ + sizeof(PDMASYNCCOMPLETIONEPCLASSFILE), + /* cbEndpoint */ + sizeof(PDMASYNCCOMPLETIONENDPOINTFILE), + /* cbTask */ + sizeof(PDMASYNCCOMPLETIONTASKFILE), + /* pfnInitialize */ + pdmacFileInitialize, + /* pfnTerminate */ + pdmacFileTerminate, + /* pfnEpInitialize. */ + pdmacFileEpInitialize, + /* pfnEpClose */ + pdmacFileEpClose, + /* pfnEpRead */ + pdmacFileEpRead, + /* pfnEpWrite */ + pdmacFileEpWrite, + /* pfnEpFlush */ + pdmacFileEpFlush, + /* pfnEpGetSize */ + pdmacFileEpGetSize, + /* pfnEpSetSize */ + pdmacFileEpSetSize, + /* u32VersionEnd */ + PDMAC_EPCLASS_OPS_VERSION +}; + diff --git a/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileFailsafe.cpp b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileFailsafe.cpp new file mode 100644 index 00000000..f5ff3e06 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileFailsafe.cpp @@ -0,0 +1,268 @@ +/* $Id: PDMAsyncCompletionFileFailsafe.cpp $ */ +/** @file + * PDM Async I/O - Transport data asynchronous in R3 using EMT. + * Simple File I/O manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION +#include +#include +#include + +#include "PDMAsyncCompletionFileInternal.h" + +/** + * Put a list of tasks in the pending request list of an endpoint. + */ +DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead) +{ + /* Add the rest of the tasks to the pending list */ + if (!pEndpoint->AioMgr.pReqsPendingHead) + { + Assert(!pEndpoint->AioMgr.pReqsPendingTail); + pEndpoint->AioMgr.pReqsPendingHead = pTaskHead; + } + else + { + Assert(pEndpoint->AioMgr.pReqsPendingTail); + pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead; + } + + /* Update the tail. */ + while (pTaskHead->pNext) + pTaskHead = pTaskHead->pNext; + + pEndpoint->AioMgr.pReqsPendingTail = pTaskHead; + pTaskHead->pNext = NULL; +} + +/** + * Processes a given task list for assigned to the given endpoint. 
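+ *
+ * Each request is carried out synchronously on the manager thread using
+ * RTFileReadAt, RTFileWriteAt or RTFileFlush; when the endpoint's bandwidth
+ * limit is hit the remaining tasks are queued on its pending list and picked
+ * up again on the next loop iteration.
+ *
+ * @returns VBox status code.
+ * @param   pAioMgr     The async I/O manager the endpoint is assigned to.
+ * @param   pEndpoint   The endpoint the tasks belong to.
+ * @param   pTasks      Head of the task list to process.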
+ */ +static int pdmacFileAioMgrFailsafeProcessEndpointTaskList(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PPDMACTASKFILE pTasks) +{ + int rc = VINF_SUCCESS; + + while (pTasks) + { + RTMSINTERVAL msWhenNext; + PPDMACTASKFILE pCurr = pTasks; + + if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext)) + { + pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext); + break; + } + + pTasks = pTasks->pNext; + + switch (pCurr->enmTransferType) + { + case PDMACTASKFILETRANSFER_FLUSH: + { + rc = RTFileFlush(pEndpoint->hFile); + break; + } + case PDMACTASKFILETRANSFER_READ: + case PDMACTASKFILETRANSFER_WRITE: + { + if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_READ) + { + rc = RTFileReadAt(pEndpoint->hFile, pCurr->Off, + pCurr->DataSeg.pvSeg, + pCurr->DataSeg.cbSeg, + NULL); + } + else + { + if (RT_UNLIKELY((uint64_t)pCurr->Off + pCurr->DataSeg.cbSeg > pEndpoint->cbFile)) + { + ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg); + RTFileSetSize(pEndpoint->hFile, pCurr->Off + pCurr->DataSeg.cbSeg); + } + + rc = RTFileWriteAt(pEndpoint->hFile, pCurr->Off, + pCurr->DataSeg.pvSeg, + pCurr->DataSeg.cbSeg, + NULL); + } + + break; + } + default: + AssertMsgFailed(("Invalid transfer type %d\n", pTasks->enmTransferType)); + } + + pCurr->pfnCompleted(pCurr, pCurr->pvUser, rc); + pdmacFileTaskFree(pEndpoint, pCurr); + } + + if (pTasks) + { + /* Add the rest of the tasks to the pending list */ + pdmacFileAioMgrEpAddTaskList(pEndpoint, pTasks); + } + + return VINF_SUCCESS; +} + +static int pdmacFileAioMgrFailsafeProcessEndpoint(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + int rc = VINF_SUCCESS; + PPDMACTASKFILE pTasks = pEndpoint->AioMgr.pReqsPendingHead; + + pEndpoint->AioMgr.pReqsPendingHead = NULL; + pEndpoint->AioMgr.pReqsPendingTail = NULL; + + /* Process the request pending list first in case the endpoint was migrated due to an error. */ + if (pTasks) + rc = pdmacFileAioMgrFailsafeProcessEndpointTaskList(pAioMgr, pEndpoint, pTasks); + + if (RT_SUCCESS(rc)) + { + pTasks = pdmacFileEpGetNewTasks(pEndpoint); + + if (pTasks) + rc = pdmacFileAioMgrFailsafeProcessEndpointTaskList(pAioMgr, pEndpoint, pTasks); + } + + return rc; +} + +/** + * A fallback method in case something goes wrong with the normal + * I/O manager. + */ +DECLCALLBACK(int) pdmacFileAioMgrFailsafe(RTTHREAD hThreadSelf, void *pvUser) +{ + int rc = VINF_SUCCESS; + PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser; + NOREF(hThreadSelf); + + while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING) + || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING)) + { + ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true); + if (!ASMAtomicReadBool(&pAioMgr->fWokenUp)) + rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired); + ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false); + Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT); + + LogFlow(("Got woken up\n")); + ASMAtomicWriteBool(&pAioMgr->fWokenUp, false); + + /* Process endpoint events first. */ + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead; + while (pEndpoint) + { + pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT; + rc = pdmacFileAioMgrFailsafeProcessEndpoint(pAioMgr, pEndpoint); + AssertRC(rc); + pEndpoint = pEndpoint->AioMgr.pEndpointNext; + } + + /* Now check for an external blocking event. 
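+           (The possible events are adding, removing or closing an endpoint as
+           well as shutdown, suspend and resume of the manager; once the event
+           has been handled, the thread that raised it is released again via
+           EventSemBlock at the end of this block.)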
*/ + if (pAioMgr->fBlockingEventPending) + { + switch (pAioMgr->enmBlockingEvent) + { + case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT: + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = pAioMgr->BlockingEventData.AddEndpoint.pEndpoint; + AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n")); + + pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE; + + pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead; + pEndpointNew->AioMgr.pEndpointPrev = NULL; + if (pAioMgr->pEndpointsHead) + pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew; + pAioMgr->pEndpointsHead = pEndpointNew; + + pAioMgr->cEndpoints++; + + /* + * Process the task list the first time. There might be pending requests + * if the endpoint was migrated from another endpoint. + */ + rc = pdmacFileAioMgrFailsafeProcessEndpoint(pAioMgr, pEndpointNew); + AssertRC(rc); + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT: + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint; + AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n")); + + pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING; + + PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev; + PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext; + + if (pPrev) + pPrev->AioMgr.pEndpointNext = pNext; + else + pAioMgr->pEndpointsHead = pNext; + + if (pNext) + pNext->AioMgr.pEndpointPrev = pPrev; + + pAioMgr->cEndpoints--; + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT: + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint; + AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to Close\n")); + + pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING; + + /* Make sure all tasks finished. */ + rc = pdmacFileAioMgrFailsafeProcessEndpoint(pAioMgr, pEndpointClose); + AssertRC(rc); + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN: + pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN; + break; + case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND: + pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING; + break; + case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME: + pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING; + break; + default: + AssertMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent)); + } + + ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false); + pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID; + + /* Release the waiting thread. */ + rc = RTSemEventSignal(pAioMgr->EventSemBlock); + AssertRC(rc); + } + } + + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp new file mode 100644 index 00000000..584f8345 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp @@ -0,0 +1,1732 @@ +/* $Id: PDMAsyncCompletionFileNormal.cpp $ */ +/** @file + * PDM Async I/O - Async File I/O manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION +#include +#include +#include +#include +#include +#include +#include + +#include "PDMAsyncCompletionFileInternal.h" + +/** The update period for the I/O load statistics in ms. */ +#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000 +/** Maximum number of requests a manager will handle. */ +#define PDMACEPFILEMGR_REQS_STEP 64 + + +/********************************************************************************************************************************* +* Internal functions * +*********************************************************************************************************************************/ +static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead, + PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint); + +static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PPDMACFILERANGELOCK pRangeLock); + +static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq, + int rc, size_t cbTransfered); + + +int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr) +{ + pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP; + + int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */); + if (rc == VERR_OUT_OF_RANGE) + rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */); + + if (RT_SUCCESS(rc)) + { + /* Initialize request handle array. */ + pAioMgr->iFreeEntry = 0; + pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax; + pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ)); + + if (pAioMgr->pahReqsFree) + { + /* Create the range lock memcache. */ + rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK), + 0, UINT32_MAX, NULL, NULL, NULL, 0); + if (RT_SUCCESS(rc)) + return VINF_SUCCESS; + + RTMemFree(pAioMgr->pahReqsFree); + } + else + { + RTFileAioCtxDestroy(pAioMgr->hAioCtx); + rc = VERR_NO_MEMORY; + } + } + + return rc; +} + +void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr) +{ + RTFileAioCtxDestroy(pAioMgr->hAioCtx); + + while (pAioMgr->iFreeEntry > 0) + { + pAioMgr->iFreeEntry--; + Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ); + RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]); + } + + RTMemFree(pAioMgr->pahReqsFree); + RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks); +} + +#if 0 /* currently unused */ +/** + * Sorts the endpoint list with insertion sort. 
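+ *
+ * After sorting the list is ordered by AioMgr.cReqsPerSec with the busiest
+ * endpoint at the head, which is the order the (currently disabled) load
+ * balancing code further down expects when it distributes endpoints between
+ * two managers.
+ *
+ * @param   pAioMgr     The I/O manager whose endpoint list should be sorted.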
+ */ +static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort; + + pEpPrev = pAioMgr->pEndpointsHead; + pEpCurr = pEpPrev->AioMgr.pEndpointNext; + + while (pEpCurr) + { + /* Remember the next element to sort because the list might change. */ + pEpNextToSort = pEpCurr->AioMgr.pEndpointNext; + + /* Unlink the current element from the list. */ + PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev; + PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext; + + if (pPrev) + pPrev->AioMgr.pEndpointNext = pNext; + else + pAioMgr->pEndpointsHead = pNext; + + if (pNext) + pNext->AioMgr.pEndpointPrev = pPrev; + + /* Go back until we reached the place to insert the current endpoint into. */ + while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec)) + pEpPrev = pEpPrev->AioMgr.pEndpointPrev; + + /* Link the endpoint into the list. */ + if (pEpPrev) + pNext = pEpPrev->AioMgr.pEndpointNext; + else + pNext = pAioMgr->pEndpointsHead; + + pEpCurr->AioMgr.pEndpointNext = pNext; + pEpCurr->AioMgr.pEndpointPrev = pEpPrev; + + if (pNext) + pNext->AioMgr.pEndpointPrev = pEpCurr; + + if (pEpPrev) + pEpPrev->AioMgr.pEndpointNext = pEpCurr; + else + pAioMgr->pEndpointsHead = pEpCurr; + + pEpCurr = pEpNextToSort; + } + +#ifdef DEBUG + /* Validate sorting algorithm */ + unsigned cEndpoints = 0; + pEpCurr = pAioMgr->pEndpointsHead; + + AssertMsg(pEpCurr, ("No endpoint in the list?\n")); + AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n")); + + while (pEpCurr) + { + cEndpoints++; + + PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext; + PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev; + + Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec); + Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec); + + pEpCurr = pNext; + } + + AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n")); + +#endif +} +#endif /* currently unused */ + +/** + * Removes an endpoint from the currently assigned manager. + * + * @returns TRUE if there are still requests pending on the current manager for this endpoint. + * FALSE otherwise. + * @param pEndpointRemove The endpoint to remove. + */ +static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove) +{ + PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev; + PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext; + PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr; + + pAioMgr->cEndpoints--; + + if (pPrev) + pPrev->AioMgr.pEndpointNext = pNext; + else + pAioMgr->pEndpointsHead = pNext; + + if (pNext) + pNext->AioMgr.pEndpointPrev = pPrev; + + /* Make sure that there is no request pending on this manager for the endpoint. */ + if (!pEndpointRemove->AioMgr.cRequestsActive) + { + Assert(!pEndpointRemove->pFlushReq); + + /* Reopen the file so that the new endpoint can re-associate with the file */ + RTFileClose(pEndpointRemove->hFile); + int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags); + AssertRC(rc); + return false; + } + + return true; +} + +#if 0 /* currently unused */ + +static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr) +{ + /* Balancing doesn't make sense with only one endpoint. 
*/ + if (pAioMgr->cEndpoints == 1) + return false; + + /* Doesn't make sens to move endpoints if only one produces the whole load */ + unsigned cEndpointsWithLoad = 0; + + PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead; + + while (pCurr) + { + if (pCurr->AioMgr.cReqsPerSec) + cEndpointsWithLoad++; + + pCurr = pCurr->AioMgr.pEndpointNext; + } + + return (cEndpointsWithLoad > 1); +} + +/** + * Creates a new I/O manager and spreads the I/O load of the endpoints + * between the given I/O manager and the new one. + * + * @returns nothing. + * @param pAioMgr The I/O manager with high I/O load. + */ +static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr) +{ + /* + * Check if balancing would improve the situation. + */ + if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr)) + { + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass; + PPDMACEPFILEMGR pAioMgrNew = NULL; + + int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC); + if (RT_SUCCESS(rc)) + { + /* We will sort the list by request count per second. */ + pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr); + + /* Now move some endpoints to the new manager. */ + unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec; + unsigned cReqsOther = 0; + PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext; + + while (pCurr) + { + if (cReqsHere <= cReqsOther) + { + /* + * The other manager has more requests to handle now. + * We will keep the current endpoint. + */ + Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec)); + cReqsHere += pCurr->AioMgr.cReqsPerSec; + pCurr = pCurr->AioMgr.pEndpointNext; + } + else + { + /* Move to other endpoint. */ + Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec)); + cReqsOther += pCurr->AioMgr.cReqsPerSec; + + PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr; + + pCurr = pCurr->AioMgr.pEndpointNext; + + bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove); + + if (fReqsPending) + { + pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING; + pMove->AioMgr.fMoving = true; + pMove->AioMgr.pAioMgrDst = pAioMgrNew; + } + else + { + pMove->AioMgr.fMoving = false; + pMove->AioMgr.pAioMgrDst = NULL; + pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove); + } + } + } + } + else + { + /* Don't process further but leave a log entry about reduced performance. */ + LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc)); + } + } + else + Log(("AIOMgr: Load balancing would not improve anything\n")); +} + +#endif /* unused */ + +/** + * Increase the maximum number of active requests for the given I/O manager. + * + * @returns VBox status code. + * @param pAioMgr The I/O manager to grow. + */ +static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr) +{ + LogFlowFunc(("pAioMgr=%#p\n", pAioMgr)); + + AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING + && !pAioMgr->cRequestsActive, + ("Invalid state of the I/O manager\n")); + +#ifdef RT_OS_WINDOWS + /* + * Reopen the files of all assigned endpoints first so we can assign them to the new + * I/O context. 
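+     * On Windows a file handle stays associated with the completion port it
+     * was first attached to, so the existing handles cannot simply be moved
+     * over to the bigger RTFILEAIOCTX created below; reopening the files is
+     * the workaround used here.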
+ */ + PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead; + + while (pCurr) + { + RTFileClose(pCurr->hFile); + int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2); + + pCurr = pCurr->AioMgr.pEndpointNext; + } +#endif + + /* Create the new bigger context. */ + pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP; + + RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX; + int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */); + if (rc == VERR_OUT_OF_RANGE) + rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */); + + if (RT_SUCCESS(rc)) + { + /* Close the old context. */ + rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx); + AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */ + + pAioMgr->hAioCtx = hAioCtxNew; + + /* Create a new I/O task handle array */ + uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1; + RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ)); + + if (pahReqNew) + { + /* Copy the cached request handles. */ + for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++) + pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq]; + + RTMemFree(pAioMgr->pahReqsFree); + pAioMgr->pahReqsFree = pahReqNew; + pAioMgr->cReqEntries = cReqEntriesNew; + LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n", + pAioMgr->cRequestsActiveMax)); + } + else + rc = VERR_NO_MEMORY; + } + +#ifdef RT_OS_WINDOWS + /* Assign the file to the new context. */ + pCurr = pAioMgr->pEndpointsHead; + while (pCurr) + { + rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile); + AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */ + + pCurr = pCurr->AioMgr.pEndpointNext; + } +#endif + + if (RT_FAILURE(rc)) + { + LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc)); + pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP; + } + + pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING; + LogFlowFunc(("returns rc=%Rrc\n", rc)); + + return rc; +} + +/** + * Checks if a given status code is fatal. + * Non fatal errors can be fixed by migrating the endpoint to a + * failsafe manager. + * + * @returns true If the error is fatal and migrating to a failsafe manager doesn't help + * false If the error can be fixed by a migration. (image on NFS disk for example) + * @param rcReq The status code to check. + */ +DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq) +{ + return rcReq == VERR_DEV_IO_ERROR + || rcReq == VERR_FILE_IO_ERROR + || rcReq == VERR_DISK_IO_ERROR + || rcReq == VERR_DISK_FULL + || rcReq == VERR_FILE_TOO_BIG; +} + +/** + * Error handler which will create the failsafe managers and destroy the failed I/O manager. + * + * @returns VBox status code + * @param pAioMgr The I/O manager the error occurred on. + * @param rc The error code. + * @param SRC_POS The source location of the error (use RT_SRC_POS). + */ +static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL) +{ + LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. 
Expect reduced performance\n", + pAioMgr, rc)); + LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS)); + LogRel(("AIOMgr: Please contact the product vendor\n")); + + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass; + + pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT; + ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE); + + AssertMsgFailed(("Implement\n")); + return VINF_SUCCESS; +} + +/** + * Put a list of tasks in the pending request list of an endpoint. + */ +DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead) +{ + /* Add the rest of the tasks to the pending list */ + if (!pEndpoint->AioMgr.pReqsPendingHead) + { + Assert(!pEndpoint->AioMgr.pReqsPendingTail); + pEndpoint->AioMgr.pReqsPendingHead = pTaskHead; + } + else + { + Assert(pEndpoint->AioMgr.pReqsPendingTail); + pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead; + } + + /* Update the tail. */ + while (pTaskHead->pNext) + pTaskHead = pTaskHead->pNext; + + pEndpoint->AioMgr.pReqsPendingTail = pTaskHead; + pTaskHead->pNext = NULL; +} + +/** + * Put one task in the pending request list of an endpoint. + */ +DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask) +{ + /* Add the rest of the tasks to the pending list */ + if (!pEndpoint->AioMgr.pReqsPendingHead) + { + Assert(!pEndpoint->AioMgr.pReqsPendingTail); + pEndpoint->AioMgr.pReqsPendingHead = pTask; + } + else + { + Assert(pEndpoint->AioMgr.pReqsPendingTail); + pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask; + } + + pEndpoint->AioMgr.pReqsPendingTail = pTask; + pTask->pNext = NULL; +} + +/** + * Allocates a async I/O request. + * + * @returns Handle to the request. + * @param pAioMgr The I/O manager. + */ +static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr) +{ + /* Get a request handle. */ + RTFILEAIOREQ hReq; + if (pAioMgr->iFreeEntry > 0) + { + pAioMgr->iFreeEntry--; + hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]; + pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ; + Assert(hReq != NIL_RTFILEAIOREQ); + } + else + { + int rc = RTFileAioReqCreate(&hReq); + AssertRCReturn(rc, NIL_RTFILEAIOREQ); + } + + return hReq; +} + +/** + * Frees a async I/O request handle. + * + * @returns nothing. + * @param pAioMgr The I/O manager. + * @param hReq The I/O request handle to free. + */ +static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq) +{ + Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries); + Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ); + + pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq; + pAioMgr->iFreeEntry++; +} + +/** + * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling. + */ +static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PRTFILEAIOREQ pahReqs, unsigned cReqs) +{ + pAioMgr->cRequestsActive += cReqs; + pEndpoint->AioMgr.cRequestsActive += cReqs; + + LogFlow(("Enqueuing %d requests. 
I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive)); + LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive)); + + int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs); + if (RT_FAILURE(rc)) + { + if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES) + { + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass; + + /* Append any not submitted task to the waiting list. */ + for (size_t i = 0; i < cReqs; i++) + { + int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL); + + if (rcReq != VERR_FILE_AIO_IN_PROGRESS) + { + PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]); + + Assert(pTask->hReq == pahReqs[i]); + pdmacFileAioMgrEpAddTask(pEndpoint, pTask); + pAioMgr->cRequestsActive--; + pEndpoint->AioMgr.cRequestsActive--; + + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH) + { + /* Clear the pending flush */ + Assert(pEndpoint->pFlushReq == pTask); + pEndpoint->pFlushReq = NULL; + } + } + } + + pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive; + + /* Print an entry in the release log */ + if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted)) + { + pEpClass->fOutOfResourcesWarningPrinted = true; + LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n", + pAioMgr->cRequestsActive)); + } + + LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive)); + LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive)); + rc = VINF_SUCCESS; + } + else /* Another kind of error happened (full disk, ...) */ + { + /* An error happened. Find out which one caused the error and resubmit all other tasks. */ + for (size_t i = 0; i < cReqs; i++) + { + int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL); + + if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED) + { + /* We call ourself again to do any error handling which might come up now. */ + rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1); + AssertRC(rc); + } + else if (rcReq != VERR_FILE_AIO_IN_PROGRESS) + pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0); + } + + + if ( pEndpoint->pFlushReq + && !pAioMgr->cRequestsActive + && !pEndpoint->fAsyncFlushSupported) + { + /* + * Complete a pending flush if we don't have requests enqueued and the host doesn't support + * the async flush API. + * Happens only if this we just noticed that this is not supported + * and the only active request was a flush. + */ + PPDMACTASKFILE pFlush = pEndpoint->pFlushReq; + pEndpoint->pFlushReq = NULL; + pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS); + pdmacFileTaskFree(pEndpoint, pFlush); + } + } + } + + return VINF_SUCCESS; +} + +static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + RTFOFF offStart, size_t cbRange, + PPDMACTASKFILE pTask, bool fAlignedReq) +{ + AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE + || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, + ("Invalid task type %d\n", pTask->enmTransferType)); + + /* + * If there is no unaligned request active and the current one is aligned + * just pass it through. 
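+     *
+     * The lock stored in the AVL tree covers the closed interval
+     * [Core.Key, Core.KeyLast] and the request covers
+     * [offStart, offStart + cbRange - 1]; the two intersect exactly when
+     *     Key <= offStart + cbRange - 1  &&  KeyLast >= offStart,
+     * which is the test applied below after the best-fit lookup.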
+ */ + if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq) + return false; + + PPDMACFILERANGELOCK pRangeLock; + pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart); + if (!pRangeLock) + { + pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true); + /* Check if we intersect with the range. */ + if ( !pRangeLock + || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1) + && (pRangeLock->Core.KeyLast) >= offStart)) + { + pRangeLock = NULL; /* False alarm */ + } + } + + /* Check whether we have one of the situations explained below */ + if (pRangeLock) + { + /* Add to the list. */ + pTask->pNext = NULL; + + if (!pRangeLock->pWaitingTasksHead) + { + Assert(!pRangeLock->pWaitingTasksTail); + pRangeLock->pWaitingTasksHead = pTask; + pRangeLock->pWaitingTasksTail = pTask; + } + else + { + AssertPtr(pRangeLock->pWaitingTasksTail); + pRangeLock->pWaitingTasksTail->pNext = pTask; + pRangeLock->pWaitingTasksTail = pTask; + } + return true; + } + + return false; +} + +static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + RTFOFF offStart, size_t cbRange, + PPDMACTASKFILE pTask, bool fAlignedReq) +{ + LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n", + pAioMgr, pEndpoint, offStart, cbRange, pTask)); + + AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq), + ("Range is already locked offStart=%RTfoff cbRange=%u\n", + offStart, cbRange)); + + /* + * If there is no unaligned request active and the current one is aligned + * just don't use the lock. + */ + if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq) + { + pTask->pRangeLock = NULL; + return VINF_SUCCESS; + } + + PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks); + if (!pRangeLock) + return VERR_NO_MEMORY; + + /* Init the lock. */ + pRangeLock->Core.Key = offStart; + pRangeLock->Core.KeyLast = offStart + cbRange - 1; + pRangeLock->cRefs = 1; + pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ; + pRangeLock->pWaitingTasksHead = NULL; + pRangeLock->pWaitingTasksTail = NULL; + + bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core); + AssertMsg(fInserted, ("Range lock was not inserted!\n")); NOREF(fInserted); + + /* Let the task point to its lock. */ + pTask->pRangeLock = pRangeLock; + pEndpoint->AioMgr.cLockedReqsActive++; + + return VINF_SUCCESS; +} + +static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PPDMACFILERANGELOCK pRangeLock) +{ + PPDMACTASKFILE pTasksWaitingHead; + + LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n", + pAioMgr, pEndpoint, pRangeLock)); + + /* pRangeLock can be NULL if there was no lock assigned with the task. 
*/ + if (!pRangeLock) + return NULL; + + Assert(pRangeLock->cRefs == 1); + + RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key); + pTasksWaitingHead = pRangeLock->pWaitingTasksHead; + pRangeLock->pWaitingTasksHead = NULL; + pRangeLock->pWaitingTasksTail = NULL; + RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock); + pEndpoint->AioMgr.cLockedReqsActive--; + + return pTasksWaitingHead; +} + +static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq) +{ + AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE + || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile, + ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n", + pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile)); + + pTask->fPrefetch = false; + pTask->cbBounceBuffer = 0; + + /* + * Before we start to setup the request we have to check whether there is a task + * already active which range intersects with ours. We have to defer execution + * of this task in two cases: + * - The pending task is a write and the current is either read or write + * - The pending task is a read and the current task is a write task. + * + * To check whether a range is currently "locked" we use the AVL tree where every pending task + * is stored by its file offset range. The current task will be added to the active task + * and will be executed when the active one completes. (The method below + * which checks whether a range is already used will add the task) + * + * This is necessary because of the requirement to align all requests to a 512 boundary + * which is enforced by the host OS (Linux and Windows atm). It is possible that + * we have to process unaligned tasks and need to align them using bounce buffers. + * While the data is fetched from the file another request might arrive writing to + * the same range. This will result in data corruption if both are executed concurrently. + */ + int rc = VINF_SUCCESS; + bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask, + true /* fAlignedReq */); + if (!fLocked) + { + /* Get a request handle. */ + RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr); + AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n")); + + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE) + { + /* Grow the file if needed. */ + if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile)) + { + ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg); + RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg); + } + + rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, + pTask->Off, pTask->DataSeg.pvSeg, + pTask->DataSeg.cbSeg, pTask); + } + else + rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, + pTask->Off, pTask->DataSeg.pvSeg, + pTask->DataSeg.cbSeg, pTask); + AssertRC(rc); + + rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off, + pTask->DataSeg.cbSeg, + pTask, true /* fAlignedReq */); + + if (RT_SUCCESS(rc)) + { + pTask->hReq = hReq; + *phReq = hReq; + } + } + else + LogFlow(("Task %#p was deferred because the access range is locked\n", pTask)); + + return rc; +} + +static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq) +{ + /* + * Check if the alignment requirements are met. 
+ * Offset, transfer size and buffer address + * need to be on a 512 boundary. + */ + RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1); + size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512); + PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType; + bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg + && offStart == pTask->Off; + + AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE + || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile, + ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n", + offStart, cbToTransfer, pEndpoint->cbFile)); + + pTask->fPrefetch = false; + + /* + * Before we start to setup the request we have to check whether there is a task + * already active which range intersects with ours. We have to defer execution + * of this task in two cases: + * - The pending task is a write and the current is either read or write + * - The pending task is a read and the current task is a write task. + * + * To check whether a range is currently "locked" we use the AVL tree where every pending task + * is stored by its file offset range. The current task will be added to the active task + * and will be executed when the active one completes. (The method below + * which checks whether a range is already used will add the task) + * + * This is necessary because of the requirement to align all requests to a 512 boundary + * which is enforced by the host OS (Linux and Windows atm). It is possible that + * we have to process unaligned tasks and need to align them using bounce buffers. + * While the data is fetched from the file another request might arrive writing to + * the same range. This will result in data corruption if both are executed concurrently. + */ + int rc = VINF_SUCCESS; + bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq); + if (!fLocked) + { + PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass; + void *pvBuf = pTask->DataSeg.pvSeg; + + /* Get a request handle. */ + RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr); + AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n")); + + if ( !fAlignedReq + || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf)) + { + LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n", + pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off)); + + /* Create bounce buffer. */ + pTask->cbBounceBuffer = cbToTransfer; + + AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n", + pTask->Off, offStart)); + pTask->offBounceBuffer = pTask->Off - offStart; + + /** @todo I think we need something like a RTMemAllocAligned method here. + * Current assumption is that the maximum alignment is 4096byte + * (GPT disk on Windows) + * so we can use RTMemPageAlloc here. + */ + pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer); + if (RT_LIKELY(pTask->pvBounceBuffer)) + { + pvBuf = pTask->pvBounceBuffer; + + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE) + { + if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg) + || RT_UNLIKELY(offStart != pTask->Off)) + { + /* We have to fill the buffer first before we can update the data. 
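+                         * Worked example with made-up numbers: a 100 byte write
+                         * at file offset 1000 gives offStart = 1000 & ~511 = 512,
+                         * cbToTransfer = RT_ALIGN_Z(100 + 488, 512) = 1024 and
+                         * offBounceBuffer = 488.  The 1024 byte bounce buffer is
+                         * therefore read from the file first (fPrefetch) and the
+                         * caller's data is merged in at offset 488 once that read
+                         * completes, before the aligned write is finally issued.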
*/ + LogFlow(("Prefetching data for task %#p\n", pTask)); + pTask->fPrefetch = true; + enmTransferType = PDMACTASKFILETRANSFER_READ; + } + else + memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg); + } + } + else + rc = VERR_NO_MEMORY; + } + else + pTask->cbBounceBuffer = 0; + + if (RT_SUCCESS(rc)) + { + AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf, + ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment)); + + if (enmTransferType == PDMACTASKFILETRANSFER_WRITE) + { + /* Grow the file if needed. */ + if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile)) + { + ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg); + RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg); + } + + rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, + offStart, pvBuf, cbToTransfer, pTask); + } + else + rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, + offStart, pvBuf, cbToTransfer, pTask); + AssertRC(rc); + + rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq); + if (RT_SUCCESS(rc)) + { + pTask->hReq = hReq; + *phReq = hReq; + } + else + { + /* Cleanup */ + if (pTask->cbBounceBuffer) + RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer); + } + } + } + else + LogFlow(("Task %#p was deferred because the access range is locked\n", pTask)); + + return rc; +} + +static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead, + PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + RTFILEAIOREQ apReqs[20]; + unsigned cRequests = 0; + int rc = VINF_SUCCESS; + + AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE, + ("Trying to process request lists of a non active endpoint!\n")); + + /* Go through the list and queue the requests until we get a flush request */ + while ( pTaskHead + && !pEndpoint->pFlushReq + && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax) + && RT_SUCCESS(rc)) + { + RTMSINTERVAL msWhenNext; + PPDMACTASKFILE pCurr = pTaskHead; + + if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext)) + { + pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext); + break; + } + + pTaskHead = pTaskHead->pNext; + + pCurr->pNext = NULL; + + AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint), + ("Endpoints do not match\n")); + + switch (pCurr->enmTransferType) + { + case PDMACTASKFILETRANSFER_FLUSH: + { + /* If there is no data transfer request this flush request finished immediately. */ + if (pEndpoint->fAsyncFlushSupported) + { + /* Issue a flush to the host. 
*/ + RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr); + AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n")); + + LogFlow(("Flush request %#p\n", hReq)); + + rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr); + if (RT_FAILURE(rc)) + { + if (rc == VERR_NOT_SUPPORTED) + LogRel(("AIOMgr: Async flushes not supported\n")); + else + LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc)); + pEndpoint->fAsyncFlushSupported = false; + pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq); + rc = VINF_SUCCESS; /* Fake success */ + } + else + { + pCurr->hReq = hReq; + apReqs[cRequests] = hReq; + pEndpoint->AioMgr.cReqsProcessed++; + cRequests++; + } + } + + if ( !pEndpoint->AioMgr.cRequestsActive + && !pEndpoint->fAsyncFlushSupported) + { + pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS); + pdmacFileTaskFree(pEndpoint, pCurr); + } + else + { + Assert(!pEndpoint->pFlushReq); + pEndpoint->pFlushReq = pCurr; + } + break; + } + case PDMACTASKFILETRANSFER_READ: + case PDMACTASKFILETRANSFER_WRITE: + { + RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ; + + if (pCurr->hReq == NIL_RTFILEAIOREQ) + { + if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED) + rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq); + else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED) + rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq); + else + AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType)); + + AssertRC(rc); + } + else + { + LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq)); + hReq = pCurr->hReq; + } + + LogFlow(("Read/Write request %#p\n", hReq)); + + if (hReq != NIL_RTFILEAIOREQ) + { + apReqs[cRequests] = hReq; + cRequests++; + } + break; + } + default: + AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType)); + } /* switch transfer type */ + + /* Queue the requests if the array is full. */ + if (cRequests == RT_ELEMENTS(apReqs)) + { + rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests); + cRequests = 0; + AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES), + ("Unexpected return code\n")); + } + } + + if (cRequests) + { + rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests); + AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES), + ("Unexpected return code rc=%Rrc\n", rc)); + } + + if (pTaskHead) + { + /* Add the rest of the tasks to the pending list */ + pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead); + + if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive + && !pEndpoint->pFlushReq)) + { +#if 0 + /* + * The I/O manager has no room left for more requests + * but there are still requests to process. + * Create a new I/O manager and let it handle some endpoints. + */ + pdmacFileAioMgrNormalBalanceLoad(pAioMgr); +#else + /* Grow the I/O manager */ + pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING; +#endif + } + } + + /* Insufficient resources are not fatal. */ + if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES) + rc = VINF_SUCCESS; + + return rc; +} + +/** + * Adds all pending requests for the given endpoint + * until a flush request is encountered or there is no + * request anymore. + * + * @returns VBox status code. + * @param pAioMgr The async I/O manager for the endpoint + * @param pEndpoint The endpoint to get the requests from. 
+ */ +static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr, + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint) +{ + int rc = VINF_SUCCESS; + PPDMACTASKFILE pTasksHead = NULL; + + AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE, + ("Trying to process request lists of a non active endpoint!\n")); + + Assert(!pEndpoint->pFlushReq); + + /* Check the pending list first */ + if (pEndpoint->AioMgr.pReqsPendingHead) + { + LogFlow(("Queuing pending requests first\n")); + + pTasksHead = pEndpoint->AioMgr.pReqsPendingHead; + /* + * Clear the list as the processing routine will insert them into the list + * again if it gets a flush request. + */ + pEndpoint->AioMgr.pReqsPendingHead = NULL; + pEndpoint->AioMgr.pReqsPendingTail = NULL; + rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint); + AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */ + } + + if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead) + { + /* Now the request queue. */ + pTasksHead = pdmacFileEpGetNewTasks(pEndpoint); + if (pTasksHead) + { + rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint); + AssertRC(rc); + } + } + + return rc; +} + +static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr) +{ + int rc = VINF_SUCCESS; + bool fNotifyWaiter = false; + + LogFlowFunc((": Enter\n")); + + Assert(pAioMgr->fBlockingEventPending); + + switch (pAioMgr->enmBlockingEvent) + { + case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT: + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE); + AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n")); + + pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE; + + pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead; + pEndpointNew->AioMgr.pEndpointPrev = NULL; + if (pAioMgr->pEndpointsHead) + pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew; + pAioMgr->pEndpointsHead = pEndpointNew; + + /* Assign the completion point to this file. */ + rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile); + fNotifyWaiter = true; + pAioMgr->cEndpoints++; + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT: + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE); + AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n")); + + pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING; + fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove); + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT: + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE); + AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n")); + + if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE) + { + LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri)); + + /* Make sure all tasks finished. Process the queues a last time first. 
*/ + rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose); + AssertRC(rc); + + pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING; + fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose); + } + else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING) + && (!pEndpointClose->AioMgr.cRequestsActive)) + fNotifyWaiter = true; + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN: + { + pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN; + if (!pAioMgr->cRequestsActive) + fNotifyWaiter = true; + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND: + { + pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING; + break; + } + case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME: + { + pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING; + fNotifyWaiter = true; + break; + } + default: + AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent)); + } + + if (fNotifyWaiter) + { + ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false); + pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID; + + /* Release the waiting thread. */ + LogFlow(("Signalling waiter\n")); + rc = RTSemEventSignal(pAioMgr->EventSemBlock); + AssertRC(rc); + } + + LogFlowFunc((": Leave\n")); + return rc; +} + +/** + * Checks all endpoints for pending events or new requests. + * + * @returns VBox status code. + * @param pAioMgr The I/O manager handle. + */ +static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr) +{ + /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */ + int rc = VINF_SUCCESS; + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead; + + pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT; + + while (pEndpoint) + { + if (!pEndpoint->pFlushReq + && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE) + && !pEndpoint->AioMgr.fMoving) + { + rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint); + if (RT_FAILURE(rc)) + return rc; + } + else if ( !pEndpoint->AioMgr.cRequestsActive + && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE) + { + /* Reopen the file so that the new endpoint can re-associate with the file */ + RTFileClose(pEndpoint->hFile); + rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags); + AssertRC(rc); + + if (pEndpoint->AioMgr.fMoving) + { + pEndpoint->AioMgr.fMoving = false; + pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint); + } + else + { + Assert(pAioMgr->fBlockingEventPending); + ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false); + + /* Release the waiting thread. */ + LogFlow(("Signalling waiter\n")); + rc = RTSemEventSignal(pAioMgr->EventSemBlock); + AssertRC(rc); + } + } + + pEndpoint = pEndpoint->AioMgr.pEndpointNext; + } + + return rc; +} + +/** + * Wrapper around pdmacFileAioMgrNormalReqCompleteRc(). 
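+ *
+ * Retrieves the request status and the number of bytes transferred with
+ * RTFileAioReqGetRC() and forwards both to the real completion handler.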
+ */ +static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq) +{ + size_t cbTransfered = 0; + int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered); + + pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered); +} + +static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq, + int rcReq, size_t cbTransfered) +{ + int rc = VINF_SUCCESS; + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint; + PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq); + PPDMACTASKFILE pTasksWaiting; + + LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq)); + + pEndpoint = pTask->pEndpoint; + + pTask->hReq = NIL_RTFILEAIOREQ; + + pAioMgr->cRequestsActive--; + pEndpoint->AioMgr.cRequestsActive--; + pEndpoint->AioMgr.cReqsProcessed++; + + /* + * It is possible that the request failed on Linux with kernels < 2.6.23 + * if the passed buffer was allocated with remap_pfn_range or if the file + * is on an NFS endpoint which does not support async and direct I/O at the same time. + * The endpoint will be migrated to a failsafe manager in case a request fails. + */ + if (RT_FAILURE(rcReq)) + { + /* Free bounce buffers and the IPRT request. */ + pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq); + + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH) + { + LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq)); + pEndpoint->fAsyncFlushSupported = false; + AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n")); + /* The other method will take over now. */ + + pEndpoint->pFlushReq = NULL; + /* Call completion callback */ + LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS)); + pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS); + pdmacFileTaskFree(pEndpoint, pTask); + } + else + { + /* Free the lock and process pending tasks if necessary */ + pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock); + rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint); + AssertRC(rc); + + if (pTask->cbBounceBuffer) + RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer); + + /* + * Fatal errors are reported to the guest and non-fatal errors + * will cause a migration to the failsafe manager in the hope + * that the error disappears. + */ + if (!pdmacFileAioMgrNormalRcIsFatal(rcReq)) + { + /* Queue the request on the pending list. */ + pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead; + pEndpoint->AioMgr.pReqsPendingHead = pTask; + + /* Create a new failsafe manager if necessary. */ + if (!pEndpoint->AioMgr.fMoving) + { + PPDMACEPFILEMGR pAioMgrFailsafe; + + LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n", + RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri)); + + pEndpoint->AioMgr.fMoving = true; + + rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass, + &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE); + AssertRC(rc); + + pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe; + + /* Update the flags to open the file with. Disable async I/O and enable the host cache. */ + pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE); + } + + /* If this was the last request for the endpoint migrate it to the new manager. 
*/ + if (!pEndpoint->AioMgr.cRequestsActive) + { + bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint); + Assert(!fReqsPending); NOREF(fReqsPending); + + rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint); + AssertRC(rc); + } + } + else + { + pTask->pfnCompleted(pTask, pTask->pvUser, rcReq); + pdmacFileTaskFree(pEndpoint, pTask); + } + } + } + else + { + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH) + { + /* Clear pending flush */ + AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n")); + pEndpoint->pFlushReq = NULL; + pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq); + + /* Call completion callback */ + LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq)); + pTask->pfnCompleted(pTask, pTask->pvUser, rcReq); + pdmacFileTaskFree(pEndpoint, pTask); + } + else + { + /* + * Restart an incomplete transfer. + * This usually means that the request will return an error now + * but to get the cause of the error (disk full, file too big, I/O error, ...) + * the transfer needs to be continued. + */ + pTask->cbTransfered += cbTransfered; + + if (RT_UNLIKELY( pTask->cbTransfered < pTask->DataSeg.cbSeg + || ( pTask->cbBounceBuffer + && pTask->cbTransfered < pTask->cbBounceBuffer))) + { + RTFOFF offStart; + size_t cbToTransfer; + uint8_t *pbBuf = NULL; + + LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n", + pTask, cbTransfered)); + Assert(cbTransfered % 512 == 0); + + if (pTask->cbBounceBuffer) + { + AssertPtr(pTask->pvBounceBuffer); + offStart = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered; + cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered; + pbBuf = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered; + } + else + { + Assert(!pTask->pvBounceBuffer); + offStart = pTask->Off + pTask->cbTransfered; + cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered; + pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered; + } + + if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ) + { + rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart, + pbBuf, cbToTransfer, pTask); + } + else + { + AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE, + ("Invalid transfer type\n")); + rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart, + pbBuf, cbToTransfer, pTask); + } + AssertRC(rc); + + pTask->hReq = hReq; + rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1); + AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES), + ("Unexpected return code rc=%Rrc\n", rc)); + } + else if (pTask->fPrefetch) + { + Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE); + Assert(pTask->cbBounceBuffer); + + memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer, + pTask->DataSeg.pvSeg, + pTask->DataSeg.cbSeg); + + /* Write it now. */ + pTask->fPrefetch = false; + RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1); + size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512); + + pTask->cbTransfered = 0; + + /* Grow the file if needed. 
*/ + if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile)) + { + ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg); + RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg); + } + + rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, + offStart, pTask->pvBounceBuffer, cbToTransfer, pTask); + AssertRC(rc); + pTask->hReq = hReq; + rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1); + AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES), + ("Unexpected return code rc=%Rrc\n", rc)); + } + else + { + if (RT_SUCCESS(rc) && pTask->cbBounceBuffer) + { + if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ) + memcpy(pTask->DataSeg.pvSeg, + ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer, + pTask->DataSeg.cbSeg); + + RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer); + } + + pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq); + + /* Free the lock and process pending tasks if necessary */ + pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock); + if (pTasksWaiting) + { + rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint); + AssertRC(rc); + } + + /* Call completion callback */ + LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq)); + pTask->pfnCompleted(pTask, pTask->pvUser, rcReq); + pdmacFileTaskFree(pEndpoint, pTask); + + /* + * If there is no request left on the endpoint but a flush request is set + * it completed now and we notify the owner. + * Furthermore we look for new requests and continue. + */ + if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq) + { + /* Call completion callback */ + pTask = pEndpoint->pFlushReq; + pEndpoint->pFlushReq = NULL; + + AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n")); + + pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS); + pdmacFileTaskFree(pEndpoint, pTask); + } + else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving)) + { + /* If the endpoint is about to be migrated do it now. */ + bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint); + Assert(!fReqsPending); NOREF(fReqsPending); + + rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint); + AssertRC(rc); + } + } + } /* Not a flush request */ + } /* request completed successfully */ +} + +/** Helper macro for checking for error codes. */ +#define CHECK_RC(pAioMgr, rc) \ + if (RT_FAILURE(rc)) \ + {\ + int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\ + return rc2;\ + } + +/** + * The normal I/O manager using the RTFileAio* API + * + * @returns VBox status code. + * @param hThreadSelf Handle of the thread. + * @param pvUser Opaque user data. 
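+ *
+ * @note pvUser points to the PPDMACEPFILEMGR instance this thread drives; the
+ *       loop keeps running while the manager is in the RUNNING, SUSPENDING or
+ *       GROWING state.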
+ */ +DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser) +{ + int rc = VINF_SUCCESS; + PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser; + uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD; + NOREF(hThreadSelf); + + while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING + || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING + || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING) + { + if (!pAioMgr->cRequestsActive) + { + ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true); + if (!ASMAtomicReadBool(&pAioMgr->fWokenUp)) + rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired); + ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false); + Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT); + + LogFlow(("Got woken up\n")); + ASMAtomicWriteBool(&pAioMgr->fWokenUp, false); + } + + /* Check for an external blocking event first. */ + if (pAioMgr->fBlockingEventPending) + { + rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr); + CHECK_RC(pAioMgr, rc); + } + + if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING + || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)) + { + /* We got woken up because an endpoint issued new requests. Queue them. */ + rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr); + CHECK_RC(pAioMgr, rc); + + while (pAioMgr->cRequestsActive) + { + RTFILEAIOREQ apReqs[20]; + uint32_t cReqsCompleted = 0; + size_t cReqsWait; + + if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs)) + cReqsWait = RT_ELEMENTS(apReqs); + else + cReqsWait = pAioMgr->cRequestsActive; + + LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait)); + + rc = RTFileAioCtxWait(pAioMgr->hAioCtx, + 1, + RT_INDEFINITE_WAIT, apReqs, + cReqsWait, &cReqsCompleted); + if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED)) + CHECK_RC(pAioMgr, rc); + + LogFlow(("%d tasks completed\n", cReqsCompleted)); + + for (uint32_t i = 0; i < cReqsCompleted; i++) + pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]); + + /* Check for an external blocking event before we go to sleep again. */ + if (pAioMgr->fBlockingEventPending) + { + rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr); + CHECK_RC(pAioMgr, rc); + } + + /* Update load statistics. */ + uint64_t uMillisCurr = RTTimeMilliTS(); + if (uMillisCurr > uMillisEnd) + { + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead; + + /* Calculate timespan. */ + uMillisCurr -= uMillisEnd; + + while (pEndpointCurr) + { + pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD); + pEndpointCurr->AioMgr.cReqsProcessed = 0; + pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext; + } + + /* Set new update interval */ + uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD; + } + + /* Check endpoints for new requests. */ + if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING) + { + rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr); + CHECK_RC(pAioMgr, rc); + } + } /* while requests are active. 
*/ + + if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING) + { + rc = pdmacFileAioMgrNormalGrow(pAioMgr); + AssertRC(rc); + Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING); + + rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr); + CHECK_RC(pAioMgr, rc); + } + } /* if still running */ + } /* while running */ + + LogFlowFunc(("rc=%Rrc\n", rc)); + return rc; +} + +#undef CHECK_RC + diff --git a/src/VBox/VMM/VMMR3/PDMBlkCache.cpp b/src/VBox/VMM/VMMR3/PDMBlkCache.cpp new file mode 100644 index 00000000..7a0fa776 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMBlkCache.cpp @@ -0,0 +1,2805 @@ +/* $Id: PDMBlkCache.cpp $ */ +/** @file + * PDM Block Cache. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache + * This component implements an I/O cache based on the 2Q cache algorithm. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "PDMBlkCacheInternal.h" + +#ifdef VBOX_STRICT +# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \ + do \ + { \ + AssertMsg(RTCritSectIsOwner(&Cache->CritSect), \ + ("Thread does not own critical section\n"));\ + } while (0) + +# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \ + do \ + { \ + AssertMsg(RTSemRWIsWriteOwner(pEpCache->SemRWEntries), \ + ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \ + } while (0) + +# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \ + do \ + { \ + AssertMsg(RTSemRWIsReadOwner(pEpCache->SemRWEntries), \ + ("Thread is not read owner of the per endpoint RW semaphore\n")); \ + } while (0) + +#else +# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0) +# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0) +# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0) +#endif + +#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1 + +/* Enable to enable some tracing in the block cache code for investigating issues. */ +/*#define VBOX_BLKCACHE_TRACING 1*/ + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ + +static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, + uint64_t off, size_t cbData, uint8_t *pbBuffer); +static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry); + + +/** + * Add message to the VM trace buffer. + * + * @returns nothing. + * @param pBlkCache The block cache. + * @param pszFmt The format string. 
+ * @param ... Additional parameters for the string formatter. + */ +DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...) +{ +#if defined(VBOX_BLKCACHE_TRACING) + va_list va; + va_start(va, pszFmt); + RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va); + va_end(va); +#else + RT_NOREF2(pBlkCache, pszFmt); +#endif +} + +/** + * Decrement the reference counter of the given cache entry. + * + * @returns nothing. + * @param pEntry The entry to release. + */ +DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry) +{ + AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n")); + ASMAtomicDecU32(&pEntry->cRefs); +} + +/** + * Increment the reference counter of the given cache entry. + * + * @returns nothing. + * @param pEntry The entry to reference. + */ +DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry) +{ + ASMAtomicIncU32(&pEntry->cRefs); +} + +#ifdef VBOX_STRICT +static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache) +{ + /* Amount of cached data should never exceed the maximum amount. */ + AssertMsg(pCache->cbCached <= pCache->cbMax, + ("Current amount of cached data exceeds maximum\n")); + + /* The amount of cached data in the LRU and FRU list should match cbCached */ + AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached, + ("Amount of cached data doesn't match\n")); + + AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax, + ("Paged out list exceeds maximum\n")); +} +#endif + +DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache) +{ + RTCritSectEnter(&pCache->CritSect); +#ifdef VBOX_STRICT + pdmBlkCacheValidate(pCache); +#endif +} + +DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache) +{ +#ifdef VBOX_STRICT + pdmBlkCacheValidate(pCache); +#endif + RTCritSectLeave(&pCache->CritSect); +} + +DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount) +{ + PDMACFILECACHE_IS_CRITSECT_OWNER(pCache); + pCache->cbCached -= cbAmount; +} + +DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount) +{ + PDMACFILECACHE_IS_CRITSECT_OWNER(pCache); + pCache->cbCached += cbAmount; +} + +DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount) +{ + pList->cbCached += cbAmount; +} + +DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount) +{ + pList->cbCached -= cbAmount; +} + +#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS +/** + * Checks consistency of a LRU list. + * + * @returns nothing + * @param pList The LRU list to check. + * @param pNotInList Element which is not allowed to occur in the list. + */ +static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList) +{ + PPDMBLKCACHEENTRY pCurr = pList->pHead; + + /* Check that there are no double entries and no cycles in the list. */ + while (pCurr) + { + PPDMBLKCACHEENTRY pNext = pCurr->pNext; + + while (pNext) + { + AssertMsg(pCurr != pNext, + ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n", + pCurr, pList)); + pNext = pNext->pNext; + } + + AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr)); + + if (!pCurr->pNext) + AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n")); + + pCurr = pCurr->pNext; + } +} +#endif + +/** + * Unlinks a cache entry from the LRU list it is assigned to. + * + * @returns nothing. + * @param pEntry The entry to unlink. 
+ */ +static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry) +{ + PPDMBLKLRULIST pList = pEntry->pList; + PPDMBLKCACHEENTRY pPrev, pNext; + + LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList)); + + AssertPtr(pList); + +#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS + pdmBlkCacheCheckList(pList, NULL); +#endif + + pPrev = pEntry->pPrev; + pNext = pEntry->pNext; + + AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n")); + AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n")); + + if (pPrev) + pPrev->pNext = pNext; + else + { + pList->pHead = pNext; + + if (pNext) + pNext->pPrev = NULL; + } + + if (pNext) + pNext->pPrev = pPrev; + else + { + pList->pTail = pPrev; + + if (pPrev) + pPrev->pNext = NULL; + } + + pEntry->pList = NULL; + pEntry->pPrev = NULL; + pEntry->pNext = NULL; + pdmBlkCacheListSub(pList, pEntry->cbData); +#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS + pdmBlkCacheCheckList(pList, pEntry); +#endif +} + +/** + * Adds a cache entry to the given LRU list unlinking it from the currently + * assigned list if needed. + * + * @returns nothing. + * @param pList List to the add entry to. + * @param pEntry Entry to add. + */ +static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry) +{ + LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList)); +#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS + pdmBlkCacheCheckList(pList, NULL); +#endif + + /* Remove from old list if needed */ + if (pEntry->pList) + pdmBlkCacheEntryRemoveFromList(pEntry); + + pEntry->pNext = pList->pHead; + if (pList->pHead) + pList->pHead->pPrev = pEntry; + else + { + Assert(!pList->pTail); + pList->pTail = pEntry; + } + + pEntry->pPrev = NULL; + pList->pHead = pEntry; + pdmBlkCacheListAdd(pList, pEntry->cbData); + pEntry->pList = pList; +#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS + pdmBlkCacheCheckList(pList, NULL); +#endif +} + +/** + * Destroys a LRU list freeing all entries. + * + * @returns nothing + * @param pList Pointer to the LRU list to destroy. + * + * @note The caller must own the critical section of the cache. + */ +static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList) +{ + while (pList->pHead) + { + PPDMBLKCACHEENTRY pEntry = pList->pHead; + + pList->pHead = pEntry->pNext; + + AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)), + ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags)); + + RTMemPageFree(pEntry->pbData, pEntry->cbData); + RTMemFree(pEntry); + } +} + +/** + * Tries to remove the given amount of bytes from a given list in the cache + * moving the entries to one of the given ghosts lists + * + * @returns Amount of data which could be freed. + * @param pCache Pointer to the global cache data. + * @param cbData The amount of the data to free. + * @param pListSrc The source list to evict data from. + * @param pGhostListDst Where the ghost list removed entries should be + * moved to, NULL if the entry should be freed. + * @param fReuseBuffer Flag whether a buffer should be reused if it has + * the same size + * @param ppbBuffer Where to store the address of the buffer if an + * entry with the same size was found and + * fReuseBuffer is true. + * + * @note This function may return fewer bytes than requested because entries + * may be marked as non evictable if they are used for I/O at the + * moment. 
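+ *
+ * @par Illustrative call (as used by pdmBlkCacheReclaim() below):
+ * @code
+ *     cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
+ *                                           &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
+ * @endcode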
+ */ +static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData, + PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst, + bool fReuseBuffer, uint8_t **ppbBuffer) +{ + size_t cbEvicted = 0; + + PDMACFILECACHE_IS_CRITSECT_OWNER(pCache); + + AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n")); + AssertMsg( !pGhostListDst + || (pGhostListDst == &pCache->LruRecentlyUsedOut), + ("Destination list must be NULL or the recently used but paged out list\n")); + + if (fReuseBuffer) + { + AssertPtr(ppbBuffer); + *ppbBuffer = NULL; + } + + /* Start deleting from the tail. */ + PPDMBLKCACHEENTRY pEntry = pListSrc->pTail; + + while ((cbEvicted < cbData) && pEntry) + { + PPDMBLKCACHEENTRY pCurr = pEntry; + + pEntry = pEntry->pPrev; + + /* We can't evict pages which are currently in progress or dirty but not in progress */ + if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE) + && (ASMAtomicReadU32(&pCurr->cRefs) == 0)) + { + /* Ok eviction candidate. Grab the endpoint semaphore and check again + * because somebody else might have raced us. */ + PPDMBLKCACHE pBlkCache = pCurr->pBlkCache; + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + + if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE) + && (ASMAtomicReadU32(&pCurr->cRefs) == 0)) + { + LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData)); + + if (fReuseBuffer && pCurr->cbData == cbData) + { + STAM_COUNTER_INC(&pCache->StatBuffersReused); + *ppbBuffer = pCurr->pbData; + } + else if (pCurr->pbData) + RTMemPageFree(pCurr->pbData, pCurr->cbData); + + pCurr->pbData = NULL; + cbEvicted += pCurr->cbData; + + pdmBlkCacheEntryRemoveFromList(pCurr); + pdmBlkCacheSub(pCache, pCurr->cbData); + + if (pGhostListDst) + { + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail; + + /* We have to remove the last entries from the paged out list. */ + while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax + && pGhostEntFree) + { + PPDMBLKCACHEENTRY pFree = pGhostEntFree; + PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache; + + pGhostEntFree = pGhostEntFree->pPrev; + + RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT); + + if (ASMAtomicReadU32(&pFree->cRefs) == 0) + { + pdmBlkCacheEntryRemoveFromList(pFree); + + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + + RTMemFree(pFree); + } + + RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries); + } + + if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax) + { + /* Couldn't remove enough entries. Delete */ + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + + RTMemFree(pCurr); + } + else + pdmBlkCacheEntryAddToList(pGhostListDst, pCurr); + } + else + { + /* Delete the entry from the AVL tree it is assigned to. 
*/ + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + RTMemFree(pCurr); + } + } + + } + else + LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData)); + } + + return cbEvicted; +} + +static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer) +{ + size_t cbRemoved = 0; + + if ((pCache->cbCached + cbData) < pCache->cbMax) + return true; + else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax) + { + /* Try to evict as many bytes as possible from A1in */ + cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn, + &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer); + + /* + * If it was not possible to remove enough entries + * try the frequently accessed cache. + */ + if (cbRemoved < cbData) + { + Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */ + + /* + * If we removed something we can't pass the reuse buffer flag anymore because + * we don't need to evict that much data + */ + if (!cbRemoved) + cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed, + NULL, fReuseBuffer, ppbBuffer); + else + cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed, + NULL, false, NULL); + } + } + else + { + /* We have to remove entries from frequently access list. */ + cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed, + NULL, fReuseBuffer, ppbBuffer); + } + + LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData)); + return (cbRemoved >= cbData); +} + +DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer) +{ + int rc = VINF_SUCCESS; + + LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n", + __FUNCTION__, pIoXfer, pIoXfer->enmXferDir)); + + ASMAtomicIncU32(&pBlkCache->cIoXfersActive); + pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)", + pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive); + + switch (pBlkCache->enmType) + { + case PDMBLKCACHETYPE_DEV: + { + rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns, + pIoXfer->enmXferDir, + off, cbXfer, + &pIoXfer->SgBuf, pIoXfer); + break; + } + case PDMBLKCACHETYPE_DRV: + { + rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns, + pIoXfer->enmXferDir, + off, cbXfer, + &pIoXfer->SgBuf, pIoXfer); + break; + } + case PDMBLKCACHETYPE_USB: + { + rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns, + pIoXfer->enmXferDir, + off, cbXfer, + &pIoXfer->SgBuf, pIoXfer); + break; + } + case PDMBLKCACHETYPE_INTERNAL: + { + rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser, + pIoXfer->enmXferDir, + off, cbXfer, + &pIoXfer->SgBuf, pIoXfer); + break; + } + default: + AssertMsgFailed(("Unknown block cache type!\n")); + } + + if (RT_FAILURE(rc)) + { + pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc); + ASMAtomicDecU32(&pBlkCache->cIoXfersActive); + } + + LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc)); + return rc; +} + +/** + * Initiates a read I/O task for the given entry. + * + * @returns VBox status code. 
+ * @param pEntry The entry to fetch the data to. + */ +static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry) +{ + PPDMBLKCACHE pBlkCache = pEntry->pBlkCache; + LogFlowFunc((": Reading data into cache entry %#p\n", pEntry)); + + /* Make sure no one evicts the entry while it is accessed. */ + pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS; + + PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER)); + if (RT_UNLIKELY(!pIoXfer)) + return VERR_NO_MEMORY; + + AssertMsg(pEntry->pbData, ("Entry is in ghost state\n")); + + pIoXfer->fIoCache = true; + pIoXfer->pEntry = pEntry; + pIoXfer->SgSeg.pvSeg = pEntry->pbData; + pIoXfer->SgSeg.cbSeg = pEntry->cbData; + pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ; + RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1); + + return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer); +} + +/** + * Initiates a write I/O task for the given entry. + * + * @returns nothing. + * @param pEntry The entry to read the data from. + */ +static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry) +{ + PPDMBLKCACHE pBlkCache = pEntry->pBlkCache; + LogFlowFunc((": Writing data from cache entry %#p\n", pEntry)); + + /* Make sure no one evicts the entry while it is accessed. */ + pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS; + + PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER)); + if (RT_UNLIKELY(!pIoXfer)) + return VERR_NO_MEMORY; + + AssertMsg(pEntry->pbData, ("Entry is in ghost state\n")); + + pIoXfer->fIoCache = true; + pIoXfer->pEntry = pEntry; + pIoXfer->SgSeg.pvSeg = pEntry->pbData; + pIoXfer->SgSeg.cbSeg = pEntry->cbData; + pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE; + RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1); + + return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer); +} + +/** + * Passthrough a part of a request directly to the I/O manager handling the + * endpoint. + * + * @returns VBox status code. + * @param pBlkCache The endpoint cache. + * @param pReq The request. + * @param pSgBuf The scatter/gather buffer. + * @param offStart Offset to start transfer from. + * @param cbData Amount of data to transfer. + * @param enmXferDir The transfer type (read/write) + */ +static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq, + PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData, + PDMBLKCACHEXFERDIR enmXferDir) +{ + + PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER)); + if (RT_UNLIKELY(!pIoXfer)) + return VERR_NO_MEMORY; + + ASMAtomicIncU32(&pReq->cXfersPending); + pIoXfer->fIoCache = false; + pIoXfer->pReq = pReq; + pIoXfer->enmXferDir = enmXferDir; + if (pSgBuf) + { + RTSgBufClone(&pIoXfer->SgBuf, pSgBuf); + RTSgBufAdvance(pSgBuf, cbData); + } + + return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer); +} + +/** + * Commit a single dirty entry to the endpoint + * + * @returns nothing + * @param pEntry The entry to commit. + */ +static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry) +{ + AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY) + && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS), + ("Invalid flags set for entry %#p\n", pEntry)); + + pdmBlkCacheEntryWriteToMedium(pEntry); +} + +/** + * Commit all dirty entries for a single endpoint. + * + * @returns nothing. + * @param pBlkCache The endpoint cache to commit. 
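+ *
+ * @note Does nothing if the cache was suspended.  The dirty-but-not-committed
+ *       list is moved to a local list under the list lock and every entry on
+ *       it is written back via pdmBlkCacheEntryCommit().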
+ */ +static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache) +{ + uint32_t cbCommitted = 0; + + /* Return if the cache was suspended. */ + if (pBlkCache->fSuspended) + return; + + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + + /* The list is moved to a new header to reduce locking overhead. */ + RTLISTANCHOR ListDirtyNotCommitted; + + RTSpinlockAcquire(pBlkCache->LockList); + RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted); + RTSpinlockRelease(pBlkCache->LockList); + + if (!RTListIsEmpty(&ListDirtyNotCommitted)) + { + PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted); + + while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted)) + { + PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY, + NodeNotCommitted); + pdmBlkCacheEntryCommit(pEntry); + cbCommitted += pEntry->cbData; + RTListNodeRemove(&pEntry->NodeNotCommitted); + pEntry = pNext; + } + + /* Commit the last endpoint */ + Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted)); + pdmBlkCacheEntryCommit(pEntry); + cbCommitted += pEntry->cbData; + RTListNodeRemove(&pEntry->NodeNotCommitted); + AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted), + ("Committed all entries but list is not empty\n")); + } + + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted, + ("Number of committed bytes exceeds number of dirty bytes\n")); + uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted); + + /* Reset the commit timer if we don't have any dirty bits. */ + if ( !(cbDirtyOld - cbCommitted) + && pBlkCache->pCache->u32CommitTimeoutMs != 0) + TMTimerStop(pBlkCache->pCache->pTimerCommit); +} + +/** + * Commit all dirty entries in the cache. + * + * @returns nothing. + * @param pCache The global cache instance. + */ +static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache) +{ + bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true); + + if (!fCommitInProgress) + { + pdmBlkCacheLockEnter(pCache); + Assert(!RTListIsEmpty(&pCache->ListUsers)); + + PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser); + AssertPtr(pBlkCache); + + while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser)) + { + pdmBlkCacheCommit(pBlkCache); + + pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE, + NodeCacheUser); + } + + /* Commit the last endpoint */ + Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser)); + pdmBlkCacheCommit(pBlkCache); + + pdmBlkCacheLockLeave(pCache); + ASMAtomicWriteBool(&pCache->fCommitInProgress, false); + } +} + +/** + * Adds the given entry as a dirty to the cache. + * + * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold + * @param pBlkCache The endpoint cache the entry belongs to. + * @param pEntry The entry to add. + */ +static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry) +{ + bool fDirtyBytesExceeded = false; + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + + /* If the commit timer is disabled we commit right away. 
*/ + if (pCache->u32CommitTimeoutMs == 0) + { + pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY; + pdmBlkCacheEntryCommit(pEntry); + } + else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)) + { + pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY; + + RTSpinlockAcquire(pBlkCache->LockList); + RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted); + RTSpinlockRelease(pBlkCache->LockList); + + uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData); + + /* Prevent committing if the VM was suspended. */ + if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))) + fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold); + else if (!cbDirty && pCache->u32CommitTimeoutMs > 0) + { + /* Arm the commit timer. */ + TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs); + } + } + + return fDirtyBytesExceeded; +} + +static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId) +{ + bool fFound = false; + + PPDMBLKCACHE pBlkCache; + RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser) + { + if (!RTStrCmp(pBlkCache->pszId, pcszId)) + { + fFound = true; + break; + } + } + + return fFound ? pBlkCache : NULL; +} + +/** + * Commit timer callback. + */ +static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser) +{ + PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser; + NOREF(pVM); NOREF(pTimer); + + LogFlowFunc(("Commit interval expired, commiting dirty entries\n")); + + if ( ASMAtomicReadU32(&pCache->cbDirty) > 0 + && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)) + pdmBlkCacheCommitDirtyEntries(pCache); + + LogFlowFunc(("Entries committed, going to sleep\n")); +} + +static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM) +{ + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + + AssertPtr(pBlkCacheGlobal); + + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs); + + /* Go through the list and save all dirty entries. */ + PPDMBLKCACHE pBlkCache; + RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser) + { + uint32_t cEntries = 0; + PPDMBLKCACHEENTRY pEntry; + + RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId)); + SSMR3PutStrZ(pSSM, pBlkCache->pszId); + + /* Count the number of entries to safe. */ + RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted) + { + cEntries++; + } + + SSMR3PutU32(pSSM, cEntries); + + /* Walk the list of all dirty entries and save them. */ + RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted) + { + /* A few sanity checks. 
*/ + AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n")); + AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n")); + AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n")); + AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n")); + AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn + || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed, + ("Invalid list\n")); + AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1, + ("Size and range do not match\n")); + + /* Save */ + SSMR3PutU64(pSSM, pEntry->Core.Key); + SSMR3PutU32(pSSM, pEntry->cbData); + SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData); + } + + RTSemRWReleaseRead(pBlkCache->SemRWEntries); + } + + pdmBlkCacheLockLeave(pBlkCacheGlobal); + + /* Terminator */ + return SSMR3PutU32(pSSM, UINT32_MAX); +} + +static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + uint32_t cRefs; + + NOREF(uPass); + AssertPtr(pBlkCacheGlobal); + + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + + SSMR3GetU32(pSSM, &cRefs); + + /* + * Fewer users in the saved state than in the current VM are allowed + * because that means that there are only new ones which don't have any saved state + * which can get lost. + * More saved state entries than registered cache users are only allowed if the + * missing users don't have any data saved in the cache. + */ + int rc = VINF_SUCCESS; + char *pszId = NULL; + + while ( cRefs > 0 + && RT_SUCCESS(rc)) + { + PPDMBLKCACHE pBlkCache = NULL; + uint32_t cbId = 0; + + SSMR3GetU32(pSSM, &cbId); + Assert(cbId > 0); + + cbId++; /* Include terminator */ + pszId = (char *)RTMemAllocZ(cbId * sizeof(char)); + if (!pszId) + { + rc = VERR_NO_MEMORY; + break; + } + + rc = SSMR3GetStrZ(pSSM, pszId, cbId); + AssertRC(rc); + + /* Search for the block cache with the provided id. */ + pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId); + + /* Get the entries */ + uint32_t cEntries; + SSMR3GetU32(pSSM, &cEntries); + + if (!pBlkCache && (cEntries > 0)) + { + rc = SSMR3SetCfgError(pSSM, RT_SRC_POS, + N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations")); + break; + } + + RTMemFree(pszId); + pszId = NULL; + + while (cEntries > 0) + { + PPDMBLKCACHEENTRY pEntry; + uint64_t off; + uint32_t cbEntry; + + SSMR3GetU64(pSSM, &off); + SSMR3GetU32(pSSM, &cbEntry); + + pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL); + if (!pEntry) + { + rc = VERR_NO_MEMORY; + break; + } + + rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry); + if (RT_FAILURE(rc)) + { + RTMemFree(pEntry->pbData); + RTMemFree(pEntry); + break; + } + + /* Insert into the tree. */ + bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core); + Assert(fInserted); NOREF(fInserted); + + /* Add to the dirty list. */ + pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry); + pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry); + pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry); + pdmBlkCacheEntryRelease(pEntry); + cEntries--; + } + + cRefs--; + } + + if (pszId) + RTMemFree(pszId); + + if (cRefs && RT_SUCCESS(rc)) + rc = SSMR3SetCfgError(pSSM, RT_SRC_POS, + N_("Unexpected error while restoring state. 
Please make sure the source and target VMs have compatible storage configurations")); + + pdmBlkCacheLockLeave(pBlkCacheGlobal); + + if (RT_SUCCESS(rc)) + { + uint32_t u32 = 0; + rc = SSMR3GetU32(pSSM, &u32); + if (RT_SUCCESS(rc)) + AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + } + + return rc; +} + +int pdmR3BlkCacheInit(PVM pVM) +{ + int rc = VINF_SUCCESS; + PUVM pUVM = pVM->pUVM; + PPDMBLKCACHEGLOBAL pBlkCacheGlobal; + + LogFlowFunc((": pVM=%p\n", pVM)); + + VM_ASSERT_EMT(pVM); + + PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM); + PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache"); + + pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL)); + if (!pBlkCacheGlobal) + return VERR_NO_MEMORY; + + RTListInit(&pBlkCacheGlobal->ListUsers); + pBlkCacheGlobal->pVM = pVM; + pBlkCacheGlobal->cRefs = 0; + pBlkCacheGlobal->cbCached = 0; + pBlkCacheGlobal->fCommitInProgress = false; + + /* Initialize members */ + pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL; + pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL; + pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0; + + pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL; + pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL; + pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0; + + pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL; + pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL; + pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0; + + do + { + rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M); + AssertLogRelRCBreak(rc); + LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax)); + + pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */ + pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */ + LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", + pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax)); + + /** @todo r=aeichner: Experiment to find optimal default values */ + rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */); + AssertLogRelRCBreak(rc); + rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2); + AssertLogRelRCBreak(rc); + } while (0); + + if (RT_SUCCESS(rc)) + { + STAMR3Register(pVM, &pBlkCacheGlobal->cbMax, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/cbMax", + STAMUNIT_BYTES, + "Maximum cache size"); + STAMR3Register(pVM, &pBlkCacheGlobal->cbCached, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/cbCached", + STAMUNIT_BYTES, + "Currently used cache"); + STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/cbCachedMruIn", + STAMUNIT_BYTES, + "Number of bytes cached in MRU list"); + STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/cbCachedMruOut", + STAMUNIT_BYTES, + "Number of bytes cached in FRU list"); + STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/cbCachedFru", + STAMUNIT_BYTES, + "Number of bytes cached in FRU ghost list"); + +#ifdef VBOX_WITH_STATISTICS + STAMR3Register(pVM, &pBlkCacheGlobal->cHits, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheHits", + STAMUNIT_COUNT, 
"Number of hits in the cache"); + STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CachePartialHits", + STAMUNIT_COUNT, "Number of partial hits in the cache"); + STAMR3Register(pVM, &pBlkCacheGlobal->cMisses, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheMisses", + STAMUNIT_COUNT, "Number of misses when accessing the cache"); + STAMR3Register(pVM, &pBlkCacheGlobal->StatRead, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheRead", + STAMUNIT_BYTES, "Number of bytes read from the cache"); + STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheWritten", + STAMUNIT_BYTES, "Number of bytes written to the cache"); + STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet, + STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheTreeGet", + STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree"); + STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert, + STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheTreeInsert", + STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree"); + STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove, + STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheTreeRemove", + STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree"); + STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + "/PDM/BlkCache/CacheBuffersReused", + STAMUNIT_COUNT, "Number of times a buffer could be reused"); +#endif + + /* Initialize the critical section */ + rc = RTCritSectInit(&pBlkCacheGlobal->CritSect); + } + + if (RT_SUCCESS(rc)) + { + /* Create the commit timer */ + if (pBlkCacheGlobal->u32CommitTimeoutMs > 0) + rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL, + pdmBlkCacheCommitTimerCallback, + pBlkCacheGlobal, + "BlkCache-Commit", + &pBlkCacheGlobal->pTimerCommit); + + if (RT_SUCCESS(rc)) + { + /* Register saved state handler. */ + rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax, + NULL, NULL, NULL, + NULL, pdmR3BlkCacheSaveExec, NULL, + NULL, pdmR3BlkCacheLoadExec, NULL); + if (RT_SUCCESS(rc)) + { + LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax)); + LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs)); + LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold)); + pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal; + return VINF_SUCCESS; + } + } + + RTCritSectDelete(&pBlkCacheGlobal->CritSect); + } + + if (pBlkCacheGlobal) + RTMemFree(pBlkCacheGlobal); + + LogFlowFunc((": returns rc=%Rrc\n", rc)); + return rc; +} + +void pdmR3BlkCacheTerm(PVM pVM) +{ + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + + if (pBlkCacheGlobal) + { + /* Make sure no one else uses the cache now */ + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + /* Cleanup deleting all cache entries waiting for in progress entries to finish. 
*/ + pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn); + pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut); + pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed); + + pdmBlkCacheLockLeave(pBlkCacheGlobal); + + RTCritSectDelete(&pBlkCacheGlobal->CritSect); + RTMemFree(pBlkCacheGlobal); + pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL; + } +} + +int pdmR3BlkCacheResume(PVM pVM) +{ + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + + LogFlowFunc(("pVM=%#p\n", pVM)); + + if ( pBlkCacheGlobal + && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false)) + { + /* The VM was suspended because of an I/O error, commit all dirty entries. */ + pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal); + } + + return VINF_SUCCESS; +} + +static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHE pBlkCache = NULL; + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + + if (!pBlkCacheGlobal) + return VERR_NOT_SUPPORTED; + + /* + * Check that no other user cache has the same id first, + * Unique id's are necessary in case the state is saved. + */ + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId); + + if (!pBlkCache) + { + pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE)); + + if (pBlkCache) + pBlkCache->pszId = RTStrDup(pcszId); + + if ( pBlkCache + && pBlkCache->pszId) + { + pBlkCache->fSuspended = false; + pBlkCache->cIoXfersActive = 0; + pBlkCache->pCache = pBlkCacheGlobal; + RTListInit(&pBlkCache->ListDirtyNotCommitted); + + rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain"); + if (RT_SUCCESS(rc)) + { + rc = RTSemRWCreate(&pBlkCache->SemRWEntries); + if (RT_SUCCESS(rc)) + { + pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE)); + if (pBlkCache->pTree) + { +#ifdef VBOX_WITH_STATISTICS + STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred, + STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, + STAMUNIT_COUNT, "Number of deferred writes", + "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId); +#endif + + /* Add to the list of users. 
*/ + pBlkCacheGlobal->cRefs++; + RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser); + pdmBlkCacheLockLeave(pBlkCacheGlobal); + + *ppBlkCache = pBlkCache; + LogFlowFunc(("returns success\n")); + return VINF_SUCCESS; + } + + rc = VERR_NO_MEMORY; + RTSemRWDestroy(pBlkCache->SemRWEntries); + } + + RTSpinlockDestroy(pBlkCache->LockList); + } + + RTStrFree(pBlkCache->pszId); + } + else + rc = VERR_NO_MEMORY; + + if (pBlkCache) + RTMemFree(pBlkCache); + } + else + rc = VERR_ALREADY_EXISTS; + + pdmBlkCacheLockLeave(pBlkCacheGlobal); + + LogFlowFunc(("Leave rc=%Rrc\n", rc)); + return rc; +} + +VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache, + PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete, + PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue, + PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard, + const char *pcszId) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHE pBlkCache; + + rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId); + if (RT_SUCCESS(rc)) + { + pBlkCache->enmType = PDMBLKCACHETYPE_DRV; + pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete; + pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue; + pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard; + pBlkCache->u.Drv.pDrvIns = pDrvIns; + *ppBlkCache = pBlkCache; + } + + LogFlowFunc(("Leave rc=%Rrc\n", rc)); + return rc; +} + +VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache, + PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete, + PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue, + PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard, + const char *pcszId) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHE pBlkCache; + + rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId); + if (RT_SUCCESS(rc)) + { + pBlkCache->enmType = PDMBLKCACHETYPE_DEV; + pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete; + pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue; + pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard; + pBlkCache->u.Dev.pDevIns = pDevIns; + *ppBlkCache = pBlkCache; + } + + LogFlowFunc(("Leave rc=%Rrc\n", rc)); + return rc; + +} + +VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache, + PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete, + PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue, + PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard, + const char *pcszId) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHE pBlkCache; + + rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId); + if (RT_SUCCESS(rc)) + { + pBlkCache->enmType = PDMBLKCACHETYPE_USB; + pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete; + pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue; + pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard; + pBlkCache->u.Usb.pUsbIns = pUsbIns; + *ppBlkCache = pBlkCache; + } + + LogFlowFunc(("Leave rc=%Rrc\n", rc)); + return rc; + +} + +VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache, + PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete, + PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue, + PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard, + const char *pcszId) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHE pBlkCache; + + rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId); + if (RT_SUCCESS(rc)) + { + pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL; + pBlkCache->u.Int.pfnXferComplete = pfnXferComplete; + pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue; + pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard; + pBlkCache->u.Int.pvUser = pvUser; + *ppBlkCache = 
pBlkCache; + } + + LogFlowFunc(("Leave rc=%Rrc\n", rc)); + return rc; + +} + +/** + * Callback for the AVL destroy routine. Frees a cache entry for this endpoint. + * + * @returns IPRT status code. + * @param pNode The node to destroy. + * @param pvUser Opaque user data. + */ +static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser) +{ + PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode; + PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser; + PPDMBLKCACHE pBlkCache = pEntry->pBlkCache; + + while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS) + { + /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */ + pdmBlkCacheEntryRef(pEntry); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + pdmBlkCacheLockLeave(pCache); + + RTThreadSleep(250); + + /* Re-enter all locks */ + pdmBlkCacheLockEnter(pCache); + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + pdmBlkCacheEntryRelease(pEntry); + } + + AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS), + ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags)); + + bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed + || pEntry->pList == &pCache->LruRecentlyUsedIn; + + pdmBlkCacheEntryRemoveFromList(pEntry); + + if (fUpdateCache) + pdmBlkCacheSub(pCache, pEntry->cbData); + + RTMemPageFree(pEntry->pbData, pEntry->cbData); + RTMemFree(pEntry); + + return VINF_SUCCESS; +} + +/** + * Destroys all cache resources used by the given endpoint. + * + * @returns nothing. + * @param pBlkCache Block cache handle. + */ +VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache) +{ + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + + /* + * Commit all dirty entries now (they are waited on for completion during the + * destruction of the AVL tree below). + * The exception is if the VM was paused because of an I/O error before. + */ + if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)) + pdmBlkCacheCommit(pBlkCache); + + /* Make sure nobody is accessing the cache while we delete the tree. */ + pdmBlkCacheLockEnter(pCache); + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + RTSpinlockDestroy(pBlkCache->LockList); + + pCache->cRefs--; + RTListNodeRemove(&pBlkCache->NodeCacheUser); + + pdmBlkCacheLockLeave(pCache); + + RTMemFree(pBlkCache->pTree); + pBlkCache->pTree = NULL; + RTSemRWDestroy(pBlkCache->SemRWEntries); + +#ifdef VBOX_WITH_STATISTICS + STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId); +#endif + + RTStrFree(pBlkCache->pszId); + RTMemFree(pBlkCache); +} + +VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns) +{ + LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns)); + + /* + * Validate input. + */ + if (!pDevIns) + return; + VM_ASSERT_EMT(pVM); + + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + PPDMBLKCACHE pBlkCache, pBlkCacheNext; + + /* Return silently if not supported. 
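+     * Editor's note (illustrative sketch, not upstream code): a storage driver
+     * typically pairs PDMR3BlkCacheRetainDriver() in its construction path with
+     * PDMR3BlkCacheReleaseDriver() (or PDMR3BlkCacheRelease() on the handle) on
+     * destruction.  The callback names below are placeholders; only the completion
+     * callback's shape (pDrvIns, pvUser, rcReq) is visible from pdmBlkCacheReqComplete:
+     *
+     *     PPDMBLKCACHE pBlkCache;
+     *     rc = PDMR3BlkCacheRetainDriver(pVM, pDrvIns, &pBlkCache,
+     *                                    drvXferComplete, drvXferEnqueue,
+     *                                    drvXferEnqueueDiscard, "MyDisk-0");
+     *     ...
+     *     PDMR3BlkCacheReleaseDriver(pVM, pDrvIns);  // or PDMR3BlkCacheRelease(pBlkCache)
+     *
+     * The id string must be unique per cache user because it keys the saved state
+     * (see pdmR3BlkCacheFindById in the save/load code above).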
*/ + if (!pBlkCacheGlobal) + return; + + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser) + { + if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV + && pBlkCache->u.Dev.pDevIns == pDevIns) + PDMR3BlkCacheRelease(pBlkCache); + } + + pdmBlkCacheLockLeave(pBlkCacheGlobal); +} + +VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns) +{ + LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns)); + + /* + * Validate input. + */ + if (!pDrvIns) + return; + VM_ASSERT_EMT(pVM); + + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + PPDMBLKCACHE pBlkCache, pBlkCacheNext; + + /* Return silently if not supported. */ + if (!pBlkCacheGlobal) + return; + + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser) + { + if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV + && pBlkCache->u.Drv.pDrvIns == pDrvIns) + PDMR3BlkCacheRelease(pBlkCache); + } + + pdmBlkCacheLockLeave(pBlkCacheGlobal); +} + +VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns) +{ + LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns)); + + /* + * Validate input. + */ + if (!pUsbIns) + return; + VM_ASSERT_EMT(pVM); + + PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal; + PPDMBLKCACHE pBlkCache, pBlkCacheNext; + + /* Return silently if not supported. */ + if (!pBlkCacheGlobal) + return; + + pdmBlkCacheLockEnter(pBlkCacheGlobal); + + RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser) + { + if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB + && pBlkCache->u.Usb.pUsbIns == pUsbIns) + PDMR3BlkCacheRelease(pBlkCache); + } + + pdmBlkCacheLockLeave(pBlkCacheGlobal); +} + +static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off) +{ + STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache); + + RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off); + if (pEntry) + pdmBlkCacheEntryRef(pEntry); + RTSemRWReleaseRead(pBlkCache->SemRWEntries); + + STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache); + + return pEntry; +} + +/** + * Return the best fit cache entries for the given offset. + * + * @returns nothing. + * @param pBlkCache The endpoint cache. + * @param off The offset. + * @param ppEntryAbove Where to store the pointer to the best fit entry above + * the given offset. NULL if not required. 
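+ *
+ * Editor's note (worked example added for clarity, not upstream text): with cached
+ * entries covering [0..4095] and [8192..12287], a lookup at off=5000 returns the
+ * [8192..12287] entry as the best fit above, while a lookup at off=13000 returns
+ * NULL.  The read/write paths use this to clip a pass-through request or a new
+ * entry so it never overlaps an existing entry.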
+ */ +static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove) +{ + STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache); + + RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + if (ppEntryAbove) + { + *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/); + if (*ppEntryAbove) + pdmBlkCacheEntryRef(*ppEntryAbove); + } + + RTSemRWReleaseRead(pBlkCache->SemRWEntries); + + STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache); +} + +static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry) +{ + STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache); + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core); + AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted); + STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); +} + +/** + * Allocates and initializes a new entry for the cache. + * The entry has a reference count of 1. + * + * @returns Pointer to the new cache entry or NULL if out of memory. + * @param pBlkCache The cache the entry belongs to. + * @param off Start offset. + * @param cbData Size of the cache entry. + * @param pbBuffer Pointer to the buffer to use. + * NULL if a new buffer should be allocated. + * The buffer needs to have the same size of the entry. + */ +static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer) +{ + AssertReturn(cbData <= UINT32_MAX, NULL); + PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY)); + + if (RT_UNLIKELY(!pEntryNew)) + return NULL; + + pEntryNew->Core.Key = off; + pEntryNew->Core.KeyLast = off + cbData - 1; + pEntryNew->pBlkCache = pBlkCache; + pEntryNew->fFlags = 0; + pEntryNew->cRefs = 1; /* We are using it now. */ + pEntryNew->pList = NULL; + pEntryNew->cbData = (uint32_t)cbData; + pEntryNew->pWaitingHead = NULL; + pEntryNew->pWaitingTail = NULL; + if (pbBuffer) + pEntryNew->pbData = pbBuffer; + else + pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData); + + if (RT_UNLIKELY(!pEntryNew->pbData)) + { + RTMemFree(pEntryNew); + return NULL; + } + + return pEntryNew; +} + +/** + * Checks that a set of flags is set/clear acquiring the R/W semaphore + * in exclusive mode. + * + * @returns true if the flag in fSet is set and the one in fClear is clear. + * false otherwise. + * The R/W semaphore is only held if true is returned. + * + * @param pBlkCache The endpoint cache instance data. + * @param pEntry The entry to check the flags for. + * @param fSet The flag which is tested to be set. + * @param fClear The flag which is tested to be clear. + */ +DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache, + PPDMBLKCACHEENTRY pEntry, + uint32_t fSet, uint32_t fClear) +{ + uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags); + bool fPassed = ((fFlags & fSet) && !(fFlags & fClear)); + + if (fPassed) + { + /* Acquire the lock and check again because the completion callback might have raced us. */ + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + + fFlags = ASMAtomicReadU32(&pEntry->fFlags); + fPassed = ((fFlags & fSet) && !(fFlags & fClear)); + + /* Drop the lock if we didn't passed the test. 
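+     * Editor's note (illustrative, not upstream text): this is the usual optimistic
+     * double-check pattern -- test the flags without the lock, and only if that cheap
+     * test passes take the write semaphore and re-test, because the completion path
+     * may have changed the flags in between.  A caller therefore uses it like the
+     * read path below does:
+     *
+     *     if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
+     *                                                   PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
+     *                                                   PDMBLKCACHE_ENTRY_IS_DIRTY))
+     *     {
+     *         ... queue a waiter on the entry ...
+     *         RTSemRWReleaseWrite(pBlkCache->SemRWEntries); // lock is only held on success
+     *     }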
*/ + if (!fPassed) + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + } + + return fPassed; +} + +/** + * Adds a segment to the waiting list for a cache entry + * which is currently in progress. + * + * @returns nothing. + * @param pEntry The cache entry to add the segment to. + * @param pWaiter The waiter entry to add. + */ +DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry, + PPDMBLKCACHEWAITER pWaiter) +{ + pWaiter->pNext = NULL; + + if (pEntry->pWaitingHead) + { + AssertPtr(pEntry->pWaitingTail); + + pEntry->pWaitingTail->pNext = pWaiter; + pEntry->pWaitingTail = pWaiter; + } + else + { + Assert(!pEntry->pWaitingTail); + + pEntry->pWaitingHead = pWaiter; + pEntry->pWaitingTail = pWaiter; + } +} + +/** + * Add a buffer described by the I/O memory context + * to the entry waiting for completion. + * + * @returns VBox status code. + * @param pEntry The entry to add the buffer to. + * @param pReq The request. + * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData. + * @param offDiff Offset from the start of the buffer in the entry. + * @param cbData Amount of data to wait for onthis entry. + * @param fWrite Flag whether the task waits because it wants to write to + * the cache entry. + */ +static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq, + PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite) +{ + PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER)); + if (!pWaiter) + return VERR_NO_MEMORY; + + ASMAtomicIncU32(&pReq->cXfersPending); + pWaiter->pReq = pReq; + pWaiter->offCacheEntry = offDiff; + pWaiter->cbTransfer = cbData; + pWaiter->fWrite = fWrite; + RTSgBufClone(&pWaiter->SgBuf, pSgBuf); + RTSgBufAdvance(pSgBuf, cbData); + + pdmBlkCacheEntryAddWaiter(pEntry, pWaiter); + + return VINF_SUCCESS; +} + +/** + * Calculate aligned offset and size for a new cache entry which do not + * intersect with an already existing entry and the file end. + * + * @returns The number of bytes the entry can hold of the requested amount + * of bytes. + * @param pBlkCache The endpoint cache. + * @param off The start offset. + * @param cb The number of bytes the entry needs to hold at + * least. + * @param pcbEntry Where to store the number of bytes the entry can hold. + * Can be less than given because of other entries. + */ +static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache, + uint64_t off, uint32_t cb, + uint32_t *pcbEntry) +{ + /* Get the best fit entries around the offset */ + PPDMBLKCACHEENTRY pEntryAbove = NULL; + pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove); + + /* Log the info */ + LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n", + pEntryAbove ? "B" : "No b", + off, + pEntryAbove ? pEntryAbove->Core.Key : 0, + pEntryAbove ? pEntryAbove->Core.KeyLast : 0, + pEntryAbove ? 
pEntryAbove->cbData : 0)); + + uint32_t cbNext; + uint32_t cbInEntry; + if ( pEntryAbove + && off + cb > pEntryAbove->Core.Key) + { + cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off); + cbNext = (uint32_t)(pEntryAbove->Core.Key - off); + } + else + { + cbInEntry = cb; + cbNext = cb; + } + + /* A few sanity checks */ + AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key, + ("Aligned size intersects with another cache entry\n")); + Assert(cbInEntry <= cbNext); + + if (pEntryAbove) + pdmBlkCacheEntryRelease(pEntryAbove); + + LogFlow(("off=%llu cbNext=%u\n", off, cbNext)); + + *pcbEntry = cbNext; + + return cbInEntry; +} + +/** + * Create a new cache entry evicting data from the cache if required. + * + * @returns Pointer to the new cache entry or NULL + * if not enough bytes could be evicted from the cache. + * @param pBlkCache The endpoint cache. + * @param off The offset. + * @param cb Number of bytes the cache entry should have. + * @param pcbData Where to store the number of bytes the new + * entry can hold. May be lower than actually + * requested due to another entry intersecting the + * access range. + */ +static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData) +{ + uint32_t cbEntry = 0; + + *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry); + AssertReturn(cb <= UINT32_MAX, NULL); + + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + pdmBlkCacheLockEnter(pCache); + + PPDMBLKCACHEENTRY pEntryNew = NULL; + uint8_t *pbBuffer = NULL; + bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer); + if (fEnough) + { + LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry)); + + pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer); + if (RT_LIKELY(pEntryNew)) + { + pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew); + pdmBlkCacheAdd(pCache, cbEntry); + pdmBlkCacheLockLeave(pCache); + + pdmBlkCacheInsertEntry(pBlkCache, pEntryNew); + + AssertMsg( (off >= pEntryNew->Core.Key) + && (off + *pcbData <= pEntryNew->Core.KeyLast + 1), + ("Overflow in calculation off=%llu\n", off)); + } + else + pdmBlkCacheLockLeave(pCache); + } + else + pdmBlkCacheLockLeave(pCache); + + return pEntryNew; +} + +static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser) +{ + PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ)); + + if (RT_LIKELY(pReq)) + { + pReq->pvUser = pvUser; + pReq->rcReq = VINF_SUCCESS; + pReq->cXfersPending = 0; + } + + return pReq; +} + +static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq) +{ + switch (pBlkCache->enmType) + { + case PDMBLKCACHETYPE_DEV: + { + pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns, + pReq->pvUser, pReq->rcReq); + break; + } + case PDMBLKCACHETYPE_DRV: + { + pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns, + pReq->pvUser, pReq->rcReq); + break; + } + case PDMBLKCACHETYPE_USB: + { + pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns, + pReq->pvUser, pReq->rcReq); + break; + } + case PDMBLKCACHETYPE_INTERNAL: + { + pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser, + pReq->pvUser, pReq->rcReq); + break; + } + default: + AssertMsgFailed(("Unknown block cache type!\n")); + } + + RTMemFree(pReq); +} + +static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq, + int rcReq, bool fCallHandler) +{ + if (RT_FAILURE(rcReq)) + ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS); + + 
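+
+    /* Editor's note (descriptive comment added for clarity, not upstream text):
+     * pReq->cXfersPending acts as a reference count on the request.  The submitter
+     * (PDMR3BlkCacheRead/Write/Discard) takes one reference up front, every waiter
+     * added with pdmBlkCacheEntryWaitersAdd and every pass-through transfer adds
+     * another, and each finished transfer ends up here.  Only when the count drops
+     * to zero is the user completion callback invoked; the submitter's final call
+     * with fCallHandler=false instead signals completion through the return value
+     * so the API can return the final status synchronously. */
+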
AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n")); + uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending); + + if (!cXfersPending) + { + if (fCallHandler) + pdmBlkCacheReqComplete(pBlkCache, pReq); + return true; + } + + LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending)); + return false; +} + +VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off, + PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + PPDMBLKCACHEENTRY pEntry; + PPDMBLKCACHEREQ pReq; + + LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n", + pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser)); + + AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER); + AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE); + + RTSGBUF SgBuf; + RTSgBufClone(&SgBuf, pSgBuf); + + /* Allocate new request structure. */ + pReq = pdmBlkCacheReqAlloc(pvUser); + if (RT_UNLIKELY(!pReq)) + return VERR_NO_MEMORY; + + /* Increment data transfer counter to keep the request valid while we access it. */ + ASMAtomicIncU32(&pReq->cXfersPending); + + while (cbRead) + { + size_t cbToRead; + + pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off); + + /* + * If there is no entry we try to create a new one eviciting unused pages + * if the cache is full. If this is not possible we will pass the request through + * and skip the caching (all entries may be still in progress so they can't + * be evicted) + * If we have an entry it can be in one of the LRU lists where the entry + * contains data (recently used or frequently used LRU) so we can just read + * the data we need and put the entry at the head of the frequently used LRU list. + * In case the entry is in one of the ghost lists it doesn't contain any data. + * We have to fetch it again evicting pages from either T1 or T2 to make room. + */ + if (pEntry) + { + uint64_t offDiff = off - pEntry->Core.Key; + + AssertMsg(off >= pEntry->Core.Key, + ("Overflow in calculation off=%llu OffsetAligned=%llu\n", + off, pEntry->Core.Key)); + + AssertPtr(pEntry->pList); + + cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead); + + AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1, + ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n", + off, cbToRead)); + + cbRead -= cbToRead; + + if (!cbRead) + STAM_COUNTER_INC(&pCache->cHits); + else + STAM_COUNTER_INC(&pCache->cPartialHits); + + STAM_COUNTER_ADD(&pCache->StatRead, cbToRead); + + /* Ghost lists contain no data. */ + if ( (pEntry->pList == &pCache->LruRecentlyUsedIn) + || (pEntry->pList == &pCache->LruFrequentlyUsed)) + { + if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry, + PDMBLKCACHE_ENTRY_IO_IN_PROGRESS, + PDMBLKCACHE_ENTRY_IS_DIRTY)) + { + /* Entry didn't completed yet. Append to the list */ + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToRead, + false /* fWrite */); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + } + else + { + /* Read as much as we can from the entry. 
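+                     * Editor's note (illustrative sketch, not upstream code): seen from the
+                     * caller, a read against the cache is driven with an IPRT S/G buffer and
+                     * must cope with asynchronous completion; buffer and offset names below
+                     * are placeholders:
+                     *
+                     *     RTSGSEG Seg = { pvBuf, cbBuf };
+                     *     RTSGBUF SgBuf;
+                     *     RTSgBufInit(&SgBuf, &Seg, 1);
+                     *     rc = PDMR3BlkCacheRead(pBlkCache, offDisk, &SgBuf, cbBuf, pvReq);
+                     *     if (rc == VINF_AIO_TASK_PENDING)
+                     *         ; // completed later through the registered pfnXferComplete callback
+                     *     else if (RT_SUCCESS(rc))
+                     *         ; // served entirely from cached data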
*/ + RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead); + } + + /* Move this entry to the top position */ + if (pEntry->pList == &pCache->LruFrequentlyUsed) + { + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry); + pdmBlkCacheLockLeave(pCache); + } + /* Release the entry */ + pdmBlkCacheEntryRelease(pEntry); + } + else + { + uint8_t *pbBuffer = NULL; + + LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry)); + + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */ + bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer); + + /* Move the entry to Am and fetch it to the cache. */ + if (fEnough) + { + pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry); + pdmBlkCacheAdd(pCache, pEntry->cbData); + pdmBlkCacheLockLeave(pCache); + + if (pbBuffer) + pEntry->pbData = pbBuffer; + else + pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData); + AssertPtr(pEntry->pbData); + + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToRead, + false /* fWrite */); + pdmBlkCacheEntryReadFromMedium(pEntry); + /* Release the entry */ + pdmBlkCacheEntryRelease(pEntry); + } + else + { + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + pdmBlkCacheLockLeave(pCache); + + RTMemFree(pEntry); + + pdmBlkCacheRequestPassthrough(pBlkCache, pReq, + &SgBuf, off, cbToRead, + PDMBLKCACHEXFERDIR_READ); + } + } + } + else + { +#ifdef VBOX_WITH_IO_READ_CACHE + /* No entry found for this offset. Create a new entry and fetch the data to the cache. */ + PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache, + off, cbRead, + &cbToRead); + + cbRead -= cbToRead; + + if (pEntryNew) + { + if (!cbRead) + STAM_COUNTER_INC(&pCache->cMisses); + else + STAM_COUNTER_INC(&pCache->cPartialHits); + + pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq, + &SgBuf, + off - pEntryNew->Core.Key, + cbToRead, + false /* fWrite */); + pdmBlkCacheEntryReadFromMedium(pEntryNew); + pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */ + } + else + { + /* + * There is not enough free space in the cache. + * Pass the request directly to the I/O manager. + */ + LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead)); + + pdmBlkCacheRequestPassthrough(pBlkCache, pReq, + &SgBuf, off, cbToRead, + PDMBLKCACHEXFERDIR_READ); + } +#else + /* Clip read size if necessary. 
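+             * Editor's note (worked example added for clarity, not upstream text): without
+             * VBOX_WITH_IO_READ_CACHE only writes populate the cache, so a read miss is
+             * passed straight to the medium -- but it must be clipped against the next
+             * cached entry.  E.g. with a cached entry at [8192..12287], a read of 8192
+             * bytes at off=4096 is split: the first 4096 bytes are passed through here,
+             * and the next loop iteration finds the cached entry and serves the rest
+             * from it.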
*/ + PPDMBLKCACHEENTRY pEntryAbove; + pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove); + + if (pEntryAbove) + { + if (off + cbRead > pEntryAbove->Core.Key) + cbToRead = pEntryAbove->Core.Key - off; + else + cbToRead = cbRead; + + pdmBlkCacheEntryRelease(pEntryAbove); + } + else + cbToRead = cbRead; + + cbRead -= cbToRead; + pdmBlkCacheRequestPassthrough(pBlkCache, pReq, + &SgBuf, off, cbToRead, + PDMBLKCACHEXFERDIR_READ); +#endif + } + off += cbToRead; + } + + if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false)) + rc = VINF_AIO_TASK_PENDING; + else + { + rc = pReq->rcReq; + RTMemFree(pReq); + } + + LogFlowFunc((": Leave rc=%Rrc\n", rc)); + + return rc; +} + +VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + PPDMBLKCACHEENTRY pEntry; + PPDMBLKCACHEREQ pReq; + + LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n", + pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser)); + + AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER); + AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE); + + RTSGBUF SgBuf; + RTSgBufClone(&SgBuf, pSgBuf); + + /* Allocate new request structure. */ + pReq = pdmBlkCacheReqAlloc(pvUser); + if (RT_UNLIKELY(!pReq)) + return VERR_NO_MEMORY; + + /* Increment data transfer counter to keep the request valid while we access it. */ + ASMAtomicIncU32(&pReq->cXfersPending); + + while (cbWrite) + { + size_t cbToWrite; + + pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off); + if (pEntry) + { + /* Write the data into the entry and mark it as dirty */ + AssertPtr(pEntry->pList); + + uint64_t offDiff = off - pEntry->Core.Key; + AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key)); + + cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite); + cbWrite -= cbToWrite; + + if (!cbWrite) + STAM_COUNTER_INC(&pCache->cHits); + else + STAM_COUNTER_INC(&pCache->cPartialHits); + + STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite); + + /* Ghost lists contain no data. */ + if ( (pEntry->pList == &pCache->LruRecentlyUsedIn) + || (pEntry->pList == &pCache->LruFrequentlyUsed)) + { + /* Check if the entry is dirty. */ + if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry, + PDMBLKCACHE_ENTRY_IS_DIRTY, + 0)) + { + /* If it is already dirty but not in progress just update the data. */ + if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)) + RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite); + else + { + /* The data isn't written to the file yet */ + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToWrite, + true /* fWrite */); + STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred); + } + + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + } + else /* Dirty bit not set */ + { + /* + * Check if a read is in progress for this entry. + * We have to defer processing in that case. + */ + if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry, + PDMBLKCACHE_ENTRY_IO_IN_PROGRESS, + 0)) + { + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToWrite, + true /* fWrite */); + STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + } + else /* I/O in progress flag not set */ + { + /* Write as much as we can into the entry and update the file. 
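+                         * Editor's note (illustrative sketch, not upstream code): the caller-side
+                         * write mirrors the read path; dirty data then stays in the cache until
+                         * the commit timer fires or the dirty threshold ("CacheCommitThreshold")
+                         * is exceeded, as handled by pdmBlkCacheAddDirtyEntry /
+                         * pdmBlkCacheCommitDirtyEntries below.  Names are placeholders:
+                         *
+                         *     RTSGSEG Seg = { pvBuf, cbBuf };
+                         *     RTSGBUF SgBuf;
+                         *     RTSgBufInit(&SgBuf, &Seg, 1);
+                         *     rc = PDMR3BlkCacheWrite(pBlkCache, offDisk, &SgBuf, cbBuf, pvReq);
+                         *     // VINF_AIO_TASK_PENDING here as well means the request completes
+                         *     // asynchronously via the registered pfnXferComplete callback.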
*/ + RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite); + + bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry); + if (fCommit) + pdmBlkCacheCommitDirtyEntries(pCache); + } + } /* Dirty bit not set */ + + /* Move this entry to the top position */ + if (pEntry->pList == &pCache->LruFrequentlyUsed) + { + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry); + pdmBlkCacheLockLeave(pCache); + } + + pdmBlkCacheEntryRelease(pEntry); + } + else /* Entry is on the ghost list */ + { + uint8_t *pbBuffer = NULL; + + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */ + bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer); + + if (fEnough) + { + /* Move the entry to Am and fetch it to the cache. */ + pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry); + pdmBlkCacheAdd(pCache, pEntry->cbData); + pdmBlkCacheLockLeave(pCache); + + if (pbBuffer) + pEntry->pbData = pbBuffer; + else + pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData); + AssertPtr(pEntry->pbData); + + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToWrite, + true /* fWrite */); + STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred); + pdmBlkCacheEntryReadFromMedium(pEntry); + + /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */ + pdmBlkCacheEntryRelease(pEntry); + } + else + { + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + pdmBlkCacheLockLeave(pCache); + + RTMemFree(pEntry); + pdmBlkCacheRequestPassthrough(pBlkCache, pReq, + &SgBuf, off, cbToWrite, + PDMBLKCACHEXFERDIR_WRITE); + } + } + } + else /* No entry found */ + { + /* + * No entry found. Try to create a new cache entry to store the data in and if that fails + * write directly to the file. + */ + PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache, + off, cbWrite, + &cbToWrite); + + cbWrite -= cbToWrite; + + if (pEntryNew) + { + uint64_t offDiff = off - pEntryNew->Core.Key; + + STAM_COUNTER_INC(&pCache->cHits); + + /* + * Check if it is possible to just write the data without waiting + * for it to get fetched first. + */ + if (!offDiff && pEntryNew->cbData == cbToWrite) + { + RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite); + + bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew); + if (fCommit) + pdmBlkCacheCommitDirtyEntries(pCache); + STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite); + } + else + { + /* Defer the write and fetch the data from the endpoint. */ + pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq, + &SgBuf, offDiff, cbToWrite, + true /* fWrite */); + STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred); + pdmBlkCacheEntryReadFromMedium(pEntryNew); + } + + pdmBlkCacheEntryRelease(pEntryNew); + } + else + { + /* + * There is not enough free space in the cache. + * Pass the request directly to the I/O manager. + */ + LogFlow(("Couldn't evict %u bytes from the cache. 
Remaining request will be passed through\n", cbToWrite)); + + STAM_COUNTER_INC(&pCache->cMisses); + + pdmBlkCacheRequestPassthrough(pBlkCache, pReq, + &SgBuf, off, cbToWrite, + PDMBLKCACHEXFERDIR_WRITE); + } + } + + off += cbToWrite; + } + + if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false)) + rc = VINF_AIO_TASK_PENDING; + else + { + rc = pReq->rcReq; + RTMemFree(pReq); + } + + LogFlowFunc((": Leave rc=%Rrc\n", rc)); + + return rc; +} + +VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHEREQ pReq; + + LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId)); + + AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER); + AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE); + + /* Commit dirty entries in the cache. */ + pdmBlkCacheCommit(pBlkCache); + + /* Allocate new request structure. */ + pReq = pdmBlkCacheReqAlloc(pvUser); + if (RT_UNLIKELY(!pReq)) + return VERR_NO_MEMORY; + + rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0, + PDMBLKCACHEXFERDIR_FLUSH); + AssertRC(rc); + + LogFlowFunc((": Leave rc=%Rrc\n", rc)); + return VINF_AIO_TASK_PENDING; +} + +VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges, + unsigned cRanges, void *pvUser) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + PPDMBLKCACHEENTRY pEntry; + PPDMBLKCACHEREQ pReq; + + LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n", + pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser)); + + AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER); + AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE); + + /* Allocate new request structure. */ + pReq = pdmBlkCacheReqAlloc(pvUser); + if (RT_UNLIKELY(!pReq)) + return VERR_NO_MEMORY; + + /* Increment data transfer counter to keep the request valid while we access it. */ + ASMAtomicIncU32(&pReq->cXfersPending); + + for (unsigned i = 0; i < cRanges; i++) + { + uint64_t offCur = paRanges[i].offStart; + size_t cbLeft = paRanges[i].cbRange; + + while (cbLeft) + { + size_t cbThisDiscard = 0; + + pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur); + + if (pEntry) + { + /* Write the data into the entry and mark it as dirty */ + AssertPtr(pEntry->pList); + + uint64_t offDiff = offCur - pEntry->Core.Key; + + AssertMsg(offCur >= pEntry->Core.Key, + ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n", + offCur, pEntry->Core.Key)); + + cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft); + + /* Ghost lists contain no data. */ + if ( (pEntry->pList == &pCache->LruRecentlyUsedIn) + || (pEntry->pList == &pCache->LruFrequentlyUsed)) + { + /* Check if the entry is dirty. */ + if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry, + PDMBLKCACHE_ENTRY_IS_DIRTY, + 0)) + { + /* If it is dirty but not yet in progress remove it. 
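+                         * Editor's note (illustrative sketch, not upstream code): discards are
+                         * handed to the cache as an array of RTRANGE descriptors (offStart /
+                         * cbRange, as used by the loop above); names below are placeholders:
+                         *
+                         *     RTRANGE aRanges[1];
+                         *     aRanges[0].offStart = offDisk;
+                         *     aRanges[0].cbRange  = cbDiscard;
+                         *     rc = PDMR3BlkCacheDiscard(pBlkCache, &aRanges[0], 1, pvReq);
+                         *
+                         * Matching cache entries are dropped; entries with I/O still in flight
+                         * are handled in the branches below.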
*/ + if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)) + { + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryRemoveFromList(pEntry); + + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + + pdmBlkCacheLockLeave(pCache); + + RTMemFree(pEntry); + } + else + { +#if 0 + /* The data isn't written to the file yet */ + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToWrite, + true /* fWrite */); + STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred); +#endif + } + + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + pdmBlkCacheEntryRelease(pEntry); + } + else /* Dirty bit not set */ + { + /* + * Check if a read is in progress for this entry. + * We have to defer processing in that case. + */ + if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry, + PDMBLKCACHE_ENTRY_IO_IN_PROGRESS, + 0)) + { +#if 0 + pdmBlkCacheEntryWaitersAdd(pEntry, pReq, + &SgBuf, offDiff, cbToWrite, + true /* fWrite */); +#endif + STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + pdmBlkCacheEntryRelease(pEntry); + } + else /* I/O in progress flag not set */ + { + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryRemoveFromList(pEntry); + + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + pdmBlkCacheLockLeave(pCache); + + RTMemFree(pEntry); + } + } /* Dirty bit not set */ + } + else /* Entry is on the ghost list just remove cache entry. */ + { + pdmBlkCacheLockEnter(pCache); + pdmBlkCacheEntryRemoveFromList(pEntry); + + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache); + RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key); + STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + pdmBlkCacheLockLeave(pCache); + + RTMemFree(pEntry); + } + } + /* else: no entry found. */ + + offCur += cbThisDiscard; + cbLeft -= cbThisDiscard; + } + } + + if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false)) + rc = VINF_AIO_TASK_PENDING; + else + { + rc = pReq->rcReq; + RTMemFree(pReq); + } + + LogFlowFunc((": Leave rc=%Rrc\n", rc)); + + return rc; +} + +/** + * Completes a task segment freeing all resources and completes the task handle + * if everything was transferred. + * + * @returns Next task segment handle. + * @param pBlkCache The endpoint block cache. + * @param pWaiter Task segment to complete. + * @param rc Status code to set. + */ +static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc) +{ + PPDMBLKCACHEWAITER pNext = pWaiter->pNext; + PPDMBLKCACHEREQ pReq = pWaiter->pReq; + + pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true); + + RTMemFree(pWaiter); + + return pNext; +} + +static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer) +{ + PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry; + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + + /* Reference the entry now as we are clearing the I/O in progress flag + * which protected the entry till now. 
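+     * Editor's note (illustrative, not upstream text): this function runs once the
+     * backend reports a transfer as done.  The enqueue callback registered through one
+     * of the PDMR3BlkCacheRetain* functions receives a PDMBLKCACHEIOXFER handle,
+     * performs the actual read/write against the medium and finally calls (exact
+     * enqueue prototype is not shown in this file, so treat this as an assumption):
+     *
+     *     PDMR3BlkCacheIoXferComplete(pBlkCache, hIoXfer, rcOfTheTransfer);
+     *
+     * which lands here for cache-entry transfers (hIoXfer->fIoCache) or completes the
+     * pass-through request otherwise.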
*/ + pdmBlkCacheEntryRef(pEntry); + + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS; + + /* Process waiting segment list. The data in entry might have changed in-between. */ + bool fDirty = false; + PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead; + PPDMBLKCACHEWAITER pCurr = pComplete; + + AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail), + ("The list tail was not updated correctly\n")); + pEntry->pWaitingTail = NULL; + pEntry->pWaitingHead = NULL; + + if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE) + { + /* + * An error here is difficult to handle as the original request completed already. + * The error is logged for now and the VM is paused. + * If the user continues the entry is written again in the hope + * the user fixed the problem and the next write succeeds. + */ + if (RT_FAILURE(rcIoXfer)) + { + LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n", + pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer)); + + if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true)) + { + int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR", + N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). " + "Make sure there is enough free space on the disk and that the disk is working properly. " + "Operation can be resumed afterwards"), + pBlkCache->pszId, rcIoXfer); + AssertRC(rc); + } + + /* Mark the entry as dirty again to get it added to the list later on. */ + fDirty = true; + } + + pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY; + + while (pCurr) + { + AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n")); + + RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer); + fDirty = true; + pCurr = pCurr->pNext; + } + } + else + { + AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n")); + AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY), + ("Invalid flags set\n")); + + while (pCurr) + { + if (pCurr->fWrite) + { + RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer); + fDirty = true; + } + else + RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer); + + pCurr = pCurr->pNext; + } + } + + bool fCommit = false; + if (fDirty) + fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry); + + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + /* Dereference so that it isn't protected anymore except we issued anyother write for it. */ + pdmBlkCacheEntryRelease(pEntry); + + if (fCommit) + pdmBlkCacheCommitDirtyEntries(pCache); + + /* Complete waiters now. 
*/ + while (pComplete) + pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer); +} + +VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer) +{ + LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer)); + + if (hIoXfer->fIoCache) + pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer); + else + pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true); + + ASMAtomicDecU32(&pBlkCache->cIoXfersActive); + pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)", + hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive); + RTMemFree(hIoXfer); +} + +/** + * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O. + * + * @returns IPRT status code. + * @param pNode The node to destroy. + * @param pvUser Opaque user data. + */ +static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser) +{ + PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode; + PPDMBLKCACHE pBlkCache = pEntry->pBlkCache; + NOREF(pvUser); + + while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS) + { + /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */ + pdmBlkCacheEntryRef(pEntry); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + RTThreadSleep(1); + + /* Re-enter all locks and drop the reference. */ + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + pdmBlkCacheEntryRelease(pEntry); + } + + AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS), + ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags)); + + return VINF_SUCCESS; +} + +VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache) +{ + int rc = VINF_SUCCESS; + LogFlowFunc(("pBlkCache=%#p\n", pBlkCache)); + + AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER); + + if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended)) + pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */ + ASMAtomicXchgBool(&pBlkCache->fSuspended, true); + + /* Wait for all I/O to complete. */ + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL); + AssertRC(rc); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + return rc; +} + +VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache) +{ + LogFlowFunc(("pBlkCache=%#p\n", pBlkCache)); + + AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER); + + ASMAtomicXchgBool(&pBlkCache->fSuspended, false); + + return VINF_SUCCESS; +} + +VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache) +{ + int rc = VINF_SUCCESS; + PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache; + + /* + * Commit all dirty entries now (they are waited on for completion during the + * destruction of the AVL tree below). + * The exception is if the VM was paused because of an I/O error before. + */ + if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)) + pdmBlkCacheCommit(pBlkCache); + + /* Make sure nobody is accessing the cache while we delete the tree. 
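+     * Editor's note (illustrative sketch, not upstream code): a cache user that needs
+     * to detach or swap media while the VM keeps running would typically quiesce,
+     * clear and re-enable its cache in this order (all three APIs are defined in this
+     * file; whether a given device does exactly this is an assumption):
+     *
+     *     PDMR3BlkCacheSuspend(pBlkCache);   // commit dirty data, wait for in-flight I/O
+     *     PDMR3BlkCacheClear(pBlkCache);     // drop all entries of this user
+     *     PDMR3BlkCacheResume(pBlkCache);    // accept new requests again
+     *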
*/ + pdmBlkCacheLockEnter(pCache); + RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT); + RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache); + RTSemRWReleaseWrite(pBlkCache->SemRWEntries); + + pdmBlkCacheLockLeave(pCache); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/PDMCritSect.cpp b/src/VBox/VMM/VMMR3/PDMCritSect.cpp new file mode 100644 index 00000000..9bd369d9 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMCritSect.cpp @@ -0,0 +1,1078 @@ +/* $Id: PDMCritSect.cpp $ */ +/** @file + * PDM - Critical Sections, Ring-3. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM//_CRITSECT +#include "PDMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int pdmR3CritSectDeleteOne(PVM pVM, PUVM pUVM, PPDMCRITSECTINT pCritSect, PPDMCRITSECTINT pPrev, bool fFinal); +static int pdmR3CritSectRwDeleteOne(PVM pVM, PUVM pUVM, PPDMCRITSECTRWINT pCritSect, PPDMCRITSECTRWINT pPrev, bool fFinal); + + + +/** + * Register statistics related to the critical sections. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3CritSectBothInitStats(PVM pVM) +{ + RT_NOREF_PV(pVM); + STAM_REG(pVM, &pVM->pdm.s.StatQueuedCritSectLeaves, STAMTYPE_COUNTER, "/PDM/QueuedCritSectLeaves", STAMUNIT_OCCURENCES, + "Number of times a critical section leave request needed to be queued for ring-3 execution."); + return VINF_SUCCESS; +} + + +/** + * Relocates all the critical sections. + * + * @param pVM The cross context VM structure. + */ +void pdmR3CritSectBothRelocate(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + + for (PPDMCRITSECTINT pCur = pUVM->pdm.s.pCritSects; + pCur; + pCur = pCur->pNext) + pCur->pVMRC = pVM->pVMRC; + + for (PPDMCRITSECTRWINT pCur = pUVM->pdm.s.pRwCritSects; + pCur; + pCur = pCur->pNext) + pCur->pVMRC = pVM->pVMRC; + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); +} + + +/** + * Deletes all remaining critical sections. + * + * This is called at the very end of the termination process. It is also called + * at the end of vmR3CreateU failure cleanup, which may cause it to be called + * twice depending on where vmR3CreateU actually failed. We have to do the + * latter call because other components expect the critical sections to be + * automatically deleted. + * + * @returns VBox status code. 
+ * First error code, rest is lost. + * @param pVM The cross context VM structure. + * @remark Don't confuse this with PDMR3CritSectDelete. + */ +VMMR3_INT_DECL(int) PDMR3CritSectBothTerm(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + int rc = VINF_SUCCESS; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + + while (pUVM->pdm.s.pCritSects) + { + int rc2 = pdmR3CritSectDeleteOne(pVM, pUVM, pUVM->pdm.s.pCritSects, NULL, true /* final */); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + + while (pUVM->pdm.s.pRwCritSects) + { + int rc2 = pdmR3CritSectRwDeleteOne(pVM, pUVM, pUVM->pdm.s.pRwCritSects, NULL, true /* final */); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Initializes a critical section and inserts it into the list. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCritSect The critical section. + * @param pvKey The owner key. + * @param SRC_POS The source position. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param va Arguments for the format string. + */ +static int pdmR3CritSectInitOne(PVM pVM, PPDMCRITSECTINT pCritSect, void *pvKey, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va) +{ + VM_ASSERT_EMT(pVM); + Assert(pCritSect->Core.u32Magic != RTCRITSECT_MAGIC); + + /* + * Allocate the semaphore. + */ + AssertCompile(sizeof(SUPSEMEVENT) == sizeof(pCritSect->Core.EventSem)); + int rc = SUPSemEventCreate(pVM->pSession, (PSUPSEMEVENT)&pCritSect->Core.EventSem); + if (RT_SUCCESS(rc)) + { + /* Only format the name once. */ + char *pszName = RTStrAPrintf2V(pszNameFmt, va); /** @todo plug the "leak"... */ + if (pszName) + { + RT_SRC_POS_NOREF(); +#ifndef PDMCRITSECT_STRICT + pCritSect->Core.pValidatorRec = NULL; +#else + rc = RTLockValidatorRecExclCreate(&pCritSect->Core.pValidatorRec, +# ifdef RT_LOCK_STRICT_ORDER + RTLockValidatorClassForSrcPos(RT_SRC_POS_ARGS, "%s", pszName), +# else + NIL_RTLOCKVALCLASS, +# endif + RTLOCKVAL_SUB_CLASS_NONE, + pCritSect, true, "%s", pszName); +#endif + if (RT_SUCCESS(rc)) + { + /* + * Initialize the structure (first bit is c&p from RTCritSectInitEx). 
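+             * Editor's note (illustrative sketch, not upstream code): an internal VMM
+             * user initializes and uses such a section roughly as follows, assuming the
+             * usual PDMCritSectEnter/PDMCritSectLeave ring-all API; structure and name
+             * are placeholders:
+             *
+             *     rc = PDMR3CritSectInit(pVM, &pThis->CritSect, RT_SRC_POS, "MyComponent#%u", iInstance);
+             *     ...
+             *     rc = PDMCritSectEnter(&pThis->CritSect, VERR_SEM_BUSY);
+             *     if (RT_SUCCESS(rc))
+             *     {
+             *         ... touch the shared state ...
+             *         PDMCritSectLeave(&pThis->CritSect);
+             *     }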
+ */ + pCritSect->Core.u32Magic = RTCRITSECT_MAGIC; + pCritSect->Core.fFlags = 0; + pCritSect->Core.cNestings = 0; + pCritSect->Core.cLockers = -1; + pCritSect->Core.NativeThreadOwner = NIL_RTNATIVETHREAD; + pCritSect->pVMR3 = pVM; + pCritSect->pVMR0 = pVM->pVMR0; + pCritSect->pVMRC = pVM->pVMRC; + pCritSect->pvKey = pvKey; + pCritSect->fAutomaticDefaultCritsect = false; + pCritSect->fUsedByTimerOrSimilar = false; + pCritSect->hEventToSignal = NIL_SUPSEMEVENT; + pCritSect->pszName = pszName; + + STAMR3RegisterF(pVM, &pCritSect->StatContentionRZLock, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSects/%s/ContentionRZLock", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionRZUnlock,STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSects/%s/ContentionRZUnlock", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionR3, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSects/%s/ContentionR3", pCritSect->pszName); +#ifdef VBOX_WITH_STATISTICS + STAMR3RegisterF(pVM, &pCritSect->StatLocked, STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_OCCURENCE, NULL, "/PDM/CritSects/%s/Locked", pCritSect->pszName); +#endif + + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + pCritSect->pNext = pUVM->pdm.s.pCritSects; + pUVM->pdm.s.pCritSects = pCritSect; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + return VINF_SUCCESS; + } + + RTStrFree(pszName); + } + else + rc = VERR_NO_STR_MEMORY; + SUPSemEventClose(pVM->pSession, (SUPSEMEVENT)pCritSect->Core.EventSem); + } + return rc; +} + + +/** + * Initializes a read/write critical section and inserts it into the list. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCritSect The read/write critical section. + * @param pvKey The owner key. + * @param SRC_POS The source position. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param va Arguments for the format string. + */ +static int pdmR3CritSectRwInitOne(PVM pVM, PPDMCRITSECTRWINT pCritSect, void *pvKey, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va) +{ + VM_ASSERT_EMT(pVM); + Assert(pCritSect->Core.u32Magic != RTCRITSECTRW_MAGIC); + + /* + * Allocate the semaphores. + */ + AssertCompile(sizeof(SUPSEMEVENT) == sizeof(pCritSect->Core.hEvtWrite)); + int rc = SUPSemEventCreate(pVM->pSession, (PSUPSEMEVENT)&pCritSect->Core.hEvtWrite); + if (RT_SUCCESS(rc)) + { + AssertCompile(sizeof(SUPSEMEVENTMULTI) == sizeof(pCritSect->Core.hEvtRead)); + rc = SUPSemEventMultiCreate(pVM->pSession, (PSUPSEMEVENT)&pCritSect->Core.hEvtRead); + if (RT_SUCCESS(rc)) + { + /* Only format the name once. */ + char *pszName = RTStrAPrintf2V(pszNameFmt, va); /** @todo plug the "leak"... 
*/ + if (pszName) + { + pCritSect->Core.pValidatorRead = NULL; + pCritSect->Core.pValidatorWrite = NULL; + RT_SRC_POS_NOREF(); +#ifdef PDMCRITSECTRW_STRICT +# ifdef RT_LOCK_STRICT_ORDER + RTLOCKVALCLASS hClass = RTLockValidatorClassForSrcPos(RT_SRC_POS_ARGS, "%s", pszName); +# else + RTLOCKVALCLASS hClass = NIL_RTLOCKVALCLASS; +# endif + rc = RTLockValidatorRecExclCreate(&pCritSect->Core.pValidatorWrite, hClass, RTLOCKVAL_SUB_CLASS_NONE, + pCritSect, true, "%s", pszName); + if (RT_SUCCESS(rc)) + rc = RTLockValidatorRecSharedCreate(&pCritSect->Core.pValidatorRead, hClass, RTLOCKVAL_SUB_CLASS_NONE, + pCritSect, false /*fSignaller*/, true, "%s", pszName); +#endif + if (RT_SUCCESS(rc)) + { + /* + * Initialize the structure (first bit is c&p from RTCritSectRwInitEx). + */ + pCritSect->Core.u32Magic = RTCRITSECTRW_MAGIC; + pCritSect->Core.fNeedReset = false; + pCritSect->Core.u64State = 0; + pCritSect->Core.hNativeWriter = NIL_RTNATIVETHREAD; + pCritSect->Core.cWriterReads = 0; + pCritSect->Core.cWriteRecursions = 0; +#if HC_ARCH_BITS == 32 + pCritSect->Core.HCPtrPadding = NIL_RTHCPTR; +#endif + pCritSect->pVMR3 = pVM; + pCritSect->pVMR0 = pVM->pVMR0; + pCritSect->pVMRC = pVM->pVMRC; + pCritSect->pvKey = pvKey; + pCritSect->pszName = pszName; + + STAMR3RegisterF(pVM, &pCritSect->StatContentionRZEnterExcl, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/ContentionRZEnterExcl", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionRZLeaveExcl, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/ContentionRZLeaveExcl", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionRZEnterShared, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/ContentionRZEnterShared", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionRZLeaveShared, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/ContentionRZLeaveShared", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionR3EnterExcl, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/ContentionR3EnterExcl", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatContentionR3EnterShared, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/ContentionR3EnterShared", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatRZEnterExcl, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/RZEnterExcl", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatRZEnterShared, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/RZEnterShared", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatR3EnterExcl, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/R3EnterExcl", pCritSect->pszName); + STAMR3RegisterF(pVM, &pCritSect->StatR3EnterShared, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, NULL, "/PDM/CritSectsRw/%s/R3EnterShared", pCritSect->pszName); +#ifdef VBOX_WITH_STATISTICS + STAMR3RegisterF(pVM, &pCritSect->StatWriteLocked, STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_OCCURENCE, NULL, "/PDM/CritSectsRw/%s/WriteLocked", pCritSect->pszName); +#endif + + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + pCritSect->pNext = pUVM->pdm.s.pRwCritSects; + pUVM->pdm.s.pRwCritSects = pCritSect; + 
RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + return VINF_SUCCESS; + } + + RTStrFree(pszName); + } + else + rc = VERR_NO_STR_MEMORY; + SUPSemEventMultiClose(pVM->pSession, (SUPSEMEVENT)pCritSect->Core.hEvtRead); + } + SUPSemEventClose(pVM->pSession, (SUPSEMEVENT)pCritSect->Core.hEvtWrite); + } + return rc; +} + + +/** + * Initializes a PDM critical section for internal use. + * + * The PDM critical sections are derived from the IPRT critical sections, but + * works in ring-0 and raw-mode context as well. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCritSect Pointer to the critical section. + * @param SRC_POS Use RT_SRC_POS. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param ... Arguments for the format string. + * @thread EMT + */ +VMMR3DECL(int) PDMR3CritSectInit(PVM pVM, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, const char *pszNameFmt, ...) +{ +#if HC_ARCH_BITS == 64 && GC_ARCH_BITS == 32 + AssertCompile(sizeof(pCritSect->padding) >= sizeof(pCritSect->s)); +#endif + Assert(RT_ALIGN_P(pCritSect, sizeof(uintptr_t)) == pCritSect); + va_list va; + va_start(va, pszNameFmt); + int rc = pdmR3CritSectInitOne(pVM, &pCritSect->s, pCritSect, RT_SRC_POS_ARGS, pszNameFmt, va); + va_end(va); + return rc; +} + + +/** + * Initializes a PDM read/write critical section for internal use. + * + * The PDM read/write critical sections are derived from the IPRT read/write + * critical sections, but works in ring-0 and raw-mode context as well. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pCritSect Pointer to the read/write critical section. + * @param SRC_POS Use RT_SRC_POS. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param ... Arguments for the format string. + * @thread EMT + */ +VMMR3DECL(int) PDMR3CritSectRwInit(PVM pVM, PPDMCRITSECTRW pCritSect, RT_SRC_POS_DECL, const char *pszNameFmt, ...) +{ +#if HC_ARCH_BITS == 64 && GC_ARCH_BITS == 32 + AssertCompile(sizeof(pCritSect->padding) >= sizeof(pCritSect->s)); +#endif + Assert(RT_ALIGN_P(pCritSect, sizeof(uintptr_t)) == pCritSect); + va_list va; + va_start(va, pszNameFmt); + int rc = pdmR3CritSectRwInitOne(pVM, &pCritSect->s, pCritSect, RT_SRC_POS_ARGS, pszNameFmt, va); + va_end(va); + return rc; +} + + +/** + * Initializes a PDM critical section for a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param pCritSect Pointer to the critical section. + * @param SRC_POS The source position. Optional. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param va Arguments for the format string. + */ +int pdmR3CritSectInitDevice(PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va) +{ + return pdmR3CritSectInitOne(pVM, &pCritSect->s, pDevIns, RT_SRC_POS_ARGS, pszNameFmt, va); +} + + +/** + * Initializes a PDM read/write critical section for a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param pCritSect Pointer to the read/write critical section. + * @param SRC_POS The source position. Optional. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param va Arguments for the format string. 
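+ * @note The device instance is used as the owner key, so the section is
+ *       cleaned up again by pdmR3CritSectBothDeleteDevice().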
+ */ +int pdmR3CritSectRwInitDevice(PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECTRW pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va) +{ + return pdmR3CritSectRwInitOne(pVM, &pCritSect->s, pDevIns, RT_SRC_POS_ARGS, pszNameFmt, va); +} + + +/** + * Initializes the automatic default PDM critical section for a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param SRC_POS The source position. Optional. + * @param pCritSect Pointer to the critical section. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param ... Arguments for the format string. + */ +int pdmR3CritSectInitDeviceAuto(PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...) +{ + va_list va; + va_start(va, pszNameFmt); + int rc = pdmR3CritSectInitOne(pVM, &pCritSect->s, pDevIns, RT_SRC_POS_ARGS, pszNameFmt, va); + if (RT_SUCCESS(rc)) + pCritSect->s.fAutomaticDefaultCritsect = true; + va_end(va); + return rc; +} + + +/** + * Initializes a PDM critical section for a driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param pCritSect Pointer to the critical section. + * @param SRC_POS The source position. Optional. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param ... Arguments for the format string. + */ +int pdmR3CritSectInitDriver(PVM pVM, PPDMDRVINS pDrvIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...) +{ + va_list va; + va_start(va, pszNameFmt); + int rc = pdmR3CritSectInitOne(pVM, &pCritSect->s, pDrvIns, RT_SRC_POS_ARGS, pszNameFmt, va); + va_end(va); + return rc; +} + + +/** + * Initializes a PDM read/write critical section for a driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param pCritSect Pointer to the read/write critical section. + * @param SRC_POS The source position. Optional. + * @param pszNameFmt Format string for naming the critical section. For + * statistics and lock validation. + * @param ... Arguments for the format string. + */ +int pdmR3CritSectRwInitDriver(PVM pVM, PPDMDRVINS pDrvIns, PPDMCRITSECTRW pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...) +{ + va_list va; + va_start(va, pszNameFmt); + int rc = pdmR3CritSectRwInitOne(pVM, &pCritSect->s, pDrvIns, RT_SRC_POS_ARGS, pszNameFmt, va); + va_end(va); + return rc; +} + + +/** + * Deletes one critical section. + * + * @returns Return code from RTCritSectDelete. + * + * @param pVM The cross context VM structure. + * @param pUVM The user mode VM handle. + * @param pCritSect The critical section. + * @param pPrev The previous critical section in the list. + * @param fFinal Set if this is the final call and statistics shouldn't be deregistered. + * + * @remarks Caller must have entered the ListCritSect. + */ +static int pdmR3CritSectDeleteOne(PVM pVM, PUVM pUVM, PPDMCRITSECTINT pCritSect, PPDMCRITSECTINT pPrev, bool fFinal) +{ + /* + * Assert free waiters and so on (c&p from RTCritSectDelete). + */ + Assert(pCritSect->Core.u32Magic == RTCRITSECT_MAGIC); + //Assert(pCritSect->Core.cNestings == 0); - we no longer reset this when leaving. 
+ Assert(pCritSect->Core.cLockers == -1); + Assert(pCritSect->Core.NativeThreadOwner == NIL_RTNATIVETHREAD); + Assert(RTCritSectIsOwner(&pUVM->pdm.s.ListCritSect)); + + /* + * Unlink it. + */ + if (pPrev) + pPrev->pNext = pCritSect->pNext; + else + pUVM->pdm.s.pCritSects = pCritSect->pNext; + + /* + * Delete it (parts taken from RTCritSectDelete). + * In case someone is waiting we'll signal the semaphore cLockers + 1 times. + */ + ASMAtomicWriteU32(&pCritSect->Core.u32Magic, 0); + SUPSEMEVENT hEvent = (SUPSEMEVENT)pCritSect->Core.EventSem; + pCritSect->Core.EventSem = NIL_RTSEMEVENT; + while (pCritSect->Core.cLockers-- >= 0) + SUPSemEventSignal(pVM->pSession, hEvent); + ASMAtomicWriteS32(&pCritSect->Core.cLockers, -1); + int rc = SUPSemEventClose(pVM->pSession, hEvent); + AssertRC(rc); + RTLockValidatorRecExclDestroy(&pCritSect->Core.pValidatorRec); + pCritSect->pNext = NULL; + pCritSect->pvKey = NULL; + pCritSect->pVMR3 = NULL; + pCritSect->pVMR0 = NIL_RTR0PTR; + pCritSect->pVMRC = NIL_RTRCPTR; + if (!fFinal) + STAMR3DeregisterF(pVM->pUVM, "/PDM/CritSects/%s/*", pCritSect->pszName); + RTStrFree((char *)pCritSect->pszName); + pCritSect->pszName = NULL; + return rc; +} + + +/** + * Deletes one read/write critical section. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pUVM The user mode VM handle. + * @param pCritSect The read/write critical section. + * @param pPrev The previous critical section in the list. + * @param fFinal Set if this is the final call and statistics shouldn't be deregistered. + * + * @remarks Caller must have entered the ListCritSect. + */ +static int pdmR3CritSectRwDeleteOne(PVM pVM, PUVM pUVM, PPDMCRITSECTRWINT pCritSect, PPDMCRITSECTRWINT pPrev, bool fFinal) +{ + /* + * Assert free waiters and so on (c&p from RTCritSectRwDelete). + */ + Assert(pCritSect->Core.u32Magic == RTCRITSECTRW_MAGIC); + //Assert(pCritSect->Core.cNestings == 0); + //Assert(pCritSect->Core.cLockers == -1); + Assert(pCritSect->Core.hNativeWriter == NIL_RTNATIVETHREAD); + + /* + * Invalidate the structure and free the semaphores. + */ + if (!ASMAtomicCmpXchgU32(&pCritSect->Core.u32Magic, RTCRITSECTRW_MAGIC_DEAD, RTCRITSECTRW_MAGIC)) + AssertFailed(); + + /* + * Unlink it. + */ + if (pPrev) + pPrev->pNext = pCritSect->pNext; + else + pUVM->pdm.s.pRwCritSects = pCritSect->pNext; + + /* + * Delete it (parts taken from RTCritSectRwDelete). + * In case someone is waiting we'll signal the semaphore cLockers + 1 times. 
+ */ + pCritSect->Core.fFlags = 0; + pCritSect->Core.u64State = 0; + + SUPSEMEVENT hEvtWrite = (SUPSEMEVENT)pCritSect->Core.hEvtWrite; + pCritSect->Core.hEvtWrite = NIL_RTSEMEVENT; + AssertCompile(sizeof(hEvtWrite) == sizeof(pCritSect->Core.hEvtWrite)); + + SUPSEMEVENTMULTI hEvtRead = (SUPSEMEVENTMULTI)pCritSect->Core.hEvtRead; + pCritSect->Core.hEvtRead = NIL_RTSEMEVENTMULTI; + AssertCompile(sizeof(hEvtRead) == sizeof(pCritSect->Core.hEvtRead)); + + int rc1 = SUPSemEventClose(pVM->pSession, hEvtWrite); AssertRC(rc1); + int rc2 = SUPSemEventMultiClose(pVM->pSession, hEvtRead); AssertRC(rc2); + + RTLockValidatorRecSharedDestroy(&pCritSect->Core.pValidatorRead); + RTLockValidatorRecExclDestroy(&pCritSect->Core.pValidatorWrite); + + pCritSect->pNext = NULL; + pCritSect->pvKey = NULL; + pCritSect->pVMR3 = NULL; + pCritSect->pVMR0 = NIL_RTR0PTR; + pCritSect->pVMRC = NIL_RTRCPTR; + if (!fFinal) + STAMR3DeregisterF(pVM->pUVM, "/PDM/CritSectsRw/%s/*", pCritSect->pszName); + RTStrFree((char *)pCritSect->pszName); + pCritSect->pszName = NULL; + + return RT_SUCCESS(rc1) ? rc2 : rc1; +} + + +/** + * Deletes all critical sections with a give initializer key. + * + * @returns VBox status code. + * The entire list is processed on failure, so we'll only + * return the first error code. This shouldn't be a problem + * since errors really shouldn't happen here. + * @param pVM The cross context VM structure. + * @param pvKey The initializer key. + */ +static int pdmR3CritSectDeleteByKey(PVM pVM, void *pvKey) +{ + /* + * Iterate the list and match key. + */ + PUVM pUVM = pVM->pUVM; + int rc = VINF_SUCCESS; + PPDMCRITSECTINT pPrev = NULL; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMCRITSECTINT pCur = pUVM->pdm.s.pCritSects; + while (pCur) + { + if (pCur->pvKey == pvKey) + { + int rc2 = pdmR3CritSectDeleteOne(pVM, pUVM, pCur, pPrev, false /* not final */); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + + /* next */ + pPrev = pCur; + pCur = pCur->pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Deletes all read/write critical sections with a give initializer key. + * + * @returns VBox status code. + * The entire list is processed on failure, so we'll only + * return the first error code. This shouldn't be a problem + * since errors really shouldn't happen here. + * @param pVM The cross context VM structure. + * @param pvKey The initializer key. + */ +static int pdmR3CritSectRwDeleteByKey(PVM pVM, void *pvKey) +{ + /* + * Iterate the list and match key. + */ + PUVM pUVM = pVM->pUVM; + int rc = VINF_SUCCESS; + PPDMCRITSECTRWINT pPrev = NULL; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMCRITSECTRWINT pCur = pUVM->pdm.s.pRwCritSects; + while (pCur) + { + if (pCur->pvKey == pvKey) + { + int rc2 = pdmR3CritSectRwDeleteOne(pVM, pUVM, pCur, pPrev, false /* not final */); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + + /* next */ + pPrev = pCur; + pCur = pCur->pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Deletes all undeleted critical sections (both types) initialized by a given + * device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device handle. + */ +int pdmR3CritSectBothDeleteDevice(PVM pVM, PPDMDEVINS pDevIns) +{ + int rc1 = pdmR3CritSectDeleteByKey(pVM, pDevIns); + int rc2 = pdmR3CritSectRwDeleteByKey(pVM, pDevIns); + return RT_SUCCESS(rc1) ? 
rc2 : rc1; +} + + +/** + * Deletes all undeleted critical sections (both types) initialized by a given + * driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns The driver handle. + */ +int pdmR3CritSectBothDeleteDriver(PVM pVM, PPDMDRVINS pDrvIns) +{ + int rc1 = pdmR3CritSectDeleteByKey(pVM, pDrvIns); + int rc2 = pdmR3CritSectRwDeleteByKey(pVM, pDrvIns); + return RT_SUCCESS(rc1) ? rc2 : rc1; +} + + +/** + * Deletes the critical section. + * + * @returns VBox status code. + * @param pCritSect The PDM critical section to destroy. + */ +VMMR3DECL(int) PDMR3CritSectDelete(PPDMCRITSECT pCritSect) +{ + if (!RTCritSectIsInitialized(&pCritSect->s.Core)) + return VINF_SUCCESS; + + /* + * Find and unlink it. + */ + PVM pVM = pCritSect->s.pVMR3; + PUVM pUVM = pVM->pUVM; + AssertReleaseReturn(pVM, VERR_PDM_CRITSECT_IPE); + PPDMCRITSECTINT pPrev = NULL; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMCRITSECTINT pCur = pUVM->pdm.s.pCritSects; + while (pCur) + { + if (pCur == &pCritSect->s) + { + int rc = pdmR3CritSectDeleteOne(pVM, pUVM, pCur, pPrev, false /* not final */); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; + } + + /* next */ + pPrev = pCur; + pCur = pCur->pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertReleaseMsgFailed(("pCritSect=%p wasn't found!\n", pCritSect)); + return VERR_PDM_CRITSECT_NOT_FOUND; +} + + +/** + * Deletes the read/write critical section. + * + * @returns VBox status code. + * @param pCritSect The PDM read/write critical section to destroy. + */ +VMMR3DECL(int) PDMR3CritSectRwDelete(PPDMCRITSECTRW pCritSect) +{ + if (!PDMCritSectRwIsInitialized(pCritSect)) + return VINF_SUCCESS; + + /* + * Find and unlink it. + */ + PVM pVM = pCritSect->s.pVMR3; + PUVM pUVM = pVM->pUVM; + AssertReleaseReturn(pVM, VERR_PDM_CRITSECT_IPE); + PPDMCRITSECTRWINT pPrev = NULL; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMCRITSECTRWINT pCur = pUVM->pdm.s.pRwCritSects; + while (pCur) + { + if (pCur == &pCritSect->s) + { + int rc = pdmR3CritSectRwDeleteOne(pVM, pUVM, pCur, pPrev, false /* not final */); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; + } + + /* next */ + pPrev = pCur; + pCur = pCur->pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertReleaseMsgFailed(("pCritSect=%p wasn't found!\n", pCritSect)); + return VERR_PDM_CRITSECT_NOT_FOUND; +} + + +/** + * Gets the name of the critical section. + * + * + * @returns Pointer to the critical section name (read only) on success, + * NULL on failure (invalid critical section). + * @param pCritSect The critical section. + */ +VMMR3DECL(const char *) PDMR3CritSectName(PCPDMCRITSECT pCritSect) +{ + AssertPtrReturn(pCritSect, NULL); + AssertReturn(pCritSect->s.Core.u32Magic == RTCRITSECT_MAGIC, NULL); + return pCritSect->s.pszName; +} + + +/** + * Gets the name of the read/write critical section. + * + * + * @returns Pointer to the critical section name (read only) on success, + * NULL on failure (invalid critical section). + * @param pCritSect The read/write critical section. + */ +VMMR3DECL(const char *) PDMR3CritSectRwName(PCPDMCRITSECTRW pCritSect) +{ + AssertPtrReturn(pCritSect, NULL); + AssertReturn(pCritSect->s.Core.u32Magic == RTCRITSECTRW_MAGIC, NULL); + return pCritSect->s.pszName; +} + + +/** + * Yield the critical section if someone is waiting on it. + * + * When yielding, we'll leave the critical section and try to make sure the + * other waiting threads get a chance of entering before we reclaim it. 
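+ *
+ * A minimal usage sketch (hypothetical caller; pThis->CritSect is an assumed
+ * device member, not something defined in this file):
+ * @code
+ *      // EMT owns pThis->CritSect and is in the middle of a long operation.
+ *      if (PDMR3CritSectYield(&pThis->CritSect))
+ *          Log(("gave a waiter a chance to run\n"));
+ *      // Either way the section is owned again when the call returns.
+ * @endcode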
+ * + * @retval true if yielded. + * @retval false if not yielded. + * @param pCritSect The critical section. + */ +VMMR3DECL(bool) PDMR3CritSectYield(PPDMCRITSECT pCritSect) +{ + AssertPtrReturn(pCritSect, false); + AssertReturn(pCritSect->s.Core.u32Magic == RTCRITSECT_MAGIC, false); + Assert(pCritSect->s.Core.NativeThreadOwner == RTThreadNativeSelf()); + Assert(!(pCritSect->s.Core.fFlags & RTCRITSECT_FLAGS_NOP)); + + /* No recursion allowed here. */ + int32_t const cNestings = pCritSect->s.Core.cNestings; + AssertReturn(cNestings == 1, false); + + int32_t const cLockers = ASMAtomicReadS32(&pCritSect->s.Core.cLockers); + if (cLockers < cNestings) + return false; + +#ifdef PDMCRITSECT_STRICT + RTLOCKVALSRCPOS const SrcPos = pCritSect->s.Core.pValidatorRec->SrcPos; +#endif + PDMCritSectLeave(pCritSect); + + /* + * If we're lucky, then one of the waiters has entered the lock already. + * We spin a little bit in hope for this to happen so we can avoid the + * yield detour. + */ + if (ASMAtomicUoReadS32(&pCritSect->s.Core.cNestings) == 0) + { + int cLoops = 20; + while ( cLoops > 0 + && ASMAtomicUoReadS32(&pCritSect->s.Core.cNestings) == 0 + && ASMAtomicUoReadS32(&pCritSect->s.Core.cLockers) >= 0) + { + ASMNopPause(); + cLoops--; + } + if (cLoops == 0) + RTThreadYield(); + } + +#ifdef PDMCRITSECT_STRICT + int rc = PDMCritSectEnterDebug(pCritSect, VERR_IGNORED, + SrcPos.uId, SrcPos.pszFile, SrcPos.uLine, SrcPos.pszFunction); +#else + int rc = PDMCritSectEnter(pCritSect, VERR_IGNORED); +#endif + AssertLogRelRC(rc); + return true; +} + + +/** + * PDMR3CritSectBothCountOwned worker. + * + * @param pszName The critical section name. + * @param ppszNames Pointer to the pszNames variable. + * @param pcchLeft Pointer to the cchLeft variable. + * @param fFirst Whether this is the first name or not. + */ +static void pdmR3CritSectAppendNameToList(char const *pszName, char **ppszNames, size_t *pcchLeft, bool fFirst) +{ + size_t cchLeft = *pcchLeft; + if (cchLeft) + { + char *pszNames = *ppszNames; + + /* try add comma. */ + if (fFirst) + { + *pszNames++ = ','; + if (--cchLeft) + { + *pszNames++ = ' '; + cchLeft--; + } + } + + /* try copy the name. */ + if (cchLeft) + { + size_t const cchName = strlen(pszName); + if (cchName < cchLeft) + { + memcpy(pszNames, pszName, cchName); + pszNames += cchName; + cchLeft -= cchName; + } + else + { + if (cchLeft > 2) + { + memcpy(pszNames, pszName, cchLeft - 2); + pszNames += cchLeft - 2; + cchLeft = 2; + } + while (cchLeft-- > 0) + *pszNames++ = '+'; + } + } + *pszNames = '\0'; + + *pcchLeft = cchLeft; + *ppszNames = pszNames; + } +} + + +/** + * Counts the critical sections (both type) owned by the calling thread, + * optionally returning a comma separated list naming them. + * + * Read ownerships are not included in non-strict builds. + * + * This is for diagnostic purposes only. + * + * @returns Lock count. + * + * @param pVM The cross context VM structure. + * @param pszNames Where to return the critical section names. + * @param cbNames The size of the buffer. + */ +VMMR3DECL(uint32_t) PDMR3CritSectCountOwned(PVM pVM, char *pszNames, size_t cbNames) +{ + /* + * Init the name buffer. + */ + size_t cchLeft = cbNames; + if (cchLeft) + { + cchLeft--; + pszNames[0] = pszNames[cchLeft] = '\0'; + } + + /* + * Iterate the critical sections. + */ + uint32_t cCritSects = 0; + RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf(); + /* This is unsafe, but wtf. 
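+       ("Unsafe" = the list is walked without entering ListCritSect; that is
+       acceptable for a diagnostics-only helper which may run while the VM is
+       already in trouble.)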
*/ + for (PPDMCRITSECTINT pCur = pVM->pUVM->pdm.s.pCritSects; + pCur; + pCur = pCur->pNext) + { + /* Same as RTCritSectIsOwner(). */ + if (pCur->Core.NativeThreadOwner == hNativeThread) + { + cCritSects++; + pdmR3CritSectAppendNameToList(pCur->pszName, &pszNames, &cchLeft, cCritSects == 1); + } + } + + /* This is unsafe, but wtf. */ + for (PPDMCRITSECTRWINT pCur = pVM->pUVM->pdm.s.pRwCritSects; + pCur; + pCur = pCur->pNext) + { + if ( pCur->Core.hNativeWriter == hNativeThread + || PDMCritSectRwIsReadOwner((PPDMCRITSECTRW)pCur, false /*fWannaHear*/) ) + { + cCritSects++; + pdmR3CritSectAppendNameToList(pCur->pszName, &pszNames, &cchLeft, cCritSects == 1); + } + } + + return cCritSects; +} + + +/** + * Leave all critical sections the calling thread owns. + * + * This is only used when entering guru meditation in order to prevent other + * EMTs and I/O threads from deadlocking. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) PDMR3CritSectLeaveAll(PVM pVM) +{ + RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf(); + PUVM pUVM = pVM->pUVM; + + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMCRITSECTINT pCur = pUVM->pdm.s.pCritSects; + pCur; + pCur = pCur->pNext) + { + while ( pCur->Core.NativeThreadOwner == hNativeSelf + && pCur->Core.cNestings > 0) + PDMCritSectLeave((PPDMCRITSECT)pCur); + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); +} + + +/** + * Gets the address of the NOP critical section. + * + * The NOP critical section will not perform any thread serialization but let + * all enter immediately and concurrently. + * + * @returns The address of the NOP critical section. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(PPDMCRITSECT) PDMR3CritSectGetNop(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, NULL); + return &pVM->pdm.s.NopCritSect; +} + + +/** + * Gets the ring-0 address of the NOP critical section. + * + * @returns The ring-0 address of the NOP critical section. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(R0PTRTYPE(PPDMCRITSECT)) PDMR3CritSectGetNopR0(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, NIL_RTR0PTR); + return MMHyperR3ToR0(pVM, &pVM->pdm.s.NopCritSect); +} + + +/** + * Gets the raw-mode context address of the NOP critical section. + * + * @returns The raw-mode context address of the NOP critical section. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(RCPTRTYPE(PPDMCRITSECT)) PDMR3CritSectGetNopRC(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, NIL_RTRCPTR); + return MMHyperR3ToRC(pVM, &pVM->pdm.s.NopCritSect); +} + diff --git a/src/VBox/VMM/VMMR3/PDMDevHlp.cpp b/src/VBox/VMM/VMMR3/PDMDevHlp.cpp new file mode 100644 index 00000000..a060e1f4 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMDevHlp.cpp @@ -0,0 +1,4080 @@ +/* $Id: PDMDevHlp.cpp $ */ +/** @file + * PDM - Pluggable Device and Driver Manager, Device Helpers. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_DEVICE +#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dtrace/VBoxVMM.h" +#include "PDMInline.h" + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** @def PDM_DEVHLP_DEADLOCK_DETECTION + * Define this to enable the deadlock detection when accessing physical memory. + */ +#if /*defined(DEBUG_bird) ||*/ defined(DOXYGEN_RUNNING) +# define PDM_DEVHLP_DEADLOCK_DETECTION /**< @todo enable DevHlp deadlock detection! */ +#endif + + + +/** + * Wrapper around PDMR3LdrGetSymbolRCLazy. + */ +DECLINLINE(int) pdmR3DevGetSymbolRCLazy(PPDMDEVINS pDevIns, const char *pszSymbol, PRTRCPTR ppvValue) +{ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + *ppvValue = NIL_RTRCPTR; + return VINF_SUCCESS; + } + return PDMR3LdrGetSymbolRCLazy(pVM, + pDevIns->Internal.s.pDevR3->pReg->szRCMod, + pDevIns->Internal.s.pDevR3->pszRCSearchPath, + pszSymbol, ppvValue); +} + + +/** + * Wrapper around PDMR3LdrGetSymbolR0Lazy. 
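+ * (Unlike the RC wrapper above, no raw-mode check is needed here.)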
+ */ +DECLINLINE(int) pdmR3DevGetSymbolR0Lazy(PPDMDEVINS pDevIns, const char *pszSymbol, PRTR0PTR ppvValue) +{ + return PDMR3LdrGetSymbolR0Lazy(pDevIns->Internal.s.pVMR3, + pDevIns->Internal.s.pDevR3->pReg->szR0Mod, + pDevIns->Internal.s.pDevR3->pszR0SearchPath, + pszSymbol, ppvValue); +} + + +/** @name R3 DevHlp + * @{ + */ + + +/** @interface_method_impl{PDMDEVHLPR3,pfnIOPortRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_IOPortRegister(PPDMDEVINS pDevIns, RTIOPORT Port, RTIOPORT cPorts, RTHCPTR pvUser, PFNIOMIOPORTOUT pfnOut, PFNIOMIOPORTIN pfnIn, + PFNIOMIOPORTOUTSTRING pfnOutStr, PFNIOMIOPORTINSTRING pfnInStr, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_IOPortRegister: caller='%s'/%d: Port=%#x cPorts=%#x pvUser=%p pfnOut=%p pfnIn=%p pfnOutStr=%p pfnInStr=%p p32_tszDesc=%p:{%s}\n", pDevIns->pReg->szName, pDevIns->iInstance, + Port, cPorts, pvUser, pfnOut, pfnIn, pfnOutStr, pfnInStr, pszDesc, pszDesc)); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + +#if 0 /** @todo needs a real string cache for this */ + if (pDevIns->iInstance > 0) + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } +#endif + + int rc = IOMR3IOPortRegisterR3(pDevIns->Internal.s.pVMR3, pDevIns, Port, cPorts, pvUser, + pfnOut, pfnIn, pfnOutStr, pfnInStr, pszDesc); + + LogFlow(("pdmR3DevHlp_IOPortRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnIOPortRegisterRC} */ +static DECLCALLBACK(int) pdmR3DevHlp_IOPortRegisterRC(PPDMDEVINS pDevIns, RTIOPORT Port, RTIOPORT cPorts, RTRCPTR pvUser, + const char *pszOut, const char *pszIn, + const char *pszOutStr, const char *pszInStr, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_IOPortRegisterRC: caller='%s'/%d: Port=%#x cPorts=%#x pvUser=%p pszOut=%p:{%s} pszIn=%p:{%s} pszOutStr=%p:{%s} pszInStr=%p:{%s} pszDesc=%p:{%s}\n", pDevIns->pReg->szName, pDevIns->iInstance, + Port, cPorts, pvUser, pszOut, pszOut, pszIn, pszIn, pszOutStr, pszOutStr, pszInStr, pszInStr, pszDesc, pszDesc)); + + /* + * Resolve the functions (one of the can be NULL). 
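+     * Each non-NULL symbol name is looked up lazily in the device's RC module;
+     * a NULL name simply leaves that handler unregistered.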
+ */ + int rc = VINF_SUCCESS; + if ( pDevIns->pReg->szRCMod[0] + && (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + && VM_IS_RAW_MODE_ENABLED(pVM)) + { + RTRCPTR RCPtrIn = NIL_RTRCPTR; + if (pszIn) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszIn, &RCPtrIn); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszIn)\n", pDevIns->pReg->szRCMod, pszIn)); + } + RTRCPTR RCPtrOut = NIL_RTRCPTR; + if (pszOut && RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszOut, &RCPtrOut); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszOut)\n", pDevIns->pReg->szRCMod, pszOut)); + } + RTRCPTR RCPtrInStr = NIL_RTRCPTR; + if (pszInStr && RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszInStr, &RCPtrInStr); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszInStr)\n", pDevIns->pReg->szRCMod, pszInStr)); + } + RTRCPTR RCPtrOutStr = NIL_RTRCPTR; + if (pszOutStr && RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszOutStr, &RCPtrOutStr); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszOutStr)\n", pDevIns->pReg->szRCMod, pszOutStr)); + } + + if (RT_SUCCESS(rc)) + { +#if 0 /** @todo needs a real string cache for this */ + if (pDevIns->iInstance > 0) + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } +#endif + + rc = IOMR3IOPortRegisterRC(pVM, pDevIns, Port, cPorts, pvUser, RCPtrOut, RCPtrIn, RCPtrOutStr, RCPtrInStr, pszDesc); + } + } + else if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + AssertMsgFailed(("No RC module for this driver!\n")); + rc = VERR_INVALID_PARAMETER; + } + + LogFlow(("pdmR3DevHlp_IOPortRegisterRC: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnIOPortRegisterR0} */ +static DECLCALLBACK(int) pdmR3DevHlp_IOPortRegisterR0(PPDMDEVINS pDevIns, RTIOPORT Port, RTIOPORT cPorts, RTR0PTR pvUser, + const char *pszOut, const char *pszIn, + const char *pszOutStr, const char *pszInStr, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_IOPortRegisterR0: caller='%s'/%d: Port=%#x cPorts=%#x pvUser=%p pszOut=%p:{%s} pszIn=%p:{%s} pszOutStr=%p:{%s} pszInStr=%p:{%s} pszDesc=%p:{%s}\n", pDevIns->pReg->szName, pDevIns->iInstance, + Port, cPorts, pvUser, pszOut, pszOut, pszIn, pszIn, pszOutStr, pszOutStr, pszInStr, pszInStr, pszDesc, pszDesc)); + + /* + * Resolve the functions (one of the can be NULL). 
+ */ + int rc = VINF_SUCCESS; + if ( pDevIns->pReg->szR0Mod[0] + && (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0)) + { + R0PTRTYPE(PFNIOMIOPORTIN) pfnR0PtrIn = 0; + if (pszIn) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszIn, &pfnR0PtrIn); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszIn)\n", pDevIns->pReg->szR0Mod, pszIn)); + } + R0PTRTYPE(PFNIOMIOPORTOUT) pfnR0PtrOut = 0; + if (pszOut && RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszOut, &pfnR0PtrOut); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszOut)\n", pDevIns->pReg->szR0Mod, pszOut)); + } + R0PTRTYPE(PFNIOMIOPORTINSTRING) pfnR0PtrInStr = 0; + if (pszInStr && RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszInStr, &pfnR0PtrInStr); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszInStr)\n", pDevIns->pReg->szR0Mod, pszInStr)); + } + R0PTRTYPE(PFNIOMIOPORTOUTSTRING) pfnR0PtrOutStr = 0; + if (pszOutStr && RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszOutStr, &pfnR0PtrOutStr); + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszOutStr)\n", pDevIns->pReg->szR0Mod, pszOutStr)); + } + + if (RT_SUCCESS(rc)) + { +#if 0 /** @todo needs a real string cache for this */ + if (pDevIns->iInstance > 0) + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } +#endif + + rc = IOMR3IOPortRegisterR0(pDevIns->Internal.s.pVMR3, pDevIns, Port, cPorts, pvUser, pfnR0PtrOut, pfnR0PtrIn, pfnR0PtrOutStr, pfnR0PtrInStr, pszDesc); + } + } + else + { + AssertMsgFailed(("No R0 module for this driver!\n")); + rc = VERR_INVALID_PARAMETER; + } + + LogFlow(("pdmR3DevHlp_IOPortRegisterR0: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnIOPortDeregister} */ +static DECLCALLBACK(int) pdmR3DevHlp_IOPortDeregister(PPDMDEVINS pDevIns, RTIOPORT Port, RTIOPORT cPorts) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_IOPortDeregister: caller='%s'/%d: Port=%#x cPorts=%#x\n", pDevIns->pReg->szName, pDevIns->iInstance, + Port, cPorts)); + + int rc = IOMR3IOPortDeregister(pDevIns->Internal.s.pVMR3, pDevIns, Port, cPorts); + + LogFlow(("pdmR3DevHlp_IOPortDeregister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnMMIORegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIORegister(PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange, RTHCPTR pvUser, + PFNIOMMMIOWRITE pfnWrite, PFNIOMMMIOREAD pfnRead, PFNIOMMMIOFILL pfnFill, + uint32_t fFlags, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_MMIORegister: caller='%s'/%d: GCPhysStart=%RGp cbRange=%RGp pvUser=%p pfnWrite=%p pfnRead=%p pfnFill=%p fFlags=%#x pszDesc=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhysStart, cbRange, pvUser, pfnWrite, pfnRead, pfnFill, pszDesc, fFlags, pszDesc)); + + if (pDevIns->iInstance > 0) + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } + + int rc = IOMR3MmioRegisterR3(pVM, pDevIns, GCPhysStart, cbRange, pvUser, + pfnWrite, pfnRead, pfnFill, fFlags, pszDesc); + + LogFlow(("pdmR3DevHlp_MMIORegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** 
@interface_method_impl{PDMDEVHLPR3,pfnMMIORegisterRC} */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIORegisterRC(PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange, RTRCPTR pvUser, + const char *pszWrite, const char *pszRead, const char *pszFill) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_MMIORegisterRC: caller='%s'/%d: GCPhysStart=%RGp cbRange=%RGp pvUser=%p pszWrite=%p:{%s} pszRead=%p:{%s} pszFill=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhysStart, cbRange, pvUser, pszWrite, pszWrite, pszRead, pszRead, pszFill, pszFill)); + + + /* + * Resolve the functions. + * Not all function have to present, leave it to IOM to enforce this. + */ + int rc = VINF_SUCCESS; + if ( pDevIns->pReg->szRCMod[0] + && (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + && VM_IS_RAW_MODE_ENABLED(pVM)) + { + RTRCPTR RCPtrWrite = NIL_RTRCPTR; + if (pszWrite) + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszWrite, &RCPtrWrite); + + RTRCPTR RCPtrRead = NIL_RTRCPTR; + int rc2 = VINF_SUCCESS; + if (pszRead) + rc2 = pdmR3DevGetSymbolRCLazy(pDevIns, pszRead, &RCPtrRead); + + RTRCPTR RCPtrFill = NIL_RTRCPTR; + int rc3 = VINF_SUCCESS; + if (pszFill) + rc3 = pdmR3DevGetSymbolRCLazy(pDevIns, pszFill, &RCPtrFill); + + if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3)) + rc = IOMR3MmioRegisterRC(pVM, pDevIns, GCPhysStart, cbRange, pvUser, RCPtrWrite, RCPtrRead, RCPtrFill); + else + { + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszWrite)\n", pDevIns->pReg->szRCMod, pszWrite)); + AssertMsgRC(rc2, ("Failed to resolve %s.%s (pszRead)\n", pDevIns->pReg->szRCMod, pszRead)); + AssertMsgRC(rc3, ("Failed to resolve %s.%s (pszFill)\n", pDevIns->pReg->szRCMod, pszFill)); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + if (RT_FAILURE(rc3) && RT_SUCCESS(rc)) + rc = rc3; + } + } + else if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + AssertMsgFailed(("No RC module for this driver!\n")); + rc = VERR_INVALID_PARAMETER; + } + + LogFlow(("pdmR3DevHlp_MMIORegisterRC: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + +/** @interface_method_impl{PDMDEVHLPR3,pfnMMIORegisterR0} */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIORegisterR0(PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange, RTR0PTR pvUser, + const char *pszWrite, const char *pszRead, const char *pszFill) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIORegisterHC: caller='%s'/%d: GCPhysStart=%RGp cbRange=%RGp pvUser=%p pszWrite=%p:{%s} pszRead=%p:{%s} pszFill=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhysStart, cbRange, pvUser, pszWrite, pszWrite, pszRead, pszRead, pszFill, pszFill)); + + /* + * Resolve the functions. + * Not all function have to present, leave it to IOM to enforce this. 
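+     * Note that the ring-0 registration below does not depend on raw mode;
+     * only an R0 module and PDM_DEVREG_FLAGS_R0 are required.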
+ */ + int rc = VINF_SUCCESS; + if ( pDevIns->pReg->szR0Mod[0] + && (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0)) + { + R0PTRTYPE(PFNIOMMMIOWRITE) pfnR0PtrWrite = 0; + if (pszWrite) + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszWrite, &pfnR0PtrWrite); + R0PTRTYPE(PFNIOMMMIOREAD) pfnR0PtrRead = 0; + int rc2 = VINF_SUCCESS; + if (pszRead) + rc2 = pdmR3DevGetSymbolR0Lazy(pDevIns, pszRead, &pfnR0PtrRead); + R0PTRTYPE(PFNIOMMMIOFILL) pfnR0PtrFill = 0; + int rc3 = VINF_SUCCESS; + if (pszFill) + rc3 = pdmR3DevGetSymbolR0Lazy(pDevIns, pszFill, &pfnR0PtrFill); + if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3)) + rc = IOMR3MmioRegisterR0(pDevIns->Internal.s.pVMR3, pDevIns, GCPhysStart, cbRange, pvUser, + pfnR0PtrWrite, pfnR0PtrRead, pfnR0PtrFill); + else + { + AssertMsgRC(rc, ("Failed to resolve %s.%s (pszWrite)\n", pDevIns->pReg->szR0Mod, pszWrite)); + AssertMsgRC(rc2, ("Failed to resolve %s.%s (pszRead)\n", pDevIns->pReg->szR0Mod, pszRead)); + AssertMsgRC(rc3, ("Failed to resolve %s.%s (pszFill)\n", pDevIns->pReg->szR0Mod, pszFill)); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + if (RT_FAILURE(rc3) && RT_SUCCESS(rc)) + rc = rc3; + } + } + else + { + AssertMsgFailed(("No R0 module for this driver!\n")); + rc = VERR_INVALID_PARAMETER; + } + + LogFlow(("pdmR3DevHlp_MMIORegisterR0: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnMMIODeregister} */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIODeregister(PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, RTGCPHYS cbRange) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIODeregister: caller='%s'/%d: GCPhysStart=%RGp cbRange=%RGp\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhysStart, cbRange)); + + int rc = IOMR3MmioDeregister(pDevIns->Internal.s.pVMR3, pDevIns, GCPhysStart, cbRange); + + LogFlow(("pdmR3DevHlp_MMIODeregister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMIO2Register + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIO2Register(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS cb, + uint32_t fFlags, void **ppv, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIO2Register: caller='%s'/%d: pPciDev=%p (%#x) iRegion=%#x cb=%#RGp fFlags=%RX32 ppv=%p pszDescp=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, + cb, fFlags, ppv, pszDesc, pszDesc)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 == pDevIns, VERR_INVALID_PARAMETER); + +/** @todo PGMR3PhysMMIO2Register mangles the description, move it here and + * use a real string cache. */ + int rc = PGMR3PhysMMIO2Register(pDevIns->Internal.s.pVMR3, pDevIns, pPciDev ? 
pPciDev->Int.s.idxDevCfg : 254, iRegion, + cb, fFlags, ppv, pszDesc); + + LogFlow(("pdmR3DevHlp_MMIO2Register: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @interface_method_impl{PDMDEVHLPR3,pfnMMIOExPreRegister} + */ +static DECLCALLBACK(int) +pdmR3DevHlp_MMIOExPreRegister(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS cbRegion, uint32_t fFlags, + const char *pszDesc, + RTHCPTR pvUser, PFNIOMMMIOWRITE pfnWrite, PFNIOMMMIOREAD pfnRead, PFNIOMMMIOFILL pfnFill, + RTR0PTR pvUserR0, const char *pszWriteR0, const char *pszReadR0, const char *pszFillR0, + RTRCPTR pvUserRC, const char *pszWriteRC, const char *pszReadRC, const char *pszFillRC) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_MMIOExPreRegister: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x cbRegion=%#RGp fFlags=%RX32 pszDesc=%p:{%s}\n" + " pvUser=%p pfnWrite=%p pfnRead=%p pfnFill=%p\n" + " pvUserR0=%p pszWriteR0=%s pszReadR0=%s pszFillR0=%s\n" + " pvUserRC=%p pszWriteRC=%s pszReadRC=%s pszFillRC=%s\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, cbRegion, + fFlags, pszDesc, pszDesc, + pvUser, pfnWrite, pfnRead, pfnFill, + pvUserR0, pszWriteR0, pszReadR0, pszFillR0, + pvUserRC, pszWriteRC, pszReadRC, pszFillRC)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 == pDevIns, VERR_INVALID_PARAMETER); + + /* + * Resolve the functions. + */ + AssertLogRelReturn( (!pszWriteR0 && !pszReadR0 && !pszFillR0) + || (pDevIns->pReg->szR0Mod[0] && (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0)), + VERR_INVALID_PARAMETER); + AssertLogRelReturn( (!pszWriteRC && !pszReadRC && !pszFillRC) + || (pDevIns->pReg->szRCMod[0] && (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC)), + VERR_INVALID_PARAMETER); + + /* Ring-0 */ + int rc; + R0PTRTYPE(PFNIOMMMIOWRITE) pfnWriteR0 = 0; + if (pszWriteR0) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszWriteR0, &pfnWriteR0); + AssertLogRelMsgRCReturn(rc, ("pszWriteR0=%s rc=%Rrc\n", pszWriteR0, rc), rc); + } + + R0PTRTYPE(PFNIOMMMIOREAD) pfnReadR0 = 0; + if (pszReadR0) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszReadR0, &pfnReadR0); + AssertLogRelMsgRCReturn(rc, ("pszReadR0=%s rc=%Rrc\n", pszReadR0, rc), rc); + } + R0PTRTYPE(PFNIOMMMIOFILL) pfnFillR0 = 0; + if (pszFillR0) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pszFillR0, &pfnFillR0); + AssertLogRelMsgRCReturn(rc, ("pszFillR0=%s rc=%Rrc\n", pszFillR0, rc), rc); + } + + /* Raw-mode */ + rc = VINF_SUCCESS; + RCPTRTYPE(PFNIOMMMIOWRITE) pfnWriteRC = 0; + if (pszWriteRC) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszWriteRC, &pfnWriteRC); + AssertLogRelMsgRCReturn(rc, ("pszWriteRC=%s rc=%Rrc\n", pszWriteRC, rc), rc); + } + + RCPTRTYPE(PFNIOMMMIOREAD) pfnReadRC = 0; + if (pszReadRC) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszReadRC, &pfnReadRC); + AssertLogRelMsgRCReturn(rc, ("pszReadRC=%s rc=%Rrc\n", pszReadRC, rc), rc); + } + RCPTRTYPE(PFNIOMMMIOFILL) pfnFillRC = 0; + if (pszFillRC) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pszFillRC, &pfnFillRC); + AssertLogRelMsgRCReturn(rc, ("pszFillRC=%s rc=%Rrc\n", pszFillRC, rc), rc); + } + + /* + * Call IOM to make the registration. + */ + rc = IOMR3MmioExPreRegister(pVM, pDevIns, pPciDev ? 
pPciDev->Int.s.idxDevCfg : 254, iRegion, cbRegion, fFlags, pszDesc, + pvUser, pfnWrite, pfnRead, pfnFill, + pvUserR0, pfnWriteR0, pfnReadR0, pfnFillR0, + pvUserRC, pfnWriteRC, pfnReadRC, pfnFillRC); + + LogFlow(("pdmR3DevHlp_MMIOExPreRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMIOExDeregister + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIOExDeregister(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIOExDeregister: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion)); + + AssertReturn(iRegion <= UINT8_MAX || iRegion == UINT32_MAX, VERR_INVALID_PARAMETER); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 == pDevIns, VERR_INVALID_PARAMETER); + + int rc = PGMR3PhysMMIOExDeregister(pDevIns->Internal.s.pVMR3, pDevIns, pPciDev ? pPciDev->Int.s.idxDevCfg : 254, iRegion); + + LogFlow(("pdmR3DevHlp_MMIOExDeregister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMIOExMap + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIOExMap(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS GCPhys) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIOExMap: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x GCPhys=%#RGp\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, GCPhys)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 != NULL, VERR_INVALID_PARAMETER); + + int rc = PGMR3PhysMMIOExMap(pDevIns->Internal.s.pVMR3, pDevIns, pPciDev ? pPciDev->Int.s.idxDevCfg : 254, iRegion, GCPhys); + + LogFlow(("pdmR3DevHlp_MMIOExMap: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMIOExUnmap + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIOExUnmap(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS GCPhys) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIOExUnmap: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x GCPhys=%#RGp\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, GCPhys)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 != NULL, VERR_INVALID_PARAMETER); + + int rc = PGMR3PhysMMIOExUnmap(pDevIns->Internal.s.pVMR3, pDevIns, pPciDev ? pPciDev->Int.s.idxDevCfg : 254, iRegion, GCPhys); + + LogFlow(("pdmR3DevHlp_MMIOExUnmap: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMIOExReduce + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIOExReduce(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS cbRegion) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_MMIOExReduce: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x cbRegion=%RGp\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, cbRegion)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 != NULL, VERR_INVALID_PARAMETER); + + int rc = PGMR3PhysMMIOExReduce(pDevIns->Internal.s.pVMR3, pDevIns, pPciDev ? 
pPciDev->Int.s.idxDevCfg : 254, iRegion, cbRegion); + + LogFlow(("pdmR3DevHlp_MMIOExReduce: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMHyperMapMMIO2 + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMHyperMapMMIO2(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS off, + RTGCPHYS cb, const char *pszDesc, PRTRCPTR pRCPtr) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_MMHyperMapMMIO2: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x off=%RGp cb=%RGp pszDesc=%p:{%s} pRCPtr=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, off, cb, pszDesc, pszDesc, pRCPtr)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 == pDevIns, VERR_INVALID_PARAMETER); + + if (pDevIns->iInstance > 0) + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } + + int rc = MMR3HyperMapMMIO2(pVM, pDevIns, pPciDev ? pPciDev->Int.s.idxDevCfg : 254, iRegion, off, cb, pszDesc, pRCPtr); + + LogFlow(("pdmR3DevHlp_MMHyperMapMMIO2: caller='%s'/%d: returns %Rrc *pRCPtr=%RRv\n", pDevIns->pReg->szName, pDevIns->iInstance, rc, *pRCPtr)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnMMIO2MapKernel + */ +static DECLCALLBACK(int) pdmR3DevHlp_MMIO2MapKernel(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, RTGCPHYS off, + RTGCPHYS cb,const char *pszDesc, PRTR0PTR pR0Ptr) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_MMIO2MapKernel: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%#x off=%RGp cb=%RGp pszDesc=%p:{%s} pR0Ptr=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev ? pPciDev->uDevFn : UINT32_MAX, iRegion, off, cb, pszDesc, pszDesc, pR0Ptr)); + AssertReturn(!pPciDev || pPciDev->Int.s.pDevInsR3 == pDevIns, VERR_INVALID_PARAMETER); + + if (pDevIns->iInstance > 0) + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } + + int rc = PGMR3PhysMMIO2MapKernel(pVM, pDevIns, pPciDev ? pPciDev->Int.s.idxDevCfg : 254, iRegion, off, cb, pszDesc, pR0Ptr); + + LogFlow(("pdmR3DevHlp_MMIO2MapKernel: caller='%s'/%d: returns %Rrc *pR0Ptr=%RHv\n", pDevIns->pReg->szName, pDevIns->iInstance, rc, *pR0Ptr)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnROMRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_ROMRegister(PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, uint32_t cbRange, + const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_ROMRegister: caller='%s'/%d: GCPhysStart=%RGp cbRange=%#x pvBinary=%p cbBinary=%#x fFlags=%#RX32 pszDesc=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhysStart, cbRange, pvBinary, cbBinary, fFlags, pszDesc, pszDesc)); + +/** @todo can we mangle pszDesc? 
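+ * ("Mangling" would mean appending the instance number the way the MMIO and
+ * timer helpers in this file do via MMR3HeapAPrintf for instances > 0.)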
*/ + int rc = PGMR3PhysRomRegister(pDevIns->Internal.s.pVMR3, pDevIns, GCPhysStart, cbRange, pvBinary, cbBinary, fFlags, pszDesc); + + LogFlow(("pdmR3DevHlp_ROMRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnROMProtectShadow} */ +static DECLCALLBACK(int) pdmR3DevHlp_ROMProtectShadow(PPDMDEVINS pDevIns, RTGCPHYS GCPhysStart, uint32_t cbRange, PGMROMPROT enmProt) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_ROMProtectShadow: caller='%s'/%d: GCPhysStart=%RGp cbRange=%#x enmProt=%d\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhysStart, cbRange, enmProt)); + + int rc = PGMR3PhysRomProtect(pDevIns->Internal.s.pVMR3, GCPhysStart, cbRange, enmProt); + + LogFlow(("pdmR3DevHlp_ROMProtectShadow: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnSSMRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_SSMRegister(PPDMDEVINS pDevIns, uint32_t uVersion, size_t cbGuess, const char *pszBefore, + PFNSSMDEVLIVEPREP pfnLivePrep, PFNSSMDEVLIVEEXEC pfnLiveExec, PFNSSMDEVLIVEVOTE pfnLiveVote, + PFNSSMDEVSAVEPREP pfnSavePrep, PFNSSMDEVSAVEEXEC pfnSaveExec, PFNSSMDEVSAVEDONE pfnSaveDone, + PFNSSMDEVLOADPREP pfnLoadPrep, PFNSSMDEVLOADEXEC pfnLoadExec, PFNSSMDEVLOADDONE pfnLoadDone) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_SSMRegister: caller='%s'/%d: uVersion=%#x cbGuess=%#x pszBefore=%p:{%s}\n" + " pfnLivePrep=%p pfnLiveExec=%p pfnLiveVote=%p pfnSavePrep=%p pfnSaveExec=%p pfnSaveDone=%p pszLoadPrep=%p pfnLoadExec=%p pfnLoadDone=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, uVersion, cbGuess, pszBefore, pszBefore, + pfnLivePrep, pfnLiveExec, pfnLiveVote, + pfnSavePrep, pfnSaveExec, pfnSaveDone, + pfnLoadPrep, pfnLoadExec, pfnLoadDone)); + + int rc = SSMR3RegisterDevice(pDevIns->Internal.s.pVMR3, pDevIns, pDevIns->pReg->szName, pDevIns->iInstance, + uVersion, cbGuess, pszBefore, + pfnLivePrep, pfnLiveExec, pfnLiveVote, + pfnSavePrep, pfnSaveExec, pfnSaveDone, + pfnLoadPrep, pfnLoadExec, pfnLoadDone); + + LogFlow(("pdmR3DevHlp_SSMRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnTMTimerCreate} */ +static DECLCALLBACK(int) pdmR3DevHlp_TMTimerCreate(PPDMDEVINS pDevIns, TMCLOCK enmClock, PFNTMTIMERDEV pfnCallback, void *pvUser, uint32_t fFlags, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_TMTimerCreate: caller='%s'/%d: enmClock=%d pfnCallback=%p pvUser=%p fFlags=%#x pszDesc=%p:{%s} ppTimer=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, enmClock, pfnCallback, pvUser, fFlags, pszDesc, pszDesc, ppTimer)); + + if (pDevIns->iInstance > 0) /** @todo use a string cache here later. 
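+     * (The block below appends " [instance]" to the description so several
+     *  instances of the same device get distinct timer names.)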
*/ + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s [%u]", pszDesc, pDevIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } + + int rc = TMR3TimerCreateDevice(pVM, pDevIns, enmClock, pfnCallback, pvUser, fFlags, pszDesc, ppTimer); + + LogFlow(("pdmR3DevHlp_TMTimerCreate: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnTMUtcNow} */ +static DECLCALLBACK(PRTTIMESPEC) pdmR3DevHlp_TMUtcNow(PPDMDEVINS pDevIns, PRTTIMESPEC pTime) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_TMUtcNow: caller='%s'/%d: pTime=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pTime)); + + pTime = TMR3UtcNow(pDevIns->Internal.s.pVMR3, pTime); + + LogFlow(("pdmR3DevHlp_TMUtcNow: caller='%s'/%d: returns %RU64\n", pDevIns->pReg->szName, pDevIns->iInstance, RTTimeSpecGetNano(pTime))); + return pTime; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnTMTimeVirtGet} */ +static DECLCALLBACK(uint64_t) pdmR3DevHlp_TMTimeVirtGet(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_TMTimeVirtGet: caller='%s'/%d\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + uint64_t u64Time = TMVirtualSyncGet(pDevIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DevHlp_TMTimeVirtGet: caller='%s'/%d: returns %RU64\n", pDevIns->pReg->szName, pDevIns->iInstance, u64Time)); + return u64Time; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnTMTimeVirtGetFreq} */ +static DECLCALLBACK(uint64_t) pdmR3DevHlp_TMTimeVirtGetFreq(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_TMTimeVirtGetFreq: caller='%s'/%d\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + uint64_t u64Freq = TMVirtualGetFreq(pDevIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DevHlp_TMTimeVirtGetFreq: caller='%s'/%d: returns %RU64\n", pDevIns->pReg->szName, pDevIns->iInstance, u64Freq)); + return u64Freq; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnTMTimeVirtGetNano} */ +static DECLCALLBACK(uint64_t) pdmR3DevHlp_TMTimeVirtGetNano(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_TMTimeVirtGetNano: caller='%s'/%d\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + uint64_t u64Time = TMVirtualGet(pDevIns->Internal.s.pVMR3); + uint64_t u64Nano = TMVirtualToNano(pDevIns->Internal.s.pVMR3, u64Time); + + LogFlow(("pdmR3DevHlp_TMTimeVirtGetNano: caller='%s'/%d: returns %RU64\n", pDevIns->pReg->szName, pDevIns->iInstance, u64Nano)); + return u64Nano; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetSupDrvSession} */ +static DECLCALLBACK(PSUPDRVSESSION) pdmR3DevHlp_GetSupDrvSession(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_GetSupDrvSession: caller='%s'/%d\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + PSUPDRVSESSION pSession = pDevIns->Internal.s.pVMR3->pSession; + + LogFlow(("pdmR3DevHlp_GetSupDrvSession: caller='%s'/%d: returns %#p\n", pDevIns->pReg->szName, pDevIns->iInstance, pSession)); + return pSession; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnQueryGenericUserObject} */ +static DECLCALLBACK(void *) pdmR3DevHlp_QueryGenericUserObject(PPDMDEVINS pDevIns, PCRTUUID pUuid) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_QueryGenericUserObject: caller='%s'/%d: pUuid=%p:%RTuuid\n", + pDevIns->pReg->szName, pDevIns->iInstance, pUuid, pUuid)); + +#if defined(DEBUG_bird) || defined(DEBUG_ramshankar) || defined(DEBUG_sunlover) || defined(DEBUG_michael) || defined(DEBUG_andy) + 
AssertMsgFailed(("'%s' wants %RTuuid - external only interface!\n", pDevIns->pReg->szName, pUuid)); +#endif + + void *pvRet; + PUVM pUVM = pDevIns->Internal.s.pVMR3->pUVM; + if (pUVM->pVmm2UserMethods->pfnQueryGenericObject) + pvRet = pUVM->pVmm2UserMethods->pfnQueryGenericObject(pUVM->pVmm2UserMethods, pUVM, pUuid); + else + pvRet = NULL; + + LogRel(("pdmR3DevHlp_QueryGenericUserObject: caller='%s'/%d: returns %#p for %RTuuid\n", + pDevIns->pReg->szName, pDevIns->iInstance, pvRet, pUuid)); + return pvRet; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysRead} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysRead(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + LogFlow(("pdmR3DevHlp_PhysRead: caller='%s'/%d: GCPhys=%RGp pvBuf=%p cbRead=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhys, pvBuf, cbRead)); + +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + if (!VM_IS_EMT(pVM)) + { + char szNames[128]; + uint32_t cLocks = PDMR3CritSectCountOwned(pVM, szNames, sizeof(szNames)); + AssertMsg(cLocks == 0, ("cLocks=%u %s\n", cLocks, szNames)); + } +#endif + + VBOXSTRICTRC rcStrict; + if (VM_IS_EMT(pVM)) + rcStrict = PGMPhysRead(pVM, GCPhys, pvBuf, cbRead, PGMACCESSORIGIN_DEVICE); + else + rcStrict = PGMR3PhysReadExternal(pVM, GCPhys, pvBuf, cbRead, PGMACCESSORIGIN_DEVICE); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); /** @todo track down the users for this bugger. */ + + Log(("pdmR3DevHlp_PhysRead: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VBOXSTRICTRC_VAL(rcStrict) )); + return VBOXSTRICTRC_VAL(rcStrict); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysWrite} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysWrite(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + LogFlow(("pdmR3DevHlp_PhysWrite: caller='%s'/%d: GCPhys=%RGp pvBuf=%p cbWrite=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhys, pvBuf, cbWrite)); + +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + if (!VM_IS_EMT(pVM)) + { + char szNames[128]; + uint32_t cLocks = PDMR3CritSectCountOwned(pVM, szNames, sizeof(szNames)); + AssertMsg(cLocks == 0, ("cLocks=%u %s\n", cLocks, szNames)); + } +#endif + + VBOXSTRICTRC rcStrict; + if (VM_IS_EMT(pVM)) + rcStrict = PGMPhysWrite(pVM, GCPhys, pvBuf, cbWrite, PGMACCESSORIGIN_DEVICE); + else + rcStrict = PGMR3PhysWriteExternal(pVM, GCPhys, pvBuf, cbWrite, PGMACCESSORIGIN_DEVICE); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); /** @todo track down the users for this bugger. 
*/ + + Log(("pdmR3DevHlp_PhysWrite: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VBOXSTRICTRC_VAL(rcStrict) )); + return VBOXSTRICTRC_VAL(rcStrict); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysGCPhys2CCPtr} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysGCPhys2CCPtr(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t fFlags, void **ppv, PPGMPAGEMAPLOCK pLock) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + LogFlow(("pdmR3DevHlp_PhysGCPhys2CCPtr: caller='%s'/%d: GCPhys=%RGp fFlags=%#x ppv=%p pLock=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhys, fFlags, ppv, pLock)); + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + if (!VM_IS_EMT(pVM)) + { + char szNames[128]; + uint32_t cLocks = PDMR3CritSectCountOwned(pVM, szNames, sizeof(szNames)); + AssertMsg(cLocks == 0, ("cLocks=%u %s\n", cLocks, szNames)); + } +#endif + + int rc = PGMR3PhysGCPhys2CCPtrExternal(pVM, GCPhys, ppv, pLock); + + Log(("pdmR3DevHlp_PhysGCPhys2CCPtr: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysGCPhys2CCPtrReadOnly} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysGCPhys2CCPtrReadOnly(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t fFlags, const void **ppv, PPGMPAGEMAPLOCK pLock) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + LogFlow(("pdmR3DevHlp_PhysGCPhys2CCPtrReadOnly: caller='%s'/%d: GCPhys=%RGp fFlags=%#x ppv=%p pLock=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhys, fFlags, ppv, pLock)); + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + if (!VM_IS_EMT(pVM)) + { + char szNames[128]; + uint32_t cLocks = PDMR3CritSectCountOwned(pVM, szNames, sizeof(szNames)); + AssertMsg(cLocks == 0, ("cLocks=%u %s\n", cLocks, szNames)); + } +#endif + + int rc = PGMR3PhysGCPhys2CCPtrReadOnlyExternal(pVM, GCPhys, ppv, pLock); + + Log(("pdmR3DevHlp_PhysGCPhys2CCPtrReadOnly: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysReleasePageMappingLock} */ +static DECLCALLBACK(void) pdmR3DevHlp_PhysReleasePageMappingLock(PPDMDEVINS pDevIns, PPGMPAGEMAPLOCK pLock) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + LogFlow(("pdmR3DevHlp_PhysReleasePageMappingLock: caller='%s'/%d: pLock=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pLock)); + + PGMPhysReleasePageMappingLock(pVM, pLock); + + Log(("pdmR3DevHlp_PhysReleasePageMappingLock: caller='%s'/%d: returns void\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysReadGCVirt} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysReadGCVirt(PPDMDEVINS pDevIns, void *pvDst, RTGCPTR GCVirtSrc, size_t cb) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_PhysReadGCVirt: caller='%s'/%d: pvDst=%p GCVirt=%RGv cb=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, pvDst, GCVirtSrc, cb)); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + return VERR_ACCESS_DENIED; +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + /** @todo SMP. 
*/ +#endif + + int rc = PGMPhysSimpleReadGCPtr(pVCpu, pvDst, GCVirtSrc, cb); + + LogFlow(("pdmR3DevHlp_PhysReadGCVirt: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysWriteGCVirt} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysWriteGCVirt(PPDMDEVINS pDevIns, RTGCPTR GCVirtDst, const void *pvSrc, size_t cb) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_PhysWriteGCVirt: caller='%s'/%d: GCVirtDst=%RGv pvSrc=%p cb=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCVirtDst, pvSrc, cb)); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + return VERR_ACCESS_DENIED; +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + /** @todo SMP. */ +#endif + + int rc = PGMPhysSimpleWriteGCPtr(pVCpu, GCVirtDst, pvSrc, cb); + + LogFlow(("pdmR3DevHlp_PhysWriteGCVirt: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPhysGCPtr2GCPhys} */ +static DECLCALLBACK(int) pdmR3DevHlp_PhysGCPtr2GCPhys(PPDMDEVINS pDevIns, RTGCPTR GCPtr, PRTGCPHYS pGCPhys) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_PhysGCPtr2GCPhys: caller='%s'/%d: GCPtr=%RGv pGCPhys=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPtr, pGCPhys)); + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + return VERR_ACCESS_DENIED; +#if defined(VBOX_STRICT) && defined(PDM_DEVHLP_DEADLOCK_DETECTION) + /** @todo SMP. */ +#endif + + int rc = PGMPhysGCPtr2GCPhys(pVCpu, GCPtr, pGCPhys); + + LogFlow(("pdmR3DevHlp_PhysGCPtr2GCPhys: caller='%s'/%d: returns %Rrc *pGCPhys=%RGp\n", pDevIns->pReg->szName, pDevIns->iInstance, rc, *pGCPhys)); + + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnMMHeapAlloc} */ +static DECLCALLBACK(void *) pdmR3DevHlp_MMHeapAlloc(PPDMDEVINS pDevIns, size_t cb) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_MMHeapAlloc: caller='%s'/%d: cb=%#x\n", pDevIns->pReg->szName, pDevIns->iInstance, cb)); + + void *pv = MMR3HeapAlloc(pDevIns->Internal.s.pVMR3, MM_TAG_PDM_DEVICE_USER, cb); + + LogFlow(("pdmR3DevHlp_MMHeapAlloc: caller='%s'/%d: returns %p\n", pDevIns->pReg->szName, pDevIns->iInstance, pv)); + return pv; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnMMHeapAllocZ} */ +static DECLCALLBACK(void *) pdmR3DevHlp_MMHeapAllocZ(PPDMDEVINS pDevIns, size_t cb) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_MMHeapAllocZ: caller='%s'/%d: cb=%#x\n", pDevIns->pReg->szName, pDevIns->iInstance, cb)); + + void *pv = MMR3HeapAllocZ(pDevIns->Internal.s.pVMR3, MM_TAG_PDM_DEVICE_USER, cb); + + LogFlow(("pdmR3DevHlp_MMHeapAllocZ: caller='%s'/%d: returns %p\n", pDevIns->pReg->szName, pDevIns->iInstance, pv)); + return pv; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnMMHeapFree} */ +static DECLCALLBACK(void) pdmR3DevHlp_MMHeapFree(PPDMDEVINS pDevIns, void *pv) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); RT_NOREF_PV(pDevIns); + LogFlow(("pdmR3DevHlp_MMHeapFree: caller='%s'/%d: pv=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pv)); + + MMR3HeapFree(pv); + + LogFlow(("pdmR3DevHlp_MMHeapFree: caller='%s'/%d: returns void\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMState} */ +static DECLCALLBACK(VMSTATE) pdmR3DevHlp_VMState(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + + VMSTATE enmVMState =
VMR3GetState(pDevIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DevHlp_VMState: caller='%s'/%d: returns %d (%s)\n", pDevIns->pReg->szName, pDevIns->iInstance, + enmVMState, VMR3GetStateName(enmVMState))); + return enmVMState; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMTeleportedAndNotFullyResumedYet} */ +static DECLCALLBACK(bool) pdmR3DevHlp_VMTeleportedAndNotFullyResumedYet(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + + bool fRc = VMR3TeleportedAndNotFullyResumedYet(pDevIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DevHlp_VMTeleportedAndNotFullyResumedYet: caller='%s'/%d: returns %RTbool\n", pDevIns->pReg->szName, pDevIns->iInstance, + fRc)); + return fRc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSetError} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSetError(PPDMDEVINS pDevIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + va_list args; + va_start(args, pszFormat); + int rc2 = VMSetErrorV(pDevIns->Internal.s.pVMR3, rc, RT_SRC_POS_ARGS, pszFormat, args); Assert(rc2 == rc); NOREF(rc2); + va_end(args); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSetErrorV} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSetErrorV(PPDMDEVINS pDevIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + int rc2 = VMSetErrorV(pDevIns->Internal.s.pVMR3, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSetRuntimeError} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSetRuntimeError(PPDMDEVINS pDevIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, ...) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + va_list args; + va_start(args, pszFormat); + int rc = VMSetRuntimeErrorV(pDevIns->Internal.s.pVMR3, fFlags, pszErrorId, pszFormat, args); + va_end(args); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSetRuntimeErrorV} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSetRuntimeErrorV(PPDMDEVINS pDevIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list va) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + int rc = VMSetRuntimeErrorV(pDevIns->Internal.s.pVMR3, fFlags, pszErrorId, pszFormat, va); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDBGFStopV} */ +static DECLCALLBACK(int) pdmR3DevHlp_DBGFStopV(PPDMDEVINS pDevIns, const char *pszFile, unsigned iLine, const char *pszFunction, const char *pszFormat, va_list args) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); +#ifdef LOG_ENABLED + va_list va2; + va_copy(va2, args); + LogFlow(("pdmR3DevHlp_DBGFStopV: caller='%s'/%d: pszFile=%p:{%s} iLine=%d pszFunction=%p:{%s} pszFormat=%p:{%s} (%N)\n", + pDevIns->pReg->szName, pDevIns->iInstance, pszFile, pszFile, iLine, pszFunction, pszFunction, pszFormat, pszFormat, pszFormat, &va2)); + va_end(va2); +#endif + + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + int rc = DBGFR3EventSrcV(pVM, DBGFEVENT_DEV_STOP, pszFile, iLine, pszFunction, pszFormat, args); + if (rc == VERR_DBGF_NOT_ATTACHED) + rc = VINF_SUCCESS; + + LogFlow(("pdmR3DevHlp_DBGFStopV: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDBGFInfoRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_DBGFInfoRegister(PPDMDEVINS pDevIns, const char *pszName, const char *pszDesc, PFNDBGFHANDLERDEV pfnHandler) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_DBGFInfoRegister: caller='%s'/%d: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p\n", +
pDevIns->pReg->szName, pDevIns->iInstance, pszName, pszName, pszDesc, pszDesc, pfnHandler)); + + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + int rc = DBGFR3InfoRegisterDevice(pVM, pszName, pszDesc, pfnHandler, pDevIns); + + LogFlow(("pdmR3DevHlp_DBGFInfoRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDBGFRegRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_DBGFRegRegister(PPDMDEVINS pDevIns, PCDBGFREGDESC paRegisters) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_DBGFRegRegister: caller='%s'/%d: paRegisters=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, paRegisters)); + + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + int rc = DBGFR3RegRegisterDevice(pVM, paRegisters, pDevIns, pDevIns->pReg->szName, pDevIns->iInstance); + + LogFlow(("pdmR3DevHlp_DBGFRegRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDBGFTraceBuf} */ +static DECLCALLBACK(RTTRACEBUF) pdmR3DevHlp_DBGFTraceBuf(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + RTTRACEBUF hTraceBuf = pDevIns->Internal.s.pVMR3->hTraceBufR3; + LogFlow(("pdmR3DevHlp_DBGFTraceBuf: caller='%s'/%d: returns %p\n", pDevIns->pReg->szName, pDevIns->iInstance, hTraceBuf)); + return hTraceBuf; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnSTAMRegister} */ +static DECLCALLBACK(void) pdmR3DevHlp_STAMRegister(PPDMDEVINS pDevIns, void *pvSample, STAMTYPE enmType, const char *pszName, + STAMUNIT enmUnit, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + STAM_REG(pVM, pvSample, enmType, pszName, enmUnit, pszDesc); + RT_NOREF_PV(pVM); RT_NOREF6(pDevIns, pvSample, enmType, pszName, enmUnit, pszDesc); +} + + + +/** @interface_method_impl{PDMDEVHLPR3,pfnSTAMRegisterF} */ +static DECLCALLBACK(void) pdmR3DevHlp_STAMRegisterF(PPDMDEVINS pDevIns, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, + STAMUNIT enmUnit, const char *pszDesc, const char *pszName, ...) 
+{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + va_list args; + va_start(args, pszName); + int rc = STAMR3RegisterV(pVM, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); + va_end(args); + AssertRC(rc); + + NOREF(pVM); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnSTAMRegisterV} */ +static DECLCALLBACK(void) pdmR3DevHlp_STAMRegisterV(PPDMDEVINS pDevIns, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, + STAMUNIT enmUnit, const char *pszDesc, const char *pszName, va_list args) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + int rc = STAMR3RegisterV(pVM, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); + AssertRC(rc); + + NOREF(pVM); +} + + +/** + * @interface_method_impl{PDMDEVHLPR3,pfnPCIRegister} + */ +static DECLCALLBACK(int) pdmR3DevHlp_PCIRegister(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t idxDevCfg, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_PCIRegister: caller='%s'/%d: pPciDev=%p:{.config={%#.256Rhxs} idxDevCfg=%d fFlags=%#x uPciDevNo=%#x uPciFunNo=%#x pszName=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev->abConfig, idxDevCfg, fFlags, uPciDevNo, uPciFunNo, pszName, pszName ? pszName : "")); + + /* + * Validate input. + */ + AssertLogRelMsgReturn(RT_VALID_PTR(pPciDev), + ("'%s'/%d: Invalid pPciDev value: %p\n", pDevIns->pReg->szName, pDevIns->iInstance, pPciDev), + VERR_INVALID_POINTER); + AssertLogRelMsgReturn(PDMPciDevGetVendorId(pPciDev), + ("'%s'/%d: Vendor ID is not set!\n", pDevIns->pReg->szName, pDevIns->iInstance), + VERR_INVALID_POINTER); + AssertLogRelMsgReturn(idxDevCfg < 256 || idxDevCfg == PDMPCIDEVREG_CFG_NEXT, + ("'%s'/%d: Invalid config selector: %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, idxDevCfg), + VERR_OUT_OF_RANGE); + AssertLogRelMsgReturn( uPciDevNo < 32 + || uPciDevNo == PDMPCIDEVREG_DEV_NO_FIRST_UNUSED + || uPciDevNo == PDMPCIDEVREG_DEV_NO_SAME_AS_PREV, + ("'%s'/%d: Invalid PCI device number: %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, uPciDevNo), + VERR_INVALID_PARAMETER); + AssertLogRelMsgReturn( uPciFunNo < 8 + || uPciFunNo == PDMPCIDEVREG_FUN_NO_FIRST_UNUSED, + ("'%s'/%d: Invalid PCI function number: %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, uPciFunNo), + VERR_INVALID_PARAMETER); + AssertLogRelMsgReturn(!(fFlags & ~PDMPCIDEVREG_F_VALID_MASK), + ("'%s'/%d: Invalid flags: %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, fFlags), + VERR_INVALID_FLAGS); + if (!pszName) + pszName = pDevIns->pReg->szName; + AssertLogRelReturn(RT_VALID_PTR(pszName), VERR_INVALID_POINTER); + + /* + * Find the last(/previous) registered PCI device (for linking and more), + * checking for duplicate registration attempts while doing so.
+ */ + uint32_t idxDevCfgNext = 0; + PPDMPCIDEV pPrevPciDev = pDevIns->Internal.s.pHeadPciDevR3; + while (pPrevPciDev) + { + AssertLogRelMsgReturn(pPrevPciDev != pPciDev, + ("'%s'/%d attempted to register the same PCI device (%p) twice\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev), + VERR_DUPLICATE); + AssertLogRelMsgReturn(pPrevPciDev->Int.s.idxDevCfg != idxDevCfg, + ("'%s'/%d attempted to use the same device config index (%u) twice\n", + pDevIns->pReg->szName, pDevIns->iInstance, idxDevCfg), + VERR_ALREADY_LOADED); + if (pPrevPciDev->Int.s.idxDevCfg >= idxDevCfgNext) + idxDevCfgNext = pPrevPciDev->Int.s.idxDevCfg + 1; + + if (!pPrevPciDev->Int.s.pNextR3) + break; + pPrevPciDev = pPrevPciDev->Int.s.pNextR3; + } + + /* + * Resolve the PCI configuration node for the device. The default (zero'th) + * is the same as the PDM device, the rest are "PciCfg1..255" CFGM sub-nodes. + */ + if (idxDevCfg == PDMPCIDEVREG_CFG_NEXT) + { + idxDevCfg = idxDevCfgNext; + AssertLogRelMsgReturn(idxDevCfg < 256, ("'%s'/%d: PDMPCIDEVREG_IDX_DEV_CFG_NEXT ran out of valid indexes (ends at 255)\n", + pDevIns->pReg->szName, pDevIns->iInstance), + VERR_OUT_OF_RANGE); + } + + PCFGMNODE pCfg = pDevIns->Internal.s.pCfgHandle; + if (idxDevCfg != 0) + pCfg = CFGMR3GetChildF(pDevIns->Internal.s.pCfgHandle, "PciCfg%u", idxDevCfg); + + /* + * We resolve PDMPCIDEVREG_DEV_NO_SAME_AS_PREV, the PCI bus handles + * PDMPCIDEVREG_DEV_NO_FIRST_UNUSED and PDMPCIDEVREG_FUN_NO_FIRST_UNUSED. + */ + uint8_t const uPciDevNoRaw = uPciDevNo; + uint32_t uDefPciBusNo = 0; + if (uPciDevNo == PDMPCIDEVREG_DEV_NO_SAME_AS_PREV) + { + if (pPrevPciDev) + { + uPciDevNo = pPrevPciDev->uDevFn >> 3; + uDefPciBusNo = pPrevPciDev->Int.s.pPdmBusR3->iBus; + } + else + { + /* Look for PCI device registered with an earlier device instance so we can more + easily have multiple functions spanning multiple PDM device instances. */ + PPDMPCIDEV pOtherPciDev = NULL; + PPDMDEVINS pPrevIns = pDevIns->Internal.s.pDevR3->pInstances; + while (pPrevIns != pDevIns && pPrevIns) + { + pOtherPciDev = pPrevIns->Internal.s.pHeadPciDevR3; + pPrevIns = pPrevIns->Internal.s.pNextR3; + } + Assert(pPrevIns == pDevIns); + AssertLogRelMsgReturn(pOtherPciDev, + ("'%s'/%d: Can't use PDMPCIDEVREG_DEV_NO_SAME_AS_PREV without a previously registered PCI device by the same or earlier PDM device instance!\n", + pDevIns->pReg->szName, pDevIns->iInstance), + VERR_WRONG_ORDER); + + while (pOtherPciDev->Int.s.pNextR3) + pOtherPciDev = pOtherPciDev->Int.s.pNextR3; + uPciDevNo = pOtherPciDev->uDevFn >> 3; + uDefPciBusNo = pOtherPciDev->Int.s.pPdmBusR3->iBus; + } + } + + /* + * Choose the PCI bus for the device. + * + * This is simple. If the device was configured for a particular bus, the PCIBusNo + * configuration value will be set. If not the default bus is 0. + */ + /** @cfgm{/Devices/NAME/XX/[PciCfgYY/]PCIBusNo, uint8_t, 0, 7, 0} + * Selects the PCI bus number of a device. The default value isn't necessarily + * zero if the device is registered using PDMPCIDEVREG_DEV_NO_SAME_AS_PREV, it + * will then also inherit the bus number from the previously registered device. + */ + uint8_t u8Bus; + int rc = CFGMR3QueryU8Def(pCfg, "PCIBusNo", &u8Bus, (uint8_t)uDefPciBusNo); + AssertLogRelMsgRCReturn(rc, ("Configuration error: PCIBusNo query failed with rc=%Rrc (%s/%d)\n", + rc, pDevIns->pReg->szName, pDevIns->iInstance), rc); + AssertLogRelMsgReturn(u8Bus < RT_ELEMENTS(pVM->pdm.s.aPciBuses), + ("Configuration error: PCIBusNo=%d, max is %d. 
(%s/%d)\n", u8Bus, + RT_ELEMENTS(pVM->pdm.s.aPciBuses), pDevIns->pReg->szName, pDevIns->iInstance), + VERR_PDM_NO_PCI_BUS); + PPDMPCIBUS pBus = pPciDev->Int.s.pPdmBusR3 = &pVM->pdm.s.aPciBuses[u8Bus]; + if (pBus->pDevInsR3) + { + /* + * Check the configuration for PCI device and function assignment. + */ + /** @cfgm{/Devices/NAME/XX/[PciCfgYY/]PCIDeviceNo, uint8_t, 0, 31} + * Overrides the default PCI device number of a device. + */ + uint8_t uCfgDevice; + rc = CFGMR3QueryU8(pCfg, "PCIDeviceNo", &uCfgDevice); + if (RT_SUCCESS(rc)) + { + AssertMsgReturn(uCfgDevice <= 31, + ("Configuration error: PCIDeviceNo=%d, max is 31. (%s/%d/%d)\n", + uCfgDevice, pDevIns->pReg->szName, pDevIns->iInstance, idxDevCfg), + VERR_PDM_BAD_PCI_CONFIG); + uPciDevNo = uCfgDevice; + } + else + AssertMsgReturn(rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT, + ("Configuration error: PCIDeviceNo query failed with rc=%Rrc (%s/%d/%d)\n", + rc, pDevIns->pReg->szName, pDevIns->iInstance, idxDevCfg), + rc); + + /** @cfgm{/Devices/NAME/XX/[PciCfgYY/]PCIFunctionNo, uint8_t, 0, 7} + * Overrides the default PCI function number of a device. + */ + uint8_t uCfgFunction; + rc = CFGMR3QueryU8(pCfg, "PCIFunctionNo", &uCfgFunction); + if (RT_SUCCESS(rc)) + { + AssertMsgReturn(uCfgFunction <= 7, + ("Configuration error: PCIFunctionNo=%#x, max is 7. (%s/%d/%d)\n", + uCfgFunction, pDevIns->pReg->szName, pDevIns->iInstance, idxDevCfg), + VERR_PDM_BAD_PCI_CONFIG); + uPciFunNo = uCfgFunction; + } + else + AssertMsgReturn(rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT, + ("Configuration error: PCIFunctionNo query failed with rc=%Rrc (%s/%d/%d)\n", + rc, pDevIns->pReg->szName, pDevIns->iInstance, idxDevCfg), + rc); + + + /* + * Initialize the internal data. We only do the wipe and the members + * owned by PDM, the PCI bus does the rest in the registration call. + */ + RT_ZERO(pPciDev->Int); + + pPciDev->Int.s.idxDevCfg = idxDevCfg; + pPciDev->Int.s.fReassignableDevNo = uPciDevNoRaw >= VBOX_PCI_MAX_DEVICES; + pPciDev->Int.s.fReassignableFunNo = uPciFunNo >= VBOX_PCI_MAX_FUNCTIONS; + pPciDev->Int.s.pDevInsR3 = pDevIns; + pPciDev->Int.s.pPdmBusR3 = pBus; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0) + { + pPciDev->Int.s.pDevInsR0 = MMHyperR3ToR0(pVM, pDevIns); + pPciDev->Int.s.pPdmBusR0 = MMHyperR3ToR0(pVM, pBus); + } + else + { + pPciDev->Int.s.pDevInsR0 = NIL_RTR0PTR; + pPciDev->Int.s.pPdmBusR0 = NIL_RTR0PTR; + } + + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + { + pPciDev->Int.s.pDevInsRC = MMHyperR3ToRC(pVM, pDevIns); + pPciDev->Int.s.pPdmBusRC = MMHyperR3ToRC(pVM, pBus); + } + else + { + pPciDev->Int.s.pDevInsRC = NIL_RTRCPTR; + pPciDev->Int.s.pPdmBusRC = NIL_RTRCPTR; + } + + /* Set some of the public members too. */ + pPciDev->pszNameR3 = pszName; + + /* + * Call the pci bus device to do the actual registration. + */ + pdmLock(pVM); + rc = pBus->pfnRegisterR3(pBus->pDevInsR3, pPciDev, fFlags, uPciDevNo, uPciFunNo, pszName); + pdmUnlock(pVM); + if (RT_SUCCESS(rc)) + { + + /* + * Link it. 
+ */ + if (pPrevPciDev) + { + Assert(!pPrevPciDev->Int.s.pNextR3); + pPrevPciDev->Int.s.pNextR3 = pPciDev; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0) + pPrevPciDev->Int.s.pNextR0 = MMHyperR3ToR0(pVM, pPciDev); + else + pPrevPciDev->Int.s.pNextR0 = NIL_RTR0PTR; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + pPrevPciDev->Int.s.pNextRC = MMHyperR3ToRC(pVM, pPciDev); + else + pPrevPciDev->Int.s.pNextRC = NIL_RTRCPTR; + } + else + { + Assert(!pDevIns->Internal.s.pHeadPciDevR3); + pDevIns->Internal.s.pHeadPciDevR3 = pPciDev; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0) + pDevIns->Internal.s.pHeadPciDevR0 = MMHyperR3ToR0(pVM, pPciDev); + else + pDevIns->Internal.s.pHeadPciDevR0 = NIL_RTR0PTR; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + pDevIns->Internal.s.pHeadPciDevRC = MMHyperR3ToRC(pVM, pPciDev); + else + pDevIns->Internal.s.pHeadPciDevRC = NIL_RTRCPTR; + } + + Log(("PDM: Registered device '%s'/%d as PCI device %d on bus %d\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev->uDevFn, pBus->iBus)); + } + } + else + { + AssertLogRelMsgFailed(("Configuration error: No PCI bus available. This could be related to init order too!\n")); + rc = VERR_PDM_NO_PCI_BUS; + } + + LogFlow(("pdmR3DevHlp_PCIRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCIRegisterMsi} */ +static DECLCALLBACK(int) pdmR3DevHlp_PCIRegisterMsi(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, PPDMMSIREG pMsiReg) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + if (!pPciDev) /* NULL is an alias for the default PCI device. */ + pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE); + LogFlow(("pdmR3DevHlp_PCIRegisterMsi: caller='%s'/%d: pPciDev=%p:{%#x} pMsgReg=%p:{cMsiVectors=%d, cMsixVectors=%d}\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev->uDevFn, pMsiReg, pMsiReg->cMsiVectors, pMsiReg->cMsixVectors)); + + PPDMPCIBUS pBus = pPciDev->Int.s.pPdmBusR3; Assert(pBus); + PVM pVM = pDevIns->Internal.s.pVMR3; + pdmLock(pVM); + int rc; + if (pBus->pfnRegisterMsiR3) + rc = pBus->pfnRegisterMsiR3(pBus->pDevInsR3, pPciDev, pMsiReg); + else + rc = VERR_NOT_IMPLEMENTED; + pdmUnlock(pVM); + + LogFlow(("pdmR3DevHlp_PCIRegisterMsi: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCIIORegionRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_PCIIORegionRegister(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, + RTGCPHYS cbRegion, PCIADDRESSSPACE enmType, PFNPCIIOREGIONMAP pfnCallback) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + if (!pPciDev) /* NULL is an alias for the default PCI device. */ + pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE); + LogFlow(("pdmR3DevHlp_PCIIORegionRegister: caller='%s'/%d: pPciDev=%p:{%#x} iRegion=%d cbRegion=%RGp enmType=%d pfnCallback=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev->uDevFn, iRegion, cbRegion, enmType, pfnCallback)); + + /* + * Validate input. 
+ */ + if (iRegion >= VBOX_PCI_NUM_REGIONS) + { + Assert(iRegion < VBOX_PCI_NUM_REGIONS); + LogFlow(("pdmR3DevHlp_PCIIORegionRegister: caller='%s'/%d: returns %Rrc (iRegion)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + switch ((int)enmType) + { + case PCI_ADDRESS_SPACE_IO: + /* + * Sanity check: don't allow to register more than 32K of the PCI I/O space. + */ + AssertLogRelMsgReturn(cbRegion <= _32K, + ("caller='%s'/%d: %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, cbRegion), + VERR_INVALID_PARAMETER); + break; + + case PCI_ADDRESS_SPACE_MEM: + case PCI_ADDRESS_SPACE_MEM_PREFETCH: + /* + * Sanity check: Don't allow to register more than 2GB of the PCI MMIO space. + */ + AssertLogRelMsgReturn(cbRegion <= MM_MMIO_32_MAX, + ("caller='%s'/%d: %RGp (max %RGp)\n", + pDevIns->pReg->szName, pDevIns->iInstance, cbRegion, (RTGCPHYS)MM_MMIO_32_MAX), + VERR_OUT_OF_RANGE); + break; + + case PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_MEM: + case PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_MEM_PREFETCH: + /* + * Sanity check: Don't allow to register more than 64GB of the 64-bit PCI MMIO space. + */ + AssertLogRelMsgReturn(cbRegion <= MM_MMIO_64_MAX, + ("caller='%s'/%d: %RGp (max %RGp)\n", + pDevIns->pReg->szName, pDevIns->iInstance, cbRegion, MM_MMIO_64_MAX), + VERR_OUT_OF_RANGE); + break; + + default: + AssertMsgFailed(("enmType=%#x is unknown\n", enmType)); + LogFlow(("pdmR3DevHlp_PCIIORegionRegister: caller='%s'/%d: returns %Rrc (enmType)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if (!pfnCallback) + { + Assert(pfnCallback); + LogFlow(("pdmR3DevHlp_PCIIORegionRegister: caller='%s'/%d: returns %Rrc (callback)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + AssertRelease(VMR3GetState(pVM) != VMSTATE_RUNNING); + + /* + * We're currently restricted to page aligned MMIO regions. + */ + if ( ((enmType & ~(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_MEM_PREFETCH)) == PCI_ADDRESS_SPACE_MEM) + && cbRegion != RT_ALIGN_64(cbRegion, PAGE_SIZE)) + { + Log(("pdmR3DevHlp_PCIIORegionRegister: caller='%s'/%d: aligning cbRegion %RGp -> %RGp\n", + pDevIns->pReg->szName, pDevIns->iInstance, cbRegion, RT_ALIGN_64(cbRegion, PAGE_SIZE))); + cbRegion = RT_ALIGN_64(cbRegion, PAGE_SIZE); + } + + /* + * For registering PCI MMIO memory or PCI I/O memory, the size of the region must be a power of 2! + */ + int iLastSet = ASMBitLastSetU64(cbRegion); + Assert(iLastSet > 0); + uint64_t cbRegionAligned = RT_BIT_64(iLastSet - 1); + if (cbRegion > cbRegionAligned) + cbRegion = cbRegionAligned * 2; /* round up */ + + PPDMPCIBUS pBus = pPciDev->Int.s.pPdmBusR3; + Assert(pBus); + pdmLock(pVM); + int rc = pBus->pfnIORegionRegisterR3(pBus->pDevInsR3, pPciDev, iRegion, cbRegion, enmType, pfnCallback); + pdmUnlock(pVM); + + LogFlow(("pdmR3DevHlp_PCIIORegionRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCISetConfigCallbacks} */ +static DECLCALLBACK(void) pdmR3DevHlp_PCISetConfigCallbacks(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, PFNPCICONFIGREAD pfnRead, PPFNPCICONFIGREAD ppfnReadOld, + PFNPCICONFIGWRITE pfnWrite, PPFNPCICONFIGWRITE ppfnWriteOld) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + if (!pPciDev) /* NULL is an alias for the default PCI device. 
*/ + pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + AssertReturnVoid(pPciDev); + LogFlow(("pdmR3DevHlp_PCISetConfigCallbacks: caller='%s'/%d: pPciDev=%p pfnRead=%p ppfnReadOld=%p pfnWrite=%p ppfnWriteOld=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pfnRead, ppfnReadOld, pfnWrite, ppfnWriteOld)); + + /* + * Validate input and resolve defaults. + */ + AssertPtr(pfnRead); + AssertPtr(pfnWrite); + AssertPtrNull(ppfnReadOld); + AssertPtrNull(ppfnWriteOld); + AssertPtrNull(pPciDev); + + PPDMPCIBUS pBus = pPciDev->Int.s.pPdmBusR3; + AssertRelease(pBus); + AssertRelease(VMR3GetState(pVM) != VMSTATE_RUNNING); + + /* + * Do the job. + */ + pdmLock(pVM); + pBus->pfnSetConfigCallbacksR3(pBus->pDevInsR3, pPciDev, pfnRead, ppfnReadOld, pfnWrite, ppfnWriteOld); + pdmUnlock(pVM); + + LogFlow(("pdmR3DevHlp_PCISetConfigCallbacks: caller='%s'/%d: returns void\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCIPhysRead} */ +static DECLCALLBACK(int) +pdmR3DevHlp_PCIPhysRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + if (!pPciDev) /* NULL is an alias for the default PCI device. */ + pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE); + +#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT + /* + * Just check the busmaster setting here and forward the request to the generic read helper. + */ + if (PCIDevIsBusmaster(pPciDev)) + { /* likely */ } + else + { + Log(("pdmR3DevHlp_PCIPhysRead: caller='%s'/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbRead=%#zx\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbRead)); + memset(pvBuf, 0xff, cbRead); + return VERR_PDM_NOT_PCI_BUS_MASTER; + } +#endif + + return pDevIns->pHlpR3->pfnPhysRead(pDevIns, GCPhys, pvBuf, cbRead); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCIPhysWrite} */ +static DECLCALLBACK(int) +pdmR3DevHlp_PCIPhysWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + if (!pPciDev) /* NULL is an alias for the default PCI device. */ + pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE); + +#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT + /* + * Just check the busmaster setting here and forward the request to the generic write helper. + */ + if (PCIDevIsBusmaster(pPciDev)) + { /* likely */ } + else + { + Log(("pdmR3DevHlp_PCIPhysWrite: caller='%s'/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbWrite=%#zx\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbWrite)); + return VERR_PDM_NOT_PCI_BUS_MASTER; + } +#endif + + return pDevIns->pHlpR3->pfnPhysWrite(pDevIns, GCPhys, pvBuf, cbWrite); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCISetIrq} */ +static DECLCALLBACK(void) pdmR3DevHlp_PCISetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + if (!pPciDev) /* NULL is an alias for the default PCI device. */ + pPciDev = pDevIns->Internal.s.pHeadPciDevR3; + AssertReturnVoid(pPciDev); + LogFlow(("pdmR3DevHlp_PCISetIrq: caller='%s'/%d: pPciDev=%p:{%#x} iIrq=%d iLevel=%d\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciDev, pPciDev->uDevFn, iIrq, iLevel)); + + /* + * Validate input. + */ + Assert(iIrq == 0); + Assert((uint32_t)iLevel <= PDM_IRQ_LEVEL_FLIP_FLOP); + + /* + * Must have a PCI device registered!
+ */ + PPDMPCIBUS pBus = pPciDev->Int.s.pPdmBusR3; + Assert(pBus); + PVM pVM = pDevIns->Internal.s.pVMR3; + + pdmLock(pVM); + uint32_t uTagSrc; + if (iLevel & PDM_IRQ_LEVEL_HIGH) + { + pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing); + if (iLevel == PDM_IRQ_LEVEL_HIGH) + VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + else + VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + } + else + uTagSrc = pDevIns->Internal.s.uLastIrqTag; + + pBus->pfnSetIrqR3(pBus->pDevInsR3, pPciDev, iIrq, iLevel, uTagSrc); + + if (iLevel == PDM_IRQ_LEVEL_LOW) + VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + pdmUnlock(pVM); + + LogFlow(("pdmR3DevHlp_PCISetIrq: caller='%s'/%d: returns void\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCISetIrqNoWait} */ +static DECLCALLBACK(void) pdmR3DevHlp_PCISetIrqNoWait(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel) +{ + pdmR3DevHlp_PCISetIrq(pDevIns, pPciDev, iIrq, iLevel); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnISASetIrq} */ +static DECLCALLBACK(void) pdmR3DevHlp_ISASetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_ISASetIrq: caller='%s'/%d: iIrq=%d iLevel=%d\n", pDevIns->pReg->szName, pDevIns->iInstance, iIrq, iLevel)); + + /* + * Validate input. + */ + Assert(iIrq < 16); + Assert((uint32_t)iLevel <= PDM_IRQ_LEVEL_FLIP_FLOP); + + PVM pVM = pDevIns->Internal.s.pVMR3; + + /* + * Do the job. + */ + pdmLock(pVM); + uint32_t uTagSrc; + if (iLevel & PDM_IRQ_LEVEL_HIGH) + { + pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing); + if (iLevel == PDM_IRQ_LEVEL_HIGH) + VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + else + VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + } + else + uTagSrc = pDevIns->Internal.s.uLastIrqTag; + + PDMIsaSetIrq(pVM, iIrq, iLevel, uTagSrc); /* (The API takes the lock recursively.) */ + + if (iLevel == PDM_IRQ_LEVEL_LOW) + VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + pdmUnlock(pVM); + + LogFlow(("pdmR3DevHlp_ISASetIrq: caller='%s'/%d: returns void\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnISASetIrqNoWait} */ +static DECLCALLBACK(void) pdmR3DevHlp_ISASetIrqNoWait(PPDMDEVINS pDevIns, int iIrq, int iLevel) +{ + pdmR3DevHlp_ISASetIrq(pDevIns, iIrq, iLevel); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnIoApicSendMsi} */ +static DECLCALLBACK(void) pdmR3DevHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t uValue) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_IoApicSendMsi: caller='%s'/%d: GCPhys=%RGp uValue=%#x\n", pDevIns->pReg->szName, pDevIns->iInstance, GCPhys, uValue)); + + /* + * Validate input. + */ + Assert(GCPhys != 0); + Assert(uValue != 0); + + PVM pVM = pDevIns->Internal.s.pVMR3; + + /* + * Do the job. + */ + pdmLock(pVM); + uint32_t uTagSrc; + pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing); + VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + + PDMIoApicSendMsi(pVM, GCPhys, uValue, uTagSrc); /* (The API takes the lock recursively.) 
*/ + + pdmUnlock(pVM); + + LogFlow(("pdmR3DevHlp_IoApicSendMsi: caller='%s'/%d: returns void\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDriverAttach} */ +static DECLCALLBACK(int) pdmR3DevHlp_DriverAttach(PPDMDEVINS pDevIns, uint32_t iLun, PPDMIBASE pBaseInterface, PPDMIBASE *ppBaseInterface, const char *pszDesc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DriverAttach: caller='%s'/%d: iLun=%d pBaseInterface=%p ppBaseInterface=%p pszDesc=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, iLun, pBaseInterface, ppBaseInterface, pszDesc, pszDesc)); + + /* + * Lookup the LUN, it might already be registered. + */ + PPDMLUN pLunPrev = NULL; + PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; + for (; pLun; pLunPrev = pLun, pLun = pLun->pNext) + if (pLun->iLun == iLun) + break; + + /* + * Create the LUN if it wasn't found, else check if driver is already attached to it. + */ + if (!pLun) + { + if ( !pBaseInterface + || !pszDesc + || !*pszDesc) + { + Assert(pBaseInterface); + Assert(pszDesc && *pszDesc); + return VERR_INVALID_PARAMETER; + } + + pLun = (PPDMLUN)MMR3HeapAlloc(pVM, MM_TAG_PDM_LUN, sizeof(*pLun)); + if (!pLun) + return VERR_NO_MEMORY; + + pLun->iLun = iLun; + pLun->pNext = pLunPrev ? pLunPrev->pNext : NULL; + pLun->pTop = NULL; + pLun->pBottom = NULL; + pLun->pDevIns = pDevIns; + pLun->pUsbIns = NULL; + pLun->pszDesc = pszDesc; + pLun->pBase = pBaseInterface; + if (!pLunPrev) + pDevIns->Internal.s.pLunsR3 = pLun; + else + pLunPrev->pNext = pLun; + Log(("pdmR3DevHlp_DriverAttach: Registered LUN#%d '%s' with device '%s'/%d.\n", + iLun, pszDesc, pDevIns->pReg->szName, pDevIns->iInstance)); + } + else if (pLun->pTop) + { + AssertMsgFailed(("Already attached! The device should keep track of such things!\n")); + LogFlow(("pdmR3DevHlp_DriverAttach: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_PDM_DRIVER_ALREADY_ATTACHED)); + return VERR_PDM_DRIVER_ALREADY_ATTACHED; + } + Assert(pLun->pBase == pBaseInterface); + + + /* + * Get the attached driver configuration.
+ */ + int rc; + PCFGMNODE pNode = CFGMR3GetChildF(pDevIns->Internal.s.pCfgHandle, "LUN#%u", iLun); + if (pNode) + rc = pdmR3DrvInstantiate(pVM, pNode, pBaseInterface, NULL /*pDrvAbove*/, pLun, ppBaseInterface); + else + rc = VERR_PDM_NO_ATTACHED_DRIVER; + + LogFlow(("pdmR3DevHlp_DriverAttach: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDriverDetach} */ +static DECLCALLBACK(int) pdmR3DevHlp_DriverDetach(PPDMDEVINS pDevIns, PPDMDRVINS pDrvIns, uint32_t fFlags) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); RT_NOREF_PV(pDevIns); + LogFlow(("pdmR3DevHlp_DriverDetach: caller='%s'/%d: pDrvIns=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pDrvIns)); + +#ifdef VBOX_STRICT + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); +#endif + + int rc = pdmR3DrvDetach(pDrvIns, fFlags); + + LogFlow(("pdmR3DevHlp_DriverDetach: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnQueueCreate} */ +static DECLCALLBACK(int) pdmR3DevHlp_QueueCreate(PPDMDEVINS pDevIns, size_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEDEV pfnCallback, bool fRZEnabled, const char *pszName, PPDMQUEUE *ppQueue) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_QueueCreate: caller='%s'/%d: cbItem=%#x cItems=%#x cMilliesInterval=%u pfnCallback=%p fRZEnabled=%RTbool pszName=%p:{%s} ppQueue=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, cbItem, cItems, cMilliesInterval, pfnCallback, fRZEnabled, pszName, pszName, ppQueue)); + + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + if (pDevIns->iInstance > 0) + { + pszName = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s_%u", pszName, pDevIns->iInstance); + AssertLogRelReturn(pszName, VERR_NO_MEMORY); + } + + int rc = PDMR3QueueCreateDevice(pVM, pDevIns, cbItem, cItems, cMilliesInterval, pfnCallback, fRZEnabled, pszName, ppQueue); + + LogFlow(("pdmR3DevHlp_QueueCreate: caller='%s'/%d: returns %Rrc *ppQueue=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, rc, *ppQueue)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCritSectInit} */ +static DECLCALLBACK(int) pdmR3DevHlp_CritSectInit(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_CritSectInit: caller='%s'/%d: pCritSect=%p pszNameFmt=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, pCritSect, pszNameFmt, pszNameFmt)); + + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + int rc = pdmR3CritSectInitDevice(pVM, pDevIns, pCritSect, RT_SRC_POS_ARGS, pszNameFmt, va); + + LogFlow(("pdmR3DevHlp_CritSectInit: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCritSectGetNop} */ +static DECLCALLBACK(PPDMCRITSECT) pdmR3DevHlp_CritSectGetNop(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + PPDMCRITSECT pCritSect = PDMR3CritSectGetNop(pVM); + LogFlow(("pdmR3DevHlp_CritSectGetNop: caller='%s'/%d: return %p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pCritSect)); + return pCritSect; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCritSectGetNopR0} */ +static DECLCALLBACK(R0PTRTYPE(PPDMCRITSECT)) pdmR3DevHlp_CritSectGetNopR0(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = 
pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + R0PTRTYPE(PPDMCRITSECT) pCritSect = PDMR3CritSectGetNopR0(pVM); + LogFlow(("pdmR3DevHlp_CritSectGetNopR0: caller='%s'/%d: return %RHv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pCritSect)); + return pCritSect; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCritSectGetNopRC} */ +static DECLCALLBACK(RCPTRTYPE(PPDMCRITSECT)) pdmR3DevHlp_CritSectGetNopRC(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + RCPTRTYPE(PPDMCRITSECT) pCritSect = PDMR3CritSectGetNopRC(pVM); + LogFlow(("pdmR3DevHlp_CritSectGetNopRC: caller='%s'/%d: return %RRv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pCritSect)); + return pCritSect; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnSetDeviceCritSect} */ +static DECLCALLBACK(int) pdmR3DevHlp_SetDeviceCritSect(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect) +{ + /* + * Validate input. + * + * Note! We only allow the automatically created default critical section + * to be replaced by this API. + */ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertPtrReturn(pCritSect, VERR_INVALID_POINTER); + LogFlow(("pdmR3DevHlp_SetDeviceCritSect: caller='%s'/%d: pCritSect=%p (%s)\n", + pDevIns->pReg->szName, pDevIns->iInstance, pCritSect, pCritSect->s.pszName)); + AssertReturn(PDMCritSectIsInitialized(pCritSect), VERR_INVALID_PARAMETER); + PVM pVM = pDevIns->Internal.s.pVMR3; + AssertReturn(pCritSect->s.pVMR3 == pVM, VERR_INVALID_PARAMETER); + + VM_ASSERT_EMT(pVM); + VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_WRONG_ORDER); + + AssertReturn(pDevIns->pCritSectRoR3, VERR_PDM_DEV_IPE_1); + AssertReturn(pDevIns->pCritSectRoR3->s.fAutomaticDefaultCritsect, VERR_WRONG_ORDER); + AssertReturn(!pDevIns->pCritSectRoR3->s.fUsedByTimerOrSimilar, VERR_WRONG_ORDER); + AssertReturn(pDevIns->pCritSectRoR3 != pCritSect, VERR_INVALID_PARAMETER); + + /* + * Replace the critical section and destroy the automatic default section. 
+ */ + PPDMCRITSECT pOldCritSect = pDevIns->pCritSectRoR3; + pDevIns->pCritSectRoR3 = pCritSect; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0) + pDevIns->pCritSectRoR0 = MMHyperCCToR0(pVM, pDevIns->pCritSectRoR3); + else + Assert(pDevIns->pCritSectRoR0 == NIL_RTRCPTR); + + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + pDevIns->pCritSectRoRC = MMHyperCCToRC(pVM, pDevIns->pCritSectRoR3); + else + Assert(pDevIns->pCritSectRoRC == NIL_RTRCPTR); + + PDMR3CritSectDelete(pOldCritSect); + if (pDevIns->pReg->fFlags & (PDM_DEVREG_FLAGS_RC | PDM_DEVREG_FLAGS_R0)) + MMHyperFree(pVM, pOldCritSect); + else + MMR3HeapFree(pOldCritSect); + + LogFlow(("pdmR3DevHlp_SetDeviceCritSect: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnThreadCreate} */ +static DECLCALLBACK(int) pdmR3DevHlp_ThreadCreate(PPDMDEVINS pDevIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADDEV pfnThread, + PFNPDMTHREADWAKEUPDEV pfnWakeup, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_ThreadCreate: caller='%s'/%d: ppThread=%p pvUser=%p pfnThread=%p pfnWakeup=%p cbStack=%#zx enmType=%d pszName=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, ppThread, pvUser, pfnThread, pfnWakeup, cbStack, enmType, pszName, pszName)); + + int rc = pdmR3ThreadCreateDevice(pDevIns->Internal.s.pVMR3, pDevIns, ppThread, pvUser, pfnThread, pfnWakeup, cbStack, enmType, pszName); + + LogFlow(("pdmR3DevHlp_ThreadCreate: caller='%s'/%d: returns %Rrc *ppThread=%RTthrd\n", pDevIns->pReg->szName, pDevIns->iInstance, + rc, *ppThread)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnSetAsyncNotification} */ +static DECLCALLBACK(int) pdmR3DevHlp_SetAsyncNotification(PPDMDEVINS pDevIns, PFNPDMDEVASYNCNOTIFY pfnAsyncNotify) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT0(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_SetAsyncNotification: caller='%s'/%d: pfnAsyncNotify=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pfnAsyncNotify)); + + int rc = VINF_SUCCESS; + AssertStmt(pfnAsyncNotify, rc = VERR_INVALID_PARAMETER); + AssertStmt(!pDevIns->Internal.s.pfnAsyncNotify, rc = VERR_WRONG_ORDER); + AssertStmt(pDevIns->Internal.s.fIntFlags & (PDMDEVINSINT_FLAGS_SUSPENDED | PDMDEVINSINT_FLAGS_RESET), rc = VERR_WRONG_ORDER); + VMSTATE enmVMState = VMR3GetState(pDevIns->Internal.s.pVMR3); + AssertStmt( enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS + || enmVMState == VMSTATE_SUSPENDING_LS + || enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS + || enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS, + rc = VERR_INVALID_STATE); + + if (RT_SUCCESS(rc)) + pDevIns->Internal.s.pfnAsyncNotify = pfnAsyncNotify; + + LogFlow(("pdmR3DevHlp_SetAsyncNotification: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnAsyncNotificationCompleted} */ +static DECLCALLBACK(void) pdmR3DevHlp_AsyncNotificationCompleted(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + + VMSTATE enmVMState = VMR3GetState(pVM); + if ( enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS + || enmVMState == VMSTATE_SUSPENDING_LS + || enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS + 
|| enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS) + { + LogFlow(("pdmR3DevHlp_AsyncNotificationCompleted: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance)); + VMR3AsyncPdmNotificationWakeupU(pVM->pUVM); + } + else + LogFlow(("pdmR3DevHlp_AsyncNotificationCompleted: caller='%s'/%d: enmVMState=%d\n", pDevIns->pReg->szName, pDevIns->iInstance, enmVMState)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnRTCRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_RTCRegister(PPDMDEVINS pDevIns, PCPDMRTCREG pRtcReg, PCPDMRTCHLP *ppRtcHlp) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_RTCRegister: caller='%s'/%d: pRtcReg=%p:{.u32Version=%#x, .pfnWrite=%p, .pfnRead=%p} ppRtcHlp=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pRtcReg, pRtcReg->u32Version, pRtcReg->pfnWrite, + pRtcReg->pfnRead, ppRtcHlp)); + + /* + * Validate input. + */ + if (pRtcReg->u32Version != PDM_RTCREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pRtcReg->u32Version, + PDM_RTCREG_VERSION)); + LogFlow(("pdmR3DevHlp_RTCRegister: caller='%s'/%d: returns %Rrc (version)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( !pRtcReg->pfnWrite + || !pRtcReg->pfnRead) + { + Assert(pRtcReg->pfnWrite); + Assert(pRtcReg->pfnRead); + LogFlow(("pdmR3DevHlp_RTCRegister: caller='%s'/%d: returns %Rrc (callbacks)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + if (!ppRtcHlp) + { + Assert(ppRtcHlp); + LogFlow(("pdmR3DevHlp_RTCRegister: caller='%s'/%d: returns %Rrc (ppRtcHlp)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Only one RTC device. + */ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (pVM->pdm.s.pRtc) + { + AssertMsgFailed(("Only one RTC device is supported!\n")); + LogFlow(("pdmR3DevHlp_RTCRegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Allocate and initialize the RTC registration structure. + */ + int rc = VINF_SUCCESS; + PPDMRTC pRtc = (PPDMRTC)MMR3HeapAlloc(pDevIns->Internal.s.pVMR3, MM_TAG_PDM_DEVICE, sizeof(*pRtc)); + if (pRtc) + { + pRtc->pDevIns = pDevIns; + pRtc->Reg = *pRtcReg; + pVM->pdm.s.pRtc = pRtc; + + /* set the helper pointer. */ + *ppRtcHlp = &g_pdmR3DevRtcHlp; + Log(("PDM: Registered RTC device '%s'/%d pDevIns=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pDevIns)); + } + else + rc = VERR_NO_MEMORY; + + LogFlow(("pdmR3DevHlp_RTCRegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMARegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_DMARegister(PPDMDEVINS pDevIns, unsigned uChannel, PFNDMATRANSFERHANDLER pfnTransferHandler, void *pvUser) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DMARegister: caller='%s'/%d: uChannel=%d pfnTransferHandler=%p pvUser=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, uChannel, pfnTransferHandler, pvUser)); + int rc = VINF_SUCCESS; + if (pVM->pdm.s.pDmac) + pVM->pdm.s.pDmac->Reg.pfnRegister(pVM->pdm.s.pDmac->pDevIns, uChannel, pfnTransferHandler, pvUser); + else + { + AssertMsgFailed(("Configuration error: No DMAC controller available.
This could be related to init order too!\n")); + rc = VERR_PDM_NO_DMAC_INSTANCE; + } + LogFlow(("pdmR3DevHlp_DMARegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMAReadMemory} */ +static DECLCALLBACK(int) pdmR3DevHlp_DMAReadMemory(PPDMDEVINS pDevIns, unsigned uChannel, void *pvBuffer, uint32_t off, uint32_t cbBlock, uint32_t *pcbRead) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DMAReadMemory: caller='%s'/%d: uChannel=%d pvBuffer=%p off=%#x cbBlock=%#x pcbRead=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, uChannel, pvBuffer, off, cbBlock, pcbRead)); + int rc = VINF_SUCCESS; + if (pVM->pdm.s.pDmac) + { + uint32_t cb = pVM->pdm.s.pDmac->Reg.pfnReadMemory(pVM->pdm.s.pDmac->pDevIns, uChannel, pvBuffer, off, cbBlock); + if (pcbRead) + *pcbRead = cb; + } + else + { + AssertMsgFailed(("Configuration error: No DMAC controller available. This could be related to init order too!\n")); + rc = VERR_PDM_NO_DMAC_INSTANCE; + } + LogFlow(("pdmR3DevHlp_DMAReadMemory: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMAWriteMemory} */ +static DECLCALLBACK(int) pdmR3DevHlp_DMAWriteMemory(PPDMDEVINS pDevIns, unsigned uChannel, const void *pvBuffer, uint32_t off, uint32_t cbBlock, uint32_t *pcbWritten) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DMAWriteMemory: caller='%s'/%d: uChannel=%d pvBuffer=%p off=%#x cbBlock=%#x pcbWritten=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, uChannel, pvBuffer, off, cbBlock, pcbWritten)); + int rc = VINF_SUCCESS; + if (pVM->pdm.s.pDmac) + { + uint32_t cb = pVM->pdm.s.pDmac->Reg.pfnWriteMemory(pVM->pdm.s.pDmac->pDevIns, uChannel, pvBuffer, off, cbBlock); + if (pcbWritten) + *pcbWritten = cb; + } + else + { + AssertMsgFailed(("Configuration error: No DMAC controller available. This could be related to init order too!\n")); + rc = VERR_PDM_NO_DMAC_INSTANCE; + } + LogFlow(("pdmR3DevHlp_DMAWriteMemory: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMASetDREQ} */ +static DECLCALLBACK(int) pdmR3DevHlp_DMASetDREQ(PPDMDEVINS pDevIns, unsigned uChannel, unsigned uLevel) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DMASetDREQ: caller='%s'/%d: uChannel=%d uLevel=%d\n", + pDevIns->pReg->szName, pDevIns->iInstance, uChannel, uLevel)); + int rc = VINF_SUCCESS; + if (pVM->pdm.s.pDmac) + pVM->pdm.s.pDmac->Reg.pfnSetDREQ(pVM->pdm.s.pDmac->pDevIns, uChannel, uLevel); + else + { + AssertMsgFailed(("Configuration error: No DMAC controller available. 
This could be related to init order too!\n")); + rc = VERR_PDM_NO_DMAC_INSTANCE; + } + LogFlow(("pdmR3DevHlp_DMASetDREQ: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMAGetChannelMode} */ +static DECLCALLBACK(uint8_t) pdmR3DevHlp_DMAGetChannelMode(PPDMDEVINS pDevIns, unsigned uChannel) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DMAGetChannelMode: caller='%s'/%d: uChannel=%d\n", + pDevIns->pReg->szName, pDevIns->iInstance, uChannel)); + uint8_t u8Mode; + if (pVM->pdm.s.pDmac) + u8Mode = pVM->pdm.s.pDmac->Reg.pfnGetChannelMode(pVM->pdm.s.pDmac->pDevIns, uChannel); + else + { + AssertMsgFailed(("Configuration error: No DMAC controller available. This could be related to init order too!\n")); + u8Mode = 3 << 2 /* illegal mode type */; + } + LogFlow(("pdmR3DevHlp_DMAGetChannelMode: caller='%s'/%d: returns %#04x\n", + pDevIns->pReg->szName, pDevIns->iInstance, u8Mode)); + return u8Mode; +} + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMASchedule} */ +static DECLCALLBACK(void) pdmR3DevHlp_DMASchedule(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_DMASchedule: caller='%s'/%d: VM_FF_PDM_DMA %d -> 1\n", + pDevIns->pReg->szName, pDevIns->iInstance, VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))); + + AssertMsg(pVM->pdm.s.pDmac, ("Configuration error: No DMAC controller available. This could be related to init order too!\n")); + VM_FF_SET(pVM, VM_FF_PDM_DMA); +#ifdef VBOX_WITH_REM + REMR3NotifyDmaPending(pVM); +#endif + VMR3NotifyGlobalFFU(pVM->pUVM, VMNOTIFYFF_FLAGS_DONE_REM); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCMOSWrite} */ +static DECLCALLBACK(int) pdmR3DevHlp_CMOSWrite(PPDMDEVINS pDevIns, unsigned iReg, uint8_t u8Value) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + LogFlow(("pdmR3DevHlp_CMOSWrite: caller='%s'/%d: iReg=%#04x u8Value=%#04x\n", + pDevIns->pReg->szName, pDevIns->iInstance, iReg, u8Value)); + int rc; + if (pVM->pdm.s.pRtc) + { + PPDMDEVINS pDevInsRtc = pVM->pdm.s.pRtc->pDevIns; + rc = PDMCritSectEnter(pDevInsRtc->pCritSectRoR3, VERR_IGNORED); + if (RT_SUCCESS(rc)) + { + rc = pVM->pdm.s.pRtc->Reg.pfnWrite(pDevInsRtc, iReg, u8Value); + PDMCritSectLeave(pDevInsRtc->pCritSectRoR3); + } + } + else + rc = VERR_PDM_NO_RTC_INSTANCE; + + LogFlow(("pdmR3DevHlp_CMOSWrite: caller='%s'/%d: return %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCMOSRead} */ +static DECLCALLBACK(int) pdmR3DevHlp_CMOSRead(PPDMDEVINS pDevIns, unsigned iReg, uint8_t *pu8Value) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + LogFlow(("pdmR3DevHlp_CMOSRead: caller='%s'/%d: iReg=%#04x pu8Value=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, iReg, pu8Value)); + int rc; + if (pVM->pdm.s.pRtc) + { + PPDMDEVINS pDevInsRtc = pVM->pdm.s.pRtc->pDevIns; + rc = PDMCritSectEnter(pDevInsRtc->pCritSectRoR3, VERR_IGNORED); + if (RT_SUCCESS(rc)) + { + rc = pVM->pdm.s.pRtc->Reg.pfnRead(pDevInsRtc, iReg, pu8Value); + PDMCritSectLeave(pDevInsRtc->pCritSectRoR3); + } + } + else + rc = VERR_PDM_NO_RTC_INSTANCE; + + LogFlow(("pdmR3DevHlp_CMOSRead: caller='%s'/%d: return %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/**
@interface_method_impl{PDMDEVHLPR3,pfnAssertEMT} */ +static DECLCALLBACK(bool) pdmR3DevHlp_AssertEMT(PPDMDEVINS pDevIns, const char *pszFile, unsigned iLine, const char *pszFunction) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + if (VM_IS_EMT(pDevIns->Internal.s.pVMR3)) + return true; + + char szMsg[100]; + RTStrPrintf(szMsg, sizeof(szMsg), "AssertEMT '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance); + RTAssertMsg1Weak(szMsg, iLine, pszFile, pszFunction); + AssertBreakpoint(); + return false; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnAssertOther} */ +static DECLCALLBACK(bool) pdmR3DevHlp_AssertOther(PPDMDEVINS pDevIns, const char *pszFile, unsigned iLine, const char *pszFunction) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + if (!VM_IS_EMT(pDevIns->Internal.s.pVMR3)) + return true; + + char szMsg[100]; + RTStrPrintf(szMsg, sizeof(szMsg), "AssertOther '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance); + RTAssertMsg1Weak(szMsg, iLine, pszFile, pszFunction); + AssertBreakpoint(); + return false; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnLdrGetRCInterfaceSymbols} */ +static DECLCALLBACK(int) pdmR3DevHlp_LdrGetRCInterfaceSymbols(PPDMDEVINS pDevIns, void *pvInterface, size_t cbInterface, + const char *pszSymPrefix, const char *pszSymList) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_PDMLdrGetRCInterfaceSymbols: caller='%s'/%d: pvInterface=%p cbInterface=%zu pszSymPrefix=%p:{%s} pszSymList=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, pvInterface, cbInterface, pszSymPrefix, pszSymPrefix, pszSymList, pszSymList)); + + int rc; + if ( strncmp(pszSymPrefix, "dev", 3) == 0 + && RTStrIStr(pszSymPrefix + 3, pDevIns->pReg->szName) != NULL) + { + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC) + rc = PDMR3LdrGetInterfaceSymbols(pDevIns->Internal.s.pVMR3, + pvInterface, cbInterface, + pDevIns->pReg->szRCMod, pDevIns->Internal.s.pDevR3->pszRCSearchPath, + pszSymPrefix, pszSymList, + false /*fRing0OrRC*/); + else + { + AssertMsgFailed(("Not a raw-mode enabled driver\n")); + rc = VERR_PERMISSION_DENIED; + } + } + else + { + AssertMsgFailed(("Invalid prefix '%s' for '%s'; must start with 'dev' and contain the driver name!\n", + pszSymPrefix, pDevIns->pReg->szName)); + rc = VERR_INVALID_NAME; + } + + LogFlow(("pdmR3DevHlp_PDMLdrGetRCInterfaceSymbols: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, + pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnLdrGetR0InterfaceSymbols} */ +static DECLCALLBACK(int) pdmR3DevHlp_LdrGetR0InterfaceSymbols(PPDMDEVINS pDevIns, void *pvInterface, size_t cbInterface, + const char *pszSymPrefix, const char *pszSymList) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_PDMLdrGetR0InterfaceSymbols: caller='%s'/%d: pvInterface=%p cbInterface=%zu pszSymPrefix=%p:{%s} pszSymList=%p:{%s}\n", + pDevIns->pReg->szName, pDevIns->iInstance, pvInterface, cbInterface, pszSymPrefix, pszSymPrefix, pszSymList, pszSymList)); + + int rc; + if ( strncmp(pszSymPrefix, "dev", 3) == 0 + && RTStrIStr(pszSymPrefix + 3, pDevIns->pReg->szName) != NULL) + { + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0) + rc = PDMR3LdrGetInterfaceSymbols(pDevIns->Internal.s.pVMR3, + pvInterface, cbInterface, + pDevIns->pReg->szR0Mod, pDevIns->Internal.s.pDevR3->pszR0SearchPath, + pszSymPrefix, pszSymList, + true /*fRing0OrRC*/); + else + { + AssertMsgFailed(("Not a ring-0 enabled driver\n")); + rc = VERR_PERMISSION_DENIED; + } + } + else + { 
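The prefix check in pfnLdrGetRCInterfaceSymbols / pfnLdrGetR0InterfaceSymbols above enforces a naming contract: the symbol prefix must start with "dev" and contain the registered device name, so symbols resolved for different devices cannot collide. A minimal editor's sketch of the calling side follows; it is not part of the patch, the PDMDevHlpLdrGetR0InterfaceSymbols wrapper name is assumed from pdmdev.h, and the MYDEVICER0IFACES layout and "MyDevice" name are hypothetical.

#include <VBox/vmm/pdmdev.h>

/* Hypothetical ring-0 interface table for a device registered as "MyDevice". */
typedef struct MYDEVICER0IFACES
{
    RTR0PTR pfnDoSomething;   /* filled in from the ring-0 symbol devMyDeviceDoSomething */
} MYDEVICER0IFACES;

static int myDeviceResolveR0(PPDMDEVINS pDevIns)
{
    MYDEVICER0IFACES R0Ifaces;
    /* The prefix starts with "dev" and embeds the device name, exactly as the check above demands. */
    return PDMDevHlpLdrGetR0InterfaceSymbols(pDevIns, &R0Ifaces, sizeof(R0Ifaces),
                                             "devMyDevice", "DoSomething");
}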
+ AssertMsgFailed(("Invalid prefix '%s' for '%s'; must start with 'dev' and contain the driver name!\n", + pszSymPrefix, pDevIns->pReg->szName)); + rc = VERR_INVALID_NAME; + } + + LogFlow(("pdmR3DevHlp_PDMLdrGetR0InterfaceSymbols: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, + pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnCallR0} */ +static DECLCALLBACK(int) pdmR3DevHlp_CallR0(PPDMDEVINS pDevIns, uint32_t uOperation, uint64_t u64Arg) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_CallR0: caller='%s'/%d: uOperation=%#x u64Arg=%#RX64\n", + pDevIns->pReg->szName, pDevIns->iInstance, uOperation, u64Arg)); + + /* + * Resolve the ring-0 entry point. There is not need to remember this like + * we do for drivers since this is mainly for construction time hacks and + * other things that aren't performance critical. + */ + int rc; + if (pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0) + { + char szSymbol[ sizeof("devR0") + sizeof(pDevIns->pReg->szName) + sizeof("ReqHandler")]; + strcat(strcat(strcpy(szSymbol, "devR0"), pDevIns->pReg->szName), "ReqHandler"); + szSymbol[sizeof("devR0") - 1] = RT_C_TO_UPPER(szSymbol[sizeof("devR0") - 1]); + + PFNPDMDRVREQHANDLERR0 pfnReqHandlerR0; + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, szSymbol, &pfnReqHandlerR0); + if (RT_SUCCESS(rc)) + { + /* + * Make the ring-0 call. + */ + PDMDEVICECALLREQHANDLERREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pDevInsR0 = PDMDEVINS_2_R0PTR(pDevIns); + Req.pfnReqHandlerR0 = pfnReqHandlerR0; + Req.uOperation = uOperation; + Req.u32Alignment = 0; + Req.u64Arg = u64Arg; + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_PDM_DEVICE_CALL_REQ_HANDLER, 0, &Req.Hdr); + } + else + pfnReqHandlerR0 = NIL_RTR0PTR; + } + else + rc = VERR_ACCESS_DENIED; + LogFlow(("pdmR3DevHlp_CallR0: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, + pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMGetSuspendReason} */ +static DECLCALLBACK(VMSUSPENDREASON) pdmR3DevHlp_VMGetSuspendReason(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + VMSUSPENDREASON enmReason = VMR3GetSuspendReason(pVM->pUVM); + LogFlow(("pdmR3DevHlp_VMGetSuspendReason: caller='%s'/%d: returns %d\n", + pDevIns->pReg->szName, pDevIns->iInstance, enmReason)); + return enmReason; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMGetResumeReason} */ +static DECLCALLBACK(VMRESUMEREASON) pdmR3DevHlp_VMGetResumeReason(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + VMRESUMEREASON enmReason = VMR3GetResumeReason(pVM->pUVM); + LogFlow(("pdmR3DevHlp_VMGetResumeReason: caller='%s'/%d: returns %d\n", + pDevIns->pReg->szName, pDevIns->iInstance, enmReason)); + return enmReason; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetUVM} */ +static DECLCALLBACK(PUVM) pdmR3DevHlp_GetUVM(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_GetUVM: caller='%s'/%d: returns %p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns->Internal.s.pVMR3)); + return pDevIns->Internal.s.pVMR3->pUVM; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetVM} */ +static DECLCALLBACK(PVM) pdmR3DevHlp_GetVM(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3DevHlp_GetVM: caller='%s'/%d: returns %p\n", 
pDevIns->pReg->szName, pDevIns->iInstance, pDevIns->Internal.s.pVMR3)); + return pDevIns->Internal.s.pVMR3; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetVMCPU} */ +static DECLCALLBACK(PVMCPU) pdmR3DevHlp_GetVMCPU(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_GetVMCPU: caller='%s'/%d for CPU %u\n", pDevIns->pReg->szName, pDevIns->iInstance, VMMGetCpuId(pDevIns->Internal.s.pVMR3))); + return VMMGetCpu(pDevIns->Internal.s.pVMR3); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetCurrentCpuId} */ +static DECLCALLBACK(VMCPUID) pdmR3DevHlp_GetCurrentCpuId(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VMCPUID idCpu = VMMGetCpuId(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_GetCurrentCpuId: caller='%s'/%d for CPU %u\n", pDevIns->pReg->szName, pDevIns->iInstance, idCpu)); + return idCpu; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPCIBusRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_PCIBusRegister(PPDMDEVINS pDevIns, PPDMPCIBUSREG pPciBusReg, + PCPDMPCIHLPR3 *ppPciHlpR3, uint32_t *piBus) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: pPciBusReg=%p:{.u32Version=%#x, .pfnRegisterR3=%p, .pfnIORegionRegisterR3=%p, " + ".pfnSetIrqR3=%p, .pszSetIrqRC=%p:{%s}, .pszSetIrqR0=%p:{%s}} ppPciHlpR3=%p piBus=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPciBusReg, pPciBusReg->u32Version, pPciBusReg->pfnRegisterR3, + pPciBusReg->pfnIORegionRegisterR3, pPciBusReg->pfnSetIrqR3, pPciBusReg->pszSetIrqRC, pPciBusReg->pszSetIrqRC, + pPciBusReg->pszSetIrqR0, pPciBusReg->pszSetIrqR0, ppPciHlpR3, piBus)); + + /* + * Validate the structure. + */ + if (pPciBusReg->u32Version != PDM_PCIBUSREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pPciBusReg->u32Version, PDM_PCIBUSREG_VERSION)); + LogFlow(("pdmR3DevHlp_PCIRegister: caller='%s'/%d: returns %Rrc (version)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( !pPciBusReg->pfnRegisterR3 + || !pPciBusReg->pfnIORegionRegisterR3 + || !pPciBusReg->pfnSetIrqR3) + { + Assert(pPciBusReg->pfnRegisterR3); + Assert(pPciBusReg->pfnIORegionRegisterR3); + Assert(pPciBusReg->pfnSetIrqR3); + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: returns %Rrc (R3 callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pPciBusReg->pszSetIrqRC + && !VALID_PTR(pPciBusReg->pszSetIrqRC)) + { + Assert(VALID_PTR(pPciBusReg->pszSetIrqRC)); + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pPciBusReg->pszSetIrqR0 + && !VALID_PTR(pPciBusReg->pszSetIrqR0)) + { + Assert(VALID_PTR(pPciBusReg->pszSetIrqR0)); + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if (!ppPciHlpR3) + { + Assert(ppPciHlpR3); + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: returns %Rrc (ppPciHlpR3)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + AssertLogRelMsgReturn(RT_VALID_PTR(piBus) || !piBus, + ("caller='%s'/%d: piBus=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, piBus), + 
VERR_INVALID_POINTER); + + /* + * Find free PCI bus entry. + */ + unsigned iBus = 0; + for (iBus = 0; iBus < RT_ELEMENTS(pVM->pdm.s.aPciBuses); iBus++) + if (!pVM->pdm.s.aPciBuses[iBus].pDevInsR3) + break; + if (iBus >= RT_ELEMENTS(pVM->pdm.s.aPciBuses)) + { + AssertMsgFailed(("Too many PCI buses. Max=%u\n", RT_ELEMENTS(pVM->pdm.s.aPciBuses))); + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: returns %Rrc (pci bus)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + PPDMPCIBUS pPciBus = &pVM->pdm.s.aPciBuses[iBus]; + + /* + * Resolve and init the RC bits. + */ + if (pPciBusReg->pszSetIrqRC) + { + int rc = pdmR3DevGetSymbolRCLazy(pDevIns, pPciBusReg->pszSetIrqRC, &pPciBus->pfnSetIrqRC); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szRCMod, pPciBusReg->pszSetIrqRC, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_PCIRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + pPciBus->pDevInsRC = PDMDEVINS_2_RCPTR(pDevIns); + } + else + { + pPciBus->pfnSetIrqRC = 0; + pPciBus->pDevInsRC = 0; + } + + /* + * Resolve and init the R0 bits. + */ + if (pPciBusReg->pszSetIrqR0) + { + int rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pPciBusReg->pszSetIrqR0, &pPciBus->pfnSetIrqR0); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szR0Mod, pPciBusReg->pszSetIrqR0, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_PCIRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + pPciBus->pDevInsR0 = PDMDEVINS_2_R0PTR(pDevIns); + } + else + { + pPciBus->pfnSetIrqR0 = 0; + pPciBus->pDevInsR0 = 0; + } + + /* + * Init the R3 bits. + */ + pPciBus->iBus = iBus; + pPciBus->pDevInsR3 = pDevIns; + pPciBus->pfnRegisterR3 = pPciBusReg->pfnRegisterR3; + pPciBus->pfnRegisterMsiR3 = pPciBusReg->pfnRegisterMsiR3; + pPciBus->pfnIORegionRegisterR3 = pPciBusReg->pfnIORegionRegisterR3; + pPciBus->pfnSetConfigCallbacksR3 = pPciBusReg->pfnSetConfigCallbacksR3; + pPciBus->pfnSetIrqR3 = pPciBusReg->pfnSetIrqR3; + + Log(("PDM: Registered PCI bus device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns)); + + /* set the helper pointer and return. */ + *ppPciHlpR3 = &g_pdmR3DevPciHlp; + if (piBus) + *piBus = iBus; + LogFlow(("pdmR3DevHlp_PCIBusRegister: caller='%s'/%d: returns %Rrc *piBus=%u\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS, iBus)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPICRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_PICRegister(PPDMDEVINS pDevIns, PPDMPICREG pPicReg, PCPDMPICHLPR3 *ppPicHlpR3) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: pPicReg=%p:{.u32Version=%#x, .pfnSetIrqR3=%p, .pfnGetInterruptR3=%p, .pszGetIrqRC=%p:{%s}, .pszGetInterruptRC=%p:{%s}, .pszGetIrqR0=%p:{%s}, .pszGetInterruptR0=%p:{%s} } ppPicHlpR3=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pPicReg, pPicReg->u32Version, pPicReg->pfnSetIrqR3, pPicReg->pfnGetInterruptR3, + pPicReg->pszSetIrqRC, pPicReg->pszSetIrqRC, pPicReg->pszGetInterruptRC, pPicReg->pszGetInterruptRC, + pPicReg->pszSetIrqR0, pPicReg->pszSetIrqR0, pPicReg->pszGetInterruptR0, pPicReg->pszGetInterruptR0, + ppPicHlpR3)); + + /* + * Validate input. 
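pfnPCIBusRegister fills one of the pdm.s.aPciBuses slots from the PDMPCIBUSREG the bus device hands in, lazily resolves the optional RC/R0 SetIrq symbols, and returns the ring-3 PCI helper table plus the assigned bus number. An editor's sketch of the bus-device side follows; it is not part of the patch, the PDMDevHlpPCIBusRegister wrapper name is assumed, and the mybus* callbacks are hypothetical implementations living elsewhere in that bus device.

#include <VBox/vmm/pdmdev.h>

static int myPciBusConstructStep(PPDMDEVINS pDevIns, PCPDMPCIHLPR3 *ppPciHlpR3, uint32_t *piBus)
{
    PDMPCIBUSREG PciBusReg;
    RT_ZERO(PciBusReg);
    PciBusReg.u32Version              = PDM_PCIBUSREG_VERSION;
    /* The three mandatory ring-3 callbacks checked above, plus the optional ones. */
    PciBusReg.pfnRegisterR3           = mybusRegisterDevice;      /* hypothetical */
    PciBusReg.pfnRegisterMsiR3        = mybusRegisterMsi;         /* hypothetical */
    PciBusReg.pfnIORegionRegisterR3   = mybusIORegionRegister;    /* hypothetical */
    PciBusReg.pfnSetConfigCallbacksR3 = mybusSetConfigCallbacks;  /* hypothetical */
    PciBusReg.pfnSetIrqR3             = mybusSetIrq;              /* hypothetical */
    PciBusReg.pszSetIrqRC             = NULL;                     /* no raw-mode context handler */
    PciBusReg.pszSetIrqR0             = "mybusSetIrq";            /* resolved lazily in ring-0 */
    return PDMDevHlpPCIBusRegister(pDevIns, &PciBusReg, ppPciHlpR3, piBus);
}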
+ */ + if (pPicReg->u32Version != PDM_PICREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pPicReg->u32Version, PDM_PICREG_VERSION)); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc (version)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( !pPicReg->pfnSetIrqR3 + || !pPicReg->pfnGetInterruptR3) + { + Assert(pPicReg->pfnSetIrqR3); + Assert(pPicReg->pfnGetInterruptR3); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc (R3 callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( ( pPicReg->pszSetIrqRC + || pPicReg->pszGetInterruptRC) + && ( !VALID_PTR(pPicReg->pszSetIrqRC) + || !VALID_PTR(pPicReg->pszGetInterruptRC)) + ) + { + Assert(VALID_PTR(pPicReg->pszSetIrqRC)); + Assert(VALID_PTR(pPicReg->pszGetInterruptRC)); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc (RC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pPicReg->pszSetIrqRC + && !(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC)) + { + Assert(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc (RC flag)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pPicReg->pszSetIrqR0 + && !(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0)) + { + Assert(pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc (R0 flag)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if (!ppPicHlpR3) + { + Assert(ppPicHlpR3); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc (ppPicHlpR3)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Only one PIC device. + */ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (pVM->pdm.s.Pic.pDevInsR3) + { + AssertMsgFailed(("Only one pic device is supported!\n")); + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * RC stuff. + */ + if (pPicReg->pszSetIrqRC) + { + int rc = pdmR3DevGetSymbolRCLazy(pDevIns, pPicReg->pszSetIrqRC, &pVM->pdm.s.Pic.pfnSetIrqRC); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szRCMod, pPicReg->pszSetIrqRC, rc)); + if (RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolRCLazy(pDevIns, pPicReg->pszGetInterruptRC, &pVM->pdm.s.Pic.pfnGetInterruptRC); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szRCMod, pPicReg->pszGetInterruptRC, rc)); + } + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + pVM->pdm.s.Pic.pDevInsRC = PDMDEVINS_2_RCPTR(pDevIns); + } + else + { + pVM->pdm.s.Pic.pDevInsRC = 0; + pVM->pdm.s.Pic.pfnSetIrqRC = 0; + pVM->pdm.s.Pic.pfnGetInterruptRC = 0; + } + + /* + * R0 stuff. 
+ */ + if (pPicReg->pszSetIrqR0) + { + int rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pPicReg->pszSetIrqR0, &pVM->pdm.s.Pic.pfnSetIrqR0); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szR0Mod, pPicReg->pszSetIrqR0, rc)); + if (RT_SUCCESS(rc)) + { + rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pPicReg->pszGetInterruptR0, &pVM->pdm.s.Pic.pfnGetInterruptR0); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szR0Mod, pPicReg->pszGetInterruptR0, rc)); + } + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + pVM->pdm.s.Pic.pDevInsR0 = PDMDEVINS_2_R0PTR(pDevIns); + Assert(pVM->pdm.s.Pic.pDevInsR0); + } + else + { + pVM->pdm.s.Pic.pfnSetIrqR0 = 0; + pVM->pdm.s.Pic.pfnGetInterruptR0 = 0; + pVM->pdm.s.Pic.pDevInsR0 = 0; + } + + /* + * R3 stuff. + */ + pVM->pdm.s.Pic.pDevInsR3 = pDevIns; + pVM->pdm.s.Pic.pfnSetIrqR3 = pPicReg->pfnSetIrqR3; + pVM->pdm.s.Pic.pfnGetInterruptR3 = pPicReg->pfnGetInterruptR3; + Log(("PDM: Registered PIC device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns)); + + /* set the helper pointer and return. */ + *ppPicHlpR3 = &g_pdmR3DevPicHlp; + LogFlow(("pdmR3DevHlp_PICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnAPICRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_APICRegister(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + + /* + * Only one APIC device. On SMP we have single logical device covering all LAPICs, + * as they need to communicate and share state easily. + */ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (pVM->pdm.s.Apic.pDevInsR3) + { + AssertMsgFailed(("Only one APIC device is supported!\n")); + LogFlow(("pdmR3DevHlp_APICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Initialize the RC, R0 and HC bits. + */ + pVM->pdm.s.Apic.pDevInsRC = PDMDEVINS_2_RCPTR(pDevIns); + Assert(pVM->pdm.s.Apic.pDevInsRC); + + pVM->pdm.s.Apic.pDevInsR0 = PDMDEVINS_2_R0PTR(pDevIns); + Assert(pVM->pdm.s.Apic.pDevInsR0); + + pVM->pdm.s.Apic.pDevInsR3 = pDevIns; + LogFlow(("pdmR3DevHlp_APICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnIOAPICRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_IOAPICRegister(PPDMDEVINS pDevIns, PPDMIOAPICREG pIoApicReg, PCPDMIOAPICHLPR3 *ppIoApicHlpR3) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: pIoApicReg=%p:{.u32Version=%#x, .pfnSetIrqR3=%p, .pszSetIrqRC=%p:{%s}, .pszSetIrqR0=%p:{%s}} ppIoApicHlpR3=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pIoApicReg, pIoApicReg->u32Version, pIoApicReg->pfnSetIrqR3, + pIoApicReg->pszSetIrqRC, pIoApicReg->pszSetIrqRC, pIoApicReg->pszSetIrqR0, pIoApicReg->pszSetIrqR0, ppIoApicHlpR3)); + + /* + * Validate input. 
+ */ + if (pIoApicReg->u32Version != PDM_IOAPICREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pIoApicReg->u32Version, PDM_IOAPICREG_VERSION)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (version)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if (!pIoApicReg->pfnSetIrqR3 || !pIoApicReg->pfnSendMsiR3 || !pIoApicReg->pfnSetEoiR3) + { + Assert(pIoApicReg->pfnSetIrqR3); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (R3 callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSetIrqRC + && !VALID_PTR(pIoApicReg->pszSetIrqRC)) + { + Assert(VALID_PTR(pIoApicReg->pszSetIrqRC)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSendMsiRC + && !VALID_PTR(pIoApicReg->pszSendMsiRC)) + { + Assert(VALID_PTR(pIoApicReg->pszSendMsiRC)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSetEoiRC + && !VALID_PTR(pIoApicReg->pszSetEoiRC)) + { + Assert(VALID_PTR(pIoApicReg->pszSetEoiRC)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSetIrqR0 + && !VALID_PTR(pIoApicReg->pszSetIrqR0)) + { + Assert(VALID_PTR(pIoApicReg->pszSetIrqR0)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSendMsiR0 + && !VALID_PTR(pIoApicReg->pszSendMsiR0)) + { + Assert(VALID_PTR(pIoApicReg->pszSendMsiR0)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSetEoiR0 + && !VALID_PTR(pIoApicReg->pszSetEoiR0)) + { + Assert(VALID_PTR(pIoApicReg->pszSetEoiR0)); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (GC callbacks)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if (!ppIoApicHlpR3) + { + Assert(ppIoApicHlpR3); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (ppApicHlp)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * The I/O APIC requires the APIC to be present (hacks++). + * If the I/O APIC does GC stuff so must the APIC. + */ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (!pVM->pdm.s.Apic.pDevInsR3) + { + AssertMsgFailed(("Configuration error / Init order error! No APIC!\n")); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (no APIC)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( pIoApicReg->pszSetIrqRC + && !pVM->pdm.s.Apic.pDevInsRC) + { + AssertMsgFailed(("Configuration error! 
APIC doesn't do GC, I/O APIC does!\n")); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (no GC APIC)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Only one I/O APIC device. + */ + if (pVM->pdm.s.IoApic.pDevInsR3) + { + AssertMsgFailed(("Only one ioapic device is supported!\n")); + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc (only one)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Resolve & initialize the GC bits. + */ + if (pIoApicReg->pszSetIrqRC) + { + int rc = pdmR3DevGetSymbolRCLazy(pDevIns, pIoApicReg->pszSetIrqRC, &pVM->pdm.s.IoApic.pfnSetIrqRC); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szRCMod, pIoApicReg->pszSetIrqRC, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + pVM->pdm.s.IoApic.pDevInsRC = PDMDEVINS_2_RCPTR(pDevIns); + } + else + { + pVM->pdm.s.IoApic.pDevInsRC = 0; + pVM->pdm.s.IoApic.pfnSetIrqRC = 0; + } + + if (pIoApicReg->pszSendMsiRC) + { + int rc = pdmR3DevGetSymbolRCLazy(pDevIns, pIoApicReg->pszSendMsiRC, &pVM->pdm.s.IoApic.pfnSendMsiRC); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szRCMod, pIoApicReg->pszSendMsiRC, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + else + { + pVM->pdm.s.IoApic.pfnSendMsiRC = 0; + } + + if (pIoApicReg->pszSetEoiRC) + { + int rc = pdmR3DevGetSymbolRCLazy(pDevIns, pIoApicReg->pszSetEoiRC, &pVM->pdm.s.IoApic.pfnSetEoiRC); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szRCMod, pIoApicReg->pszSetEoiRC, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + else + { + pVM->pdm.s.IoApic.pfnSetEoiRC = 0; + } + + /* + * Resolve & initialize the R0 bits. 
+ */ + if (pIoApicReg->pszSetIrqR0) + { + int rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pIoApicReg->pszSetIrqR0, &pVM->pdm.s.IoApic.pfnSetIrqR0); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szR0Mod, pIoApicReg->pszSetIrqR0, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + pVM->pdm.s.IoApic.pDevInsR0 = PDMDEVINS_2_R0PTR(pDevIns); + Assert(pVM->pdm.s.IoApic.pDevInsR0); + } + else + { + pVM->pdm.s.IoApic.pfnSetIrqR0 = 0; + pVM->pdm.s.IoApic.pDevInsR0 = 0; + } + + if (pIoApicReg->pszSendMsiR0) + { + int rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pIoApicReg->pszSendMsiR0, &pVM->pdm.s.IoApic.pfnSendMsiR0); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szR0Mod, pIoApicReg->pszSendMsiR0, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + else + { + pVM->pdm.s.IoApic.pfnSendMsiR0 = 0; + } + + if (pIoApicReg->pszSetEoiR0) + { + int rc = pdmR3DevGetSymbolR0Lazy(pDevIns, pIoApicReg->pszSetEoiR0, &pVM->pdm.s.IoApic.pfnSetEoiR0); + AssertMsgRC(rc, ("%s::%s rc=%Rrc\n", pDevIns->pReg->szR0Mod, pIoApicReg->pszSetEoiR0, rc)); + if (RT_FAILURE(rc)) + { + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + else + { + pVM->pdm.s.IoApic.pfnSetEoiR0 = 0; + } + + + /* + * Initialize the R3 bits. + */ + pVM->pdm.s.IoApic.pDevInsR3 = pDevIns; + pVM->pdm.s.IoApic.pfnSetIrqR3 = pIoApicReg->pfnSetIrqR3; + pVM->pdm.s.IoApic.pfnSendMsiR3 = pIoApicReg->pfnSendMsiR3; + pVM->pdm.s.IoApic.pfnSetEoiR3 = pIoApicReg->pfnSetEoiR3; + Log(("PDM: Registered I/O APIC device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns)); + + /* set the helper pointer and return. */ + *ppIoApicHlpR3 = &g_pdmR3DevIoApicHlp; + LogFlow(("pdmR3DevHlp_IOAPICRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnHPETRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_HPETRegister(PPDMDEVINS pDevIns, PPDMHPETREG pHpetReg, PCPDMHPETHLPR3 *ppHpetHlpR3) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); RT_NOREF_PV(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_HPETRegister: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance)); + + /* + * Validate input. + */ + if (pHpetReg->u32Version != PDM_HPETREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pHpetReg->u32Version, PDM_HPETREG_VERSION)); + LogFlow(("pdmR3DevHlp_HPETRegister: caller='%s'/%d: returns %Rrc (version)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + if (!ppHpetHlpR3) + { + Assert(ppHpetHlpR3); + LogFlow(("pdmR3DevHlp_HPETRegister: caller='%s'/%d: returns %Rrc (ppApicHlpR3)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* set the helper pointer and return. 
*/ + *ppHpetHlpR3 = &g_pdmR3DevHpetHlp; + LogFlow(("pdmR3DevHlp_HPETRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnPciRawRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_PciRawRegister(PPDMDEVINS pDevIns, PPDMPCIRAWREG pPciRawReg, PCPDMPCIRAWHLPR3 *ppPciRawHlpR3) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); RT_NOREF_PV(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_PciRawRegister: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance)); + + /* + * Validate input. + */ + if (pPciRawReg->u32Version != PDM_PCIRAWREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pPciRawReg->u32Version, PDM_PCIRAWREG_VERSION)); + LogFlow(("pdmR3DevHlp_PciRawRegister: caller='%s'/%d: returns %Rrc (version)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + if (!ppPciRawHlpR3) + { + Assert(ppPciRawHlpR3); + LogFlow(("pdmR3DevHlp_PciRawRegister: caller='%s'/%d: returns %Rrc (ppPciRawHlpR3)\n", pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* set the helper pointer and return. */ + *ppPciRawHlpR3 = &g_pdmR3DevPciRawHlp; + LogFlow(("pdmR3DevHlp_PciRawRegister: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnDMACRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_DMACRegister(PPDMDEVINS pDevIns, PPDMDMACREG pDmacReg, PCPDMDMACHLP *ppDmacHlp) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_DMACRegister: caller='%s'/%d: pDmacReg=%p:{.u32Version=%#x, .pfnRun=%p, .pfnRegister=%p, .pfnReadMemory=%p, .pfnWriteMemory=%p, .pfnSetDREQ=%p, .pfnGetChannelMode=%p} ppDmacHlp=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pDmacReg, pDmacReg->u32Version, pDmacReg->pfnRun, pDmacReg->pfnRegister, + pDmacReg->pfnReadMemory, pDmacReg->pfnWriteMemory, pDmacReg->pfnSetDREQ, pDmacReg->pfnGetChannelMode, ppDmacHlp)); + + /* + * Validate input. + */ + if (pDmacReg->u32Version != PDM_DMACREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pDmacReg->u32Version, + PDM_DMACREG_VERSION)); + LogFlow(("pdmR3DevHlp_DMACRegister: caller='%s'/%d: returns %Rrc (version)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if ( !pDmacReg->pfnRun + || !pDmacReg->pfnRegister + || !pDmacReg->pfnReadMemory + || !pDmacReg->pfnWriteMemory + || !pDmacReg->pfnSetDREQ + || !pDmacReg->pfnGetChannelMode) + { + Assert(pDmacReg->pfnRun); + Assert(pDmacReg->pfnRegister); + Assert(pDmacReg->pfnReadMemory); + Assert(pDmacReg->pfnWriteMemory); + Assert(pDmacReg->pfnSetDREQ); + Assert(pDmacReg->pfnGetChannelMode); + LogFlow(("pdmR3DevHlp_DMACRegister: caller='%s'/%d: returns %Rrc (callbacks)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + if (!ppDmacHlp) + { + Assert(ppDmacHlp); + LogFlow(("pdmR3DevHlp_DMACRegister: caller='%s'/%d: returns %Rrc (ppDmacHlp)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Only one DMA device. 
+ */ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (pVM->pdm.s.pDmac) + { + AssertMsgFailed(("Only one DMA device is supported!\n")); + LogFlow(("pdmR3DevHlp_DMACRegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Allocate and initialize pci bus structure. + */ + int rc = VINF_SUCCESS; + PPDMDMAC pDmac = (PPDMDMAC)MMR3HeapAlloc(pDevIns->Internal.s.pVMR3, MM_TAG_PDM_DEVICE, sizeof(*pDmac)); + if (pDmac) + { + pDmac->pDevIns = pDevIns; + pDmac->Reg = *pDmacReg; + pVM->pdm.s.pDmac = pDmac; + + /* set the helper pointer. */ + *ppDmacHlp = &g_pdmR3DevDmacHlp; + Log(("PDM: Registered DMAC device '%s'/%d pDevIns=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pDevIns)); + } + else + rc = VERR_NO_MEMORY; + + LogFlow(("pdmR3DevHlp_DMACRegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * @copydoc PDMDEVHLPR3::pfnRegisterVMMDevHeap + */ +static DECLCALLBACK(int) pdmR3DevHlp_RegisterVMMDevHeap(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTR3PTR pvHeap, unsigned cbHeap) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_RegisterVMMDevHeap: caller='%s'/%d: GCPhys=%RGp pvHeap=%p cbHeap=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, GCPhys, pvHeap, cbHeap)); + + if (pVM->pdm.s.pvVMMDevHeap == NULL) + { + pVM->pdm.s.pvVMMDevHeap = pvHeap; + pVM->pdm.s.GCPhysVMMDevHeap = GCPhys; + pVM->pdm.s.cbVMMDevHeap = cbHeap; + pVM->pdm.s.cbVMMDevHeapLeft = cbHeap; + } + else + { + Assert(pVM->pdm.s.pvVMMDevHeap == pvHeap); + Assert(pVM->pdm.s.cbVMMDevHeap == cbHeap); + Assert(pVM->pdm.s.GCPhysVMMDevHeap != GCPhys || GCPhys == NIL_RTGCPHYS); + if (pVM->pdm.s.GCPhysVMMDevHeap != GCPhys) + { + pVM->pdm.s.GCPhysVMMDevHeap = GCPhys; + if (pVM->pdm.s.pfnVMMDevHeapNotify) + pVM->pdm.s.pfnVMMDevHeapNotify(pVM, pvHeap, GCPhys); + } + } + + LogFlow(("pdmR3DevHlp_RegisterVMMDevHeap: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVHLPR3,pfnFirmwareRegister} + */ +static DECLCALLBACK(int) pdmR3DevHlp_FirmwareRegister(PPDMDEVINS pDevIns, PCPDMFWREG pFwReg, PCPDMFWHLPR3 *ppFwHlp) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_FirmwareRegister: caller='%s'/%d: pFWReg=%p:{.u32Version=%#x, .pfnIsHardReset=%p, .u32TheEnd=%#x} ppFwHlp=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pFwReg, pFwReg->u32Version, pFwReg->pfnIsHardReset, pFwReg->u32TheEnd, ppFwHlp)); + + /* + * Validate input. 
+ */ + if (pFwReg->u32Version != PDM_FWREG_VERSION) + { + AssertMsgFailed(("u32Version=%#x expected %#x\n", pFwReg->u32Version, PDM_FWREG_VERSION)); + LogFlow(("pdmR3DevHlp_FirmwareRegister: caller='%s'/%d: returns %Rrc (version)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + if (!pFwReg->pfnIsHardReset) + { + Assert(pFwReg->pfnIsHardReset); + LogFlow(("pdmR3DevHlp_FirmwareRegister: caller='%s'/%d: returns %Rrc (callbacks)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + if (!ppFwHlp) + { + Assert(ppFwHlp); + LogFlow(("pdmR3DevHlp_FirmwareRegister: caller='%s'/%d: returns %Rrc (ppFwHlp)\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Only one DMA device. + */ + PVM pVM = pDevIns->Internal.s.pVMR3; + if (pVM->pdm.s.pFirmware) + { + AssertMsgFailed(("Only one firmware device is supported!\n")); + LogFlow(("pdmR3DevHlp_FirmwareRegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, VERR_INVALID_PARAMETER)); + return VERR_INVALID_PARAMETER; + } + + /* + * Allocate and initialize pci bus structure. + */ + int rc = VINF_SUCCESS; + PPDMFW pFirmware = (PPDMFW)MMR3HeapAlloc(pDevIns->Internal.s.pVMR3, MM_TAG_PDM_DEVICE, sizeof(*pFirmware)); + if (pFirmware) + { + pFirmware->pDevIns = pDevIns; + pFirmware->Reg = *pFwReg; + pVM->pdm.s.pFirmware = pFirmware; + + /* set the helper pointer. */ + *ppFwHlp = &g_pdmR3DevFirmwareHlp; + Log(("PDM: Registered firmware device '%s'/%d pDevIns=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, pDevIns)); + } + else + rc = VERR_NO_MEMORY; + + LogFlow(("pdmR3DevHlp_FirmwareRegister: caller='%s'/%d: returns %Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMReset} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMReset(PPDMDEVINS pDevIns, uint32_t fFlags) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_VMReset: caller='%s'/%d: fFlags=%#x VM_FF_RESET %d -> 1\n", + pDevIns->pReg->szName, pDevIns->iInstance, fFlags, VM_FF_IS_SET(pVM, VM_FF_RESET))); + + /* + * We postpone this operation because we're likely to be inside a I/O instruction + * and the EIP will be updated when we return. + * We still return VINF_EM_RESET to break out of any execution loops and force FF evaluation. + */ + bool fHaltOnReset; + int rc = CFGMR3QueryBool(CFGMR3GetChild(CFGMR3GetRoot(pVM), "PDM"), "HaltOnReset", &fHaltOnReset); + if (RT_SUCCESS(rc) && fHaltOnReset) + { + Log(("pdmR3DevHlp_VMReset: Halt On Reset!\n")); + rc = VINF_EM_HALT; + } + else + { + pVM->pdm.s.fResetFlags = fFlags; + VM_FF_SET(pVM, VM_FF_RESET); + rc = VINF_EM_RESET; + } + + LogFlow(("pdmR3DevHlp_VMReset: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSuspend} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSuspend(PPDMDEVINS pDevIns) +{ + int rc; + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_VMSuspend: caller='%s'/%d:\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + /** @todo Always take the SMP path - fewer code paths. */ + if (pVM->cCpus > 1) + { + /* We own the IOM lock here and could cause a deadlock by waiting for a VCPU that is blocking on the IOM lock. 
*/ + rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)VMR3Suspend, 2, pVM->pUVM, VMSUSPENDREASON_VM); + AssertRC(rc); + rc = VINF_EM_SUSPEND; + } + else + rc = VMR3Suspend(pVM->pUVM, VMSUSPENDREASON_VM); + + LogFlow(("pdmR3DevHlp_VMSuspend: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** + * Worker for pdmR3DevHlp_VMSuspendSaveAndPowerOff that is invoked via a queued + * EMT request to avoid deadlocks. + * + * @returns VBox status code fit for scheduling. + * @param pVM The cross context VM structure. + * @param pDevIns The device that triggered this action. + */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSuspendSaveAndPowerOffWorker(PVM pVM, PPDMDEVINS pDevIns) +{ + /* + * Suspend the VM first then do the saving. + */ + int rc = VMR3Suspend(pVM->pUVM, VMSUSPENDREASON_VM); + if (RT_SUCCESS(rc)) + { + PUVM pUVM = pVM->pUVM; + rc = pUVM->pVmm2UserMethods->pfnSaveState(pVM->pUVM->pVmm2UserMethods, pUVM); + + /* + * On success, power off the VM, on failure we'll leave it suspended. + */ + if (RT_SUCCESS(rc)) + { + rc = VMR3PowerOff(pVM->pUVM); + if (RT_FAILURE(rc)) + LogRel(("%s/SSP: VMR3PowerOff failed: %Rrc\n", pDevIns->pReg->szName, rc)); + } + else + LogRel(("%s/SSP: pfnSaveState failed: %Rrc\n", pDevIns->pReg->szName, rc)); + } + else + LogRel(("%s/SSP: Suspend failed: %Rrc\n", pDevIns->pReg->szName, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSuspendSaveAndPowerOff} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMSuspendSaveAndPowerOff(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_VMSuspendSaveAndPowerOff: caller='%s'/%d:\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + int rc; + if ( pVM->pUVM->pVmm2UserMethods + && pVM->pUVM->pVmm2UserMethods->pfnSaveState) + { + rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pdmR3DevHlp_VMSuspendSaveAndPowerOffWorker, 2, pVM, pDevIns); + if (RT_SUCCESS(rc)) + { + LogRel(("%s: Suspending, Saving and Powering Off the VM\n", pDevIns->pReg->szName)); + rc = VINF_EM_SUSPEND; + } + } + else + rc = VERR_NOT_SUPPORTED; + + LogFlow(("pdmR3DevHlp_VMSuspendSaveAndPowerOff: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMPowerOff} */ +static DECLCALLBACK(int) pdmR3DevHlp_VMPowerOff(PPDMDEVINS pDevIns) +{ + int rc; + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DevHlp_VMPowerOff: caller='%s'/%d:\n", + pDevIns->pReg->szName, pDevIns->iInstance)); + + /** @todo Always take the SMP path - fewer code paths. */ + if (pVM->cCpus > 1) + { + /* We might be holding locks here and could cause a deadlock since + VMR3PowerOff rendezvous with the other CPUs. */ + rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)VMR3PowerOff, 1, pVM->pUVM); + AssertRC(rc); + /* Set the VCPU state to stopped here as well to make sure no + inconsistency with the EM state occurs. 
*/ + VMCPU_SET_STATE(VMMGetCpu(pVM), VMCPUSTATE_STOPPED); + rc = VINF_EM_OFF; + } + else + rc = VMR3PowerOff(pVM->pUVM); + + LogFlow(("pdmR3DevHlp_VMPowerOff: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnA20IsEnabled} */ +static DECLCALLBACK(bool) pdmR3DevHlp_A20IsEnabled(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + + bool fRc = PGMPhysIsA20Enabled(VMMGetCpu(pDevIns->Internal.s.pVMR3)); + + LogFlow(("pdmR3DevHlp_A20IsEnabled: caller='%s'/%d: returns %d\n", pDevIns->pReg->szName, pDevIns->iInstance, fRc)); + return fRc; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnA20Set} */ +static DECLCALLBACK(void) pdmR3DevHlp_A20Set(PPDMDEVINS pDevIns, bool fEnable) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + LogFlow(("pdmR3DevHlp_A20Set: caller='%s'/%d: fEnable=%d\n", pDevIns->pReg->szName, pDevIns->iInstance, fEnable)); + PGMR3PhysSetA20(VMMGetCpu(pDevIns->Internal.s.pVMR3), fEnable); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetCpuId} */ +static DECLCALLBACK(void) pdmR3DevHlp_GetCpuId(PPDMDEVINS pDevIns, uint32_t iLeaf, + uint32_t *pEax, uint32_t *pEbx, uint32_t *pEcx, uint32_t *pEdx) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DevHlp_GetCpuId: caller='%s'/%d: iLeaf=%d pEax=%p pEbx=%p pEcx=%p pEdx=%p\n", + pDevIns->pReg->szName, pDevIns->iInstance, iLeaf, pEax, pEbx, pEcx, pEdx)); + AssertPtr(pEax); AssertPtr(pEbx); AssertPtr(pEcx); AssertPtr(pEdx); + + CPUMGetGuestCpuId(VMMGetCpu(pDevIns->Internal.s.pVMR3), iLeaf, 0 /*iSubLeaf*/, pEax, pEbx, pEcx, pEdx); + + LogFlow(("pdmR3DevHlp_GetCpuId: caller='%s'/%d: returns void - *pEax=%#x *pEbx=%#x *pEcx=%#x *pEdx=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, *pEax, *pEbx, *pEcx, *pEdx)); +} + + +/** + * The device helper structure for trusted devices. 
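pfnA20Set/pfnA20IsEnabled and pfnGetCpuId above are thin per-EMT wrappers over PGM and CPUM; the classic consumers are a keyboard-controller or port-92h device gating the A20 line and firmware devices reading guest CPUID leaves. A brief editor's sketch, not part of the patch; the PDMDevHlpA20Set/PDMDevHlpGetCpuId wrapper names are assumed from pdmdev.h.

#include <VBox/vmm/pdmdev.h>

/* Hypothetical port 92h handler: bit 1 controls the A20 gate. */
static void myDevWritePort92(PPDMDEVINS pDevIns, uint8_t u8Value)
{
    PDMDevHlpA20Set(pDevIns, RT_BOOL(u8Value & RT_BIT(1)));
}

/* Hypothetical firmware device fetching the guest's CPUID vendor registers. */
static void myDevQueryCpuVendorRegs(PPDMDEVINS pDevIns, uint32_t *puEbx, uint32_t *puEcx, uint32_t *puEdx)
{
    uint32_t uEax;
    PDMDevHlpGetCpuId(pDevIns, 0 /* leaf 0: vendor string in ebx/edx/ecx */, &uEax, puEbx, puEcx, puEdx);
}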
+ */ +const PDMDEVHLPR3 g_pdmR3DevHlpTrusted = +{ + PDM_DEVHLPR3_VERSION, + pdmR3DevHlp_IOPortRegister, + pdmR3DevHlp_IOPortRegisterRC, + pdmR3DevHlp_IOPortRegisterR0, + pdmR3DevHlp_IOPortDeregister, + pdmR3DevHlp_MMIORegister, + pdmR3DevHlp_MMIORegisterRC, + pdmR3DevHlp_MMIORegisterR0, + pdmR3DevHlp_MMIODeregister, + pdmR3DevHlp_MMIO2Register, + pdmR3DevHlp_MMIOExPreRegister, + pdmR3DevHlp_MMIOExDeregister, + pdmR3DevHlp_MMIOExMap, + pdmR3DevHlp_MMIOExUnmap, + pdmR3DevHlp_MMIOExReduce, + pdmR3DevHlp_MMHyperMapMMIO2, + pdmR3DevHlp_MMIO2MapKernel, + pdmR3DevHlp_ROMRegister, + pdmR3DevHlp_ROMProtectShadow, + pdmR3DevHlp_SSMRegister, + pdmR3DevHlp_TMTimerCreate, + pdmR3DevHlp_TMUtcNow, + pdmR3DevHlp_PhysRead, + pdmR3DevHlp_PhysWrite, + pdmR3DevHlp_PhysGCPhys2CCPtr, + pdmR3DevHlp_PhysGCPhys2CCPtrReadOnly, + pdmR3DevHlp_PhysReleasePageMappingLock, + pdmR3DevHlp_PhysReadGCVirt, + pdmR3DevHlp_PhysWriteGCVirt, + pdmR3DevHlp_PhysGCPtr2GCPhys, + pdmR3DevHlp_MMHeapAlloc, + pdmR3DevHlp_MMHeapAllocZ, + pdmR3DevHlp_MMHeapFree, + pdmR3DevHlp_VMState, + pdmR3DevHlp_VMTeleportedAndNotFullyResumedYet, + pdmR3DevHlp_VMSetError, + pdmR3DevHlp_VMSetErrorV, + pdmR3DevHlp_VMSetRuntimeError, + pdmR3DevHlp_VMSetRuntimeErrorV, + pdmR3DevHlp_DBGFStopV, + pdmR3DevHlp_DBGFInfoRegister, + pdmR3DevHlp_DBGFRegRegister, + pdmR3DevHlp_DBGFTraceBuf, + pdmR3DevHlp_STAMRegister, + pdmR3DevHlp_STAMRegisterF, + pdmR3DevHlp_STAMRegisterV, + pdmR3DevHlp_PCIRegister, + pdmR3DevHlp_PCIRegisterMsi, + pdmR3DevHlp_PCIIORegionRegister, + pdmR3DevHlp_PCISetConfigCallbacks, + pdmR3DevHlp_PCIPhysRead, + pdmR3DevHlp_PCIPhysWrite, + pdmR3DevHlp_PCISetIrq, + pdmR3DevHlp_PCISetIrqNoWait, + pdmR3DevHlp_ISASetIrq, + pdmR3DevHlp_ISASetIrqNoWait, + pdmR3DevHlp_IoApicSendMsi, + pdmR3DevHlp_DriverAttach, + pdmR3DevHlp_DriverDetach, + pdmR3DevHlp_QueueCreate, + pdmR3DevHlp_CritSectInit, + pdmR3DevHlp_CritSectGetNop, + pdmR3DevHlp_CritSectGetNopR0, + pdmR3DevHlp_CritSectGetNopRC, + pdmR3DevHlp_SetDeviceCritSect, + pdmR3DevHlp_ThreadCreate, + pdmR3DevHlp_SetAsyncNotification, + pdmR3DevHlp_AsyncNotificationCompleted, + pdmR3DevHlp_RTCRegister, + pdmR3DevHlp_PCIBusRegister, + pdmR3DevHlp_PICRegister, + pdmR3DevHlp_APICRegister, + pdmR3DevHlp_IOAPICRegister, + pdmR3DevHlp_HPETRegister, + pdmR3DevHlp_PciRawRegister, + pdmR3DevHlp_DMACRegister, + pdmR3DevHlp_DMARegister, + pdmR3DevHlp_DMAReadMemory, + pdmR3DevHlp_DMAWriteMemory, + pdmR3DevHlp_DMASetDREQ, + pdmR3DevHlp_DMAGetChannelMode, + pdmR3DevHlp_DMASchedule, + pdmR3DevHlp_CMOSWrite, + pdmR3DevHlp_CMOSRead, + pdmR3DevHlp_AssertEMT, + pdmR3DevHlp_AssertOther, + pdmR3DevHlp_LdrGetRCInterfaceSymbols, + pdmR3DevHlp_LdrGetR0InterfaceSymbols, + pdmR3DevHlp_CallR0, + pdmR3DevHlp_VMGetSuspendReason, + pdmR3DevHlp_VMGetResumeReason, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + pdmR3DevHlp_GetUVM, + pdmR3DevHlp_GetVM, + pdmR3DevHlp_GetVMCPU, + pdmR3DevHlp_GetCurrentCpuId, + pdmR3DevHlp_RegisterVMMDevHeap, + pdmR3DevHlp_FirmwareRegister, + pdmR3DevHlp_VMReset, + pdmR3DevHlp_VMSuspend, + pdmR3DevHlp_VMSuspendSaveAndPowerOff, + pdmR3DevHlp_VMPowerOff, + pdmR3DevHlp_A20IsEnabled, + pdmR3DevHlp_A20Set, + pdmR3DevHlp_GetCpuId, + pdmR3DevHlp_TMTimeVirtGet, + pdmR3DevHlp_TMTimeVirtGetFreq, + pdmR3DevHlp_TMTimeVirtGetNano, + pdmR3DevHlp_GetSupDrvSession, + pdmR3DevHlp_QueryGenericUserObject, + PDM_DEVHLPR3_VERSION /* the end */ +}; + + + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetUVM} */ +static DECLCALLBACK(PUVM) pdmR3DevHlp_Untrusted_GetUVM(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + 
AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return NULL; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetVM} */ +static DECLCALLBACK(PVM) pdmR3DevHlp_Untrusted_GetVM(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return NULL; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetVMCPU} */ +static DECLCALLBACK(PVMCPU) pdmR3DevHlp_Untrusted_GetVMCPU(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return NULL; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetCurrentCpuId} */ +static DECLCALLBACK(VMCPUID) pdmR3DevHlp_Untrusted_GetCurrentCpuId(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return NIL_VMCPUID; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnRegisterVMMDevHeap} */ +static DECLCALLBACK(int) pdmR3DevHlp_Untrusted_RegisterVMMDevHeap(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, + RTR3PTR pvHeap, unsigned cbHeap) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + NOREF(GCPhys); NOREF(pvHeap); NOREF(cbHeap); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return VERR_ACCESS_DENIED; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnFirmwareRegister} */ +static DECLCALLBACK(int) pdmR3DevHlp_Untrusted_FirmwareRegister(PPDMDEVINS pDevIns, PCPDMFWREG pFwReg, PCPDMFWHLPR3 *ppFwHlp) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + NOREF(pFwReg); NOREF(ppFwHlp); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return VERR_ACCESS_DENIED; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMReset} */ +static DECLCALLBACK(int) pdmR3DevHlp_Untrusted_VMReset(PPDMDEVINS pDevIns, uint32_t fFlags) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); NOREF(fFlags); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return VERR_ACCESS_DENIED; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSuspend} */ +static DECLCALLBACK(int) pdmR3DevHlp_Untrusted_VMSuspend(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return VERR_ACCESS_DENIED; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMSuspendSaveAndPowerOff} */ +static DECLCALLBACK(int) pdmR3DevHlp_Untrusted_VMSuspendSaveAndPowerOff(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return VERR_ACCESS_DENIED; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnVMPowerOff} */ +static DECLCALLBACK(int) pdmR3DevHlp_Untrusted_VMPowerOff(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! 
'%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return VERR_ACCESS_DENIED; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnA20IsEnabled} */ +static DECLCALLBACK(bool) pdmR3DevHlp_Untrusted_A20IsEnabled(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return false; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnA20Set} */ +static DECLCALLBACK(void) pdmR3DevHlp_Untrusted_A20Set(PPDMDEVINS pDevIns, bool fEnable) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + NOREF(fEnable); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetCpuId} */ +static DECLCALLBACK(void) pdmR3DevHlp_Untrusted_GetCpuId(PPDMDEVINS pDevIns, uint32_t iLeaf, + uint32_t *pEax, uint32_t *pEbx, uint32_t *pEcx, uint32_t *pEdx) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + NOREF(iLeaf); NOREF(pEax); NOREF(pEbx); NOREF(pEcx); NOREF(pEdx); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnGetSupDrvSession} */ +static DECLCALLBACK(PSUPDRVSESSION) pdmR3DevHlp_Untrusted_GetSupDrvSession(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d\n", pDevIns->pReg->szName, pDevIns->iInstance)); + return (PSUPDRVSESSION)0; +} + + +/** @interface_method_impl{PDMDEVHLPR3,pfnQueryGenericUserObject} */ +static DECLCALLBACK(void *) pdmR3DevHlp_Untrusted_QueryGenericUserObject(PPDMDEVINS pDevIns, PCRTUUID pUuid) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + AssertReleaseMsgFailed(("Untrusted device called trusted helper! '%s'/%d %RTuuid\n", + pDevIns->pReg->szName, pDevIns->iInstance, pUuid)); + return NULL; +} + + +/** + * The device helper structure for non-trusted devices. 
+ */ +const PDMDEVHLPR3 g_pdmR3DevHlpUnTrusted = +{ + PDM_DEVHLPR3_VERSION, + pdmR3DevHlp_IOPortRegister, + pdmR3DevHlp_IOPortRegisterRC, + pdmR3DevHlp_IOPortRegisterR0, + pdmR3DevHlp_IOPortDeregister, + pdmR3DevHlp_MMIORegister, + pdmR3DevHlp_MMIORegisterRC, + pdmR3DevHlp_MMIORegisterR0, + pdmR3DevHlp_MMIODeregister, + pdmR3DevHlp_MMIO2Register, + pdmR3DevHlp_MMIOExPreRegister, + pdmR3DevHlp_MMIOExDeregister, + pdmR3DevHlp_MMIOExMap, + pdmR3DevHlp_MMIOExUnmap, + pdmR3DevHlp_MMIOExReduce, + pdmR3DevHlp_MMHyperMapMMIO2, + pdmR3DevHlp_MMIO2MapKernel, + pdmR3DevHlp_ROMRegister, + pdmR3DevHlp_ROMProtectShadow, + pdmR3DevHlp_SSMRegister, + pdmR3DevHlp_TMTimerCreate, + pdmR3DevHlp_TMUtcNow, + pdmR3DevHlp_PhysRead, + pdmR3DevHlp_PhysWrite, + pdmR3DevHlp_PhysGCPhys2CCPtr, + pdmR3DevHlp_PhysGCPhys2CCPtrReadOnly, + pdmR3DevHlp_PhysReleasePageMappingLock, + pdmR3DevHlp_PhysReadGCVirt, + pdmR3DevHlp_PhysWriteGCVirt, + pdmR3DevHlp_PhysGCPtr2GCPhys, + pdmR3DevHlp_MMHeapAlloc, + pdmR3DevHlp_MMHeapAllocZ, + pdmR3DevHlp_MMHeapFree, + pdmR3DevHlp_VMState, + pdmR3DevHlp_VMTeleportedAndNotFullyResumedYet, + pdmR3DevHlp_VMSetError, + pdmR3DevHlp_VMSetErrorV, + pdmR3DevHlp_VMSetRuntimeError, + pdmR3DevHlp_VMSetRuntimeErrorV, + pdmR3DevHlp_DBGFStopV, + pdmR3DevHlp_DBGFInfoRegister, + pdmR3DevHlp_DBGFRegRegister, + pdmR3DevHlp_DBGFTraceBuf, + pdmR3DevHlp_STAMRegister, + pdmR3DevHlp_STAMRegisterF, + pdmR3DevHlp_STAMRegisterV, + pdmR3DevHlp_PCIRegister, + pdmR3DevHlp_PCIRegisterMsi, + pdmR3DevHlp_PCIIORegionRegister, + pdmR3DevHlp_PCISetConfigCallbacks, + pdmR3DevHlp_PCIPhysRead, + pdmR3DevHlp_PCIPhysWrite, + pdmR3DevHlp_PCISetIrq, + pdmR3DevHlp_PCISetIrqNoWait, + pdmR3DevHlp_ISASetIrq, + pdmR3DevHlp_ISASetIrqNoWait, + pdmR3DevHlp_IoApicSendMsi, + pdmR3DevHlp_DriverAttach, + pdmR3DevHlp_DriverDetach, + pdmR3DevHlp_QueueCreate, + pdmR3DevHlp_CritSectInit, + pdmR3DevHlp_CritSectGetNop, + pdmR3DevHlp_CritSectGetNopR0, + pdmR3DevHlp_CritSectGetNopRC, + pdmR3DevHlp_SetDeviceCritSect, + pdmR3DevHlp_ThreadCreate, + pdmR3DevHlp_SetAsyncNotification, + pdmR3DevHlp_AsyncNotificationCompleted, + pdmR3DevHlp_RTCRegister, + pdmR3DevHlp_PCIBusRegister, + pdmR3DevHlp_PICRegister, + pdmR3DevHlp_APICRegister, + pdmR3DevHlp_IOAPICRegister, + pdmR3DevHlp_HPETRegister, + pdmR3DevHlp_PciRawRegister, + pdmR3DevHlp_DMACRegister, + pdmR3DevHlp_DMARegister, + pdmR3DevHlp_DMAReadMemory, + pdmR3DevHlp_DMAWriteMemory, + pdmR3DevHlp_DMASetDREQ, + pdmR3DevHlp_DMAGetChannelMode, + pdmR3DevHlp_DMASchedule, + pdmR3DevHlp_CMOSWrite, + pdmR3DevHlp_CMOSRead, + pdmR3DevHlp_AssertEMT, + pdmR3DevHlp_AssertOther, + pdmR3DevHlp_LdrGetRCInterfaceSymbols, + pdmR3DevHlp_LdrGetR0InterfaceSymbols, + pdmR3DevHlp_CallR0, + pdmR3DevHlp_VMGetSuspendReason, + pdmR3DevHlp_VMGetResumeReason, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + pdmR3DevHlp_Untrusted_GetUVM, + pdmR3DevHlp_Untrusted_GetVM, + pdmR3DevHlp_Untrusted_GetVMCPU, + pdmR3DevHlp_Untrusted_GetCurrentCpuId, + pdmR3DevHlp_Untrusted_RegisterVMMDevHeap, + pdmR3DevHlp_Untrusted_FirmwareRegister, + pdmR3DevHlp_Untrusted_VMReset, + pdmR3DevHlp_Untrusted_VMSuspend, + pdmR3DevHlp_Untrusted_VMSuspendSaveAndPowerOff, + pdmR3DevHlp_Untrusted_VMPowerOff, + pdmR3DevHlp_Untrusted_A20IsEnabled, + pdmR3DevHlp_Untrusted_A20Set, + pdmR3DevHlp_Untrusted_GetCpuId, + pdmR3DevHlp_TMTimeVirtGet, + pdmR3DevHlp_TMTimeVirtGetFreq, + pdmR3DevHlp_TMTimeVirtGetNano, + pdmR3DevHlp_Untrusted_GetSupDrvSession, + pdmR3DevHlp_Untrusted_QueryGenericUserObject, + PDM_DEVHLPR3_VERSION /* the end */ +}; + + + +/** + * Queue consumer 
callback for internal component. + * + * @returns Success indicator. + * If false the item will not be removed and the flushing will stop. + * @param pVM The cross context VM structure. + * @param pItem The item to consume. Upon return this item will be freed. + */ +DECLCALLBACK(bool) pdmR3DevHlpQueueConsumer(PVM pVM, PPDMQUEUEITEMCORE pItem) +{ + PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)pItem; + LogFlow(("pdmR3DevHlpQueueConsumer: enmOp=%d pDevIns=%p\n", pTask->enmOp, pTask->pDevInsR3)); + switch (pTask->enmOp) + { + case PDMDEVHLPTASKOP_ISA_SET_IRQ: + PDMIsaSetIrq(pVM, pTask->u.IsaSetIRQ.iIrq, pTask->u.IsaSetIRQ.iLevel, pTask->u.IsaSetIRQ.uTagSrc); + break; + + case PDMDEVHLPTASKOP_PCI_SET_IRQ: + { + /* Same as pdmR3DevHlp_PCISetIrq, except we've got a tag already. */ + PPDMPCIDEV pPciDev = pTask->u.PciSetIRQ.pPciDevR3; + if (pPciDev) + { + PPDMPCIBUS pBus = pPciDev->Int.s.pPdmBusR3; + Assert(pBus); + + pdmLock(pVM); + pBus->pfnSetIrqR3(pBus->pDevInsR3, pPciDev, pTask->u.PciSetIRQ.iIrq, + pTask->u.PciSetIRQ.iLevel, pTask->u.PciSetIRQ.uTagSrc); + pdmUnlock(pVM); + } + else + AssertReleaseMsgFailed(("No PCI device registered!\n")); + break; + } + + case PDMDEVHLPTASKOP_IOAPIC_SET_IRQ: + PDMIoApicSetIrq(pVM, pTask->u.IoApicSetIRQ.iIrq, pTask->u.IoApicSetIRQ.iLevel, pTask->u.IoApicSetIRQ.uTagSrc); + break; + + default: + AssertReleaseMsgFailed(("Invalid operation %d\n", pTask->enmOp)); + break; + } + return true; +} + +/** @} */ + diff --git a/src/VBox/VMM/VMMR3/PDMDevMiscHlp.cpp b/src/VBox/VMM/VMMR3/PDMDevMiscHlp.cpp new file mode 100644 index 00000000..0bf18e46 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMDevMiscHlp.cpp @@ -0,0 +1,557 @@ +/* $Id: PDMDevMiscHlp.cpp $ */ +/** @file + * PDM - Pluggable Device and Driver Manager, Misc. Device Helpers. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_DEVICE +#include "PDMInternal.h" +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include + +#include +#include +#include +#include +#include + + +#include "PDMInline.h" +#include "dtrace/VBoxVMM.h" + + + +/** @name Ring-3 PIC Helpers + * @{ + */ + +/** @interface_method_impl{PDMPICHLPR3,pfnSetInterruptFF} */ +static DECLCALLBACK(void) pdmR3PicHlp_SetInterruptFF(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + PVMCPU pVCpu = &pVM->aCpus[0]; /* for PIC we always deliver to CPU 0, MP use APIC */ + APICLocalInterrupt(pVCpu, 0 /* u8Pin */, 1 /* u8Level */, VINF_SUCCESS /* rcRZ */); +} + + +/** @interface_method_impl{PDMPICHLPR3,pfnClearInterruptFF} */ +static DECLCALLBACK(void) pdmR3PicHlp_ClearInterruptFF(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + PVMCPU pVCpu = &pVM->aCpus[0]; /* for PIC we always deliver to CPU 0, MP use APIC */ + APICLocalInterrupt(pVCpu, 0 /* u8Pin */, 0 /* u8Level */, VINF_SUCCESS /* rcRZ */); +} + + +/** @interface_method_impl{PDMPICHLPR3,pfnLock} */ +static DECLCALLBACK(int) pdmR3PicHlp_Lock(PPDMDEVINS pDevIns, int rc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + return pdmLockEx(pDevIns->Internal.s.pVMR3, rc); +} + + +/** @interface_method_impl{PDMPICHLPR3,pfnUnlock} */ +static DECLCALLBACK(void) pdmR3PicHlp_Unlock(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + pdmUnlock(pDevIns->Internal.s.pVMR3); +} + + +/** @interface_method_impl{PDMPICHLPR3,pfnGetRCHelpers} */ +static DECLCALLBACK(PCPDMPICHLPRC) pdmR3PicHlp_GetRCHelpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + RTRCPTR pRCHelpers = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCPicHlp", &pRCHelpers); + AssertReleaseRC(rc); + AssertRelease(pRCHelpers); + } + + LogFlow(("pdmR3PicHlp_GetRCHelpers: caller='%s'/%d: returns %RRv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pRCHelpers)); + return pRCHelpers; +} + + +/** @interface_method_impl{PDMPICHLPR3,pfnGetR0Helpers} */ +static DECLCALLBACK(PCPDMPICHLPR0) pdmR3PicHlp_GetR0Helpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + PCPDMPICHLPR0 pR0Helpers = 0; + int rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0PicHlp", &pR0Helpers); + AssertReleaseRC(rc); + AssertRelease(pR0Helpers); + LogFlow(("pdmR3PicHlp_GetR0Helpers: caller='%s'/%d: returns %RHv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pR0Helpers)); + return pR0Helpers; +} + + +/** + * PIC Device Helpers. 
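+ * (Handed to the PIC device emulation; the SetInterruptFF and ClearInterruptFF callbacks above raise and lower the interrupt line on vCPU 0's local APIC, pin 0.)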
+ */ +const PDMPICHLPR3 g_pdmR3DevPicHlp = +{ + PDM_PICHLPR3_VERSION, + pdmR3PicHlp_SetInterruptFF, + pdmR3PicHlp_ClearInterruptFF, + pdmR3PicHlp_Lock, + pdmR3PicHlp_Unlock, + pdmR3PicHlp_GetRCHelpers, + pdmR3PicHlp_GetR0Helpers, + PDM_PICHLPR3_VERSION /* the end */ +}; + +/** @} */ + + +/** @name Ring-3 I/O APIC Helpers + * @{ + */ + +/** @interface_method_impl{PDMIOAPICHLPR3,pfnApicBusDeliver} */ +static DECLCALLBACK(int) pdmR3IoApicHlp_ApicBusDeliver(PPDMDEVINS pDevIns, uint8_t u8Dest, uint8_t u8DestMode, + uint8_t u8DeliveryMode, uint8_t uVector, uint8_t u8Polarity, + uint8_t u8TriggerMode, uint32_t uTagSrc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + LogFlow(("pdmR3IoApicHlp_ApicBusDeliver: caller='%s'/%d: u8Dest=%RX8 u8DestMode=%RX8 u8DeliveryMode=%RX8 uVector=%RX8 u8Polarity=%RX8 u8TriggerMode=%RX8 uTagSrc=%#x\n", + pDevIns->pReg->szName, pDevIns->iInstance, u8Dest, u8DestMode, u8DeliveryMode, uVector, u8Polarity, u8TriggerMode, uTagSrc)); + return APICBusDeliver(pVM, u8Dest, u8DestMode, u8DeliveryMode, uVector, u8Polarity, u8TriggerMode, uTagSrc); +} + + +/** @interface_method_impl{PDMIOAPICHLPR3,pfnLock} */ +static DECLCALLBACK(int) pdmR3IoApicHlp_Lock(PPDMDEVINS pDevIns, int rc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3IoApicHlp_Lock: caller='%s'/%d: rc=%Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return pdmLockEx(pDevIns->Internal.s.pVMR3, rc); +} + + +/** @interface_method_impl{PDMIOAPICHLPR3,pfnUnlock} */ +static DECLCALLBACK(void) pdmR3IoApicHlp_Unlock(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3IoApicHlp_Unlock: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pdmUnlock(pDevIns->Internal.s.pVMR3); +} + + +/** @interface_method_impl{PDMIOAPICHLPR3,pfnGetRCHelpers} */ +static DECLCALLBACK(PCPDMIOAPICHLPRC) pdmR3IoApicHlp_GetRCHelpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + RTRCPTR pRCHelpers = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCIoApicHlp", &pRCHelpers); + AssertReleaseRC(rc); + AssertRelease(pRCHelpers); + } + + LogFlow(("pdmR3IoApicHlp_GetRCHelpers: caller='%s'/%d: returns %RRv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pRCHelpers)); + return pRCHelpers; +} + + +/** @interface_method_impl{PDMIOAPICHLPR3,pfnGetR0Helpers} */ +static DECLCALLBACK(PCPDMIOAPICHLPR0) pdmR3IoApicHlp_GetR0Helpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + PCPDMIOAPICHLPR0 pR0Helpers = 0; + int rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0IoApicHlp", &pR0Helpers); + AssertReleaseRC(rc); + AssertRelease(pR0Helpers); + LogFlow(("pdmR3IoApicHlp_GetR0Helpers: caller='%s'/%d: returns %RHv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pR0Helpers)); + return pR0Helpers; +} + + +/** + * I/O APIC Device Helpers. 
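+ * (ApicBusDeliver above forwards interrupt messages from the I/O APIC to the virtual APIC bus via APICBusDeliver; Lock and Unlock wrap the global PDM lock.)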
+ */ +const PDMIOAPICHLPR3 g_pdmR3DevIoApicHlp = +{ + PDM_IOAPICHLPR3_VERSION, + pdmR3IoApicHlp_ApicBusDeliver, + pdmR3IoApicHlp_Lock, + pdmR3IoApicHlp_Unlock, + pdmR3IoApicHlp_GetRCHelpers, + pdmR3IoApicHlp_GetR0Helpers, + PDM_IOAPICHLPR3_VERSION /* the end */ +}; + +/** @} */ + + + + +/** @name Ring-3 PCI Bus Helpers + * @{ + */ + +/** @interface_method_impl{PDMPCIHLPR3,pfnIsaSetIrq} */ +static DECLCALLBACK(void) pdmR3PciHlp_IsaSetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + Log4(("pdmR3PciHlp_IsaSetIrq: iIrq=%d iLevel=%d uTagSrc=%#x\n", iIrq, iLevel, uTagSrc)); + PDMIsaSetIrq(pDevIns->Internal.s.pVMR3, iIrq, iLevel, uTagSrc); +} + +/** @interface_method_impl{PDMPCIHLPR3,pfnIoApicSetIrq} */ +static DECLCALLBACK(void) pdmR3PciHlp_IoApicSetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + Log4(("pdmR3PciHlp_IoApicSetIrq: iIrq=%d iLevel=%d uTagSrc=%#x\n", iIrq, iLevel, uTagSrc)); + PDMIoApicSetIrq(pDevIns->Internal.s.pVMR3, iIrq, iLevel, uTagSrc); +} + +/** @interface_method_impl{PDMPCIHLPR3,pfnIoApicSendMsi} */ +static DECLCALLBACK(void) pdmR3PciHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t uValue, uint32_t uTagSrc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + Log4(("pdmR3PciHlp_IoApicSendMsi: address=%p value=%x uTagSrc=%#x\n", GCPhys, uValue, uTagSrc)); + PDMIoApicSendMsi(pDevIns->Internal.s.pVMR3, GCPhys, uValue, uTagSrc); +} + +/** @interface_method_impl{PDMPCIHLPR3,pfnIsMMIOExBase} */ +static DECLCALLBACK(bool) pdmR3PciHlp_IsMMIO2Base(PPDMDEVINS pDevIns, PPDMDEVINS pOwner, RTGCPHYS GCPhys) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + VM_ASSERT_EMT(pDevIns->Internal.s.pVMR3); + bool fRc = PGMR3PhysMMIOExIsBase(pDevIns->Internal.s.pVMR3, pOwner, GCPhys); + Log4(("pdmR3PciHlp_IsMMIOExBase: pOwner=%p GCPhys=%RGp -> %RTbool\n", pOwner, GCPhys, fRc)); + return fRc; +} + + +/** @interface_method_impl{PDMPCIHLPR3,pfnLock} */ +static DECLCALLBACK(int) pdmR3PciHlp_Lock(PPDMDEVINS pDevIns, int rc) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3PciHlp_Lock: caller='%s'/%d: rc=%Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return pdmLockEx(pDevIns->Internal.s.pVMR3, rc); +} + + +/** @interface_method_impl{PDMPCIHLPR3,pfnUnlock} */ +static DECLCALLBACK(void) pdmR3PciHlp_Unlock(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3PciHlp_Unlock: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance)); + pdmUnlock(pDevIns->Internal.s.pVMR3); +} + + +/** @interface_method_impl{PDMPCIHLPR3,pfnGetRCHelpers} */ +static DECLCALLBACK(PCPDMPCIHLPRC) pdmR3PciHlp_GetRCHelpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + RTRCPTR pRCHelpers = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCPciHlp", &pRCHelpers); + AssertReleaseRC(rc); + AssertRelease(pRCHelpers); + } + + LogFlow(("pdmR3PciHlp_GetRCHelpers: caller='%s'/%d: returns %RRv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pRCHelpers)); + return pRCHelpers; +} + + +/** @interface_method_impl{PDMPCIHLPR3,pfnGetR0Helpers} */ +static DECLCALLBACK(PCPDMPCIHLPR0) pdmR3PciHlp_GetR0Helpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + PCPDMPCIHLPR0 pR0Helpers = 0; + int rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0PciHlp", &pR0Helpers); + AssertReleaseRC(rc); + AssertRelease(pR0Helpers); + 
LogFlow(("pdmR3PciHlp_GetR0Helpers: caller='%s'/%d: returns %RHv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pR0Helpers)); + return pR0Helpers; +} + + +/** + * PCI Bus Device Helpers. + */ +const PDMPCIHLPR3 g_pdmR3DevPciHlp = +{ + PDM_PCIHLPR3_VERSION, + pdmR3PciHlp_IsaSetIrq, + pdmR3PciHlp_IoApicSetIrq, + pdmR3PciHlp_IoApicSendMsi, + pdmR3PciHlp_IsMMIO2Base, + pdmR3PciHlp_GetRCHelpers, + pdmR3PciHlp_GetR0Helpers, + pdmR3PciHlp_Lock, + pdmR3PciHlp_Unlock, + PDM_PCIHLPR3_VERSION, /* the end */ +}; + +/** @} */ + + + + +/** @name Ring-3 HPET Helpers + * {@ + */ + +/** @interface_method_impl{PDMHPETHLPR3,pfnSetLegacyMode} */ +static DECLCALLBACK(int) pdmR3HpetHlp_SetLegacyMode(PPDMDEVINS pDevIns, bool fActivated) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3HpetHlp_SetLegacyMode: caller='%s'/%d: fActivated=%RTbool\n", pDevIns->pReg->szName, pDevIns->iInstance, fActivated)); + + size_t i; + int rc = VINF_SUCCESS; + static const char * const s_apszDevsToNotify[] = + { + "i8254", + "mc146818" + }; + for (i = 0; i < RT_ELEMENTS(s_apszDevsToNotify); i++) + { + PPDMIBASE pBase; + rc = PDMR3QueryDevice(pDevIns->Internal.s.pVMR3->pUVM, "i8254", 0, &pBase); + if (RT_SUCCESS(rc)) + { + PPDMIHPETLEGACYNOTIFY pPort = PDMIBASE_QUERY_INTERFACE(pBase, PDMIHPETLEGACYNOTIFY); + AssertLogRelMsgBreakStmt(pPort, ("%s\n", s_apszDevsToNotify[i]), rc = VERR_PDM_HPET_LEGACY_NOTIFY_MISSING); + pPort->pfnModeChanged(pPort, fActivated); + } + else if ( rc == VERR_PDM_DEVICE_NOT_FOUND + || rc == VERR_PDM_DEVICE_INSTANCE_NOT_FOUND) + rc = VINF_SUCCESS; /* the device isn't configured, ignore. */ + else + AssertLogRelMsgFailedBreak(("%s -> %Rrc\n", s_apszDevsToNotify[i], rc)); + } + + /* Don't bother cleaning up, any failure here will cause a guru meditation. */ + + LogFlow(("pdmR3HpetHlp_SetLegacyMode: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMHPETHLPR3,pfnSetIrq} */ +static DECLCALLBACK(int) pdmR3HpetHlp_SetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR3HpetHlp_SetIrq: caller='%s'/%d: iIrq=%d iLevel=%d\n", pDevIns->pReg->szName, pDevIns->iInstance, iIrq, iLevel)); + PVM pVM = pDevIns->Internal.s.pVMR3; + + pdmLock(pVM); + uint32_t uTagSrc; + if (iLevel & PDM_IRQ_LEVEL_HIGH) + { + pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing); + if (iLevel == PDM_IRQ_LEVEL_HIGH) + VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + else + VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + } + else + uTagSrc = pDevIns->Internal.s.uLastIrqTag; + + PDMIsaSetIrq(pVM, iIrq, iLevel, uTagSrc); /* (The API takes the lock recursively.) 
*/ + + if (iLevel == PDM_IRQ_LEVEL_LOW) + VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc)); + pdmUnlock(pVM); + return 0; +} + + +/** @interface_method_impl{PDMHPETHLPR3,pfnGetRCHelpers} */ +static DECLCALLBACK(PCPDMHPETHLPRC) pdmR3HpetHlp_GetRCHelpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + RTRCPTR pRCHelpers = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCHpetHlp", &pRCHelpers); + AssertReleaseRC(rc); + AssertRelease(pRCHelpers); + } + + LogFlow(("pdmR3HpetHlp_GetGCHelpers: caller='%s'/%d: returns %RRv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pRCHelpers)); + return pRCHelpers; +} + + +/** @interface_method_impl{PDMHPETHLPR3,pfnGetR0Helpers} */ +static DECLCALLBACK(PCPDMHPETHLPR0) pdmR3HpetHlp_GetR0Helpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + PCPDMHPETHLPR0 pR0Helpers = 0; + int rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0HpetHlp", &pR0Helpers); + AssertReleaseRC(rc); + AssertRelease(pR0Helpers); + LogFlow(("pdmR3HpetHlp_GetR0Helpers: caller='%s'/%d: returns %RHv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pR0Helpers)); + return pR0Helpers; +} + + +/** + * HPET Device Helpers. + */ +const PDMHPETHLPR3 g_pdmR3DevHpetHlp = +{ + PDM_HPETHLPR3_VERSION, + pdmR3HpetHlp_GetRCHelpers, + pdmR3HpetHlp_GetR0Helpers, + pdmR3HpetHlp_SetLegacyMode, + pdmR3HpetHlp_SetIrq, + PDM_HPETHLPR3_VERSION, /* the end */ +}; + +/** @} */ + + +/** @name Ring-3 Raw PCI Device Helpers + * {@ + */ + +/** @interface_method_impl{PDMPCIRAWHLPR3,pfnGetRCHelpers} */ +static DECLCALLBACK(PCPDMPCIRAWHLPRC) pdmR3PciRawHlp_GetRCHelpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + RTRCPTR pRCHelpers = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCPciRawHlp", &pRCHelpers); + AssertReleaseRC(rc); + AssertRelease(pRCHelpers); + } + + LogFlow(("pdmR3PciRawHlp_GetGCHelpers: caller='%s'/%d: returns %RRv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pRCHelpers)); + return pRCHelpers; +} + + +/** @interface_method_impl{PDMPCIRAWHLPR3,pfnGetR0Helpers} */ +static DECLCALLBACK(PCPDMPCIRAWHLPR0) pdmR3PciRawHlp_GetR0Helpers(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + PVM pVM = pDevIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + PCPDMHPETHLPR0 pR0Helpers = NIL_RTR0PTR; + int rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0PciRawHlp", &pR0Helpers); + AssertReleaseRC(rc); + AssertRelease(pR0Helpers); + LogFlow(("pdmR3PciRawHlp_GetR0Helpers: caller='%s'/%d: returns %RHv\n", + pDevIns->pReg->szName, pDevIns->iInstance, pR0Helpers)); + return pR0Helpers; +} + + +/** + * Raw PCI Device Helpers. + */ +const PDMPCIRAWHLPR3 g_pdmR3DevPciRawHlp = +{ + PDM_PCIRAWHLPR3_VERSION, + pdmR3PciRawHlp_GetRCHelpers, + pdmR3PciRawHlp_GetR0Helpers, + PDM_PCIRAWHLPR3_VERSION, /* the end */ +}; + +/** @} */ + + +/* none yet */ + +/** + * Firmware Device Helpers. + */ +const PDMFWHLPR3 g_pdmR3DevFirmwareHlp = +{ + PDM_FWHLPR3_VERSION, + PDM_FWHLPR3_VERSION +}; + +/** + * DMAC Device Helpers. + */ +const PDMDMACHLP g_pdmR3DevDmacHlp = +{ + PDM_DMACHLP_VERSION +}; + + + + +/* none yet */ + +/** + * RTC Device Helpers. 
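+ * (No callbacks are defined yet; the table only carries the interface version.)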
+ */ +const PDMRTCHLP g_pdmR3DevRtcHlp = +{ + PDM_RTCHLP_VERSION +}; + diff --git a/src/VBox/VMM/VMMR3/PDMDevice.cpp b/src/VBox/VMM/VMMR3/PDMDevice.cpp new file mode 100644 index 00000000..0d0d76d0 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMDevice.cpp @@ -0,0 +1,1089 @@ +/* $Id: PDMDevice.cpp $ */ +/** @file + * PDM - Pluggable Device and Driver Manager, Device parts. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_DEVICE +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Internal callback structure pointer. + * The main purpose is to define the extra data we associate + * with PDMDEVREGCB so we can find the VM instance and so on. + */ +typedef struct PDMDEVREGCBINT +{ + /** The callback structure. */ + PDMDEVREGCB Core; + /** A bit of padding. */ + uint32_t u32[4]; + /** VM Handle. */ + PVM pVM; + /** Pointer to the configuration node the registrations should be + * associated with. Can be NULL. */ + PCFGMNODE pCfgNode; +} PDMDEVREGCBINT; +/** Pointer to a PDMDEVREGCBINT structure. */ +typedef PDMDEVREGCBINT *PPDMDEVREGCBINT; +/** Pointer to a const PDMDEVREGCBINT structure. */ +typedef const PDMDEVREGCBINT *PCPDMDEVREGCBINT; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) pdmR3DevReg_Register(PPDMDEVREGCB pCallbacks, PCPDMDEVREG pReg); +static int pdmR3DevLoadModules(PVM pVM); +static int pdmR3DevLoad(PVM pVM, PPDMDEVREGCBINT pRegCB, const char *pszFilename, const char *pszName); + + + + +/** + * This function will initialize the devices for this VM instance. + * + * + * First of all this mean loading the builtin device and letting them + * register themselves. Beyond that any additional device modules are + * loaded and called for registration. + * + * Then the device configuration is enumerated, the instantiation order + * is determined, and finally they are instantiated. 
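+ *
+ * As a sketch of the registration step (hypothetical module; the name
+ * g_DeviceMyDevice is made up and not part of this code), a device module's
+ * registration entry point typically looks along these lines:
+ * @code
+ *  extern "C" DECLEXPORT(int) VBoxDevicesRegister(PPDMDEVREGCB pCallbacks, uint32_t u32Version)
+ *  {
+ *      // Refuse to load against an older PDM than the headers this module was built with.
+ *      AssertLogRelMsgReturn(u32Version >= VBOX_VERSION, ("%#x\n", u32Version), VERR_VERSION_MISMATCH);
+ *      // Hand each PDMDEVREG to PDM (pfnRegister is set to pdmR3DevReg_Register in this file).
+ *      return pCallbacks->pfnRegister(pCallbacks, &g_DeviceMyDevice);
+ *  }
+ * @endcode
+ * where g_DeviceMyDevice is a PDMDEVREG with the name, class and pfnConstruct
+ * members filled in.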
+ * + * After all devices have been successfully instantiated the primary + * PCI Bus device is called to emulate the PCI BIOS, i.e. making the + * resource assignments. If there is no PCI device, this step is of course + * skipped. + * + * Finally the init completion routines of the instantiated devices + * are called. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3DevInit(PVM pVM) +{ + LogFlow(("pdmR3DevInit:\n")); + + AssertRelease(!(RT_UOFFSETOF(PDMDEVINS, achInstanceData) & 15)); + AssertRelease(sizeof(pVM->pdm.s.pDevInstances->Internal.s) <= sizeof(pVM->pdm.s.pDevInstances->Internal.padding)); + + /* + * Load device modules. + */ + int rc = pdmR3DevLoadModules(pVM); + if (RT_FAILURE(rc)) + return rc; + +#ifdef VBOX_WITH_USB + /* ditto for USB Devices. */ + rc = pdmR3UsbLoadModules(pVM); + if (RT_FAILURE(rc)) + return rc; +#endif + + /* + * Get the RC & R0 devhlps and create the devhlp R3 task queue. + */ + PCPDMDEVHLPRC pHlpRC = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCDevHlp", &pHlpRC); + AssertReleaseRCReturn(rc, rc); + } + + PCPDMDEVHLPR0 pHlpR0; + rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0DevHlp", &pHlpR0); + AssertReleaseRCReturn(rc, rc); + + rc = PDMR3QueueCreateInternal(pVM, sizeof(PDMDEVHLPTASK), 8, 0, pdmR3DevHlpQueueConsumer, true, "DevHlp", + &pVM->pdm.s.pDevHlpQueueR3); + AssertRCReturn(rc, rc); + pVM->pdm.s.pDevHlpQueueR0 = PDMQueueR0Ptr(pVM->pdm.s.pDevHlpQueueR3); + pVM->pdm.s.pDevHlpQueueRC = PDMQueueRCPtr(pVM->pdm.s.pDevHlpQueueR3); + + + /* + * + * Enumerate the device instance configurations + * and come up with a instantiation order. + * + */ + /* Switch to /Devices, which contains the device instantiations. */ + PCFGMNODE pDevicesNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "Devices"); + + /* + * Count the device instances. + */ + PCFGMNODE pCur; + PCFGMNODE pInstanceNode; + unsigned cDevs = 0; + for (pCur = CFGMR3GetFirstChild(pDevicesNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + for (pInstanceNode = CFGMR3GetFirstChild(pCur); pInstanceNode; pInstanceNode = CFGMR3GetNextChild(pInstanceNode)) + cDevs++; + if (!cDevs) + { + Log(("PDM: No devices were configured!\n")); + return VINF_SUCCESS; + } + Log2(("PDM: cDevs=%u\n", cDevs)); + + /* + * Collect info on each device instance. + */ + struct DEVORDER + { + /** Configuration node. */ + PCFGMNODE pNode; + /** Pointer to device. */ + PPDMDEV pDev; + /** Init order. */ + uint32_t u32Order; + /** VBox instance number. */ + uint32_t iInstance; + } *paDevs = (struct DEVORDER *)alloca(sizeof(paDevs[0]) * (cDevs + 1)); /* (One extra for swapping) */ + Assert(paDevs); + unsigned i = 0; + for (pCur = CFGMR3GetFirstChild(pDevicesNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + /* Get the device name. */ + char szName[sizeof(paDevs[0].pDev->pReg->szName)]; + rc = CFGMR3GetName(pCur, szName, sizeof(szName)); + AssertMsgRCReturn(rc, ("Configuration error: device name is too long (or something)! rc=%Rrc\n", rc), rc); + + /* Find the device. */ + PPDMDEV pDev = pdmR3DevLookup(pVM, szName); + AssertLogRelMsgReturn(pDev, ("Configuration error: device '%s' not found!\n", szName), VERR_PDM_DEVICE_NOT_FOUND); + + /* Configured priority or use default based on device class? 
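+ (When no explicit "Priority" value is configured, the loop below derives the default order from the lowest set bit of the device class mask.)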
*/ + uint32_t u32Order; + rc = CFGMR3QueryU32(pCur, "Priority", &u32Order); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + { + uint32_t u32 = pDev->pReg->fClass; + for (u32Order = 1; !(u32 & u32Order); u32Order <<= 1) + /* nop */; + } + else + AssertMsgRCReturn(rc, ("Configuration error: reading \"Priority\" for the '%s' device failed rc=%Rrc!\n", szName, rc), rc); + + /* Enumerate the device instances. */ + uint32_t const iStart = i; + for (pInstanceNode = CFGMR3GetFirstChild(pCur); pInstanceNode; pInstanceNode = CFGMR3GetNextChild(pInstanceNode)) + { + paDevs[i].pNode = pInstanceNode; + paDevs[i].pDev = pDev; + paDevs[i].u32Order = u32Order; + + /* Get the instance number. */ + char szInstance[32]; + rc = CFGMR3GetName(pInstanceNode, szInstance, sizeof(szInstance)); + AssertMsgRCReturn(rc, ("Configuration error: instance name is too long (or something)! rc=%Rrc\n", rc), rc); + char *pszNext = NULL; + rc = RTStrToUInt32Ex(szInstance, &pszNext, 0, &paDevs[i].iInstance); + AssertMsgRCReturn(rc, ("Configuration error: RTStrToInt32Ex failed on the instance name '%s'! rc=%Rrc\n", szInstance, rc), rc); + AssertMsgReturn(!*pszNext, ("Configuration error: the instance name '%s' isn't all digits. (%s)\n", szInstance, pszNext), VERR_INVALID_PARAMETER); + + /* next instance */ + i++; + } + + /* check the number of instances */ + if (i - iStart > pDev->pReg->cMaxInstances) + AssertLogRelMsgFailedReturn(("Configuration error: Too many instances of %s was configured: %u, max %u\n", + szName, i - iStart, pDev->pReg->cMaxInstances), + VERR_PDM_TOO_MANY_DEVICE_INSTANCES); + } /* devices */ + Assert(i == cDevs); + + /* + * Sort (bubble) the device array ascending on u32Order and instance number + * for a device. + */ + unsigned c = cDevs - 1; + while (c) + { + unsigned j = 0; + for (i = 0; i < c; i++) + if ( paDevs[i].u32Order > paDevs[i + 1].u32Order + || ( paDevs[i].u32Order == paDevs[i + 1].u32Order + && paDevs[i].iInstance > paDevs[i + 1].iInstance + && paDevs[i].pDev == paDevs[i + 1].pDev) ) + { + paDevs[cDevs] = paDevs[i + 1]; + paDevs[i + 1] = paDevs[i]; + paDevs[i] = paDevs[cDevs]; + j = i; + } + c = j; + } + + + /* + * + * Instantiate the devices. + * + */ + for (i = 0; i < cDevs; i++) + { + /* + * Gather a bit of config. + */ + /* trusted */ + bool fTrusted; + rc = CFGMR3QueryBool(paDevs[i].pNode, "Trusted", &fTrusted); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + fTrusted = false; + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("configuration error: failed to query boolean \"Trusted\", rc=%Rrc\n", rc)); + return rc; + } + /* config node */ + PCFGMNODE pConfigNode = CFGMR3GetChild(paDevs[i].pNode, "Config"); + if (!pConfigNode) + { + rc = CFGMR3InsertNode(paDevs[i].pNode, "Config", &pConfigNode); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Failed to create Config node! rc=%Rrc\n", rc)); + return rc; + } + } + CFGMR3SetRestrictedRoot(pConfigNode); + + /* + * Allocate the device instance and critical section. + */ + AssertReturn(paDevs[i].pDev->cInstances < paDevs[i].pDev->pReg->cMaxInstances, VERR_PDM_TOO_MANY_DEVICE_INSTANCES); + size_t cb = RT_UOFFSETOF_DYN(PDMDEVINS, achInstanceData[paDevs[i].pDev->pReg->cbInstance]); + cb = RT_ALIGN_Z(cb, 16); + PPDMDEVINS pDevIns; + if (paDevs[i].pDev->pReg->fFlags & (PDM_DEVREG_FLAGS_RC | PDM_DEVREG_FLAGS_R0)) + rc = MMR3HyperAllocOnceNoRel(pVM, cb, 0, MM_TAG_PDM_DEVICE, (void **)&pDevIns); + else + rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_DEVICE, cb, (void **)&pDevIns); + AssertLogRelMsgRCReturn(rc, + ("Failed to allocate %d bytes of instance data for device '%s'. 
rc=%Rrc\n", + cb, paDevs[i].pDev->pReg->szName, rc), + rc); + PPDMCRITSECT pCritSect; + if (paDevs[i].pDev->pReg->fFlags & (PDM_DEVREG_FLAGS_RC | PDM_DEVREG_FLAGS_R0)) + rc = MMHyperAlloc(pVM, sizeof(*pCritSect), 0, MM_TAG_PDM_DEVICE, (void **)&pCritSect); + else + rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_DEVICE, sizeof(*pCritSect), (void **)&pCritSect); + AssertLogRelMsgRCReturn(rc, ("Failed to allocate a critical section for the device (%Rrc)\n", rc), rc); + + /* + * Initialize it. + */ + pDevIns->u32Version = PDM_DEVINS_VERSION; + pDevIns->iInstance = paDevs[i].iInstance; + //pDevIns->Internal.s.pNextR3 = NULL; + //pDevIns->Internal.s.pPerDeviceNextR3 = NULL; + pDevIns->Internal.s.pDevR3 = paDevs[i].pDev; + pDevIns->Internal.s.pVMR3 = pVM; + pDevIns->Internal.s.pVMR0 = pVM->pVMR0; + pDevIns->Internal.s.pVMRC = pVM->pVMRC; + //pDevIns->Internal.s.pLunsR3 = NULL; + pDevIns->Internal.s.pCfgHandle = paDevs[i].pNode; + //pDevIns->Internal.s.pHeadPciDevR3 = NULL; + //pDevIns->Internal.s.pHeadPciDevR0 = 0; + //pDevIns->Internal.s.pHeadPciDevRC = 0; + pDevIns->Internal.s.fIntFlags = PDMDEVINSINT_FLAGS_SUSPENDED; + //pDevIns->Internal.s.uLastIrqTag = 0; + pDevIns->pHlpR3 = fTrusted ? &g_pdmR3DevHlpTrusted : &g_pdmR3DevHlpUnTrusted; + pDevIns->pHlpRC = pHlpRC; + pDevIns->pHlpR0 = pHlpR0; + pDevIns->pReg = paDevs[i].pDev->pReg; + pDevIns->pCfg = pConfigNode; + //pDevIns->IBase.pfnQueryInterface = NULL; + //pDevIns->fTracing = 0; + pDevIns->idTracing = ++pVM->pdm.s.idTracingDev; + pDevIns->pvInstanceDataR3 = &pDevIns->achInstanceData[0]; + pDevIns->pvInstanceDataRC = pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC + ? MMHyperR3ToRC(pVM, pDevIns->pvInstanceDataR3) : NIL_RTRCPTR; + pDevIns->pvInstanceDataR0 = pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0 + ? MMHyperR3ToR0(pVM, pDevIns->pvInstanceDataR3) : NIL_RTR0PTR; + + pDevIns->pCritSectRoR3 = pCritSect; + pDevIns->pCritSectRoRC = pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_RC + ? MMHyperR3ToRC(pVM, pCritSect) : NIL_RTRCPTR; + pDevIns->pCritSectRoR0 = pDevIns->pReg->fFlags & PDM_DEVREG_FLAGS_R0 + ? MMHyperR3ToR0(pVM, pCritSect) : NIL_RTR0PTR; + + rc = pdmR3CritSectInitDeviceAuto(pVM, pDevIns, pCritSect, RT_SRC_POS, + "%s#%uAuto", pDevIns->pReg->szName, pDevIns->iInstance); + AssertLogRelRCReturn(rc, rc); + + /* + * Link it into all the lists. + */ + /* The global instance FIFO. */ + PPDMDEVINS pPrev1 = pVM->pdm.s.pDevInstances; + if (!pPrev1) + pVM->pdm.s.pDevInstances = pDevIns; + else + { + while (pPrev1->Internal.s.pNextR3) + pPrev1 = pPrev1->Internal.s.pNextR3; + pPrev1->Internal.s.pNextR3 = pDevIns; + } + + /* The per device instance FIFO. */ + PPDMDEVINS pPrev2 = paDevs[i].pDev->pInstances; + if (!pPrev2) + paDevs[i].pDev->pInstances = pDevIns; + else + { + while (pPrev2->Internal.s.pPerDeviceNextR3) + pPrev2 = pPrev2->Internal.s.pPerDeviceNextR3; + pPrev2->Internal.s.pPerDeviceNextR3 = pDevIns; + } + + /* + * Call the constructor. + */ + paDevs[i].pDev->cInstances++; + Log(("PDM: Constructing device '%s' instance %d...\n", pDevIns->pReg->szName, pDevIns->iInstance)); + rc = pDevIns->pReg->pfnConstruct(pDevIns, pDevIns->iInstance, pDevIns->pCfg); + if (RT_FAILURE(rc)) + { + LogRel(("PDM: Failed to construct '%s'/%d! %Rra\n", pDevIns->pReg->szName, pDevIns->iInstance, rc)); + paDevs[i].pDev->cInstances--; + /* Because we're damn lazy, the destructor will be called even if + the constructor fails. So, no unlinking. */ + return rc == VERR_VERSION_MISMATCH ? 
VERR_PDM_DEVICE_VERSION_MISMATCH : rc; + } + } /* for device instances */ + +#ifdef VBOX_WITH_USB + /* ditto for USB Devices. */ + rc = pdmR3UsbInstantiateDevices(pVM); + if (RT_FAILURE(rc)) + return rc; +#endif + + LogFlow(("pdmR3DevInit: returns %Rrc\n", VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** + * Performs the init complete callback after ring-0 and raw-mode has been + * initialized. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3DevInitComplete(PVM pVM) +{ + int rc; + + /* + * Iterate thru the device instances and work the callback. + */ + for (PPDMDEVINS pDevIns = pVM->pdm.s.pDevInstances; pDevIns; pDevIns = pDevIns->Internal.s.pNextR3) + { + if (pDevIns->pReg->pfnInitComplete) + { + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + rc = pDevIns->pReg->pfnInitComplete(pDevIns); + PDMCritSectLeave(pDevIns->pCritSectRoR3); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("InitComplete on device '%s'/%d failed with rc=%Rrc\n", + pDevIns->pReg->szName, pDevIns->iInstance, rc)); + return rc; + } + } + } + +#ifdef VBOX_WITH_USB + rc = pdmR3UsbVMInitComplete(pVM); + if (RT_FAILURE(rc)) + { + Log(("pdmR3DevInit: returns %Rrc\n", rc)); + return rc; + } +#endif + + LogFlow(("pdmR3DevInit: returns %Rrc\n", VINF_SUCCESS)); + return VINF_SUCCESS; +} + + +/** + * Lookups a device structure by name. + * @internal + */ +PPDMDEV pdmR3DevLookup(PVM pVM, const char *pszName) +{ + size_t cchName = strlen(pszName); + for (PPDMDEV pDev = pVM->pdm.s.pDevs; pDev; pDev = pDev->pNext) + if ( pDev->cchName == cchName + && !strcmp(pDev->pReg->szName, pszName)) + return pDev; + return NULL; +} + + +/** + * Loads the device modules. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int pdmR3DevLoadModules(PVM pVM) +{ + /* + * Initialize the callback structure. + */ + PDMDEVREGCBINT RegCB; + RegCB.Core.u32Version = PDM_DEVREG_CB_VERSION; + RegCB.Core.pfnRegister = pdmR3DevReg_Register; + RegCB.pVM = pVM; + RegCB.pCfgNode = NULL; + + /* + * Load the internal VMM APIC device. + */ + int rc = APICR3RegisterDevice(&RegCB.Core); + AssertRCReturn(rc, rc); + + /* + * Load the builtin module. + */ + PCFGMNODE pDevicesNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "PDM/Devices"); + bool fLoadBuiltin; + rc = CFGMR3QueryBool(pDevicesNode, "LoadBuiltin", &fLoadBuiltin); + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + fLoadBuiltin = true; + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Configuration error: Querying boolean \"LoadBuiltin\" failed with %Rrc\n", rc)); + return rc; + } + if (fLoadBuiltin) + { + /* make filename */ + char *pszFilename = pdmR3FileR3("VBoxDD", true /*fShared*/); + if (!pszFilename) + return VERR_NO_TMP_MEMORY; + rc = pdmR3DevLoad(pVM, &RegCB, pszFilename, "VBoxDD"); + RTMemTmpFree(pszFilename); + if (RT_FAILURE(rc)) + return rc; + + /* make filename */ + pszFilename = pdmR3FileR3("VBoxDD2", true /*fShared*/); + if (!pszFilename) + return VERR_NO_TMP_MEMORY; + rc = pdmR3DevLoad(pVM, &RegCB, pszFilename, "VBoxDD2"); + RTMemTmpFree(pszFilename); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Load additional device modules. + */ + PCFGMNODE pCur; + for (pCur = CFGMR3GetFirstChild(pDevicesNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + /* + * Get the name and path. 
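+ * (The CFGM node name under PDM/Devices gives the module name; an optional "Path" value supplies the file name, otherwise the name is resolved against the default module directory via pdmR3FileR3 below.)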
+ */ + char szName[PDMMOD_NAME_LEN]; + rc = CFGMR3GetName(pCur, &szName[0], sizeof(szName)); + if (rc == VERR_CFGM_NOT_ENOUGH_SPACE) + { + AssertMsgFailed(("configuration error: The module name is too long, cchName=%zu.\n", CFGMR3GetNameLen(pCur))); + return VERR_PDM_MODULE_NAME_TOO_LONG; + } + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("CFGMR3GetName -> %Rrc.\n", rc)); + return rc; + } + + /* the path is optional, if no path the module name + path is used. */ + char szFilename[RTPATH_MAX]; + rc = CFGMR3QueryString(pCur, "Path", &szFilename[0], sizeof(szFilename)); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + strcpy(szFilename, szName); + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("configuration error: Failure to query the module path, rc=%Rrc.\n", rc)); + return rc; + } + + /* prepend path? */ + if (!RTPathHavePath(szFilename)) + { + char *psz = pdmR3FileR3(szFilename, false /*fShared*/); + if (!psz) + return VERR_NO_TMP_MEMORY; + size_t cch = strlen(psz) + 1; + if (cch > sizeof(szFilename)) + { + RTMemTmpFree(psz); + AssertMsgFailed(("Filename too long! cch=%d '%s'\n", cch, psz)); + return VERR_FILENAME_TOO_LONG; + } + memcpy(szFilename, psz, cch); + RTMemTmpFree(psz); + } + + /* + * Load the module and register it's devices. + */ + RegCB.pCfgNode = pCur; + rc = pdmR3DevLoad(pVM, &RegCB, szFilename, szName); + if (RT_FAILURE(rc)) + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Loads one device module and call the registration entry point. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRegCB The registration callback stuff. + * @param pszFilename Module filename. + * @param pszName Module name. + */ +static int pdmR3DevLoad(PVM pVM, PPDMDEVREGCBINT pRegCB, const char *pszFilename, const char *pszName) +{ + /* + * Load it. + */ + int rc = pdmR3LoadR3U(pVM->pUVM, pszFilename, pszName); + if (RT_SUCCESS(rc)) + { + /* + * Get the registration export and call it. + */ + FNPDMVBOXDEVICESREGISTER *pfnVBoxDevicesRegister; + rc = PDMR3LdrGetSymbolR3(pVM, pszName, "VBoxDevicesRegister", (void **)&pfnVBoxDevicesRegister); + if (RT_SUCCESS(rc)) + { + Log(("PDM: Calling VBoxDevicesRegister (%p) of %s (%s)\n", pfnVBoxDevicesRegister, pszName, pszFilename)); + rc = pfnVBoxDevicesRegister(&pRegCB->Core, VBOX_VERSION); + if (RT_SUCCESS(rc)) + Log(("PDM: Successfully loaded device module %s (%s).\n", pszName, pszFilename)); + else + AssertMsgFailed(("VBoxDevicesRegister failed with rc=%Rrc for module %s (%s)\n", rc, pszName, pszFilename)); + } + else + { + AssertMsgFailed(("Failed to locate 'VBoxDevicesRegister' in %s (%s) rc=%Rrc\n", pszName, pszFilename, rc)); + if (rc == VERR_SYMBOL_NOT_FOUND) + rc = VERR_PDM_NO_REGISTRATION_EXPORT; + } + } + else + AssertMsgFailed(("Failed to load %s %s!\n", pszFilename, pszName)); + return rc; +} + + +/** + * @interface_method_impl{PDMDEVREGCB,pfnRegister} + */ +static DECLCALLBACK(int) pdmR3DevReg_Register(PPDMDEVREGCB pCallbacks, PCPDMDEVREG pReg) +{ + /* + * Validate the registration structure. 
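+ * (Name, optional RC/R0 module names, host and guest bit flags, device class, instance limits, instance data size and the constructor are all checked before the registration is accepted.)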
+ */ + Assert(pReg); + AssertMsgReturn(pReg->u32Version == PDM_DEVREG_VERSION, + ("Unknown struct version %#x!\n", pReg->u32Version), + VERR_PDM_UNKNOWN_DEVREG_VERSION); + + AssertMsgReturn( pReg->szName[0] + && strlen(pReg->szName) < sizeof(pReg->szName) + && pdmR3IsValidName(pReg->szName), + ("Invalid name '%.*s'\n", sizeof(pReg->szName), pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn( !(pReg->fFlags & PDM_DEVREG_FLAGS_RC) + || ( pReg->szRCMod[0] + && strlen(pReg->szRCMod) < sizeof(pReg->szRCMod)), + ("Invalid GC module name '%s' - (Device %s)\n", pReg->szRCMod, pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn( !(pReg->fFlags & PDM_DEVREG_FLAGS_R0) + || ( pReg->szR0Mod[0] + && strlen(pReg->szR0Mod) < sizeof(pReg->szR0Mod)), + ("Invalid R0 module name '%s' - (Device %s)\n", pReg->szR0Mod, pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn((pReg->fFlags & PDM_DEVREG_FLAGS_HOST_BITS_MASK) == PDM_DEVREG_FLAGS_HOST_BITS_DEFAULT, + ("Invalid host bits flags! fFlags=%#x (Device %s)\n", pReg->fFlags, pReg->szName), + VERR_PDM_INVALID_DEVICE_HOST_BITS); + AssertMsgReturn((pReg->fFlags & PDM_DEVREG_FLAGS_GUEST_BITS_MASK), + ("Invalid guest bits flags! fFlags=%#x (Device %s)\n", pReg->fFlags, pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn(pReg->fClass, + ("No class! (Device %s)\n", pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn(pReg->cMaxInstances > 0, + ("Max instances %u! (Device %s)\n", pReg->cMaxInstances, pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn(pReg->cbInstance <= (uint32_t)(pReg->fFlags & (PDM_DEVREG_FLAGS_RC | PDM_DEVREG_FLAGS_R0) ? 96 * _1K : _1M), + ("Instance size %d bytes! (Device %s)\n", pReg->cbInstance, pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertMsgReturn(pReg->pfnConstruct, + ("No constructor! (Device %s)\n", pReg->szName), + VERR_PDM_INVALID_DEVICE_REGISTRATION); + AssertLogRelMsgReturn((pReg->fFlags & PDM_DEVREG_FLAGS_GUEST_BITS_MASK) == PDM_DEVREG_FLAGS_GUEST_BITS_DEFAULT, + ("PDM: Rejected device '%s' because it didn't match the guest bits.\n", pReg->szName), + VERR_PDM_INVALID_DEVICE_GUEST_BITS); + AssertLogRelMsg(pReg->u32VersionEnd == PDM_DEVREG_VERSION, + ("u32VersionEnd=%#x, expected %#x. (szName=%s)\n", + pReg->u32VersionEnd, PDM_DEVREG_VERSION, pReg->szName)); + + /* + * Check for duplicate and find FIFO entry at the same time. + */ + PCPDMDEVREGCBINT pRegCB = (PCPDMDEVREGCBINT)pCallbacks; + PPDMDEV pDevPrev = NULL; + PPDMDEV pDev = pRegCB->pVM->pdm.s.pDevs; + for (; pDev; pDevPrev = pDev, pDev = pDev->pNext) + AssertMsgReturn(strcmp(pDev->pReg->szName, pReg->szName), + ("Device '%s' already exists\n", pReg->szName), + VERR_PDM_DEVICE_NAME_CLASH); + + /* + * Allocate new device structure, initialize and insert it into the list. 
+ */ + int rc; + pDev = (PPDMDEV)MMR3HeapAlloc(pRegCB->pVM, MM_TAG_PDM_DEVICE, sizeof(*pDev)); + if (pDev) + { + pDev->pNext = NULL; + pDev->cInstances = 0; + pDev->pInstances = NULL; + pDev->pReg = pReg; + pDev->cchName = (uint32_t)strlen(pReg->szName); + rc = CFGMR3QueryStringAllocDef( pRegCB->pCfgNode, "RCSearchPath", &pDev->pszRCSearchPath, NULL); + if (RT_SUCCESS(rc)) + rc = CFGMR3QueryStringAllocDef(pRegCB->pCfgNode, "R0SearchPath", &pDev->pszR0SearchPath, NULL); + if (RT_SUCCESS(rc)) + { + if (pDevPrev) + pDevPrev->pNext = pDev; + else + pRegCB->pVM->pdm.s.pDevs = pDev; + Log(("PDM: Registered device '%s'\n", pReg->szName)); + return VINF_SUCCESS; + } + + MMR3HeapFree(pDev); + } + else + rc = VERR_NO_MEMORY; + return rc; +} + + +/** + * Locates a LUN. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param ppLun Where to store the pointer to the LUN if found. + * @thread Try only do this in EMT... + */ +int pdmR3DevFindLun(PVM pVM, const char *pszDevice, unsigned iInstance, unsigned iLun, PPPDMLUN ppLun) +{ + /* + * Iterate registered devices looking for the device. + */ + size_t cchDevice = strlen(pszDevice); + for (PPDMDEV pDev = pVM->pdm.s.pDevs; pDev; pDev = pDev->pNext) + { + if ( pDev->cchName == cchDevice + && !memcmp(pDev->pReg->szName, pszDevice, cchDevice)) + { + /* + * Iterate device instances. + */ + for (PPDMDEVINS pDevIns = pDev->pInstances; pDevIns; pDevIns = pDevIns->Internal.s.pPerDeviceNextR3) + { + if (pDevIns->iInstance == iInstance) + { + /* + * Iterate luns. + */ + for (PPDMLUN pLun = pDevIns->Internal.s.pLunsR3; pLun; pLun = pLun->pNext) + { + if (pLun->iLun == iLun) + { + *ppLun = pLun; + return VINF_SUCCESS; + } + } + return VERR_PDM_LUN_NOT_FOUND; + } + } + return VERR_PDM_DEVICE_INSTANCE_NOT_FOUND; + } + } + return VERR_PDM_DEVICE_NOT_FOUND; +} + + +/** + * Attaches a preconfigured driver to an existing device instance. + * + * This is used to change drivers and suchlike at runtime. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param fFlags Flags, combination of the PDMDEVATT_FLAGS_* \#defines. + * @param ppBase Where to store the base interface pointer. Optional. + * @thread EMT + */ +VMMR3DECL(int) PDMR3DeviceAttach(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, uint32_t fFlags, PPPDMIBASE ppBase) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT(pVM); + LogFlow(("PDMR3DeviceAttach: pszDevice=%p:{%s} iInstance=%d iLun=%d fFlags=%#x ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, fFlags, ppBase)); + + /* + * Find the LUN in question. + */ + PPDMLUN pLun; + int rc = pdmR3DevFindLun(pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + /* + * Can we attach anything at runtime? + */ + PPDMDEVINS pDevIns = pLun->pDevIns; + if (pDevIns->pReg->pfnAttach) + { + if (!pLun->pTop) + { + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + rc = pDevIns->pReg->pfnAttach(pDevIns, iLun, fFlags); + PDMCritSectLeave(pDevIns->pCritSectRoR3); + } + else + rc = VERR_PDM_DRIVER_ALREADY_ATTACHED; + } + else + rc = VERR_PDM_DEVICE_NO_RT_ATTACH; + + if (ppBase) + *ppBase = pLun->pTop ? 
&pLun->pTop->IBase : NULL; + } + else if (ppBase) + *ppBase = NULL; + + if (ppBase) + LogFlow(("PDMR3DeviceAttach: returns %Rrc *ppBase=%p\n", rc, *ppBase)); + else + LogFlow(("PDMR3DeviceAttach: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Detaches a driver chain from an existing device instance. + * + * This is used to change drivers and suchlike at runtime. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param fFlags Flags, combination of the PDMDEVATT_FLAGS_* \#defines. + * @thread EMT + */ +VMMR3DECL(int) PDMR3DeviceDetach(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, uint32_t fFlags) +{ + return PDMR3DriverDetach(pUVM, pszDevice, iInstance, iLun, NULL, 0, fFlags); +} + + +/** + * References the critical section associated with a device for the use by a + * timer or similar created by the device. + * + * @returns Pointer to the critical section. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance in question. + * + * @internal + */ +VMMR3_INT_DECL(PPDMCRITSECT) PDMR3DevGetCritSect(PVM pVM, PPDMDEVINS pDevIns) +{ + VM_ASSERT_EMT(pVM); RT_NOREF_PV(pVM); + VM_ASSERT_STATE(pVM, VMSTATE_CREATING); + AssertPtr(pDevIns); + + PPDMCRITSECT pCritSect = pDevIns->pCritSectRoR3; + AssertPtr(pCritSect); + pCritSect->s.fUsedByTimerOrSimilar = true; + + return pCritSect; +} + + +/** + * Attaches a preconfigured driver to an existing device or driver instance. + * + * This is used to change drivers and suchlike at runtime. The driver or device + * at the end of the chain will be told to attach to whatever is configured + * below it. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param fFlags Flags, combination of the PDMDEVATT_FLAGS_* \#defines. + * @param ppBase Where to store the base interface pointer. Optional. + * + * @thread EMT + */ +VMMR3DECL(int) PDMR3DriverAttach(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, uint32_t fFlags, PPPDMIBASE ppBase) +{ + LogFlow(("PDMR3DriverAttach: pszDevice=%p:{%s} iInstance=%d iLun=%d fFlags=%#x ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, fFlags, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT(pVM); + + if (ppBase) + *ppBase = NULL; + + /* + * Find the LUN in question. + */ + PPDMLUN pLun; + int rc = pdmR3DevFindLun(pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + /* + * Anything attached to the LUN? + */ + PPDMDRVINS pDrvIns = pLun->pTop; + if (!pDrvIns) + { + /* No, ask the device to attach to the new stuff. */ + PPDMDEVINS pDevIns = pLun->pDevIns; + if (pDevIns->pReg->pfnAttach) + { + PDMCritSectEnter(pDevIns->pCritSectRoR3, VERR_IGNORED); + rc = pDevIns->pReg->pfnAttach(pDevIns, iLun, fFlags); + if (RT_SUCCESS(rc) && ppBase) + *ppBase = pLun->pTop ? &pLun->pTop->IBase : NULL; + PDMCritSectLeave(pDevIns->pCritSectRoR3); + } + else + rc = VERR_PDM_DEVICE_NO_RT_ATTACH; + } + else + { + /* Yes, find the bottom most driver and ask it to attach to the new stuff. 
*/ + while (pDrvIns->Internal.s.pDown) + pDrvIns = pDrvIns->Internal.s.pDown; + if (pDrvIns->pReg->pfnAttach) + { + rc = pDrvIns->pReg->pfnAttach(pDrvIns, fFlags); + if (RT_SUCCESS(rc) && ppBase) + *ppBase = pDrvIns->Internal.s.pDown + ? &pDrvIns->Internal.s.pDown->IBase + : NULL; + } + else + rc = VERR_PDM_DRIVER_NO_RT_ATTACH; + } + } + + if (ppBase) + LogFlow(("PDMR3DriverAttach: returns %Rrc *ppBase=%p\n", rc, *ppBase)); + else + LogFlow(("PDMR3DriverAttach: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Detaches the specified driver instance. + * + * This is used to replumb drivers at runtime for simulating hot plugging and + * media changes. + * + * This is a superset of PDMR3DeviceDetach. It allows detaching drivers from + * any driver or device by specifying the driver to start detaching at. The + * only prerequisite is that the driver or device above implements the + * pfnDetach callback (PDMDRVREG / PDMDEVREG). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iDevIns Device instance. + * @param iLun The Logical Unit in which to look for the driver. + * @param pszDriver The name of the driver which to detach. If NULL + * then the entire driver chain is detatched. + * @param iOccurrence The occurrence of that driver in the chain. This is + * usually 0. + * @param fFlags Flags, combination of the PDMDEVATT_FLAGS_* \#defines. + * @thread EMT + */ +VMMR3DECL(int) PDMR3DriverDetach(PUVM pUVM, const char *pszDevice, unsigned iDevIns, unsigned iLun, + const char *pszDriver, unsigned iOccurrence, uint32_t fFlags) +{ + LogFlow(("PDMR3DriverDetach: pszDevice=%p:{%s} iDevIns=%u iLun=%u pszDriver=%p:{%s} iOccurrence=%u fFlags=%#x\n", + pszDevice, pszDevice, iDevIns, iLun, pszDriver, pszDriver, iOccurrence, fFlags)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT(pVM); + AssertPtr(pszDevice); + AssertPtrNull(pszDriver); + Assert(iOccurrence == 0 || pszDriver); + Assert(!(fFlags & ~(PDM_TACH_FLAGS_NOT_HOT_PLUG))); + + /* + * Find the LUN in question. + */ + PPDMLUN pLun; + int rc = pdmR3DevFindLun(pVM, pszDevice, iDevIns, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + /* + * Locate the driver. + */ + PPDMDRVINS pDrvIns = pLun->pTop; + if (pDrvIns) + { + if (pszDriver) + { + while (pDrvIns) + { + if (!strcmp(pDrvIns->pReg->szName, pszDriver)) + { + if (iOccurrence == 0) + break; + iOccurrence--; + } + pDrvIns = pDrvIns->Internal.s.pDown; + } + } + if (pDrvIns) + rc = pdmR3DrvDetach(pDrvIns, fFlags); + else + rc = VERR_PDM_DRIVER_INSTANCE_NOT_FOUND; + } + else + rc = VINF_PDM_NO_DRIVER_ATTACHED_TO_LUN; + } + + LogFlow(("PDMR3DriverDetach: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Runtime detach and reattach of a new driver chain or sub chain. + * + * This is intended to be called on a non-EMT thread, this will instantiate the + * new driver (sub-)chain, and then the EMTs will do the actual replumbing. The + * destruction of the old driver chain will be taken care of on the calling + * thread. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iDevIns Device instance. + * @param iLun The Logical Unit in which to look for the driver. + * @param pszDriver The name of the driver which to detach and replace. + * If NULL then the entire driver chain is to be + * reattached. + * @param iOccurrence The occurrence of that driver in the chain. 
This is + * usually 0. + * @param fFlags Flags, combination of the PDMDEVATT_FLAGS_* \#defines. + * @param pCfg The configuration of the new driver chain that is + * going to be attached. The subtree starts with the + * node containing a Driver key, a Config subtree and + * optionally an AttachedDriver subtree. + * If this parameter is NULL, then this call will work + * like at a non-pause version of PDMR3DriverDetach. + * @param ppBase Where to store the base interface pointer to the new + * driver. Optional. + * + * @thread Any thread. The EMTs will be involved at some point though. + */ +VMMR3DECL(int) PDMR3DriverReattach(PUVM pUVM, const char *pszDevice, unsigned iDevIns, unsigned iLun, + const char *pszDriver, unsigned iOccurrence, uint32_t fFlags, + PCFGMNODE pCfg, PPPDMIBASE ppBase) +{ + NOREF(pUVM); NOREF(pszDevice); NOREF(iDevIns); NOREF(iLun); NOREF(pszDriver); NOREF(iOccurrence); + NOREF(fFlags); NOREF(pCfg); NOREF(ppBase); + return VERR_NOT_IMPLEMENTED; +} + diff --git a/src/VBox/VMM/VMMR3/PDMDriver.cpp b/src/VBox/VMM/VMMR3/PDMDriver.cpp new file mode 100644 index 00000000..ec00fe20 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMDriver.cpp @@ -0,0 +1,1870 @@ +/* $Id: PDMDriver.cpp $ */ +/** @file + * PDM - Pluggable Device and Driver Manager, Driver parts. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_DRIVER +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Internal callback structure pointer. + * + * The main purpose is to define the extra data we associate + * with PDMDRVREGCB so we can find the VM instance and so on. + */ +typedef struct PDMDRVREGCBINT +{ + /** The callback structure. */ + PDMDRVREGCB Core; + /** A bit of padding. */ + uint32_t u32[4]; + /** VM Handle. */ + PVM pVM; + /** Pointer to the configuration node the registrations should be + * associated with. Can be NULL. 
*/ + PCFGMNODE pCfgNode; +} PDMDRVREGCBINT, *PPDMDRVREGCBINT; +typedef const PDMDRVREGCBINT *PCPDMDRVREGCBINT; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) pdmR3DrvRegister(PCPDMDRVREGCB pCallbacks, PCPDMDRVREG pReg); +static int pdmR3DrvLoad(PVM pVM, PPDMDRVREGCBINT pRegCB, const char *pszFilename, const char *pszName); + + +/** + * Register drivers in a statically linked environment. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pfnCallback Driver registration callback + */ +VMMR3DECL(int) PDMR3DrvStaticRegistration(PVM pVM, FNPDMVBOXDRIVERSREGISTER pfnCallback) +{ + /* + * The registration callbacks. + */ + PDMDRVREGCBINT RegCB; + RegCB.Core.u32Version = PDM_DRVREG_CB_VERSION; + RegCB.Core.pfnRegister = pdmR3DrvRegister; + RegCB.pVM = pVM; + RegCB.pCfgNode = NULL; + + int rc = pfnCallback(&RegCB.Core, VBOX_VERSION); + if (RT_FAILURE(rc)) + AssertMsgFailed(("VBoxDriversRegister failed with rc=%Rrc\n", rc)); + + return rc; +} + + +/** + * This function will initialize the drivers for this VM instance. + * + * First of all this mean loading the builtin drivers and letting them + * register themselves. Beyond that any additional driver modules are + * loaded and called for registration. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3DrvInit(PVM pVM) +{ + LogFlow(("pdmR3DrvInit:\n")); + + AssertRelease(!(RT_UOFFSETOF(PDMDRVINS, achInstanceData) & 15)); + PPDMDRVINS pDrvInsAssert; NOREF(pDrvInsAssert); + AssertCompile(sizeof(pDrvInsAssert->Internal.s) <= sizeof(pDrvInsAssert->Internal.padding)); + AssertRelease(sizeof(pDrvInsAssert->Internal.s) <= sizeof(pDrvInsAssert->Internal.padding)); + + /* + * The registration callbacks. + */ + PDMDRVREGCBINT RegCB; + RegCB.Core.u32Version = PDM_DRVREG_CB_VERSION; + RegCB.Core.pfnRegister = pdmR3DrvRegister; + RegCB.pVM = pVM; + RegCB.pCfgNode = NULL; + + /* + * Load the builtin module + */ + PCFGMNODE pDriversNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "PDM/Drivers"); + bool fLoadBuiltin; + int rc = CFGMR3QueryBool(pDriversNode, "LoadBuiltin", &fLoadBuiltin); + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + fLoadBuiltin = true; + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Configuration error: Querying boolean \"LoadBuiltin\" failed with %Rrc\n", rc)); + return rc; + } + if (fLoadBuiltin) + { + /* make filename */ + char *pszFilename = pdmR3FileR3("VBoxDD", true /*fShared*/); + if (!pszFilename) + return VERR_NO_TMP_MEMORY; + rc = pdmR3DrvLoad(pVM, &RegCB, pszFilename, "VBoxDD"); + RTMemTmpFree(pszFilename); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Load additional driver modules. + */ + for (PCFGMNODE pCur = CFGMR3GetFirstChild(pDriversNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + /* + * Get the name and path. 
+ */ + char szName[PDMMOD_NAME_LEN]; + rc = CFGMR3GetName(pCur, &szName[0], sizeof(szName)); + if (rc == VERR_CFGM_NOT_ENOUGH_SPACE) + { + AssertMsgFailed(("configuration error: The module name is too long, cchName=%zu.\n", CFGMR3GetNameLen(pCur))); + return VERR_PDM_MODULE_NAME_TOO_LONG; + } + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("CFGMR3GetName -> %Rrc.\n", rc)); + return rc; + } + + /* the path is optional, if no path the module name + path is used. */ + char szFilename[RTPATH_MAX]; + rc = CFGMR3QueryString(pCur, "Path", &szFilename[0], sizeof(szFilename)); + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + strcpy(szFilename, szName); + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("configuration error: Failure to query the module path, rc=%Rrc.\n", rc)); + return rc; + } + + /* prepend path? */ + if (!RTPathHavePath(szFilename)) + { + char *psz = pdmR3FileR3(szFilename, false /*fShared*/); + if (!psz) + return VERR_NO_TMP_MEMORY; + size_t cch = strlen(psz) + 1; + if (cch > sizeof(szFilename)) + { + RTMemTmpFree(psz); + AssertMsgFailed(("Filename too long! cch=%d '%s'\n", cch, psz)); + return VERR_FILENAME_TOO_LONG; + } + memcpy(szFilename, psz, cch); + RTMemTmpFree(psz); + } + + /* + * Load the module and register it's drivers. + */ + RegCB.pCfgNode = pCur; + rc = pdmR3DrvLoad(pVM, &RegCB, szFilename, szName); + if (RT_FAILURE(rc)) + return rc; + } + + LogFlow(("pdmR3DrvInit: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Loads one driver module and call the registration entry point. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRegCB The registration callback stuff. + * @param pszFilename Module filename. + * @param pszName Module name. + */ +static int pdmR3DrvLoad(PVM pVM, PPDMDRVREGCBINT pRegCB, const char *pszFilename, const char *pszName) +{ + /* + * Load it. + */ + int rc = pdmR3LoadR3U(pVM->pUVM, pszFilename, pszName); + if (RT_SUCCESS(rc)) + { + /* + * Get the registration export and call it. + */ + FNPDMVBOXDRIVERSREGISTER *pfnVBoxDriversRegister; + rc = PDMR3LdrGetSymbolR3(pVM, pszName, "VBoxDriversRegister", (void **)&pfnVBoxDriversRegister); + if (RT_SUCCESS(rc)) + { + Log(("PDM: Calling VBoxDriversRegister (%p) of %s (%s)\n", pfnVBoxDriversRegister, pszName, pszFilename)); + rc = pfnVBoxDriversRegister(&pRegCB->Core, VBOX_VERSION); + if (RT_SUCCESS(rc)) + Log(("PDM: Successfully loaded driver module %s (%s).\n", pszName, pszFilename)); + else + AssertMsgFailed(("VBoxDriversRegister failed with rc=%Rrc\n", rc)); + } + else + { + AssertMsgFailed(("Failed to locate 'VBoxDriversRegister' in %s (%s) rc=%Rrc\n", pszName, pszFilename, rc)); + if (rc == VERR_SYMBOL_NOT_FOUND) + rc = VERR_PDM_NO_REGISTRATION_EXPORT; + } + } + else + AssertMsgFailed(("Failed to load %s (%s) rc=%Rrc!\n", pszName, pszFilename, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVREGCB,pfnRegister} */ +static DECLCALLBACK(int) pdmR3DrvRegister(PCPDMDRVREGCB pCallbacks, PCPDMDRVREG pReg) +{ + /* + * Validate the registration structure. 
+ */ + AssertPtrReturn(pReg, VERR_INVALID_POINTER); + AssertMsgReturn(pReg->u32Version == PDM_DRVREG_VERSION, + ("%#x\n", pReg->u32Version), + VERR_PDM_UNKNOWN_DRVREG_VERSION); + AssertReturn(pReg->szName[0], VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(RTStrEnd(pReg->szName, sizeof(pReg->szName)), + ("%.*s\n", sizeof(pReg->szName), pReg->szName), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(pdmR3IsValidName(pReg->szName), ("%.*s\n", sizeof(pReg->szName), pReg->szName), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn( !(pReg->fFlags & PDM_DRVREG_FLAGS_R0) + || ( pReg->szR0Mod[0] + && RTStrEnd(pReg->szR0Mod, sizeof(pReg->szR0Mod))), + ("%s: %.*s\n", pReg->szName, sizeof(pReg->szR0Mod), pReg->szR0Mod), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn( !(pReg->fFlags & PDM_DRVREG_FLAGS_RC) + || ( pReg->szRCMod[0] + && RTStrEnd(pReg->szRCMod, sizeof(pReg->szRCMod))), + ("%s: %.*s\n", pReg->szName, sizeof(pReg->szRCMod), pReg->szRCMod), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(VALID_PTR(pReg->pszDescription), + ("%s: %p\n", pReg->szName, pReg->pszDescription), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(!(pReg->fFlags & ~(PDM_DRVREG_FLAGS_HOST_BITS_MASK | PDM_DRVREG_FLAGS_R0 | PDM_DRVREG_FLAGS_RC)), + ("%s: %#x\n", pReg->szName, pReg->fFlags), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn((pReg->fFlags & PDM_DRVREG_FLAGS_HOST_BITS_MASK) == PDM_DRVREG_FLAGS_HOST_BITS_DEFAULT, + ("%s: %#x\n", pReg->szName, pReg->fFlags), + VERR_PDM_INVALID_DRIVER_HOST_BITS); + AssertMsgReturn(pReg->cMaxInstances > 0, + ("%s: %#x\n", pReg->szName, pReg->cMaxInstances), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(pReg->cbInstance <= _1M, + ("%s: %#x\n", pReg->szName, pReg->cbInstance), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(VALID_PTR(pReg->pfnConstruct), + ("%s: %p\n", pReg->szName, pReg->pfnConstruct), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(VALID_PTR(pReg->pfnRelocate) || !(pReg->fFlags & PDM_DRVREG_FLAGS_RC), + ("%s: %#x\n", pReg->szName, pReg->cbInstance), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(pReg->pfnSoftReset == NULL, + ("%s: %p\n", pReg->szName, pReg->pfnSoftReset), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + AssertMsgReturn(pReg->u32VersionEnd == PDM_DRVREG_VERSION, + ("%s: %#x\n", pReg->szName, pReg->u32VersionEnd), + VERR_PDM_INVALID_DRIVER_REGISTRATION); + + /* + * Check for duplicate and find FIFO entry at the same time. + */ + PCPDMDRVREGCBINT pRegCB = (PCPDMDRVREGCBINT)pCallbacks; + PPDMDRV pDrvPrev = NULL; + PPDMDRV pDrv = pRegCB->pVM->pdm.s.pDrvs; + for (; pDrv; pDrvPrev = pDrv, pDrv = pDrv->pNext) + { + if (!strcmp(pDrv->pReg->szName, pReg->szName)) + { + AssertMsgFailed(("Driver '%s' already exists\n", pReg->szName)); + return VERR_PDM_DRIVER_NAME_CLASH; + } + } + + /* + * Allocate new driver structure and insert it into the list. 
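/*
 * Illustrative sketch (not part of the patch): a hypothetical instance data
 * type and a minimal PDMDRVREG that would pass the validation above.  The
 * names, class value and callbacks are invented; the field layout follows
 * the per-field comment style used by in-tree drivers.
 */
typedef struct DRVEXAMPLE
{
    char       *pszOwnedString;     /* freed in the destructor sketch further down */
    uint32_t    cSomething;         /* used in the saved-state sketch further down */
} DRVEXAMPLE, *PDRVEXAMPLE;

static DECLCALLBACK(int)  drvExampleConstruct(PPDMDRVINS pDrvIns, PCFGMNODE pCfg, uint32_t fFlags);
static DECLCALLBACK(void) drvExampleDestruct(PPDMDRVINS pDrvIns);

const PDMDRVREG g_DrvExample =
{
    /* u32Version */        PDM_DRVREG_VERSION,
    /* szName */            "Example",
    /* szRCMod */           "",                                 /* only needed with PDM_DRVREG_FLAGS_RC */
    /* szR0Mod */           "",                                 /* only needed with PDM_DRVREG_FLAGS_R0 */
    /* pszDescription */    "Example driver (illustration only).",
    /* fFlags */            PDM_DRVREG_FLAGS_HOST_BITS_DEFAULT,
    /* fClass */            PDM_DRVREG_CLASS_STATUS,            /* hypothetical class choice */
    /* cMaxInstances */     1,                                  /* must be > 0 */
    /* cbInstance */        sizeof(DRVEXAMPLE),                 /* must be <= _1M */
    /* pfnConstruct */      drvExampleConstruct,                /* mandatory */
    /* pfnDestruct */       drvExampleDestruct,
    /* pfnRelocate */       NULL,                               /* mandatory only for RC drivers */
    /* pfnIOCtl */          NULL,
    /* pfnPowerOn */        NULL,
    /* pfnReset */          NULL,
    /* pfnSuspend */        NULL,
    /* pfnResume */         NULL,
    /* pfnAttach */         NULL,
    /* pfnDetach */         NULL,
    /* pfnPowerOff */       NULL,
    /* pfnSoftReset */      NULL,                               /* must be NULL per the check above */
    /* u32VersionEnd */     PDM_DRVREG_VERSION
};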
+ */ + int rc; + pDrv = (PPDMDRV)MMR3HeapAlloc(pRegCB->pVM, MM_TAG_PDM_DRIVER, sizeof(*pDrv)); + if (pDrv) + { + pDrv->pNext = NULL; + pDrv->cInstances = 0; + pDrv->iNextInstance = 0; + pDrv->pReg = pReg; + rc = CFGMR3QueryStringAllocDef( pRegCB->pCfgNode, "RCSearchPath", &pDrv->pszRCSearchPath, NULL); + if (RT_SUCCESS(rc)) + rc = CFGMR3QueryStringAllocDef(pRegCB->pCfgNode, "R0SearchPath", &pDrv->pszR0SearchPath, NULL); + if (RT_SUCCESS(rc)) + { + if (pDrvPrev) + pDrvPrev->pNext = pDrv; + else + pRegCB->pVM->pdm.s.pDrvs = pDrv; + Log(("PDM: Registered driver '%s'\n", pReg->szName)); + return VINF_SUCCESS; + } + MMR3HeapFree(pDrv); + } + else + rc = VERR_NO_MEMORY; + return rc; +} + + +/** + * Looks up a driver structure by name. + * @internal + */ +PPDMDRV pdmR3DrvLookup(PVM pVM, const char *pszName) +{ + for (PPDMDRV pDrv = pVM->pdm.s.pDrvs; pDrv; pDrv = pDrv->pNext) + if (!strcmp(pDrv->pReg->szName, pszName)) + return pDrv; + return NULL; +} + + +/** + * Transforms the driver chain as it's being instantiated. + * + * Worker for pdmR3DrvInstantiate. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvAbove The driver above, NULL if top. + * @param pLun The LUN. + * @param ppNode The AttachedDriver node, replaced if any + * morphing took place. + */ +static int pdmR3DrvMaybeTransformChain(PVM pVM, PPDMDRVINS pDrvAbove, PPDMLUN pLun, PCFGMNODE *ppNode) +{ + /* + * The typical state of affairs is that there are no injections. + */ + PCFGMNODE pCurTrans = CFGMR3GetFirstChild(CFGMR3GetChild(CFGMR3GetRoot(pVM), "PDM/DriverTransformations")); + if (!pCurTrans) + return VINF_SUCCESS; + + /* + * Gather the attributes used in the matching process. + */ + const char *pszDevice = pLun->pDevIns + ? pLun->pDevIns->Internal.s.pDevR3->pReg->szName + : pLun->pUsbIns->Internal.s.pUsbDev->pReg->szName; + char szLun[32]; + RTStrPrintf(szLun, sizeof(szLun), "%u", pLun->iLun); + const char *pszAbove = pDrvAbove ? pDrvAbove->Internal.s.pDrv->pReg->szName : ""; + char *pszThisDrv; + int rc = CFGMR3QueryStringAlloc(*ppNode, "Driver", &pszThisDrv); + AssertMsgRCReturn(rc, ("Query for string value of \"Driver\" -> %Rrc\n", rc), + rc == VERR_CFGM_VALUE_NOT_FOUND ? VERR_PDM_CFG_MISSING_DRIVER_NAME : rc); + + uint64_t uInjectTransformationAbove = 0; + if (pDrvAbove) + { + rc = CFGMR3QueryIntegerDef(CFGMR3GetParent(*ppNode), "InjectTransformationPtr", &uInjectTransformationAbove, 0); + AssertLogRelRCReturn(rc, rc); + } + + + /* + * Enumerate possible driver chain transformations. + */ + unsigned cTransformations = 0; + for (; pCurTrans != NULL; pCurTrans = CFGMR3GetNextChild(pCurTrans)) + { + char szCurTransNm[256]; + rc = CFGMR3GetName(pCurTrans, szCurTransNm, sizeof(szCurTransNm)); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/PDM/DriverTransformations/<name>/Device,string,*} + * One or more simple wildcard patterns separated by '|' for matching + * the devices this transformation rule applies to. */ + char *pszMultiPat; + rc = CFGMR3QueryStringAllocDef(pCurTrans, "Device", &pszMultiPat, "*"); + AssertLogRelRCReturn(rc, rc); + bool fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, pszDevice, RTSTR_MAX, NULL); + MMR3HeapFree(pszMultiPat); + if (!fMatch) + continue; + + /** @cfgm{/PDM/DriverTransformations/<name>/LUN,string,*} + * One or more simple wildcard patterns separated by '|' for matching + * the LUNs this transformation rule applies to.
*/ + rc = CFGMR3QueryStringAllocDef(pCurTrans, "LUN", &pszMultiPat, "*"); + AssertLogRelRCReturn(rc, rc); + fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, szLun, RTSTR_MAX, NULL); + MMR3HeapFree(pszMultiPat); + if (!fMatch) + continue; + + /** @cfgm{/PDM/DriverTransformations/<name>/BelowDriver,string,*} + * One or more simple wildcard patterns separated by '|' for matching the + * drivers the transformation should be applied below. This means that + * when the drivers matched here attach another driver below them, the + * transformation will be applied. To represent the device, '<top>' + * is used. */ + rc = CFGMR3QueryStringAllocDef(pCurTrans, "BelowDriver", &pszMultiPat, "*"); + AssertLogRelRCReturn(rc, rc); + fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, pszAbove, RTSTR_MAX, NULL); + MMR3HeapFree(pszMultiPat); + if (!fMatch) + continue; + + /** @cfgm{/PDM/DriverTransformations/<name>/AboveDriver,string,*} + * One or more simple wildcard patterns separated by '|' for matching the + * drivers the transformation should be applied above or at (depending on + * the action). The value being matched against here is the driver that + * is in the process of being attached, so for mergeconfig actions this is + * usually what you need to match on. */ + rc = CFGMR3QueryStringAlloc(pCurTrans, "AboveDriver", &pszMultiPat); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + rc = VINF_SUCCESS; + else + { + AssertLogRelRCReturn(rc, rc); + fMatch = RTStrSimplePatternMultiMatch(pszMultiPat, RTSTR_MAX, pszThisDrv, RTSTR_MAX, NULL); + MMR3HeapFree(pszMultiPat); + if (!fMatch) + continue; + if (uInjectTransformationAbove == (uintptr_t)pCurTrans) + continue; + } + + /* + * We've got a match! Now, what are we supposed to do? + */ + /** @cfgm{/PDM/DriverTransformations/<name>/Action,string,inject} + * The action that the transformation takes. Possible values are: + * - inject + * - mergeconfig: This merges the content of the 'Config' key under the + * transformation into the driver's own 'Config' key, replacing any + * duplicates. + * - remove + * - removetree + * - replace + * - replacetree + */ + char szAction[16]; + rc = CFGMR3QueryStringDef(pCurTrans, "Action", szAction, sizeof(szAction), "inject"); + AssertLogRelRCReturn(rc, rc); + AssertLogRelMsgReturn( !strcmp(szAction, "inject") + || !strcmp(szAction, "mergeconfig") + || !strcmp(szAction, "remove") + || !strcmp(szAction, "removetree") + || !strcmp(szAction, "replace") + || !strcmp(szAction, "replacetree") + , + ("Action='%s', valid values are 'inject', 'mergeconfig', 'replace', 'replacetree', 'remove', 'removetree'.\n", szAction), + VERR_PDM_MISCONFIGURED_DRV_TRANSFORMATION); + LogRel(("PDMDriver: Applying '%s' to '%s'::[%s]...'%s': %s\n", szCurTransNm, pszDevice, szLun, pszThisDrv, szAction)); + CFGMR3Dump(*ppNode); + CFGMR3Dump(pCurTrans); + + /* Get the attached driver to inject. */ + PCFGMNODE pTransAttDrv = NULL; + if (!strcmp(szAction, "inject") || !strcmp(szAction, "replace") || !strcmp(szAction, "replacetree")) + { + pTransAttDrv = CFGMR3GetChild(pCurTrans, "AttachedDriver"); + AssertLogRelMsgReturn(pTransAttDrv, + ("An %s transformation requires an AttachedDriver child node!\n", szAction), + VERR_PDM_MISCONFIGURED_DRV_TRANSFORMATION); + } + + + /* + * Remove the node.
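/*
 * Illustrative sketch (not part of the patch): a transformation rule as the
 * matching code above would consume it.  The rule, device, driver and config
 * names are invented; the keys and defaults come from the @cfgm remarks.
 *
 *   /PDM/DriverTransformations/MyFilterRule/
 *     Device          <string> = "e1000|pcnet"    default "*"
 *     LUN             <string> = "0"              default "*"
 *     BelowDriver     <string> = "*"              default "*"
 *     AboveDriver     <string> = "IntNet"         optional; matched against the driver being attached
 *     Action          <string> = "inject"         inject|mergeconfig|remove|removetree|replace|replacetree
 *     AttachedDriver/                             required for inject/replace/replacetree
 *       Driver        <string> = "MyFilter"       hypothetical driver to slot into the chain
 *       Config/                                   its configuration, if any
 */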
+ */ + if (!strcmp(szAction, "remove") || !strcmp(szAction, "removetree")) + { + PCFGMNODE pBelowThis = CFGMR3GetChild(*ppNode, "AttachedDriver"); + if (!pBelowThis || !strcmp(szAction, "removetree")) + { + CFGMR3RemoveNode(*ppNode); + *ppNode = NULL; + } + else + { + PCFGMNODE pBelowThisCopy; + rc = CFGMR3DuplicateSubTree(pBelowThis, &pBelowThisCopy); + AssertLogRelRCReturn(rc, rc); + + rc = CFGMR3ReplaceSubTree(*ppNode, pBelowThisCopy); + AssertLogRelRCReturnStmt(rc, CFGMR3RemoveNode(pBelowThis), rc); + } + } + /* + * Replace the driver about to be instantiated. + */ + else if (!strcmp(szAction, "replace") || !strcmp(szAction, "replacetree")) + { + PCFGMNODE pTransCopy; + rc = CFGMR3DuplicateSubTree(pTransAttDrv, &pTransCopy); + AssertLogRelRCReturn(rc, rc); + + PCFGMNODE pBelowThis = CFGMR3GetChild(*ppNode, "AttachedDriver"); + if (!pBelowThis || !strcmp(szAction, "replacetree")) + rc = VINF_SUCCESS; + else + { + PCFGMNODE pBelowThisCopy; + rc = CFGMR3DuplicateSubTree(pBelowThis, &pBelowThisCopy); + if (RT_SUCCESS(rc)) + { + rc = CFGMR3InsertSubTree(pTransCopy, "AttachedDriver", pBelowThisCopy, NULL); + AssertLogRelRC(rc); + if (RT_FAILURE(rc)) + CFGMR3RemoveNode(pBelowThisCopy); + } + } + if (RT_SUCCESS(rc)) + rc = CFGMR3ReplaceSubTree(*ppNode, pTransCopy); + if (RT_FAILURE(rc)) + CFGMR3RemoveNode(pTransCopy); + } + /* + * Inject a driver before the driver about to be instantiated. + */ + else if (!strcmp(szAction, "inject")) + { + PCFGMNODE pTransCopy; + rc = CFGMR3DuplicateSubTree(pTransAttDrv, &pTransCopy); + AssertLogRelRCReturn(rc, rc); + + PCFGMNODE pThisCopy; + rc = CFGMR3DuplicateSubTree(*ppNode, &pThisCopy); + if (RT_SUCCESS(rc)) + { + rc = CFGMR3InsertSubTree(pTransCopy, "AttachedDriver", pThisCopy, NULL); + if (RT_SUCCESS(rc)) + { + rc = CFGMR3InsertInteger(pTransCopy, "InjectTransformationPtr", (uintptr_t)pCurTrans); + AssertLogRelRC(rc); + rc = CFGMR3InsertString(pTransCopy, "InjectTransformationNm", szCurTransNm); + AssertLogRelRC(rc); + if (RT_SUCCESS(rc)) + rc = CFGMR3ReplaceSubTree(*ppNode, pTransCopy); + } + else + { + AssertLogRelRC(rc); + CFGMR3RemoveNode(pThisCopy); + } + } + if (RT_FAILURE(rc)) + CFGMR3RemoveNode(pTransCopy); + } + /* + * Merge the Config node of the transformation with the one of the + * current driver. + */ + else if (!strcmp(szAction, "mergeconfig")) + { + PCFGMNODE pTransConfig = CFGMR3GetChild(pCurTrans, "Config"); + AssertLogRelReturn(pTransConfig, VERR_PDM_MISCONFIGURED_DRV_TRANSFORMATION); + + PCFGMNODE pDrvConfig = CFGMR3GetChild(*ppNode, "Config"); + if (*ppNode) + CFGMR3InsertNode(*ppNode, "Config", &pDrvConfig); + AssertLogRelReturn(pDrvConfig, VERR_PDM_CANNOT_TRANSFORM_REMOVED_DRIVER); + + rc = CFGMR3CopyTree(pDrvConfig, pTransConfig, CFGM_COPY_FLAGS_REPLACE_VALUES | CFGM_COPY_FLAGS_MERGE_KEYS); + AssertLogRelRCReturn(rc, rc); + } + else + AssertFailed(); + + cTransformations++; + if (*ppNode) + CFGMR3Dump(*ppNode); + else + LogRel(("PDMDriver: The transformation removed the driver.\n")); + } + + /* + * Note what happened in the release log. + */ + if (cTransformations > 0) + LogRel(("PDMDriver: Transformations done. Applied %u driver transformations.\n", cTransformations)); + + return rc; +} + + +/** + * Instantiate a driver. + * + * @returns VBox status code, including informational statuses. + * + * @param pVM The cross context VM structure. + * @param pNode The CFGM node for the driver. + * @param pBaseInterface The base interface. + * @param pDrvAbove The driver above it. NULL if it's the top-most + * driver. 
+ * @param pLun The LUN the driver is being attached to. NULL + * if we're instantiating a driver chain before + * attaching it - untested. + * @param ppBaseInterface Where to return the pointer to the base + * interface of the newly created driver. + * + * @remarks Recursive calls to this function are normal as the drivers will + * attach to anything below them during the pfnConstruct call. + * + * @todo Need to extend this interface a bit so that the driver + * transformation feature can attach drivers to unconfigured LUNs and + * at the end of chains. + */ +int pdmR3DrvInstantiate(PVM pVM, PCFGMNODE pNode, PPDMIBASE pBaseInterface, PPDMDRVINS pDrvAbove, + PPDMLUN pLun, PPDMIBASE *ppBaseInterface) +{ + Assert(!pDrvAbove || !pDrvAbove->Internal.s.pDown); + Assert(!pDrvAbove || !pDrvAbove->pDownBase); + + Assert(pBaseInterface->pfnQueryInterface(pBaseInterface, PDMIBASE_IID) == pBaseInterface); + + /* + * Do driver chain injections + */ + int rc = pdmR3DrvMaybeTransformChain(pVM, pDrvAbove, pLun, &pNode); + if (RT_FAILURE(rc)) + return rc; + if (!pNode) + return VERR_PDM_NO_ATTACHED_DRIVER; + + /* + * Find the driver. + */ + char *pszName; + rc = CFGMR3QueryStringAlloc(pNode, "Driver", &pszName); + if (RT_SUCCESS(rc)) + { + PPDMDRV pDrv = pdmR3DrvLookup(pVM, pszName); + if ( pDrv + && pDrv->cInstances < pDrv->pReg->cMaxInstances) + { + /* config node */ + PCFGMNODE pConfigNode = CFGMR3GetChild(pNode, "Config"); + if (!pConfigNode) + rc = CFGMR3InsertNode(pNode, "Config", &pConfigNode); + if (RT_SUCCESS(rc)) + { + CFGMR3SetRestrictedRoot(pConfigNode); + + /* + * Allocate the driver instance. + */ + size_t cb = RT_UOFFSETOF_DYN(PDMDRVINS, achInstanceData[pDrv->pReg->cbInstance]); + cb = RT_ALIGN_Z(cb, 16); + bool const fHyperHeap = !!(pDrv->pReg->fFlags & (PDM_DRVREG_FLAGS_R0 | PDM_DRVREG_FLAGS_RC)); + PPDMDRVINS pNew; + if (fHyperHeap) + rc = MMHyperAlloc(pVM, cb, 64, MM_TAG_PDM_DRIVER, (void **)&pNew); + else + rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_DRIVER, cb, (void **)&pNew); + if (RT_SUCCESS(rc)) + { + /* + * Initialize the instance structure (declaration order). + */ + pNew->u32Version = PDM_DRVINS_VERSION; + pNew->iInstance = pDrv->iNextInstance; + pNew->Internal.s.pUp = pDrvAbove ? pDrvAbove : NULL; + //pNew->Internal.s.pDown = NULL; + pNew->Internal.s.pLun = pLun; + pNew->Internal.s.pDrv = pDrv; + pNew->Internal.s.pVMR3 = pVM; + pNew->Internal.s.pVMR0 = pDrv->pReg->fFlags & PDM_DRVREG_FLAGS_R0 ? pVM->pVMR0 : NIL_RTR0PTR; + pNew->Internal.s.pVMRC = pDrv->pReg->fFlags & PDM_DRVREG_FLAGS_RC ? pVM->pVMRC : NIL_RTRCPTR; + //pNew->Internal.s.fDetaching = false; + pNew->Internal.s.fVMSuspended = true; /** @todo should be 'false', if driver is attached at runtime.
*/ + //pNew->Internal.s.fVMReset = false; + pNew->Internal.s.fHyperHeap = fHyperHeap; + //pNew->Internal.s.pfnAsyncNotify = NULL; + pNew->Internal.s.pCfgHandle = pNode; + pNew->pReg = pDrv->pReg; + pNew->pCfg = pConfigNode; + pNew->pUpBase = pBaseInterface; + Assert(!pDrvAbove || pBaseInterface == &pDrvAbove->IBase); + //pNew->pDownBase = NULL; + //pNew->IBase.pfnQueryInterface = NULL; + //pNew->fTracing = 0; + pNew->idTracing = ++pVM->pdm.s.idTracingOther; + pNew->pHlpR3 = &g_pdmR3DrvHlp; + pNew->pvInstanceDataR3 = &pNew->achInstanceData[0]; + if (pDrv->pReg->fFlags & PDM_DRVREG_FLAGS_R0) + { + pNew->pvInstanceDataR0 = MMHyperR3ToR0(pVM, &pNew->achInstanceData[0]); + rc = PDMR3LdrGetSymbolR0(pVM, NULL, "g_pdmR0DrvHlp", &pNew->pHlpR0); + AssertReleaseRCReturn(rc, rc); + } + if ( (pDrv->pReg->fFlags & PDM_DRVREG_FLAGS_RC) + && VM_IS_RAW_MODE_ENABLED(pVM)) + { + pNew->pvInstanceDataR0 = MMHyperR3ToRC(pVM, &pNew->achInstanceData[0]); + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_pdmRCDrvHlp", &pNew->pHlpRC); + AssertReleaseRCReturn(rc, rc); + } + + pDrv->iNextInstance++; + pDrv->cInstances++; + + /* + * Link with it with the driver above / LUN. + */ + if (pDrvAbove) + { + pDrvAbove->pDownBase = &pNew->IBase; + pDrvAbove->Internal.s.pDown = pNew; + } + else if (pLun) + pLun->pTop = pNew; + if (pLun) + pLun->pBottom = pNew; + + /* + * Invoke the constructor. + */ + rc = pDrv->pReg->pfnConstruct(pNew, pNew->pCfg, 0 /*fFlags*/); + if (RT_SUCCESS(rc)) + { + AssertPtr(pNew->IBase.pfnQueryInterface); + Assert(pNew->IBase.pfnQueryInterface(&pNew->IBase, PDMIBASE_IID) == &pNew->IBase); + + /* Success! */ + *ppBaseInterface = &pNew->IBase; + if (pLun) + Log(("PDM: Attached driver %p:'%s'/%d to LUN#%d on device '%s'/%d, pDrvAbove=%p:'%s'/%d\n", + pNew, pDrv->pReg->szName, pNew->iInstance, + pLun->iLun, + pLun->pDevIns ? pLun->pDevIns->pReg->szName : pLun->pUsbIns->pReg->szName, + pLun->pDevIns ? pLun->pDevIns->iInstance : pLun->pUsbIns->iInstance, + pDrvAbove, pDrvAbove ? pDrvAbove->pReg->szName : "", pDrvAbove ? pDrvAbove->iInstance : UINT32_MAX)); + else + Log(("PDM: Attached driver %p:'%s'/%d, pDrvAbove=%p:'%s'/%d\n", + pNew, pDrv->pReg->szName, pNew->iInstance, + pDrvAbove, pDrvAbove ? pDrvAbove->pReg->szName : "", pDrvAbove ? pDrvAbove->iInstance : UINT32_MAX)); + } + else + { + pdmR3DrvDestroyChain(pNew, PDM_TACH_FLAGS_NO_CALLBACKS); + if (rc == VERR_VERSION_MISMATCH) + rc = VERR_PDM_DRIVER_VERSION_MISMATCH; + } + } + else + AssertMsgFailed(("Failed to allocate %d bytes for instantiating driver '%s'! rc=%Rrc\n", cb, pszName, rc)); + } + else + AssertMsgFailed(("Failed to create Config node! rc=%Rrc\n", rc)); + } + else if (pDrv) + { + AssertMsgFailed(("Too many instances of driver '%s', max is %u\n", pszName, pDrv->pReg->cMaxInstances)); + rc = VERR_PDM_TOO_MANY_DRIVER_INSTANCES; + } + else + { + AssertMsgFailed(("Driver '%s' wasn't found!\n", pszName)); + rc = VERR_PDM_DRIVER_NOT_FOUND; + } + MMR3HeapFree(pszName); + } + else + { + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + rc = VERR_PDM_CFG_MISSING_DRIVER_NAME; + else + AssertMsgFailed(("Query for string value of \"Driver\" -> %Rrc\n", rc)); + } + return rc; +} + + +/** + * Detaches a driver from whatever it's attached to. + * This will of course lead to the destruction of the driver and all drivers below it in the chain. + * + * @returns VINF_SUCCESS + * @param pDrvIns The driver instance to detach. + * @param fFlags Flags, combination of the PDMDEVATT_FLAGS_* \#defines. 
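/*
 * Illustrative sketch (not part of the patch): a hypothetical constructor for
 * the DRVEXAMPLE sketch above.  It shows the two obligations visible in the
 * instantiation code: publish IBase.pfnQueryInterface and, if a chain is
 * configured, attach downwards (which is what makes pdmR3DrvInstantiate
 * recurse).  RT_FROM_MEMBER and PDMIBASE_RETURN_INTERFACE are assumed from
 * the IPRT/PDM headers.
 */
static DECLCALLBACK(void *) drvExampleQueryInterface(PPDMIBASE pInterface, const char *pszIID)
{
    PPDMDRVINS pDrvIns = RT_FROM_MEMBER(pInterface, PDMDRVINS, IBase);
    PDMIBASE_RETURN_INTERFACE(pszIID, PDMIBASE, &pDrvIns->IBase);
    return NULL;
}

static DECLCALLBACK(int) drvExampleConstruct(PPDMDRVINS pDrvIns, PCFGMNODE pCfg, uint32_t fFlags)
{
    PDRVEXAMPLE pThis = (PDRVEXAMPLE)pDrvIns->pvInstanceDataR3;
    RT_NOREF(pCfg);
    pThis->pszOwnedString = NULL;
    pThis->cSomething     = 0;

    /* pdmR3DrvInstantiate() asserts this is set when the constructor returns. */
    pDrvIns->IBase.pfnQueryInterface = drvExampleQueryInterface;

    /* Attach whatever AttachedDriver node is configured below us; this is the
       recursion mentioned in the remarks above. */
    PPDMIBASE pBaseBelow = NULL;
    int rc = pDrvIns->pHlpR3->pfnAttach(pDrvIns, fFlags, &pBaseBelow);
    if (rc == VERR_PDM_NO_ATTACHED_DRIVER)
        rc = VINF_SUCCESS;      /* being the end of the chain is fine for this sketch */
    return rc;
}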
+ */ +int pdmR3DrvDetach(PPDMDRVINS pDrvIns, uint32_t fFlags) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvDetach: pDrvIns=%p '%s'/%d\n", pDrvIns, pDrvIns->pReg->szName, pDrvIns->iInstance)); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + /* + * Check that we're not doing this recursively, that could have unwanted sideeffects! + */ + if (pDrvIns->Internal.s.fDetaching) + { + AssertMsgFailed(("Recursive detach! '%s'/%d\n", pDrvIns->pReg->szName, pDrvIns->iInstance)); + return VINF_SUCCESS; + } + + /* + * Check that we actually can detach this instance. + * The requirement is that the driver/device above has a detach method. + */ + if ( pDrvIns->Internal.s.pUp + ? !pDrvIns->Internal.s.pUp->pReg->pfnDetach + : pDrvIns->Internal.s.pLun->pDevIns + ? !pDrvIns->Internal.s.pLun->pDevIns->pReg->pfnDetach + : !pDrvIns->Internal.s.pLun->pUsbIns->pReg->pfnDriverDetach + ) + { + AssertMsgFailed(("Cannot detach driver instance because the driver/device above doesn't support it!\n")); + return VERR_PDM_DRIVER_DETACH_NOT_POSSIBLE; + } + + /* + * Join paths with pdmR3DrvDestroyChain. + */ + pdmR3DrvDestroyChain(pDrvIns, fFlags); + return VINF_SUCCESS; +} + + +/** + * Destroys a driver chain starting with the specified driver. + * + * This is used when unplugging a device at run time. + * + * @param pDrvIns Pointer to the driver instance to start with. + * @param fFlags PDM_TACH_FLAGS_NOT_HOT_PLUG, PDM_TACH_FLAGS_NO_CALLBACKS + * or 0. + */ +void pdmR3DrvDestroyChain(PPDMDRVINS pDrvIns, uint32_t fFlags) +{ + PVM pVM = pDrvIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + /* + * Detach the bottommost driver until we've detached pDrvIns. + */ + pDrvIns->Internal.s.fDetaching = true; + PPDMDRVINS pCur; + do + { + /* find the driver to detach. */ + pCur = pDrvIns; + while (pCur->Internal.s.pDown) + pCur = pCur->Internal.s.pDown; + LogFlow(("pdmR3DrvDestroyChain: pCur=%p '%s'/%d\n", pCur, pCur->pReg->szName, pCur->iInstance)); + + /* + * Unlink it and notify parent. + */ + pCur->Internal.s.fDetaching = true; + + PPDMLUN pLun = pCur->Internal.s.pLun; + Assert(pLun->pBottom == pCur); + pLun->pBottom = pCur->Internal.s.pUp; + + if (pCur->Internal.s.pUp) + { + /* driver parent */ + PPDMDRVINS pParent = pCur->Internal.s.pUp; + pCur->Internal.s.pUp = NULL; + pParent->Internal.s.pDown = NULL; + + if (!(fFlags & PDM_TACH_FLAGS_NO_CALLBACKS) && pParent->pReg->pfnDetach) + pParent->pReg->pfnDetach(pParent, fFlags); + + pParent->pDownBase = NULL; + } + else + { + /* device parent */ + Assert(pLun->pTop == pCur); + pLun->pTop = NULL; + if (!(fFlags & PDM_TACH_FLAGS_NO_CALLBACKS)) + { + if (pLun->pDevIns) + { + if (pLun->pDevIns->pReg->pfnDetach) + { + PDMCritSectEnter(pLun->pDevIns->pCritSectRoR3, VERR_IGNORED); + pLun->pDevIns->pReg->pfnDetach(pLun->pDevIns, pLun->iLun, fFlags); + PDMCritSectLeave(pLun->pDevIns->pCritSectRoR3); + } + } + else + { + if (pLun->pUsbIns->pReg->pfnDriverDetach) + { + /** @todo USB device locking? */ + pLun->pUsbIns->pReg->pfnDriverDetach(pLun->pUsbIns, pLun->iLun, fFlags); + } + } + } + } + + /* + * Call destructor. + */ + pCur->pUpBase = NULL; + if (pCur->pReg->pfnDestruct) + pCur->pReg->pfnDestruct(pCur); + pCur->Internal.s.pDrv->cInstances--; + + /* + * Free all resources allocated by the driver. + */ + /* Queues. */ + int rc = PDMR3QueueDestroyDriver(pVM, pCur); + AssertRC(rc); + + /* Timers. */ + rc = TMR3TimerDestroyDriver(pVM, pCur); + AssertRC(rc); + + /* SSM data units. */ + rc = SSMR3DeregisterDriver(pVM, pCur, NULL, 0); + AssertRC(rc); + + /* PDM threads. 
*/ + rc = pdmR3ThreadDestroyDriver(pVM, pCur); + AssertRC(rc); + + /* Info handlers. */ + rc = DBGFR3InfoDeregisterDriver(pVM, pCur, NULL); + AssertRC(rc); + + /* PDM critsects. */ + rc = pdmR3CritSectBothDeleteDriver(pVM, pCur); + AssertRC(rc); + + /* Block caches. */ + PDMR3BlkCacheReleaseDriver(pVM, pCur); + +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + /* Completion templates.*/ + pdmR3AsyncCompletionTemplateDestroyDriver(pVM, pCur); +#endif + + /* Finally, the driver it self. */ + bool fHyperHeap = pCur->Internal.s.fHyperHeap; + ASMMemFill32(pCur, RT_UOFFSETOF_DYN(PDMDRVINS, achInstanceData[pCur->pReg->cbInstance]), 0xdeadd0d0); + if (fHyperHeap) + MMHyperFree(pVM, pCur); + else + MMR3HeapFree(pCur); + + } while (pCur != pDrvIns); +} + + + + +/** @name Driver Helpers + * @{ + */ + +/** @interface_method_impl{PDMDRVHLPR3,pfnAttach} */ +static DECLCALLBACK(int) pdmR3DrvHlp_Attach(PPDMDRVINS pDrvIns, uint32_t fFlags, PPDMIBASE *ppBaseInterface) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + PVM pVM = pDrvIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DrvHlp_Attach: caller='%s'/%d: fFlags=%#x\n", pDrvIns->pReg->szName, pDrvIns->iInstance, fFlags)); + Assert(!(fFlags & ~(PDM_TACH_FLAGS_NOT_HOT_PLUG))); + RT_NOREF_PV(fFlags); + + /* + * Check that there isn't anything attached already. + */ + int rc; + if (!pDrvIns->Internal.s.pDown) + { + Assert(pDrvIns->Internal.s.pLun->pBottom == pDrvIns); + + /* + * Get the attached driver configuration. + */ + PCFGMNODE pNode = CFGMR3GetChild(pDrvIns->Internal.s.pCfgHandle, "AttachedDriver"); + if (pNode) + rc = pdmR3DrvInstantiate(pVM, pNode, &pDrvIns->IBase, pDrvIns, pDrvIns->Internal.s.pLun, ppBaseInterface); + else + rc = VERR_PDM_NO_ATTACHED_DRIVER; + } + else + { + AssertMsgFailed(("Already got a driver attached. The driver should keep track of such things!\n")); + rc = VERR_PDM_DRIVER_ALREADY_ATTACHED; + } + + LogFlow(("pdmR3DrvHlp_Attach: caller='%s'/%d: return %Rrc\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnDetach} */ +static DECLCALLBACK(int) pdmR3DrvHlp_Detach(PPDMDRVINS pDrvIns, uint32_t fFlags) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_Detach: caller='%s'/%d: fFlags=%#x\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, fFlags)); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + /* + * Anything attached? + */ + int rc; + if (pDrvIns->Internal.s.pDown) + rc = pdmR3DrvDetach(pDrvIns->Internal.s.pDown, fFlags); + else + { + AssertMsgFailed(("Nothing attached!\n")); + rc = VERR_PDM_NO_DRIVER_ATTACHED; + } + + LogFlow(("pdmR3DrvHlp_Detach: caller='%s'/%d: returns %Rrc\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnDetachSelf} */ +static DECLCALLBACK(int) pdmR3DrvHlp_DetachSelf(PPDMDRVINS pDrvIns, uint32_t fFlags) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_DetachSelf: caller='%s'/%d: fFlags=%#x\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, fFlags)); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + int rc = pdmR3DrvDetach(pDrvIns, fFlags); + + LogFlow(("pdmR3DrvHlp_Detach: returns %Rrc\n", rc)); /* pDrvIns is freed by now. 
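/*
 * Illustrative sketch (not part of the patch): the chain destruction above
 * already frees queues, timers, saved-state units, threads, info handlers,
 * critical sections and block caches, so a typical destructor only releases
 * what the driver allocated privately.
 */
static DECLCALLBACK(void) drvExampleDestruct(PPDMDRVINS pDrvIns)
{
    PDRVEXAMPLE pThis = (PDRVEXAMPLE)pDrvIns->pvInstanceDataR3;
    if (pThis->pszOwnedString)
    {
        MMR3HeapFree(pThis->pszOwnedString);
        pThis->pszOwnedString = NULL;
    }
}

/*
 * And a sketch of reconfiguring the chain below at runtime via the
 * pfnDetach/pfnAttach helpers above (e.g. on a medium change).  Note that
 * pdmR3DrvDetach() refuses with VERR_PDM_DRIVER_DETACH_NOT_POSSIBLE unless
 * the caller implements pfnDetach in its PDMDRVREG.
 */
static int drvExampleReattachBelow(PPDMDRVINS pDrvIns)
{
    int rc = pDrvIns->pHlpR3->pfnDetach(pDrvIns, 0 /*fFlags*/);              /* destroys the chain below, bottom up */
    if (RT_SUCCESS(rc) || rc == VERR_PDM_NO_DRIVER_ATTACHED)
    {
        PPDMIBASE pBaseBelow = NULL;
        rc = pDrvIns->pHlpR3->pfnAttach(pDrvIns, 0 /*fFlags*/, &pBaseBelow); /* re-instantiates AttachedDriver */
        if (rc == VERR_PDM_NO_ATTACHED_DRIVER)
            rc = VINF_SUCCESS;
    }
    return rc;
}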
*/ + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnMountPrepare} */ +static DECLCALLBACK(int) pdmR3DrvHlp_MountPrepare(PPDMDRVINS pDrvIns, const char *pszFilename, const char *pszCoreDriver) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_MountPrepare: caller='%s'/%d: pszFilename=%p:{%s} pszCoreDriver=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pszFilename, pszFilename, pszCoreDriver, pszCoreDriver)); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + /* + * Do the caller have anything attached below itself? + */ + if (pDrvIns->Internal.s.pDown) + { + AssertMsgFailed(("Cannot prepare a mount when something's attached to you!\n")); + return VERR_PDM_DRIVER_ALREADY_ATTACHED; + } + + /* + * We're asked to prepare, so we'll start off by nuking the + * attached configuration tree. + */ + PCFGMNODE pNode = CFGMR3GetChild(pDrvIns->Internal.s.pCfgHandle, "AttachedDriver"); + if (pNode) + CFGMR3RemoveNode(pNode); + + /* + * If there is no core driver, we'll have to probe for it. + */ + if (!pszCoreDriver) + { + /** @todo implement image probing. */ + AssertReleaseMsgFailed(("Not implemented!\n")); + return VERR_NOT_IMPLEMENTED; + } + + /* + * Construct the basic attached driver configuration. + */ + int rc = CFGMR3InsertNode(pDrvIns->Internal.s.pCfgHandle, "AttachedDriver", &pNode); + if (RT_SUCCESS(rc)) + { + rc = CFGMR3InsertString(pNode, "Driver", pszCoreDriver); + if (RT_SUCCESS(rc)) + { + PCFGMNODE pCfg; + rc = CFGMR3InsertNode(pNode, "Config", &pCfg); + if (RT_SUCCESS(rc)) + { + rc = CFGMR3InsertString(pCfg, "Path", pszFilename); + if (RT_SUCCESS(rc)) + { + LogFlow(("pdmR3DrvHlp_MountPrepare: caller='%s'/%d: returns %Rrc (Driver=%s)\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, rc, pszCoreDriver)); + return rc; + } + else + AssertMsgFailed(("Path string insert failed, rc=%Rrc\n", rc)); + } + else + AssertMsgFailed(("Config node failed, rc=%Rrc\n", rc)); + } + else + AssertMsgFailed(("Driver string insert failed, rc=%Rrc\n", rc)); + CFGMR3RemoveNode(pNode); + } + else + AssertMsgFailed(("AttachedDriver node insert failed, rc=%Rrc\n", rc)); + + LogFlow(("pdmR3DrvHlp_MountPrepare: caller='%s'/%d: returns %Rrc\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnAssertEMT} */ +static DECLCALLBACK(bool) pdmR3DrvHlp_AssertEMT(PPDMDRVINS pDrvIns, const char *pszFile, unsigned iLine, const char *pszFunction) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + if (VM_IS_EMT(pDrvIns->Internal.s.pVMR3)) + return true; + + char szMsg[100]; + RTStrPrintf(szMsg, sizeof(szMsg), "AssertEMT '%s'/%d\n", pDrvIns->pReg->szName, pDrvIns->iInstance); + RTAssertMsg1Weak(szMsg, iLine, pszFile, pszFunction); + AssertBreakpoint(); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + return false; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnAssertOther} */ +static DECLCALLBACK(bool) pdmR3DrvHlp_AssertOther(PPDMDRVINS pDrvIns, const char *pszFile, unsigned iLine, const char *pszFunction) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + if (!VM_IS_EMT(pDrvIns->Internal.s.pVMR3)) + return true; + + char szMsg[100]; + RTStrPrintf(szMsg, sizeof(szMsg), "AssertOther '%s'/%d\n", pDrvIns->pReg->szName, pDrvIns->iInstance); + RTAssertMsg1Weak(szMsg, iLine, pszFile, pszFunction); + AssertBreakpoint(); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + return false; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMSetError} */ +static DECLCALLBACK(int) pdmR3DrvHlp_VMSetError(PPDMDRVINS pDrvIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) 
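/*
 * Illustrative sketch (not part of the patch): the configuration subtree
 * pfnMountPrepare builds under the caller's CFGM node; the driver and file
 * names are examples only.
 *
 *   <caller's node>/AttachedDriver/
 *     Driver  <string> = "VD"                   the pszCoreDriver argument
 *     Config/
 *       Path  <string> = "/path/to/image.iso"   the pszFilename argument
 */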
+{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + va_list args; + va_start(args, pszFormat); + int rc2 = VMSetErrorV(pDrvIns->Internal.s.pVMR3, rc, RT_SRC_POS_ARGS, pszFormat, args); Assert(rc2 == rc); NOREF(rc2); + va_end(args); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMSetErrorV} */ +static DECLCALLBACK(int) pdmR3DrvHlp_VMSetErrorV(PPDMDRVINS pDrvIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + int rc2 = VMSetErrorV(pDrvIns->Internal.s.pVMR3, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMSetRuntimeError} */ +static DECLCALLBACK(int) pdmR3DrvHlp_VMSetRuntimeError(PPDMDRVINS pDrvIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, ...) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + va_list args; + va_start(args, pszFormat); + int rc = VMSetRuntimeErrorV(pDrvIns->Internal.s.pVMR3, fFlags, pszErrorId, pszFormat, args); + va_end(args); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMSetRuntimeErrorV} */ +static DECLCALLBACK(int) pdmR3DrvHlp_VMSetRuntimeErrorV(PPDMDRVINS pDrvIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list va) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + int rc = VMSetRuntimeErrorV(pDrvIns->Internal.s.pVMR3, fFlags, pszErrorId, pszFormat, va); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMState} */ +static DECLCALLBACK(VMSTATE) pdmR3DrvHlp_VMState(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + + VMSTATE enmVMState = VMR3GetState(pDrvIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DrvHlp_VMState: caller='%s'/%d: returns %d (%s)\n", pDrvIns->pReg->szName, pDrvIns->iInstance, + enmVMState, VMR3GetStateName(enmVMState))); + return enmVMState; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMTeleportedAndNotFullyResumedYet} */ +static DECLCALLBACK(bool) pdmR3DrvHlp_VMTeleportedAndNotFullyResumedYet(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + + bool fRc = VMR3TeleportedAndNotFullyResumedYet(pDrvIns->Internal.s.pVMR3); + + LogFlow(("pdmR3DrvHlp_VMState: caller='%s'/%d: returns %RTbool)\n", pDrvIns->pReg->szName, pDrvIns->iInstance, + fRc)); + return fRc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnGetSupDrvSession} */ +static DECLCALLBACK(PSUPDRVSESSION) pdmR3DrvHlp_GetSupDrvSession(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + + PSUPDRVSESSION pSession = pDrvIns->Internal.s.pVMR3->pSession; + LogFlow(("pdmR3DrvHlp_GetSupDrvSession: caller='%s'/%d: returns %p)\n", pDrvIns->pReg->szName, pDrvIns->iInstance, + pSession)); + return pSession; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnQueueCreate} */ +static DECLCALLBACK(int) pdmR3DrvHlp_QueueCreate(PPDMDRVINS pDrvIns, uint32_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEDRV pfnCallback, const char *pszName, PPDMQUEUE *ppQueue) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_PDMQueueCreate: caller='%s'/%d: cbItem=%d cItems=%d cMilliesInterval=%d pfnCallback=%p pszName=%p:{%s} ppQueue=%p\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, cbItem, cItems, cMilliesInterval, pfnCallback, pszName, pszName, ppQueue)); + PVM pVM = pDrvIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + + if (pDrvIns->iInstance > 0) + { + pszName = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DRIVER_DESC, "%s_%u", pszName, pDrvIns->iInstance); + AssertLogRelReturn(pszName, VERR_NO_MEMORY); + } + + int rc = PDMR3QueueCreateDriver(pVM, pDrvIns, cbItem, cItems, cMilliesInterval, pfnCallback, 
pszName, ppQueue); + + LogFlow(("pdmR3DrvHlp_PDMQueueCreate: caller='%s'/%d: returns %Rrc *ppQueue=%p\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc, *ppQueue)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnTMGetVirtualFreq} */ +static DECLCALLBACK(uint64_t) pdmR3DrvHlp_TMGetVirtualFreq(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + + return TMVirtualGetFreq(pDrvIns->Internal.s.pVMR3); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnTMGetVirtualTime} */ +static DECLCALLBACK(uint64_t) pdmR3DrvHlp_TMGetVirtualTime(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + + return TMVirtualGet(pDrvIns->Internal.s.pVMR3); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnTMTimerCreate} */ +static DECLCALLBACK(int) pdmR3DrvHlp_TMTimerCreate(PPDMDRVINS pDrvIns, TMCLOCK enmClock, PFNTMTIMERDRV pfnCallback, void *pvUser, uint32_t fFlags, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_TMTimerCreate: caller='%s'/%d: enmClock=%d pfnCallback=%p pvUser=%p fFlags=%#x pszDesc=%p:{%s} ppTimer=%p\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, enmClock, pfnCallback, pvUser, fFlags, pszDesc, pszDesc, ppTimer)); + + int rc = TMR3TimerCreateDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, enmClock, pfnCallback, pvUser, fFlags, pszDesc, ppTimer); + + LogFlow(("pdmR3DrvHlp_TMTimerCreate: caller='%s'/%d: returns %Rrc *ppTimer=%p\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc, *ppTimer)); + return rc; +} + + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSSMRegister} */ +static DECLCALLBACK(int) pdmR3DrvHlp_SSMRegister(PPDMDRVINS pDrvIns, uint32_t uVersion, size_t cbGuess, + PFNSSMDRVLIVEPREP pfnLivePrep, PFNSSMDRVLIVEEXEC pfnLiveExec, PFNSSMDRVLIVEVOTE pfnLiveVote, + PFNSSMDRVSAVEPREP pfnSavePrep, PFNSSMDRVSAVEEXEC pfnSaveExec, PFNSSMDRVSAVEDONE pfnSaveDone, + PFNSSMDRVLOADPREP pfnLoadPrep, PFNSSMDRVLOADEXEC pfnLoadExec, PFNSSMDRVLOADDONE pfnLoadDone) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_SSMRegister: caller='%s'/%d: uVersion=%#x cbGuess=%#x \n" + " pfnLivePrep=%p pfnLiveExec=%p pfnLiveVote=%p pfnSavePrep=%p pfnSaveExec=%p pfnSaveDone=%p pszLoadPrep=%p pfnLoadExec=%p pfnLoaddone=%p\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, uVersion, cbGuess, + pfnLivePrep, pfnLiveExec, pfnLiveVote, + pfnSavePrep, pfnSaveExec, pfnSaveDone, pfnLoadPrep, pfnLoadExec, pfnLoadDone)); + + int rc = SSMR3RegisterDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, pDrvIns->pReg->szName, pDrvIns->iInstance, + uVersion, cbGuess, + pfnLivePrep, pfnLiveExec, pfnLiveVote, + pfnSavePrep, pfnSaveExec, pfnSaveDone, + pfnLoadPrep, pfnLoadExec, pfnLoadDone); + + LogFlow(("pdmR3DrvHlp_SSMRegister: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSSMDeregister} */ +static DECLCALLBACK(int) pdmR3DrvHlp_SSMDeregister(PPDMDRVINS pDrvIns, const char *pszName, uint32_t uInstance) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_SSMDeregister: caller='%s'/%d: pszName=%p:{%s} uInstance=%#x\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pszName, pszName, uInstance)); + + int rc = SSMR3DeregisterDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, pszName, uInstance); + + LogFlow(("pdmR3DrvHlp_SSMDeregister: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnDBGFInfoRegister} */ 
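/*
 * Illustrative sketch (not part of the patch): registering a per-driver
 * saved-state unit through the pfnSSMRegister helper above.  The callbacks,
 * version number and DRVEXAMPLE fields are hypothetical; the helper and the
 * FNSSMDRV* callback shapes are taken from the surrounding code and the SSM
 * API.
 */
static DECLCALLBACK(int) drvExampleSaveExec(PPDMDRVINS pDrvIns, PSSMHANDLE pSSM)
{
    PDRVEXAMPLE pThis = (PDRVEXAMPLE)pDrvIns->pvInstanceDataR3;
    return SSMR3PutU32(pSSM, pThis->cSomething);
}

static DECLCALLBACK(int) drvExampleLoadExec(PPDMDRVINS pDrvIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
{
    AssertReturn(uVersion == 1, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION);
    Assert(uPass == SSM_PASS_FINAL); RT_NOREF(uPass);
    PDRVEXAMPLE pThis = (PDRVEXAMPLE)pDrvIns->pvInstanceDataR3;
    return SSMR3GetU32(pSSM, &pThis->cSomething);
}

/* ...and the registration call, typically made from the constructor (no live
   migration callbacks in this sketch): */
int rc = pDrvIns->pHlpR3->pfnSSMRegister(pDrvIns, 1 /*uVersion*/, sizeof(uint32_t) /*cbGuess*/,
                                         NULL, NULL, NULL,                /* live prep/exec/vote */
                                         NULL, drvExampleSaveExec, NULL,  /* save prep/exec/done */
                                         NULL, drvExampleLoadExec, NULL); /* load prep/exec/done */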
+static DECLCALLBACK(int) pdmR3DrvHlp_DBGFInfoRegister(PPDMDRVINS pDrvIns, const char *pszName, const char *pszDesc, PFNDBGFHANDLERDRV pfnHandler) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_DBGFInfoRegister: caller='%s'/%d: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pszName, pszName, pszDesc, pszDesc, pfnHandler)); + + int rc = DBGFR3InfoRegisterDriver(pDrvIns->Internal.s.pVMR3, pszName, pszDesc, pfnHandler, pDrvIns); + + LogFlow(("pdmR3DrvHlp_DBGFInfoRegister: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnDBGFInfoDeregister} */ +static DECLCALLBACK(int) pdmR3DrvHlp_DBGFInfoDeregister(PPDMDRVINS pDrvIns, const char *pszName) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_DBGFInfoDeregister: caller='%s'/%d: pszName=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pszName, pszName)); + + int rc = DBGFR3InfoDeregisterDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, pszName); + + LogFlow(("pdmR3DrvHlp_DBGFInfoDeregister: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSTAMRegister} */ +static DECLCALLBACK(void) pdmR3DrvHlp_STAMRegister(PPDMDRVINS pDrvIns, void *pvSample, STAMTYPE enmType, const char *pszName, + STAMUNIT enmUnit, const char *pszDesc) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + STAM_REG(pDrvIns->Internal.s.pVMR3, pvSample, enmType, pszName, enmUnit, pszDesc); + RT_NOREF6(pDrvIns, pvSample, enmType, pszName, enmUnit, pszDesc); + /** @todo track the samples so they can be dumped & deregistered when the driver instance is destroyed. + * For now we just have to be careful not to use this call for drivers which can be unloaded. */ +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSTAMRegisterF} */ +static DECLCALLBACK(void) pdmR3DrvHlp_STAMRegisterF(PPDMDRVINS pDrvIns, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, + STAMUNIT enmUnit, const char *pszDesc, const char *pszName, ...) 
+{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + va_list args; + va_start(args, pszName); + int rc = STAMR3RegisterV(pDrvIns->Internal.s.pVMR3, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); + va_end(args); + AssertRC(rc); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSTAMRegisterV} */ +static DECLCALLBACK(void) pdmR3DrvHlp_STAMRegisterV(PPDMDRVINS pDrvIns, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, + STAMUNIT enmUnit, const char *pszDesc, const char *pszName, va_list args) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + int rc = STAMR3RegisterV(pDrvIns->Internal.s.pVMR3, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); + AssertRC(rc); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSTAMDeregister} */ +static DECLCALLBACK(int) pdmR3DrvHlp_STAMDeregister(PPDMDRVINS pDrvIns, void *pvSample) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + + return STAMR3DeregisterByAddr(pDrvIns->Internal.s.pVMR3->pUVM, pvSample); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSUPCallVMMR0Ex} */ +static DECLCALLBACK(int) pdmR3DrvHlp_SUPCallVMMR0Ex(PPDMDRVINS pDrvIns, unsigned uOperation, void *pvArg, unsigned cbArg) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_SSMCallVMMR0Ex: caller='%s'/%d: uOperation=%u pvArg=%p cbArg=%d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, uOperation, pvArg, cbArg)); + RT_NOREF_PV(cbArg); + + int rc; + if ( uOperation >= VMMR0_DO_SRV_START + && uOperation < VMMR0_DO_SRV_END) + rc = SUPR3CallVMMR0Ex(pDrvIns->Internal.s.pVMR3->pVMR0, NIL_VMCPUID, uOperation, 0, (PSUPVMMR0REQHDR)pvArg); + else + { + AssertMsgFailed(("Invalid uOperation=%u\n", uOperation)); + rc = VERR_INVALID_PARAMETER; + } + + LogFlow(("pdmR3DrvHlp_SUPCallVMMR0Ex: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnUSBRegisterHub} */ +static DECLCALLBACK(int) pdmR3DrvHlp_USBRegisterHub(PPDMDRVINS pDrvIns, uint32_t fVersions, uint32_t cPorts, PCPDMUSBHUBREG pUsbHubReg, PPCPDMUSBHUBHLP ppUsbHubHlp) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_USBRegisterHub: caller='%s'/%d: fVersions=%#x cPorts=%#x pUsbHubReg=%p ppUsbHubHlp=%p\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, fVersions, cPorts, pUsbHubReg, ppUsbHubHlp)); + +#ifdef VBOX_WITH_USB + int rc = pdmR3UsbRegisterHub(pDrvIns->Internal.s.pVMR3, pDrvIns, fVersions, cPorts, pUsbHubReg, ppUsbHubHlp); +#else + int rc = VERR_NOT_SUPPORTED; +#endif + + LogFlow(("pdmR3DrvHlp_USBRegisterHub: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnSetAsyncNotification} */ +static DECLCALLBACK(int) pdmR3DrvHlp_SetAsyncNotification(PPDMDRVINS pDrvIns, PFNPDMDRVASYNCNOTIFY pfnAsyncNotify) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT0(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_SetAsyncNotification: caller='%s'/%d: pfnAsyncNotify=%p\n", pDrvIns->pReg->szName, pDrvIns->iInstance, pfnAsyncNotify)); + + int rc = VINF_SUCCESS; + AssertStmt(pfnAsyncNotify, rc = VERR_INVALID_PARAMETER); + AssertStmt(!pDrvIns->Internal.s.pfnAsyncNotify, rc = VERR_WRONG_ORDER); + AssertStmt(pDrvIns->Internal.s.fVMSuspended || pDrvIns->Internal.s.fVMReset, rc = VERR_WRONG_ORDER); + VMSTATE enmVMState = VMR3GetState(pDrvIns->Internal.s.pVMR3); + AssertStmt( 
enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS + || enmVMState == VMSTATE_SUSPENDING_LS + || enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS + || enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS, + rc = VERR_INVALID_STATE); + + if (RT_SUCCESS(rc)) + pDrvIns->Internal.s.pfnAsyncNotify = pfnAsyncNotify; + + LogFlow(("pdmR3DrvHlp_SetAsyncNotification: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnAsyncNotificationCompleted} */ +static DECLCALLBACK(void) pdmR3DrvHlp_AsyncNotificationCompleted(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + PVM pVM = pDrvIns->Internal.s.pVMR3; + + VMSTATE enmVMState = VMR3GetState(pVM); + if ( enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS + || enmVMState == VMSTATE_SUSPENDING_LS + || enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS + || enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS) + { + LogFlow(("pdmR3DrvHlp_AsyncNotificationCompleted: caller='%s'/%d:\n", pDrvIns->pReg->szName, pDrvIns->iInstance)); + VMR3AsyncPdmNotificationWakeupU(pVM->pUVM); + } + else + LogFlow(("pdmR3DrvHlp_AsyncNotificationCompleted: caller='%s'/%d: enmVMState=%d\n", pDrvIns->pReg->szName, pDrvIns->iInstance, enmVMState)); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnThreadCreate} */ +static DECLCALLBACK(int) pdmR3DrvHlp_ThreadCreate(PPDMDRVINS pDrvIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADDRV pfnThread, + PFNPDMTHREADWAKEUPDRV pfnWakeup, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_ThreadCreate: caller='%s'/%d: ppThread=%p pvUser=%p pfnThread=%p pfnWakeup=%p cbStack=%#zx enmType=%d pszName=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, ppThread, pvUser, pfnThread, pfnWakeup, cbStack, enmType, pszName, pszName)); + + int rc = pdmR3ThreadCreateDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, ppThread, pvUser, pfnThread, pfnWakeup, cbStack, enmType, pszName); + + LogFlow(("pdmR3DrvHlp_ThreadCreate: caller='%s'/%d: returns %Rrc *ppThread=%RTthrd\n", pDrvIns->pReg->szName, pDrvIns->iInstance, + rc, *ppThread)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnAsyncCompletionTemplateCreate} */ +static DECLCALLBACK(int) pdmR3DrvHlp_AsyncCompletionTemplateCreate(PPDMDRVINS pDrvIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PFNPDMASYNCCOMPLETEDRV pfnCompleted, void *pvTemplateUser, + const char *pszDesc) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_AsyncCompletionTemplateCreate: caller='%s'/%d: ppTemplate=%p pfnCompleted=%p pszDesc=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, ppTemplate, pfnCompleted, pszDesc, pszDesc)); + + int rc = pdmR3AsyncCompletionTemplateCreateDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, ppTemplate, pfnCompleted, pvTemplateUser, pszDesc); + + LogFlow(("pdmR3DrvHlp_AsyncCompletionTemplateCreate: caller='%s'/%d: returns %Rrc *ppThread=%p\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc, *ppTemplate)); + return rc; +} + + +#ifdef VBOX_WITH_NETSHAPER +/** @interface_method_impl{PDMDRVHLPR3,pfnNetShaperAttach} */ +static DECLCALLBACK(int) pdmR3DrvHlp_NetShaperAttach(PPDMDRVINS pDrvIns, const char *pszBwGroup, PPDMNSFILTER pFilter) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_NetShaperAttach: caller='%s'/%d: 
pFilter=%p pszBwGroup=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pFilter, pszBwGroup, pszBwGroup)); + + int rc = PDMR3NsAttach(pDrvIns->Internal.s.pVMR3->pUVM, pDrvIns, pszBwGroup, pFilter); + + LogFlow(("pdmR3DrvHlp_NetShaperAttach: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnNetShaperDetach} */ +static DECLCALLBACK(int) pdmR3DrvHlp_NetShaperDetach(PPDMDRVINS pDrvIns, PPDMNSFILTER pFilter) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + LogFlow(("pdmR3DrvHlp_NetShaperDetach: caller='%s'/%d: pFilter=%p\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pFilter)); + + int rc = PDMR3NsDetach(pDrvIns->Internal.s.pVMR3->pUVM, pDrvIns, pFilter); + + LogFlow(("pdmR3DrvHlp_NetShaperDetach: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc)); + return rc; +} +#endif /* VBOX_WITH_NETSHAPER */ + + +/** @interface_method_impl{PDMDRVHLPR3,pfnLdrGetRCInterfaceSymbols} */ +static DECLCALLBACK(int) pdmR3DrvHlp_LdrGetRCInterfaceSymbols(PPDMDRVINS pDrvIns, void *pvInterface, size_t cbInterface, + const char *pszSymPrefix, const char *pszSymList) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_LdrGetRCInterfaceSymbols: caller='%s'/%d: pvInterface=%p cbInterface=%zu pszSymPrefix=%p:{%s} pszSymList=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pvInterface, cbInterface, pszSymPrefix, pszSymPrefix, pszSymList, pszSymList)); + + int rc; + if ( strncmp(pszSymPrefix, "drv", 3) == 0 + && RTStrIStr(pszSymPrefix + 3, pDrvIns->pReg->szName) != NULL) + { + if (pDrvIns->pReg->fFlags & PDM_DRVREG_FLAGS_RC) + rc = PDMR3LdrGetInterfaceSymbols(pDrvIns->Internal.s.pVMR3, + pvInterface, cbInterface, + pDrvIns->pReg->szRCMod, pDrvIns->Internal.s.pDrv->pszRCSearchPath, + pszSymPrefix, pszSymList, + false /*fRing0OrRC*/); + else + { + AssertMsgFailed(("Not a raw-mode enabled driver\n")); + rc = VERR_PERMISSION_DENIED; + } + } + else + { + AssertMsgFailed(("Invalid prefix '%s' for '%s'; must start with 'drv' and contain the driver name!\n", + pszSymPrefix, pDrvIns->pReg->szName)); + rc = VERR_INVALID_NAME; + } + + LogFlow(("pdmR3DrvHlp_LdrGetRCInterfaceSymbols: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnLdrGetR0InterfaceSymbols} */ +static DECLCALLBACK(int) pdmR3DrvHlp_LdrGetR0InterfaceSymbols(PPDMDRVINS pDrvIns, void *pvInterface, size_t cbInterface, + const char *pszSymPrefix, const char *pszSymList) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + VM_ASSERT_EMT(pDrvIns->Internal.s.pVMR3); + LogFlow(("pdmR3DrvHlp_LdrGetR0InterfaceSymbols: caller='%s'/%d: pvInterface=%p cbInterface=%zu pszSymPrefix=%p:{%s} pszSymList=%p:{%s}\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pvInterface, cbInterface, pszSymPrefix, pszSymPrefix, pszSymList, pszSymList)); + + int rc; + if ( strncmp(pszSymPrefix, "drv", 3) == 0 + && RTStrIStr(pszSymPrefix + 3, pDrvIns->pReg->szName) != NULL) + { + if (pDrvIns->pReg->fFlags & PDM_DRVREG_FLAGS_R0) + rc = PDMR3LdrGetInterfaceSymbols(pDrvIns->Internal.s.pVMR3, + pvInterface, cbInterface, + pDrvIns->pReg->szR0Mod, pDrvIns->Internal.s.pDrv->pszR0SearchPath, + pszSymPrefix, pszSymList, + true /*fRing0OrRC*/); + else + { + AssertMsgFailed(("Not a ring-0 enabled driver\n")); + rc = VERR_PERMISSION_DENIED; + } + } + else + { + AssertMsgFailed(("Invalid prefix '%s' for '%s'; must start with 'drv' and contain the driver 
name!\n", + pszSymPrefix, pDrvIns->pReg->szName)); + rc = VERR_INVALID_NAME; + } + + LogFlow(("pdmR3DrvHlp_LdrGetR0InterfaceSymbols: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnCritSectInit} */ +static DECLCALLBACK(int) pdmR3DrvHlp_CritSectInit(PPDMDRVINS pDrvIns, PPDMCRITSECT pCritSect, + RT_SRC_POS_DECL, const char *pszName) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + PVM pVM = pDrvIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3DrvHlp_CritSectInit: caller='%s'/%d: pCritSect=%p pszName=%s\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, pCritSect, pszName)); + + int rc = pdmR3CritSectInitDriver(pVM, pDrvIns, pCritSect, RT_SRC_POS_ARGS, "%s_%u", pszName, pDrvIns->iInstance); + + LogFlow(("pdmR3DrvHlp_CritSectInit: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnCallR0} */ +static DECLCALLBACK(int) pdmR3DrvHlp_CallR0(PPDMDRVINS pDrvIns, uint32_t uOperation, uint64_t u64Arg) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + PVM pVM = pDrvIns->Internal.s.pVMR3; + LogFlow(("pdmR3DrvHlp_CallR0: caller='%s'/%d: uOperation=%#x u64Arg=%#RX64\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, uOperation, u64Arg)); + + /* + * Lazy resolve the ring-0 entry point. + */ + int rc = VINF_SUCCESS; + PFNPDMDRVREQHANDLERR0 pfnReqHandlerR0 = pDrvIns->Internal.s.pfnReqHandlerR0; + if (RT_UNLIKELY(pfnReqHandlerR0 == NIL_RTR0PTR)) + { + if (pDrvIns->pReg->fFlags & PDM_DRVREG_FLAGS_R0) + { + char szSymbol[ sizeof("drvR0") + sizeof(pDrvIns->pReg->szName) + sizeof("ReqHandler")]; + strcat(strcat(strcpy(szSymbol, "drvR0"), pDrvIns->pReg->szName), "ReqHandler"); + szSymbol[sizeof("drvR0") - 1] = RT_C_TO_UPPER(szSymbol[sizeof("drvR0") - 1]); + + rc = PDMR3LdrGetSymbolR0Lazy(pVM, pDrvIns->pReg->szR0Mod, pDrvIns->Internal.s.pDrv->pszR0SearchPath, szSymbol, + &pfnReqHandlerR0); + if (RT_SUCCESS(rc)) + pDrvIns->Internal.s.pfnReqHandlerR0 = pfnReqHandlerR0; + else + pfnReqHandlerR0 = NIL_RTR0PTR; + } + else + rc = VERR_ACCESS_DENIED; + } + if (RT_LIKELY(pfnReqHandlerR0 != NIL_RTR0PTR)) + { + /* + * Make the ring-0 call. 
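/*
 * Illustrative sketch (not part of the patch): what the lazy resolution above
 * implies for a ring-0 capable driver.  For a driver registered as "Example"
 * with PDM_DRVREG_FLAGS_R0, the handler exported from its szR0Mod module has
 * to be named "drvR0ExampleReqHandler" (first letter of the driver name
 * upper-cased).  A hypothetical ring-3 call site then looks like this, with
 * 42 standing in for an operation code the R0 handler understands:
 */
int rc = pDrvIns->pHlpR3->pfnCallR0(pDrvIns, 42 /*uOperation*/, 0 /*u64Arg*/);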
+ */ + PDMDRIVERCALLREQHANDLERREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pDrvInsR0 = PDMDRVINS_2_R0PTR(pDrvIns); + Req.uOperation = uOperation; + Req.u32Alignment = 0; + Req.u64Arg = u64Arg; + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_PDM_DRIVER_CALL_REQ_HANDLER, 0, &Req.Hdr); + } + + LogFlow(("pdmR3DrvHlp_CallR0: caller='%s'/%d: returns %Rrc\n", pDrvIns->pReg->szName, + pDrvIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnFTSetCheckpoint} */ +static DECLCALLBACK(int) pdmR3DrvHlp_FTSetCheckpoint(PPDMDRVINS pDrvIns, FTMCHECKPOINTTYPE enmType) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + return FTMSetCheckpoint(pDrvIns->Internal.s.pVMR3, enmType); +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnBlkCacheRetain} */ +static DECLCALLBACK(int) pdmR3DrvHlp_BlkCacheRetain(PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache, + PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete, + PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue, + PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard, + const char *pcszId) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + return PDMR3BlkCacheRetainDriver(pDrvIns->Internal.s.pVMR3, pDrvIns, ppBlkCache, + pfnXferComplete, pfnXferEnqueue, pfnXferEnqueueDiscard, pcszId); +} + + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMGetSuspendReason} */ +static DECLCALLBACK(VMSUSPENDREASON) pdmR3DrvHlp_VMGetSuspendReason(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + PVM pVM = pDrvIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + VMSUSPENDREASON enmReason = VMR3GetSuspendReason(pVM->pUVM); + LogFlow(("pdmR3DrvHlp_VMGetSuspendReason: caller='%s'/%d: returns %d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, enmReason)); + return enmReason; +} + + +/** @interface_method_impl{PDMDRVHLPR3,pfnVMGetResumeReason} */ +static DECLCALLBACK(VMRESUMEREASON) pdmR3DrvHlp_VMGetResumeReason(PPDMDRVINS pDrvIns) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + PVM pVM = pDrvIns->Internal.s.pVMR3; + VM_ASSERT_EMT(pVM); + VMRESUMEREASON enmReason = VMR3GetResumeReason(pVM->pUVM); + LogFlow(("pdmR3DrvHlp_VMGetResumeReason: caller='%s'/%d: returns %d\n", + pDrvIns->pReg->szName, pDrvIns->iInstance, enmReason)); + return enmReason; +} + + +/** + * The driver helper structure. 
+ */ +const PDMDRVHLPR3 g_pdmR3DrvHlp = +{ + PDM_DRVHLPR3_VERSION, + pdmR3DrvHlp_Attach, + pdmR3DrvHlp_Detach, + pdmR3DrvHlp_DetachSelf, + pdmR3DrvHlp_MountPrepare, + pdmR3DrvHlp_AssertEMT, + pdmR3DrvHlp_AssertOther, + pdmR3DrvHlp_VMSetError, + pdmR3DrvHlp_VMSetErrorV, + pdmR3DrvHlp_VMSetRuntimeError, + pdmR3DrvHlp_VMSetRuntimeErrorV, + pdmR3DrvHlp_VMState, + pdmR3DrvHlp_VMTeleportedAndNotFullyResumedYet, + pdmR3DrvHlp_GetSupDrvSession, + pdmR3DrvHlp_QueueCreate, + pdmR3DrvHlp_TMGetVirtualFreq, + pdmR3DrvHlp_TMGetVirtualTime, + pdmR3DrvHlp_TMTimerCreate, + pdmR3DrvHlp_SSMRegister, + pdmR3DrvHlp_SSMDeregister, + pdmR3DrvHlp_DBGFInfoRegister, + pdmR3DrvHlp_DBGFInfoDeregister, + pdmR3DrvHlp_STAMRegister, + pdmR3DrvHlp_STAMRegisterF, + pdmR3DrvHlp_STAMRegisterV, + pdmR3DrvHlp_STAMDeregister, + pdmR3DrvHlp_SUPCallVMMR0Ex, + pdmR3DrvHlp_USBRegisterHub, + pdmR3DrvHlp_SetAsyncNotification, + pdmR3DrvHlp_AsyncNotificationCompleted, + pdmR3DrvHlp_ThreadCreate, + pdmR3DrvHlp_AsyncCompletionTemplateCreate, +#ifdef VBOX_WITH_NETSHAPER + pdmR3DrvHlp_NetShaperAttach, + pdmR3DrvHlp_NetShaperDetach, +#endif /* VBOX_WITH_NETSHAPER */ + pdmR3DrvHlp_LdrGetRCInterfaceSymbols, + pdmR3DrvHlp_LdrGetR0InterfaceSymbols, + pdmR3DrvHlp_CritSectInit, + pdmR3DrvHlp_CallR0, + pdmR3DrvHlp_FTSetCheckpoint, + pdmR3DrvHlp_BlkCacheRetain, + pdmR3DrvHlp_VMGetSuspendReason, + pdmR3DrvHlp_VMGetResumeReason, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + PDM_DRVHLPR3_VERSION /* u32TheEnd */ +}; + +/** @} */ diff --git a/src/VBox/VMM/VMMR3/PDMLdr.cpp b/src/VBox/VMM/VMMR3/PDMLdr.cpp new file mode 100644 index 00000000..e35c7e3a --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMLdr.cpp @@ -0,0 +1,1735 @@ +/* $Id: PDMLdr.cpp $ */ +/** @file + * PDM - Pluggable Device Manager, module loader. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +//#define PDMLDR_FAKE_MODE + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_LDR +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Structure which the user argument of the RTLdrGetBits() callback points to. 
+ * @internal + */ +typedef struct PDMGETIMPORTARGS +{ + PVM pVM; + PPDMMOD pModule; +} PDMGETIMPORTARGS, *PPDMGETIMPORTARGS; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_RAW_MODE +static DECLCALLBACK(int) pdmR3GetImportRC(RTLDRMOD hLdrMod, const char *pszModule, const char *pszSymbol, unsigned uSymbol, RTUINTPTR *pValue, void *pvUser); +static char *pdmR3FileRC(const char *pszFile, const char *pszSearchPath); +#endif +static int pdmR3LoadR0U(PUVM pUVM, const char *pszFilename, const char *pszName, const char *pszSearchPath); +static char *pdmR3FileR0(const char *pszFile, const char *pszSearchPath); +static char *pdmR3File(const char *pszFile, const char *pszDefaultExt, const char *pszSearchPath, bool fShared); + + + +/** + * Loads the VMMR0.r0 module early in the init process. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + */ +VMMR3_INT_DECL(int) PDMR3LdrLoadVMMR0U(PUVM pUVM) +{ + return pdmR3LoadR0U(pUVM, NULL, VMMR0_MAIN_MODULE_NAME, NULL); +} + + +/** + * Init the module loader part of PDM. + * + * This routine will load the Host Context Ring-0 and Guest + * Context VMM modules. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + */ +int pdmR3LdrInitU(PUVM pUVM) +{ +#if !defined(PDMLDR_FAKE_MODE) && defined(VBOX_WITH_RAW_MODE) + /* + * Load the mandatory RC module, the VMMR0.r0 is loaded before VM creation. + */ + PVM pVM = pUVM->pVM; AssertPtr(pVM); + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrLoadRC(pVM, NULL, VMMRC_MAIN_MODULE_NAME); + if (RT_FAILURE(rc)) + return rc; + } +#else + RT_NOREF(pUVM); +#endif + return VINF_SUCCESS; +} + + +/** + * Terminate the module loader part of PDM. + * + * This will unload and free all modules. + * + * @param pUVM The user mode VM structure. + * + * @remarks This is normally called twice during termination. + */ +void pdmR3LdrTermU(PUVM pUVM) +{ + /* + * Free the modules. + */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMMOD pModule = pUVM->pdm.s.pModules; + pUVM->pdm.s.pModules = NULL; + while (pModule) + { + /* free loader item. */ + if (pModule->hLdrMod != NIL_RTLDRMOD) + { + int rc2 = RTLdrClose(pModule->hLdrMod); + AssertRC(rc2); + pModule->hLdrMod = NIL_RTLDRMOD; + } + + /* free bits. */ + switch (pModule->eType) + { + case PDMMOD_TYPE_R0: + { + Assert(pModule->ImageBase); + int rc2 = SUPR3FreeModule((void *)(uintptr_t)pModule->ImageBase); + AssertRC(rc2); + pModule->ImageBase = 0; + break; + } + +#ifdef VBOX_WITH_RAW_MODE + case PDMMOD_TYPE_RC: +#endif + case PDMMOD_TYPE_R3: + /* MM will free this memory for us - it's alloc only memory. :-) */ + break; + + default: + AssertMsgFailed(("eType=%d\n", pModule->eType)); + break; + } + pModule->pvBits = NULL; + + void *pvFree = pModule; + pModule = pModule->pNext; + RTMemFree(pvFree); + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); +} + + +/** + * Applies relocations to RC modules. + * + * This must be done very early in the relocation + * process so that components can resolve RC symbols during relocation. + * + * @param pUVM Pointer to the user mode VM structure. + * @param offDelta Relocation delta relative to old location. 
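+ *
+ * @remarks As the implementation below shows, relocation is done in two
+ *          passes: pass one records the old image base and computes the new
+ *          one via MMHyperR3ToRC(), pass two calls RTLdrRelocate() so that
+ *          imports are re-resolved against the updated addresses.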
+ */ +VMMR3_INT_DECL(void) PDMR3LdrRelocateU(PUVM pUVM, RTGCINTPTR offDelta) +{ +#ifdef VBOX_WITH_RAW_MODE + LogFlow(("PDMR3LdrRelocate: offDelta=%RGv\n", offDelta)); + RT_NOREF1(offDelta); + + /* + * RC Modules. + */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + if (pUVM->pdm.s.pModules) + { + /* + * The relocation have to be done in two passes so imports + * can be correctly resolved. The first pass will update + * the ImageBase saving the current value in OldImageBase. + * The second pass will do the actual relocation. + */ + /* pass 1 */ + PPDMMOD pCur; + for (pCur = pUVM->pdm.s.pModules; pCur; pCur = pCur->pNext) + { + if (pCur->eType == PDMMOD_TYPE_RC) + { + pCur->OldImageBase = pCur->ImageBase; + pCur->ImageBase = MMHyperR3ToRC(pUVM->pVM, pCur->pvBits); + } + } + + /* pass 2 */ + for (pCur = pUVM->pdm.s.pModules; pCur; pCur = pCur->pNext) + { + if (pCur->eType == PDMMOD_TYPE_RC) + { + PDMGETIMPORTARGS Args; + Args.pVM = pUVM->pVM; + Args.pModule = pCur; + int rc = RTLdrRelocate(pCur->hLdrMod, pCur->pvBits, pCur->ImageBase, pCur->OldImageBase, + pdmR3GetImportRC, &Args); + AssertFatalMsgRC(rc, ("RTLdrRelocate failed, rc=%d\n", rc)); + } + } + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); +#else + RT_NOREF2(pUVM, offDelta); +#endif +} + + +/** + * Loads a module into the host context ring-3. + * + * This is used by the driver and device init functions to load modules + * containing the drivers and devices. The function can be extended to + * load modules which are not native to the environment we're running in, + * but at the moment this is not required. + * + * No reference counting is kept, since we don't implement any facilities + * for unloading the module. But the module will naturally be released + * when the VM terminates. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pszFilename Filename of the module binary. + * @param pszName Module name. Case sensitive and the length is limited! + */ +int pdmR3LoadR3U(PUVM pUVM, const char *pszFilename, const char *pszName) +{ + /* + * Validate input. + */ + AssertMsg(RTCritSectIsInitialized(&pUVM->pdm.s.ListCritSect), ("bad init order!\n")); + Assert(pszFilename); + size_t cchFilename = strlen(pszFilename); + Assert(pszName); + size_t cchName = strlen(pszName); + PPDMMOD pCur; + if (cchName >= sizeof(pCur->szName)) + { + AssertMsgFailed(("Name is too long, cchName=%d pszName='%s'\n", cchName, pszName)); + return VERR_INVALID_PARAMETER; + } + + /* + * Try lookup the name and see if the module exists. + */ + int rc; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (pCur = pUVM->pdm.s.pModules; pCur; pCur = pCur->pNext) + { + if (!strcmp(pCur->szName, pszName)) + { + if (pCur->eType == PDMMOD_TYPE_R3) + rc = VINF_PDM_ALREADY_LOADED; + else + rc = VERR_PDM_MODULE_NAME_CLASH; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + AssertMsgRC(rc, ("We've already got a module '%s' loaded!\n", pszName)); + return rc; + } + } + + /* + * Allocate the module list node and initialize it. + */ + const char *pszSuff = RTLdrGetSuff(); + size_t cchSuff = RTPathHasSuffix(pszFilename) ? 
0 : strlen(pszSuff); + PPDMMOD pModule = (PPDMMOD)RTMemAllocZ(RT_UOFFSETOF_DYN(PDMMOD, szFilename[cchFilename + cchSuff + 1])); + if (pModule) + { + pModule->eType = PDMMOD_TYPE_R3; + memcpy(pModule->szName, pszName, cchName); /* memory is zero'd, no need to copy terminator :-) */ + memcpy(pModule->szFilename, pszFilename, cchFilename); + memcpy(&pModule->szFilename[cchFilename], pszSuff, cchSuff); + + /* + * Load the loader item. + */ + RTERRINFOSTATIC ErrInfo; + RTErrInfoInitStatic(&ErrInfo); + rc = SUPR3HardenedLdrLoadPlugIn(pModule->szFilename, &pModule->hLdrMod, &ErrInfo.Core); + if (RT_SUCCESS(rc)) + { + pModule->pNext = pUVM->pdm.s.pModules; + pUVM->pdm.s.pModules = pModule; + } + else + { + /* Something went wrong, most likely module not found. Don't consider other unlikely errors */ + rc = VMSetError(pUVM->pVM, rc, RT_SRC_POS, + N_("Unable to load R3 module %s (%s): %s"), pModule->szFilename, pszName, ErrInfo.Core.pszMsg); + RTMemFree(pModule); + } + } + else + rc = VERR_NO_MEMORY; + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Resolve an external symbol during RTLdrGetBits() of a RC module. + * + * @returns VBox status code. + * @param hLdrMod The loader module handle. + * @param pszModule Module name. + * @param pszSymbol Symbol name, NULL if uSymbol should be used. + * @param uSymbol Symbol ordinal, ~0 if pszSymbol should be used. + * @param pValue Where to store the symbol value (address). + * @param pvUser User argument. + */ +static DECLCALLBACK(int) pdmR3GetImportRC(RTLDRMOD hLdrMod, const char *pszModule, const char *pszSymbol, unsigned uSymbol, + RTUINTPTR *pValue, void *pvUser) +{ + PVM pVM = ((PPDMGETIMPORTARGS)pvUser)->pVM; + PPDMMOD pModule = ((PPDMGETIMPORTARGS)pvUser)->pModule; + NOREF(hLdrMod); NOREF(uSymbol); + + /* + * Adjust input. + */ + if (pszModule && !*pszModule) + pszModule = NULL; + + /* + * Builtin module. + */ + if (!pszModule || !strcmp(pszModule, "VMMRCBuiltin.rc")) + { + int rc = VINF_SUCCESS; + if (!strcmp(pszSymbol, "g_VM")) + *pValue = pVM->pVMRC; + else if (!strcmp(pszSymbol, "g_CPUM")) + *pValue = VM_RC_ADDR(pVM, &pVM->cpum); + else if ( !strncmp(pszSymbol, "g_TRPM", 6) + || !strncmp(pszSymbol, "g_trpm", 6) + || !strncmp(pszSymbol, "TRPM", 4)) + { + RTRCPTR RCPtr = 0; + rc = TRPMR3GetImportRC(pVM, pszSymbol, &RCPtr); + if (RT_SUCCESS(rc)) + *pValue = RCPtr; + } + else if ( !strncmp(pszSymbol, "VMM", 3) + || !strcmp(pszSymbol, "g_Logger") + || !strcmp(pszSymbol, "g_RelLogger")) + { + RTRCPTR RCPtr = 0; + rc = VMMR3GetImportRC(pVM, pszSymbol, &RCPtr); + if (RT_SUCCESS(rc)) + *pValue = RCPtr; + } + else if ( !strncmp(pszSymbol, "TM", 2) + || !strcmp(pszSymbol, "g_pSUPGlobalInfoPage")) + { + RTRCPTR RCPtr = 0; + rc = TMR3GetImportRC(pVM, pszSymbol, &RCPtr); + if (RT_SUCCESS(rc)) + *pValue = RCPtr; + } + else + { + AssertMsg(!pszModule, ("Unknown builtin symbol '%s' for module '%s'!\n", pszSymbol, pModule->szName)); NOREF(pModule); + rc = VERR_SYMBOL_NOT_FOUND; + } + if (RT_SUCCESS(rc) || pszModule) + { + if (RT_FAILURE(rc)) + LogRel(("PDMLdr: Couldn't find symbol '%s' in module '%s'!\n", pszSymbol, pszModule)); + return rc; + } + } + + /* + * Search for module. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMMOD pCur = pUVM->pdm.s.pModules; + while (pCur) + { + if ( pCur->eType == PDMMOD_TYPE_RC + && ( !pszModule + || !strcmp(pCur->szName, pszModule)) + ) + { + /* Search for the symbol. 
*/ + int rc = RTLdrGetSymbolEx(pCur->hLdrMod, pCur->pvBits, pCur->ImageBase, UINT32_MAX, pszSymbol, pValue); + if (RT_SUCCESS(rc)) + { + AssertMsg(*pValue - pCur->ImageBase < RTLdrSize(pCur->hLdrMod), + ("%RRv-%RRv %s %RRv\n", (RTRCPTR)pCur->ImageBase, + (RTRCPTR)(pCur->ImageBase + RTLdrSize(pCur->hLdrMod) - 1), + pszSymbol, (RTRCPTR)*pValue)); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; + } + if (pszModule) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertLogRelMsgFailed(("PDMLdr: Couldn't find symbol '%s' in module '%s'!\n", pszSymbol, pszModule)); + return VERR_SYMBOL_NOT_FOUND; + } + } + + /* next */ + pCur = pCur->pNext; + } + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertLogRelMsgFailed(("Couldn't find module '%s' for resolving symbol '%s'!\n", pszModule, pszSymbol)); + return VERR_SYMBOL_NOT_FOUND; +} + + +/** + * Loads a module into the raw-mode context (i.e. into the Hypervisor memory + * region). + * + * @returns VBox status code. + * @retval VINF_PDM_ALREADY_LOADED if the module is already loaded (name + + * filename match). + * @retval VERR_PDM_MODULE_NAME_CLASH if a different file has already been + * loaded with the name module name. + * + * @param pVM The cross context VM structure. + * @param pszFilename Filename of the module binary. + * @param pszName Module name. Case sensitive and the length is limited! + */ +VMMR3DECL(int) PDMR3LdrLoadRC(PVM pVM, const char *pszFilename, const char *pszName) +{ + /* + * Validate input. + */ + AssertMsg(MMR3IsInitialized(pVM), ("bad init order!\n")); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_PDM_HM_IPE); + + /* + * Find the file if not specified. + */ + char *pszFile = NULL; + if (!pszFilename) + pszFilename = pszFile = pdmR3FileRC(pszName, NULL); + + /* + * Check if a module by that name is already loaded. + */ + int rc; + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMMOD pCur = pUVM->pdm.s.pModules; + while (pCur) + { + if (!strcmp(pCur->szName, pszName)) + { + /* Name clash. Hopefully due to it being the same file. */ + if (!strcmp(pCur->szFilename, pszFilename)) + rc = VINF_PDM_ALREADY_LOADED; + else + { + rc = VERR_PDM_MODULE_NAME_CLASH; + AssertMsgFailed(("We've already got a module '%s' loaded!\n", pszName)); + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + RTMemTmpFree(pszFile); + return rc; + } + /* next */ + pCur = pCur->pNext; + } + + /* + * Allocate the module list node. + */ + PPDMMOD pModule = (PPDMMOD)RTMemAllocZ(sizeof(*pModule) + strlen(pszFilename)); + if (!pModule) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + RTMemTmpFree(pszFile); + return VERR_NO_MEMORY; + } + AssertMsg(strlen(pszName) + 1 < sizeof(pModule->szName), + ("pazName is too long (%d chars) max is %d chars.\n", strlen(pszName), sizeof(pModule->szName) - 1)); + strcpy(pModule->szName, pszName); + pModule->eType = PDMMOD_TYPE_RC; + strcpy(pModule->szFilename, pszFilename); + + + /* + * Open the loader item. + */ + RTERRINFOSTATIC ErrInfo; + RTErrInfoInitStatic(&ErrInfo); + rc = SUPR3HardenedVerifyPlugIn(pszFilename, &ErrInfo.Core); + if (RT_SUCCESS(rc)) + { + RTErrInfoClear(&ErrInfo.Core); + rc = RTLdrOpen(pszFilename, 0, RTLDRARCH_X86_32, &pModule->hLdrMod); + } + if (RT_SUCCESS(rc)) + { + /* + * Allocate space in the hypervisor. 
+ */ + size_t cb = RTLdrSize(pModule->hLdrMod); + cb = RT_ALIGN_Z(cb, PAGE_SIZE); + uint32_t cPages = (uint32_t)(cb >> PAGE_SHIFT); + if (((size_t)cPages << PAGE_SHIFT) == cb) + { + PSUPPAGE paPages = (PSUPPAGE)RTMemTmpAlloc(cPages * sizeof(paPages[0])); + if (paPages) + { + rc = SUPR3PageAllocEx(cPages, 0 /*fFlags*/, &pModule->pvBits, NULL /*pR0Ptr*/, paPages); + if (RT_SUCCESS(rc)) + { + RTGCPTR GCPtr; + rc = MMR3HyperMapPages(pVM, pModule->pvBits, NIL_RTR0PTR, + cPages, paPages, pModule->szName, &GCPtr); + if (RT_SUCCESS(rc)) + { + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + + /* + * Get relocated image bits. + */ + Assert(MMHyperR3ToRC(pVM, pModule->pvBits) == GCPtr); + pModule->ImageBase = GCPtr; + PDMGETIMPORTARGS Args; + Args.pVM = pVM; + Args.pModule = pModule; + rc = RTLdrGetBits(pModule->hLdrMod, pModule->pvBits, pModule->ImageBase, pdmR3GetImportRC, &Args); + if (RT_SUCCESS(rc)) + { +#ifdef VBOX_WITH_DTRACE_RC + /* + * Register the tracer bits if present. + */ + RTLDRADDR uValue; + rc = RTLdrGetSymbolEx(pModule->hLdrMod, pModule->pvBits, pModule->ImageBase, UINT32_MAX, + "g_VTGObjHeader", &uValue); + if (RT_SUCCESS(rc)) + { + PVTGOBJHDR pVtgHdr = (PVTGOBJHDR)MMHyperRCToCC(pVM, (RTRCPTR)uValue); + if ( pVtgHdr + && !memcmp(pVtgHdr->szMagic, VTGOBJHDR_MAGIC, sizeof(pVtgHdr->szMagic))) + rc = SUPR3TracerRegisterModule(~(uintptr_t)0, pModule->szName, pVtgHdr, uValue, + SUP_TRACER_UMOD_FLAGS_SHARED); + else + rc = pVtgHdr ? VERR_INVALID_MAGIC : VERR_INVALID_POINTER; + if (RT_FAILURE(rc)) + LogRel(("PDMLdr: Failed to register tracepoints for '%s': %Rrc\n", pModule->szName, rc)); + } +#endif + + /* + * Insert the module. + */ + if (pUVM->pdm.s.pModules) + { + /* we don't expect this list to be very long, so rather save the tail pointer. */ + pCur = pUVM->pdm.s.pModules; + while (pCur->pNext) + pCur = pCur->pNext; + pCur->pNext = pModule; + } + else + pUVM->pdm.s.pModules = pModule; /* (pNext is zeroed by alloc) */ + Log(("PDM: RC Module at %RRv %s (%s)\n", (RTRCPTR)pModule->ImageBase, pszName, pszFilename)); + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + RTMemTmpFree(pszFile); + RTMemTmpFree(paPages); + + return VINF_SUCCESS; + } + } + else + { + AssertRC(rc); + SUPR3PageFreeEx(pModule->pvBits, cPages); + } + } + else + AssertMsgFailed(("SUPR3PageAlloc(%d,) -> %Rrc\n", cPages, rc)); + RTMemTmpFree(paPages); + } + else + rc = VERR_NO_TMP_MEMORY; + } + else + rc = VERR_OUT_OF_RANGE; + int rc2 = RTLdrClose(pModule->hLdrMod); + AssertRC(rc2); + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + /* Don't consider VERR_PDM_MODULE_NAME_CLASH and VERR_NO_MEMORY above as these are very unlikely. */ + if (RT_FAILURE(rc) && RTErrInfoIsSet(&ErrInfo.Core)) + rc = VMSetError(pVM, rc, RT_SRC_POS, N_("Cannot load RC module %s: %s"), pszFilename, ErrInfo.Core.pszMsg); + else if (RT_FAILURE(rc)) + rc = VMSetError(pVM, rc, RT_SRC_POS, N_("Cannot load RC module %s"), pszFilename); + + RTMemFree(pModule); + RTMemTmpFree(pszFile); + return rc; +} + +#endif /* VBOX_WITH_RAW_MODE */ + +/** + * Loads a module into the ring-0 context. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pszFilename Filename of the module binary. + * @param pszName Module name. Case sensitive and the length is limited! + * @param pszSearchPath List of directories to search if @a pszFilename is + * not specified. Can be NULL, in which case the arch + * dependent install dir is searched. 
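+ *
+ * @remarks The image is loaded by SUPR3LoadModule() and kept with a
+ *          NIL_RTLDRMOD loader handle, so ring-0 symbol queries later go
+ *          through the support library (SUPR3GetSymbolR0) rather than RTLdr.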
+ */ +static int pdmR3LoadR0U(PUVM pUVM, const char *pszFilename, const char *pszName, const char *pszSearchPath) +{ + /* + * Validate input. + */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMMOD pCur = pUVM->pdm.s.pModules; + while (pCur) + { + if (!strcmp(pCur->szName, pszName)) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertMsgFailed(("We've already got a module '%s' loaded!\n", pszName)); + return VERR_PDM_MODULE_NAME_CLASH; + } + /* next */ + pCur = pCur->pNext; + } + + /* + * Find the file if not specified. + */ + char *pszFile = NULL; + if (!pszFilename) + pszFilename = pszFile = pdmR3FileR0(pszName, pszSearchPath); + + /* + * Allocate the module list node. + */ + PPDMMOD pModule = (PPDMMOD)RTMemAllocZ(sizeof(*pModule) + strlen(pszFilename)); + if (!pModule) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + RTMemTmpFree(pszFile); + return VERR_NO_MEMORY; + } + AssertMsg(strlen(pszName) + 1 < sizeof(pModule->szName), + ("pazName is too long (%d chars) max is %d chars.\n", strlen(pszName), sizeof(pModule->szName) - 1)); + strcpy(pModule->szName, pszName); + pModule->eType = PDMMOD_TYPE_R0; + strcpy(pModule->szFilename, pszFilename); + + /* + * Ask the support library to load it. + */ + void *pvImageBase; + RTERRINFOSTATIC ErrInfo; + RTErrInfoInitStatic(&ErrInfo); + int rc = SUPR3LoadModule(pszFilename, pszName, &pvImageBase, &ErrInfo.Core); + if (RT_SUCCESS(rc)) + { + pModule->hLdrMod = NIL_RTLDRMOD; + pModule->ImageBase = (uintptr_t)pvImageBase; + + /* + * Insert the module. + */ + if (pUVM->pdm.s.pModules) + { + /* we don't expect this list to be very long, so rather save the tail pointer. */ + pCur = pUVM->pdm.s.pModules; + while (pCur->pNext) + pCur = pCur->pNext; + pCur->pNext = pModule; + } + else + pUVM->pdm.s.pModules = pModule; /* (pNext is zeroed by alloc) */ + Log(("PDM: R0 Module at %RHv %s (%s)\n", (RTR0PTR)pModule->ImageBase, pszName, pszFilename)); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + RTMemTmpFree(pszFile); + return VINF_SUCCESS; + } + + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + RTMemFree(pModule); + LogRel(("PDMLdr: pdmR3LoadR0U: pszName=\"%s\" rc=%Rrc szErr=\"%s\"\n", pszName, rc, ErrInfo.Core.pszMsg)); + + /* Don't consider VERR_PDM_MODULE_NAME_CLASH and VERR_NO_MEMORY above as these are very unlikely. */ + if (RT_FAILURE(rc)) + rc = VMR3SetError(pUVM, rc, RT_SRC_POS, N_("Failed to load R0 module %s: %s"), pszFilename, ErrInfo.Core.pszMsg); + + RTMemTmpFree(pszFile); /* might be reference thru pszFilename in the above VMSetError call. */ + return rc; +} + + + +/** + * Get the address of a symbol in a given HC ring 3 module. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszModule Module name. + * @param pszSymbol Symbol name. If it's value is less than 64k it's treated like a + * ordinal value rather than a string pointer. + * @param ppvValue Where to store the symbol value. + */ +VMMR3_INT_DECL(int) PDMR3LdrGetSymbolR3(PVM pVM, const char *pszModule, const char *pszSymbol, void **ppvValue) +{ + /* + * Validate input. + */ + AssertPtr(pVM); + AssertPtr(pszModule); + AssertPtr(ppvValue); + PUVM pUVM = pVM->pUVM; + AssertMsg(RTCritSectIsInitialized(&pUVM->pdm.s.ListCritSect), ("bad init order!\n")); + + /* + * Find the module. 
+ */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMMOD pModule = pUVM->pdm.s.pModules; pModule; pModule = pModule->pNext) + { + if ( pModule->eType == PDMMOD_TYPE_R3 + && !strcmp(pModule->szName, pszModule)) + { + RTUINTPTR Value = 0; + int rc = RTLdrGetSymbolEx(pModule->hLdrMod, pModule->pvBits, pModule->ImageBase, UINT32_MAX, pszSymbol, &Value); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + if (RT_SUCCESS(rc)) + { + *ppvValue = (void *)(uintptr_t)Value; + Assert((uintptr_t)*ppvValue == Value); + } + else + { + if ((uintptr_t)pszSymbol < 0x10000) + AssertMsg(rc, ("Couldn't symbol '%u' in module '%s'\n", (unsigned)(uintptr_t)pszSymbol, pszModule)); + else + AssertMsg(rc, ("Couldn't symbol '%s' in module '%s'\n", pszSymbol, pszModule)); + } + return rc; + } + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertMsgFailed(("Couldn't locate module '%s'\n", pszModule)); + return VERR_SYMBOL_NOT_FOUND; +} + + +/** + * Get the address of a symbol in a given HC ring 0 module. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszModule Module name. If NULL the main R0 module (VMMR0.r0) is assumes. + * @param pszSymbol Symbol name. If it's value is less than 64k it's treated like a + * ordinal value rather than a string pointer. + * @param ppvValue Where to store the symbol value. + */ +VMMR3DECL(int) PDMR3LdrGetSymbolR0(PVM pVM, const char *pszModule, const char *pszSymbol, PRTR0PTR ppvValue) +{ +#ifdef PDMLDR_FAKE_MODE + *ppvValue = 0xdeadbeef; + return VINF_SUCCESS; + +#else + /* + * Validate input. + */ + AssertPtr(pVM); + AssertPtrNull(pszModule); + AssertPtr(ppvValue); + PUVM pUVM = pVM->pUVM; + AssertMsg(RTCritSectIsInitialized(&pUVM->pdm.s.ListCritSect), ("bad init order!\n")); + + if (!pszModule) + pszModule = VMMR0_MAIN_MODULE_NAME; + + /* + * Find the module. + */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMMOD pModule = pUVM->pdm.s.pModules; pModule; pModule = pModule->pNext) + { + if ( pModule->eType == PDMMOD_TYPE_R0 + && !strcmp(pModule->szName, pszModule)) + { + int rc = SUPR3GetSymbolR0((void *)(uintptr_t)pModule->ImageBase, pszSymbol, (void **)ppvValue); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + if (RT_FAILURE(rc)) + { + AssertMsgRC(rc, ("Couldn't find symbol '%s' in module '%s'\n", pszSymbol, pszModule)); + LogRel(("PDMLdr: PDMGetSymbol: Couldn't find symbol '%s' in module '%s'\n", pszSymbol, pszModule)); + } + return rc; + } + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertMsgFailed(("Couldn't locate module '%s'\n", pszModule)); + return VERR_SYMBOL_NOT_FOUND; +#endif +} + + +/** + * Same as PDMR3LdrGetSymbolR0 except that the module will be attempted loaded if not found. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszModule Module name. If NULL the main R0 module (VMMR0.r0) is assumed. + * @param pszSearchPath List of directories to search if @a pszFile is + * not qualified with a path. Can be NULL, in which + * case the arch dependent install dir is searched. + * @param pszSymbol Symbol name. If it's value is less than 64k it's treated like a + * ordinal value rather than a string pointer. + * @param ppvValue Where to store the symbol value. 
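+ *
+ * A minimal usage sketch (illustrative only; the module and symbol names are
+ * made up):
+ * @code
+ *     RTR0PTR R0PtrEntry = NIL_RTR0PTR;
+ *     int rc = PDMR3LdrGetSymbolR0Lazy(pVM, "SomeModuleR0.r0", NULL /*pszSearchPath*/,
+ *                                      "SomeModuleR0Entry", &R0PtrEntry);
+ *     if (RT_SUCCESS(rc))
+ *         Log(("Resolved SomeModuleR0Entry at %RHv\n", R0PtrEntry));
+ * @endcode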
+ */ +VMMR3DECL(int) PDMR3LdrGetSymbolR0Lazy(PVM pVM, const char *pszModule, const char *pszSearchPath, const char *pszSymbol, + PRTR0PTR ppvValue) +{ +#ifdef PDMLDR_FAKE_MODE + *ppvValue = 0xdeadbeef; + return VINF_SUCCESS; + +#else + AssertPtr(pVM); + AssertPtrNull(pszModule); + AssertPtr(ppvValue); + PUVM pUVM = pVM->pUVM; + AssertMsg(RTCritSectIsInitialized(&pUVM->pdm.s.ListCritSect), ("bad init order!\n")); + + if (pszModule) /* (We don't lazy load the main R0 module.) */ + { + /* + * Since we're lazy, we'll only check if the module is present + * and hand it over to PDMR3LdrGetSymbolR0 when that's done. + */ + AssertMsgReturn(!strpbrk(pszModule, "/\\:\n\r\t"), ("pszModule=%s\n", pszModule), VERR_INVALID_PARAMETER); + PPDMMOD pModule; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (pModule = pUVM->pdm.s.pModules; pModule; pModule = pModule->pNext) + if ( pModule->eType == PDMMOD_TYPE_R0 + && !strcmp(pModule->szName, pszModule)) + break; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + if (!pModule) + { + int rc = pdmR3LoadR0U(pUVM, NULL, pszModule, pszSearchPath); + AssertMsgRCReturn(rc, ("pszModule=%s rc=%Rrc\n", pszModule, rc), VERR_MODULE_NOT_FOUND); + } + } + + return PDMR3LdrGetSymbolR0(pVM, pszModule, pszSymbol, ppvValue); +#endif +} + + +/** + * Get the address of a symbol in a given RC module. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszModule Module name. If NULL the main R0 module (VMMRC.rc) + * is assumes. + * @param pszSymbol Symbol name. If it's value is less than 64k it's + * treated like a ordinal value rather than a string + * pointer. + * @param pRCPtrValue Where to store the symbol value. + */ +VMMR3DECL(int) PDMR3LdrGetSymbolRC(PVM pVM, const char *pszModule, const char *pszSymbol, PRTRCPTR pRCPtrValue) +{ +#if defined(PDMLDR_FAKE_MODE) || !defined(VBOX_WITH_RAW_MODE) + RT_NOREF(pVM, pszModule, pszSymbol); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + *pRCPtrValue = NIL_RTRCPTR; + return VINF_SUCCESS; + +#else + /* + * Validate input. + */ + AssertPtr(pVM); + AssertPtrNull(pszModule); + AssertPtr(pRCPtrValue); + AssertMsg(MMR3IsInitialized(pVM), ("bad init order!\n")); + + if (!pszModule) + pszModule = VMMRC_MAIN_MODULE_NAME; + + /* + * Find the module. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMMOD pModule = pUVM->pdm.s.pModules; pModule; pModule = pModule->pNext) + { + if ( pModule->eType == PDMMOD_TYPE_RC + && !strcmp(pModule->szName, pszModule)) + { + RTUINTPTR Value; + int rc = RTLdrGetSymbolEx(pModule->hLdrMod, pModule->pvBits, pModule->ImageBase, UINT32_MAX, pszSymbol, &Value); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + if (RT_SUCCESS(rc)) + { + *pRCPtrValue = (RTGCPTR)Value; + Assert(*pRCPtrValue == Value); + } + else + { + if ((uintptr_t)pszSymbol < 0x10000) + AssertMsg(rc, ("Couldn't symbol '%u' in module '%s'\n", (unsigned)(uintptr_t)pszSymbol, pszModule)); + else + AssertMsg(rc, ("Couldn't symbol '%s' in module '%s'\n", pszSymbol, pszModule)); + } + return rc; + } + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + AssertMsgFailed(("Couldn't locate module '%s'\n", pszModule)); + return VERR_SYMBOL_NOT_FOUND; +#endif +} + + +/** + * Same as PDMR3LdrGetSymbolRC except that the module will be attempted loaded if not found. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszModule Module name. If NULL the main RC module (VMMRC.rc) + * is assumed. 
+ * @param pszSearchPath List of directories to search if @a pszFile is + * not qualified with a path. Can be NULL, in which + * case the arch dependent install dir is searched. + * @param pszSymbol Symbol name. If it's value is less than 64k it's treated like a + * ordinal value rather than a string pointer. + * @param pRCPtrValue Where to store the symbol value. + */ +VMMR3DECL(int) PDMR3LdrGetSymbolRCLazy(PVM pVM, const char *pszModule, const char *pszSearchPath, const char *pszSymbol, + PRTRCPTR pRCPtrValue) +{ +#if defined(PDMLDR_FAKE_MODE) || !defined(VBOX_WITH_RAW_MODE) + RT_NOREF(pVM, pszModule, pszSearchPath, pszSymbol); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + *pRCPtrValue = NIL_RTRCPTR; + return VINF_SUCCESS; + +#else + AssertPtr(pVM); + if (!pszModule) + pszModule = VMMRC_MAIN_MODULE_NAME; + AssertPtr(pszModule); + AssertPtr(pRCPtrValue); + AssertMsg(MMR3IsInitialized(pVM), ("bad init order!\n")); + + /* + * Since we're lazy, we'll only check if the module is present + * and hand it over to PDMR3LdrGetSymbolRC when that's done. + */ + AssertMsgReturn(!strpbrk(pszModule, "/\\:\n\r\t"), ("pszModule=%s\n", pszModule), VERR_INVALID_PARAMETER); + PUVM pUVM = pVM->pUVM; + PPDMMOD pModule; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (pModule = pUVM->pdm.s.pModules; pModule; pModule = pModule->pNext) + if ( pModule->eType == PDMMOD_TYPE_RC + && !strcmp(pModule->szName, pszModule)) + break; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + if (!pModule) + { + char *pszFilename = pdmR3FileRC(pszModule, pszSearchPath); + AssertMsgReturn(pszFilename, ("pszModule=%s\n", pszModule), VERR_MODULE_NOT_FOUND); + int rc = PDMR3LdrLoadRC(pVM, pszFilename, pszModule); + RTMemTmpFree(pszFilename); + AssertMsgRCReturn(rc, ("pszModule=%s rc=%Rrc\n", pszModule, rc), VERR_MODULE_NOT_FOUND); + } + + return PDMR3LdrGetSymbolRC(pVM, pszModule, pszSymbol, pRCPtrValue); +#endif +} + + +/** + * Constructs the full filename for a R3 image file. + * + * @returns Pointer to temporary memory containing the filename. + * Caller must free this using RTMemTmpFree(). + * @returns NULL on failure. + * + * @param pszFile File name (no path). + * @param fShared If true, search in the shared directory (/usr/lib on Unix), else + * search in the private directory (/usr/lib/virtualbox on Unix). + * Ignored if VBOX_PATH_SHARED_LIBS is not defined. + */ +char *pdmR3FileR3(const char *pszFile, bool fShared) +{ + return pdmR3File(pszFile, NULL, NULL, fShared); +} + + +/** + * Constructs the full filename for a R0 image file. + * + * @returns Pointer to temporary memory containing the filename. + * Caller must free this using RTMemTmpFree(). + * @returns NULL on failure. + * + * @param pszFile File name (no path). + * @param pszSearchPath List of directories to search if @a pszFile is + * not qualified with a path. Can be NULL, in which + * case the arch dependent install dir is searched. + */ +char *pdmR3FileR0(const char *pszFile, const char *pszSearchPath) +{ + return pdmR3File(pszFile, NULL, pszSearchPath, /*fShared=*/false); +} + + +/** + * Constructs the full filename for a RC image file. + * + * @returns Pointer to temporary memory containing the filename. + * Caller must free this using RTMemTmpFree(). + * @returns NULL on failure. + * + * @param pszFile File name (no path). + * @param pszSearchPath List of directories to search if @a pszFile is + * not qualified with a path. Can be NULL, in which + * case the arch dependent install dir is searched. 
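+ *
+ * A short usage sketch, mirroring how the lazy-load paths above use it:
+ * @code
+ *     char *pszFilename = pdmR3FileRC("VMMRC.rc", NULL /*pszSearchPath*/);
+ *     if (pszFilename)
+ *     {
+ *         // ... hand the name to PDMR3LdrLoadRC() or similar ...
+ *         RTMemTmpFree(pszFilename);
+ *     }
+ * @endcode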
+ */ +char *pdmR3FileRC(const char *pszFile, const char *pszSearchPath) +{ + return pdmR3File(pszFile, NULL, pszSearchPath, /*fShared=*/false); +} + + +/** + * Worker for pdmR3File(). + * + * @returns Pointer to temporary memory containing the filename. + * Caller must free this using RTMemTmpFree(). + * @returns NULL on failure. + * + * @param pszDir Directory part + * @param pszFile File name part + * @param pszDefaultExt Extension part + */ +static char *pdmR3FileConstruct(const char *pszDir, const char *pszFile, const char *pszDefaultExt) +{ + /* + * Allocate temp memory for return buffer. + */ + size_t cchDir = strlen(pszDir); + size_t cchFile = strlen(pszFile); + size_t cchDefaultExt; + + /* + * Default extention? + */ + if (!pszDefaultExt || strchr(pszFile, '.')) + cchDefaultExt = 0; + else + cchDefaultExt = strlen(pszDefaultExt); + + size_t cchPath = cchDir + 1 + cchFile + cchDefaultExt + 1; + AssertMsgReturn(cchPath <= RTPATH_MAX, ("Path too long!\n"), NULL); + + char *pszRet = (char *)RTMemTmpAlloc(cchDir + 1 + cchFile + cchDefaultExt + 1); + AssertMsgReturn(pszRet, ("Out of temporary memory!\n"), NULL); + + /* + * Construct the filename. + */ + memcpy(pszRet, pszDir, cchDir); + pszRet[cchDir++] = '/'; /* this works everywhere */ + memcpy(pszRet + cchDir, pszFile, cchFile + 1); + if (cchDefaultExt) + memcpy(pszRet + cchDir + cchFile, pszDefaultExt, cchDefaultExt + 1); + + return pszRet; +} + + +/** + * Worker for pdmR3FileRC(), pdmR3FileR0() and pdmR3FileR3(). + * + * @returns Pointer to temporary memory containing the filename. + * Caller must free this using RTMemTmpFree(). + * @returns NULL on failure. + * @param pszFile File name (no path). + * @param pszDefaultExt The default extention, NULL if none. + * @param pszSearchPath List of directories to search if @a pszFile is + * not qualified with a path. Can be NULL, in which + * case the arch dependent install dir is searched. + * @param fShared If true, search in the shared directory (/usr/lib on Unix), else + * search in the private directory (/usr/lib/virtualbox on Unix). + * Ignored if VBOX_PATH_SHARED_LIBS is not defined. + * @todo We'll have this elsewhere than in the root later! + * @todo Remove the fShared hack again once we don't need to link against VBoxDD anymore! + */ +static char *pdmR3File(const char *pszFile, const char *pszDefaultExt, const char *pszSearchPath, bool fShared) +{ + char szPath[RTPATH_MAX]; + int rc; + + AssertLogRelReturn(!fShared || !pszSearchPath, NULL); + Assert(!RTPathHavePath(pszFile)); + + /* + * If there is a path, search it. + */ + if ( pszSearchPath + && *pszSearchPath) + { + /* Check the filename length. */ + size_t const cchFile = strlen(pszFile); + if (cchFile >= sizeof(szPath)) + return NULL; + + /* + * Walk the search path. + */ + const char *psz = pszSearchPath; + while (*psz) + { + /* Skip leading blanks - no directories with leading spaces, thank you. */ + while (RT_C_IS_BLANK(*psz)) + psz++; + + /* Find the end of this element. 
*/ + const char *pszNext; + const char *pszEnd = strchr(psz, ';'); + if (!pszEnd) + pszEnd = pszNext = strchr(psz, '\0'); + else + pszNext = pszEnd + 1; + if (pszEnd != psz) + { + rc = RTPathJoinEx(szPath, sizeof(szPath), psz, pszEnd - psz, pszFile, cchFile); + if (RT_SUCCESS(rc)) + { + if (RTFileExists(szPath)) + { + size_t cchPath = strlen(szPath) + 1; + char *pszRet = (char *)RTMemTmpAlloc(cchPath); + if (pszRet) + memcpy(pszRet, szPath, cchPath); + return pszRet; + } + } + } + + /* advance */ + psz = pszNext; + } + } + + /* + * Use the default location. + */ + rc = fShared + ? RTPathSharedLibs( szPath, sizeof(szPath)) + : RTPathAppPrivateArch(szPath, sizeof(szPath)); + if (!RT_SUCCESS(rc)) + { + AssertMsgFailed(("RTPath[SharedLibs|AppPrivateArch](,%d) failed rc=%d!\n", sizeof(szPath), rc)); + return NULL; + } + + return pdmR3FileConstruct(szPath, pszFile, pszDefaultExt); +} + + +/** @internal */ +typedef struct QMFEIPARG +{ + RTINTPTR uPC; + + char *pszNearSym1; + size_t cchNearSym1; + RTINTPTR offNearSym1; + + char *pszNearSym2; + size_t cchNearSym2; + RTINTPTR offNearSym2; +} QMFEIPARG, *PQMFEIPARG; + + +/** + * Enumeration callback function used by RTLdrEnumSymbols(). + * + * @returns VBox status code. Failure will stop the enumeration. + * @param hLdrMod The loader module handle. + * @param pszSymbol Symbol name. NULL if ordinal only. + * @param uSymbol Symbol ordinal, ~0 if not used. + * @param Value Symbol value. + * @param pvUser The user argument specified to RTLdrEnumSymbols(). + */ +static DECLCALLBACK(int) pdmR3QueryModFromEIPEnumSymbols(RTLDRMOD hLdrMod, const char *pszSymbol, unsigned uSymbol, + RTUINTPTR Value, void *pvUser) +{ + PQMFEIPARG pArgs = (PQMFEIPARG)pvUser; + NOREF(hLdrMod); + + RTINTPTR off = Value - pArgs->uPC; + if (off <= 0) /* near1 is before or at same location. */ + { + if (off > pArgs->offNearSym1) + { + pArgs->offNearSym1 = off; + if (pArgs->pszNearSym1 && pArgs->cchNearSym1) + { + *pArgs->pszNearSym1 = '\0'; + if (pszSymbol) + strncat(pArgs->pszNearSym1, pszSymbol, pArgs->cchNearSym1); + else + { + char szOrd[32]; + RTStrPrintf(szOrd, sizeof(szOrd), "#%#x", uSymbol); + strncat(pArgs->pszNearSym1, szOrd, pArgs->cchNearSym1); + } + } + } + } + else /* near2 is after */ + { + if (off < pArgs->offNearSym2) + { + pArgs->offNearSym2 = off; + if (pArgs->pszNearSym2 && pArgs->cchNearSym2) + { + *pArgs->pszNearSym2 = '\0'; + if (pszSymbol) + strncat(pArgs->pszNearSym2, pszSymbol, pArgs->cchNearSym2); + else + { + char szOrd[32]; + RTStrPrintf(szOrd, sizeof(szOrd), "#%#x", uSymbol); + strncat(pArgs->pszNearSym2, szOrd, pArgs->cchNearSym2); + } + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Internal worker for PDMR3LdrQueryRCModFromPC and PDMR3LdrQueryR0ModFromPC. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param uPC The program counter (eip/rip) to locate the module for. + * @param enmType The module type. + * @param pszModName Where to store the module name. + * @param cchModName Size of the module name buffer. + * @param pMod Base address of the module. + * @param pszNearSym1 Name of the closes symbol from below. + * @param cchNearSym1 Size of the buffer pointed to by pszNearSym1. + * @param pNearSym1 The address of pszNearSym1. + * @param pszNearSym2 Name of the closes symbol from below. + * @param cchNearSym2 Size of the buffer pointed to by pszNearSym2. + * @param pNearSym2 The address of pszNearSym2. 
+ */ +static int pdmR3LdrQueryModFromPC(PVM pVM, RTUINTPTR uPC, PDMMODTYPE enmType, + char *pszModName, size_t cchModName, PRTUINTPTR pMod, + char *pszNearSym1, size_t cchNearSym1, PRTUINTPTR pNearSym1, + char *pszNearSym2, size_t cchNearSym2, PRTUINTPTR pNearSym2) +{ + PUVM pUVM = pVM->pUVM; + int rc = VERR_MODULE_NOT_FOUND; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMMOD pCur= pUVM->pdm.s.pModules; pCur; pCur = pCur->pNext) + { + if (pCur->eType != enmType) + continue; + + /* The following RTLdrOpen call is a dirty hack to get ring-0 module information. */ + RTLDRMOD hLdrMod = pCur->hLdrMod; + if (hLdrMod == NIL_RTLDRMOD && uPC >= pCur->ImageBase) + { + int rc2 = RTLdrOpen(pCur->szFilename, 0 /*fFlags*/, RTLDRARCH_HOST, &hLdrMod); + if (RT_FAILURE(rc2)) + hLdrMod = NIL_RTLDRMOD; + } + + if ( hLdrMod != NIL_RTLDRMOD + && uPC - pCur->ImageBase < RTLdrSize(hLdrMod)) + { + if (pMod) + *pMod = pCur->ImageBase; + if (pszModName && cchModName) + { + *pszModName = '\0'; + strncat(pszModName, pCur->szName, cchModName); + } + if (pNearSym1) *pNearSym1 = 0; + if (pNearSym2) *pNearSym2 = 0; + if (pszNearSym1) *pszNearSym1 = '\0'; + if (pszNearSym2) *pszNearSym2 = '\0'; + + /* + * Locate the nearest symbols. + */ + QMFEIPARG Args; + Args.uPC = uPC; + Args.pszNearSym1 = pszNearSym1; + Args.cchNearSym1 = cchNearSym1; + Args.offNearSym1 = RTINTPTR_MIN; + Args.pszNearSym2 = pszNearSym2; + Args.cchNearSym2 = cchNearSym2; + Args.offNearSym2 = RTINTPTR_MAX; + + rc = RTLdrEnumSymbols(hLdrMod, RTLDR_ENUM_SYMBOL_FLAGS_ALL, pCur->pvBits, pCur->ImageBase, + pdmR3QueryModFromEIPEnumSymbols, &Args); + if (pNearSym1 && Args.offNearSym1 != RTINTPTR_MIN) + *pNearSym1 = Args.offNearSym1 + uPC; + if (pNearSym2 && Args.offNearSym2 != RTINTPTR_MAX) + *pNearSym2 = Args.offNearSym2 + uPC; + + rc = VINF_SUCCESS; + } + + if (hLdrMod != pCur->hLdrMod && hLdrMod != NIL_RTLDRMOD) + RTLdrClose(hLdrMod); + + if (RT_SUCCESS(rc)) + break; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Queries raw-mode context module information from an PC (eip/rip). + * + * This is typically used to locate a crash address. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param uPC The program counter (eip/rip) to locate the module for. + * @param pszModName Where to store the module name. + * @param cchModName Size of the module name buffer. + * @param pMod Base address of the module. + * @param pszNearSym1 Name of the closes symbol from below. + * @param cchNearSym1 Size of the buffer pointed to by pszNearSym1. + * @param pNearSym1 The address of pszNearSym1. + * @param pszNearSym2 Name of the closes symbol from below. + * @param cchNearSym2 Size of the buffer pointed to by pszNearSym2. + * @param pNearSym2 The address of pszNearSym2. 
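+ *
+ * Typical use is pretty-printing a crash address (sketch; uFaultPC and the
+ * buffer sizes are illustrative):
+ * @code
+ *     char    szMod[64], szSym1[128], szSym2[128];
+ *     RTRCPTR RCPtrMod = 0, RCPtrSym1 = 0, RCPtrSym2 = 0;
+ *     int rc = PDMR3LdrQueryRCModFromPC(pVM, uFaultPC,
+ *                                       szMod,  sizeof(szMod),  &RCPtrMod,
+ *                                       szSym1, sizeof(szSym1), &RCPtrSym1,
+ *                                       szSym2, sizeof(szSym2), &RCPtrSym2);
+ *     if (RT_SUCCESS(rc))
+ *         LogRel(("%RRv in %s near %s (+%#x)\n", uFaultPC, szMod, szSym1, uFaultPC - RCPtrSym1));
+ * @endcode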
+ */ +VMMR3_INT_DECL(int) PDMR3LdrQueryRCModFromPC(PVM pVM, RTRCPTR uPC, + char *pszModName, size_t cchModName, PRTRCPTR pMod, + char *pszNearSym1, size_t cchNearSym1, PRTRCPTR pNearSym1, + char *pszNearSym2, size_t cchNearSym2, PRTRCPTR pNearSym2) +{ + RTUINTPTR AddrMod = 0; + RTUINTPTR AddrNear1 = 0; + RTUINTPTR AddrNear2 = 0; + int rc = pdmR3LdrQueryModFromPC(pVM, uPC, PDMMOD_TYPE_RC, + pszModName, cchModName, &AddrMod, + pszNearSym1, cchNearSym1, &AddrNear1, + pszNearSym2, cchNearSym2, &AddrNear2); + if (RT_SUCCESS(rc)) + { + if (pMod) + *pMod = (RTRCPTR)AddrMod; + if (pNearSym1) + *pNearSym1 = (RTRCPTR)AddrNear1; + if (pNearSym2) + *pNearSym2 = (RTRCPTR)AddrNear2; + } + return rc; +} + + +/** + * Queries ring-0 context module information from an PC (eip/rip). + * + * This is typically used to locate a crash address. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param uPC The program counter (eip/rip) to locate the module for. + * @param pszModName Where to store the module name. + * @param cchModName Size of the module name buffer. + * @param pMod Base address of the module. + * @param pszNearSym1 Name of the closes symbol from below. + * @param cchNearSym1 Size of the buffer pointed to by pszNearSym1. + * @param pNearSym1 The address of pszNearSym1. + * @param pszNearSym2 Name of the closes symbol from below. + * @param cchNearSym2 Size of the buffer pointed to by pszNearSym2. Optional. + * @param pNearSym2 The address of pszNearSym2. Optional. + */ +VMMR3_INT_DECL(int) PDMR3LdrQueryR0ModFromPC(PVM pVM, RTR0PTR uPC, + char *pszModName, size_t cchModName, PRTR0PTR pMod, + char *pszNearSym1, size_t cchNearSym1, PRTR0PTR pNearSym1, + char *pszNearSym2, size_t cchNearSym2, PRTR0PTR pNearSym2) +{ + RTUINTPTR AddrMod = 0; + RTUINTPTR AddrNear1 = 0; + RTUINTPTR AddrNear2 = 0; + int rc = pdmR3LdrQueryModFromPC(pVM, uPC, PDMMOD_TYPE_R0, + pszModName, cchModName, &AddrMod, + pszNearSym1, cchNearSym1, &AddrNear1, + pszNearSym2, cchNearSym2, &AddrNear2); + if (RT_SUCCESS(rc)) + { + if (pMod) + *pMod = (RTR0PTR)AddrMod; + if (pNearSym1) + *pNearSym1 = (RTR0PTR)AddrNear1; + if (pNearSym2) + *pNearSym2 = (RTR0PTR)AddrNear2; + } + return rc; +} + + +/** + * Enumerate all PDM modules. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pfnCallback Function to call back for each of the modules. + * @param pvArg User argument. + */ +VMMR3DECL(int) PDMR3LdrEnumModules(PVM pVM, PFNPDMR3ENUM pfnCallback, void *pvArg) +{ + PUVM pUVM = pVM->pUVM; + int rc = VINF_SUCCESS; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMMOD pCur = pUVM->pdm.s.pModules; pCur; pCur = pCur->pNext) + { + rc = pfnCallback(pVM, + pCur->szFilename, + pCur->szName, + pCur->ImageBase, + pCur->eType == PDMMOD_TYPE_RC ? RTLdrSize(pCur->hLdrMod) : 0, + pCur->eType == PDMMOD_TYPE_RC ? PDMLDRCTX_RAW_MODE + : pCur->eType == PDMMOD_TYPE_R0 ? PDMLDRCTX_RING_0 + : pCur->eType == PDMMOD_TYPE_R3 ? PDMLDRCTX_RING_3 + : PDMLDRCTX_INVALID, + pvArg); + if (RT_FAILURE(rc)) + break; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Locates a module. + * + * @returns Pointer to the module if found. + * @param pUVM Pointer to the user mode VM structure. + * @param pszModule The module name. + * @param enmType The module type. + * @param fLazy Lazy loading the module if set. + * @param pszSearchPath Search path for use when lazy loading. 
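+ *
+ * @remarks When @a fLazy is set and the module is not in the list yet, it is
+ *          loaded on the spot (pdmR3LoadR0U for ring-0, PDMR3LdrLoadRC in
+ *          raw-mode builds) and the lookup is then retried with fLazy clear.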
+ */ +static PPDMMOD pdmR3LdrFindModule(PUVM pUVM, const char *pszModule, PDMMODTYPE enmType, + bool fLazy, const char *pszSearchPath) +{ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMMOD pModule = pUVM->pdm.s.pModules; pModule; pModule = pModule->pNext) + if ( pModule->eType == enmType + && !strcmp(pModule->szName, pszModule)) + { + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return pModule; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + if (fLazy) + { + switch (enmType) + { +#ifdef VBOX_WITH_RAW_MODE + case PDMMOD_TYPE_RC: + { + char *pszFilename = pdmR3FileRC(pszModule, pszSearchPath); + if (pszFilename) + { + int rc = PDMR3LdrLoadRC(pUVM->pVM, pszFilename, pszModule); + RTMemTmpFree(pszFilename); + if (RT_SUCCESS(rc)) + return pdmR3LdrFindModule(pUVM, pszModule, enmType, false, NULL); + } + break; + } +#endif + + case PDMMOD_TYPE_R0: + { + int rc = pdmR3LoadR0U(pUVM, NULL, pszModule, pszSearchPath); + if (RT_SUCCESS(rc)) + return pdmR3LdrFindModule(pUVM, pszModule, enmType, false, NULL); + break; + } + + default: + AssertFailed(); + } + } + return NULL; +} + + +/** + * Resolves a ring-0 or raw-mode context interface. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvInterface Pointer to the interface structure. The symbol list + * describes the layout. + * @param cbInterface The size of the structure pvInterface is pointing + * to. For bounds checking. + * @param pszModule The module name. If NULL we assume it's the default + * R0 or RC module (@a fRing0OrRC). We'll attempt to + * load the module if it isn't found in the module + * list. + * @param pszSearchPath The module search path. If NULL, search the + * architecture dependent install directory. + * @param pszSymPrefix What to prefix the symbols in the list with. The + * idea is that you define a list that goes with an + * interface (INTERFACE_SYM_LIST) and reuse it with + * each implementation. + * @param pszSymList The symbol list for the interface. This is a + * semi-colon separated list of symbol base names. As + * mentioned above, each is prefixed with @a + * pszSymPrefix before resolving. There are a couple + * of special symbol names that will cause us to skip + * ahead a little bit: + * - U8:whatever, + * - U16:whatever, + * - U32:whatever, + * - U64:whatever, + * - RCPTR:whatever, + * - R3PTR:whatever, + * - R0PTR:whatever, + * - GCPHYS:whatever, + * - HCPHYS:whatever. + * @param fRing0 Set if it's a ring-0 context interface, clear if + * it's raw-mode context interface. + */ +VMMR3_INT_DECL(int) PDMR3LdrGetInterfaceSymbols(PVM pVM, void *pvInterface, size_t cbInterface, + const char *pszModule, const char *pszSearchPath, + const char *pszSymPrefix, const char *pszSymList, + bool fRing0) +{ + bool const fNullRun = !fRing0 && !VM_IS_RAW_MODE_ENABLED(pVM); + + /* + * Find the module. + */ + int rc = VINF_SUCCESS; + PPDMMOD pModule = NULL; + if (!fNullRun) + pModule = pdmR3LdrFindModule(pVM->pUVM, + pszModule ? pszModule : fRing0 ? "VMMR0.r0" : "VMMRC.rc", + fRing0 ? PDMMOD_TYPE_R0 : PDMMOD_TYPE_RC, + true /*fLazy*/, pszSearchPath); + if (pModule || fNullRun) + { + /* Prep the symbol name. */ + char szSymbol[256]; + size_t const cchSymPrefix = strlen(pszSymPrefix); + AssertReturn(cchSymPrefix + 5 < sizeof(szSymbol), VERR_SYMBOL_NOT_FOUND); + memcpy(szSymbol, pszSymPrefix, cchSymPrefix); + + /* + * Iterate the symbol list. + */ + uint32_t offInterface = 0; + const char *pszCur = pszSymList; + while (pszCur) + { + /* + * Find the end of the current symbol name. 
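+             * (For a list like "Connector;U32:cMilliesInterval;Wakeup" - the
+             * names are hypothetical - elements are ';' separated and a
+             * "U32:"-style prefix denotes a skip instruction, not a symbol.)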
+ */ + size_t cchSym; + const char *pszNext = strchr(pszCur, ';'); + if (pszNext) + { + cchSym = pszNext - pszCur; + pszNext++; + } + else + cchSym = strlen(pszCur); + AssertBreakStmt(cchSym > 0, rc = VERR_INVALID_PARAMETER); + + /* Is it a skip instruction? */ + const char *pszColon = (const char *)memchr(pszCur, ':', cchSym); + if (pszColon) + { + /* + * String switch on the instruction and execute it, checking + * that we didn't overshoot the interface structure. + */ +#define IS_SKIP_INSTR(szInstr) \ + ( cchSkip == sizeof(szInstr) - 1 \ + && !memcmp(pszCur, szInstr, sizeof(szInstr) - 1) ) + + size_t const cchSkip = pszColon - pszCur; + if (IS_SKIP_INSTR("U8")) + offInterface += sizeof(uint8_t); + else if (IS_SKIP_INSTR("U16")) + offInterface += sizeof(uint16_t); + else if (IS_SKIP_INSTR("U32")) + offInterface += sizeof(uint32_t); + else if (IS_SKIP_INSTR("U64")) + offInterface += sizeof(uint64_t); + else if (IS_SKIP_INSTR("RCPTR")) + offInterface += sizeof(RTRCPTR); + else if (IS_SKIP_INSTR("R3PTR")) + offInterface += sizeof(RTR3PTR); + else if (IS_SKIP_INSTR("R0PTR")) + offInterface += sizeof(RTR0PTR); + else if (IS_SKIP_INSTR("HCPHYS")) + offInterface += sizeof(RTHCPHYS); + else if (IS_SKIP_INSTR("GCPHYS")) + offInterface += sizeof(RTGCPHYS); + else + AssertMsgFailedBreakStmt(("Invalid skip instruction %.*s (prefix=%s)\n", cchSym, pszCur, pszSymPrefix), + rc = VERR_INVALID_PARAMETER); + AssertMsgBreakStmt(offInterface <= cbInterface, + ("off=%#x cb=%#x (sym=%.*s prefix=%s)\n", offInterface, cbInterface, cchSym, pszCur, pszSymPrefix), + rc = VERR_BUFFER_OVERFLOW); +#undef IS_SKIP_INSTR + } + else + { + /* + * Construct the symbol name, get its value, store it and + * advance the interface cursor. + */ + AssertReturn(cchSymPrefix + cchSym < sizeof(szSymbol), VERR_SYMBOL_NOT_FOUND); + memcpy(&szSymbol[cchSymPrefix], pszCur, cchSym); + szSymbol[cchSymPrefix + cchSym] = '\0'; + + if (fRing0) + { + void *pvValue = NULL; + if (!fNullRun) + { + rc = SUPR3GetSymbolR0((void *)(RTR0PTR)pModule->ImageBase, szSymbol, &pvValue); + AssertMsgRCBreak(rc, ("Couldn't find symbol '%s' in module '%s'\n", szSymbol, pModule->szName)); + } + + PRTR0PTR pValue = (PRTR0PTR)((uintptr_t)pvInterface + offInterface); + AssertMsgBreakStmt(offInterface + sizeof(*pValue) <= cbInterface, + ("off=%#x cb=%#x sym=%s\n", offInterface, cbInterface, szSymbol), + rc = VERR_BUFFER_OVERFLOW); + *pValue = (RTR0PTR)pvValue; + Assert((void *)*pValue == pvValue); + offInterface += sizeof(*pValue); + } + else + { + RTUINTPTR Value = 0; + if (!fNullRun) + { + rc = RTLdrGetSymbolEx(pModule->hLdrMod, pModule->pvBits, pModule->ImageBase, UINT32_MAX, szSymbol, &Value); + AssertMsgRCBreak(rc, ("Couldn't find symbol '%s' in module '%s'\n", szSymbol, pModule->szName)); + } + + PRTRCPTR pValue = (PRTRCPTR)((uintptr_t)pvInterface + offInterface); + AssertMsgBreakStmt(offInterface + sizeof(*pValue) <= cbInterface, + ("off=%#x cb=%#x sym=%s\n", offInterface, cbInterface, szSymbol), + rc = VERR_BUFFER_OVERFLOW); + *pValue = (RTRCPTR)Value; + Assert(*pValue == Value); + offInterface += sizeof(*pValue); + } + } + + /* advance */ + pszCur = pszNext; + } + + } + else + rc = VERR_MODULE_NOT_FOUND; + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/PDMNetShaper.cpp b/src/VBox/VMM/VMMR3/PDMNetShaper.cpp new file mode 100644 index 00000000..10f0778b --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMNetShaper.cpp @@ -0,0 +1,554 @@ +/* $Id: PDMNetShaper.cpp $ */ +/** @file + * PDM Network Shaper - Limit network traffic according to bandwidth group settings. 
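+ *
+ * Each bandwidth group behaves like a token bucket.  pdmNsBwGroupSetLimit()
+ * below sizes the bucket roughly as
+ * @code
+ *     cbBucket = RT_MAX(PDM_NETSHAPER_MIN_BUCKET_SIZE,
+ *                       cbPerSecMax * PDM_NETSHAPER_MAX_LATENCY / 1000);
+ * @endcode
+ * Filters that (presumably) ran out of tokens are marked fChoked, and the
+ * periodic TX thread calls pfnXmitPending on them once per latency interval.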
+ */ + +/* + * Copyright (C) 2011-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_NET_SHAPER +#include "PDMInternal.h" +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "PDMNetShaperInternal.h" + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ + +/** + * Network shaper data. One instance per VM. + */ +typedef struct PDMNETSHAPER +{ + /** Pointer to the VM. */ + PVM pVM; + /** Critical section protecting all members below. */ + RTCRITSECT Lock; + /** Pending TX thread. */ + PPDMTHREAD pTxThread; + /** Pointer to the first bandwidth group. */ + PPDMNSBWGROUP pBwGroupsHead; +} PDMNETSHAPER; + + +/** Takes the shaper lock (asserts but doesn't return or anything on + * failure). */ +#define LOCK_NETSHAPER(a_pShaper) do { int rcShaper = RTCritSectEnter(&(a_pShaper)->Lock); AssertRC(rcShaper); } while (0) + +/** Takes the shaper lock, returns + asserts on failure. */ +#define LOCK_NETSHAPER_RETURN(a_pShaper) \ + do { int rcShaper = RTCritSectEnter(&(a_pShaper)->Lock); AssertRCReturn(rcShaper, rcShaper); } while (0) + +/** Releases the shaper lock (asserts on failure). 
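+ * Typical pairing, as used by the attach/detach code below: LOCK_NETSHAPER()
+ * or LOCK_NETSHAPER_RETURN() before touching the group list, UNLOCK_NETSHAPER()
+ * when done.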
*/ +#define UNLOCK_NETSHAPER(a_pShaper) do { int rcShaper = RTCritSectLeave(&(a_pShaper)->Lock); AssertRC(rcShaper); } while (0) + + + + +static PPDMNSBWGROUP pdmNsBwGroupFindById(PPDMNETSHAPER pShaper, const char *pszId) +{ + PPDMNSBWGROUP pBwGroup = NULL; + + if (RT_VALID_PTR(pszId)) + { + LOCK_NETSHAPER(pShaper); + + pBwGroup = pShaper->pBwGroupsHead; + while ( pBwGroup + && RTStrCmp(pBwGroup->pszNameR3, pszId)) + pBwGroup = pBwGroup->pNextR3; + + UNLOCK_NETSHAPER(pShaper); + } + + return pBwGroup; +} + + +static void pdmNsBwGroupLink(PPDMNSBWGROUP pBwGroup) +{ + PPDMNETSHAPER pShaper = pBwGroup->pShaperR3; + LOCK_NETSHAPER(pShaper); + + pBwGroup->pNextR3 = pShaper->pBwGroupsHead; + pShaper->pBwGroupsHead = pBwGroup; + + UNLOCK_NETSHAPER(pShaper); +} + + +#if 0 +static void pdmNsBwGroupUnlink(PPDMNSBWGROUP pBwGroup) +{ + PPDMNETSHAPER pShaper = pBwGroup->pShaper; + LOCK_NETSHAPER(pShaper); + + if (pBwGroup == pShaper->pBwGroupsHead) + pShaper->pBwGroupsHead = pBwGroup->pNext; + else + { + PPDMNSBWGROUP pPrev = pShaper->pBwGroupsHead; + while ( pPrev + && pPrev->pNext != pBwGroup) + pPrev = pPrev->pNext; + + AssertPtr(pPrev); + pPrev->pNext = pBwGroup->pNext; + } + + UNLOCK_NETSHAPER(pShaper); +} +#endif + + +static void pdmNsBwGroupSetLimit(PPDMNSBWGROUP pBwGroup, uint64_t cbPerSecMax) +{ + pBwGroup->cbPerSecMax = cbPerSecMax; + pBwGroup->cbBucket = RT_MAX(PDM_NETSHAPER_MIN_BUCKET_SIZE, cbPerSecMax * PDM_NETSHAPER_MAX_LATENCY / 1000); + LogFlow(("pdmNsBwGroupSetLimit: New rate limit is %llu bytes per second, adjusted bucket size to %u bytes\n", + pBwGroup->cbPerSecMax, pBwGroup->cbBucket)); +} + + +static int pdmNsBwGroupCreate(PPDMNETSHAPER pShaper, const char *pszBwGroup, uint64_t cbPerSecMax) +{ + LogFlow(("pdmNsBwGroupCreate: pShaper=%#p pszBwGroup=%#p{%s} cbPerSecMax=%llu\n", pShaper, pszBwGroup, pszBwGroup, cbPerSecMax)); + + AssertPtrReturn(pShaper, VERR_INVALID_POINTER); + AssertPtrReturn(pszBwGroup, VERR_INVALID_POINTER); + AssertReturn(*pszBwGroup != '\0', VERR_INVALID_PARAMETER); + + int rc; + PPDMNSBWGROUP pBwGroup = pdmNsBwGroupFindById(pShaper, pszBwGroup); + if (!pBwGroup) + { + rc = MMHyperAlloc(pShaper->pVM, sizeof(PDMNSBWGROUP), 64, + MM_TAG_PDM_NET_SHAPER, (void **)&pBwGroup); + if (RT_SUCCESS(rc)) + { + rc = PDMR3CritSectInit(pShaper->pVM, &pBwGroup->Lock, RT_SRC_POS, "BWGRP-%s", pszBwGroup); + if (RT_SUCCESS(rc)) + { + pBwGroup->pszNameR3 = MMR3HeapStrDup(pShaper->pVM, MM_TAG_PDM_NET_SHAPER, pszBwGroup); + if (pBwGroup->pszNameR3) + { + pBwGroup->pShaperR3 = pShaper; + pBwGroup->cRefs = 0; + + pdmNsBwGroupSetLimit(pBwGroup, cbPerSecMax); + + pBwGroup->cbTokensLast = pBwGroup->cbBucket; + pBwGroup->tsUpdatedLast = RTTimeSystemNanoTS(); + + LogFlowFunc(("pszBwGroup={%s} cbBucket=%u\n", + pszBwGroup, pBwGroup->cbBucket)); + pdmNsBwGroupLink(pBwGroup); + return VINF_SUCCESS; + } + PDMR3CritSectDelete(&pBwGroup->Lock); + } + MMHyperFree(pShaper->pVM, pBwGroup); + } + else + rc = VERR_NO_MEMORY; + } + else + rc = VERR_ALREADY_EXISTS; + + LogFlowFunc(("returns rc=%Rrc\n", rc)); + return rc; +} + + +static void pdmNsBwGroupTerminate(PPDMNSBWGROUP pBwGroup) +{ + Assert(pBwGroup->cRefs == 0); + if (PDMCritSectIsInitialized(&pBwGroup->Lock)) + PDMR3CritSectDelete(&pBwGroup->Lock); +} + + +DECLINLINE(void) pdmNsBwGroupRef(PPDMNSBWGROUP pBwGroup) +{ + ASMAtomicIncU32(&pBwGroup->cRefs); +} + + +DECLINLINE(void) pdmNsBwGroupUnref(PPDMNSBWGROUP pBwGroup) +{ + Assert(pBwGroup->cRefs > 0); + ASMAtomicDecU32(&pBwGroup->cRefs); +} + + +static void pdmNsBwGroupXmitPending(PPDMNSBWGROUP 
pBwGroup) +{ + /* + * We don't need to hold the bandwidth group lock to iterate over the list + * of filters since the filters are removed while the shaper lock is being + * held. + */ + AssertPtr(pBwGroup); + AssertPtr(pBwGroup->pShaperR3); + Assert(RTCritSectIsOwner(&pBwGroup->pShaperR3->Lock)); + //LOCK_NETSHAPER(pShaper); + + /* Check if the group is disabled. */ + if (pBwGroup->cbPerSecMax == 0) + return; + + PPDMNSFILTER pFilter = pBwGroup->pFiltersHeadR3; + while (pFilter) + { + bool fChoked = ASMAtomicXchgBool(&pFilter->fChoked, false); + Log3((LOG_FN_FMT ": pFilter=%#p fChoked=%RTbool\n", __PRETTY_FUNCTION__, pFilter, fChoked)); + if (fChoked && pFilter->pIDrvNetR3) + { + LogFlowFunc(("Calling pfnXmitPending for pFilter=%#p\n", pFilter)); + pFilter->pIDrvNetR3->pfnXmitPending(pFilter->pIDrvNetR3); + } + + pFilter = pFilter->pNextR3; + } + + //UNLOCK_NETSHAPER(pShaper); +} + + +static void pdmNsFilterLink(PPDMNSFILTER pFilter) +{ + PPDMNSBWGROUP pBwGroup = pFilter->pBwGroupR3; + int rc = PDMCritSectEnter(&pBwGroup->Lock, VERR_SEM_BUSY); AssertRC(rc); + + pFilter->pNextR3 = pBwGroup->pFiltersHeadR3; + pBwGroup->pFiltersHeadR3 = pFilter; + + rc = PDMCritSectLeave(&pBwGroup->Lock); AssertRC(rc); +} + + +static void pdmNsFilterUnlink(PPDMNSFILTER pFilter) +{ + PPDMNSBWGROUP pBwGroup = pFilter->pBwGroupR3; + /* + * We need to make sure we hold the shaper lock since pdmNsBwGroupXmitPending() + * does not hold the bandwidth group lock while iterating over the list + * of group's filters. + */ + AssertPtr(pBwGroup); + AssertPtr(pBwGroup->pShaperR3); + Assert(RTCritSectIsOwner(&pBwGroup->pShaperR3->Lock)); + int rc = PDMCritSectEnter(&pBwGroup->Lock, VERR_SEM_BUSY); AssertRC(rc); + + if (pFilter == pBwGroup->pFiltersHeadR3) + pBwGroup->pFiltersHeadR3 = pFilter->pNextR3; + else + { + PPDMNSFILTER pPrev = pBwGroup->pFiltersHeadR3; + while ( pPrev + && pPrev->pNextR3 != pFilter) + pPrev = pPrev->pNextR3; + + AssertPtr(pPrev); + pPrev->pNextR3 = pFilter->pNextR3; + } + + rc = PDMCritSectLeave(&pBwGroup->Lock); AssertRC(rc); +} + + +/** + * Attach network filter driver from bandwidth group. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param pDrvIns The driver instance. + * @param pszBwGroup Name of the bandwidth group to attach to. + * @param pFilter Pointer to the filter we attach. + */ +VMMR3_INT_DECL(int) PDMR3NsAttach(PUVM pUVM, PPDMDRVINS pDrvIns, const char *pszBwGroup, PPDMNSFILTER pFilter) +{ + VM_ASSERT_EMT(pUVM->pVM); + AssertPtrReturn(pFilter, VERR_INVALID_POINTER); + AssertReturn(pFilter->pBwGroupR3 == NULL, VERR_ALREADY_EXISTS); + RT_NOREF_PV(pDrvIns); + + PPDMNETSHAPER pShaper = pUVM->pdm.s.pNetShaper; + LOCK_NETSHAPER_RETURN(pShaper); + + int rc = VINF_SUCCESS; + PPDMNSBWGROUP pBwGroupNew = NULL; + if (pszBwGroup) + { + pBwGroupNew = pdmNsBwGroupFindById(pShaper, pszBwGroup); + if (pBwGroupNew) + pdmNsBwGroupRef(pBwGroupNew); + else + rc = VERR_NOT_FOUND; + } + + if (RT_SUCCESS(rc)) + { + PPDMNSBWGROUP pBwGroupOld = ASMAtomicXchgPtrT(&pFilter->pBwGroupR3, pBwGroupNew, PPDMNSBWGROUP); + ASMAtomicWritePtr(&pFilter->pBwGroupR0, MMHyperR3ToR0(pUVM->pVM, pBwGroupNew)); + if (pBwGroupOld) + pdmNsBwGroupUnref(pBwGroupOld); + pdmNsFilterLink(pFilter); + } + + UNLOCK_NETSHAPER(pShaper); + return rc; +} + + +/** + * Detach network filter driver from bandwidth group. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pDrvIns The driver instance. + * @param pFilter Pointer to the filter we detach. 
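+ *
+ * A driver normally pairs this with PDMR3NsAttach() (sketch; the group name
+ * and pThis->Filter member are made up):
+ * @code
+ *     rc = PDMR3NsAttach(pUVM, pDrvIns, "guestnet", &pThis->Filter);
+ *     ...
+ *     PDMR3NsDetach(pUVM, pDrvIns, &pThis->Filter);
+ * @endcode
+ * Detaching a filter that was never attached is harmless and returns
+ * VINF_SUCCESS (see the constructor-failure note in the code below).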
+ */ +VMMR3_INT_DECL(int) PDMR3NsDetach(PUVM pUVM, PPDMDRVINS pDrvIns, PPDMNSFILTER pFilter) +{ + RT_NOREF_PV(pDrvIns); + VM_ASSERT_EMT(pUVM->pVM); + AssertPtrReturn(pFilter, VERR_INVALID_POINTER); + + /* Now, return quietly if the filter isn't attached since driver/device + destructors are called on constructor failure. */ + if (!pFilter->pBwGroupR3) + return VINF_SUCCESS; + AssertPtrReturn(pFilter->pBwGroupR3, VERR_INVALID_POINTER); + + PPDMNETSHAPER pShaper = pUVM->pdm.s.pNetShaper; + LOCK_NETSHAPER_RETURN(pShaper); + + pdmNsFilterUnlink(pFilter); + PPDMNSBWGROUP pBwGroup = ASMAtomicXchgPtrT(&pFilter->pBwGroupR3, NULL, PPDMNSBWGROUP); + if (pBwGroup) + pdmNsBwGroupUnref(pBwGroup); + + UNLOCK_NETSHAPER(pShaper); + return VINF_SUCCESS; +} + + +/** + * Adjusts the maximum rate for the bandwidth group. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszBwGroup Name of the bandwidth group to attach to. + * @param cbPerSecMax Maximum number of bytes per second to be transmitted. + */ +VMMR3DECL(int) PDMR3NsBwGroupSetLimit(PUVM pUVM, const char *pszBwGroup, uint64_t cbPerSecMax) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PPDMNETSHAPER pShaper = pUVM->pdm.s.pNetShaper; + LOCK_NETSHAPER_RETURN(pShaper); + + int rc; + PPDMNSBWGROUP pBwGroup = pdmNsBwGroupFindById(pShaper, pszBwGroup); + if (pBwGroup) + { + rc = PDMCritSectEnter(&pBwGroup->Lock, VERR_SEM_BUSY); AssertRC(rc); + if (RT_SUCCESS(rc)) + { + pdmNsBwGroupSetLimit(pBwGroup, cbPerSecMax); + + /* Drop extra tokens */ + if (pBwGroup->cbTokensLast > pBwGroup->cbBucket) + pBwGroup->cbTokensLast = pBwGroup->cbBucket; + + int rc2 = PDMCritSectLeave(&pBwGroup->Lock); AssertRC(rc2); + } + } + else + rc = VERR_NOT_FOUND; + + UNLOCK_NETSHAPER(pShaper); + return rc; +} + + +/** + * I/O thread for pending TX. + * + * @returns VINF_SUCCESS (ignored). + * @param pVM The cross context VM structure. + * @param pThread The PDM thread data. + */ +static DECLCALLBACK(int) pdmR3NsTxThread(PVM pVM, PPDMTHREAD pThread) +{ + RT_NOREF_PV(pVM); + + PPDMNETSHAPER pShaper = (PPDMNETSHAPER)pThread->pvUser; + LogFlow(("pdmR3NsTxThread: pShaper=%p\n", pShaper)); + while (pThread->enmState == PDMTHREADSTATE_RUNNING) + { + RTThreadSleep(PDM_NETSHAPER_MAX_LATENCY); + + /* Go over all bandwidth groups/filters calling pfnXmitPending */ + LOCK_NETSHAPER(pShaper); + PPDMNSBWGROUP pBwGroup = pShaper->pBwGroupsHead; + while (pBwGroup) + { + pdmNsBwGroupXmitPending(pBwGroup); + pBwGroup = pBwGroup->pNextR3; + } + UNLOCK_NETSHAPER(pShaper); + } + return VINF_SUCCESS; +} + + +/** + * @copydoc FNPDMTHREADWAKEUPINT + */ +static DECLCALLBACK(int) pdmR3NsTxWakeUp(PVM pVM, PPDMTHREAD pThread) +{ + RT_NOREF2(pVM, pThread); + LogFlow(("pdmR3NsTxWakeUp: pShaper=%p\n", pThread->pvUser)); + /* Nothing to do */ + return VINF_SUCCESS; +} + + +/** + * Terminate the network shaper. + * + * @returns VBox error code. + * @param pVM The cross context VM structure. + * + * @remarks This method destroys all bandwidth group objects. + */ +int pdmR3NetShaperTerm(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + AssertPtrReturn(pUVM, VERR_INVALID_POINTER); + PPDMNETSHAPER pShaper = pUVM->pdm.s.pNetShaper; + AssertPtrReturn(pShaper, VERR_INVALID_POINTER); + + /* Destroy the bandwidth managers. 
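PDMR3NsBwGroupSetLimit recomputes the token bucket via pdmNsBwGroupSetLimit, so the bucket always covers PDM_NETSHAPER_MAX_LATENCY worth of traffic at the new rate and surplus tokens are dropped. A hypothetical runtime adjustment is sketched below; groups normally come from the PDM/NetworkShaper/BwGroups/<Name>/Max configuration keys processed by pdmR3NetShaperInit further down, and the 100 ms latency value is only an assumption used for the arithmetic.

/* Illustrative sketch only: cap the group at 10 MiB/s. */
int rc = PDMR3NsBwGroupSetLimit(pUVM, "ShaperGroup1", 10 * _1M /* bytes per second */);
AssertLogRelRC(rc);
/* If PDM_NETSHAPER_MAX_LATENCY were 100 (ms), the bucket becomes
   10 * _1M * 100 / 1000 = _1M bytes, and any tokens above that are discarded. */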
*/ + PPDMNSBWGROUP pBwGroup = pShaper->pBwGroupsHead; + while (pBwGroup) + { + PPDMNSBWGROUP pFree = pBwGroup; + pBwGroup = pBwGroup->pNextR3; + pdmNsBwGroupTerminate(pFree); + MMR3HeapFree(pFree->pszNameR3); + MMHyperFree(pVM, pFree); + } + + RTCritSectDelete(&pShaper->Lock); + MMR3HeapFree(pShaper); + pUVM->pdm.s.pNetShaper = NULL; + return VINF_SUCCESS; +} + + +/** + * Initialize the network shaper. + * + * @returns VBox status code + * @param pVM The cross context VM structure. + */ +int pdmR3NetShaperInit(PVM pVM) +{ + LogFlow(("pdmR3NetShaperInit: pVM=%p\n", pVM)); + VM_ASSERT_EMT(pVM); + PUVM pUVM = pVM->pUVM; + AssertMsgReturn(!pUVM->pdm.s.pNetShaper, ("Network shaper was already initialized\n"), VERR_WRONG_ORDER); + + PPDMNETSHAPER pShaper; + int rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_NET_SHAPER, sizeof(PDMNETSHAPER), (void **)&pShaper); + if (RT_SUCCESS(rc)) + { + PCFGMNODE pCfgNetShaper = CFGMR3GetChild(CFGMR3GetChild(CFGMR3GetRoot(pVM), "PDM"), "NetworkShaper"); + + pShaper->pVM = pVM; + rc = RTCritSectInit(&pShaper->Lock); + if (RT_SUCCESS(rc)) + { + /* Create all bandwidth groups. */ + PCFGMNODE pCfgBwGrp = CFGMR3GetChild(pCfgNetShaper, "BwGroups"); + if (pCfgBwGrp) + { + for (PCFGMNODE pCur = CFGMR3GetFirstChild(pCfgBwGrp); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + size_t cbName = CFGMR3GetNameLen(pCur) + 1; + char *pszBwGrpId = (char *)RTMemAllocZ(cbName); + if (pszBwGrpId) + { + rc = CFGMR3GetName(pCur, pszBwGrpId, cbName); + if (RT_SUCCESS(rc)) + { + uint64_t cbMax; + rc = CFGMR3QueryU64(pCur, "Max", &cbMax); + if (RT_SUCCESS(rc)) + rc = pdmNsBwGroupCreate(pShaper, pszBwGrpId, cbMax); + } + RTMemFree(pszBwGrpId); + } + else + rc = VERR_NO_MEMORY; + if (RT_FAILURE(rc)) + break; + } + } + + if (RT_SUCCESS(rc)) + { + rc = PDMR3ThreadCreate(pVM, &pShaper->pTxThread, pShaper, pdmR3NsTxThread, pdmR3NsTxWakeUp, + 0 /*cbStack*/, RTTHREADTYPE_IO, "PDMNsTx"); + if (RT_SUCCESS(rc)) + { + pUVM->pdm.s.pNetShaper = pShaper; + return VINF_SUCCESS; + } + } + + RTCritSectDelete(&pShaper->Lock); + } + + MMR3HeapFree(pShaper); + } + + LogFlow(("pdmR3NetShaperInit: pVM=%p rc=%Rrc\n", pVM, rc)); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/PDMQueue.cpp b/src/VBox/VMM/VMMR3/PDMQueue.cpp new file mode 100644 index 00000000..23dc2134 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMQueue.cpp @@ -0,0 +1,880 @@ +/* $Id: PDMQueue.cpp $ */ +/** @file + * PDM Queue - Transport data and tasks to EMT and R3. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_QUEUE +#include "PDMInternal.h" +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include + +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +DECLINLINE(void) pdmR3QueueFreeItem(PPDMQUEUE pQueue, PPDMQUEUEITEMCORE pItem); +static bool pdmR3QueueFlush(PPDMQUEUE pQueue); +static DECLCALLBACK(void) pdmR3QueueTimer(PVM pVM, PTMTIMER pTimer, void *pvUser); + + + +/** + * Internal worker for the queue creation apis. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cbItem Item size. + * @param cItems Number of items. + * @param cMilliesInterval Number of milliseconds between polling the queue. + * If 0 then the emulation thread will be notified whenever an item arrives. + * @param fRZEnabled Set if the queue will be used from RC/R0 and need to be allocated from the hyper heap. + * @param pszName The queue name. Unique. Not copied. + * @param ppQueue Where to store the queue handle. + */ +static int pdmR3QueueCreate(PVM pVM, size_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, bool fRZEnabled, + const char *pszName, PPDMQUEUE *ppQueue) +{ + PUVM pUVM = pVM->pUVM; + + /* + * Validate input. + */ + AssertMsgReturn(cbItem >= sizeof(PDMQUEUEITEMCORE) && cbItem < _1M, ("cbItem=%zu\n", cbItem), VERR_OUT_OF_RANGE); + AssertMsgReturn(cItems >= 1 && cItems <= _64K, ("cItems=%u\n", cItems), VERR_OUT_OF_RANGE); + + /* + * Align the item size and calculate the structure size. + */ + cbItem = RT_ALIGN(cbItem, sizeof(RTUINTPTR)); + size_t cb = cbItem * cItems + RT_ALIGN_Z(RT_UOFFSETOF_DYN(PDMQUEUE, aFreeItems[cItems + PDMQUEUE_FREE_SLACK]), 16); + PPDMQUEUE pQueue; + int rc; + if (fRZEnabled) + rc = MMHyperAlloc(pVM, cb, 0, MM_TAG_PDM_QUEUE, (void **)&pQueue ); + else + rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_QUEUE, cb, (void **)&pQueue); + if (RT_FAILURE(rc)) + return rc; + + /* + * Initialize the data fields. + */ + pQueue->pVMR3 = pVM; + pQueue->pVMR0 = fRZEnabled ? pVM->pVMR0 : NIL_RTR0PTR; + pQueue->pVMRC = fRZEnabled ? pVM->pVMRC : NIL_RTRCPTR; + pQueue->pszName = pszName; + pQueue->cMilliesInterval = cMilliesInterval; + //pQueue->pTimer = NULL; + pQueue->cbItem = (uint32_t)cbItem; + pQueue->cItems = cItems; + //pQueue->pPendingR3 = NULL; + //pQueue->pPendingR0 = NULL; + //pQueue->pPendingRC = NULL; + pQueue->iFreeHead = cItems; + //pQueue->iFreeTail = 0; + PPDMQUEUEITEMCORE pItem = (PPDMQUEUEITEMCORE)((char *)pQueue + RT_ALIGN_Z(RT_UOFFSETOF_DYN(PDMQUEUE, aFreeItems[cItems + PDMQUEUE_FREE_SLACK]), 16)); + for (unsigned i = 0; i < cItems; i++, pItem = (PPDMQUEUEITEMCORE)((char *)pItem + cbItem)) + { + pQueue->aFreeItems[i].pItemR3 = pItem; + if (fRZEnabled) + { + pQueue->aFreeItems[i].pItemR0 = MMHyperR3ToR0(pVM, pItem); + pQueue->aFreeItems[i].pItemRC = MMHyperR3ToRC(pVM, pItem); + } + } + + /* + * Create timer? 
+ */ + if (cMilliesInterval) + { + rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL, pdmR3QueueTimer, pQueue, "Queue timer", &pQueue->pTimer); + if (RT_SUCCESS(rc)) + { + rc = TMTimerSetMillies(pQueue->pTimer, cMilliesInterval); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("TMTimerSetMillies failed rc=%Rrc\n", rc)); + int rc2 = TMR3TimerDestroy(pQueue->pTimer); AssertRC(rc2); + } + } + else + AssertMsgFailed(("TMR3TimerCreateInternal failed rc=%Rrc\n", rc)); + if (RT_FAILURE(rc)) + { + if (fRZEnabled) + MMHyperFree(pVM, pQueue); + else + MMR3HeapFree(pQueue); + return rc; + } + + /* + * Insert into the queue list for timer driven queues. + */ + pdmLock(pVM); + pQueue->pNext = pUVM->pdm.s.pQueuesTimer; + pUVM->pdm.s.pQueuesTimer = pQueue; + pdmUnlock(pVM); + } + else + { + /* + * Insert into the queue list for forced action driven queues. + * This is a FIFO, so insert at the end. + */ + /** @todo we should add a priority to the queues so we don't have to rely on + * the initialization order to deal with problems like @bugref{1605} (pgm/pcnet + * deadlock caused by the critsect queue to be last in the chain). + * - Update, the critical sections are no longer using queues, so this isn't a real + * problem any longer. The priority might be a nice feature for later though. + */ + pdmLock(pVM); + if (!pUVM->pdm.s.pQueuesForced) + pUVM->pdm.s.pQueuesForced = pQueue; + else + { + PPDMQUEUE pPrev = pUVM->pdm.s.pQueuesForced; + while (pPrev->pNext) + pPrev = pPrev->pNext; + pPrev->pNext = pQueue; + } + pdmUnlock(pVM); + } + + /* + * Register the statistics. + */ + STAMR3RegisterF(pVM, &pQueue->cbItem, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Item size.", "/PDM/Queue/%s/cbItem", pQueue->pszName); + STAMR3RegisterF(pVM, &pQueue->cItems, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, "Queue size.", "/PDM/Queue/%s/cItems", pQueue->pszName); + STAMR3RegisterF(pVM, &pQueue->StatAllocFailures, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "PDMQueueAlloc failures.", "/PDM/Queue/%s/AllocFailures", pQueue->pszName); + STAMR3RegisterF(pVM, &pQueue->StatInsert, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Calls to PDMQueueInsert.", "/PDM/Queue/%s/Insert", pQueue->pszName); + STAMR3RegisterF(pVM, &pQueue->StatFlush, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Calls to pdmR3QueueFlush.", "/PDM/Queue/%s/Flush", pQueue->pszName); + STAMR3RegisterF(pVM, &pQueue->StatFlushLeftovers, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Left over items after flush.", "/PDM/Queue/%s/FlushLeftovers", pQueue->pszName); +#ifdef VBOX_WITH_STATISTICS + STAMR3RegisterF(pVM, &pQueue->StatFlushPrf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS, "Profiling pdmR3QueueFlush.", "/PDM/Queue/%s/FlushPrf", pQueue->pszName); + STAMR3RegisterF(pVM, (void *)&pQueue->cStatPending, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, "Pending items.", "/PDM/Queue/%s/Pending", pQueue->pszName); +#endif + + *ppQueue = pQueue; + return VINF_SUCCESS; +} + + +/** + * Create a queue with a device owner. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param cbItem Size a queue item. + * @param cItems Number of items in the queue. + * @param cMilliesInterval Number of milliseconds between polling the queue. + * If 0 then the emulation thread will be notified whenever an item arrives. + * @param pfnCallback The consumer function. + * @param fRZEnabled Set if the queue must be usable from RC/R0. 
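Callers of the creation APIs below embed PDMQUEUEITEMCORE as the first member of their own item type, which is why the worker above insists on cbItem >= sizeof(PDMQUEUEITEMCORE). An illustrative device-queue consumer, with all DEMO* names made up:

typedef struct DEMOQUEUEITEM
{
    PDMQUEUEITEMCORE    Core;      /* must come first */
    uint32_t            uValue;    /* payload */
} DEMOQUEUEITEM;
typedef DEMOQUEUEITEM *PDEMOQUEUEITEM;

/* Matches PFNPDMQUEUEDEV: return true to consume the item, false to keep it pending. */
static DECLCALLBACK(bool) demoQueueConsumer(PPDMDEVINS pDevIns, PPDMQUEUEITEMCORE pItem)
{
    PDEMOQUEUEITEM pMyItem = (PDEMOQUEUEITEM)pItem;
    Log(("demoQueueConsumer: uValue=%u\n", pMyItem->uValue));
    RT_NOREF_PV(pDevIns);
    return true;
}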
+ * @param pszName The queue name. Unique. Not copied. + * @param ppQueue Where to store the queue handle on success. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(int) PDMR3QueueCreateDevice(PVM pVM, PPDMDEVINS pDevIns, size_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEDEV pfnCallback, bool fRZEnabled, const char *pszName, PPDMQUEUE *ppQueue) +{ + LogFlow(("PDMR3QueueCreateDevice: pDevIns=%p cbItem=%d cItems=%d cMilliesInterval=%d pfnCallback=%p fRZEnabled=%RTbool pszName=%s\n", + pDevIns, cbItem, cItems, cMilliesInterval, pfnCallback, fRZEnabled, pszName)); + + /* + * Validate input. + */ + VMCPU_ASSERT_EMT(&pVM->aCpus[0]); + if (!pfnCallback) + { + AssertMsgFailed(("No consumer callback!\n")); + return VERR_INVALID_PARAMETER; + } + + /* + * Create the queue. + */ + PPDMQUEUE pQueue; + int rc = pdmR3QueueCreate(pVM, cbItem, cItems, cMilliesInterval, fRZEnabled, pszName, &pQueue); + if (RT_SUCCESS(rc)) + { + pQueue->enmType = PDMQUEUETYPE_DEV; + pQueue->u.Dev.pDevIns = pDevIns; + pQueue->u.Dev.pfnCallback = pfnCallback; + + *ppQueue = pQueue; + Log(("PDM: Created device queue %p; cbItem=%d cItems=%d cMillies=%d pfnCallback=%p pDevIns=%p\n", + pQueue, cbItem, cItems, cMilliesInterval, pfnCallback, pDevIns)); + } + return rc; +} + + +/** + * Create a queue with a driver owner. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param cbItem Size a queue item. + * @param cItems Number of items in the queue. + * @param cMilliesInterval Number of milliseconds between polling the queue. + * If 0 then the emulation thread will be notified whenever an item arrives. + * @param pfnCallback The consumer function. + * @param pszName The queue name. Unique. Not copied. + * @param ppQueue Where to store the queue handle on success. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(int) PDMR3QueueCreateDriver(PVM pVM, PPDMDRVINS pDrvIns, size_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEDRV pfnCallback, const char *pszName, PPDMQUEUE *ppQueue) +{ + LogFlow(("PDMR3QueueCreateDriver: pDrvIns=%p cbItem=%d cItems=%d cMilliesInterval=%d pfnCallback=%p pszName=%s\n", + pDrvIns, cbItem, cItems, cMilliesInterval, pfnCallback, pszName)); + + /* + * Validate input. + */ + VMCPU_ASSERT_EMT(&pVM->aCpus[0]); + AssertPtrReturn(pfnCallback, VERR_INVALID_POINTER); + + /* + * Create the queue. + */ + PPDMQUEUE pQueue; + int rc = pdmR3QueueCreate(pVM, cbItem, cItems, cMilliesInterval, false, pszName, &pQueue); + if (RT_SUCCESS(rc)) + { + pQueue->enmType = PDMQUEUETYPE_DRV; + pQueue->u.Drv.pDrvIns = pDrvIns; + pQueue->u.Drv.pfnCallback = pfnCallback; + + *ppQueue = pQueue; + Log(("PDM: Created driver queue %p; cbItem=%d cItems=%d cMillies=%d pfnCallback=%p pDrvIns=%p\n", + pQueue, cbItem, cItems, cMilliesInterval, pfnCallback, pDrvIns)); + } + return rc; +} + + +/** + * Create a queue with an internal owner. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cbItem Size a queue item. + * @param cItems Number of items in the queue. + * @param cMilliesInterval Number of milliseconds between polling the queue. + * If 0 then the emulation thread will be notified whenever an item arrives. + * @param pfnCallback The consumer function. + * @param fRZEnabled Set if the queue must be usable from RC/R0. + * @param pszName The queue name. Unique. Not copied. + * @param ppQueue Where to store the queue handle on success. + * @thread Emulation thread only. 
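Creating the queue and producing into it then looks roughly as follows; this is a sketch continuing the hypothetical DEMOQUEUEITEM above, and devices normally obtain the queue through the device-helper wrapper around PDMR3QueueCreateDevice during construction.

/* Construction time (EMT): */
PPDMQUEUE pQueue;
int rc = PDMR3QueueCreateDevice(pVM, pDevIns, sizeof(DEMOQUEUEITEM), 32 /*cItems*/,
                                0 /*cMilliesInterval: notify EMT immediately*/,
                                demoQueueConsumer, false /*fRZEnabled*/, "DemoQueue", &pQueue);

/* Producer side, e.g. from an I/O handler; PDMQueueAlloc/PDMQueueInsert are the
   calls counted by the AllocFailures/Insert statistics registered above: */
PDEMOQUEUEITEM pItem = (PDEMOQUEUEITEM)PDMQueueAlloc(pQueue);
if (pItem)
{
    pItem->uValue = 42;
    PDMQueueInsert(pQueue, &pItem->Core);   /* sets VM_FF_PDM_QUEUES so EMT flushes it */
}
/* else: out of free items; this shows up in the AllocFailures counter. */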
+ */ +VMMR3_INT_DECL(int) PDMR3QueueCreateInternal(PVM pVM, size_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEINT pfnCallback, bool fRZEnabled, const char *pszName, PPDMQUEUE *ppQueue) +{ + LogFlow(("PDMR3QueueCreateInternal: cbItem=%d cItems=%d cMilliesInterval=%d pfnCallback=%p fRZEnabled=%RTbool pszName=%s\n", + cbItem, cItems, cMilliesInterval, pfnCallback, fRZEnabled, pszName)); + + /* + * Validate input. + */ + VMCPU_ASSERT_EMT(&pVM->aCpus[0]); + AssertPtrReturn(pfnCallback, VERR_INVALID_POINTER); + + /* + * Create the queue. + */ + PPDMQUEUE pQueue; + int rc = pdmR3QueueCreate(pVM, cbItem, cItems, cMilliesInterval, fRZEnabled, pszName, &pQueue); + if (RT_SUCCESS(rc)) + { + pQueue->enmType = PDMQUEUETYPE_INTERNAL; + pQueue->u.Int.pfnCallback = pfnCallback; + + *ppQueue = pQueue; + Log(("PDM: Created internal queue %p; cbItem=%d cItems=%d cMillies=%d pfnCallback=%p\n", + pQueue, cbItem, cItems, cMilliesInterval, pfnCallback)); + } + return rc; +} + + +/** + * Create a queue with an external owner. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cbItem Size a queue item. + * @param cItems Number of items in the queue. + * @param cMilliesInterval Number of milliseconds between polling the queue. + * If 0 then the emulation thread will be notified whenever an item arrives. + * @param pfnCallback The consumer function. + * @param pvUser The user argument to the consumer function. + * @param pszName The queue name. Unique. Not copied. + * @param ppQueue Where to store the queue handle on success. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(int) PDMR3QueueCreateExternal(PVM pVM, size_t cbItem, uint32_t cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEEXT pfnCallback, void *pvUser, const char *pszName, PPDMQUEUE *ppQueue) +{ + LogFlow(("PDMR3QueueCreateExternal: cbItem=%d cItems=%d cMilliesInterval=%d pfnCallback=%p pszName=%s\n", cbItem, cItems, cMilliesInterval, pfnCallback, pszName)); + + /* + * Validate input. + */ + VMCPU_ASSERT_EMT(&pVM->aCpus[0]); + AssertPtrReturn(pfnCallback, VERR_INVALID_POINTER); + + /* + * Create the queue. + */ + PPDMQUEUE pQueue; + int rc = pdmR3QueueCreate(pVM, cbItem, cItems, cMilliesInterval, false, pszName, &pQueue); + if (RT_SUCCESS(rc)) + { + pQueue->enmType = PDMQUEUETYPE_EXTERNAL; + pQueue->u.Ext.pvUser = pvUser; + pQueue->u.Ext.pfnCallback = pfnCallback; + + *ppQueue = pQueue; + Log(("PDM: Created external queue %p; cbItem=%d cItems=%d cMillies=%d pfnCallback=%p pvUser=%p\n", + pQueue, cbItem, cItems, cMilliesInterval, pfnCallback, pvUser)); + } + return rc; +} + + +/** + * Destroy a queue. + * + * @returns VBox status code. + * @param pQueue Queue to destroy. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(int) PDMR3QueueDestroy(PPDMQUEUE pQueue) +{ + LogFlow(("PDMR3QueueDestroy: pQueue=%p\n", pQueue)); + + /* + * Validate input. + */ + if (!pQueue) + return VERR_INVALID_PARAMETER; + Assert(pQueue && pQueue->pVMR3); + PVM pVM = pQueue->pVMR3; + PUVM pUVM = pVM->pUVM; + + pdmLock(pVM); + + /* + * Unlink it. 
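An external queue differs only in that the consumer receives the opaque pvUser instead of a device or driver instance. A sketch, again with made-up DEMO* names and reusing the DEMOQUEUEITEM layout from above:

typedef struct DEMOSTATE { uint32_t cItemsSeen; } DEMOSTATE;
static DEMOSTATE g_DemoState;

static DECLCALLBACK(bool) demoExtConsumer(void *pvUser, PPDMQUEUEITEMCORE pItem)
{
    DEMOSTATE *pState = (DEMOSTATE *)pvUser;
    RT_NOREF_PV(pItem);
    pState->cItemsSeen++;
    return true;                            /* false leaves the remaining items pending */
}

/* Creation and teardown (both EMT only): */
PPDMQUEUE pQueue;
int rc = PDMR3QueueCreateExternal(pVM, sizeof(DEMOQUEUEITEM), 16 /*cItems*/, 0 /*cMilliesInterval*/,
                                  demoExtConsumer, &g_DemoState, "DemoExtQueue", &pQueue);
/* ... use the queue ... */
rc = PDMR3QueueDestroy(pQueue);             /* unlinks it, destroys any timer and frees the memory */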
+ */ + if (pQueue->pTimer) + { + if (pUVM->pdm.s.pQueuesTimer != pQueue) + { + PPDMQUEUE pCur = pUVM->pdm.s.pQueuesTimer; + while (pCur) + { + if (pCur->pNext == pQueue) + { + pCur->pNext = pQueue->pNext; + break; + } + pCur = pCur->pNext; + } + AssertMsg(pCur, ("Didn't find the queue!\n")); + } + else + pUVM->pdm.s.pQueuesTimer = pQueue->pNext; + } + else + { + if (pUVM->pdm.s.pQueuesForced != pQueue) + { + PPDMQUEUE pCur = pUVM->pdm.s.pQueuesForced; + while (pCur) + { + if (pCur->pNext == pQueue) + { + pCur->pNext = pQueue->pNext; + break; + } + pCur = pCur->pNext; + } + AssertMsg(pCur, ("Didn't find the queue!\n")); + } + else + pUVM->pdm.s.pQueuesForced = pQueue->pNext; + } + pQueue->pNext = NULL; + pQueue->pVMR3 = NULL; + pdmUnlock(pVM); + + /* + * Deregister statistics. + */ + STAMR3DeregisterF(pVM->pUVM, "/PDM/Queue/%s/cbItem", pQueue->pszName); + + /* + * Destroy the timer and free it. + */ + if (pQueue->pTimer) + { + TMR3TimerDestroy(pQueue->pTimer); + pQueue->pTimer = NULL; + } + if (pQueue->pVMRC) + { + pQueue->pVMRC = NIL_RTRCPTR; + pQueue->pVMR0 = NIL_RTR0PTR; + MMHyperFree(pVM, pQueue); + } + else + MMR3HeapFree(pQueue); + + return VINF_SUCCESS; +} + + +/** + * Destroy a all queues owned by the specified device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(int) PDMR3QueueDestroyDevice(PVM pVM, PPDMDEVINS pDevIns) +{ + LogFlow(("PDMR3QueueDestroyDevice: pDevIns=%p\n", pDevIns)); + + /* + * Validate input. + */ + if (!pDevIns) + return VERR_INVALID_PARAMETER; + + PUVM pUVM = pVM->pUVM; + pdmLock(pVM); + + /* + * Unlink it. + */ + PPDMQUEUE pQueueNext = pUVM->pdm.s.pQueuesTimer; + PPDMQUEUE pQueue = pUVM->pdm.s.pQueuesForced; + do + { + while (pQueue) + { + if ( pQueue->enmType == PDMQUEUETYPE_DEV + && pQueue->u.Dev.pDevIns == pDevIns) + { + PPDMQUEUE pQueueDestroy = pQueue; + pQueue = pQueue->pNext; + int rc = PDMR3QueueDestroy(pQueueDestroy); + AssertRC(rc); + } + else + pQueue = pQueue->pNext; + } + + /* next queue list */ + pQueue = pQueueNext; + pQueueNext = NULL; + } while (pQueue); + + pdmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * Destroy a all queues owned by the specified driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(int) PDMR3QueueDestroyDriver(PVM pVM, PPDMDRVINS pDrvIns) +{ + LogFlow(("PDMR3QueueDestroyDriver: pDrvIns=%p\n", pDrvIns)); + + /* + * Validate input. + */ + if (!pDrvIns) + return VERR_INVALID_PARAMETER; + + PUVM pUVM = pVM->pUVM; + pdmLock(pVM); + + /* + * Unlink it. + */ + PPDMQUEUE pQueueNext = pUVM->pdm.s.pQueuesTimer; + PPDMQUEUE pQueue = pUVM->pdm.s.pQueuesForced; + do + { + while (pQueue) + { + if ( pQueue->enmType == PDMQUEUETYPE_DRV + && pQueue->u.Drv.pDrvIns == pDrvIns) + { + PPDMQUEUE pQueueDestroy = pQueue; + pQueue = pQueue->pNext; + int rc = PDMR3QueueDestroy(pQueueDestroy); + AssertRC(rc); + } + else + pQueue = pQueue->pNext; + } + + /* next queue list */ + pQueue = pQueueNext; + pQueueNext = NULL; + } while (pQueue); + + pdmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * Relocate the queues. + * + * @param pVM The cross context VM structure. + * @param offDelta The relocation delta. + */ +void pdmR3QueueRelocate(PVM pVM, RTGCINTPTR offDelta) +{ + /* + * Process the queues. 
+ */ + PUVM pUVM = pVM->pUVM; + PPDMQUEUE pQueueNext = pUVM->pdm.s.pQueuesTimer; + PPDMQUEUE pQueue = pUVM->pdm.s.pQueuesForced; + do + { + while (pQueue) + { + if (pQueue->pVMRC) + { + pQueue->pVMRC = pVM->pVMRC; + + /* Pending RC items. */ + if (pQueue->pPendingRC) + { + pQueue->pPendingRC += offDelta; + PPDMQUEUEITEMCORE pCur = (PPDMQUEUEITEMCORE)MMHyperRCToR3(pVM, pQueue->pPendingRC); + while (pCur->pNextRC) + { + pCur->pNextRC += offDelta; + pCur = (PPDMQUEUEITEMCORE)MMHyperRCToR3(pVM, pCur->pNextRC); + } + } + + /* The free items. */ + uint32_t i = pQueue->iFreeTail; + while (i != pQueue->iFreeHead) + { + pQueue->aFreeItems[i].pItemRC = MMHyperR3ToRC(pVM, pQueue->aFreeItems[i].pItemR3); + i = (i + 1) % (pQueue->cItems + PDMQUEUE_FREE_SLACK); + } + } + + /* next queue */ + pQueue = pQueue->pNext; + } + + /* next queue list */ + pQueue = pQueueNext; + pQueueNext = NULL; + } while (pQueue); +} + + +/** + * Flush pending queues. + * This is a forced action callback. + * + * @param pVM The cross context VM structure. + * @thread Emulation thread only. + */ +VMMR3_INT_DECL(void) PDMR3QueueFlushAll(PVM pVM) +{ + VM_ASSERT_EMT(pVM); + LogFlow(("PDMR3QueuesFlush:\n")); + + /* + * Only let one EMT flushing queues at any one time to preserve the order + * and to avoid wasting time. The FF is always cleared here, because it's + * only used to get someones attention. Queue inserts occurring during the + * flush are caught using the pending bit. + * + * Note! We must check the force action and pending flags after clearing + * the active bit! + */ + VM_FF_CLEAR(pVM, VM_FF_PDM_QUEUES); + while (!ASMAtomicBitTestAndSet(&pVM->pdm.s.fQueueFlushing, PDM_QUEUE_FLUSH_FLAG_ACTIVE_BIT)) + { + ASMAtomicBitClear(&pVM->pdm.s.fQueueFlushing, PDM_QUEUE_FLUSH_FLAG_PENDING_BIT); + + for (PPDMQUEUE pCur = pVM->pUVM->pdm.s.pQueuesForced; pCur; pCur = pCur->pNext) + if ( pCur->pPendingR3 + || pCur->pPendingR0 + || pCur->pPendingRC) + pdmR3QueueFlush(pCur); + + ASMAtomicBitClear(&pVM->pdm.s.fQueueFlushing, PDM_QUEUE_FLUSH_FLAG_ACTIVE_BIT); + + /* We're done if there were no inserts while we were busy. */ + if ( !ASMBitTest(&pVM->pdm.s.fQueueFlushing, PDM_QUEUE_FLUSH_FLAG_PENDING_BIT) + && !VM_FF_IS_SET(pVM, VM_FF_PDM_QUEUES)) + break; + VM_FF_CLEAR(pVM, VM_FF_PDM_QUEUES); + } +} + + +/** + * Process pending items in one queue. + * + * @returns Success indicator. + * If false the item the consumer said "enough!". + * @param pQueue The queue. + */ +static bool pdmR3QueueFlush(PPDMQUEUE pQueue) +{ + STAM_PROFILE_START(&pQueue->StatFlushPrf,p); + + /* + * Get the lists. + */ + PPDMQUEUEITEMCORE pItems = ASMAtomicXchgPtrT(&pQueue->pPendingR3, NULL, PPDMQUEUEITEMCORE); + RTRCPTR pItemsRC = ASMAtomicXchgRCPtr(&pQueue->pPendingRC, NIL_RTRCPTR); + RTR0PTR pItemsR0 = ASMAtomicXchgR0Ptr(&pQueue->pPendingR0, NIL_RTR0PTR); + + AssertMsgReturn( pItemsR0 + || pItemsRC + || pItems, + ("Someone is racing us? This shouldn't happen!\n"), + true); + + /* + * Reverse the list (it's inserted in LIFO order to avoid semaphores, remember). + */ + PPDMQUEUEITEMCORE pCur = pItems; + pItems = NULL; + while (pCur) + { + PPDMQUEUEITEMCORE pInsert = pCur; + pCur = pCur->pNextR3; + pInsert->pNextR3 = pItems; + pItems = pInsert; + } + + /* + * Do the same for any pending RC items. 
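The "reverse the list" step above is the usual in-place reversal of a singly linked list that was built LIFO by lock-free insertion. The same idiom in isolation, as an illustrative standalone function that is not part of this file:

typedef struct DEMONODE { struct DEMONODE *pNext; int iValue; } DEMONODE;

static DEMONODE *demoReverseList(DEMONODE *pHead)
{
    DEMONODE *pReversed = NULL;
    while (pHead)
    {
        DEMONODE *pNode = pHead;        /* unlink the current head... */
        pHead = pHead->pNext;
        pNode->pNext = pReversed;       /* ...and push it onto the reversed list */
        pReversed = pNode;
    }
    return pReversed;                   /* now in original insertion (FIFO) order */
}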
+ */ + while (pItemsRC) + { + PPDMQUEUEITEMCORE pInsert = (PPDMQUEUEITEMCORE)MMHyperRCToR3(pQueue->pVMR3, pItemsRC); + pItemsRC = pInsert->pNextRC; + pInsert->pNextRC = NIL_RTRCPTR; + pInsert->pNextR3 = pItems; + pItems = pInsert; + } + + /* + * Do the same for any pending R0 items. + */ + while (pItemsR0) + { + PPDMQUEUEITEMCORE pInsert = (PPDMQUEUEITEMCORE)MMHyperR0ToR3(pQueue->pVMR3, pItemsR0); + pItemsR0 = pInsert->pNextR0; + pInsert->pNextR0 = NIL_RTR0PTR; + pInsert->pNextR3 = pItems; + pItems = pInsert; + } + + /* + * Feed the items to the consumer function. + */ + Log2(("pdmR3QueueFlush: pQueue=%p enmType=%d pItems=%p\n", pQueue, pQueue->enmType, pItems)); + switch (pQueue->enmType) + { + case PDMQUEUETYPE_DEV: + while (pItems) + { + if (!pQueue->u.Dev.pfnCallback(pQueue->u.Dev.pDevIns, pItems)) + break; + pCur = pItems; + pItems = pItems->pNextR3; + pdmR3QueueFreeItem(pQueue, pCur); + } + break; + + case PDMQUEUETYPE_DRV: + while (pItems) + { + if (!pQueue->u.Drv.pfnCallback(pQueue->u.Drv.pDrvIns, pItems)) + break; + pCur = pItems; + pItems = pItems->pNextR3; + pdmR3QueueFreeItem(pQueue, pCur); + } + break; + + case PDMQUEUETYPE_INTERNAL: + while (pItems) + { + if (!pQueue->u.Int.pfnCallback(pQueue->pVMR3, pItems)) + break; + pCur = pItems; + pItems = pItems->pNextR3; + pdmR3QueueFreeItem(pQueue, pCur); + } + break; + + case PDMQUEUETYPE_EXTERNAL: + while (pItems) + { + if (!pQueue->u.Ext.pfnCallback(pQueue->u.Ext.pvUser, pItems)) + break; + pCur = pItems; + pItems = pItems->pNextR3; + pdmR3QueueFreeItem(pQueue, pCur); + } + break; + + default: + AssertMsgFailed(("Invalid queue type %d\n", pQueue->enmType)); + break; + } + + /* + * Success? + */ + if (pItems) + { + /* + * Reverse the list. + */ + pCur = pItems; + pItems = NULL; + while (pCur) + { + PPDMQUEUEITEMCORE pInsert = pCur; + pCur = pInsert->pNextR3; + pInsert->pNextR3 = pItems; + pItems = pInsert; + } + + /* + * Insert the list at the tail of the pending list. + */ + for (;;) + { + if (ASMAtomicCmpXchgPtr(&pQueue->pPendingR3, pItems, NULL)) + break; + PPDMQUEUEITEMCORE pPending = ASMAtomicXchgPtrT(&pQueue->pPendingR3, NULL, PPDMQUEUEITEMCORE); + if (pPending) + { + pCur = pPending; + while (pCur->pNextR3) + pCur = pCur->pNextR3; + pCur->pNextR3 = pItems; + pItems = pPending; + } + } + + STAM_REL_COUNTER_INC(&pQueue->StatFlushLeftovers); + STAM_PROFILE_STOP(&pQueue->StatFlushPrf,p); + return false; + } + + STAM_PROFILE_STOP(&pQueue->StatFlushPrf,p); + return true; +} + + +/** + * Free an item. + * + * @param pQueue The queue. + * @param pItem The item. + */ +DECLINLINE(void) pdmR3QueueFreeItem(PPDMQUEUE pQueue, PPDMQUEUEITEMCORE pItem) +{ + VM_ASSERT_EMT(pQueue->pVMR3); + + int i = pQueue->iFreeHead; + int iNext = (i + 1) % (pQueue->cItems + PDMQUEUE_FREE_SLACK); + + pQueue->aFreeItems[i].pItemR3 = pItem; + if (pQueue->pVMRC) + { + pQueue->aFreeItems[i].pItemRC = MMHyperR3ToRC(pQueue->pVMR3, pItem); + pQueue->aFreeItems[i].pItemR0 = MMHyperR3ToR0(pQueue->pVMR3, pItem); + } + + if (!ASMAtomicCmpXchgU32(&pQueue->iFreeHead, iNext, i)) + AssertMsgFailed(("huh? i=%d iNext=%d iFreeHead=%d iFreeTail=%d\n", i, iNext, pQueue->iFreeHead, pQueue->iFreeTail)); + STAM_STATS({ ASMAtomicDecU32(&pQueue->cStatPending); }); +} + + +/** + * Timer handler for PDM queues. + * This is called by for a single queue. + * + * @param pVM The cross context VM structure. + * @param pTimer Pointer to timer. + * @param pvUser Pointer to the queue. 
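pdmR3QueueFreeItem above returns an item to the free ring by filling the slot first and then advancing iFreeHead with a compare-and-exchange. The index arithmetic in isolation looks like this; the function and parameter names are made up, and cSlots plays the role of cItems + PDMQUEUE_FREE_SLACK:

static void demoRingPut(void **papSlots, uint32_t cSlots, uint32_t volatile *piHead, void *pv)
{
    uint32_t i     = *piHead;
    uint32_t iNext = (i + 1) % cSlots;
    papSlots[i] = pv;                        /* publish the slot before moving the head */
    if (!ASMAtomicCmpXchgU32(piHead, iNext, i))
        AssertMsgFailed(("only one thread may return items at a time\n"));
}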
+ */ +static DECLCALLBACK(void) pdmR3QueueTimer(PVM pVM, PTMTIMER pTimer, void *pvUser) +{ + PPDMQUEUE pQueue = (PPDMQUEUE)pvUser; + Assert(pTimer == pQueue->pTimer); NOREF(pTimer); NOREF(pVM); + + if ( pQueue->pPendingR3 + || pQueue->pPendingR0 + || pQueue->pPendingRC) + pdmR3QueueFlush(pQueue); + int rc = TMTimerSetMillies(pQueue->pTimer, pQueue->cMilliesInterval); + AssertRC(rc); +} + diff --git a/src/VBox/VMM/VMMR3/PDMThread.cpp b/src/VBox/VMM/VMMR3/PDMThread.cpp new file mode 100644 index 00000000..b1b35ce5 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMThread.cpp @@ -0,0 +1,1090 @@ +/* $Id: PDMThread.cpp $ */ +/** @file + * PDM Thread - VM Thread Management. + */ + +/* + * Copyright (C) 2007-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +/// @todo \#define LOG_GROUP LOG_GROUP_PDM_THREAD +#include "PDMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) pdmR3ThreadMain(RTTHREAD Thread, void *pvUser); + + +/** + * Wrapper around ASMAtomicCmpXchgSize. + */ +DECLINLINE(bool) pdmR3AtomicCmpXchgState(PPDMTHREAD pThread, PDMTHREADSTATE enmNewState, PDMTHREADSTATE enmOldState) +{ + bool fRc; + ASMAtomicCmpXchgSize(&pThread->enmState, enmNewState, enmOldState, fRc); + return fRc; +} + + +/** + * Does the wakeup call. + * + * @returns VBox status code. Already asserted on failure. + * @param pThread The PDM thread. + */ +static DECLCALLBACK(int) pdmR3ThreadWakeUp(PPDMTHREAD pThread) +{ + RTSemEventMultiSignal(pThread->Internal.s.SleepEvent); + + int rc; + switch (pThread->Internal.s.enmType) + { + case PDMTHREADTYPE_DEVICE: + rc = pThread->u.Dev.pfnWakeUp(pThread->u.Dev.pDevIns, pThread); + break; + + case PDMTHREADTYPE_USB: + rc = pThread->u.Usb.pfnWakeUp(pThread->u.Usb.pUsbIns, pThread); + break; + + case PDMTHREADTYPE_DRIVER: + rc = pThread->u.Drv.pfnWakeUp(pThread->u.Drv.pDrvIns, pThread); + break; + + case PDMTHREADTYPE_INTERNAL: + rc = pThread->u.Int.pfnWakeUp(pThread->Internal.s.pVM, pThread); + break; + + case PDMTHREADTYPE_EXTERNAL: + rc = pThread->u.Ext.pfnWakeUp(pThread); + break; + + default: + AssertMsgFailed(("%d\n", pThread->Internal.s.enmType)); + rc = VERR_PDM_THREAD_IPE_1; + break; + } + AssertRC(rc); + return rc; +} + + +/** + * Allocates new thread instance. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param ppThread Where to store the pointer to the instance. 
+ */ +static int pdmR3ThreadNew(PVM pVM, PPPDMTHREAD ppThread) +{ + PPDMTHREAD pThread; + int rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_THREAD, sizeof(*pThread), (void **)&pThread); + if (RT_FAILURE(rc)) + return rc; + + pThread->u32Version = PDMTHREAD_VERSION; + pThread->enmState = PDMTHREADSTATE_INITIALIZING; + pThread->Thread = NIL_RTTHREAD; + pThread->Internal.s.pVM = pVM; + + *ppThread = pThread; + return VINF_SUCCESS; +} + + + +/** + * Initialize a new thread, this actually creates the thread. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param ppThread Where the thread instance data handle is. + * @param cbStack The stack size, see RTThreadCreate(). + * @param enmType The thread type, see RTThreadCreate(). + * @param pszName The thread name, see RTThreadCreate(). + */ +static int pdmR3ThreadInit(PVM pVM, PPPDMTHREAD ppThread, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + PPDMTHREAD pThread = *ppThread; + PUVM pUVM = pVM->pUVM; + + /* + * Initialize the remainder of the structure. + */ + pThread->Internal.s.pVM = pVM; + + int rc = RTSemEventMultiCreate(&pThread->Internal.s.BlockEvent); + if (RT_SUCCESS(rc)) + { + rc = RTSemEventMultiCreate(&pThread->Internal.s.SleepEvent); + if (RT_SUCCESS(rc)) + { + /* + * Create the thread and wait for it to initialize. + * The newly created thread will set the PDMTHREAD::Thread member. + */ + RTTHREAD Thread; + rc = RTThreadCreate(&Thread, pdmR3ThreadMain, pThread, cbStack, enmType, RTTHREADFLAGS_WAITABLE, pszName); + if (RT_SUCCESS(rc)) + { + rc = RTThreadUserWait(Thread, 60*1000); + if ( RT_SUCCESS(rc) + && pThread->enmState != PDMTHREADSTATE_SUSPENDED) + rc = VERR_PDM_THREAD_IPE_2; + if (RT_SUCCESS(rc)) + { + /* + * Insert it into the thread list. + */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + pThread->Internal.s.pNext = NULL; + if (pUVM->pdm.s.pThreadsTail) + pUVM->pdm.s.pThreadsTail->Internal.s.pNext = pThread; + else + pUVM->pdm.s.pThreads = pThread; + pUVM->pdm.s.pThreadsTail = pThread; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + rc = RTThreadUserReset(Thread); + AssertRC(rc); + return rc; + } + + /* bailout */ + RTThreadWait(Thread, 60*1000, NULL); + } + RTSemEventMultiDestroy(pThread->Internal.s.SleepEvent); + pThread->Internal.s.SleepEvent = NIL_RTSEMEVENTMULTI; + } + RTSemEventMultiDestroy(pThread->Internal.s.BlockEvent); + pThread->Internal.s.BlockEvent = NIL_RTSEMEVENTMULTI; + } + MMR3HeapFree(pThread); + *ppThread = NULL; + + return rc; +} + + +/** + * Device Helper for creating a thread associated with a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance. + * @param ppThread Where to store the thread 'handle'. + * @param pvUser The user argument to the thread function. + * @param pfnThread The thread function. + * @param pfnWakeUp The wakup callback. This is called on the EMT thread when + * a state change is pending. + * @param cbStack See RTThreadCreate. + * @param enmType See RTThreadCreate. + * @param pszName See RTThreadCreate. 
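The device thread created by the function below typically follows a fixed shape: return immediately on the INITIALIZING call, loop while RUNNING, and let pdmR3ThreadMain further down handle the suspend and terminate transitions. An illustrative skeleton with made-up DEMO* names; devices normally get here via the PDMDevHlp thread-creation helper rather than calling pdmR3ThreadCreateDevice directly.

static DECLCALLBACK(int) demoDevThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
{
    RT_NOREF_PV(pDevIns);
    if (pThread->enmState == PDMTHREADSTATE_INITIALIZING)
        return VINF_SUCCESS;                        /* first call just signals successful init */

    while (pThread->enmState == PDMTHREADSTATE_RUNNING)
    {
        /* ... perform one unit of work ... */
        PDMR3ThreadSleep(pThread, 100 /*ms*/);      /* interrupted early by state changes */
    }
    return VINF_SUCCESS;                            /* pdmR3ThreadMain handles suspend/terminate */
}

static DECLCALLBACK(int) demoDevWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
{
    RT_NOREF2(pDevIns, pThread);                    /* PDMR3ThreadSleep is already signalled for us */
    return VINF_SUCCESS;
}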
+ */ +int pdmR3ThreadCreateDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADDEV pfnThread, + PFNPDMTHREADWAKEUPDEV pfnWakeUp, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + int rc = pdmR3ThreadNew(pVM, ppThread); + if (RT_SUCCESS(rc)) + { + PPDMTHREAD pThread = *ppThread; + pThread->pvUser = pvUser; + pThread->Internal.s.enmType = PDMTHREADTYPE_DEVICE; + pThread->u.Dev.pDevIns = pDevIns; + pThread->u.Dev.pfnThread = pfnThread; + pThread->u.Dev.pfnWakeUp = pfnWakeUp; + rc = pdmR3ThreadInit(pVM, ppThread, cbStack, enmType, pszName); + } + return rc; +} + + +/** + * USB Device Helper for creating a thread associated with an USB device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pUsbIns The USB device instance. + * @param ppThread Where to store the thread 'handle'. + * @param pvUser The user argument to the thread function. + * @param pfnThread The thread function. + * @param pfnWakeUp The wakup callback. This is called on the EMT thread when + * a state change is pending. + * @param cbStack See RTThreadCreate. + * @param enmType See RTThreadCreate. + * @param pszName See RTThreadCreate. + */ +int pdmR3ThreadCreateUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADUSB pfnThread, + PFNPDMTHREADWAKEUPUSB pfnWakeUp, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + int rc = pdmR3ThreadNew(pVM, ppThread); + if (RT_SUCCESS(rc)) + { + PPDMTHREAD pThread = *ppThread; + pThread->pvUser = pvUser; + pThread->Internal.s.enmType = PDMTHREADTYPE_USB; + pThread->u.Usb.pUsbIns = pUsbIns; + pThread->u.Usb.pfnThread = pfnThread; + pThread->u.Usb.pfnWakeUp = pfnWakeUp; + rc = pdmR3ThreadInit(pVM, ppThread, cbStack, enmType, pszName); + } + return rc; +} + + +/** + * Driver Helper for creating a thread associated with a driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns The driver instance. + * @param ppThread Where to store the thread 'handle'. + * @param pvUser The user argument to the thread function. + * @param pfnThread The thread function. + * @param pfnWakeUp The wakup callback. This is called on the EMT thread when + * a state change is pending. + * @param cbStack See RTThreadCreate. + * @param enmType See RTThreadCreate. + * @param pszName See RTThreadCreate. + */ +int pdmR3ThreadCreateDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADDRV pfnThread, + PFNPDMTHREADWAKEUPDRV pfnWakeUp, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + int rc = pdmR3ThreadNew(pVM, ppThread); + if (RT_SUCCESS(rc)) + { + PPDMTHREAD pThread = *ppThread; + pThread->pvUser = pvUser; + pThread->Internal.s.enmType = PDMTHREADTYPE_DRIVER; + pThread->u.Drv.pDrvIns = pDrvIns; + pThread->u.Drv.pfnThread = pfnThread; + pThread->u.Drv.pfnWakeUp = pfnWakeUp; + rc = pdmR3ThreadInit(pVM, ppThread, cbStack, enmType, pszName); + } + return rc; +} + + +/** + * Creates a PDM thread for internal use in the VM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param ppThread Where to store the thread 'handle'. + * @param pvUser The user argument to the thread function. + * @param pfnThread The thread function. + * @param pfnWakeUp The wakup callback. This is called on the EMT thread when + * a state change is pending. + * @param cbStack See RTThreadCreate. + * @param enmType See RTThreadCreate. + * @param pszName See RTThreadCreate. 
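For a concrete caller of this internal-thread API, see the network shaper initialization earlier in this patch, which creates its TX thread like so:

rc = PDMR3ThreadCreate(pVM, &pShaper->pTxThread, pShaper, pdmR3NsTxThread, pdmR3NsTxWakeUp,
                       0 /*cbStack*/, RTTHREADTYPE_IO, "PDMNsTx");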
+ */ +VMMR3DECL(int) PDMR3ThreadCreate(PVM pVM, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADINT pfnThread, + PFNPDMTHREADWAKEUPINT pfnWakeUp, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + int rc = pdmR3ThreadNew(pVM, ppThread); + if (RT_SUCCESS(rc)) + { + PPDMTHREAD pThread = *ppThread; + pThread->pvUser = pvUser; + pThread->Internal.s.enmType = PDMTHREADTYPE_INTERNAL; + pThread->u.Int.pfnThread = pfnThread; + pThread->u.Int.pfnWakeUp = pfnWakeUp; + rc = pdmR3ThreadInit(pVM, ppThread, cbStack, enmType, pszName); + } + return rc; +} + + +/** + * Creates a PDM thread for VM use by some external party. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param ppThread Where to store the thread 'handle'. + * @param pvUser The user argument to the thread function. + * @param pfnThread The thread function. + * @param pfnWakeUp The wakup callback. This is called on the EMT thread when + * a state change is pending. + * @param cbStack See RTThreadCreate. + * @param enmType See RTThreadCreate. + * @param pszName See RTThreadCreate. + */ +VMMR3DECL(int) PDMR3ThreadCreateExternal(PVM pVM, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADEXT pfnThread, + PFNPDMTHREADWAKEUPEXT pfnWakeUp, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + int rc = pdmR3ThreadNew(pVM, ppThread); + if (RT_SUCCESS(rc)) + { + PPDMTHREAD pThread = *ppThread; + pThread->pvUser = pvUser; + pThread->Internal.s.enmType = PDMTHREADTYPE_EXTERNAL; + pThread->u.Ext.pfnThread = pfnThread; + pThread->u.Ext.pfnWakeUp = pfnWakeUp; + rc = pdmR3ThreadInit(pVM, ppThread, cbStack, enmType, pszName); + } + return rc; +} + + +/** + * Destroys a PDM thread. + * + * This will wakeup the thread, tell it to terminate, and wait for it terminate. + * + * @returns VBox status code. + * This reflects the success off destroying the thread and not the exit code + * of the thread as this is stored in *pRcThread. + * @param pThread The thread to destroy. + * @param pRcThread Where to store the thread exit code. Optional. + * @thread The emulation thread (EMT). + */ +VMMR3DECL(int) PDMR3ThreadDestroy(PPDMTHREAD pThread, int *pRcThread) +{ + /* + * Assert sanity. + */ + AssertPtrReturn(pThread, VERR_INVALID_POINTER); + AssertReturn(pThread->u32Version == PDMTHREAD_VERSION, VERR_INVALID_MAGIC); + Assert(pThread->Thread != RTThreadSelf()); + AssertPtrNullReturn(pRcThread, VERR_INVALID_POINTER); + PVM pVM = pThread->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + PUVM pUVM = pVM->pUVM; + + /* + * Advance the thread to the terminating state. + */ + int rc = VINF_SUCCESS; + if (pThread->enmState <= PDMTHREADSTATE_TERMINATING) + { + for (;;) + { + PDMTHREADSTATE enmState = pThread->enmState; + switch (enmState) + { + case PDMTHREADSTATE_RUNNING: + if (!pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + continue; + rc = pdmR3ThreadWakeUp(pThread); + break; + + case PDMTHREADSTATE_SUSPENDED: + case PDMTHREADSTATE_SUSPENDING: + case PDMTHREADSTATE_RESUMING: + case PDMTHREADSTATE_INITIALIZING: + if (!pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + continue; + break; + + case PDMTHREADSTATE_TERMINATING: + case PDMTHREADSTATE_TERMINATED: + break; + + default: + AssertMsgFailed(("enmState=%d\n", enmState)); + rc = VERR_PDM_THREAD_IPE_2; + break; + } + break; + } + } + int rc2 = RTSemEventMultiSignal(pThread->Internal.s.BlockEvent); + AssertRC(rc2); + + /* + * Wait for it to terminate and the do cleanups. 
+ */ + rc2 = RTThreadWait(pThread->Thread, RT_SUCCESS(rc) ? 60*1000 : 150, pRcThread); + if (RT_SUCCESS(rc2)) + { + /* make it invalid. */ + pThread->u32Version = 0xffffffff; + pThread->enmState = PDMTHREADSTATE_INVALID; + pThread->Thread = NIL_RTTHREAD; + + /* unlink */ + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + if (pUVM->pdm.s.pThreads == pThread) + { + pUVM->pdm.s.pThreads = pThread->Internal.s.pNext; + if (!pThread->Internal.s.pNext) + pUVM->pdm.s.pThreadsTail = NULL; + } + else + { + PPDMTHREAD pPrev = pUVM->pdm.s.pThreads; + while (pPrev && pPrev->Internal.s.pNext != pThread) + pPrev = pPrev->Internal.s.pNext; + Assert(pPrev); + if (pPrev) + pPrev->Internal.s.pNext = pThread->Internal.s.pNext; + if (!pThread->Internal.s.pNext) + pUVM->pdm.s.pThreadsTail = pPrev; + } + pThread->Internal.s.pNext = NULL; + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + + /* free the resources */ + RTSemEventMultiDestroy(pThread->Internal.s.BlockEvent); + pThread->Internal.s.BlockEvent = NIL_RTSEMEVENTMULTI; + + RTSemEventMultiDestroy(pThread->Internal.s.SleepEvent); + pThread->Internal.s.SleepEvent = NIL_RTSEMEVENTMULTI; + + MMR3HeapFree(pThread); + } + else if (RT_SUCCESS(rc)) + rc = rc2; + + return rc; +} + + +/** + * Destroys all threads associated with a device. + * + * This function is called by PDMDevice when a device is + * destroyed (not currently implemented). + * + * @returns VBox status code of the first failure. + * @param pVM The cross context VM structure. + * @param pDevIns the device instance. + */ +int pdmR3ThreadDestroyDevice(PVM pVM, PPDMDEVINS pDevIns) +{ + int rc = VINF_SUCCESS; + PUVM pUVM = pVM->pUVM; + + AssertPtr(pDevIns); + + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMTHREAD pThread = pUVM->pdm.s.pThreads; + while (pThread) + { + PPDMTHREAD pNext = pThread->Internal.s.pNext; + if ( pThread->Internal.s.enmType == PDMTHREADTYPE_DEVICE + && pThread->u.Dev.pDevIns == pDevIns) + { + int rc2 = PDMR3ThreadDestroy(pThread, NULL); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + pThread = pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Destroys all threads associated with an USB device. + * + * This function is called by PDMUsb when a device is destroyed. + * + * @returns VBox status code of the first failure. + * @param pVM The cross context VM structure. + * @param pUsbIns The USB device instance. + */ +int pdmR3ThreadDestroyUsb(PVM pVM, PPDMUSBINS pUsbIns) +{ + int rc = VINF_SUCCESS; + PUVM pUVM = pVM->pUVM; + + AssertPtr(pUsbIns); + + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMTHREAD pThread = pUVM->pdm.s.pThreads; + while (pThread) + { + PPDMTHREAD pNext = pThread->Internal.s.pNext; + if ( pThread->Internal.s.enmType == PDMTHREADTYPE_DEVICE + && pThread->u.Usb.pUsbIns == pUsbIns) + { + int rc2 = PDMR3ThreadDestroy(pThread, NULL); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + pThread = pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Destroys all threads associated with a driver. + * + * This function is called by PDMDriver when a driver is destroyed. + * + * @returns VBox status code of the first failure. + * @param pVM The cross context VM structure. + * @param pDrvIns The driver instance. 
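Owners that keep the PDMTHREAD pointer themselves can also destroy it explicitly instead of relying on the per-device/driver sweeps here. An illustrative fragment for a destruct callback running on the EMT; pThis and its pThread member are hypothetical instance data:

if (pThis->pThread)
{
    int rcThread;
    int rc = PDMR3ThreadDestroy(pThis->pThread, &rcThread);
    AssertLogRelRC(rc);
    Log(("demo thread exited with %Rrc\n", rcThread));
    pThis->pThread = NULL;
}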
+ */ +int pdmR3ThreadDestroyDriver(PVM pVM, PPDMDRVINS pDrvIns) +{ + int rc = VINF_SUCCESS; + PUVM pUVM = pVM->pUVM; + + AssertPtr(pDrvIns); + + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMTHREAD pThread = pUVM->pdm.s.pThreads; + while (pThread) + { + PPDMTHREAD pNext = pThread->Internal.s.pNext; + if ( pThread->Internal.s.enmType == PDMTHREADTYPE_DRIVER + && pThread->u.Drv.pDrvIns == pDrvIns) + { + int rc2 = PDMR3ThreadDestroy(pThread, NULL); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + pThread = pNext; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return rc; +} + + +/** + * Called For VM power off. + * + * @param pVM The cross context VM structure. + */ +void pdmR3ThreadDestroyAll(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + PPDMTHREAD pThread = pUVM->pdm.s.pThreads; + while (pThread) + { + PPDMTHREAD pNext = pThread->Internal.s.pNext; + int rc2 = PDMR3ThreadDestroy(pThread, NULL); + AssertRC(rc2); + pThread = pNext; + } + Assert(!pUVM->pdm.s.pThreads && !pUVM->pdm.s.pThreadsTail); + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); +} + + +/** + * Initiate termination of the thread (self) because something failed in a bad way. + * + * @param pThread The PDM thread. + */ +static void pdmR3ThreadBailMeOut(PPDMTHREAD pThread) +{ + for (;;) + { + PDMTHREADSTATE enmState = pThread->enmState; + switch (enmState) + { + case PDMTHREADSTATE_SUSPENDING: + case PDMTHREADSTATE_SUSPENDED: + case PDMTHREADSTATE_RESUMING: + case PDMTHREADSTATE_RUNNING: + if (!pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + continue; + break; + + case PDMTHREADSTATE_TERMINATING: + case PDMTHREADSTATE_TERMINATED: + break; + + case PDMTHREADSTATE_INITIALIZING: + default: + AssertMsgFailed(("enmState=%d\n", enmState)); + break; + } + break; + } +} + + +/** + * Called by the PDM thread in response to a wakeup call with + * suspending as the new state. + * + * The thread will block in side this call until the state is changed in + * response to a VM state change or to the device/driver/whatever calling the + * PDMR3ThreadResume API. + * + * @returns VBox status code. + * On failure, terminate the thread. + * @param pThread The PDM thread. + */ +VMMR3DECL(int) PDMR3ThreadIAmSuspending(PPDMTHREAD pThread) +{ + /* + * Assert sanity. + */ + AssertPtr(pThread); + AssertReturn(pThread->u32Version == PDMTHREAD_VERSION, VERR_INVALID_MAGIC); + Assert(pThread->Thread == RTThreadSelf() || pThread->enmState == PDMTHREADSTATE_INITIALIZING); + PDMTHREADSTATE enmState = pThread->enmState; + Assert( enmState == PDMTHREADSTATE_SUSPENDING + || enmState == PDMTHREADSTATE_INITIALIZING); + + /* + * Update the state, notify the control thread (the API caller) and go to sleep. + */ + int rc = VERR_WRONG_ORDER; + if (pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_SUSPENDED, enmState)) + { + rc = RTThreadUserSignal(pThread->Thread); + if (RT_SUCCESS(rc)) + { + rc = RTSemEventMultiWait(pThread->Internal.s.BlockEvent, RT_INDEFINITE_WAIT); + if ( RT_SUCCESS(rc) + && pThread->enmState != PDMTHREADSTATE_SUSPENDED) + return rc; + + if (RT_SUCCESS(rc)) + rc = VERR_PDM_THREAD_IPE_2; + } + } + + AssertMsgFailed(("rc=%d enmState=%d\n", rc, pThread->enmState)); + pdmR3ThreadBailMeOut(pThread); + return rc; +} + + +/** + * Called by the PDM thread in response to a resuming state. + * + * The purpose of this API is to tell the PDMR3ThreadResume caller that + * the PDM thread has successfully resumed. 
It will also do the + * state transition from the resuming to the running state. + * + * @returns VBox status code. + * On failure, terminate the thread. + * @param pThread The PDM thread. + */ +VMMR3DECL(int) PDMR3ThreadIAmRunning(PPDMTHREAD pThread) +{ + /* + * Assert sanity. + */ + Assert(pThread->enmState == PDMTHREADSTATE_RESUMING); + Assert(pThread->Thread == RTThreadSelf()); + + /* + * Update the state and tell the control thread (the guy calling the resume API). + */ + int rc = VERR_WRONG_ORDER; + if (pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_RUNNING, PDMTHREADSTATE_RESUMING)) + { + rc = RTThreadUserSignal(pThread->Thread); + if (RT_SUCCESS(rc)) + return rc; + } + + AssertMsgFailed(("rc=%d enmState=%d\n", rc, pThread->enmState)); + pdmR3ThreadBailMeOut(pThread); + return rc; +} + + +/** + * Called by the PDM thread instead of RTThreadSleep. + * + * The difference is that the sleep will be interrupted on state change. The + * thread must be in the running state, otherwise it will return immediately. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success or state change. + * @retval VERR_INTERRUPTED on signal or APC. + * + * @param pThread The PDM thread. + * @param cMillies The number of milliseconds to sleep. + */ +VMMR3DECL(int) PDMR3ThreadSleep(PPDMTHREAD pThread, RTMSINTERVAL cMillies) +{ + /* + * Assert sanity. + */ + AssertReturn(pThread->enmState > PDMTHREADSTATE_INVALID && pThread->enmState < PDMTHREADSTATE_TERMINATED, VERR_PDM_THREAD_IPE_2); + AssertReturn(pThread->Thread == RTThreadSelf(), VERR_PDM_THREAD_INVALID_CALLER); + + /* + * Reset the event semaphore, check the state and sleep. + */ + RTSemEventMultiReset(pThread->Internal.s.SleepEvent); + if (pThread->enmState != PDMTHREADSTATE_RUNNING) + return VINF_SUCCESS; + return RTSemEventMultiWaitNoResume(pThread->Internal.s.SleepEvent, cMillies); +} + + +/** + * The PDM thread function. + * + * @returns return from pfnThread. + * + * @param Thread The thread handle. + * @param pvUser Pointer to the PDMTHREAD structure. + */ +static DECLCALLBACK(int) pdmR3ThreadMain(RTTHREAD Thread, void *pvUser) +{ + PPDMTHREAD pThread = (PPDMTHREAD)pvUser; + Log(("PDMThread: Initializing thread %RTthrd / %p / '%s'...\n", Thread, pThread, RTThreadGetName(Thread))); + pThread->Thread = Thread; + + PUVM pUVM = pThread->Internal.s.pVM->pUVM; + if ( pUVM->pVmm2UserMethods + && pUVM->pVmm2UserMethods->pfnNotifyPdmtInit) + pUVM->pVmm2UserMethods->pfnNotifyPdmtInit(pUVM->pVmm2UserMethods, pUVM); + + /* + * The run loop. + * + * It handles simple thread functions which returns when they see a suspending + * request and leaves the PDMR3ThreadIAmSuspending and PDMR3ThreadIAmRunning + * parts to us. + */ + int rc; + for (;;) + { + switch (pThread->Internal.s.enmType) + { + case PDMTHREADTYPE_DEVICE: + rc = pThread->u.Dev.pfnThread(pThread->u.Dev.pDevIns, pThread); + break; + + case PDMTHREADTYPE_USB: + rc = pThread->u.Usb.pfnThread(pThread->u.Usb.pUsbIns, pThread); + break; + + case PDMTHREADTYPE_DRIVER: + rc = pThread->u.Drv.pfnThread(pThread->u.Drv.pDrvIns, pThread); + break; + + case PDMTHREADTYPE_INTERNAL: + rc = pThread->u.Int.pfnThread(pThread->Internal.s.pVM, pThread); + break; + + case PDMTHREADTYPE_EXTERNAL: + rc = pThread->u.Ext.pfnThread(pThread); + break; + + default: + AssertMsgFailed(("%d\n", pThread->Internal.s.enmType)); + rc = VERR_PDM_THREAD_IPE_1; + break; + } + if (RT_FAILURE(rc)) + break; + + /* + * If this is a simple thread function, the state will be suspending + * or initializing now. 
If it isn't we're supposed to terminate. + */ + if ( pThread->enmState != PDMTHREADSTATE_SUSPENDING + && pThread->enmState != PDMTHREADSTATE_INITIALIZING) + { + Assert(pThread->enmState == PDMTHREADSTATE_TERMINATING); + break; + } + rc = PDMR3ThreadIAmSuspending(pThread); + if (RT_FAILURE(rc)) + break; + if (pThread->enmState != PDMTHREADSTATE_RESUMING) + { + Assert(pThread->enmState == PDMTHREADSTATE_TERMINATING); + break; + } + + rc = PDMR3ThreadIAmRunning(pThread); + if (RT_FAILURE(rc)) + break; + } + + if (RT_FAILURE(rc)) + LogRel(("PDMThread: Thread '%s' (%RTthrd) quit unexpectedly with rc=%Rrc.\n", RTThreadGetName(Thread), Thread, rc)); + + /* + * Advance the state to terminating and then on to terminated. + */ + for (;;) + { + PDMTHREADSTATE enmState = pThread->enmState; + if ( enmState == PDMTHREADSTATE_TERMINATING + || pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + break; + } + + ASMAtomicXchgSize(&pThread->enmState, PDMTHREADSTATE_TERMINATED); + int rc2 = RTThreadUserSignal(Thread); AssertRC(rc2); + + if ( pUVM->pVmm2UserMethods + && pUVM->pVmm2UserMethods->pfnNotifyPdmtTerm) + pUVM->pVmm2UserMethods->pfnNotifyPdmtTerm(pUVM->pVmm2UserMethods, pUVM); + Log(("PDMThread: Terminating thread %RTthrd / %p / '%s': %Rrc\n", Thread, pThread, RTThreadGetName(Thread), rc)); + return rc; +} + + +/** + * Initiate termination of the thread because something failed in a bad way. + * + * @param pThread The PDM thread. + */ +static void pdmR3ThreadBailOut(PPDMTHREAD pThread) +{ + for (;;) + { + PDMTHREADSTATE enmState = pThread->enmState; + switch (enmState) + { + case PDMTHREADSTATE_SUSPENDING: + case PDMTHREADSTATE_SUSPENDED: + if (!pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + continue; + RTSemEventMultiSignal(pThread->Internal.s.BlockEvent); + break; + + case PDMTHREADSTATE_RESUMING: + if (!pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + continue; + break; + + case PDMTHREADSTATE_RUNNING: + if (!pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_TERMINATING, enmState)) + continue; + pdmR3ThreadWakeUp(pThread); + break; + + case PDMTHREADSTATE_TERMINATING: + case PDMTHREADSTATE_TERMINATED: + break; + + case PDMTHREADSTATE_INITIALIZING: + default: + AssertMsgFailed(("enmState=%d\n", enmState)); + break; + } + break; + } +} + + +/** + * Suspends the thread. + * + * This can be called at the power off / suspend notifications to suspend the + * PDM thread a bit early. The thread will be automatically suspend upon + * completion of the device/driver notification cycle. + * + * The caller is responsible for serializing the control operations on the + * thread. That basically means, always do these calls from the EMT. + * + * @returns VBox status code. + * @param pThread The PDM thread. + */ +VMMR3DECL(int) PDMR3ThreadSuspend(PPDMTHREAD pThread) +{ + /* + * Assert sanity. + */ + AssertPtrReturn(pThread, VERR_INVALID_POINTER); + AssertReturn(pThread->u32Version == PDMTHREAD_VERSION, VERR_INVALID_MAGIC); + Assert(pThread->Thread != RTThreadSelf()); + + /* + * This is a noop if the thread is already suspended. + */ + if (pThread->enmState == PDMTHREADSTATE_SUSPENDED) + return VINF_SUCCESS; + + /* + * Change the state to resuming and kick the thread. 
+ */ + int rc = RTSemEventMultiReset(pThread->Internal.s.BlockEvent); + if (RT_SUCCESS(rc)) + { + rc = RTThreadUserReset(pThread->Thread); + if (RT_SUCCESS(rc)) + { + rc = VERR_WRONG_ORDER; + if (pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_SUSPENDING, PDMTHREADSTATE_RUNNING)) + { + rc = pdmR3ThreadWakeUp(pThread); + if (RT_SUCCESS(rc)) + { + /* + * Wait for the thread to reach the suspended state. + */ + if (pThread->enmState != PDMTHREADSTATE_SUSPENDED) + rc = RTThreadUserWait(pThread->Thread, 60*1000); + if ( RT_SUCCESS(rc) + && pThread->enmState != PDMTHREADSTATE_SUSPENDED) + rc = VERR_PDM_THREAD_IPE_2; + if (RT_SUCCESS(rc)) + return rc; + } + } + } + } + + /* + * Something failed, initialize termination. + */ + AssertMsgFailed(("PDMR3ThreadSuspend -> rc=%Rrc enmState=%d suspending '%s'\n", + rc, pThread->enmState, RTThreadGetName(pThread->Thread))); + pdmR3ThreadBailOut(pThread); + return rc; +} + + +/** + * Suspend all running threads. + * + * This is called by PDMR3Suspend() and PDMR3PowerOff() after all the devices + * and drivers have been notified about the suspend / power off. + * + * @return VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3ThreadSuspendAll(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); /* This may cause deadlocks later... */ + for (PPDMTHREAD pThread = pUVM->pdm.s.pThreads; pThread; pThread = pThread->Internal.s.pNext) + switch (pThread->enmState) + { + case PDMTHREADSTATE_RUNNING: + { + int rc = PDMR3ThreadSuspend(pThread); + AssertRCReturn(rc, rc); + break; + } + + /* suspend -> power off; voluntary suspend. */ + case PDMTHREADSTATE_SUSPENDED: + break; + + default: + AssertMsgFailed(("pThread=%p enmState=%d\n", pThread, pThread->enmState)); + break; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return VINF_SUCCESS; +} + + +/** + * Resumes the thread. + * + * This can be called the power on / resume notifications to resume the + * PDM thread a bit early. The thread will be automatically resumed upon + * return from these two notification callbacks (devices/drivers). + * + * The caller is responsible for serializing the control operations on the + * thread. That basically means, always do these calls from the EMT. + * + * @returns VBox status code. + * @param pThread The PDM thread. + */ +VMMR3DECL(int) PDMR3ThreadResume(PPDMTHREAD pThread) +{ + /* + * Assert sanity. + */ + AssertPtrReturn(pThread, VERR_INVALID_POINTER); + AssertReturn(pThread->u32Version == PDMTHREAD_VERSION, VERR_INVALID_MAGIC); + Assert(pThread->Thread != RTThreadSelf()); + + /* + * Change the state to resuming and kick the thread. + */ + int rc = RTThreadUserReset(pThread->Thread); + if (RT_SUCCESS(rc)) + { + rc = VERR_WRONG_ORDER; + if (pdmR3AtomicCmpXchgState(pThread, PDMTHREADSTATE_RESUMING, PDMTHREADSTATE_SUSPENDED)) + { + rc = RTSemEventMultiSignal(pThread->Internal.s.BlockEvent); + if (RT_SUCCESS(rc)) + { + /* + * Wait for the thread to reach the running state. + */ + rc = RTThreadUserWait(pThread->Thread, 60*1000); + if ( RT_SUCCESS(rc) + && pThread->enmState != PDMTHREADSTATE_RUNNING) + rc = VERR_PDM_THREAD_IPE_2; + if (RT_SUCCESS(rc)) + return rc; + } + } + } + + /* + * Something failed, initialize termination. + */ + AssertMsgFailed(("PDMR3ThreadResume -> rc=%Rrc enmState=%d\n", rc, pThread->enmState)); + pdmR3ThreadBailOut(pThread); + return rc; +} + + +/** + * Resumes all threads not running. 
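+ * (Only threads in the suspended state are actually resumed; any other state
+ * trips an assertion and the thread is otherwise left alone.)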
+ * + * This is called by PDMR3Resume() and PDMR3PowerOn() after all the devices + * and drivers have been notified about the resume / power on . + * + * @return VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3ThreadResumeAll(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->pdm.s.ListCritSect); + for (PPDMTHREAD pThread = pUVM->pdm.s.pThreads; pThread; pThread = pThread->Internal.s.pNext) + switch (pThread->enmState) + { + case PDMTHREADSTATE_SUSPENDED: + { + int rc = PDMR3ThreadResume(pThread); + AssertRCReturn(rc, rc); + break; + } + + default: + AssertMsgFailed(("pThread=%p enmState=%d\n", pThread, pThread->enmState)); + break; + } + RTCritSectLeave(&pUVM->pdm.s.ListCritSect); + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/PDMUsb.cpp b/src/VBox/VMM/VMMR3/PDMUsb.cpp new file mode 100644 index 00000000..f7ea84ee --- /dev/null +++ b/src/VBox/VMM/VMMR3/PDMUsb.cpp @@ -0,0 +1,2005 @@ +/* $Id: PDMUsb.cpp $ */ +/** @file + * PDM - Pluggable Device and Driver Manager, USB part. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PDM_DRIVER +#include "PDMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Internal callback structure pointer. + * + * The main purpose is to define the extra data we associate + * with PDMUSBREGCB so we can find the VM instance and so on. + */ +typedef struct PDMUSBREGCBINT +{ + /** The callback structure. */ + PDMUSBREGCB Core; + /** A bit of padding. */ + uint32_t u32[4]; + /** VM Handle. */ + PVM pVM; +} PDMUSBREGCBINT, *PPDMUSBREGCBINT; +typedef const PDMUSBREGCBINT *PCPDMUSBREGCBINT; + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** @def PDMUSB_ASSERT_USBINS + * Asserts the validity of the USB device instance. 
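+ * (A no-op in non-strict builds; in VBOX_STRICT builds it checks the pointer,
+ * the PDM_USBINS_VERSION magic and that pvInstanceDataR3 points at the
+ * embedded instance data.)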
+ */ +#ifdef VBOX_STRICT +# define PDMUSB_ASSERT_USBINS(pUsbIns) \ + do { \ + AssertPtr(pUsbIns); \ + Assert(pUsbIns->u32Version == PDM_USBINS_VERSION); \ + Assert(pUsbIns->pvInstanceDataR3 == (void *)&pUsbIns->achInstanceData[0]); \ + } while (0) +#else +# define PDMUSB_ASSERT_USBINS(pUsbIns) do { } while (0) +#endif + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void pdmR3UsbDestroyDevice(PVM pVM, PPDMUSBINS pUsbIns); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +extern const PDMUSBHLP g_pdmR3UsbHlp; + + +AssertCompile(sizeof(PDMUSBINSINT) <= RT_SIZEOFMEMB(PDMUSBINS, Internal.padding)); + + +/** + * Registers a USB hub driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns The driver instance of the hub. + * @param fVersions Indicates the kinds of USB devices that can be attached to this HUB. + * @param cPorts The number of ports. + * @param pUsbHubReg The hub callback structure that PDMUsb uses to interact with it. + * @param ppUsbHubHlp The helper callback structure that the hub uses to talk to PDMUsb. + * @thread EMT + */ +int pdmR3UsbRegisterHub(PVM pVM, PPDMDRVINS pDrvIns, uint32_t fVersions, uint32_t cPorts, PCPDMUSBHUBREG pUsbHubReg, PPCPDMUSBHUBHLP ppUsbHubHlp) +{ + /* + * Validate input. + */ + /* The driver must be in the USB class. */ + if (!(pDrvIns->pReg->fClass & PDM_DRVREG_CLASS_USB)) + { + LogRel(("PDMUsb: pdmR3UsbRegisterHub: fClass=%#x expected %#x to be set\n", pDrvIns->pReg->fClass, PDM_DRVREG_CLASS_USB)); + return VERR_INVALID_PARAMETER; + } + AssertMsgReturn(!(fVersions & ~(VUSB_STDVER_11 | VUSB_STDVER_20 | VUSB_STDVER_30)), ("%#x\n", fVersions), VERR_INVALID_PARAMETER); + AssertPtrReturn(ppUsbHubHlp, VERR_INVALID_POINTER); + AssertPtrReturn(pUsbHubReg, VERR_INVALID_POINTER); + AssertReturn(pUsbHubReg->u32Version == PDM_USBHUBREG_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pUsbHubReg->u32TheEnd == PDM_USBHUBREG_VERSION, VERR_INVALID_MAGIC); + AssertPtrReturn(pUsbHubReg->pfnAttachDevice, VERR_INVALID_PARAMETER); + AssertPtrReturn(pUsbHubReg->pfnDetachDevice, VERR_INVALID_PARAMETER); + + /* + * Check for duplicate registration and find the last hub for FIFO registration. + */ + PPDMUSBHUB pPrev = NULL; + for (PPDMUSBHUB pCur = pVM->pdm.s.pUsbHubs; pCur; pCur = pCur->pNext) + { + if (pCur->pDrvIns == pDrvIns) + return VERR_PDM_USB_HUB_EXISTS; + pPrev = pCur; + } + + /* + * Create an internal USB hub structure. + */ + PPDMUSBHUB pHub = (PPDMUSBHUB)MMR3HeapAlloc(pVM, MM_TAG_PDM_DRIVER, sizeof(*pHub)); + if (!pHub) + return VERR_NO_MEMORY; + + pHub->fVersions = fVersions; + pHub->cPorts = cPorts; + pHub->cAvailablePorts = cPorts; + pHub->pDrvIns = pDrvIns; + pHub->Reg = *pUsbHubReg; + pHub->pNext = NULL; + + /* link it */ + if (pPrev) + pPrev->pNext = pHub; + else + pVM->pdm.s.pUsbHubs = pHub; + + Log(("PDM: Registered USB hub %p/%s\n", pDrvIns, pDrvIns->pReg->szName)); + return VINF_SUCCESS; +} + + +/** + * Loads one device module and call the registration entry point. + * + * @returns VBox status code. 
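+ * @retval VERR_PDM_NO_REGISTRATION_EXPORT if the module does not export a
+ * VBoxUsbRegister entry point.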
+ * @param pVM The cross context VM structure. + * @param pRegCB The registration callback stuff. + * @param pszFilename Module filename. + * @param pszName Module name. + */ +static int pdmR3UsbLoad(PVM pVM, PCPDMUSBREGCBINT pRegCB, const char *pszFilename, const char *pszName) +{ + /* + * Load it. + */ + int rc = pdmR3LoadR3U(pVM->pUVM, pszFilename, pszName); + if (RT_SUCCESS(rc)) + { + /* + * Get the registration export and call it. + */ + FNPDMVBOXUSBREGISTER *pfnVBoxUsbRegister; + rc = PDMR3LdrGetSymbolR3(pVM, pszName, "VBoxUsbRegister", (void **)&pfnVBoxUsbRegister); + if (RT_SUCCESS(rc)) + { + Log(("PDM: Calling VBoxUsbRegister (%p) of %s (%s)\n", pfnVBoxUsbRegister, pszName, pszFilename)); + rc = pfnVBoxUsbRegister(&pRegCB->Core, VBOX_VERSION); + if (RT_SUCCESS(rc)) + Log(("PDM: Successfully loaded device module %s (%s).\n", pszName, pszFilename)); + else + AssertMsgFailed(("VBoxDevicesRegister failed with rc=%Rrc for module %s (%s)\n", rc, pszName, pszFilename)); + } + else + { + AssertMsgFailed(("Failed to locate 'VBoxUsbRegister' in %s (%s) rc=%Rrc\n", pszName, pszFilename, rc)); + if (rc == VERR_SYMBOL_NOT_FOUND) + rc = VERR_PDM_NO_REGISTRATION_EXPORT; + } + } + else + AssertMsgFailed(("Failed to load VBoxDD!\n")); + return rc; +} + + + +/** + * @interface_method_impl{PDMUSBREGCB,pfnRegister} + */ +static DECLCALLBACK(int) pdmR3UsbReg_Register(PCPDMUSBREGCB pCallbacks, PCPDMUSBREG pReg) +{ + /* + * Validate the registration structure. + */ + Assert(pReg); + AssertMsgReturn(pReg->u32Version == PDM_USBREG_VERSION, + ("Unknown struct version %#x!\n", pReg->u32Version), + VERR_PDM_UNKNOWN_USBREG_VERSION); + AssertMsgReturn( pReg->szName[0] + && strlen(pReg->szName) < sizeof(pReg->szName) + && pdmR3IsValidName(pReg->szName), + ("Invalid name '%.*s'\n", sizeof(pReg->szName), pReg->szName), + VERR_PDM_INVALID_USB_REGISTRATION); + AssertMsgReturn((pReg->fFlags & ~(PDM_USBREG_HIGHSPEED_CAPABLE | PDM_USBREG_SUPERSPEED_CAPABLE | PDM_USBREG_SAVED_STATE_SUPPORTED)) == 0, + ("fFlags=%#x\n", pReg->fFlags), VERR_PDM_INVALID_USB_REGISTRATION); + AssertMsgReturn(pReg->cMaxInstances > 0, + ("Max instances %u! (USB Device %s)\n", pReg->cMaxInstances, pReg->szName), + VERR_PDM_INVALID_USB_REGISTRATION); + AssertMsgReturn(pReg->cbInstance <= _1M, + ("Instance size %d bytes! (USB Device %s)\n", pReg->cbInstance, pReg->szName), + VERR_PDM_INVALID_USB_REGISTRATION); + AssertMsgReturn(pReg->pfnConstruct, ("No constructor! (USB Device %s)\n", pReg->szName), + VERR_PDM_INVALID_USB_REGISTRATION); + + /* + * Check for duplicate and find FIFO entry at the same time. + */ + PCPDMUSBREGCBINT pRegCB = (PCPDMUSBREGCBINT)pCallbacks; + PPDMUSB pUsbPrev = NULL; + PPDMUSB pUsb = pRegCB->pVM->pdm.s.pUsbDevs; + for (; pUsb; pUsbPrev = pUsb, pUsb = pUsb->pNext) + AssertMsgReturn(strcmp(pUsb->pReg->szName, pReg->szName), + ("USB Device '%s' already exists\n", pReg->szName), + VERR_PDM_USB_NAME_CLASH); + + /* + * Allocate new device structure and insert it into the list. + */ + pUsb = (PPDMUSB)MMR3HeapAlloc(pRegCB->pVM, MM_TAG_PDM_DEVICE, sizeof(*pUsb)); + if (pUsb) + { + pUsb->pNext = NULL; + pUsb->iNextInstance = 0; + pUsb->pInstances = NULL; + pUsb->pReg = pReg; + pUsb->cchName = (RTUINT)strlen(pReg->szName); + + if (pUsbPrev) + pUsbPrev->pNext = pUsb; + else + pRegCB->pVM->pdm.s.pUsbDevs = pUsb; + Log(("PDM: Registered USB device '%s'\n", pReg->szName)); + return VINF_SUCCESS; + } + return VERR_NO_MEMORY; +} + + +/** + * Load USB Device modules. 
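+ *
+ * The builtin VBoxDD module is loaded unless the PDM/USB/LoadBuiltin boolean
+ * says otherwise; every other child of the PDM/USB/ CFGM node names an extra
+ * module to load, with an optional Path value.  Illustrative layout (the
+ * module name here is made up):
+ * @code
+ *      PDM/USB/LoadBuiltin             <bool, defaults to true>
+ *      PDM/USB/MyUsbModule/Path        "/path/to/MyUsbModule"
+ * @endcode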
+ * + * This is called by pdmR3DevInit() after it has loaded it's device modules. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3UsbLoadModules(PVM pVM) +{ + LogFlow(("pdmR3UsbLoadModules:\n")); + + AssertRelease(!(RT_UOFFSETOF(PDMUSBINS, achInstanceData) & 15)); + AssertRelease(sizeof(pVM->pdm.s.pUsbInstances->Internal.s) <= sizeof(pVM->pdm.s.pUsbInstances->Internal.padding)); + + /* + * Initialize the callback structure. + */ + PDMUSBREGCBINT RegCB; + RegCB.Core.u32Version = PDM_USBREG_CB_VERSION; + RegCB.Core.pfnRegister = pdmR3UsbReg_Register; + RegCB.pVM = pVM; + + /* + * Load the builtin module + */ + PCFGMNODE pUsbNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "PDM/USB/"); + bool fLoadBuiltin; + int rc = CFGMR3QueryBool(pUsbNode, "LoadBuiltin", &fLoadBuiltin); + if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_NO_PARENT) + fLoadBuiltin = true; + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Configuration error: Querying boolean \"LoadBuiltin\" failed with %Rrc\n", rc)); + return rc; + } + if (fLoadBuiltin) + { + /* make filename */ + char *pszFilename = pdmR3FileR3("VBoxDD", true /*fShared*/); + if (!pszFilename) + return VERR_NO_TMP_MEMORY; + rc = pdmR3UsbLoad(pVM, &RegCB, pszFilename, "VBoxDD"); + RTMemTmpFree(pszFilename); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Load additional device modules. + */ + PCFGMNODE pCur; + for (pCur = CFGMR3GetFirstChild(pUsbNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + /* + * Get the name and path. + */ + char szName[PDMMOD_NAME_LEN]; + rc = CFGMR3GetName(pCur, &szName[0], sizeof(szName)); + if (rc == VERR_CFGM_NOT_ENOUGH_SPACE) + { + AssertMsgFailed(("configuration error: The module name is too long, cchName=%zu.\n", CFGMR3GetNameLen(pCur))); + return VERR_PDM_MODULE_NAME_TOO_LONG; + } + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("CFGMR3GetName -> %Rrc.\n", rc)); + return rc; + } + + /* the path is optional, if no path the module name + path is used. */ + char szFilename[RTPATH_MAX]; + rc = CFGMR3QueryString(pCur, "Path", &szFilename[0], sizeof(szFilename)); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + strcpy(szFilename, szName); + else if (RT_FAILURE(rc)) + { + AssertMsgFailed(("configuration error: Failure to query the module path, rc=%Rrc.\n", rc)); + return rc; + } + + /* prepend path? */ + if (!RTPathHavePath(szFilename)) + { + char *psz = pdmR3FileR3(szFilename, false /*fShared*/); + if (!psz) + return VERR_NO_TMP_MEMORY; + size_t cch = strlen(psz) + 1; + if (cch > sizeof(szFilename)) + { + RTMemTmpFree(psz); + AssertMsgFailed(("Filename too long! cch=%d '%s'\n", cch, psz)); + return VERR_FILENAME_TOO_LONG; + } + memcpy(szFilename, psz, cch); + RTMemTmpFree(psz); + } + + /* + * Load the module and register it's devices. + */ + rc = pdmR3UsbLoad(pVM, &RegCB, szFilename, szName); + if (RT_FAILURE(rc)) + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Send the init-complete notification to all the USB devices. + * + * This is called from pdmR3DevInit() after it has do its notification round. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ */ +int pdmR3UsbVMInitComplete(PVM pVM) +{ + for (PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + { + if (pUsbIns->pReg->pfnVMInitComplete) + { + int rc = pUsbIns->pReg->pfnVMInitComplete(pUsbIns); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("InitComplete on USB device '%s'/%d failed with rc=%Rrc\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; + } + } + } + return VINF_SUCCESS; +} + + +/** + * Lookups a device structure by name. + * @internal + */ +PPDMUSB pdmR3UsbLookup(PVM pVM, const char *pszName) +{ + size_t cchName = strlen(pszName); + for (PPDMUSB pUsb = pVM->pdm.s.pUsbDevs; pUsb; pUsb = pUsb->pNext) + if ( pUsb->cchName == cchName + && !strcmp(pUsb->pReg->szName, pszName)) + return pUsb; + return NULL; +} + + +/** + * Locates a suitable hub for the specified kind of device. + * + * @returns VINF_SUCCESS and *ppHub on success. + * VERR_PDM_NO_USB_HUBS or VERR_PDM_NO_USB_PORTS on failure. + * @param pVM The cross context VM structure. + * @param iUsbVersion The USB device version. + * @param ppHub Where to store the pointer to the USB hub. + */ +static int pdmR3UsbFindHub(PVM pVM, uint32_t iUsbVersion, PPDMUSBHUB *ppHub) +{ + *ppHub = NULL; + if (!pVM->pdm.s.pUsbHubs) + return VERR_PDM_NO_USB_HUBS; + + for (PPDMUSBHUB pCur = pVM->pdm.s.pUsbHubs; pCur; pCur = pCur->pNext) + if (pCur->cAvailablePorts > 0) + { + /* First check for an exact match. */ + if (pCur->fVersions & iUsbVersion) + { + *ppHub = pCur; + break; + } + /* For high-speed USB 2.0 devices only, allow USB 1.1 fallback. */ + if ((iUsbVersion & VUSB_STDVER_20) && (pCur->fVersions == VUSB_STDVER_11)) + *ppHub = pCur; + } + if (*ppHub) + return VINF_SUCCESS; + return VERR_PDM_NO_USB_PORTS; +} + + +/** + * Translates a USB version (a bit-mask) to USB speed (enum). Picks + * the highest available version. + * + * @returns VUSBSPEED enum + * + * @param iUsbVersion The USB version. + * + */ +static VUSBSPEED pdmR3UsbVer2Spd(uint32_t iUsbVersion) +{ + VUSBSPEED enmSpd = VUSB_SPEED_UNKNOWN; + Assert(iUsbVersion); + + if (iUsbVersion & VUSB_STDVER_30) + enmSpd = VUSB_SPEED_SUPER; + else if (iUsbVersion & VUSB_STDVER_20) + enmSpd = VUSB_SPEED_HIGH; + else if (iUsbVersion & VUSB_STDVER_11) + enmSpd = VUSB_SPEED_FULL; /* Can't distinguish LS vs. FS. */ + + return enmSpd; +} + + +/** + * Translates a USB speed (enum) to USB version. + * + * @returns USB version mask + * + * @param enmSpeed The USB connection speed. + * + */ +static uint32_t pdmR3UsbSpd2Ver(VUSBSPEED enmSpeed) +{ + uint32_t iUsbVersion = 0; + Assert(enmSpeed != VUSB_SPEED_UNKNOWN); + + switch (enmSpeed) + { + case VUSB_SPEED_LOW: + case VUSB_SPEED_FULL: + iUsbVersion = VUSB_STDVER_11; + break; + case VUSB_SPEED_HIGH: + iUsbVersion = VUSB_STDVER_20; + break; + case VUSB_SPEED_SUPER: + case VUSB_SPEED_SUPERPLUS: + default: + iUsbVersion = VUSB_STDVER_30; + break; + } + + return iUsbVersion; +} + + +/** + * Creates the device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pHub The USB hub it'll be attached to. + * @param pUsbDev The USB device emulation. + * @param iInstance -1 if not called by pdmR3UsbInstantiateDevices(). + * @param pUuid The UUID for this device. + * @param ppInstanceNode Pointer to the device instance pointer. This is set to NULL if inserted + * into the tree or cleaned up. + * + * In the pdmR3UsbInstantiateDevices() case (iInstance != -1) this is + * the actual instance node and will not be cleaned up. 
+ * + * @param enmSpeed The speed the USB device is operating at. + * @param pszCaptureFilename Path to the file for USB traffic capturing, optional. + */ +static int pdmR3UsbCreateDevice(PVM pVM, PPDMUSBHUB pHub, PPDMUSB pUsbDev, int iInstance, PCRTUUID pUuid, + PCFGMNODE *ppInstanceNode, VUSBSPEED enmSpeed, const char *pszCaptureFilename) +{ + const bool fAtRuntime = iInstance == -1; + int rc; + + AssertPtrReturn(ppInstanceNode, VERR_INVALID_POINTER); + AssertPtrReturn(*ppInstanceNode, VERR_INVALID_POINTER); + + /* + * If not called by pdmR3UsbInstantiateDevices(), we'll have to fix + * the configuration now. + */ + /* USB device node. */ + PCFGMNODE pDevNode = CFGMR3GetChildF(CFGMR3GetRoot(pVM), "USB/%s/", pUsbDev->pReg->szName); + if (!pDevNode) + { + rc = CFGMR3InsertNodeF(CFGMR3GetRoot(pVM), &pDevNode, "USB/%s/", pUsbDev->pReg->szName); + AssertRCReturn(rc, rc); + } + + /* The instance node and number. */ + PCFGMNODE pInstanceToDelete = NULL; + PCFGMNODE pInstanceNode = NULL; + if (fAtRuntime) + { + /** @todo r=bird: This code is bogus as it ASSUMES that all USB devices are + * capable of infinite number of instances. */ + rc = VINF_SUCCESS; /* Shut up stupid incorrect uninitialized warning from Visual C++ 2010. */ + for (unsigned c = 0; c < _2M; c++) + { + iInstance = pUsbDev->iNextInstance++; + rc = CFGMR3InsertNodeF(pDevNode, &pInstanceNode, "%d/", iInstance); + if (rc != VERR_CFGM_NODE_EXISTS) + break; + } + AssertRCReturn(rc, rc); + + rc = CFGMR3ReplaceSubTree(pInstanceNode, *ppInstanceNode); + AssertRCReturn(rc, rc); + *ppInstanceNode = NULL; + pInstanceToDelete = pInstanceNode; + } + else + { + Assert(iInstance >= 0); + if (iInstance >= (int)pUsbDev->iNextInstance) + pUsbDev->iNextInstance = iInstance + 1; + pInstanceNode = *ppInstanceNode; + } + + /* Make sure the instance config node exists. */ + PCFGMNODE pConfig = CFGMR3GetChild(pInstanceNode, "Config"); + if (!pConfig) + { + rc = CFGMR3InsertNode(pInstanceNode, "Config", &pConfig); + AssertRCReturn(rc, rc); + } + Assert(CFGMR3GetChild(pInstanceNode, "Config") == pConfig); + + /* The global device config node. */ + PCFGMNODE pGlobalConfig = CFGMR3GetChild(pDevNode, "GlobalConfig"); + if (!pGlobalConfig) + { + rc = CFGMR3InsertNode(pDevNode, "GlobalConfig", &pGlobalConfig); + if (RT_FAILURE(rc)) + { + CFGMR3RemoveNode(pInstanceToDelete); + AssertRCReturn(rc, rc); + } + } + + /* + * Allocate the device instance. + */ + size_t cb = RT_UOFFSETOF_DYN(PDMUSBINS, achInstanceData[pUsbDev->pReg->cbInstance]); + cb = RT_ALIGN_Z(cb, 16); + PPDMUSBINS pUsbIns; + rc = MMR3HeapAllocZEx(pVM, MM_TAG_PDM_USB, cb, (void **)&pUsbIns); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Failed to allocate %d bytes of instance data for USB device '%s'. rc=%Rrc\n", + cb, pUsbDev->pReg->szName, rc)); + CFGMR3RemoveNode(pInstanceToDelete); + return rc; + } + + /* + * Initialize it. + */ + pUsbIns->u32Version = PDM_USBINS_VERSION; + //pUsbIns->Internal.s.pNext = NULL; + //pUsbIns->Internal.s.pPerDeviceNext = NULL; + pUsbIns->Internal.s.pUsbDev = pUsbDev; + pUsbIns->Internal.s.pVM = pVM; + //pUsbIns->Internal.s.pLuns = NULL; + pUsbIns->Internal.s.pCfg = pInstanceNode; + pUsbIns->Internal.s.pCfgDelete = pInstanceToDelete; + pUsbIns->Internal.s.pCfgGlobal = pGlobalConfig; + pUsbIns->Internal.s.Uuid = *pUuid; + //pUsbIns->Internal.s.pHub = NULL; + pUsbIns->Internal.s.iPort = UINT32_MAX; /* to be determined. */ + /* Set the flag accordingly. + * Otherwise VMPowerOff, VMSuspend will not be called for devices attached at runtime. 
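+ * (Devices created during VM construction start out flagged as suspended and
+ * are expected to be resumed by the normal power-on / resume sequence;
+ * hot-plugged devices are treated as running straight away.)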
+ */ + pUsbIns->Internal.s.fVMSuspended = !fAtRuntime; + //pUsbIns->Internal.s.pfnAsyncNotify = NULL; + pUsbIns->pHlpR3 = &g_pdmR3UsbHlp; + pUsbIns->pReg = pUsbDev->pReg; + pUsbIns->pCfg = pConfig; + pUsbIns->pCfgGlobal = pGlobalConfig; + pUsbIns->iInstance = iInstance; + pUsbIns->pvInstanceDataR3 = &pUsbIns->achInstanceData[0]; + pUsbIns->pszName = RTStrDup(pUsbDev->pReg->szName); + //pUsbIns->fTracing = 0; + pUsbIns->idTracing = ++pVM->pdm.s.idTracingOther; + pUsbIns->enmSpeed = enmSpeed; + + /* + * Link it into all the lists. + */ + /* The global instance FIFO. */ + PPDMUSBINS pPrev1 = pVM->pdm.s.pUsbInstances; + if (!pPrev1) + pVM->pdm.s.pUsbInstances = pUsbIns; + else + { + while (pPrev1->Internal.s.pNext) + { + Assert(pPrev1->u32Version == PDM_USBINS_VERSION); + pPrev1 = pPrev1->Internal.s.pNext; + } + pPrev1->Internal.s.pNext = pUsbIns; + } + + /* The per device instance FIFO. */ + PPDMUSBINS pPrev2 = pUsbDev->pInstances; + if (!pPrev2) + pUsbDev->pInstances = pUsbIns; + else + { + while (pPrev2->Internal.s.pPerDeviceNext) + { + Assert(pPrev2->u32Version == PDM_USBINS_VERSION); + pPrev2 = pPrev2->Internal.s.pPerDeviceNext; + } + pPrev2->Internal.s.pPerDeviceNext = pUsbIns; + } + + /* + * Call the constructor. + */ + Log(("PDM: Constructing USB device '%s' instance %d...\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + rc = pUsbIns->pReg->pfnConstruct(pUsbIns, pUsbIns->iInstance, pUsbIns->pCfg, pUsbIns->pCfgGlobal); + if (RT_SUCCESS(rc)) + { + /* + * Attach it to the hub. + */ + Log(("PDM: Attaching it...\n")); + rc = pHub->Reg.pfnAttachDevice(pHub->pDrvIns, pUsbIns, pszCaptureFilename, &pUsbIns->Internal.s.iPort); + if (RT_SUCCESS(rc)) + { + pHub->cAvailablePorts--; + Assert((int32_t)pHub->cAvailablePorts >= 0 && pHub->cAvailablePorts < pHub->cPorts); + pUsbIns->Internal.s.pHub = pHub; + + /* Send the hot-plugged notification if applicable. */ + if (fAtRuntime && pUsbIns->pReg->pfnHotPlugged) + pUsbIns->pReg->pfnHotPlugged(pUsbIns); + + Log(("PDM: Successfully attached USB device '%s' instance %d to hub %p\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, pHub)); + return VINF_SUCCESS; + } + + LogRel(("PDMUsb: Failed to attach USB device '%s' instance %d to hub %p: %Rrc\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, pHub, rc)); + } + else + { + AssertMsgFailed(("Failed to construct '%s'/%d! %Rra\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + if (rc == VERR_VERSION_MISMATCH) + rc = VERR_PDM_USBDEV_VERSION_MISMATCH; + } + if (fAtRuntime) + pdmR3UsbDestroyDevice(pVM, pUsbIns); + /* else: destructors are invoked later. */ + return rc; +} + + +/** + * Instantiate USB devices. + * + * This is called by pdmR3DevInit() after it has instantiated the + * other devices and their drivers. If there aren't any hubs + * around, we'll silently skip the USB devices. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pdmR3UsbInstantiateDevices(PVM pVM) +{ + /* + * Any hubs? + */ + if (!pVM->pdm.s.pUsbHubs) + { + Log(("PDM: No USB hubs, skipping USB device instantiation.\n")); + return VINF_SUCCESS; + } + + /* + * Count the device instances. 
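+ * (Every child of the USB/ CFGM node is a device; its numbered children are
+ * the instances, except for the per-device GlobalConfig node which is
+ * skipped.)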
+ */ + PCFGMNODE pCur; + PCFGMNODE pUsbNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "USB/"); + PCFGMNODE pInstanceNode; + unsigned cUsbDevs = 0; + for (pCur = CFGMR3GetFirstChild(pUsbNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + PCFGMNODE pGlobal = CFGMR3GetChild(pCur, "GlobalConfig/"); + for (pInstanceNode = CFGMR3GetFirstChild(pCur); pInstanceNode; pInstanceNode = CFGMR3GetNextChild(pInstanceNode)) + if (pInstanceNode != pGlobal) + cUsbDevs++; + } + if (!cUsbDevs) + { + Log(("PDM: No USB devices were configured!\n")); + return VINF_SUCCESS; + } + Log2(("PDM: cUsbDevs=%d!\n", cUsbDevs)); + + /* + * Collect info on each USB device instance. + */ + struct USBDEVORDER + { + /** Configuration node. */ + PCFGMNODE pNode; + /** Pointer to the USB device. */ + PPDMUSB pUsbDev; + /** Init order. */ + uint32_t u32Order; + /** VBox instance number. */ + uint32_t iInstance; + /** Device UUID. */ + RTUUID Uuid; + } *paUsbDevs = (struct USBDEVORDER *)alloca(sizeof(paUsbDevs[0]) * (cUsbDevs + 1)); /* (One extra for swapping) */ + Assert(paUsbDevs); + int rc; + unsigned i = 0; + for (pCur = CFGMR3GetFirstChild(pUsbNode); pCur; pCur = CFGMR3GetNextChild(pCur)) + { + /* Get the device name. */ + char szName[sizeof(paUsbDevs[0].pUsbDev->pReg->szName)]; + rc = CFGMR3GetName(pCur, szName, sizeof(szName)); + AssertMsgRCReturn(rc, ("Configuration error: device name is too long (or something)! rc=%Rrc\n", rc), rc); + + /* Find the device. */ + PPDMUSB pUsbDev = pdmR3UsbLookup(pVM, szName); + AssertMsgReturn(pUsbDev, ("Configuration error: device '%s' not found!\n", szName), VERR_PDM_DEVICE_NOT_FOUND); + + /* Configured priority or use default? */ + uint32_t u32Order; + rc = CFGMR3QueryU32(pCur, "Priority", &u32Order); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + u32Order = i << 4; + else + AssertMsgRCReturn(rc, ("Configuration error: reading \"Priority\" for the '%s' USB device failed rc=%Rrc!\n", szName, rc), rc); + + /* Global config. */ + PCFGMNODE pGlobal = CFGMR3GetChild(pCur, "GlobalConfig/"); + if (!pGlobal) + { + rc = CFGMR3InsertNode(pCur, "GlobalConfig/", &pGlobal); + AssertMsgRCReturn(rc, ("Failed to create GlobalConfig node! rc=%Rrc\n", rc), rc); + CFGMR3SetRestrictedRoot(pGlobal); + } + + /* Enumerate the device instances. */ + for (pInstanceNode = CFGMR3GetFirstChild(pCur); pInstanceNode; pInstanceNode = CFGMR3GetNextChild(pInstanceNode)) + { + if (pInstanceNode == pGlobal) + continue; + + /* Use the configured UUID if present, create our own otherwise. */ + char *pszUuid = NULL; + + RTUuidClear(&paUsbDevs[i].Uuid); + rc = CFGMR3QueryStringAlloc(pInstanceNode, "UUID", &pszUuid); + if (RT_SUCCESS(rc)) + { + AssertPtr(pszUuid); + + rc = RTUuidFromStr(&paUsbDevs[i].Uuid, pszUuid); + AssertMsgRCReturn(rc, ("Failed to convert UUID from string! rc=%Rrc\n", rc), rc); + MMR3HeapFree(pszUuid); + } + else if (rc == VERR_CFGM_VALUE_NOT_FOUND) + rc = RTUuidCreate(&paUsbDevs[i].Uuid); + + AssertRCReturn(rc, rc); + paUsbDevs[i].pNode = pInstanceNode; + paUsbDevs[i].pUsbDev = pUsbDev; + paUsbDevs[i].u32Order = u32Order; + + /* Get the instance number. */ + char szInstance[32]; + rc = CFGMR3GetName(pInstanceNode, szInstance, sizeof(szInstance)); + AssertMsgRCReturn(rc, ("Configuration error: instance name is too long (or something)! rc=%Rrc\n", rc), rc); + char *pszNext = NULL; + rc = RTStrToUInt32Ex(szInstance, &pszNext, 0, &paUsbDevs[i].iInstance); + AssertMsgRCReturn(rc, ("Configuration error: RTStrToInt32Ex failed on the instance name '%s'! 
rc=%Rrc\n", szInstance, rc), rc); + AssertMsgReturn(!*pszNext, ("Configuration error: the instance name '%s' isn't all digits. (%s)\n", szInstance, pszNext), VERR_INVALID_PARAMETER); + + /* next instance */ + i++; + } + } /* devices */ + Assert(i == cUsbDevs); + + /* + * Sort the device array ascending on u32Order. (bubble) + */ + unsigned c = cUsbDevs - 1; + while (c) + { + unsigned j = 0; + for (i = 0; i < c; i++) + if (paUsbDevs[i].u32Order > paUsbDevs[i + 1].u32Order) + { + paUsbDevs[cUsbDevs] = paUsbDevs[i + 1]; + paUsbDevs[i + 1] = paUsbDevs[i]; + paUsbDevs[i] = paUsbDevs[cUsbDevs]; + j = i; + } + c = j; + } + + /* + * Instantiate the devices. + */ + for (i = 0; i < cUsbDevs; i++) + { + /* + * Make sure there is a config node and mark it as restricted. + */ + PCFGMNODE pConfigNode = CFGMR3GetChild(paUsbDevs[i].pNode, "Config/"); + if (!pConfigNode) + { + rc = CFGMR3InsertNode(paUsbDevs[i].pNode, "Config", &pConfigNode); + AssertMsgRCReturn(rc, ("Failed to create Config node! rc=%Rrc\n", rc), rc); + } + CFGMR3SetRestrictedRoot(pConfigNode); + + /* + * Every emulated device must support USB 1.x hubs; optionally, high-speed USB 2.0 hubs + * might be also supported. This determines where to attach the device. + */ + uint32_t iUsbVersion = VUSB_STDVER_11; + + if (paUsbDevs[i].pUsbDev->pReg->fFlags & PDM_USBREG_HIGHSPEED_CAPABLE) + iUsbVersion |= VUSB_STDVER_20; + if (paUsbDevs[i].pUsbDev->pReg->fFlags & PDM_USBREG_SUPERSPEED_CAPABLE) + iUsbVersion |= VUSB_STDVER_30; + + /* + * Find a suitable hub with free ports. + */ + PPDMUSBHUB pHub; + rc = pdmR3UsbFindHub(pVM, iUsbVersion, &pHub); + if (RT_FAILURE(rc)) + { + Log(("pdmR3UsbFindHub failed %Rrc\n", rc)); + return rc; + } + + /* + * This is how we inform the device what speed it's communicating at, and hence + * which descriptors it should present to the guest. + */ + iUsbVersion &= pHub->fVersions; + + /* + * Create and attach the device. + */ + rc = pdmR3UsbCreateDevice(pVM, pHub, paUsbDevs[i].pUsbDev, paUsbDevs[i].iInstance, &paUsbDevs[i].Uuid, + &paUsbDevs[i].pNode, pdmR3UsbVer2Spd(iUsbVersion), NULL); + if (RT_FAILURE(rc)) + return rc; + } /* for device instances */ + + return VINF_SUCCESS; +} + + +/** + * Creates an emulated USB device instance at runtime. + * + * This will find an appropriate HUB for the USB device + * and try instantiate the emulated device. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDeviceName The name of the PDM device to instantiate. + * @param pInstanceNode The instance CFGM node. + * @param pUuid The UUID to be associated with the device. + * @param pszCaptureFilename Path to the file for USB traffic capturing, optional. + * + * @thread EMT + */ +VMMR3DECL(int) PDMR3UsbCreateEmulatedDevice(PUVM pUVM, const char *pszDeviceName, PCFGMNODE pInstanceNode, PCRTUUID pUuid, + const char *pszCaptureFilename) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pszDeviceName, VERR_INVALID_POINTER); + AssertPtrReturn(pInstanceNode, VERR_INVALID_POINTER); + + /* + * Find the device. 
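+ * (pszDeviceName must match the szName that a device module registered via
+ * VBoxUsbRegister / pdmR3UsbReg_Register; if no match is found the call
+ * fails with VERR_PDM_NO_USBPROXY.)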
+ */ + PPDMUSB pUsbDev = pdmR3UsbLookup(pVM, pszDeviceName); + if (!pUsbDev) + { + LogRel(("PDMUsb: PDMR3UsbCreateEmulatedDevice: The '%s' device wasn't found\n", pszDeviceName)); + return VERR_PDM_NO_USBPROXY; + } + + /* + * Every device must support USB 1.x hubs; optionally, high-speed USB 2.0 hubs + * might be also supported. This determines where to attach the device. + */ + uint32_t iUsbVersion = VUSB_STDVER_11; + if (pUsbDev->pReg->fFlags & PDM_USBREG_HIGHSPEED_CAPABLE) + iUsbVersion |= VUSB_STDVER_20; + if (pUsbDev->pReg->fFlags & PDM_USBREG_SUPERSPEED_CAPABLE) + iUsbVersion |= VUSB_STDVER_30; + + /* + * Find a suitable hub with free ports. + */ + PPDMUSBHUB pHub; + int rc = pdmR3UsbFindHub(pVM, iUsbVersion, &pHub); + if (RT_FAILURE(rc)) + { + Log(("pdmR3UsbFindHub: failed %Rrc\n", rc)); + return rc; + } + + /* + * This is how we inform the device what speed it's communicating at, and hence + * which descriptors it should present to the guest. + */ + iUsbVersion &= pHub->fVersions; + + /* + * Create and attach the device. + */ + rc = pdmR3UsbCreateDevice(pVM, pHub, pUsbDev, -1, pUuid, &pInstanceNode, + pdmR3UsbVer2Spd(iUsbVersion), pszCaptureFilename); + AssertRCReturn(rc, rc); + + return rc; +} + + +/** + * Creates a USB proxy device instance. + * + * This will find an appropriate HUB for the USB device, create the necessary CFGM stuff + * and try instantiate the proxy device. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pUuid The UUID to be associated with the device. + * @param pszBackend The proxy backend to use. + * @param pszAddress The address string. + * @param pvBackend Pointer to the backend. + * @param enmSpeed The speed the USB device is operating at. + * @param fMaskedIfs The interfaces to hide from the guest. + * @param pszCaptureFilename Path to the file for USB traffic capturing, optional. + */ +VMMR3DECL(int) PDMR3UsbCreateProxyDevice(PUVM pUVM, PCRTUUID pUuid, const char *pszBackend, const char *pszAddress, void *pvBackend, + VUSBSPEED enmSpeed, uint32_t fMaskedIfs, const char *pszCaptureFilename) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pUuid, VERR_INVALID_POINTER); + AssertPtrReturn(pszAddress, VERR_INVALID_POINTER); + AssertReturn( enmSpeed == VUSB_SPEED_LOW + || enmSpeed == VUSB_SPEED_FULL + || enmSpeed == VUSB_SPEED_HIGH + || enmSpeed == VUSB_SPEED_SUPER + || enmSpeed == VUSB_SPEED_SUPERPLUS, VERR_INVALID_PARAMETER); + + /* + * Find the USBProxy driver. + */ + PPDMUSB pUsbDev = pdmR3UsbLookup(pVM, "USBProxy"); + if (!pUsbDev) + { + LogRel(("PDMUsb: PDMR3UsbCreateProxyDevice: The USBProxy device class wasn't found\n")); + return VERR_PDM_NO_USBPROXY; + } + + /* + * Find a suitable hub with free ports. + */ + PPDMUSBHUB pHub; + uint32_t iUsbVersion = pdmR3UsbSpd2Ver(enmSpeed); + int rc = pdmR3UsbFindHub(pVM, iUsbVersion, &pHub); + if (RT_FAILURE(rc)) + { + Log(("pdmR3UsbFindHub: failed %Rrc\n", rc)); + return rc; + } + + /* + * Create the CFGM instance node. 
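+ * (A detached tree is built here whose Config node carries Address, UUID,
+ * Backend, pvBackend, MaskedIfs and Force11Device; pdmR3UsbCreateDevice
+ * either adopts it, or it is removed again on failure.)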
+ */ + PCFGMNODE pInstance = CFGMR3CreateTree(pUVM); + AssertReturn(pInstance, VERR_NO_MEMORY); + do /* break loop */ + { + PCFGMNODE pConfig; + rc = CFGMR3InsertNode(pInstance, "Config", &pConfig); AssertRCBreak(rc); + rc = CFGMR3InsertString(pConfig, "Address", pszAddress); AssertRCBreak(rc); + char szUuid[RTUUID_STR_LENGTH]; + rc = RTUuidToStr(pUuid, &szUuid[0], sizeof(szUuid)); AssertRCBreak(rc); + rc = CFGMR3InsertString(pConfig, "UUID", szUuid); AssertRCBreak(rc); + rc = CFGMR3InsertString(pConfig, "Backend", pszBackend); AssertRCBreak(rc); + rc = CFGMR3InsertInteger(pConfig, "pvBackend", (uintptr_t)pvBackend); AssertRCBreak(rc); + rc = CFGMR3InsertInteger(pConfig, "MaskedIfs", fMaskedIfs); AssertRCBreak(rc); + rc = CFGMR3InsertInteger(pConfig, "Force11Device", !(pHub->fVersions & iUsbVersion)); AssertRCBreak(rc); + } while (0); /* break loop */ + if (RT_FAILURE(rc)) + { + CFGMR3RemoveNode(pInstance); + LogRel(("PDMUsb: PDMR3UsbCreateProxyDevice: failed to setup CFGM config, rc=%Rrc\n", rc)); + return rc; + } + + if (enmSpeed == VUSB_SPEED_UNKNOWN) + enmSpeed = pdmR3UsbVer2Spd(iUsbVersion); + + /* + * Finally, try to create it. + */ + rc = pdmR3UsbCreateDevice(pVM, pHub, pUsbDev, -1, pUuid, &pInstance, enmSpeed, pszCaptureFilename); + if (RT_FAILURE(rc) && pInstance) + CFGMR3RemoveNode(pInstance); + return rc; +} + + +/** + * Destroys a hot-plugged USB device. + * + * The device must be detached from the HUB at this point. + * + * @param pVM The cross context VM structure. + * @param pUsbIns The USB device instance to destroy. + * @thread EMT + */ +static void pdmR3UsbDestroyDevice(PVM pVM, PPDMUSBINS pUsbIns) +{ + Assert(!pUsbIns->Internal.s.pHub); + + /* + * Do the unplug notification. + */ + /** @todo what about the drivers? */ + if (pUsbIns->pReg->pfnHotUnplugged) + pUsbIns->pReg->pfnHotUnplugged(pUsbIns); + + /* + * Destroy the luns with their driver chains and call the device destructor. + */ + while (pUsbIns->Internal.s.pLuns) + { + PPDMLUN pLun = pUsbIns->Internal.s.pLuns; + pUsbIns->Internal.s.pLuns = pLun->pNext; + if (pLun->pTop) + pdmR3DrvDestroyChain(pLun->pTop, PDM_TACH_FLAGS_NOT_HOT_PLUG); /* Hotplugging is handled differently here atm. */ + MMR3HeapFree(pLun); + } + + /* finally, the device. */ + if (pUsbIns->pReg->pfnDestruct) + { + Log(("PDM: Destructing USB device '%s' instance %d...\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + pUsbIns->pReg->pfnDestruct(pUsbIns); + } + TMR3TimerDestroyUsb(pVM, pUsbIns); + SSMR3DeregisterUsb(pVM, pUsbIns, NULL, 0); + pdmR3ThreadDestroyUsb(pVM, pUsbIns); +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION + pdmR3AsyncCompletionTemplateDestroyUsb(pVM, pUsbIns); +#endif + + /* + * Unlink it. + */ + /* The global instance FIFO. */ + if (pVM->pdm.s.pUsbInstances == pUsbIns) + pVM->pdm.s.pUsbInstances = pUsbIns->Internal.s.pNext; + else + { + PPDMUSBINS pPrev = pVM->pdm.s.pUsbInstances; + while (pPrev && pPrev->Internal.s.pNext != pUsbIns) + { + Assert(pPrev->u32Version == PDM_USBINS_VERSION); + pPrev = pPrev->Internal.s.pNext; + } + Assert(pPrev); Assert(pPrev != pUsbIns); + if (pPrev) + pPrev->Internal.s.pNext = pUsbIns->Internal.s.pNext; + } + + /* The per device instance FIFO. 
*/ + PPDMUSB pUsbDev = pUsbIns->Internal.s.pUsbDev; + if (pUsbDev->pInstances == pUsbIns) + pUsbDev->pInstances = pUsbIns->Internal.s.pPerDeviceNext; + else + { + PPDMUSBINS pPrev = pUsbDev->pInstances; + while (pPrev && pPrev->Internal.s.pPerDeviceNext != pUsbIns) + { + Assert(pPrev->u32Version == PDM_USBINS_VERSION); + pPrev = pPrev->Internal.s.pPerDeviceNext; + } + Assert(pPrev); Assert(pPrev != pUsbIns); + if (pPrev) + pPrev->Internal.s.pPerDeviceNext = pUsbIns->Internal.s.pPerDeviceNext; + } + + /* + * Trash it. + */ + pUsbIns->u32Version = 0; + pUsbIns->pReg = NULL; + if (pUsbIns->pszName) + { + RTStrFree(pUsbIns->pszName); + pUsbIns->pszName = NULL; + } + CFGMR3RemoveNode(pUsbIns->Internal.s.pCfgDelete); + MMR3HeapFree(pUsbIns); +} + + +/** + * Detaches and destroys a USB device. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pUuid The UUID associated with the device to detach. + * @thread EMT + */ +VMMR3DECL(int) PDMR3UsbDetachDevice(PUVM pUVM, PCRTUUID pUuid) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT(pVM); + AssertPtrReturn(pUuid, VERR_INVALID_POINTER); + + /* + * Search the global list for it. + */ + PPDMUSBINS pUsbIns = pVM->pdm.s.pUsbInstances; + for ( ; pUsbIns; pUsbIns = pUsbIns->Internal.s.pNext) + if (!RTUuidCompare(&pUsbIns->Internal.s.Uuid, pUuid)) + break; + if (!pUsbIns) + return VERR_PDM_DEVICE_INSTANCE_NOT_FOUND; /** @todo VERR_PDM_USB_INSTANCE_NOT_FOUND */ + + /* + * Detach it from the HUB (if it's actually attached to one). + */ + PPDMUSBHUB pHub = pUsbIns->Internal.s.pHub; + if (pHub) + { + int rc = pHub->Reg.pfnDetachDevice(pHub->pDrvIns, pUsbIns, pUsbIns->Internal.s.iPort); + if (RT_FAILURE(rc)) + { + LogRel(("PDMUsb: Failed to detach USB device '%s' instance %d from %p: %Rrc\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, pHub, rc)); + return rc; + } + + pHub->cAvailablePorts++; + Assert(pHub->cAvailablePorts > 0 && pHub->cAvailablePorts <= pHub->cPorts); + pUsbIns->Internal.s.pHub = NULL; + } + + /* + * Notify about unplugging and destroy the device with it's drivers. + */ + pdmR3UsbDestroyDevice(pVM, pUsbIns); + + return VINF_SUCCESS; +} + + +/** + * Checks if there are any USB hubs attached. + * + * @returns true / false accordingly. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(bool) PDMR3UsbHasHub(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, false); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->pdm.s.pUsbHubs != NULL; +} + + +/** + * Locates a LUN. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param ppLun Where to store the pointer to the LUN if found. + * @thread Try only do this in EMT... + */ +static int pdmR3UsbFindLun(PVM pVM, const char *pszDevice, unsigned iInstance, unsigned iLun, PPPDMLUN ppLun) +{ + /* + * Iterate registered devices looking for the device. + */ + size_t cchDevice = strlen(pszDevice); + for (PPDMUSB pUsbDev = pVM->pdm.s.pUsbDevs; pUsbDev; pUsbDev = pUsbDev->pNext) + { + if ( pUsbDev->cchName == cchDevice + && !memcmp(pUsbDev->pReg->szName, pszDevice, cchDevice)) + { + /* + * Iterate device instances. 
+ */ + for (PPDMUSBINS pUsbIns = pUsbDev->pInstances; pUsbIns; pUsbIns = pUsbIns->Internal.s.pPerDeviceNext) + { + if (pUsbIns->iInstance == iInstance) + { + /* + * Iterate luns. + */ + for (PPDMLUN pLun = pUsbIns->Internal.s.pLuns; pLun; pLun = pLun->pNext) + { + if (pLun->iLun == iLun) + { + *ppLun = pLun; + return VINF_SUCCESS; + } + } + return VERR_PDM_LUN_NOT_FOUND; + } + } + return VERR_PDM_DEVICE_INSTANCE_NOT_FOUND; + } + } + return VERR_PDM_DEVICE_NOT_FOUND; +} + + +/** + * Attaches a preconfigured driver to an existing device or driver instance. + * + * This is used to change drivers and suchlike at runtime. The driver or device + * at the end of the chain will be told to attach to whatever is configured + * below it. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iDevIns Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param fFlags Flags, combination of the PDM_TACH_FLAGS_* \#defines. + * @param ppBase Where to store the base interface pointer. Optional. + * + * @thread EMT + */ +VMMR3DECL(int) PDMR3UsbDriverAttach(PUVM pUVM, const char *pszDevice, unsigned iDevIns, unsigned iLun, uint32_t fFlags, + PPPDMIBASE ppBase) +{ + LogFlow(("PDMR3UsbDriverAttach: pszDevice=%p:{%s} iDevIns=%d iLun=%d fFlags=%#x ppBase=%p\n", + pszDevice, pszDevice, iDevIns, iLun, fFlags, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT(pVM); + + if (ppBase) + *ppBase = NULL; + + /* + * Find the LUN in question. + */ + PPDMLUN pLun; + int rc = pdmR3UsbFindLun(pVM, pszDevice, iDevIns, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + /* + * Anything attached to the LUN? + */ + PPDMDRVINS pDrvIns = pLun->pTop; + if (!pDrvIns) + { + /* No, ask the device to attach to the new stuff. */ + PPDMUSBINS pUsbIns = pLun->pUsbIns; + if (pUsbIns->pReg->pfnDriverAttach) + { + rc = pUsbIns->pReg->pfnDriverAttach(pUsbIns, iLun, fFlags); + if (RT_SUCCESS(rc) && ppBase) + *ppBase = pLun->pTop ? &pLun->pTop->IBase : NULL; + } + else + rc = VERR_PDM_DEVICE_NO_RT_ATTACH; + } + else + { + /* Yes, find the bottom most driver and ask it to attach to the new stuff. */ + while (pDrvIns->Internal.s.pDown) + pDrvIns = pDrvIns->Internal.s.pDown; + if (pDrvIns->pReg->pfnAttach) + { + rc = pDrvIns->pReg->pfnAttach(pDrvIns, fFlags); + if (RT_SUCCESS(rc) && ppBase) + *ppBase = pDrvIns->Internal.s.pDown + ? &pDrvIns->Internal.s.pDown->IBase + : NULL; + } + else + rc = VERR_PDM_DRIVER_NO_RT_ATTACH; + } + } + + if (ppBase) + LogFlow(("PDMR3UsbDriverAttach: returns %Rrc *ppBase=%p\n", rc, *ppBase)); + else + LogFlow(("PDMR3UsbDriverAttach: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Detaches the specified driver instance. + * + * This is used to replumb drivers at runtime for simulating hot plugging and + * media changes. + * + * This method allows detaching drivers from + * any driver or device by specifying the driver to start detaching at. The + * only prerequisite is that the driver or device above implements the + * pfnDetach callback (PDMDRVREG / PDMUSBREG). + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iDevIns Device instance. + * @param iLun The Logical Unit in which to look for the driver. + * @param pszDriver The name of the driver which to detach. If NULL + * then the entire driver chain is detatched. 
+ * @param iOccurrence The occurrence of that driver in the chain. This is + * usually 0. + * @param fFlags Flags, combination of the PDM_TACH_FLAGS_* \#defines. + * @thread EMT + */ +VMMR3DECL(int) PDMR3UsbDriverDetach(PUVM pUVM, const char *pszDevice, unsigned iDevIns, unsigned iLun, + const char *pszDriver, unsigned iOccurrence, uint32_t fFlags) +{ + LogFlow(("PDMR3UsbDriverDetach: pszDevice=%p:{%s} iDevIns=%u iLun=%u pszDriver=%p:{%s} iOccurrence=%u fFlags=%#x\n", + pszDevice, pszDevice, iDevIns, iLun, pszDriver, pszDriver, iOccurrence, fFlags)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT(pVM); + AssertPtr(pszDevice); + AssertPtrNull(pszDriver); + Assert(iOccurrence == 0 || pszDriver); + Assert(!(fFlags & ~(PDM_TACH_FLAGS_NOT_HOT_PLUG))); + + /* + * Find the LUN in question. + */ + PPDMLUN pLun; + int rc = pdmR3UsbFindLun(pVM, pszDevice, iDevIns, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + /* + * Locate the driver. + */ + PPDMDRVINS pDrvIns = pLun->pTop; + if (pDrvIns) + { + if (pszDriver) + { + while (pDrvIns) + { + if (!strcmp(pDrvIns->pReg->szName, pszDriver)) + { + if (iOccurrence == 0) + break; + iOccurrence--; + } + pDrvIns = pDrvIns->Internal.s.pDown; + } + } + if (pDrvIns) + rc = pdmR3DrvDetach(pDrvIns, fFlags); + else + rc = VERR_PDM_DRIVER_INSTANCE_NOT_FOUND; + } + else + rc = VINF_PDM_NO_DRIVER_ATTACHED_TO_LUN; + } + + LogFlow(("PDMR3UsbDriverDetach: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Query the interface of the top level driver on a LUN. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param ppBase Where to store the base interface pointer. + * @remark We're not doing any locking ATM, so don't try call this at times when the + * device chain is known to be updated. + */ +VMMR3DECL(int) PDMR3UsbQueryLun(PUVM pUVM, const char *pszDevice, unsigned iInstance, unsigned iLun, PPDMIBASE *ppBase) +{ + LogFlow(("PDMR3UsbQueryLun: pszDevice=%p:{%s} iInstance=%u iLun=%u ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Find the LUN. + */ + PPDMLUN pLun; + int rc = pdmR3UsbFindLun(pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + if (pLun->pTop) + { + *ppBase = &pLun->pTop->IBase; + LogFlow(("PDMR3UsbQueryLun: return %Rrc and *ppBase=%p\n", VINF_SUCCESS, *ppBase)); + return VINF_SUCCESS; + } + rc = VERR_PDM_NO_DRIVER_ATTACHED_TO_LUN; + } + LogFlow(("PDMR3UsbQueryLun: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Query the interface of a named driver on a LUN. + * + * If the driver appears more than once in the driver chain, the first instance + * is returned. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszDevice Device name. + * @param iInstance Device instance. + * @param iLun The Logical Unit to obtain the interface of. + * @param pszDriver The driver name. + * @param ppBase Where to store the base interface pointer. + * + * @remark We're not doing any locking ATM, so don't try call this at times when the + * device chain is known to be updated. 
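+ *
+ * Illustrative use (the device and driver names below are made up):
+ * @code
+ *      PPDMIBASE pBase;
+ *      int rc = PDMR3UsbQueryDriverOnLun(pUVM, "Msd", 0, 0, "SCSI", &pBase);
+ * @endcode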
+ */ +VMMR3DECL(int) PDMR3UsbQueryDriverOnLun(PUVM pUVM, const char *pszDevice, unsigned iInstance, + unsigned iLun, const char *pszDriver, PPPDMIBASE ppBase) +{ + LogFlow(("PDMR3QueryDriverOnLun: pszDevice=%p:{%s} iInstance=%u iLun=%u pszDriver=%p:{%s} ppBase=%p\n", + pszDevice, pszDevice, iInstance, iLun, pszDriver, pszDriver, ppBase)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Find the LUN. + */ + PPDMLUN pLun; + int rc = pdmR3UsbFindLun(pVM, pszDevice, iInstance, iLun, &pLun); + if (RT_SUCCESS(rc)) + { + if (pLun->pTop) + { + for (PPDMDRVINS pDrvIns = pLun->pTop; pDrvIns; pDrvIns = pDrvIns->Internal.s.pDown) + if (!strcmp(pDrvIns->pReg->szName, pszDriver)) + { + *ppBase = &pDrvIns->IBase; + LogFlow(("PDMR3UsbQueryDriverOnLun: return %Rrc and *ppBase=%p\n", VINF_SUCCESS, *ppBase)); + return VINF_SUCCESS; + + } + rc = VERR_PDM_DRIVER_NOT_FOUND; + } + else + rc = VERR_PDM_NO_DRIVER_ATTACHED_TO_LUN; + } + LogFlow(("PDMR3UsbQueryDriverOnLun: returns %Rrc\n", rc)); + return rc; +} + + +/** @name USB Device Helpers + * @{ + */ + +/** @interface_method_impl{PDMUSBHLP,pfnDriverAttach} */ +static DECLCALLBACK(int) pdmR3UsbHlp_DriverAttach(PPDMUSBINS pUsbIns, RTUINT iLun, PPDMIBASE pBaseInterface, + PPDMIBASE *ppBaseInterface, const char *pszDesc) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3UsbHlp_DriverAttach: caller='%s'/%d: iLun=%d pBaseInterface=%p ppBaseInterface=%p pszDesc=%p:{%s}\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, iLun, pBaseInterface, ppBaseInterface, pszDesc, pszDesc)); + + /* + * Lookup the LUN, it might already be registered. + */ + PPDMLUN pLunPrev = NULL; + PPDMLUN pLun = pUsbIns->Internal.s.pLuns; + for (; pLun; pLunPrev = pLun, pLun = pLun->pNext) + if (pLun->iLun == iLun) + break; + + /* + * Create the LUN if if wasn't found, else check if driver is already attached to it. + */ + if (!pLun) + { + if ( !pBaseInterface + || !pszDesc + || !*pszDesc) + { + Assert(pBaseInterface); + Assert(pszDesc || *pszDesc); + return VERR_INVALID_PARAMETER; + } + + pLun = (PPDMLUN)MMR3HeapAlloc(pVM, MM_TAG_PDM_LUN, sizeof(*pLun)); + if (!pLun) + return VERR_NO_MEMORY; + + pLun->iLun = iLun; + pLun->pNext = pLunPrev ? pLunPrev->pNext : NULL; + pLun->pTop = NULL; + pLun->pBottom = NULL; + pLun->pDevIns = NULL; + pLun->pUsbIns = pUsbIns; + pLun->pszDesc = pszDesc; + pLun->pBase = pBaseInterface; + if (!pLunPrev) + pUsbIns->Internal.s.pLuns = pLun; + else + pLunPrev->pNext = pLun; + Log(("pdmR3UsbHlp_DriverAttach: Registered LUN#%d '%s' with device '%s'/%d.\n", + iLun, pszDesc, pUsbIns->pReg->szName, pUsbIns->iInstance)); + } + else if (pLun->pTop) + { + AssertMsgFailed(("Already attached! The device should keep track of such things!\n")); + LogFlow(("pdmR3UsbHlp_DriverAttach: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, VERR_PDM_DRIVER_ALREADY_ATTACHED)); + return VERR_PDM_DRIVER_ALREADY_ATTACHED; + } + Assert(pLun->pBase == pBaseInterface); + + + /* + * Get the attached driver configuration. 
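+ * (Drivers for a USB LUN are configured under the instance's "LUN#<n>" CFGM
+ * subtree; when that node is absent, VERR_PDM_NO_ATTACHED_DRIVER is returned.)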
+ */ + int rc; + PCFGMNODE pNode = CFGMR3GetChildF(pUsbIns->Internal.s.pCfg, "LUN#%u", iLun); + if (pNode) + rc = pdmR3DrvInstantiate(pVM, pNode, pBaseInterface, NULL /*pDrvAbove*/, pLun, ppBaseInterface); + else + rc = VERR_PDM_NO_ATTACHED_DRIVER; + + + LogFlow(("pdmR3UsbHlp_DriverAttach: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnAssertEMT} */ +static DECLCALLBACK(bool) pdmR3UsbHlp_AssertEMT(PPDMUSBINS pUsbIns, const char *pszFile, unsigned iLine, const char *pszFunction) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + if (VM_IS_EMT(pUsbIns->Internal.s.pVM)) + return true; + + char szMsg[100]; + RTStrPrintf(szMsg, sizeof(szMsg), "AssertEMT '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance); + RTAssertMsg1Weak(szMsg, iLine, pszFile, pszFunction); + AssertBreakpoint(); + return false; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnAssertOther} */ +static DECLCALLBACK(bool) pdmR3UsbHlp_AssertOther(PPDMUSBINS pUsbIns, const char *pszFile, unsigned iLine, const char *pszFunction) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + if (!VM_IS_EMT(pUsbIns->Internal.s.pVM)) + return true; + + char szMsg[100]; + RTStrPrintf(szMsg, sizeof(szMsg), "AssertOther '%s'/%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance); + RTAssertMsg1Weak(szMsg, iLine, pszFile, pszFunction); + AssertBreakpoint(); + return false; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnDBGFStopV} */ +static DECLCALLBACK(int) pdmR3UsbHlp_DBGFStopV(PPDMUSBINS pUsbIns, const char *pszFile, unsigned iLine, const char *pszFunction, + const char *pszFormat, va_list va) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); +#ifdef LOG_ENABLED + va_list va2; + va_copy(va2, va); + LogFlow(("pdmR3UsbHlp_DBGFStopV: caller='%s'/%d: pszFile=%p:{%s} iLine=%d pszFunction=%p:{%s} pszFormat=%p:{%s} (%N)\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, pszFile, pszFile, iLine, pszFunction, pszFunction, pszFormat, pszFormat, pszFormat, &va2)); + va_end(va2); +#endif + + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + int rc = DBGFR3EventSrcV(pVM, DBGFEVENT_DEV_STOP, pszFile, iLine, pszFunction, pszFormat, va); + if (rc == VERR_DBGF_NOT_ATTACHED) + rc = VINF_SUCCESS; + + LogFlow(("pdmR3UsbHlp_DBGFStopV: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnDBGFInfoRegister} */ +static DECLCALLBACK(int) pdmR3UsbHlp_DBGFInfoRegister(PPDMUSBINS pUsbIns, const char *pszName, const char *pszDesc, + PFNDBGFHANDLERUSB pfnHandler) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + LogFlow(("pdmR3UsbHlp_DBGFInfoRegister: caller='%s'/%d: pszName=%p:{%s} pszDesc=%p:{%s} pfnHandler=%p\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, pszName, pszName, pszDesc, pszDesc, pfnHandler)); + + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + RT_NOREF4(pVM, pfnHandler, pszDesc, pszName); /** @todo int rc = DBGFR3InfoRegisterUsb(pVM, pszName, pszDesc, pfnHandler, pUsbIns); */ + int rc = VERR_NOT_IMPLEMENTED; AssertFailed(); + + LogFlow(("pdmR3UsbHlp_DBGFInfoRegister: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnMMHeapAlloc} */ +static DECLCALLBACK(void *) pdmR3UsbHlp_MMHeapAlloc(PPDMUSBINS pUsbIns, size_t cb) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + LogFlow(("pdmR3UsbHlp_MMHeapAlloc: caller='%s'/%d: cb=%#x\n", pUsbIns->pReg->szName, pUsbIns->iInstance, cb)); + + void *pv = MMR3HeapAlloc(pUsbIns->Internal.s.pVM, 
MM_TAG_PDM_USB_USER, cb); + + LogFlow(("pdmR3UsbHlp_MMHeapAlloc: caller='%s'/%d: returns %p\n", pUsbIns->pReg->szName, pUsbIns->iInstance, pv)); + return pv; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnMMHeapAllocZ} */ +static DECLCALLBACK(void *) pdmR3UsbHlp_MMHeapAllocZ(PPDMUSBINS pUsbIns, size_t cb) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + LogFlow(("pdmR3UsbHlp_MMHeapAllocZ: caller='%s'/%d: cb=%#x\n", pUsbIns->pReg->szName, pUsbIns->iInstance, cb)); + + void *pv = MMR3HeapAllocZ(pUsbIns->Internal.s.pVM, MM_TAG_PDM_USB_USER, cb); + + LogFlow(("pdmR3UsbHlp_MMHeapAllocZ: caller='%s'/%d: returns %p\n", pUsbIns->pReg->szName, pUsbIns->iInstance, pv)); + return pv; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnPDMQueueCreate} */ +static DECLCALLBACK(int) pdmR3UsbHlp_PDMQueueCreate(PPDMUSBINS pUsbIns, RTUINT cbItem, RTUINT cItems, uint32_t cMilliesInterval, + PFNPDMQUEUEUSB pfnCallback, const char *pszName, PPDMQUEUE *ppQueue) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + LogFlow(("pdmR3UsbHlp_PDMQueueCreate: caller='%s'/%d: cbItem=%#x cItems=%#x cMilliesInterval=%u pfnCallback=%p pszName=%p:{%s} ppQueue=%p\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, cbItem, cItems, cMilliesInterval, pfnCallback, pszName, pszName, ppQueue)); + + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + + if (pUsbIns->iInstance > 0) + { + pszName = MMR3HeapAPrintf(pVM, MM_TAG_PDM_DEVICE_DESC, "%s_%u", pszName, pUsbIns->iInstance); + AssertLogRelReturn(pszName, VERR_NO_MEMORY); + } + + RT_NOREF5(cbItem, cItems, cMilliesInterval, pfnCallback, ppQueue); + /** @todo int rc = PDMR3QueueCreateUsb(pVM, pUsbIns, cbItem, cItems, cMilliesInterval, pfnCallback, fGCEnabled, pszName, ppQueue); */ + int rc = VERR_NOT_IMPLEMENTED; AssertFailed(); + + LogFlow(("pdmR3UsbHlp_PDMQueueCreate: caller='%s'/%d: returns %Rrc *ppQueue=%p\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc, *ppQueue)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnSSMRegister} */ +static DECLCALLBACK(int) pdmR3UsbHlp_SSMRegister(PPDMUSBINS pUsbIns, uint32_t uVersion, size_t cbGuess, + PFNSSMUSBLIVEPREP pfnLivePrep, PFNSSMUSBLIVEEXEC pfnLiveExec, PFNSSMUSBLIVEVOTE pfnLiveVote, + PFNSSMUSBSAVEPREP pfnSavePrep, PFNSSMUSBSAVEEXEC pfnSaveExec, PFNSSMUSBSAVEDONE pfnSaveDone, + PFNSSMUSBLOADPREP pfnLoadPrep, PFNSSMUSBLOADEXEC pfnLoadExec, PFNSSMUSBLOADDONE pfnLoadDone) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + VM_ASSERT_EMT(pUsbIns->Internal.s.pVM); + LogFlow(("pdmR3UsbHlp_SSMRegister: caller='%s'/%d: uVersion=%#x cbGuess=%#x\n" + " pfnLivePrep=%p pfnLiveExec=%p pfnLiveVote=%p pfnSavePrep=%p pfnSaveExec=%p pfnSaveDone=%p pszLoadPrep=%p pfnLoadExec=%p pfnLoadDone=%p\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, uVersion, cbGuess, + pfnLivePrep, pfnLiveExec, pfnLiveVote, + pfnSavePrep, pfnSaveExec, pfnSaveDone, + pfnLoadPrep, pfnLoadExec, pfnLoadDone)); + + int rc = SSMR3RegisterUsb(pUsbIns->Internal.s.pVM, pUsbIns, pUsbIns->pReg->szName, pUsbIns->iInstance, + uVersion, cbGuess, + pfnLivePrep, pfnLiveExec, pfnLiveVote, + pfnSavePrep, pfnSaveExec, pfnSaveDone, + pfnLoadPrep, pfnLoadExec, pfnLoadDone); + + LogFlow(("pdmR3UsbHlp_SSMRegister: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnSTAMRegisterV} */ +static DECLCALLBACK(void) pdmR3UsbHlp_STAMRegisterV(PPDMUSBINS pUsbIns, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, + STAMUNIT enmUnit, const char *pszDesc, const char *pszName, va_list va) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + PVM pVM 
= pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + + int rc = STAMR3RegisterV(pVM, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, va); + AssertRC(rc); + + NOREF(pVM); +} + + +/** @interface_method_impl{PDMUSBHLP,pfnTMTimerCreate} */ +static DECLCALLBACK(int) pdmR3UsbHlp_TMTimerCreate(PPDMUSBINS pUsbIns, TMCLOCK enmClock, PFNTMTIMERUSB pfnCallback, void *pvUser, + uint32_t fFlags, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + LogFlow(("pdmR3UsbHlp_TMTimerCreate: caller='%s'/%d: enmClock=%d pfnCallback=%p pvUser=%p fFlags=%#x pszDesc=%p:{%s} ppTimer=%p\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, enmClock, pfnCallback, pvUser, fFlags, pszDesc, pszDesc, ppTimer)); + + if (pUsbIns->iInstance > 0) /** @todo use a string cache here later. */ + { + char *pszDesc2 = MMR3HeapAPrintf(pVM, MM_TAG_PDM_USB_DESC, "%s [%u]", pszDesc, pUsbIns->iInstance); + if (pszDesc2) + pszDesc = pszDesc2; + } + + int rc = TMR3TimerCreateUsb(pVM, pUsbIns, enmClock, pfnCallback, pvUser, fFlags, pszDesc, ppTimer); + + LogFlow(("pdmR3UsbHlp_TMTimerCreate: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnVMSetErrorV} */ +static DECLCALLBACK(int) pdmR3UsbHlp_VMSetErrorV(PPDMUSBINS pUsbIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + int rc2 = VMSetErrorV(pUsbIns->Internal.s.pVM, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnVMSetRuntimeErrorV} */ +static DECLCALLBACK(int) pdmR3UsbHlp_VMSetRuntimeErrorV(PPDMUSBINS pUsbIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list va) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + int rc = VMSetRuntimeErrorV(pUsbIns->Internal.s.pVM, fFlags, pszErrorId, pszFormat, va); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnVMState} */ +static DECLCALLBACK(VMSTATE) pdmR3UsbHlp_VMState(PPDMUSBINS pUsbIns) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + + VMSTATE enmVMState = VMR3GetState(pUsbIns->Internal.s.pVM); + + LogFlow(("pdmR3UsbHlp_VMState: caller='%s'/%d: returns %d (%s)\n", pUsbIns->pReg->szName, pUsbIns->iInstance, + enmVMState, VMR3GetStateName(enmVMState))); + return enmVMState; +} + +/** @interface_method_impl{PDMUSBHLP,pfnThreadCreate} */ +static DECLCALLBACK(int) pdmR3UsbHlp_ThreadCreate(PPDMUSBINS pUsbIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADUSB pfnThread, + PFNPDMTHREADWAKEUPUSB pfnWakeup, size_t cbStack, RTTHREADTYPE enmType, const char *pszName) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + VM_ASSERT_EMT(pUsbIns->Internal.s.pVM); + LogFlow(("pdmR3UsbHlp_ThreadCreate: caller='%s'/%d: ppThread=%p pvUser=%p pfnThread=%p pfnWakeup=%p cbStack=%#zx enmType=%d pszName=%p:{%s}\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, ppThread, pvUser, pfnThread, pfnWakeup, cbStack, enmType, pszName, pszName)); + + int rc = pdmR3ThreadCreateUsb(pUsbIns->Internal.s.pVM, pUsbIns, ppThread, pvUser, pfnThread, pfnWakeup, cbStack, enmType, pszName); + + LogFlow(("pdmR3UsbHlp_ThreadCreate: caller='%s'/%d: returns %Rrc *ppThread=%RTthrd\n", pUsbIns->pReg->szName, pUsbIns->iInstance, + rc, *ppThread)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnSetAsyncNotification} */ +static DECLCALLBACK(int) pdmR3UsbHlp_SetAsyncNotification(PPDMUSBINS pUsbIns, PFNPDMUSBASYNCNOTIFY pfnAsyncNotify) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + 
VM_ASSERT_EMT0(pUsbIns->Internal.s.pVM); + LogFlow(("pdmR3UsbHlp_SetAsyncNotification: caller='%s'/%d: pfnAsyncNotify=%p\n", pUsbIns->pReg->szName, pUsbIns->iInstance, pfnAsyncNotify)); + + int rc = VINF_SUCCESS; + AssertStmt(pfnAsyncNotify, rc = VERR_INVALID_PARAMETER); + AssertStmt(!pUsbIns->Internal.s.pfnAsyncNotify, rc = VERR_WRONG_ORDER); + AssertStmt(pUsbIns->Internal.s.fVMSuspended || pUsbIns->Internal.s.fVMReset, rc = VERR_WRONG_ORDER); + VMSTATE enmVMState = VMR3GetState(pUsbIns->Internal.s.pVM); + AssertStmt( enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS + || enmVMState == VMSTATE_SUSPENDING_LS + || enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS + || enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS, + rc = VERR_INVALID_STATE); + + if (RT_SUCCESS(rc)) + pUsbIns->Internal.s.pfnAsyncNotify = pfnAsyncNotify; + + LogFlow(("pdmR3UsbHlp_SetAsyncNotification: caller='%s'/%d: returns %Rrc\n", pUsbIns->pReg->szName, pUsbIns->iInstance, rc)); + return rc; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnAsyncNotificationCompleted} */ +static DECLCALLBACK(void) pdmR3UsbHlp_AsyncNotificationCompleted(PPDMUSBINS pUsbIns) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + PVM pVM = pUsbIns->Internal.s.pVM; + + VMSTATE enmVMState = VMR3GetState(pVM); + if ( enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS + || enmVMState == VMSTATE_SUSPENDING_LS + || enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS + || enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS) + { + LogFlow(("pdmR3UsbHlp_AsyncNotificationCompleted: caller='%s'/%d:\n", pUsbIns->pReg->szName, pUsbIns->iInstance)); + VMR3AsyncPdmNotificationWakeupU(pVM->pUVM); + } + else + LogFlow(("pdmR3UsbHlp_AsyncNotificationCompleted: caller='%s'/%d: enmVMState=%d\n", pUsbIns->pReg->szName, pUsbIns->iInstance, enmVMState)); +} + + +/** @interface_method_impl{PDMUSBHLP,pfnVMGetSuspendReason} */ +static DECLCALLBACK(VMSUSPENDREASON) pdmR3UsbHlp_VMGetSuspendReason(PPDMUSBINS pUsbIns) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + VMSUSPENDREASON enmReason = VMR3GetSuspendReason(pVM->pUVM); + LogFlow(("pdmR3UsbHlp_VMGetSuspendReason: caller='%s'/%d: returns %d\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, enmReason)); + return enmReason; +} + + +/** @interface_method_impl{PDMUSBHLP,pfnVMGetResumeReason} */ +static DECLCALLBACK(VMRESUMEREASON) pdmR3UsbHlp_VMGetResumeReason(PPDMUSBINS pUsbIns) +{ + PDMUSB_ASSERT_USBINS(pUsbIns); + PVM pVM = pUsbIns->Internal.s.pVM; + VM_ASSERT_EMT(pVM); + VMRESUMEREASON enmReason = VMR3GetResumeReason(pVM->pUVM); + LogFlow(("pdmR3UsbHlp_VMGetResumeReason: caller='%s'/%d: returns %d\n", + pUsbIns->pReg->szName, pUsbIns->iInstance, enmReason)); + return enmReason; +} + + +/** + * The USB device helper structure. 
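+ * Handed to every USB device instance; the callback entries below follow the PDMUSBHLP member order and the table is bracketed by PDM_USBHLP_VERSION at both ends.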
+ */ +const PDMUSBHLP g_pdmR3UsbHlp = +{ + PDM_USBHLP_VERSION, + pdmR3UsbHlp_DriverAttach, + pdmR3UsbHlp_AssertEMT, + pdmR3UsbHlp_AssertOther, + pdmR3UsbHlp_DBGFStopV, + pdmR3UsbHlp_DBGFInfoRegister, + pdmR3UsbHlp_MMHeapAlloc, + pdmR3UsbHlp_MMHeapAllocZ, + pdmR3UsbHlp_PDMQueueCreate, + pdmR3UsbHlp_SSMRegister, + pdmR3UsbHlp_STAMRegisterV, + pdmR3UsbHlp_TMTimerCreate, + pdmR3UsbHlp_VMSetErrorV, + pdmR3UsbHlp_VMSetRuntimeErrorV, + pdmR3UsbHlp_VMState, + pdmR3UsbHlp_ThreadCreate, + pdmR3UsbHlp_SetAsyncNotification, + pdmR3UsbHlp_AsyncNotificationCompleted, + pdmR3UsbHlp_VMGetSuspendReason, + pdmR3UsbHlp_VMGetResumeReason, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + PDM_USBHLP_VERSION +}; + +/** @} */ diff --git a/src/VBox/VMM/VMMR3/PGM.cpp b/src/VBox/VMM/VMMR3/PGM.cpp new file mode 100644 index 00000000..3e3327af --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGM.cpp @@ -0,0 +1,3013 @@ +/* $Id: PGM.cpp $ */ +/** @file + * PGM - Page Manager and Monitor. (Mixing stuff here, not good?) + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_pgm PGM - The Page Manager and Monitor + * + * @sa @ref grp_pgm + * @subpage pg_pgm_pool + * @subpage pg_pgm_phys + * + * + * @section sec_pgm_modes Paging Modes + * + * There are three memory contexts: Host Context (HC), Guest Context (GC) + * and intermediate context. When talking about paging HC can also be referred + * to as "host paging", and GC referred to as "shadow paging". + * + * We define three basic paging modes: 32-bit, PAE and AMD64. The host paging mode + * is defined by the host operating system. The mode used in the shadow paging mode + * depends on the host paging mode and what the mode the guest is currently in. The + * following relation between the two is defined: + * + * @verbatim + Host > 32-bit | PAE | AMD64 | + Guest | | | | + ==v================================ + 32-bit 32-bit PAE PAE + -------|--------|--------|--------| + PAE PAE PAE PAE + -------|--------|--------|--------| + AMD64 AMD64 AMD64 AMD64 + -------|--------|--------|--------| @endverbatim + * + * All configuration except those in the diagonal (upper left) are expected to + * require special effort from the switcher (i.e. a bit slower). + * + * + * + * + * @section sec_pgm_shw The Shadow Memory Context + * + * + * [..] + * + * Because of guest context mappings requires PDPT and PML4 entries to allow + * writing on AMD64, the two upper levels will have fixed flags whatever the + * guest is thinking of using there. So, when shadowing the PD level we will + * calculate the effective flags of PD and all the higher levels. In legacy + * PAE mode this only applies to the PWT and PCD bits (the rest are + * ignored/reserved/MBZ). We will ignore those bits for the present. + * + * + * + * @section sec_pgm_int The Intermediate Memory Context + * + * The world switch goes thru an intermediate memory context which purpose it is + * to provide different mappings of the switcher code. All guest mappings are also + * present in this context. 
+ *
+ * The switcher code is mapped at the same location as on the host, at an
+ * identity mapped location (physical equals virtual address), and at the
+ * hypervisor location. The identity mapped location is for world switches
+ * that involve disabling paging.
+ *
+ * PGM maintains page tables for 32-bit, PAE and AMD64 paging modes. This
+ * simplifies switching guest CPU mode and consistency at the cost of more
+ * code to do the work. All memory used for those page tables is located below
+ * 4GB (this includes page tables for guest context mappings).
+ *
+ * Note! The intermediate memory context is also used for 64-bit guest
+ *       execution on 32-bit hosts. Because we need to load 64-bit registers
+ *       prior to switching to guest context, we need to be in 64-bit mode
+ *       first. So, HM has some 64-bit worker routines in VMMRC.rc that get
+ *       invoked via the special world switcher code in LegacyToAMD64.asm.
+ *
+ *
+ * @subsection subsec_pgm_int_gc Guest Context Mappings
+ *
+ * During assignment and relocation of a guest context mapping the intermediate
+ * memory context is used to verify the new location.
+ *
+ * Guest context mappings are currently restricted to below 4GB, for reasons
+ * of simplicity. This may change when we implement AMD64 support.
+ *
+ *
+ *
+ *
+ * @section sec_pgm_misc Misc
+ *
+ *
+ * @subsection sec_pgm_misc_A20 The A20 Gate
+ *
+ * PGM implements the A20 gate masking when translating a virtual guest address
+ * into a physical address for CPU access, i.e. PGMGstGetPage (and friends) and
+ * the code reading the guest page table entries during shadowing. The masking
+ * is done consistently for all CPU modes, paged ones included. Large pages are
+ * also masked correctly. (On current CPUs, experiments indicate that AMD does
+ * not apply A20M in paged modes and Intel only does it for the 2nd MB of
+ * memory.)
+ *
+ * The A20 gate implementation is per CPU core. It can be configured on a per
+ * core basis via the keyboard device and PC architecture device. This is
+ * probably not exactly how real CPUs do it, but SMP and A20 isn't a place where
+ * guest OSes try pushing things anyway, so who cares. (On current real systems
+ * the A20M signal is probably only sent to the boot CPU and it affects all
+ * threads and probably all cores in that package.)
+ *
+ * The keyboard device and the PC architecture device don't OR their A20
+ * config bits together, rather they are currently implemented such that they
+ * mirror the CPU state. So, flipping the bit in either of them will change the
+ * A20 state. (On real hardware the bits of the two devices should probably be
+ * ORed together to indicate enabled, i.e. both need to be cleared to disable
+ * A20 masking.)
+ *
+ * The A20 state will change immediately, Transmeta fashion. There are no delays
+ * due to buses, wiring or other physical stuff. (On real hardware there are
+ * normally delays, the delays differ between the two devices and probably also
+ * between chipsets and CPU generations. Note that it's said that Transmeta CPUs
+ * do the change immediately like us, they apparently intercept/handle the
+ * port accesses in microcode. Neat.)
+ *
+ * @sa http://en.wikipedia.org/wiki/A20_line#The_80286_and_the_high_memory_area
+ *
+ *
+ * @subsection subsec_pgm_misc_diff Differences Between Legacy PAE and Long Mode PAE
+ *
+ * The differences between legacy PAE and long mode PAE are:
+ *   -# PDPE bits 1, 2, 5 and 6 are defined differently. In legacy mode they are
+ *      all marked down as must-be-zero, while in long mode 1, 2 and 5 have the
+ *      usual meanings while 6 is ignored (AMD). This means that upon switching to
+ *      legacy PAE mode we'll have to clear these bits and when going to long mode
+ *      they must be set. This applies to both intermediate and shadow contexts,
+ *      however we don't need to do it for the intermediate one since we're
+ *      executing with CR0.WP at that time.
+ *   -# CR3 allows a 32-byte aligned address in legacy mode, while in long mode
+ *      a page aligned one is required.
+ *
+ *
+ * @section sec_pgm_handlers Access Handlers
+ *
+ * Placeholder.
+ *
+ *
+ * @subsection sec_pgm_handlers_phys Physical Access Handlers
+ *
+ * Placeholder.
+ *
+ *
+ * @subsection sec_pgm_handlers_virt Virtual Access Handlers
+ *
+ * We currently implement three types of virtual access handlers: ALL, WRITE
+ * and HYPERVISOR (WRITE). See PGMVIRTHANDLERKIND for some more details.
+ *
+ * The HYPERVISOR access handlers are kept in a separate tree since they don't apply
+ * to physical pages (PGMTREES::HyperVirtHandlers) and only need to be consulted in
+ * a special \#PF case. The ALL and WRITE ones are in the PGMTREES::VirtHandlers tree;
+ * the rest of this section is going to be about these handlers.
+ *
+ * We'll go thru the life cycle of a handler and try to make sense of it all, don't know
+ * how successful this is gonna be...
+ *
+ * 1. A handler is registered thru the PGMR3HandlerVirtualRegister and
+ *    PGMHandlerVirtualRegisterEx APIs. We check for conflicting virtual handlers
+ *    and create a new node that is inserted into the AVL tree (range key). Then
+ *    a full PGM resync is flagged (clear pool, sync cr3, update virtual bit of PGMPAGE).
+ *
+ * 2. The following PGMSyncCR3/SyncCR3 operation will first invoke HandlerVirtualUpdate.
+ *
+ * 2a. HandlerVirtualUpdate will look up all the pages covered by virtual handlers
+ *     via the current guest CR3 and update the physical page -> virtual handler
+ *     translation. Needless to say, this doesn't exactly scale very well. If any changes
+ *     are detected, it will flag a virtual bit update just like we did on registration.
+ *     PGMPHYS pages with changes will have their virtual handler state reset to NONE.
+ *
+ * 2b. The virtual bit update process will iterate all the pages covered by all the
+ *     virtual handlers and update the PGMPAGE virtual handler state to the max of all
+ *     virtual handlers on that page.
+ *
+ * 2c. Back in SyncCR3 we will now flush the entire shadow page cache to make sure
+ *     we don't miss any alias mappings of the monitored pages.
+ *
+ * 2d. SyncCR3 will then proceed with syncing the CR3 table.
+ *
+ * 3. \#PF(np,read) on a page in the range. This will cause it to be synced
+ *    read-only and resumed if it's a WRITE handler. If it's an ALL handler we
+ *    will call the handlers like in the next step. If the physical mapping has
+ *    changed we will - some time in the future - perform a handler callback
+ *    (optional) and update the physical -> virtual handler cache.
+ *
+ * 4. \#PF(,write) on a page in the range. This will cause the handler to
+ *    be invoked.
+ *
+ * 5. The guest invalidates the page and changes the physical backing or
+ *    unmaps it. This should cause the invalidation callback to be invoked
+ *    (it might not yet be 100% perfect). Exactly what happens next... is
+ *    this where we mess up and end up out of sync for a while?
+ *
+ * 6. The handler is deregistered by the client via PGMHandlerVirtualDeregister.
+ * We will then set all PGMPAGEs in the physical -> virtual handler cache for + * this handler to NONE and trigger a full PGM resync (basically the same + * as int step 1). Which means 2 is executed again. + * + * + * @subsubsection sub_sec_pgm_handler_virt_todo TODOs + * + * There is a bunch of things that needs to be done to make the virtual handlers + * work 100% correctly and work more efficiently. + * + * The first bit hasn't been implemented yet because it's going to slow the + * whole mess down even more, and besides it seems to be working reliably for + * our current uses. OTOH, some of the optimizations might end up more or less + * implementing the missing bits, so we'll see. + * + * On the optimization side, the first thing to do is to try avoid unnecessary + * cache flushing. Then try team up with the shadowing code to track changes + * in mappings by means of access to them (shadow in), updates to shadows pages, + * invlpg, and shadow PT discarding (perhaps). + * + * Some idea that have popped up for optimization for current and new features: + * - bitmap indicating where there are virtual handlers installed. + * (4KB => 2**20 pages, page 2**12 => covers 32-bit address space 1:1!) + * - Further optimize this by min/max (needs min/max avl getters). + * - Shadow page table entry bit (if any left)? + * + */ + + +/** @page pg_pgm_phys PGM Physical Guest Memory Management + * + * + * Objectives: + * - Guest RAM over-commitment using memory ballooning, + * zero pages and general page sharing. + * - Moving or mirroring a VM onto a different physical machine. + * + * + * @section sec_pgmPhys_Definitions Definitions + * + * Allocation chunk - A RTR0MemObjAllocPhysNC object and the tracking + * machinery associated with it. + * + * + * + * + * @section sec_pgmPhys_AllocPage Allocating a page. + * + * Initially we map *all* guest memory to the (per VM) zero page, which + * means that none of the read functions will cause pages to be allocated. + * + * Exception, access bit in page tables that have been shared. This must + * be handled, but we must also make sure PGMGst*Modify doesn't make + * unnecessary modifications. + * + * Allocation points: + * - PGMPhysSimpleWriteGCPhys and PGMPhysWrite. + * - Replacing a zero page mapping at \#PF. + * - Replacing a shared page mapping at \#PF. + * - ROM registration (currently MMR3RomRegister). + * - VM restore (pgmR3Load). + * + * For the first three it would make sense to keep a few pages handy + * until we've reached the max memory commitment for the VM. + * + * For the ROM registration, we know exactly how many pages we need + * and will request these from ring-0. For restore, we will save + * the number of non-zero pages in the saved state and allocate + * them up front. This would allow the ring-0 component to refuse + * the request if the isn't sufficient memory available for VM use. + * + * Btw. for both ROM and restore allocations we won't be requiring + * zeroed pages as they are going to be filled instantly. + * + * + * @section sec_pgmPhys_FreePage Freeing a page + * + * There are a few points where a page can be freed: + * - After being replaced by the zero page. + * - After being replaced by a shared page. + * - After being ballooned by the guest additions. + * - At reset. + * - At restore. + * + * When freeing one or more pages they will be returned to the ring-0 + * component and replaced by the zero page. 
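+ *
+ * As a rough, purely illustrative sketch of what that bookkeeping amounts to
+ * (the type and names below are invented for the example and are not the
+ * real PGM/GMM structures):
+ * @verbatim
+    #include <stdint.h>
+
+    typedef struct ExamplePage
+    {
+        uint64_t HCPhys;     /* host physical address currently backing the guest page */
+        uint32_t idPage;     /* ring-0 (GMM) page id, UINT32_MAX when zero backed */
+        uint8_t  enmState;   /* 0 = zero, 1 = shared, 2 = normal, 3 = write monitored */
+    } ExamplePage;
+
+    /* Freeing boils down to: remember the id so it can be handed back to
+       ring-0 (in a batch), then point the guest page at the shared zero page. */
+    static uint32_t examplePageFree(ExamplePage *pPage, uint64_t HCPhysZeroPg)
+    {
+        uint32_t idToFree = pPage->idPage;
+        pPage->HCPhys   = HCPhysZeroPg;
+        pPage->idPage   = UINT32_MAX;
+        pPage->enmState = 0;            /* zero backed again */
+        return idToFree;                /* caller queues this for the ring-0 component */
+    }
+   @endverbatim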
+ * + * The reasoning for clearing out all the pages on reset is that it will + * return us to the exact same state as on power on, and may thereby help + * us reduce the memory load on the system. Further it might have a + * (temporary) positive influence on memory fragmentation (@see subsec_pgmPhys_Fragmentation). + * + * On restore, as mention under the allocation topic, pages should be + * freed / allocated depending on how many is actually required by the + * new VM state. The simplest approach is to do like on reset, and free + * all non-ROM pages and then allocate what we need. + * + * A measure to prevent some fragmentation, would be to let each allocation + * chunk have some affinity towards the VM having allocated the most pages + * from it. Also, try make sure to allocate from allocation chunks that + * are almost full. Admittedly, both these measures might work counter to + * our intentions and its probably not worth putting a lot of effort, + * cpu time or memory into this. + * + * + * @section sec_pgmPhys_SharePage Sharing a page + * + * The basic idea is that there there will be a idle priority kernel + * thread walking the non-shared VM pages hashing them and looking for + * pages with the same checksum. If such pages are found, it will compare + * them byte-by-byte to see if they actually are identical. If found to be + * identical it will allocate a shared page, copy the content, check that + * the page didn't change while doing this, and finally request both the + * VMs to use the shared page instead. If the page is all zeros (special + * checksum and byte-by-byte check) it will request the VM that owns it + * to replace it with the zero page. + * + * To make this efficient, we will have to make sure not to try share a page + * that will change its contents soon. This part requires the most work. + * A simple idea would be to request the VM to write monitor the page for + * a while to make sure it isn't modified any time soon. Also, it may + * make sense to skip pages that are being write monitored since this + * information is readily available to the thread if it works on the + * per-VM guest memory structures (presently called PGMRAMRANGE). + * + * + * @section sec_pgmPhys_Fragmentation Fragmentation Concerns and Counter Measures + * + * The pages are organized in allocation chunks in ring-0, this is a necessity + * if we wish to have an OS agnostic approach to this whole thing. (On Linux we + * could easily work on a page-by-page basis if we liked. Whether this is possible + * or efficient on NT I don't quite know.) Fragmentation within these chunks may + * become a problem as part of the idea here is that we wish to return memory to + * the host system. + * + * For instance, starting two VMs at the same time, they will both allocate the + * guest memory on-demand and if permitted their page allocations will be + * intermixed. Shut down one of the two VMs and it will be difficult to return + * any memory to the host system because the page allocation for the two VMs are + * mixed up in the same allocation chunks. + * + * To further complicate matters, when pages are freed because they have been + * ballooned or become shared/zero the whole idea is that the page is supposed + * to be reused by another VM or returned to the host system. This will cause + * allocation chunks to contain pages belonging to different VMs and prevent + * returning memory to the host when one of those VM shuts down. + * + * The only way to really deal with this problem is to move pages. 
This can
+ * either be done at VM shutdown and/or by the idle priority worker thread
+ * that will be responsible for finding sharable/zero pages. The mechanisms
+ * involved for coercing a VM to move a page (or to do it for it) will be
+ * the same as when telling it to share/zero a page.
+ *
+ *
+ * @section sec_pgmPhys_Tracking Tracking Structures And Their Cost
+ *
+ * There's a difficult balance between keeping the per-page tracking structures
+ * (global and guest page) easy to use and keeping them from eating too much
+ * memory. We have limited virtual memory resources available when operating in
+ * 32-bit kernel space (on 64-bit it's quite a different story). The
+ * tracking structures will be designed such that we can deal with up
+ * to 32GB of memory on a 32-bit system and essentially unlimited on 64-bit ones.
+ *
+ *
+ * @subsection subsec_pgmPhys_Tracking_Kernel Kernel Space
+ *
+ * @see pg_GMM
+ *
+ * @subsection subsec_pgmPhys_Tracking_PerVM Per-VM
+ *
+ * Fixed info is the physical address of the page (HCPhys) and the page id
+ * (described above). Theoretically we'll need 48(-12) bits for the HCPhys part.
+ * Today we're restricting ourselves to 40(-12) bits because this is the current
+ * restriction of all AMD64 implementations (I think Barcelona will up this
+ * to 48(-12) bits, not that it really matters) and I needed the bits for
+ * tracking mappings of a page. 48-12 = 36. That leaves 28 bits, which means a
+ * decent range for the page id: 2^(28+12) = 1024TB.
+ *
+ * In addition to these, we'll have to keep maintaining the page flags as we
+ * currently do. Although it wouldn't harm to optimize these quite a bit, like
+ * for instance the ROM shouldn't depend on having a write handler installed
+ * in order for it to become read-only. A RO/RW bit should be considered so
+ * that the page syncing code doesn't have to mess about checking multiple
+ * flag combinations (ROM || RW handler || write monitored) in order to
+ * figure out how to set up a shadow PTE. But this, of course, is second
+ * priority at present. Currently this requires 12 bits, but could probably
+ * be optimized to ~8.
+ *
+ * Then there's the 24 bits used to track which shadow page tables are
+ * currently mapping a page for the purpose of speeding up physical
+ * access handlers, and thereby the page pool cache. More bits for this
+ * purpose wouldn't hurt IIRC.
+ *
+ * Then we need to record what kind of page
+ * this is: shared, zero, normal or write-monitored-normal. This'll
+ * require 2 bits. One bit might be needed for indicating whether a
+ * write monitored page has been written to. And yet another one or
+ * two for tracking migration status. 3-4 bits total then.
+ *
+ * Whatever is left can be used to record the sharability of a
+ * page. The page checksum will not be stored in the per-VM table as
+ * the idle thread will not be permitted to do modifications to it.
+ * It will instead have to keep its own working set of potentially
+ * shareable pages and their checksums and stuff.
+ *
+ * For the present we'll keep the current packing of the
+ * PGMRAMRANGE::aHCPhys to keep the changes simple, only of course,
+ * we'll have to change it to a struct with a total of 128 bits at
+ * our disposal.
+ *
+ * The initial layout will be like this:
+ * @verbatim
+    RTHCPHYS HCPhys;            The current stuff.
+        63:40                   Current shadow PT tracking stuff.
+        39:12                   The physical page frame number.
+        11:0                    The current flags.
+ uint32_t u28PageId : 28; The page id. + uint32_t u2State : 2; The page state { zero, shared, normal, write monitored }. + uint32_t fWrittenTo : 1; Whether a write monitored page was written to. + uint32_t u1Reserved : 1; Reserved for later. + uint32_t u32Reserved; Reserved for later, mostly sharing stats. + @endverbatim + * + * The final layout will be something like this: + * @verbatim + RTHCPHYS HCPhys; The current stuff. + 63:48 High page id (12+). + 47:12 The physical page frame number. + 11:0 Low page id. + uint32_t fReadOnly : 1; Whether it's readonly page (rom or monitored in some way). + uint32_t u3Type : 3; The page type {RESERVED, MMIO, MMIO2, ROM, shadowed ROM, RAM}. + uint32_t u2PhysMon : 2; Physical access handler type {none, read, write, all}. + uint32_t u2VirtMon : 2; Virtual access handler type {none, read, write, all}.. + uint32_t u2State : 2; The page state { zero, shared, normal, write monitored }. + uint32_t fWrittenTo : 1; Whether a write monitored page was written to. + uint32_t u20Reserved : 20; Reserved for later, mostly sharing stats. + uint32_t u32Tracking; The shadow PT tracking stuff, roughly. + @endverbatim + * + * Cost wise, this means we'll double the cost for guest memory. There isn't anyway + * around that I'm afraid. It means that the cost of dealing out 32GB of memory + * to one or more VMs is: (32GB >> PAGE_SHIFT) * 16 bytes, or 128MBs. Or another + * example, the VM heap cost when assigning 1GB to a VM will be: 4MB. + * + * A couple of cost examples for the total cost per-VM + kernel. + * 32-bit Windows and 32-bit linux: + * 1GB guest ram, 256K pages: 4MB + 2MB(+) = 6MB + * 4GB guest ram, 1M pages: 16MB + 8MB(+) = 24MB + * 32GB guest ram, 8M pages: 128MB + 64MB(+) = 192MB + * 64-bit Windows and 64-bit linux: + * 1GB guest ram, 256K pages: 4MB + 3MB(+) = 7MB + * 4GB guest ram, 1M pages: 16MB + 12MB(+) = 28MB + * 32GB guest ram, 8M pages: 128MB + 96MB(+) = 224MB + * + * UPDATE - 2007-09-27: + * Will need a ballooned flag/state too because we cannot + * trust the guest 100% and reporting the same page as ballooned more + * than once will put the GMM off balance. + * + * + * @section sec_pgmPhys_Serializing Serializing Access + * + * Initially, we'll try a simple scheme: + * + * - The per-VM RAM tracking structures (PGMRAMRANGE) is only modified + * by the EMT thread of that VM while in the pgm critsect. + * - Other threads in the VM process that needs to make reliable use of + * the per-VM RAM tracking structures will enter the critsect. + * - No process external thread or kernel thread will ever try enter + * the pgm critical section, as that just won't work. + * - The idle thread (and similar threads) doesn't not need 100% reliable + * data when performing it tasks as the EMT thread will be the one to + * do the actual changes later anyway. So, as long as it only accesses + * the main ram range, it can do so by somehow preventing the VM from + * being destroyed while it works on it... + * + * - The over-commitment management, including the allocating/freeing + * chunks, is serialized by a ring-0 mutex lock (a fast one since the + * more mundane mutex implementation is broken on Linux). + * - A separate mutex is protecting the set of allocation chunks so + * that pages can be shared or/and freed up while some other VM is + * allocating more chunks. This mutex can be take from under the other + * one, but not the other way around. 
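+ *
+ * In code terms the ring-3 rules above boil down to the classic
+ * enter/access/leave pattern. A tiny generic sketch (plain C with a pthread
+ * mutex standing in for the PGM critical section and a toy list standing in
+ * for the PGMRAMRANGE structures; none of this is the real API):
+ * @verbatim
+    #include <pthread.h>
+    #include <stdint.h>
+
+    typedef struct ToyRamRange { uint64_t GCPhysFirst, GCPhysLast; struct ToyRamRange *pNext; } ToyRamRange;
+
+    static pthread_mutex_t g_ToyPgmLock = PTHREAD_MUTEX_INITIALIZER;  /* "the pgm critsect" */
+    static ToyRamRange    *g_pToyRanges;                              /* "the RAM tracking structures" */
+
+    /* A non-EMT thread wanting reliable data: take the lock, look, let go. */
+    static ToyRamRange *toyLookupRange(uint64_t GCPhys)
+    {
+        pthread_mutex_lock(&g_ToyPgmLock);
+        ToyRamRange *pCur = g_pToyRanges;
+        while (pCur && (GCPhys < pCur->GCPhysFirst || GCPhys > pCur->GCPhysLast))
+            pCur = pCur->pNext;
+        pthread_mutex_unlock(&g_ToyPgmLock);
+        return pCur;  /* only valid as long as the range cannot go away, see the caveats above */
+    }
+   @endverbatim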
+ *
+ *
+ * @section sec_pgmPhys_Request VM Request interface
+ *
+ * When in ring-0 it will become necessary to send requests to a VM so it can
+ * for instance move a page while defragmenting during VM destroy. The idle
+ * thread will make use of this interface to request VMs to set up shared
+ * pages and to perform write monitoring of pages.
+ *
+ * I would propose an interface similar to the current VMReq interface, in
+ * that it doesn't require locking and that the one sending the request may
+ * wait for completion if it wishes to. This shouldn't be very difficult to
+ * realize.
+ *
+ * The requests themselves are also pretty simple. They are basically:
+ *   -# Check that some precondition is still true.
+ *   -# Do the update.
+ *   -# Update all shadow page tables involved with the page.
+ *
+ * The 3rd step is identical to what we're already doing when updating a
+ * physical handler, see pgmHandlerPhysicalSetRamFlagsAndFlushShadowPTs.
+ *
+ *
+ *
+ * @section sec_pgmPhys_MappingCaches Mapping Caches
+ *
+ * In order to be able to map memory in and out and to be able to support
+ * guests with more RAM than we've got virtual address space, we'll be employing
+ * a mapping cache. Normally ring-0 and ring-3 can share the same cache,
+ * however on 32-bit darwin the ring-0 code is running in a different memory
+ * context and therefore needs a separate cache. In raw-mode context we also
+ * need a separate cache. The 32-bit darwin mapping cache and the one for
+ * raw-mode context share a lot of code, see PGMRZDYNMAP.
+ *
+ *
+ * @subsection subsec_pgmPhys_MappingCaches_R3 Ring-3
+ *
+ * We've considered implementing the ring-3 mapping cache page based but found
+ * that this was bothersome when one had to take into account TLBs+SMP and
+ * portability (missing the necessary APIs on several platforms). There were
+ * also some performance concerns with this approach which hadn't quite been
+ * worked out.
+ *
+ * Instead, we'll be mapping allocation chunks into the VM process. This simplifies
+ * matters quite a bit since we don't need to invent any new ring-0 stuff,
+ * only some minor RTR0MEMOBJ mapping stuff. The main concern here, compared
+ * to the previous idea, is that mapping or unmapping a 1MB chunk is more
+ * costly than a single page, although how much more costly is uncertain. We'll
+ * try to address this by using a very big cache, preferably bigger than the actual
+ * VM RAM size if possible. The current VM RAM sizes should give some idea for
+ * 32-bit boxes, while on 64-bit we can probably get away with employing an
+ * unlimited cache.
+ *
+ * The cache has two parts, as already indicated: the ring-3 side and the
+ * ring-0 side.
+ *
+ * The ring-0 side will be tied to the page allocator since it will operate on the
+ * memory objects it contains. It will therefore require the first ring-0 mutex
+ * discussed in @ref sec_pgmPhys_Serializing. We'll need some double housekeeping
+ * wrt who has mapped what, I think, since both VMMR0.r0 and RTR0MemObj will keep
+ * track of mapping relations.
+ *
+ * The ring-3 part will be protected by the pgm critsect. For simplicity, we'll
+ * require anyone that desires to make changes to the mapping cache to do so
+ * from within this critsect. Alternatively, we could employ a separate critsect
+ * for serializing changes to the mapping cache as this would reduce potential
+ * contention with other threads accessing mappings unrelated to the changes
+ * that are in progress.
We can see about this later, contention will show + * up in the statistics anyway, so it'll be simple to tell. + * + * The organization of the ring-3 part will be very much like how the allocation + * chunks are organized in ring-0, that is in an AVL tree by chunk id. To avoid + * having to walk the tree all the time, we'll have a couple of lookaside entries + * like in we do for I/O ports and MMIO in IOM. + * + * The simplified flow of a PGMPhysRead/Write function: + * -# Enter the PGM critsect. + * -# Lookup GCPhys in the ram ranges and get the Page ID. + * -# Calc the Allocation Chunk ID from the Page ID. + * -# Check the lookaside entries and then the AVL tree for the Chunk ID. + * If not found in cache: + * -# Call ring-0 and request it to be mapped and supply + * a chunk to be unmapped if the cache is maxed out already. + * -# Insert the new mapping into the AVL tree (id + R3 address). + * -# Update the relevant lookaside entry and return the mapping address. + * -# Do the read/write according to monitoring flags and everything. + * -# Leave the critsect. + * + * + * @section sec_pgmPhys_Fallback Fallback + * + * Current all the "second tier" hosts will not support the RTR0MemObjAllocPhysNC + * API and thus require a fallback. + * + * So, when RTR0MemObjAllocPhysNC returns VERR_NOT_SUPPORTED the page allocator + * will return to the ring-3 caller (and later ring-0) and asking it to seed + * the page allocator with some fresh pages (VERR_GMM_SEED_ME). Ring-3 will + * then perform an SUPR3PageAlloc(cbChunk >> PAGE_SHIFT) call and make a + * "SeededAllocPages" call to ring-0. + * + * The first time ring-0 sees the VERR_NOT_SUPPORTED failure it will disable + * all page sharing (zero page detection will continue). It will also force + * all allocations to come from the VM which seeded the page. Both these + * measures are taken to make sure that there will never be any need for + * mapping anything into ring-3 - everything will be mapped already. + * + * Whether we'll continue to use the current MM locked memory management + * for this I don't quite know (I'd prefer not to and just ditch that all + * together), we'll see what's simplest to do. + * + * + * + * @section sec_pgmPhys_Changes Changes + * + * Breakdown of the changes involved? + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include "PGMInternal.h" +#include +#include +#include "PGMInline.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Argument package for pgmR3RElocatePhysHnadler, pgmR3RelocateVirtHandler and + * pgmR3RelocateHyperVirtHandler. + */ +typedef struct PGMRELOCHANDLERARGS +{ + RTGCINTPTR offDelta; + PVM pVM; +} PGMRELOCHANDLERARGS; +/** Pointer to a page access handlere relocation argument package. 
*/ +typedef PGMRELOCHANDLERARGS const *PCPGMRELOCHANDLERARGS; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int pgmR3InitPaging(PVM pVM); +static int pgmR3InitStats(PVM pVM); +static DECLCALLBACK(void) pgmR3PhysInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) pgmR3InfoMode(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) pgmR3InfoCr3(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(int) pgmR3RelocatePhysHandler(PAVLROGCPHYSNODECORE pNode, void *pvUser); +#ifdef VBOX_WITH_RAW_MODE +static DECLCALLBACK(int) pgmR3RelocateVirtHandler(PAVLROGCPTRNODECORE pNode, void *pvUser); +static DECLCALLBACK(int) pgmR3RelocateHyperVirtHandler(PAVLROGCPTRNODECORE pNode, void *pvUser); +#endif /* VBOX_WITH_RAW_MODE */ +#ifdef VBOX_STRICT +static FNVMATSTATE pgmR3ResetNoMorePhysWritesFlag; +#endif + +#ifdef VBOX_WITH_DEBUGGER +static FNDBGCCMD pgmR3CmdError; +static FNDBGCCMD pgmR3CmdSync; +static FNDBGCCMD pgmR3CmdSyncAlways; +# ifdef VBOX_STRICT +static FNDBGCCMD pgmR3CmdAssertCR3; +# endif +static FNDBGCCMD pgmR3CmdPhysToFile; +#endif + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_DEBUGGER +/** Argument descriptors for '.pgmerror' and '.pgmerroroff'. */ +static const DBGCVARDESC g_aPgmErrorArgs[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 0, 1, DBGCVAR_CAT_STRING, 0, "where", "Error injection location." }, +}; + +static const DBGCVARDESC g_aPgmPhysToFileArgs[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 1, 1, DBGCVAR_CAT_STRING, 0, "file", "The file name." }, + { 0, 1, DBGCVAR_CAT_STRING, 0, "nozero", "If present, zero pages are skipped." }, +}; + +# ifdef DEBUG_sandervl +static const DBGCVARDESC g_aPgmCountPhysWritesArgs[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 1, 1, DBGCVAR_CAT_STRING, 0, "enabled", "on/off." }, + { 1, 1, DBGCVAR_CAT_NUMBER_NO_RANGE, 0, "interval", "Interval in ms." }, +}; +# endif + +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax, ....pszDescription */ + { "pgmsync", 0, 0, NULL, 0, 0, pgmR3CmdSync, "", "Sync the CR3 page." }, + { "pgmerror", 0, 1, &g_aPgmErrorArgs[0], 1, 0, pgmR3CmdError, "", "Enables inject runtime of errors into parts of PGM." }, + { "pgmerroroff", 0, 1, &g_aPgmErrorArgs[0], 1, 0, pgmR3CmdError, "", "Disables inject runtime errors into parts of PGM." }, +# ifdef VBOX_STRICT + { "pgmassertcr3", 0, 0, NULL, 0, 0, pgmR3CmdAssertCR3, "", "Check the shadow CR3 mapping." }, +# ifdef VBOX_WITH_PAGE_SHARING + { "pgmcheckduppages", 0, 0, NULL, 0, 0, pgmR3CmdCheckDuplicatePages, "", "Check for duplicate pages in all running VMs." }, + { "pgmsharedmodules", 0, 0, NULL, 0, 0, pgmR3CmdShowSharedModules, "", "Print shared modules info." }, +# endif +# endif + { "pgmsyncalways", 0, 0, NULL, 0, 0, pgmR3CmdSyncAlways, "", "Toggle permanent CR3 syncing." 
}, + { "pgmphystofile", 1, 2, &g_aPgmPhysToFileArgs[0], 2, 0, pgmR3CmdPhysToFile, "", "Save the physical memory to file." }, +}; +#endif + + + + +/** + * Initiates the paging of VM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3Init(PVM pVM) +{ + LogFlow(("PGMR3Init:\n")); + PCFGMNODE pCfgPGM = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/PGM"); + int rc; + + /* + * Assert alignment and sizes. + */ + AssertCompile(sizeof(pVM->pgm.s) <= sizeof(pVM->pgm.padding)); + AssertCompile(sizeof(pVM->aCpus[0].pgm.s) <= sizeof(pVM->aCpus[0].pgm.padding)); + AssertCompileMemberAlignment(PGM, CritSectX, sizeof(uintptr_t)); + + /* + * Init the structure. + */ + pVM->pgm.s.offVM = RT_UOFFSETOF(VM, pgm.s); + pVM->pgm.s.offVCpuPGM = RT_UOFFSETOF(VMCPU, pgm.s); + /*pVM->pgm.s.fRestoreRomPagesAtReset = false;*/ + + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++) + { + pVM->pgm.s.aHandyPages[i].HCPhysGCPhys = NIL_RTHCPHYS; + pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID; + pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID; + } + + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.aLargeHandyPage); i++) + { + pVM->pgm.s.aLargeHandyPage[i].HCPhysGCPhys = NIL_RTHCPHYS; + pVM->pgm.s.aLargeHandyPage[i].idPage = NIL_GMM_PAGEID; + pVM->pgm.s.aLargeHandyPage[i].idSharedPage = NIL_GMM_PAGEID; + } + + /* Init the per-CPU part. */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + PPGMCPU pPGM = &pVCpu->pgm.s; + + pPGM->offVM = (uintptr_t)&pVCpu->pgm.s - (uintptr_t)pVM; + pPGM->offVCpu = RT_UOFFSETOF(VMCPU, pgm.s); + pPGM->offPGM = (uintptr_t)&pVCpu->pgm.s - (uintptr_t)&pVM->pgm.s; + + pPGM->enmShadowMode = PGMMODE_INVALID; + pPGM->enmGuestMode = PGMMODE_INVALID; + pPGM->idxGuestModeData = UINT8_MAX; + pPGM->idxShadowModeData = UINT8_MAX; + pPGM->idxBothModeData = UINT8_MAX; + + pPGM->GCPhysCR3 = NIL_RTGCPHYS; + + pPGM->pGst32BitPdR3 = NULL; + pPGM->pGstPaePdptR3 = NULL; + pPGM->pGstAmd64Pml4R3 = NULL; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pPGM->pGst32BitPdR0 = NIL_RTR0PTR; + pPGM->pGstPaePdptR0 = NIL_RTR0PTR; + pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR; +#endif + pPGM->pGst32BitPdRC = NIL_RTRCPTR; + pPGM->pGstPaePdptRC = NIL_RTRCPTR; + for (unsigned i = 0; i < RT_ELEMENTS(pVCpu->pgm.s.apGstPaePDsR3); i++) + { + pPGM->apGstPaePDsR3[i] = NULL; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR; +#endif + pPGM->apGstPaePDsRC[i] = NIL_RTRCPTR; + pPGM->aGCPhysGstPaePDs[i] = NIL_RTGCPHYS; + pPGM->aGstPaePdpeRegs[i].u = UINT64_MAX; + pPGM->aGCPhysGstPaePDsMonitored[i] = NIL_RTGCPHYS; + } + + pPGM->fA20Enabled = true; + pPGM->GCPhysA20Mask = ~((RTGCPHYS)!pPGM->fA20Enabled << 20); + } + + pVM->pgm.s.enmHostMode = SUPPAGINGMODE_INVALID; + pVM->pgm.s.GCPhys4MBPSEMask = RT_BIT_64(32) - 1; /* default; checked later */ + pVM->pgm.s.GCPtrPrevRamRangeMapping = MM_HYPER_AREA_ADDRESS; + + rc = CFGMR3QueryBoolDef(CFGMR3GetRoot(pVM), "RamPreAlloc", &pVM->pgm.s.fRamPreAlloc, +#ifdef VBOX_WITH_PREALLOC_RAM_BY_DEFAULT + true +#else + false +#endif + ); + AssertLogRelRCReturn(rc, rc); + +#if HC_ARCH_BITS == 32 +# ifdef RT_OS_DARWIN + rc = CFGMR3QueryU32Def(pCfgPGM, "MaxRing3Chunks", &pVM->pgm.s.ChunkR3Map.cMax, _1G / GMM_CHUNK_SIZE * 3); +# else + rc = CFGMR3QueryU32Def(pCfgPGM, "MaxRing3Chunks", &pVM->pgm.s.ChunkR3Map.cMax, _1G / GMM_CHUNK_SIZE); +# endif +#else + rc = CFGMR3QueryU32Def(pCfgPGM, "MaxRing3Chunks", &pVM->pgm.s.ChunkR3Map.cMax, UINT32_MAX); +#endif + AssertLogRelRCReturn(rc, rc); + 
for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++) + pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID; + + /* + * Get the configured RAM size - to estimate saved state size. + */ + uint64_t cbRam; + rc = CFGMR3QueryU64(CFGMR3GetRoot(pVM), "RamSize", &cbRam); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + cbRam = 0; + else if (RT_SUCCESS(rc)) + { + if (cbRam < PAGE_SIZE) + cbRam = 0; + cbRam = RT_ALIGN_64(cbRam, PAGE_SIZE); + } + else + { + AssertMsgFailed(("Configuration error: Failed to query integer \"RamSize\", rc=%Rrc.\n", rc)); + return rc; + } + + /* + * Check for PCI pass-through and other configurables. + */ + rc = CFGMR3QueryBoolDef(pCfgPGM, "PciPassThrough", &pVM->pgm.s.fPciPassthrough, false); + AssertMsgRCReturn(rc, ("Configuration error: Failed to query integer \"PciPassThrough\", rc=%Rrc.\n", rc), rc); + AssertLogRelReturn(!pVM->pgm.s.fPciPassthrough || pVM->pgm.s.fRamPreAlloc, VERR_INVALID_PARAMETER); + + rc = CFGMR3QueryBoolDef(CFGMR3GetRoot(pVM), "PageFusionAllowed", &pVM->pgm.s.fPageFusionAllowed, false); + AssertLogRelRCReturn(rc, rc); + + /** @cfgm{/PGM/ZeroRamPagesOnReset, boolean, true} + * Whether to clear RAM pages on (hard) reset. */ + rc = CFGMR3QueryBoolDef(pCfgPGM, "ZeroRamPagesOnReset", &pVM->pgm.s.fZeroRamPagesOnReset, true); + AssertLogRelRCReturn(rc, rc); + +#ifdef VBOX_WITH_STATISTICS + /* + * Allocate memory for the statistics before someone tries to use them. + */ + size_t cbTotalStats = RT_ALIGN_Z(sizeof(PGMSTATS), 64) + RT_ALIGN_Z(sizeof(PGMCPUSTATS), 64) * pVM->cCpus; + void *pv; + rc = MMHyperAlloc(pVM, RT_ALIGN_Z(cbTotalStats, PAGE_SIZE), PAGE_SIZE, MM_TAG_PGM, &pv); + AssertRCReturn(rc, rc); + + pVM->pgm.s.pStatsR3 = (PGMSTATS *)pv; + pVM->pgm.s.pStatsR0 = MMHyperCCToR0(pVM, pv); + pVM->pgm.s.pStatsRC = MMHyperCCToRC(pVM, pv); + pv = (uint8_t *)pv + RT_ALIGN_Z(sizeof(PGMSTATS), 64); + + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + pVM->aCpus[iCpu].pgm.s.pStatsR3 = (PGMCPUSTATS *)pv; + pVM->aCpus[iCpu].pgm.s.pStatsR0 = MMHyperCCToR0(pVM, pv); + pVM->aCpus[iCpu].pgm.s.pStatsRC = MMHyperCCToRC(pVM, pv); + + pv = (uint8_t *)pv + RT_ALIGN_Z(sizeof(PGMCPUSTATS), 64); + } +#endif /* VBOX_WITH_STATISTICS */ + + /* + * Register callbacks, string formatters and the saved state data unit. + */ +#ifdef VBOX_STRICT + VMR3AtStateRegister(pVM->pUVM, pgmR3ResetNoMorePhysWritesFlag, NULL); +#endif + PGMRegisterStringFormatTypes(); + + rc = pgmR3InitSavedState(pVM, cbRam); + if (RT_FAILURE(rc)) + return rc; + + /* + * Initialize the PGM critical section and flush the phys TLBs + */ + rc = PDMR3CritSectInit(pVM, &pVM->pgm.s.CritSectX, RT_SRC_POS, "PGM"); + AssertRCReturn(rc, rc); + + PGMR3PhysChunkInvalidateTLB(pVM); + pgmPhysInvalidatePageMapTLB(pVM); + + /* + * For the time being we sport a full set of handy pages in addition to the base + * memory to simplify things. + */ + rc = MMR3ReserveHandyPages(pVM, RT_ELEMENTS(pVM->pgm.s.aHandyPages)); /** @todo this should be changed to PGM_HANDY_PAGES_MIN but this needs proper testing... */ + AssertRCReturn(rc, rc); + + /* + * Trees + */ + rc = MMHyperAlloc(pVM, sizeof(PGMTREES), 0, MM_TAG_PGM, (void **)&pVM->pgm.s.pTreesR3); + if (RT_SUCCESS(rc)) + { + pVM->pgm.s.pTreesR0 = MMHyperR3ToR0(pVM, pVM->pgm.s.pTreesR3); + pVM->pgm.s.pTreesRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pTreesR3); + } + + /* + * Allocate the zero page. 
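+     * (All guest RAM starts out backed by this single shared page; real pages are
+     *  only allocated when something writes, cf. the "Allocating a page" section above.)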
+ */ + if (RT_SUCCESS(rc)) + { + rc = MMHyperAlloc(pVM, PAGE_SIZE, PAGE_SIZE, MM_TAG_PGM, &pVM->pgm.s.pvZeroPgR3); + if (RT_SUCCESS(rc)) + { + pVM->pgm.s.pvZeroPgRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pvZeroPgR3); + pVM->pgm.s.pvZeroPgR0 = MMHyperR3ToR0(pVM, pVM->pgm.s.pvZeroPgR3); + pVM->pgm.s.HCPhysZeroPg = MMR3HyperHCVirt2HCPhys(pVM, pVM->pgm.s.pvZeroPgR3); + AssertRelease(pVM->pgm.s.HCPhysZeroPg != NIL_RTHCPHYS); + } + } + + /* + * Allocate the invalid MMIO page. + * (The invalid bits in HCPhysInvMmioPg are set later on init complete.) + */ + if (RT_SUCCESS(rc)) + { + rc = MMHyperAlloc(pVM, PAGE_SIZE, PAGE_SIZE, MM_TAG_PGM, &pVM->pgm.s.pvMmioPgR3); + if (RT_SUCCESS(rc)) + { + ASMMemFill32(pVM->pgm.s.pvMmioPgR3, PAGE_SIZE, 0xfeedface); + pVM->pgm.s.HCPhysMmioPg = MMR3HyperHCVirt2HCPhys(pVM, pVM->pgm.s.pvMmioPgR3); + AssertRelease(pVM->pgm.s.HCPhysMmioPg != NIL_RTHCPHYS); + pVM->pgm.s.HCPhysInvMmioPg = pVM->pgm.s.HCPhysMmioPg; + } + } + + /* + * Register the physical access handler protecting ROMs. + */ + if (RT_SUCCESS(rc)) + rc = PGMR3HandlerPhysicalTypeRegister(pVM, PGMPHYSHANDLERKIND_WRITE, + pgmPhysRomWriteHandler, + NULL, NULL, "pgmPhysRomWritePfHandler", + NULL, NULL, "pgmPhysRomWritePfHandler", + "ROM write protection", + &pVM->pgm.s.hRomPhysHandlerType); + + /* + * Init the paging. + */ + if (RT_SUCCESS(rc)) + rc = pgmR3InitPaging(pVM); + + /* + * Init the page pool. + */ + if (RT_SUCCESS(rc)) + rc = pgmR3PoolInit(pVM); + + if (RT_SUCCESS(rc)) + { + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + rc = PGMHCChangeMode(pVM, pVCpu, PGMMODE_REAL); + if (RT_FAILURE(rc)) + break; + } + } + + if (RT_SUCCESS(rc)) + { + /* + * Info & statistics + */ + DBGFR3InfoRegisterInternalEx(pVM, "mode", + "Shows the current paging mode. " + "Recognizes 'all', 'guest', 'shadow' and 'host' as arguments, defaulting to 'all' if nothing is given.", + pgmR3InfoMode, + DBGFINFO_FLAGS_ALL_EMTS); + DBGFR3InfoRegisterInternal(pVM, "pgmcr3", + "Dumps all the entries in the top level paging table. No arguments.", + pgmR3InfoCr3); + DBGFR3InfoRegisterInternal(pVM, "phys", + "Dumps all the physical address ranges. Pass 'verbose' to get more details.", + pgmR3PhysInfo); + DBGFR3InfoRegisterInternal(pVM, "handlers", + "Dumps physical, virtual and hyper virtual handlers. " + "Pass 'phys', 'virt', 'hyper' as argument if only one kind is wanted." + "Add 'nost' if the statistics are unwanted, use together with 'all' or explicit selection.", + pgmR3InfoHandlers); + DBGFR3InfoRegisterInternal(pVM, "mappings", + "Dumps guest mappings.", + pgmR3MapInfo); + + pgmR3InitStats(pVM); + +#ifdef VBOX_WITH_DEBUGGER + /* + * Debugger commands. + */ + static bool s_fRegisteredCmds = false; + if (!s_fRegisteredCmds) + { + int rc2 = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + if (RT_SUCCESS(rc2)) + s_fRegisteredCmds = true; + } +#endif + return VINF_SUCCESS; + } + + /* Almost no cleanup necessary, MM frees all memory. */ + PDMR3CritSectDelete(&pVM->pgm.s.CritSectX); + + return rc; +} + + +/** + * Init paging. + * + * Since we need to check what mode the host is operating in before we can choose + * the right paging functions for the host we have to delay this until R0 has + * been initialized. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int pgmR3InitPaging(PVM pVM) +{ + /* + * Force a recalculation of modes and switcher so everyone gets notified. 
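+     * (Shadow and guest modes are set back to PGMMODE_INVALID here and are
+     *  re-derived once the host paging mode has been queried below.)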
+ */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->pgm.s.enmShadowMode = PGMMODE_INVALID; + pVCpu->pgm.s.enmGuestMode = PGMMODE_INVALID; + pVCpu->pgm.s.idxGuestModeData = UINT8_MAX; + pVCpu->pgm.s.idxShadowModeData = UINT8_MAX; + pVCpu->pgm.s.idxBothModeData = UINT8_MAX; + } + + pVM->pgm.s.enmHostMode = SUPPAGINGMODE_INVALID; + + /* + * Allocate static mapping space for whatever the cr3 register + * points to and in the case of PAE mode to the 4 PDs. + */ + int rc = MMR3HyperReserve(pVM, PAGE_SIZE * 5, "CR3 mapping", &pVM->pgm.s.GCPtrCR3Mapping); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Failed to reserve two pages for cr mapping in HMA, rc=%Rrc\n", rc)); + return rc; + } + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + + /* + * Allocate pages for the three possible intermediate contexts + * (AMD64, PAE and plain 32-Bit). We maintain all three contexts + * for the sake of simplicity. The AMD64 uses the PAE for the + * lower levels, making the total number of pages 11 (3 + 7 + 1). + * + * We assume that two page tables will be enought for the core code + * mappings (HC virtual and identity). + */ + pVM->pgm.s.pInterPD = (PX86PD)MMR3PageAllocLow(pVM); AssertReturn(pVM->pgm.s.pInterPD, VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPTs[0] = (PX86PT)MMR3PageAllocLow(pVM); AssertReturn(pVM->pgm.s.apInterPTs[0], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPTs[1] = (PX86PT)MMR3PageAllocLow(pVM); AssertReturn(pVM->pgm.s.apInterPTs[1], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPaePTs[0] = (PX86PTPAE)MMR3PageAlloc(pVM); AssertReturn(pVM->pgm.s.apInterPaePTs[0], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPaePTs[1] = (PX86PTPAE)MMR3PageAlloc(pVM); AssertReturn(pVM->pgm.s.apInterPaePTs[1], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPaePDs[0] = (PX86PDPAE)MMR3PageAlloc(pVM); AssertReturn(pVM->pgm.s.apInterPaePDs[0], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPaePDs[1] = (PX86PDPAE)MMR3PageAlloc(pVM); AssertReturn(pVM->pgm.s.apInterPaePDs[1], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPaePDs[2] = (PX86PDPAE)MMR3PageAlloc(pVM); AssertReturn(pVM->pgm.s.apInterPaePDs[2], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.apInterPaePDs[3] = (PX86PDPAE)MMR3PageAlloc(pVM); AssertReturn(pVM->pgm.s.apInterPaePDs[3], VERR_NO_PAGE_MEMORY); + pVM->pgm.s.pInterPaePDPT = (PX86PDPT)MMR3PageAllocLow(pVM); AssertReturn(pVM->pgm.s.pInterPaePDPT, VERR_NO_PAGE_MEMORY); + pVM->pgm.s.pInterPaePDPT64 = (PX86PDPT)MMR3PageAllocLow(pVM); AssertReturn(pVM->pgm.s.pInterPaePDPT64, VERR_NO_PAGE_MEMORY); + pVM->pgm.s.pInterPaePML4 = (PX86PML4)MMR3PageAllocLow(pVM); AssertReturn(pVM->pgm.s.pInterPaePML4, VERR_NO_PAGE_MEMORY); + + pVM->pgm.s.HCPhysInterPD = MMPage2Phys(pVM, pVM->pgm.s.pInterPD); + AssertRelease(pVM->pgm.s.HCPhysInterPD != NIL_RTHCPHYS && !(pVM->pgm.s.HCPhysInterPD & PAGE_OFFSET_MASK)); + pVM->pgm.s.HCPhysInterPaePDPT = MMPage2Phys(pVM, pVM->pgm.s.pInterPaePDPT); + AssertRelease(pVM->pgm.s.HCPhysInterPaePDPT != NIL_RTHCPHYS && !(pVM->pgm.s.HCPhysInterPaePDPT & PAGE_OFFSET_MASK)); + pVM->pgm.s.HCPhysInterPaePML4 = MMPage2Phys(pVM, pVM->pgm.s.pInterPaePML4); + AssertRelease(pVM->pgm.s.HCPhysInterPaePML4 != NIL_RTHCPHYS && !(pVM->pgm.s.HCPhysInterPaePML4 & PAGE_OFFSET_MASK) && pVM->pgm.s.HCPhysInterPaePML4 < 0xffffffff); + + /* + * Initialize the pages, setting up the PML4 and PDPT for repetitive 4GB action. 
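+     * (Every 64-bit PDPT and PML4 entry is pointed at the same four PAE page
+     *  directories, so the low 4GB mapping simply repeats throughout the address space.)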
+ */ + ASMMemZeroPage(pVM->pgm.s.pInterPD); + ASMMemZeroPage(pVM->pgm.s.apInterPTs[0]); + ASMMemZeroPage(pVM->pgm.s.apInterPTs[1]); + + ASMMemZeroPage(pVM->pgm.s.apInterPaePTs[0]); + ASMMemZeroPage(pVM->pgm.s.apInterPaePTs[1]); + + ASMMemZeroPage(pVM->pgm.s.pInterPaePDPT); + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.apInterPaePDs); i++) + { + ASMMemZeroPage(pVM->pgm.s.apInterPaePDs[i]); + pVM->pgm.s.pInterPaePDPT->a[i].u = X86_PDPE_P | PGM_PLXFLAGS_PERMANENT + | MMPage2Phys(pVM, pVM->pgm.s.apInterPaePDs[i]); + } + + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.pInterPaePDPT64->a); i++) + { + const unsigned iPD = i % RT_ELEMENTS(pVM->pgm.s.apInterPaePDs); + pVM->pgm.s.pInterPaePDPT64->a[i].u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A | PGM_PLXFLAGS_PERMANENT + | MMPage2Phys(pVM, pVM->pgm.s.apInterPaePDs[iPD]); + } + + RTHCPHYS HCPhysInterPaePDPT64 = MMPage2Phys(pVM, pVM->pgm.s.pInterPaePDPT64); + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.pInterPaePML4->a); i++) + pVM->pgm.s.pInterPaePML4->a[i].u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A | PGM_PLXFLAGS_PERMANENT + | HCPhysInterPaePDPT64; + + /* + * Initialize paging workers and mode from current host mode + * and the guest running in real mode. + */ + pVM->pgm.s.enmHostMode = SUPR3GetPagingMode(); + switch (pVM->pgm.s.enmHostMode) + { + case SUPPAGINGMODE_32_BIT: + case SUPPAGINGMODE_32_BIT_GLOBAL: + case SUPPAGINGMODE_PAE: + case SUPPAGINGMODE_PAE_GLOBAL: + case SUPPAGINGMODE_PAE_NX: + case SUPPAGINGMODE_PAE_GLOBAL_NX: + break; + + case SUPPAGINGMODE_AMD64: + case SUPPAGINGMODE_AMD64_GLOBAL: + case SUPPAGINGMODE_AMD64_NX: + case SUPPAGINGMODE_AMD64_GLOBAL_NX: + if (ARCH_BITS != 64) + { + AssertMsgFailed(("Host mode %d (64-bit) is not supported by non-64bit builds\n", pVM->pgm.s.enmHostMode)); + LogRel(("PGM: Host mode %d (64-bit) is not supported by non-64bit builds\n", pVM->pgm.s.enmHostMode)); + return VERR_PGM_UNSUPPORTED_HOST_PAGING_MODE; + } + break; + default: + AssertMsgFailed(("Host mode %d is not supported\n", pVM->pgm.s.enmHostMode)); + return VERR_PGM_UNSUPPORTED_HOST_PAGING_MODE; + } + + LogFlow(("pgmR3InitPaging: returns successfully\n")); +#if HC_ARCH_BITS == 64 + LogRel(("PGM: HCPhysInterPD=%RHp HCPhysInterPaePDPT=%RHp HCPhysInterPaePML4=%RHp\n", + pVM->pgm.s.HCPhysInterPD, pVM->pgm.s.HCPhysInterPaePDPT, pVM->pgm.s.HCPhysInterPaePML4)); + LogRel(("PGM: apInterPTs={%RHp,%RHp} apInterPaePTs={%RHp,%RHp} apInterPaePDs={%RHp,%RHp,%RHp,%RHp} pInterPaePDPT64=%RHp\n", + MMPage2Phys(pVM, pVM->pgm.s.apInterPTs[0]), MMPage2Phys(pVM, pVM->pgm.s.apInterPTs[1]), + MMPage2Phys(pVM, pVM->pgm.s.apInterPaePTs[0]), MMPage2Phys(pVM, pVM->pgm.s.apInterPaePTs[1]), + MMPage2Phys(pVM, pVM->pgm.s.apInterPaePDs[0]), MMPage2Phys(pVM, pVM->pgm.s.apInterPaePDs[1]), MMPage2Phys(pVM, pVM->pgm.s.apInterPaePDs[2]), MMPage2Phys(pVM, pVM->pgm.s.apInterPaePDs[3]), + MMPage2Phys(pVM, pVM->pgm.s.pInterPaePDPT64))); +#endif + + /* + * Log the host paging mode. It may come in handy. 
+ */ + const char *pszHostMode; + switch (pVM->pgm.s.enmHostMode) + { + case SUPPAGINGMODE_32_BIT: pszHostMode = "32-bit"; break; + case SUPPAGINGMODE_32_BIT_GLOBAL: pszHostMode = "32-bit+PGE"; break; + case SUPPAGINGMODE_PAE: pszHostMode = "PAE"; break; + case SUPPAGINGMODE_PAE_GLOBAL: pszHostMode = "PAE+PGE"; break; + case SUPPAGINGMODE_PAE_NX: pszHostMode = "PAE+NXE"; break; + case SUPPAGINGMODE_PAE_GLOBAL_NX: pszHostMode = "PAE+PGE+NXE"; break; + case SUPPAGINGMODE_AMD64: pszHostMode = "AMD64"; break; + case SUPPAGINGMODE_AMD64_GLOBAL: pszHostMode = "AMD64+PGE"; break; + case SUPPAGINGMODE_AMD64_NX: pszHostMode = "AMD64+NX"; break; + case SUPPAGINGMODE_AMD64_GLOBAL_NX: pszHostMode = "AMD64+PGE+NX"; break; + default: pszHostMode = "???"; break; + } + LogRel(("PGM: Host paging mode: %s\n", pszHostMode)); + + return VINF_SUCCESS; +} + + +/** + * Init statistics + * @returns VBox status code. + */ +static int pgmR3InitStats(PVM pVM) +{ + PPGM pPGM = &pVM->pgm.s; + int rc; + + /* + * Release statistics. + */ + /* Common - misc variables */ + STAM_REL_REG(pVM, &pPGM->cAllPages, STAMTYPE_U32, "/PGM/Page/cAllPages", STAMUNIT_COUNT, "The total number of pages."); + STAM_REL_REG(pVM, &pPGM->cPrivatePages, STAMTYPE_U32, "/PGM/Page/cPrivatePages", STAMUNIT_COUNT, "The number of private pages."); + STAM_REL_REG(pVM, &pPGM->cSharedPages, STAMTYPE_U32, "/PGM/Page/cSharedPages", STAMUNIT_COUNT, "The number of shared pages."); + STAM_REL_REG(pVM, &pPGM->cReusedSharedPages, STAMTYPE_U32, "/PGM/Page/cReusedSharedPages", STAMUNIT_COUNT, "The number of reused shared pages."); + STAM_REL_REG(pVM, &pPGM->cZeroPages, STAMTYPE_U32, "/PGM/Page/cZeroPages", STAMUNIT_COUNT, "The number of zero backed pages."); + STAM_REL_REG(pVM, &pPGM->cPureMmioPages, STAMTYPE_U32, "/PGM/Page/cPureMmioPages", STAMUNIT_COUNT, "The number of pure MMIO pages."); + STAM_REL_REG(pVM, &pPGM->cMonitoredPages, STAMTYPE_U32, "/PGM/Page/cMonitoredPages", STAMUNIT_COUNT, "The number of write monitored pages."); + STAM_REL_REG(pVM, &pPGM->cWrittenToPages, STAMTYPE_U32, "/PGM/Page/cWrittenToPages", STAMUNIT_COUNT, "The number of previously write monitored pages that have been written to."); + STAM_REL_REG(pVM, &pPGM->cWriteLockedPages, STAMTYPE_U32, "/PGM/Page/cWriteLockedPages", STAMUNIT_COUNT, "The number of write(/read) locked pages."); + STAM_REL_REG(pVM, &pPGM->cReadLockedPages, STAMTYPE_U32, "/PGM/Page/cReadLockedPages", STAMUNIT_COUNT, "The number of read (only) locked pages."); + STAM_REL_REG(pVM, &pPGM->cBalloonedPages, STAMTYPE_U32, "/PGM/Page/cBalloonedPages", STAMUNIT_COUNT, "The number of ballooned pages."); + STAM_REL_REG(pVM, &pPGM->cHandyPages, STAMTYPE_U32, "/PGM/Page/cHandyPages", STAMUNIT_COUNT, "The number of handy pages (not included in cAllPages)."); + STAM_REL_REG(pVM, &pPGM->cLargePages, STAMTYPE_U32, "/PGM/Page/cLargePages", STAMUNIT_COUNT, "The number of large pages allocated (includes disabled)."); + STAM_REL_REG(pVM, &pPGM->cLargePagesDisabled, STAMTYPE_U32, "/PGM/Page/cLargePagesDisabled", STAMUNIT_COUNT, "The number of disabled large pages."); + STAM_REL_REG(pVM, &pPGM->cRelocations, STAMTYPE_COUNTER, "/PGM/cRelocations", STAMUNIT_OCCURENCES,"Number of hypervisor relocations."); + STAM_REL_REG(pVM, &pPGM->ChunkR3Map.c, STAMTYPE_U32, "/PGM/ChunkR3Map/c", STAMUNIT_COUNT, "Number of mapped chunks."); + STAM_REL_REG(pVM, &pPGM->ChunkR3Map.cMax, STAMTYPE_U32, "/PGM/ChunkR3Map/cMax", STAMUNIT_COUNT, "Maximum number of mapped chunks."); + STAM_REL_REG(pVM, &pPGM->cMappedChunks, STAMTYPE_U32, "/PGM/ChunkR3Map/Mapped", 
STAMUNIT_COUNT, "Number of times we mapped a chunk."); + STAM_REL_REG(pVM, &pPGM->cUnmappedChunks, STAMTYPE_U32, "/PGM/ChunkR3Map/Unmapped", STAMUNIT_COUNT, "Number of times we unmapped a chunk."); + + STAM_REL_REG(pVM, &pPGM->StatLargePageReused, STAMTYPE_COUNTER, "/PGM/LargePage/Reused", STAMUNIT_OCCURENCES, "The number of times we've reused a large page."); + STAM_REL_REG(pVM, &pPGM->StatLargePageRefused, STAMTYPE_COUNTER, "/PGM/LargePage/Refused", STAMUNIT_OCCURENCES, "The number of times we couldn't use a large page."); + STAM_REL_REG(pVM, &pPGM->StatLargePageRecheck, STAMTYPE_COUNTER, "/PGM/LargePage/Recheck", STAMUNIT_OCCURENCES, "The number of times we've rechecked a disabled large page."); + + STAM_REL_REG(pVM, &pPGM->StatShModCheck, STAMTYPE_PROFILE, "/PGM/ShMod/Check", STAMUNIT_TICKS_PER_CALL, "Profiles the shared module checking."); + + /* Live save */ + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.fActive, STAMTYPE_U8, "/PGM/LiveSave/fActive", STAMUNIT_COUNT, "Active or not."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.cIgnoredPages, STAMTYPE_U32, "/PGM/LiveSave/cIgnoredPages", STAMUNIT_COUNT, "The number of ignored pages in the RAM ranges (i.e. MMIO, MMIO2 and ROM)."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.cDirtyPagesLong, STAMTYPE_U32, "/PGM/LiveSave/cDirtyPagesLong", STAMUNIT_COUNT, "Longer term dirty page average."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.cDirtyPagesShort, STAMTYPE_U32, "/PGM/LiveSave/cDirtyPagesShort", STAMUNIT_COUNT, "Short term dirty page average."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.cPagesPerSecond, STAMTYPE_U32, "/PGM/LiveSave/cPagesPerSecond", STAMUNIT_COUNT, "Pages per second."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.cSavedPages, STAMTYPE_U64, "/PGM/LiveSave/cSavedPages", STAMUNIT_COUNT, "The total number of saved pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Ram.cReadyPages, STAMTYPE_U32, "/PGM/LiveSave/Ram/cReadPages", STAMUNIT_COUNT, "RAM: Ready pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Ram.cDirtyPages, STAMTYPE_U32, "/PGM/LiveSave/Ram/cDirtyPages", STAMUNIT_COUNT, "RAM: Dirty pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Ram.cZeroPages, STAMTYPE_U32, "/PGM/LiveSave/Ram/cZeroPages", STAMUNIT_COUNT, "RAM: Ready zero pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Ram.cMonitoredPages, STAMTYPE_U32, "/PGM/LiveSave/Ram/cMonitoredPages", STAMUNIT_COUNT, "RAM: Write monitored pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Rom.cReadyPages, STAMTYPE_U32, "/PGM/LiveSave/Rom/cReadPages", STAMUNIT_COUNT, "ROM: Ready pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Rom.cDirtyPages, STAMTYPE_U32, "/PGM/LiveSave/Rom/cDirtyPages", STAMUNIT_COUNT, "ROM: Dirty pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Rom.cZeroPages, STAMTYPE_U32, "/PGM/LiveSave/Rom/cZeroPages", STAMUNIT_COUNT, "ROM: Ready zero pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Rom.cMonitoredPages, STAMTYPE_U32, "/PGM/LiveSave/Rom/cMonitoredPages", STAMUNIT_COUNT, "ROM: Write monitored pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Mmio2.cReadyPages, STAMTYPE_U32, "/PGM/LiveSave/Mmio2/cReadPages", STAMUNIT_COUNT, "MMIO2: Ready pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Mmio2.cDirtyPages, STAMTYPE_U32, "/PGM/LiveSave/Mmio2/cDirtyPages", STAMUNIT_COUNT, "MMIO2: Dirty pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Mmio2.cZeroPages, STAMTYPE_U32, "/PGM/LiveSave/Mmio2/cZeroPages", STAMUNIT_COUNT, "MMIO2: Ready zero pages."); + STAM_REL_REG_USED(pVM, &pPGM->LiveSave.Mmio2.cMonitoredPages,STAMTYPE_U32, 
"/PGM/LiveSave/Mmio2/cMonitoredPages",STAMUNIT_COUNT, "MMIO2: Write monitored pages."); + +#ifdef VBOX_WITH_STATISTICS + +# define PGM_REG_COUNTER(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, c, b); \ + AssertRC(rc); + +# define PGM_REG_COUNTER_BYTES(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, c, b); \ + AssertRC(rc); + +# define PGM_REG_PROFILE(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL, c, b); \ + AssertRC(rc); + + PGMSTATS *pStats = pVM->pgm.s.pStatsR3; + + PGM_REG_PROFILE(&pStats->StatAllocLargePage, "/PGM/LargePage/Prof/Alloc", "Time spent by the host OS for large page allocation."); + PGM_REG_PROFILE(&pStats->StatClearLargePage, "/PGM/LargePage/Prof/Clear", "Time spent clearing the newly allocated large pages."); + PGM_REG_COUNTER(&pStats->StatLargePageOverflow, "/PGM/LargePage/Overflow", "The number of times allocating a large page took too long."); + PGM_REG_PROFILE(&pStats->StatR3IsValidLargePage, "/PGM/LargePage/Prof/R3/IsValid", "pgmPhysIsValidLargePage profiling - R3."); + PGM_REG_PROFILE(&pStats->StatRZIsValidLargePage, "/PGM/LargePage/Prof/RZ/IsValid", "pgmPhysIsValidLargePage profiling - RZ."); + + PGM_REG_COUNTER(&pStats->StatR3DetectedConflicts, "/PGM/R3/DetectedConflicts", "The number of times PGMR3CheckMappingConflicts() detected a conflict."); + PGM_REG_PROFILE(&pStats->StatR3ResolveConflict, "/PGM/R3/ResolveConflict", "pgmR3SyncPTResolveConflict() profiling (includes the entire relocation)."); + PGM_REG_COUNTER(&pStats->StatR3PhysRead, "/PGM/R3/Phys/Read", "The number of times PGMPhysRead was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatR3PhysReadBytes, "/PGM/R3/Phys/Read/Bytes", "The number of bytes read by PGMPhysRead."); + PGM_REG_COUNTER(&pStats->StatR3PhysWrite, "/PGM/R3/Phys/Write", "The number of times PGMPhysWrite was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatR3PhysWriteBytes, "/PGM/R3/Phys/Write/Bytes", "The number of bytes written by PGMPhysWrite."); + PGM_REG_COUNTER(&pStats->StatR3PhysSimpleRead, "/PGM/R3/Phys/Simple/Read", "The number of times PGMPhysSimpleReadGCPtr was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatR3PhysSimpleReadBytes, "/PGM/R3/Phys/Simple/Read/Bytes", "The number of bytes read by PGMPhysSimpleReadGCPtr."); + PGM_REG_COUNTER(&pStats->StatR3PhysSimpleWrite, "/PGM/R3/Phys/Simple/Write", "The number of times PGMPhysSimpleWriteGCPtr was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatR3PhysSimpleWriteBytes, "/PGM/R3/Phys/Simple/Write/Bytes", "The number of bytes written by PGMPhysSimpleWriteGCPtr."); + + PGM_REG_COUNTER(&pStats->StatRZChunkR3MapTlbHits, "/PGM/ChunkR3Map/TlbHitsRZ", "TLB hits."); + PGM_REG_COUNTER(&pStats->StatRZChunkR3MapTlbMisses, "/PGM/ChunkR3Map/TlbMissesRZ", "TLB misses."); + PGM_REG_PROFILE(&pStats->StatChunkAging, "/PGM/ChunkR3Map/Map/Aging", "Chunk aging profiling."); + PGM_REG_PROFILE(&pStats->StatChunkFindCandidate, "/PGM/ChunkR3Map/Map/Find", "Chunk unmap find profiling."); + PGM_REG_PROFILE(&pStats->StatChunkUnmap, "/PGM/ChunkR3Map/Map/Unmap", "Chunk unmap of address space profiling."); + PGM_REG_PROFILE(&pStats->StatChunkMap, "/PGM/ChunkR3Map/Map/Map", "Chunk map of address space profiling."); + + PGM_REG_COUNTER(&pStats->StatRZPageMapTlbHits, "/PGM/RZ/Page/MapTlbHits", "TLB hits."); + PGM_REG_COUNTER(&pStats->StatRZPageMapTlbMisses, "/PGM/RZ/Page/MapTlbMisses", "TLB misses."); + 
PGM_REG_COUNTER(&pStats->StatR3ChunkR3MapTlbHits, "/PGM/ChunkR3Map/TlbHitsR3", "TLB hits."); + PGM_REG_COUNTER(&pStats->StatR3ChunkR3MapTlbMisses, "/PGM/ChunkR3Map/TlbMissesR3", "TLB misses."); + PGM_REG_COUNTER(&pStats->StatR3PageMapTlbHits, "/PGM/R3/Page/MapTlbHits", "TLB hits."); + PGM_REG_COUNTER(&pStats->StatR3PageMapTlbMisses, "/PGM/R3/Page/MapTlbMisses", "TLB misses."); + PGM_REG_COUNTER(&pStats->StatPageMapTlbFlushes, "/PGM/R3/Page/MapTlbFlushes", "TLB flushes (all contexts)."); + PGM_REG_COUNTER(&pStats->StatPageMapTlbFlushEntry, "/PGM/R3/Page/MapTlbFlushEntry", "TLB entry flushes (all contexts)."); + + PGM_REG_COUNTER(&pStats->StatRZRamRangeTlbHits, "/PGM/RZ/RamRange/TlbHits", "TLB hits."); + PGM_REG_COUNTER(&pStats->StatRZRamRangeTlbMisses, "/PGM/RZ/RamRange/TlbMisses", "TLB misses."); + PGM_REG_COUNTER(&pStats->StatR3RamRangeTlbHits, "/PGM/R3/RamRange/TlbHits", "TLB hits."); + PGM_REG_COUNTER(&pStats->StatR3RamRangeTlbMisses, "/PGM/R3/RamRange/TlbMisses", "TLB misses."); + + PGM_REG_PROFILE(&pStats->StatRZSyncCR3HandlerVirtualUpdate, "/PGM/RZ/SyncCR3/Handlers/VirtualUpdate", "Profiling of the virtual handler updates."); + PGM_REG_PROFILE(&pStats->StatRZSyncCR3HandlerVirtualReset, "/PGM/RZ/SyncCR3/Handlers/VirtualReset", "Profiling of the virtual handler resets."); + PGM_REG_PROFILE(&pStats->StatR3SyncCR3HandlerVirtualUpdate, "/PGM/R3/SyncCR3/Handlers/VirtualUpdate", "Profiling of the virtual handler updates."); + PGM_REG_PROFILE(&pStats->StatR3SyncCR3HandlerVirtualReset, "/PGM/R3/SyncCR3/Handlers/VirtualReset", "Profiling of the virtual handler resets."); + + PGM_REG_COUNTER(&pStats->StatRZPhysHandlerReset, "/PGM/RZ/PhysHandlerReset", "The number of times PGMHandlerPhysicalReset is called."); + PGM_REG_COUNTER(&pStats->StatR3PhysHandlerReset, "/PGM/R3/PhysHandlerReset", "The number of times PGMHandlerPhysicalReset is called."); + PGM_REG_COUNTER(&pStats->StatRZPhysHandlerLookupHits, "/PGM/RZ/PhysHandlerLookupHits", "The number of cache hits when looking up physical handlers."); + PGM_REG_COUNTER(&pStats->StatR3PhysHandlerLookupHits, "/PGM/R3/PhysHandlerLookupHits", "The number of cache hits when looking up physical handlers."); + PGM_REG_COUNTER(&pStats->StatRZPhysHandlerLookupMisses, "/PGM/RZ/PhysHandlerLookupMisses", "The number of cache misses when looking up physical handlers."); + PGM_REG_COUNTER(&pStats->StatR3PhysHandlerLookupMisses, "/PGM/R3/PhysHandlerLookupMisses", "The number of cache misses when looking up physical handlers."); + PGM_REG_PROFILE(&pStats->StatRZVirtHandlerSearchByPhys, "/PGM/RZ/VirtHandlerSearchByPhys", "Profiling of pgmHandlerVirtualFindByPhysAddr."); + PGM_REG_PROFILE(&pStats->StatR3VirtHandlerSearchByPhys, "/PGM/R3/VirtHandlerSearchByPhys", "Profiling of pgmHandlerVirtualFindByPhysAddr."); + + PGM_REG_COUNTER(&pStats->StatRZPageReplaceShared, "/PGM/RZ/Page/ReplacedShared", "Times a shared page was replaced."); + PGM_REG_COUNTER(&pStats->StatRZPageReplaceZero, "/PGM/RZ/Page/ReplacedZero", "Times the zero page was replaced."); +/// @todo PGM_REG_COUNTER(&pStats->StatRZPageHandyAllocs, "/PGM/RZ/Page/HandyAllocs", "Number of times we've allocated more handy pages."); + PGM_REG_COUNTER(&pStats->StatR3PageReplaceShared, "/PGM/R3/Page/ReplacedShared", "Times a shared page was replaced."); + PGM_REG_COUNTER(&pStats->StatR3PageReplaceZero, "/PGM/R3/Page/ReplacedZero", "Times the zero page was replaced."); +/// @todo PGM_REG_COUNTER(&pStats->StatR3PageHandyAllocs, "/PGM/R3/Page/HandyAllocs", "Number of times we've allocated more handy pages."); + + 
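
The physical access statistics registered in this block come in pairs: a call counter (occurrences) plus a byte counter, e.g. /PGM/R3/Phys/Read and /PGM/R3/Phys/Read/Bytes. Keeping both is what lets an average transfer size per call be derived when the statistics are read back. A trivial sketch of that derivation, using made-up sample values rather than real STAM snapshots:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Made-up snapshot values standing in for StatR3PhysRead / StatR3PhysReadBytes. */
        uint64_t cCalls = 123456;
        uint64_t cBytes = 987654321;

        if (cCalls)
            printf("PGMPhysRead: %llu calls, avg %llu bytes/call\n",
                   (unsigned long long)cCalls, (unsigned long long)(cBytes / cCalls));
        else
            printf("PGMPhysRead: no calls recorded\n");
        return 0;
    }
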
PGM_REG_COUNTER(&pStats->StatRZPhysRead, "/PGM/RZ/Phys/Read", "The number of times PGMPhysRead was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRZPhysReadBytes, "/PGM/RZ/Phys/Read/Bytes", "The number of bytes read by PGMPhysRead."); + PGM_REG_COUNTER(&pStats->StatRZPhysWrite, "/PGM/RZ/Phys/Write", "The number of times PGMPhysWrite was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRZPhysWriteBytes, "/PGM/RZ/Phys/Write/Bytes", "The number of bytes written by PGMPhysWrite."); + PGM_REG_COUNTER(&pStats->StatRZPhysSimpleRead, "/PGM/RZ/Phys/Simple/Read", "The number of times PGMPhysSimpleReadGCPtr was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRZPhysSimpleReadBytes, "/PGM/RZ/Phys/Simple/Read/Bytes", "The number of bytes read by PGMPhysSimpleReadGCPtr."); + PGM_REG_COUNTER(&pStats->StatRZPhysSimpleWrite, "/PGM/RZ/Phys/Simple/Write", "The number of times PGMPhysSimpleWriteGCPtr was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRZPhysSimpleWriteBytes, "/PGM/RZ/Phys/Simple/Write/Bytes", "The number of bytes written by PGMPhysSimpleWriteGCPtr."); + + /* GC only: */ + PGM_REG_COUNTER(&pStats->StatRCInvlPgConflict, "/PGM/RC/InvlPgConflict", "Number of times PGMInvalidatePage() detected a mapping conflict."); + PGM_REG_COUNTER(&pStats->StatRCInvlPgSyncMonCR3, "/PGM/RC/InvlPgSyncMonitorCR3", "Number of times PGMInvalidatePage() ran into PGM_SYNC_MONITOR_CR3."); + + PGM_REG_COUNTER(&pStats->StatRCPhysRead, "/PGM/RC/Phys/Read", "The number of times PGMPhysRead was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRCPhysReadBytes, "/PGM/RC/Phys/Read/Bytes", "The number of bytes read by PGMPhysRead."); + PGM_REG_COUNTER(&pStats->StatRCPhysWrite, "/PGM/RC/Phys/Write", "The number of times PGMPhysWrite was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRCPhysWriteBytes, "/PGM/RC/Phys/Write/Bytes", "The number of bytes written by PGMPhysWrite."); + PGM_REG_COUNTER(&pStats->StatRCPhysSimpleRead, "/PGM/RC/Phys/Simple/Read", "The number of times PGMPhysSimpleReadGCPtr was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRCPhysSimpleReadBytes, "/PGM/RC/Phys/Simple/Read/Bytes", "The number of bytes read by PGMPhysSimpleReadGCPtr."); + PGM_REG_COUNTER(&pStats->StatRCPhysSimpleWrite, "/PGM/RC/Phys/Simple/Write", "The number of times PGMPhysSimpleWriteGCPtr was called."); + PGM_REG_COUNTER_BYTES(&pStats->StatRCPhysSimpleWriteBytes, "/PGM/RC/Phys/Simple/Write/Bytes", "The number of bytes written by PGMPhysSimpleWriteGCPtr."); + + PGM_REG_COUNTER(&pStats->StatTrackVirgin, "/PGM/Track/Virgin", "The number of first time shadowings"); + PGM_REG_COUNTER(&pStats->StatTrackAliased, "/PGM/Track/Aliased", "The number of times switching to cRef2, i.e. the page is being shadowed by two PTs."); + PGM_REG_COUNTER(&pStats->StatTrackAliasedMany, "/PGM/Track/AliasedMany", "The number of times we're tracking using cRef2."); + PGM_REG_COUNTER(&pStats->StatTrackAliasedLots, "/PGM/Track/AliasedLots", "The number of times we're hitting pages which has overflowed cRef2"); + PGM_REG_COUNTER(&pStats->StatTrackOverflows, "/PGM/Track/Overflows", "The number of times the extent list grows too long."); + PGM_REG_COUNTER(&pStats->StatTrackNoExtentsLeft, "/PGM/Track/NoExtentLeft", "The number of times the extent list was exhausted."); + PGM_REG_PROFILE(&pStats->StatTrackDeref, "/PGM/Track/Deref", "Profiling of SyncPageWorkerTrackDeref (expensive)."); + +# undef PGM_REG_COUNTER +# undef PGM_REG_PROFILE +#endif + + /* + * Note! The layout below matches the member layout exactly! 
+ */ + + /* + * Common - stats + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PPGMCPU pPgmCpu = &pVM->aCpus[idCpu].pgm.s; + +#define PGM_REG_COUNTER(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, c, b, idCpu); \ + AssertRC(rc); +#define PGM_REG_PROFILE(a, b, c) \ + rc = STAMR3RegisterF(pVM, a, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL, c, b, idCpu); \ + AssertRC(rc); + + PGM_REG_COUNTER(&pPgmCpu->cGuestModeChanges, "/PGM/CPU%u/cGuestModeChanges", "Number of guest mode changes."); + PGM_REG_COUNTER(&pPgmCpu->cA20Changes, "/PGM/CPU%u/cA20Changes", "Number of A20 gate changes."); + +#ifdef VBOX_WITH_STATISTICS + PGMCPUSTATS *pCpuStats = pVM->aCpus[idCpu].pgm.s.pStatsR3; + +# if 0 /* rarely useful; leave for debugging. */ + for (unsigned j = 0; j < RT_ELEMENTS(pPgmCpu->StatSyncPtPD); j++) + STAMR3RegisterF(pVM, &pCpuStats->StatSyncPtPD[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "The number of SyncPT per PD n.", "/PGM/CPU%u/PDSyncPT/%04X", i, j); + for (unsigned j = 0; j < RT_ELEMENTS(pCpuStats->StatSyncPagePD); j++) + STAMR3RegisterF(pVM, &pCpuStats->StatSyncPagePD[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "The number of SyncPage per PD n.", "/PGM/CPU%u/PDSyncPage/%04X", i, j); +# endif + /* R0 only: */ + PGM_REG_PROFILE(&pCpuStats->StatR0NpMiscfg, "/PGM/CPU%u/R0/NpMiscfg", "PGMR0Trap0eHandlerNPMisconfig() profiling."); + PGM_REG_COUNTER(&pCpuStats->StatR0NpMiscfgSyncPage, "/PGM/CPU%u/R0/NpMiscfgSyncPage", "SyncPage calls from PGMR0Trap0eHandlerNPMisconfig()."); + + /* RZ only: */ + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0e, "/PGM/CPU%u/RZ/Trap0e", "Profiling of the PGMTrap0eHandler() body."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2Ballooned, "/PGM/CPU%u/RZ/Trap0e/Time2/Ballooned", "Profiling of the Trap0eHandler body when the cause is read access to a ballooned page."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2CSAM, "/PGM/CPU%u/RZ/Trap0e/Time2/CSAM", "Profiling of the Trap0eHandler body when the cause is CSAM."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2DirtyAndAccessed, "/PGM/CPU%u/RZ/Trap0e/Time2/DirtyAndAccessedBits", "Profiling of the Trap0eHandler body when the cause is dirty and/or accessed bit emulation."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2GuestTrap, "/PGM/CPU%u/RZ/Trap0e/Time2/GuestTrap", "Profiling of the Trap0eHandler body when the cause is a guest trap."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2HndPhys, "/PGM/CPU%u/RZ/Trap0e/Time2/HandlerPhysical", "Profiling of the Trap0eHandler body when the cause is a physical handler."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2HndVirt, "/PGM/CPU%u/RZ/Trap0e/Time2/HandlerVirtual", "Profiling of the Trap0eHandler body when the cause is a virtual handler."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2HndUnhandled, "/PGM/CPU%u/RZ/Trap0e/Time2/HandlerUnhandled", "Profiling of the Trap0eHandler body when the cause is access outside the monitored areas of a monitored page."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2InvalidPhys, "/PGM/CPU%u/RZ/Trap0e/Time2/InvalidPhys", "Profiling of the Trap0eHandler body when the cause is access to an invalid physical guest address."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2MakeWritable, "/PGM/CPU%u/RZ/Trap0e/Time2/MakeWritable", "Profiling of the Trap0eHandler body when the cause is that a page needed to be made writeable."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2Mapping, 
"/PGM/CPU%u/RZ/Trap0e/Time2/Mapping", "Profiling of the Trap0eHandler body when the cause is related to the guest mappings."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2Misc, "/PGM/CPU%u/RZ/Trap0e/Time2/Misc", "Profiling of the Trap0eHandler body when the cause is not known."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2OutOfSync, "/PGM/CPU%u/RZ/Trap0e/Time2/OutOfSync", "Profiling of the Trap0eHandler body when the cause is an out-of-sync page."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2OutOfSyncHndPhys, "/PGM/CPU%u/RZ/Trap0e/Time2/OutOfSyncHndPhys", "Profiling of the Trap0eHandler body when the cause is an out-of-sync physical handler page."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2OutOfSyncHndVirt, "/PGM/CPU%u/RZ/Trap0e/Time2/OutOfSyncHndVirt", "Profiling of the Trap0eHandler body when the cause is an out-of-sync virtual handler page."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2OutOfSyncHndObs, "/PGM/CPU%u/RZ/Trap0e/Time2/OutOfSyncObsHnd", "Profiling of the Trap0eHandler body when the cause is an obsolete handler page."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2SyncPT, "/PGM/CPU%u/RZ/Trap0e/Time2/SyncPT", "Profiling of the Trap0eHandler body when the cause is lazy syncing of a PT."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2WPEmulation, "/PGM/CPU%u/RZ/Trap0e/Time2/WPEmulation", "Profiling of the Trap0eHandler body when the cause is CR0.WP emulation."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2Wp0RoUsHack, "/PGM/CPU%u/RZ/Trap0e/Time2/WP0R0USHack", "Profiling of the Trap0eHandler body when the cause is CR0.WP and netware hack to be enabled."); + PGM_REG_PROFILE(&pCpuStats->StatRZTrap0eTime2Wp0RoUsUnhack, "/PGM/CPU%u/RZ/Trap0e/Time2/WP0R0USUnhack", "Profiling of the Trap0eHandler body when the cause is CR0.WP and netware hack to be disabled."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eConflicts, "/PGM/CPU%u/RZ/Trap0e/Conflicts", "The number of times #PF was caused by an undetected conflict."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersMapping, "/PGM/CPU%u/RZ/Trap0e/Handlers/Mapping", "Number of traps due to access handlers in mappings."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersOutOfSync, "/PGM/CPU%u/RZ/Trap0e/Handlers/OutOfSync", "Number of traps due to out-of-sync handled pages."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersPhysAll, "/PGM/CPU%u/RZ/Trap0e/Handlers/PhysAll", "Number of traps due to physical all-access handlers."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersPhysAllOpt, "/PGM/CPU%u/RZ/Trap0e/Handlers/PhysAllOpt", "Number of the physical all-access handler traps using the optimization."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersPhysWrite, "/PGM/CPU%u/RZ/Trap0e/Handlers/PhysWrite", "Number of traps due to physical write-access handlers."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersVirtual, "/PGM/CPU%u/RZ/Trap0e/Handlers/Virtual", "Number of traps due to virtual access handlers."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersVirtualByPhys, "/PGM/CPU%u/RZ/Trap0e/Handlers/VirtualByPhys", "Number of traps due to virtual access handlers by physical address."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersVirtualUnmarked,"/PGM/CPU%u/RZ/Trap0e/Handlers/VirtualUnmarked","Number of traps due to virtual access handlers by virtual address (without proper physical flags)."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersUnhandled, "/PGM/CPU%u/RZ/Trap0e/Handlers/Unhandled", "Number of traps due to access outside range of monitored page(s)."); + 
PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eHandlersInvalid, "/PGM/CPU%u/RZ/Trap0e/Handlers/Invalid", "Number of traps due to access to invalid physical memory."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eUSNotPresentRead, "/PGM/CPU%u/RZ/Trap0e/Err/User/NPRead", "Number of user mode not present read page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eUSNotPresentWrite, "/PGM/CPU%u/RZ/Trap0e/Err/User/NPWrite", "Number of user mode not present write page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eUSWrite, "/PGM/CPU%u/RZ/Trap0e/Err/User/Write", "Number of user mode write page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eUSReserved, "/PGM/CPU%u/RZ/Trap0e/Err/User/Reserved", "Number of user mode reserved bit page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eUSNXE, "/PGM/CPU%u/RZ/Trap0e/Err/User/NXE", "Number of user mode NXE page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eUSRead, "/PGM/CPU%u/RZ/Trap0e/Err/User/Read", "Number of user mode read page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eSVNotPresentRead, "/PGM/CPU%u/RZ/Trap0e/Err/Supervisor/NPRead", "Number of supervisor mode not present read page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eSVNotPresentWrite, "/PGM/CPU%u/RZ/Trap0e/Err/Supervisor/NPWrite", "Number of supervisor mode not present write page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eSVWrite, "/PGM/CPU%u/RZ/Trap0e/Err/Supervisor/Write", "Number of supervisor mode write page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eSVReserved, "/PGM/CPU%u/RZ/Trap0e/Err/Supervisor/Reserved", "Number of supervisor mode reserved bit page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eSNXE, "/PGM/CPU%u/RZ/Trap0e/Err/Supervisor/NXE", "Number of supervisor mode NXE page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eGuestPF, "/PGM/CPU%u/RZ/Trap0e/GuestPF", "Number of real guest page faults."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eGuestPFMapping, "/PGM/CPU%u/RZ/Trap0e/GuestPF/InMapping", "Number of real guest page faults in a mapping."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eWPEmulInRZ, "/PGM/CPU%u/RZ/Trap0e/WP/InRZ", "Number of guest page faults due to X86_CR0_WP emulation."); + PGM_REG_COUNTER(&pCpuStats->StatRZTrap0eWPEmulToR3, "/PGM/CPU%u/RZ/Trap0e/WP/ToR3", "Number of guest page faults due to X86_CR0_WP emulation (forward to R3 for emulation)."); +#if 0 /* rarely useful; leave for debugging. 
*/ + for (unsigned j = 0; j < RT_ELEMENTS(pCpuStats->StatRZTrap0ePD); j++) + STAMR3RegisterF(pVM, &pCpuStats->StatRZTrap0ePD[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "The number of traps in page directory n.", "/PGM/CPU%u/RZ/Trap0e/PD/%04X", i, j); +#endif + PGM_REG_COUNTER(&pCpuStats->StatRZGuestCR3WriteHandled, "/PGM/CPU%u/RZ/CR3WriteHandled", "The number of times the Guest CR3 change was successfully handled."); + PGM_REG_COUNTER(&pCpuStats->StatRZGuestCR3WriteUnhandled, "/PGM/CPU%u/RZ/CR3WriteUnhandled", "The number of times the Guest CR3 change was passed back to the recompiler."); + PGM_REG_COUNTER(&pCpuStats->StatRZGuestCR3WriteConflict, "/PGM/CPU%u/RZ/CR3WriteConflict", "The number of times the Guest CR3 monitoring detected a conflict."); + PGM_REG_COUNTER(&pCpuStats->StatRZGuestROMWriteHandled, "/PGM/CPU%u/RZ/ROMWriteHandled", "The number of times the Guest ROM change was successfully handled."); + PGM_REG_COUNTER(&pCpuStats->StatRZGuestROMWriteUnhandled, "/PGM/CPU%u/RZ/ROMWriteUnhandled", "The number of times the Guest ROM change was passed back to the recompiler."); + + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapMigrateInvlPg, "/PGM/CPU%u/RZ/DynMap/MigrateInvlPg", "invlpg count in PGMR0DynMapMigrateAutoSet."); + PGM_REG_PROFILE(&pCpuStats->StatRZDynMapGCPageInl, "/PGM/CPU%u/RZ/DynMap/PageGCPageInl", "Calls to pgmR0DynMapGCPageInlined."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapGCPageInlHits, "/PGM/CPU%u/RZ/DynMap/PageGCPageInl/Hits", "Hash table lookup hits."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapGCPageInlMisses, "/PGM/CPU%u/RZ/DynMap/PageGCPageInl/Misses", "Misses that falls back to the code common."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapGCPageInlRamHits, "/PGM/CPU%u/RZ/DynMap/PageGCPageInl/RamHits", "1st ram range hits."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapGCPageInlRamMisses, "/PGM/CPU%u/RZ/DynMap/PageGCPageInl/RamMisses", "1st ram range misses, takes slow path."); + PGM_REG_PROFILE(&pCpuStats->StatRZDynMapHCPageInl, "/PGM/CPU%u/RZ/DynMap/PageHCPageInl", "Calls to pgmRZDynMapHCPageInlined."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapHCPageInlHits, "/PGM/CPU%u/RZ/DynMap/PageHCPageInl/Hits", "Hash table lookup hits."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapHCPageInlMisses, "/PGM/CPU%u/RZ/DynMap/PageHCPageInl/Misses", "Misses that falls back to the code common."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPage, "/PGM/CPU%u/RZ/DynMap/Page", "Calls to pgmR0DynMapPage"); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapSetOptimize, "/PGM/CPU%u/RZ/DynMap/Page/SetOptimize", "Calls to pgmRZDynMapOptimizeAutoSet."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapSetSearchFlushes, "/PGM/CPU%u/RZ/DynMap/Page/SetSearchFlushes", "Set search restoring to subset flushes."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapSetSearchHits, "/PGM/CPU%u/RZ/DynMap/Page/SetSearchHits", "Set search hits."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapSetSearchMisses, "/PGM/CPU%u/RZ/DynMap/Page/SetSearchMisses", "Set search misses."); + PGM_REG_PROFILE(&pCpuStats->StatRZDynMapHCPage, "/PGM/CPU%u/RZ/DynMap/Page/HCPage", "Calls to pgmRZDynMapHCPageCommon (ring-0)."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageHits0, "/PGM/CPU%u/RZ/DynMap/Page/Hits0", "Hits at iPage+0"); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageHits1, "/PGM/CPU%u/RZ/DynMap/Page/Hits1", "Hits at iPage+1"); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageHits2, "/PGM/CPU%u/RZ/DynMap/Page/Hits2", "Hits at iPage+2"); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageInvlPg, "/PGM/CPU%u/RZ/DynMap/Page/InvlPg", 
"invlpg count in pgmR0DynMapPageSlow."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageSlow, "/PGM/CPU%u/RZ/DynMap/Page/Slow", "Calls to pgmR0DynMapPageSlow - subtract this from pgmR0DynMapPage to get 1st level hits."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageSlowLoopHits, "/PGM/CPU%u/RZ/DynMap/Page/SlowLoopHits" , "Hits in the loop path."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageSlowLoopMisses, "/PGM/CPU%u/RZ/DynMap/Page/SlowLoopMisses", "Misses in the loop path. NonLoopMisses = Slow - SlowLoopHit - SlowLoopMisses"); + //PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPageSlowLostHits, "/PGM/CPU%u/R0/DynMap/Page/SlowLostHits", "Lost hits."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapSubsets, "/PGM/CPU%u/RZ/DynMap/Subsets", "Times PGMRZDynMapPushAutoSubset was called."); + PGM_REG_COUNTER(&pCpuStats->StatRZDynMapPopFlushes, "/PGM/CPU%u/RZ/DynMap/SubsetPopFlushes", "Times PGMRZDynMapPopAutoSubset flushes the subset."); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[0], "/PGM/CPU%u/RZ/DynMap/SetFilledPct000..09", "00-09% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[1], "/PGM/CPU%u/RZ/DynMap/SetFilledPct010..19", "10-19% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[2], "/PGM/CPU%u/RZ/DynMap/SetFilledPct020..29", "20-29% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[3], "/PGM/CPU%u/RZ/DynMap/SetFilledPct030..39", "30-39% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[4], "/PGM/CPU%u/RZ/DynMap/SetFilledPct040..49", "40-49% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[5], "/PGM/CPU%u/RZ/DynMap/SetFilledPct050..59", "50-59% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[6], "/PGM/CPU%u/RZ/DynMap/SetFilledPct060..69", "60-69% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[7], "/PGM/CPU%u/RZ/DynMap/SetFilledPct070..79", "70-79% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[8], "/PGM/CPU%u/RZ/DynMap/SetFilledPct080..89", "80-89% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[9], "/PGM/CPU%u/RZ/DynMap/SetFilledPct090..99", "90-99% filled (RC: min(set-size, dynmap-size))"); + PGM_REG_COUNTER(&pCpuStats->aStatRZDynMapSetFilledPct[10], "/PGM/CPU%u/RZ/DynMap/SetFilledPct100", "100% filled (RC: min(set-size, dynmap-size))"); + + /* HC only: */ + + /* RZ & R3: */ + PGM_REG_PROFILE(&pCpuStats->StatRZSyncCR3, "/PGM/CPU%u/RZ/SyncCR3", "Profiling of the PGMSyncCR3() body."); + PGM_REG_PROFILE(&pCpuStats->StatRZSyncCR3Handlers, "/PGM/CPU%u/RZ/SyncCR3/Handlers", "Profiling of the PGMSyncCR3() update handler section."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3Global, "/PGM/CPU%u/RZ/SyncCR3/Global", "The number of global CR3 syncs."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3NotGlobal, "/PGM/CPU%u/RZ/SyncCR3/NotGlobal", "The number of non-global CR3 syncs."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3DstCacheHit, "/PGM/CPU%u/RZ/SyncCR3/DstChacheHit", "The number of times we got some kind of a cache hit."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3DstFreed, "/PGM/CPU%u/RZ/SyncCR3/DstFreed", "The number of times we've had to free a shadow entry."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3DstFreedSrcNP, 
"/PGM/CPU%u/RZ/SyncCR3/DstFreedSrcNP", "The number of times we've had to free a shadow entry for which the source entry was not present."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3DstNotPresent, "/PGM/CPU%u/RZ/SyncCR3/DstNotPresent", "The number of times we've encountered a not present shadow entry for a present guest entry."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3DstSkippedGlobalPD, "/PGM/CPU%u/RZ/SyncCR3/DstSkippedGlobalPD", "The number of times a global page directory wasn't flushed."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncCR3DstSkippedGlobalPT, "/PGM/CPU%u/RZ/SyncCR3/DstSkippedGlobalPT", "The number of times a page table with only global entries wasn't flushed."); + PGM_REG_PROFILE(&pCpuStats->StatRZSyncPT, "/PGM/CPU%u/RZ/SyncPT", "Profiling of the pfnSyncPT() body."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncPTFailed, "/PGM/CPU%u/RZ/SyncPT/Failed", "The number of times pfnSyncPT() failed."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncPT4K, "/PGM/CPU%u/RZ/SyncPT/4K", "Nr of 4K PT syncs"); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncPT4M, "/PGM/CPU%u/RZ/SyncPT/4M", "Nr of 4M PT syncs"); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncPagePDNAs, "/PGM/CPU%u/RZ/SyncPagePDNAs", "The number of time we've marked a PD not present from SyncPage to virtualize the accessed bit."); + PGM_REG_COUNTER(&pCpuStats->StatRZSyncPagePDOutOfSync, "/PGM/CPU%u/RZ/SyncPagePDOutOfSync", "The number of time we've encountered an out-of-sync PD in SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatRZAccessedPage, "/PGM/CPU%u/RZ/AccessedPage", "The number of pages marked not present for accessed bit emulation."); + PGM_REG_PROFILE(&pCpuStats->StatRZDirtyBitTracking, "/PGM/CPU%u/RZ/DirtyPage", "Profiling the dirty bit tracking in CheckPageFault()."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtyPage, "/PGM/CPU%u/RZ/DirtyPage/Mark", "The number of pages marked read-only for dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtyPageBig, "/PGM/CPU%u/RZ/DirtyPage/MarkBig", "The number of 4MB pages marked read-only for dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtyPageSkipped, "/PGM/CPU%u/RZ/DirtyPage/Skipped", "The number of pages already dirty or readonly."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtyPageTrap, "/PGM/CPU%u/RZ/DirtyPage/Trap", "The number of traps generated for dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtyPageStale, "/PGM/CPU%u/RZ/DirtyPage/Stale", "The number of traps generated for dirty bit tracking (stale tlb entries)."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtiedPage, "/PGM/CPU%u/RZ/DirtyPage/SetDirty", "The number of pages marked dirty because of write accesses."); + PGM_REG_COUNTER(&pCpuStats->StatRZDirtyTrackRealPF, "/PGM/CPU%u/RZ/DirtyPage/RealPF", "The number of real pages faults during dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatRZPageAlreadyDirty, "/PGM/CPU%u/RZ/DirtyPage/AlreadySet", "The number of pages already marked dirty because of write accesses."); + PGM_REG_PROFILE(&pCpuStats->StatRZInvalidatePage, "/PGM/CPU%u/RZ/InvalidatePage", "PGMInvalidatePage() profiling."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePage4KBPages, "/PGM/CPU%u/RZ/InvalidatePage/4KBPages", "The number of times PGMInvalidatePage() was called for a 4KB page."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePage4MBPages, "/PGM/CPU%u/RZ/InvalidatePage/4MBPages", "The number of times PGMInvalidatePage() was called for a 4MB page."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePage4MBPagesSkip, "/PGM/CPU%u/RZ/InvalidatePage/4MBPagesSkip","The number of times 
PGMInvalidatePage() skipped a 4MB page."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePagePDMappings, "/PGM/CPU%u/RZ/InvalidatePage/PDMappings", "The number of times PGMInvalidatePage() was called for a page directory containing mappings (no conflict)."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePagePDNAs, "/PGM/CPU%u/RZ/InvalidatePage/PDNAs", "The number of times PGMInvalidatePage() was called for a not accessed page directory."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePagePDNPs, "/PGM/CPU%u/RZ/InvalidatePage/PDNPs", "The number of times PGMInvalidatePage() was called for a not present page directory."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePagePDOutOfSync, "/PGM/CPU%u/RZ/InvalidatePage/PDOutOfSync", "The number of times PGMInvalidatePage() was called for an out of sync page directory."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePageSizeChanges, "/PGM/CPU%u/RZ/InvalidatePage/SizeChanges", "The number of times PGMInvalidatePage() was called on a page size change (4KB <-> 2/4MB)."); + PGM_REG_COUNTER(&pCpuStats->StatRZInvalidatePageSkipped, "/PGM/CPU%u/RZ/InvalidatePage/Skipped", "The number of times PGMInvalidatePage() was skipped due to not present shw or pending pending SyncCR3."); + PGM_REG_COUNTER(&pCpuStats->StatRZPageOutOfSyncSupervisor, "/PGM/CPU%u/RZ/OutOfSync/SuperVisor", "Number of traps due to pages out of sync (P) and times VerifyAccessSyncPage calls SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatRZPageOutOfSyncUser, "/PGM/CPU%u/RZ/OutOfSync/User", "Number of traps due to pages out of sync (P) and times VerifyAccessSyncPage calls SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatRZPageOutOfSyncSupervisorWrite,"/PGM/CPU%u/RZ/OutOfSync/SuperVisorWrite", "Number of traps due to pages out of sync (RW) and times VerifyAccessSyncPage calls SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatRZPageOutOfSyncUserWrite, "/PGM/CPU%u/RZ/OutOfSync/UserWrite", "Number of traps due to pages out of sync (RW) and times VerifyAccessSyncPage calls SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatRZPageOutOfSyncBallloon, "/PGM/CPU%u/RZ/OutOfSync/Balloon", "The number of times a ballooned page was accessed (read)."); + PGM_REG_PROFILE(&pCpuStats->StatRZPrefetch, "/PGM/CPU%u/RZ/Prefetch", "PGMPrefetchPage profiling."); + PGM_REG_PROFILE(&pCpuStats->StatRZFlushTLB, "/PGM/CPU%u/RZ/FlushTLB", "Profiling of the PGMFlushTLB() body."); + PGM_REG_COUNTER(&pCpuStats->StatRZFlushTLBNewCR3, "/PGM/CPU%u/RZ/FlushTLB/NewCR3", "The number of times PGMFlushTLB was called with a new CR3, non-global. (switch)"); + PGM_REG_COUNTER(&pCpuStats->StatRZFlushTLBNewCR3Global, "/PGM/CPU%u/RZ/FlushTLB/NewCR3Global", "The number of times PGMFlushTLB was called with a new CR3, global. (switch)"); + PGM_REG_COUNTER(&pCpuStats->StatRZFlushTLBSameCR3, "/PGM/CPU%u/RZ/FlushTLB/SameCR3", "The number of times PGMFlushTLB was called with the same CR3, non-global. (flush)"); + PGM_REG_COUNTER(&pCpuStats->StatRZFlushTLBSameCR3Global, "/PGM/CPU%u/RZ/FlushTLB/SameCR3Global", "The number of times PGMFlushTLB was called with the same CR3, global. 
(flush)"); + PGM_REG_PROFILE(&pCpuStats->StatRZGstModifyPage, "/PGM/CPU%u/RZ/GstModifyPage", "Profiling of the PGMGstModifyPage() body."); + + PGM_REG_PROFILE(&pCpuStats->StatR3SyncCR3, "/PGM/CPU%u/R3/SyncCR3", "Profiling of the PGMSyncCR3() body."); + PGM_REG_PROFILE(&pCpuStats->StatR3SyncCR3Handlers, "/PGM/CPU%u/R3/SyncCR3/Handlers", "Profiling of the PGMSyncCR3() update handler section."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3Global, "/PGM/CPU%u/R3/SyncCR3/Global", "The number of global CR3 syncs."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3NotGlobal, "/PGM/CPU%u/R3/SyncCR3/NotGlobal", "The number of non-global CR3 syncs."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3DstCacheHit, "/PGM/CPU%u/R3/SyncCR3/DstChacheHit", "The number of times we got some kind of a cache hit."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3DstFreed, "/PGM/CPU%u/R3/SyncCR3/DstFreed", "The number of times we've had to free a shadow entry."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3DstFreedSrcNP, "/PGM/CPU%u/R3/SyncCR3/DstFreedSrcNP", "The number of times we've had to free a shadow entry for which the source entry was not present."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3DstNotPresent, "/PGM/CPU%u/R3/SyncCR3/DstNotPresent", "The number of times we've encountered a not present shadow entry for a present guest entry."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3DstSkippedGlobalPD, "/PGM/CPU%u/R3/SyncCR3/DstSkippedGlobalPD", "The number of times a global page directory wasn't flushed."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncCR3DstSkippedGlobalPT, "/PGM/CPU%u/R3/SyncCR3/DstSkippedGlobalPT", "The number of times a page table with only global entries wasn't flushed."); + PGM_REG_PROFILE(&pCpuStats->StatR3SyncPT, "/PGM/CPU%u/R3/SyncPT", "Profiling of the pfnSyncPT() body."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncPTFailed, "/PGM/CPU%u/R3/SyncPT/Failed", "The number of times pfnSyncPT() failed."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncPT4K, "/PGM/CPU%u/R3/SyncPT/4K", "Nr of 4K PT syncs"); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncPT4M, "/PGM/CPU%u/R3/SyncPT/4M", "Nr of 4M PT syncs"); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncPagePDNAs, "/PGM/CPU%u/R3/SyncPagePDNAs", "The number of time we've marked a PD not present from SyncPage to virtualize the accessed bit."); + PGM_REG_COUNTER(&pCpuStats->StatR3SyncPagePDOutOfSync, "/PGM/CPU%u/R3/SyncPagePDOutOfSync", "The number of time we've encountered an out-of-sync PD in SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatR3AccessedPage, "/PGM/CPU%u/R3/AccessedPage", "The number of pages marked not present for accessed bit emulation."); + PGM_REG_PROFILE(&pCpuStats->StatR3DirtyBitTracking, "/PGM/CPU%u/R3/DirtyPage", "Profiling the dirty bit tracking in CheckPageFault()."); + PGM_REG_COUNTER(&pCpuStats->StatR3DirtyPage, "/PGM/CPU%u/R3/DirtyPage/Mark", "The number of pages marked read-only for dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatR3DirtyPageBig, "/PGM/CPU%u/R3/DirtyPage/MarkBig", "The number of 4MB pages marked read-only for dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatR3DirtyPageSkipped, "/PGM/CPU%u/R3/DirtyPage/Skipped", "The number of pages already dirty or readonly."); + PGM_REG_COUNTER(&pCpuStats->StatR3DirtyPageTrap, "/PGM/CPU%u/R3/DirtyPage/Trap", "The number of traps generated for dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatR3DirtiedPage, "/PGM/CPU%u/R3/DirtyPage/SetDirty", "The number of pages marked dirty because of write accesses."); + PGM_REG_COUNTER(&pCpuStats->StatR3DirtyTrackRealPF, 
"/PGM/CPU%u/R3/DirtyPage/RealPF", "The number of real pages faults during dirty bit tracking."); + PGM_REG_COUNTER(&pCpuStats->StatR3PageAlreadyDirty, "/PGM/CPU%u/R3/DirtyPage/AlreadySet", "The number of pages already marked dirty because of write accesses."); + PGM_REG_PROFILE(&pCpuStats->StatR3InvalidatePage, "/PGM/CPU%u/R3/InvalidatePage", "PGMInvalidatePage() profiling."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePage4KBPages, "/PGM/CPU%u/R3/InvalidatePage/4KBPages", "The number of times PGMInvalidatePage() was called for a 4KB page."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePage4MBPages, "/PGM/CPU%u/R3/InvalidatePage/4MBPages", "The number of times PGMInvalidatePage() was called for a 4MB page."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePage4MBPagesSkip, "/PGM/CPU%u/R3/InvalidatePage/4MBPagesSkip","The number of times PGMInvalidatePage() skipped a 4MB page."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePagePDMappings, "/PGM/CPU%u/R3/InvalidatePage/PDMappings", "The number of times PGMInvalidatePage() was called for a page directory containing mappings (no conflict)."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePagePDNAs, "/PGM/CPU%u/R3/InvalidatePage/PDNAs", "The number of times PGMInvalidatePage() was called for a not accessed page directory."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePagePDNPs, "/PGM/CPU%u/R3/InvalidatePage/PDNPs", "The number of times PGMInvalidatePage() was called for a not present page directory."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePagePDOutOfSync, "/PGM/CPU%u/R3/InvalidatePage/PDOutOfSync", "The number of times PGMInvalidatePage() was called for an out of sync page directory."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePageSizeChanges, "/PGM/CPU%u/R3/InvalidatePage/SizeChanges", "The number of times PGMInvalidatePage() was called on a page size change (4KB <-> 2/4MB)."); + PGM_REG_COUNTER(&pCpuStats->StatR3InvalidatePageSkipped, "/PGM/CPU%u/R3/InvalidatePage/Skipped", "The number of times PGMInvalidatePage() was skipped due to not present shw or pending pending SyncCR3."); + PGM_REG_COUNTER(&pCpuStats->StatR3PageOutOfSyncSupervisor, "/PGM/CPU%u/R3/OutOfSync/SuperVisor", "Number of traps due to pages out of sync and times VerifyAccessSyncPage calls SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatR3PageOutOfSyncUser, "/PGM/CPU%u/R3/OutOfSync/User", "Number of traps due to pages out of sync and times VerifyAccessSyncPage calls SyncPage."); + PGM_REG_COUNTER(&pCpuStats->StatR3PageOutOfSyncBallloon, "/PGM/CPU%u/R3/OutOfSync/Balloon", "The number of times a ballooned page was accessed (read)."); + PGM_REG_PROFILE(&pCpuStats->StatR3Prefetch, "/PGM/CPU%u/R3/Prefetch", "PGMPrefetchPage profiling."); + PGM_REG_PROFILE(&pCpuStats->StatR3FlushTLB, "/PGM/CPU%u/R3/FlushTLB", "Profiling of the PGMFlushTLB() body."); + PGM_REG_COUNTER(&pCpuStats->StatR3FlushTLBNewCR3, "/PGM/CPU%u/R3/FlushTLB/NewCR3", "The number of times PGMFlushTLB was called with a new CR3, non-global. (switch)"); + PGM_REG_COUNTER(&pCpuStats->StatR3FlushTLBNewCR3Global, "/PGM/CPU%u/R3/FlushTLB/NewCR3Global", "The number of times PGMFlushTLB was called with a new CR3, global. (switch)"); + PGM_REG_COUNTER(&pCpuStats->StatR3FlushTLBSameCR3, "/PGM/CPU%u/R3/FlushTLB/SameCR3", "The number of times PGMFlushTLB was called with the same CR3, non-global. (flush)"); + PGM_REG_COUNTER(&pCpuStats->StatR3FlushTLBSameCR3Global, "/PGM/CPU%u/R3/FlushTLB/SameCR3Global", "The number of times PGMFlushTLB was called with the same CR3, global. 
(flush)"); + PGM_REG_PROFILE(&pCpuStats->StatR3GstModifyPage, "/PGM/CPU%u/R3/GstModifyPage", "Profiling of the PGMGstModifyPage() body."); +#endif /* VBOX_WITH_STATISTICS */ + +#undef PGM_REG_PROFILE +#undef PGM_REG_COUNTER + + } + + return VINF_SUCCESS; +} + + +/** + * Init the PGM bits that rely on VMMR0 and MM to be fully initialized. + * + * The dynamic mapping area will also be allocated and initialized at this + * time. We could allocate it during PGMR3Init of course, but the mapping + * wouldn't be allocated at that time preventing us from setting up the + * page table entries with the dummy page. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3InitDynMap(PVM pVM) +{ + RTGCPTR GCPtr; + int rc; + + /* + * Reserve space for the dynamic mappings. + */ + rc = MMR3HyperReserve(pVM, MM_HYPER_DYNAMIC_SIZE, "Dynamic mapping", &GCPtr); + if (RT_SUCCESS(rc)) + pVM->pgm.s.pbDynPageMapBaseGC = GCPtr; + + if ( RT_SUCCESS(rc) + && (pVM->pgm.s.pbDynPageMapBaseGC >> X86_PD_PAE_SHIFT) != ((pVM->pgm.s.pbDynPageMapBaseGC + MM_HYPER_DYNAMIC_SIZE - 1) >> X86_PD_PAE_SHIFT)) + { + rc = MMR3HyperReserve(pVM, MM_HYPER_DYNAMIC_SIZE, "Dynamic mapping not crossing", &GCPtr); + if (RT_SUCCESS(rc)) + pVM->pgm.s.pbDynPageMapBaseGC = GCPtr; + } + if (RT_SUCCESS(rc)) + { + AssertRelease((pVM->pgm.s.pbDynPageMapBaseGC >> X86_PD_PAE_SHIFT) == ((pVM->pgm.s.pbDynPageMapBaseGC + MM_HYPER_DYNAMIC_SIZE - 1) >> X86_PD_PAE_SHIFT)); + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + } + return rc; +} + + +/** + * Ring-3 init finalizing. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3InitFinalize(PVM pVM) +{ + int rc = VERR_IPE_UNINITIALIZED_STATUS; /* (MSC incorrectly thinks it can be usused uninitialized) */ + + /* + * Reserve space for the dynamic mappings. + * Initialize the dynamic mapping pages with dummy pages to simply the cache. + */ + /* get the pointer to the page table entries. */ + PPGMMAPPING pMapping = pgmGetMapping(pVM, pVM->pgm.s.pbDynPageMapBaseGC); + AssertRelease(pMapping); + const uintptr_t off = pVM->pgm.s.pbDynPageMapBaseGC - pMapping->GCPtr; + const unsigned iPT = off >> X86_PD_SHIFT; + const unsigned iPG = (off >> X86_PT_SHIFT) & X86_PT_MASK; + pVM->pgm.s.paDynPageMap32BitPTEsGC = pMapping->aPTs[iPT].pPTRC + iPG * sizeof(pMapping->aPTs[0].pPTR3->a[0]); + pVM->pgm.s.paDynPageMapPaePTEsGC = pMapping->aPTs[iPT].paPaePTsRC + iPG * sizeof(pMapping->aPTs[0].paPaePTsR3->a[0]); + + /* init cache area */ + RTHCPHYS HCPhysDummy = MMR3PageDummyHCPhys(pVM); + for (uint32_t offDynMap = 0; offDynMap < MM_HYPER_DYNAMIC_SIZE; offDynMap += PAGE_SIZE) + { + rc = PGMMap(pVM, pVM->pgm.s.pbDynPageMapBaseGC + offDynMap, HCPhysDummy, PAGE_SIZE, 0); + AssertRCReturn(rc, rc); + } + + /* + * Determine the max physical address width (MAXPHYADDR) and apply it to + * all the mask members and stuff. 
+ */ + uint32_t cMaxPhysAddrWidth; + uint32_t uMaxExtLeaf = ASMCpuId_EAX(0x80000000); + if ( uMaxExtLeaf >= 0x80000008 + && uMaxExtLeaf <= 0x80000fff) + { + cMaxPhysAddrWidth = ASMCpuId_EAX(0x80000008) & 0xff; + LogRel(("PGM: The CPU physical address width is %u bits\n", cMaxPhysAddrWidth)); + cMaxPhysAddrWidth = RT_MIN(52, cMaxPhysAddrWidth); + pVM->pgm.s.fLessThan52PhysicalAddressBits = cMaxPhysAddrWidth < 52; + for (uint32_t iBit = cMaxPhysAddrWidth; iBit < 52; iBit++) + pVM->pgm.s.HCPhysInvMmioPg |= RT_BIT_64(iBit); + } + else + { + LogRel(("PGM: ASSUMING CPU physical address width of 48 bits (uMaxExtLeaf=%#x)\n", uMaxExtLeaf)); + cMaxPhysAddrWidth = 48; + pVM->pgm.s.fLessThan52PhysicalAddressBits = true; + pVM->pgm.s.HCPhysInvMmioPg |= UINT64_C(0x000f0000000000); + } + + /** @todo query from CPUM. */ + pVM->pgm.s.GCPhysInvAddrMask = 0; + for (uint32_t iBit = cMaxPhysAddrWidth; iBit < 64; iBit++) + pVM->pgm.s.GCPhysInvAddrMask |= RT_BIT_64(iBit); + + /* + * Initialize the invalid paging entry masks, assuming NX is disabled. + */ + uint64_t fMbzPageFrameMask = pVM->pgm.s.GCPhysInvAddrMask & UINT64_C(0x000ffffffffff000); + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + + /** @todo The manuals are not entirely clear whether the physical + * address width is relevant. See table 5-9 in the intel + * manual vs the PDE4M descriptions. Write testcase (NP). */ + pVCpu->pgm.s.fGst32BitMbzBigPdeMask = ((uint32_t)(fMbzPageFrameMask >> (32 - 13)) & X86_PDE4M_PG_HIGH_MASK) + | X86_PDE4M_MBZ_MASK; + + pVCpu->pgm.s.fGstPaeMbzPteMask = fMbzPageFrameMask | X86_PTE_PAE_MBZ_MASK_NO_NX; + pVCpu->pgm.s.fGstPaeMbzPdeMask = fMbzPageFrameMask | X86_PDE_PAE_MBZ_MASK_NO_NX; + pVCpu->pgm.s.fGstPaeMbzBigPdeMask = fMbzPageFrameMask | X86_PDE2M_PAE_MBZ_MASK_NO_NX; + pVCpu->pgm.s.fGstPaeMbzPdpeMask = fMbzPageFrameMask | X86_PDPE_PAE_MBZ_MASK; + + pVCpu->pgm.s.fGstAmd64MbzPteMask = fMbzPageFrameMask | X86_PTE_LM_MBZ_MASK_NO_NX; + pVCpu->pgm.s.fGstAmd64MbzPdeMask = fMbzPageFrameMask | X86_PDE_LM_MBZ_MASK_NX; + pVCpu->pgm.s.fGstAmd64MbzBigPdeMask = fMbzPageFrameMask | X86_PDE2M_LM_MBZ_MASK_NX; + pVCpu->pgm.s.fGstAmd64MbzPdpeMask = fMbzPageFrameMask | X86_PDPE_LM_MBZ_MASK_NO_NX; + pVCpu->pgm.s.fGstAmd64MbzBigPdpeMask = fMbzPageFrameMask | X86_PDPE1G_LM_MBZ_MASK_NO_NX; + pVCpu->pgm.s.fGstAmd64MbzPml4eMask = fMbzPageFrameMask | X86_PML4E_MBZ_MASK_NO_NX; + + pVCpu->pgm.s.fGst64ShadowedPteMask = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_G | X86_PTE_A | X86_PTE_D; + pVCpu->pgm.s.fGst64ShadowedPdeMask = X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A; + pVCpu->pgm.s.fGst64ShadowedBigPdeMask = X86_PDE4M_P | X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_A; + pVCpu->pgm.s.fGst64ShadowedBigPde4PteMask = + X86_PDE4M_P | X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_G | X86_PDE4M_A | X86_PDE4M_D; + pVCpu->pgm.s.fGstAmd64ShadowedPdpeMask = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A; + pVCpu->pgm.s.fGstAmd64ShadowedPml4eMask = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A; + } + + /* + * Note that AMD uses all the 8 reserved bits for the address (so 40 bits in total); + * Intel only goes up to 36 bits, so we stick to 36 as well. + * Update: More recent intel manuals specifies 40 bits just like AMD. 
+ */ + uint32_t u32Dummy, u32Features; + CPUMGetGuestCpuId(VMMGetCpu(pVM), 1, 0, &u32Dummy, &u32Dummy, &u32Dummy, &u32Features); + if (u32Features & X86_CPUID_FEATURE_EDX_PSE36) + pVM->pgm.s.GCPhys4MBPSEMask = RT_BIT_64(RT_MAX(36, cMaxPhysAddrWidth)) - 1; + else + pVM->pgm.s.GCPhys4MBPSEMask = RT_BIT_64(32) - 1; + + /* + * Allocate memory if we're supposed to do that. + */ + if (pVM->pgm.s.fRamPreAlloc) + rc = pgmR3PhysRamPreAllocate(pVM); + + //pgmLogState(pVM); + LogRel(("PGM: PGMR3InitFinalize: 4 MB PSE mask %RGp\n", pVM->pgm.s.GCPhys4MBPSEMask)); + return rc; +} + + +/** + * Init phase completed callback. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat What has been completed. + * @thread EMT(0) + */ +VMMR3_INT_DECL(int) PGMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + switch (enmWhat) + { + case VMINITCOMPLETED_HM: +#ifdef VBOX_WITH_PCI_PASSTHROUGH + if (pVM->pgm.s.fPciPassthrough) + { + AssertLogRelReturn(pVM->pgm.s.fRamPreAlloc, VERR_PCI_PASSTHROUGH_NO_RAM_PREALLOC); + AssertLogRelReturn(HMIsEnabled(pVM), VERR_PCI_PASSTHROUGH_NO_HM); + AssertLogRelReturn(HMIsNestedPagingActive(pVM), VERR_PCI_PASSTHROUGH_NO_NESTED_PAGING); + + /* + * Report assignments to the IOMMU (hope that's good enough for now). + */ + if (pVM->pgm.s.fPciPassthrough) + { + int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_PHYS_SETUP_IOMMU, 0, NULL); + AssertRCReturn(rc, rc); + } + } +#else + AssertLogRelReturn(!pVM->pgm.s.fPciPassthrough, VERR_PGM_PCI_PASSTHRU_MISCONFIG); +#endif + break; + + default: + /* shut up gcc */ + break; + } + + return VINF_SUCCESS; +} + + +/** + * Applies relocations to data and code managed by this component. + * + * This function will be called at init and whenever the VMM need to relocate it + * self inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + */ +VMMR3DECL(void) PGMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + LogFlow(("PGMR3Relocate %RGv to %RGv\n", pVM->pgm.s.GCPtrCR3Mapping, pVM->pgm.s.GCPtrCR3Mapping + offDelta)); + + /* + * Paging stuff. + */ + pVM->pgm.s.GCPtrCR3Mapping += offDelta; + + /* Shadow, guest and both mode switch & relocation for each VCPU. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + uintptr_t idxShw = pVCpu->pgm.s.idxShadowModeData; + if ( idxShw < RT_ELEMENTS(g_aPgmShadowModeData) + && g_aPgmShadowModeData[idxShw].pfnRelocate) + g_aPgmShadowModeData[idxShw].pfnRelocate(pVCpu, offDelta); + else + AssertFailed(); + + uintptr_t const idxGst = pVCpu->pgm.s.idxGuestModeData; + if ( idxGst < RT_ELEMENTS(g_aPgmGuestModeData) + && g_aPgmGuestModeData[idxGst].pfnRelocate) + g_aPgmGuestModeData[idxGst].pfnRelocate(pVCpu, offDelta); + else + AssertFailed(); + } + + /* + * Trees. + */ + pVM->pgm.s.pTreesRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pTreesR3); + + /* + * Ram ranges. + */ + if (pVM->pgm.s.pRamRangesXR3) + { + /* Update the pSelfRC pointers and relink them. */ + for (PPGMRAMRANGE pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + if (!(pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING)) + pCur->pSelfRC = MMHyperCCToRC(pVM, pCur); + pgmR3PhysRelinkRamRanges(pVM); + + /* Flush the RC TLB. */ + for (unsigned i = 0; i < PGM_RAMRANGE_TLB_ENTRIES; i++) + pVM->pgm.s.apRamRangesTlbRC[i] = NIL_RTRCPTR; + } + + /* + * Update the pSelfRC pointer of the MMIO2 ram ranges since they might not + * be mapped and thus not included in the above exercise. 
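+ *
+ * The relocation pattern used for these lists is simply a walk that refreshes
+ * each stored raw-mode context address; sketched with illustrative stand-in
+ * types (the real code either recomputes the address via MMHyperCCToRC or adds
+ * the delta directly, depending on what is stored):
+ *
+ *      #include <stdint.h>
+ *
+ *      typedef uint32_t RCPTRLIKE;                         // stand-in for an RC address
+ *      struct RangeLike { struct RangeLike *pNext; RCPTRLIKE SelfRC; };
+ *
+ *      // Shift every node's RC self address after the area moved by offDelta.
+ *      static void RelocateSelfPointers(struct RangeLike *pHead, int32_t offDelta)
+ *      {
+ *          for (struct RangeLike *pCur = pHead; pCur; pCur = pCur->pNext)
+ *              pCur->SelfRC += (RCPTRLIKE)offDelta;
+ *      }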
+ */ + for (PPGMREGMMIORANGE pCur = pVM->pgm.s.pRegMmioRangesR3; pCur; pCur = pCur->pNextR3) + if (!(pCur->RamRange.fFlags & PGM_RAM_RANGE_FLAGS_FLOATING)) + pCur->RamRange.pSelfRC = MMHyperCCToRC(pVM, &pCur->RamRange); + + /* + * Update the two page directories with all page table mappings. + * (One or more of them have changed, that's why we're here.) + */ + pVM->pgm.s.pMappingsRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pMappingsR3); + for (PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; pCur->pNextR3; pCur = pCur->pNextR3) + pCur->pNextRC = MMHyperR3ToRC(pVM, pCur->pNextR3); + + /* Relocate GC addresses of Page Tables. */ + for (PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; pCur; pCur = pCur->pNextR3) + { + for (RTHCUINT i = 0; i < pCur->cPTs; i++) + { + pCur->aPTs[i].pPTRC = MMHyperR3ToRC(pVM, pCur->aPTs[i].pPTR3); + pCur->aPTs[i].paPaePTsRC = MMHyperR3ToRC(pVM, pCur->aPTs[i].paPaePTsR3); + } + } + + /* + * Dynamic page mapping area. + */ + pVM->pgm.s.paDynPageMap32BitPTEsGC += offDelta; + pVM->pgm.s.paDynPageMapPaePTEsGC += offDelta; + pVM->pgm.s.pbDynPageMapBaseGC += offDelta; + + if (pVM->pgm.s.pRCDynMap) + { + pVM->pgm.s.pRCDynMap += offDelta; + PPGMRCDYNMAP pDynMap = (PPGMRCDYNMAP)MMHyperRCToCC(pVM, pVM->pgm.s.pRCDynMap); + + pDynMap->paPages += offDelta; + PPGMRCDYNMAPENTRY paPages = (PPGMRCDYNMAPENTRY)MMHyperRCToCC(pVM, pDynMap->paPages); + + for (uint32_t iPage = 0; iPage < pDynMap->cPages; iPage++) + { + paPages[iPage].pvPage += offDelta; + paPages[iPage].uPte.pLegacy += offDelta; + paPages[iPage].uPte.pPae += offDelta; + } + } + + /* + * The Zero page. + */ + pVM->pgm.s.pvZeroPgR0 = MMHyperR3ToR0(pVM, pVM->pgm.s.pvZeroPgR3); +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + AssertRelease(pVM->pgm.s.pvZeroPgR0 != NIL_RTR0PTR || VM_IS_RAW_MODE_ENABLED(pVM)); +#else + AssertRelease(pVM->pgm.s.pvZeroPgR0 != NIL_RTR0PTR); +#endif + + /* + * Physical and virtual handlers. + */ + PGMRELOCHANDLERARGS Args = { offDelta, pVM }; + RTAvlroGCPhysDoWithAll(&pVM->pgm.s.pTreesR3->PhysHandlers, true, pgmR3RelocatePhysHandler, &Args); + pVM->pgm.s.pLastPhysHandlerRC = NIL_RTRCPTR; + + PPGMPHYSHANDLERTYPEINT pCurPhysType; + RTListOff32ForEach(&pVM->pgm.s.pTreesR3->HeadPhysHandlerTypes, pCurPhysType, PGMPHYSHANDLERTYPEINT, ListNode) + { + if (pCurPhysType->pfnHandlerRC != NIL_RTRCPTR) + pCurPhysType->pfnHandlerRC += offDelta; + if (pCurPhysType->pfnPfHandlerRC != NIL_RTRCPTR) + pCurPhysType->pfnPfHandlerRC += offDelta; + } + +#ifdef VBOX_WITH_RAW_MODE + RTAvlroGCPtrDoWithAll(&pVM->pgm.s.pTreesR3->VirtHandlers, true, pgmR3RelocateVirtHandler, &Args); + RTAvlroGCPtrDoWithAll(&pVM->pgm.s.pTreesR3->HyperVirtHandlers, true, pgmR3RelocateHyperVirtHandler, &Args); + + PPGMVIRTHANDLERTYPEINT pCurVirtType; + RTListOff32ForEach(&pVM->pgm.s.pTreesR3->HeadVirtHandlerTypes, pCurVirtType, PGMVIRTHANDLERTYPEINT, ListNode) + { + if (pCurVirtType->pfnHandlerRC != NIL_RTRCPTR) + pCurVirtType->pfnHandlerRC += offDelta; + if (pCurVirtType->pfnPfHandlerRC != NIL_RTRCPTR) + pCurVirtType->pfnPfHandlerRC += offDelta; + } +#endif + + /* + * The page pool. + */ + pgmR3PoolRelocate(pVM); + +#ifdef VBOX_WITH_STATISTICS + /* + * Statistics. + */ + pVM->pgm.s.pStatsRC = MMHyperCCToRC(pVM, pVM->pgm.s.pStatsR3); + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + pVM->aCpus[iCpu].pgm.s.pStatsRC = MMHyperCCToRC(pVM, pVM->aCpus[iCpu].pgm.s.pStatsR3); +#endif +} + + +/** + * Callback function for relocating a physical access handler. + * + * @returns 0 (continue enum) + * @param pNode Pointer to a PGMPHYSHANDLER node. 
+ * @param pvUser Pointer to a PGMRELOCHANDLERARGS. + */ +static DECLCALLBACK(int) pgmR3RelocatePhysHandler(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PPGMPHYSHANDLER pHandler = (PPGMPHYSHANDLER)pNode; + PCPGMRELOCHANDLERARGS pArgs = (PCPGMRELOCHANDLERARGS)pvUser; + if (pHandler->pvUserRC >= 0x10000) + pHandler->pvUserRC += pArgs->offDelta; + return 0; +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Callback function for relocating a virtual access handler. + * + * @returns 0 (continue enum) + * @param pNode Pointer to a PGMVIRTHANDLER node. + * @param pvUser Pointer to a PGMRELOCHANDLERARGS. + */ +static DECLCALLBACK(int) pgmR3RelocateVirtHandler(PAVLROGCPTRNODECORE pNode, void *pvUser) +{ + PPGMVIRTHANDLER pHandler = (PPGMVIRTHANDLER)pNode; + PCPGMRELOCHANDLERARGS pArgs = (PCPGMRELOCHANDLERARGS)pvUser; + Assert(PGMVIRTANDLER_GET_TYPE(pArgs->pVM, pHandler)->enmKind != PGMVIRTHANDLERKIND_HYPERVISOR); + + if ( pHandler->pvUserRC != NIL_RTRCPTR + && PGMVIRTANDLER_GET_TYPE(pArgs->pVM, pHandler)->fRelocUserRC) + pHandler->pvUserRC += pArgs->offDelta; + return 0; +} + + +/** + * Callback function for relocating a virtual access handler for the hypervisor mapping. + * + * @returns 0 (continue enum) + * @param pNode Pointer to a PGMVIRTHANDLER node. + * @param pvUser Pointer to a PGMRELOCHANDLERARGS. + */ +static DECLCALLBACK(int) pgmR3RelocateHyperVirtHandler(PAVLROGCPTRNODECORE pNode, void *pvUser) +{ + PPGMVIRTHANDLER pHandler = (PPGMVIRTHANDLER)pNode; + PCPGMRELOCHANDLERARGS pArgs = (PCPGMRELOCHANDLERARGS)pvUser; + Assert(PGMVIRTANDLER_GET_TYPE(pArgs->pVM, pHandler)->enmKind == PGMVIRTHANDLERKIND_HYPERVISOR); + + if ( pHandler->pvUserRC != NIL_RTRCPTR + && PGMVIRTANDLER_GET_TYPE(pArgs->pVM, pHandler)->fRelocUserRC) + pHandler->pvUserRC += pArgs->offDelta; + return 0; +} + +#endif /* VBOX_WITH_RAW_MODE */ + +/** + * Resets a virtual CPU when unplugged. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3DECL(void) PGMR3ResetCpu(PVM pVM, PVMCPU pVCpu) +{ + uintptr_t const idxGst = pVCpu->pgm.s.idxGuestModeData; + if ( idxGst < RT_ELEMENTS(g_aPgmGuestModeData) + && g_aPgmGuestModeData[idxGst].pfnExit) + { + int rc = g_aPgmGuestModeData[idxGst].pfnExit(pVCpu); + AssertReleaseRC(rc); + } + pVCpu->pgm.s.GCPhysCR3 = NIL_RTGCPHYS; + + int rc = PGMHCChangeMode(pVM, pVCpu, PGMMODE_REAL); + AssertReleaseRC(rc); + + STAM_REL_COUNTER_RESET(&pVCpu->pgm.s.cGuestModeChanges); + + pgmR3PoolResetUnpluggedCpu(pVM, pVCpu); + + /* + * Re-init other members. + */ + pVCpu->pgm.s.fA20Enabled = true; + pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!pVCpu->pgm.s.fA20Enabled << 20); + + /* + * Clear the FFs PGM owns. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL); +} + + +/** + * The VM is being reset. + * + * For the PGM component this means that any PD write monitors + * needs to be removed. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) PGMR3Reset(PVM pVM) +{ + LogFlow(("PGMR3Reset:\n")); + VM_ASSERT_EMT(pVM); + + pgmLock(pVM); + + /* + * Unfix any fixed mappings and disable CR3 monitoring. + */ + pVM->pgm.s.fMappingsFixed = false; + pVM->pgm.s.fMappingsFixedRestored = false; + pVM->pgm.s.GCPtrMappingFixed = NIL_RTGCPTR; + pVM->pgm.s.cbMappingFixed = 0; + + /* + * Exit the guest paging mode before the pgm pool gets reset. + * Important to clean up the amd64 case. 
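+ *
+ * The guarded mode-table dispatch used here (and for the shadow/both-mode
+ * tables elsewhere in this file) has this shape; the names are illustrative,
+ * not the real g_aPgm*ModeData entries:
+ *
+ *      #include <stddef.h>
+ *
+ *      typedef int (*PFNMODEEXIT)(void *pVCpu);
+ *      struct ModeData { PFNMODEEXIT pfnExit; };           // ... plus more callbacks
+ *
+ *      // Call the exit callback only if the index is valid and the callback exists.
+ *      static int CallModeExit(const struct ModeData *paModes, size_t cModes,
+ *                              size_t idx, void *pVCpu)
+ *      {
+ *          if (idx < cModes && paModes[idx].pfnExit)
+ *              return paModes[idx].pfnExit(pVCpu);
+ *          return 0;                                        // nothing to do for this mode
+ *      }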
+ */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + uintptr_t const idxGst = pVCpu->pgm.s.idxGuestModeData; + if ( idxGst < RT_ELEMENTS(g_aPgmGuestModeData) + && g_aPgmGuestModeData[idxGst].pfnExit) + { + int rc = g_aPgmGuestModeData[idxGst].pfnExit(pVCpu); + AssertReleaseRC(rc); + } + pVCpu->pgm.s.GCPhysCR3 = NIL_RTGCPHYS; + } + +#ifdef DEBUG + DBGFR3_INFO_LOG_SAFE(pVM, "mappings", NULL); + DBGFR3_INFO_LOG_SAFE(pVM, "handlers", "all nostat"); +#endif + + /* + * Switch mode back to real mode. (Before resetting the pgm pool!) + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + int rc = PGMHCChangeMode(pVM, pVCpu, PGMMODE_REAL); + AssertReleaseRC(rc); + + STAM_REL_COUNTER_RESET(&pVCpu->pgm.s.cGuestModeChanges); + STAM_REL_COUNTER_RESET(&pVCpu->pgm.s.cA20Changes); + } + + /* + * Reset the shadow page pool. + */ + pgmR3PoolReset(pVM); + + /* + * Re-init various other members and clear the FFs that PGM owns. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->pgm.s.fGst32BitPageSizeExtension = false; + PGMNotifyNxeChanged(pVCpu, false); + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL); + + if (!pVCpu->pgm.s.fA20Enabled) + { + pVCpu->pgm.s.fA20Enabled = true; + pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!pVCpu->pgm.s.fA20Enabled << 20); +#ifdef PGM_WITH_A20 + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + pgmR3RefreshShadowModeAfterA20Change(pVCpu); + HMFlushTlb(pVCpu); +#endif + } + } + + //pgmLogState(pVM); + pgmUnlock(pVM); +} + + +/** + * Memory setup after VM construction or reset. + * + * @param pVM The cross context VM structure. + * @param fAtReset Indicates the context, after reset if @c true or after + * construction if @c false. + */ +VMMR3_INT_DECL(void) PGMR3MemSetup(PVM pVM, bool fAtReset) +{ + if (fAtReset) + { + pgmLock(pVM); + + int rc = pgmR3PhysRamZeroAll(pVM); + AssertReleaseRC(rc); + + rc = pgmR3PhysRomReset(pVM); + AssertReleaseRC(rc); + + pgmUnlock(pVM); + } +} + + +#ifdef VBOX_STRICT +/** + * VM state change callback for clearing fNoMorePhysWrites after + * a snapshot has been created. + */ +static DECLCALLBACK(void) pgmR3ResetNoMorePhysWritesFlag(PUVM pUVM, VMSTATE enmState, VMSTATE enmOldState, void *pvUser) +{ + if ( enmState == VMSTATE_RUNNING + || enmState == VMSTATE_RESUMING) + pUVM->pVM->pgm.s.fNoMorePhysWrites = false; + NOREF(enmOldState); NOREF(pvUser); +} +#endif + +/** + * Private API to reset fNoMorePhysWrites. + */ +VMMR3_INT_DECL(void) PGMR3ResetNoMorePhysWritesFlag(PVM pVM) +{ + pVM->pgm.s.fNoMorePhysWrites = false; +} + +/** + * Terminates the PGM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3Term(PVM pVM) +{ + /* Must free shared pages here. */ + pgmLock(pVM); + pgmR3PhysRamTerm(pVM); + pgmR3PhysRomTerm(pVM); + pgmUnlock(pVM); + + PGMDeregisterStringFormatTypes(); + return PDMR3CritSectDelete(&pVM->pgm.s.CritSectX); +} + + +/** + * Show paging mode. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs "all" (default), "guest", "shadow" or "host". + */ +static DECLCALLBACK(void) pgmR3InfoMode(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /* digest argument. 
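+ * (The code below additionally left-strips the string with RTStrStripL; a
+ * plain C sketch of the digestion, with illustrative names, looks like this:)
+ *
+ *      #include <string.h>
+ *      #include <stdbool.h>
+ *
+ *      static void DigestModeArgs(const char *pszArgs,
+ *                                 bool *pfGuest, bool *pfShadow, bool *pfHost)
+ *      {
+ *          if (!pszArgs || !*pszArgs || strstr(pszArgs, "all"))
+ *              *pfGuest = *pfShadow = *pfHost = true;
+ *          else
+ *          {
+ *              *pfGuest  = strstr(pszArgs, "guest")  != NULL;
+ *              *pfShadow = strstr(pszArgs, "shadow") != NULL;
+ *              *pfHost   = strstr(pszArgs, "host")   != NULL;
+ *          }
+ *      }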
*/ + bool fGuest, fShadow, fHost; + if (pszArgs) + pszArgs = RTStrStripL(pszArgs); + if (!pszArgs || !*pszArgs || strstr(pszArgs, "all")) + fShadow = fHost = fGuest = true; + else + { + fShadow = fHost = fGuest = false; + if (strstr(pszArgs, "guest")) + fGuest = true; + if (strstr(pszArgs, "shadow")) + fShadow = true; + if (strstr(pszArgs, "host")) + fHost = true; + } + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + + /* print info. */ + if (fGuest) + pHlp->pfnPrintf(pHlp, "Guest paging mode (VCPU #%u): %s (changed %RU64 times), A20 %s (changed %RU64 times)\n", + pVCpu->idCpu, PGMGetModeName(pVCpu->pgm.s.enmGuestMode), pVCpu->pgm.s.cGuestModeChanges.c, + pVCpu->pgm.s.fA20Enabled ? "enabled" : "disabled", pVCpu->pgm.s.cA20Changes.c); + if (fShadow) + pHlp->pfnPrintf(pHlp, "Shadow paging mode (VCPU #%u): %s\n", pVCpu->idCpu, PGMGetModeName(pVCpu->pgm.s.enmShadowMode)); + if (fHost) + { + const char *psz; + switch (pVM->pgm.s.enmHostMode) + { + case SUPPAGINGMODE_INVALID: psz = "invalid"; break; + case SUPPAGINGMODE_32_BIT: psz = "32-bit"; break; + case SUPPAGINGMODE_32_BIT_GLOBAL: psz = "32-bit+G"; break; + case SUPPAGINGMODE_PAE: psz = "PAE"; break; + case SUPPAGINGMODE_PAE_GLOBAL: psz = "PAE+G"; break; + case SUPPAGINGMODE_PAE_NX: psz = "PAE+NX"; break; + case SUPPAGINGMODE_PAE_GLOBAL_NX: psz = "PAE+G+NX"; break; + case SUPPAGINGMODE_AMD64: psz = "AMD64"; break; + case SUPPAGINGMODE_AMD64_GLOBAL: psz = "AMD64+G"; break; + case SUPPAGINGMODE_AMD64_NX: psz = "AMD64+NX"; break; + case SUPPAGINGMODE_AMD64_GLOBAL_NX: psz = "AMD64+G+NX"; break; + default: psz = "unknown"; break; + } + pHlp->pfnPrintf(pHlp, "Host paging mode: %s\n", psz); + } +} + + +/** + * Dump registered MMIO ranges to the log. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) pgmR3PhysInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + bool const fVerbose = pszArgs && strstr(pszArgs, "verbose") != NULL; + + pHlp->pfnPrintf(pHlp, + "RAM ranges (pVM=%p)\n" + "%.*s %.*s\n", + pVM, + sizeof(RTGCPHYS) * 4 + 1, "GC Phys Range ", + sizeof(RTHCPTR) * 2, "pvHC "); + + for (PPGMRAMRANGE pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + { + pHlp->pfnPrintf(pHlp, + "%RGp-%RGp %RHv %s\n", + pCur->GCPhys, + pCur->GCPhysLast, + pCur->pvR3, + pCur->pszDesc); + if (fVerbose) + { + RTGCPHYS const cPages = pCur->cb >> X86_PAGE_SHIFT; + RTGCPHYS iPage = 0; + while (iPage < cPages) + { + RTGCPHYS const iFirstPage = iPage; + PGMPAGETYPE const enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(&pCur->aPages[iPage]); + do + iPage++; + while (iPage < cPages && (PGMPAGETYPE)PGM_PAGE_GET_TYPE(&pCur->aPages[iPage]) == enmType); + const char *pszType; + const char *pszMore = NULL; + switch (enmType) + { + case PGMPAGETYPE_RAM: + pszType = "RAM"; + break; + + case PGMPAGETYPE_MMIO2: + pszType = "MMIO2"; + break; + + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + pszType = "MMIO2-alias-MMIO"; + break; + + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: + pszType = "special-alias-MMIO"; + break; + + case PGMPAGETYPE_ROM_SHADOW: + case PGMPAGETYPE_ROM: + { + pszType = enmType == PGMPAGETYPE_ROM_SHADOW ? 
"ROM-shadowed" : "ROM"; + + RTGCPHYS const GCPhysFirstPg = iFirstPage * X86_PAGE_SIZE; + PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; + while (pRom && GCPhysFirstPg > pRom->GCPhysLast) + pRom = pRom->pNextR3; + if (pRom && GCPhysFirstPg - pRom->GCPhys < pRom->cb) + pszMore = pRom->pszDesc; + break; + } + + case PGMPAGETYPE_MMIO: + { + pszType = "MMIO"; + pgmLock(pVM); + PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pVM, iFirstPage * X86_PAGE_SIZE); + if (pHandler) + pszMore = pHandler->pszDesc; + pgmUnlock(pVM); + break; + } + + case PGMPAGETYPE_INVALID: + pszType = "invalid"; + break; + + default: + pszType = "bad"; + break; + } + if (pszMore) + pHlp->pfnPrintf(pHlp, " %RGp-%RGp %-20s %s\n", + pCur->GCPhys + iFirstPage * X86_PAGE_SIZE, + pCur->GCPhys + iPage * X86_PAGE_SIZE, + pszType, pszMore); + else + pHlp->pfnPrintf(pHlp, " %RGp-%RGp %s\n", + pCur->GCPhys + iFirstPage * X86_PAGE_SIZE, + pCur->GCPhys + iPage * X86_PAGE_SIZE, + pszType); + + } + } + } +} + + +/** + * Dump the page directory to the log. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) pgmR3InfoCr3(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /** @todo SMP support!! */ + PVMCPU pVCpu = &pVM->aCpus[0]; + +/** @todo fix this! Convert the PGMR3DumpHierarchyHC functions to do guest stuff. */ + /* Big pages supported? */ + const bool fPSE = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE); + + /* Global pages supported? */ + const bool fPGE = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PGE); + + NOREF(pszArgs); + + /* + * Get page directory addresses. + */ + pgmLock(pVM); + PX86PD pPDSrc = pgmGstGet32bitPDPtr(pVCpu); + Assert(pPDSrc); + + /* + * Iterate the page directory. + */ + for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++) + { + X86PDE PdeSrc = pPDSrc->a[iPD]; + if (PdeSrc.n.u1Present) + { + if (PdeSrc.b.u1Size && fPSE) + pHlp->pfnPrintf(pHlp, + "%04X - %RGp P=%d U=%d RW=%d G=%d - BIG\n", + iPD, + pgmGstGet4MBPhysPage(pVM, PdeSrc), + PdeSrc.b.u1Present, PdeSrc.b.u1User, PdeSrc.b.u1Write, PdeSrc.b.u1Global && fPGE); + else + pHlp->pfnPrintf(pHlp, + "%04X - %RGp P=%d U=%d RW=%d [G=%d]\n", + iPD, + (RTGCPHYS)(PdeSrc.u & X86_PDE_PG_MASK), + PdeSrc.n.u1Present, PdeSrc.n.u1User, PdeSrc.n.u1Write, PdeSrc.b.u1Global && fPGE); + } + } + pgmUnlock(pVM); +} + + +/** + * Service a VMMCALLRING3_PGM_LOCK call. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3LockCall(PVM pVM) +{ + int rc = PDMR3CritSectEnterEx(&pVM->pgm.s.CritSectX, true /* fHostCall */); + AssertRC(rc); + return rc; +} + + +/** + * Called by pgmPoolFlushAllInt prior to flushing the pool. + * + * @returns VBox status code, fully asserted. + * @param pVCpu The cross context virtual CPU structure. + */ +int pgmR3ExitShadowModeBeforePoolFlush(PVMCPU pVCpu) +{ + /* Unmap the old CR3 value before flushing everything. */ + int rc = VINF_SUCCESS; + uintptr_t idxBth = pVCpu->pgm.s.idxBothModeData; + if ( idxBth < RT_ELEMENTS(g_aPgmBothModeData) + && g_aPgmBothModeData[idxBth].pfnMapCR3) + { + rc = g_aPgmBothModeData[idxBth].pfnUnmapCR3(pVCpu); + AssertRC(rc); + } + + /* Exit the current shadow paging mode as well; nested paging and EPT use a root CR3 which will get flushed here. 
*/ + uintptr_t idxShw = pVCpu->pgm.s.idxShadowModeData; + if ( idxShw < RT_ELEMENTS(g_aPgmShadowModeData) + && g_aPgmShadowModeData[idxShw].pfnExit) + { + rc = g_aPgmShadowModeData[idxShw].pfnExit(pVCpu); + AssertMsgRCReturn(rc, ("Exit failed for shadow mode %d: %Rrc\n", pVCpu->pgm.s.enmShadowMode, rc), rc); + } + + Assert(pVCpu->pgm.s.pShwPageCR3R3 == NULL); + return rc; +} + + +/** + * Called by pgmPoolFlushAllInt after flushing the pool. + * + * @returns VBox status code, fully asserted. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +int pgmR3ReEnterShadowModeAfterPoolFlush(PVM pVM, PVMCPU pVCpu) +{ + pVCpu->pgm.s.enmShadowMode = PGMMODE_INVALID; + int rc = PGMHCChangeMode(pVM, pVCpu, PGMGetGuestMode(pVCpu)); + Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); + AssertRCReturn(rc, rc); + AssertRCSuccessReturn(rc, VERR_IPE_UNEXPECTED_INFO_STATUS); + + Assert(pVCpu->pgm.s.pShwPageCR3R3 != NULL || pVCpu->pgm.s.enmShadowMode == PGMMODE_NONE); + AssertMsg( pVCpu->pgm.s.enmShadowMode >= PGMMODE_NESTED_32BIT + || CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu), + ("%RHp != %RHp %s\n", (RTHCPHYS)CPUMGetHyperCR3(pVCpu), PGMGetHyperCR3(pVCpu), PGMGetModeName(pVCpu->pgm.s.enmShadowMode))); + return rc; +} + + +/** + * Called by PGMR3PhysSetA20 after changing the A20 state. + * + * @param pVCpu The cross context virtual CPU structure. + */ +void pgmR3RefreshShadowModeAfterA20Change(PVMCPU pVCpu) +{ + /** @todo Probably doing a bit too much here. */ + int rc = pgmR3ExitShadowModeBeforePoolFlush(pVCpu); + AssertReleaseRC(rc); + rc = pgmR3ReEnterShadowModeAfterPoolFlush(pVCpu->CTX_SUFF(pVM), pVCpu); + AssertReleaseRC(rc); +} + + +#ifdef VBOX_WITH_DEBUGGER + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmerror' and '.pgmerroroff' commands.} + */ +static DECLCALLBACK(int) pgmR3CmdError(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVM pVM = pUVM->pVM; + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, cArgs == 0 || (cArgs == 1 && paArgs[0].enmType == DBGCVAR_TYPE_STRING)); + + if (!cArgs) + { + /* + * Print the list of error injection locations with status. + */ + DBGCCmdHlpPrintf(pCmdHlp, "PGM error inject locations:\n"); + DBGCCmdHlpPrintf(pCmdHlp, " handy - %RTbool\n", pVM->pgm.s.fErrInjHandyPages); + } + else + { + /* + * String switch on where to inject the error. + */ + bool const fNewState = !strcmp(pCmd->pszCmd, "pgmerror"); + const char *pszWhere = paArgs[0].u.pszString; + if (!strcmp(pszWhere, "handy")) + ASMAtomicWriteBool(&pVM->pgm.s.fErrInjHandyPages, fNewState); + else + return DBGCCmdHlpPrintf(pCmdHlp, "error: Invalid 'where' value: %s.\n", pszWhere); + DBGCCmdHlpPrintf(pCmdHlp, "done\n"); + } + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmsync' command.} + */ +static DECLCALLBACK(int) pgmR3CmdSync(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + NOREF(pCmd); NOREF(paArgs); NOREF(cArgs); + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVMCPU pVCpu = VMMR3GetCpuByIdU(pUVM, DBGCCmdHlpGetCurrentCpu(pCmdHlp)); + if (!pVCpu) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "Invalid CPU ID"); + + /* + * Force page directory sync. 
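+ *
+ * The sync-forcing debugger commands boil down to flipping a flag bit
+ * atomically and raising the SYNC_CR3 force-action flag. A sketch of the
+ * toggle done by the '.pgmsyncalways' command further below (illustrative bit
+ * value, std::atomic instead of the IPRT ASMAtomic* helpers):
+ *
+ *      #include <atomic>
+ *      #include <stdint.h>
+ *
+ *      static const uint32_t SKETCH_SYNC_ALWAYS = UINT32_C(0x00000002); // illustrative bit
+ *
+ *      // Check-then-set/clear, returning true if the flag is now enabled.
+ *      static bool ToggleSyncAlways(std::atomic<uint32_t> &fSyncFlags)
+ *      {
+ *          if (fSyncFlags.load() & SKETCH_SYNC_ALWAYS)
+ *          {
+ *              fSyncFlags.fetch_and(~SKETCH_SYNC_ALWAYS);
+ *              return false;                               // now disabled
+ *          }
+ *          fSyncFlags.fetch_or(SKETCH_SYNC_ALWAYS);
+ *          return true;                                    // now enabled
+ *      }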
+ */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + + int rc = DBGCCmdHlpPrintf(pCmdHlp, "Forcing page directory sync.\n"); + if (RT_FAILURE(rc)) + return rc; + + return VINF_SUCCESS; +} + +#ifdef VBOX_STRICT + +/** + * EMT callback for pgmR3CmdAssertCR3. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pcErrors Where to return the error count. + */ +static DECLCALLBACK(int) pgmR3CmdAssertCR3EmtWorker(PUVM pUVM, unsigned *pcErrors) +{ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + PVMCPU pVCpu = VMMGetCpu(pVM); + + *pcErrors = PGMAssertCR3(pVM, pVCpu, CPUMGetGuestCR3(pVCpu), CPUMGetGuestCR4(pVCpu)); + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmassertcr3' command.} + */ +static DECLCALLBACK(int) pgmR3CmdAssertCR3(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + NOREF(pCmd); NOREF(paArgs); NOREF(cArgs); + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + + int rc = DBGCCmdHlpPrintf(pCmdHlp, "Checking shadow CR3 page tables for consistency.\n"); + if (RT_FAILURE(rc)) + return rc; + + unsigned cErrors = 0; + rc = VMR3ReqCallWaitU(pUVM, DBGCCmdHlpGetCurrentCpu(pCmdHlp), (PFNRT)pgmR3CmdAssertCR3EmtWorker, 2, pUVM, &cErrors); + if (RT_FAILURE(rc)) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "VMR3ReqCallWaitU failed: %Rrc", rc); + if (cErrors > 0) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "PGMAssertCR3: %u error(s)", cErrors); + return DBGCCmdHlpPrintf(pCmdHlp, "PGMAssertCR3: OK\n"); +} + +#endif /* VBOX_STRICT */ + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmsyncalways' command.} + */ +static DECLCALLBACK(int) pgmR3CmdSyncAlways(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + NOREF(pCmd); NOREF(paArgs); NOREF(cArgs); + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVMCPU pVCpu = VMMR3GetCpuByIdU(pUVM, DBGCCmdHlpGetCurrentCpu(pCmdHlp)); + if (!pVCpu) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "Invalid CPU ID"); + + /* + * Force page directory sync. + */ + int rc; + if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_ALWAYS) + { + ASMAtomicAndU32(&pVCpu->pgm.s.fSyncFlags, ~PGM_SYNC_ALWAYS); + rc = DBGCCmdHlpPrintf(pCmdHlp, "Disabled permanent forced page directory syncing.\n"); + } + else + { + ASMAtomicOrU32(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_ALWAYS); + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + rc = DBGCCmdHlpPrintf(pCmdHlp, "Enabled permanent forced page directory syncing.\n"); + } + return rc; +} + + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmphystofile' command.} + */ +static DECLCALLBACK(int) pgmR3CmdPhysToFile(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + NOREF(pCmd); + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVM pVM = pUVM->pVM; + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, cArgs == 1 || cArgs == 2); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 0, paArgs[0].enmType == DBGCVAR_TYPE_STRING); + if (cArgs == 2) + { + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, 1, paArgs[1].enmType == DBGCVAR_TYPE_STRING); + if (strcmp(paArgs[1].u.pszString, "nozero")) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "Invalid 2nd argument '%s', must be 'nozero'.\n", paArgs[1].u.pszString); + } + bool fIncZeroPgs = cArgs < 2; + + /* + * Open the output file and get the ram parameters. 
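+ *
+ * The dump that follows walks the RAM ranges in ascending order and zero-fills
+ * any gaps between them (the 'nozero' option is ignored in this sketch). The
+ * overall shape, sketched with stdio and illustrative types, assuming
+ * page-aligned, non-overlapping ranges:
+ *
+ *      #include <stdio.h>
+ *      #include <stdint.h>
+ *      #include <string.h>
+ *
+ *      #define SKETCH_PAGE_SIZE 4096
+ *      struct SketchRange { uint64_t off, cb; const uint8_t *pb; const struct SketchRange *pNext; };
+ *
+ *      // Write the ranges to a flat file, padding the gaps with zero pages.
+ *      static int DumpFlat(FILE *pFile, const struct SketchRange *pHead)
+ *      {
+ *          uint8_t abZero[SKETCH_PAGE_SIZE];
+ *          memset(abZero, 0, sizeof(abZero));
+ *          uint64_t off = 0;
+ *          for (const struct SketchRange *pCur = pHead; pCur; pCur = pCur->pNext)
+ *          {
+ *              for (; off < pCur->off; off += SKETCH_PAGE_SIZE)        // zero-fill the gap
+ *                  if (fwrite(abZero, SKETCH_PAGE_SIZE, 1, pFile) != 1)
+ *                      return -1;
+ *              for (uint64_t offIn = 0; offIn < pCur->cb; offIn += SKETCH_PAGE_SIZE, off += SKETCH_PAGE_SIZE)
+ *                  if (fwrite(pCur->pb + offIn, SKETCH_PAGE_SIZE, 1, pFile) != 1)
+ *                      return -1;
+ *          }
+ *          return 0;
+ *      }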
+ */ + RTFILE hFile; + int rc = RTFileOpen(&hFile, paArgs[0].u.pszString, RTFILE_O_WRITE | RTFILE_O_CREATE_REPLACE | RTFILE_O_DENY_WRITE); + if (RT_FAILURE(rc)) + return DBGCCmdHlpPrintf(pCmdHlp, "error: RTFileOpen(,'%s',) -> %Rrc.\n", paArgs[0].u.pszString, rc); + + uint32_t cbRamHole = 0; + CFGMR3QueryU32Def(CFGMR3GetRootU(pUVM), "RamHoleSize", &cbRamHole, MM_RAM_HOLE_SIZE_DEFAULT); + uint64_t cbRam = 0; + CFGMR3QueryU64Def(CFGMR3GetRootU(pUVM), "RamSize", &cbRam, 0); + RTGCPHYS GCPhysEnd = cbRam + cbRamHole; + + /* + * Dump the physical memory, page by page. + */ + RTGCPHYS GCPhys = 0; + char abZeroPg[PAGE_SIZE]; + RT_ZERO(abZeroPg); + + pgmLock(pVM); + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + pRam && pRam->GCPhys < GCPhysEnd && RT_SUCCESS(rc); + pRam = pRam->pNextR3) + { + /* fill the gap */ + if (pRam->GCPhys > GCPhys && fIncZeroPgs) + { + while (pRam->GCPhys > GCPhys && RT_SUCCESS(rc)) + { + rc = RTFileWrite(hFile, abZeroPg, PAGE_SIZE, NULL); + GCPhys += PAGE_SIZE; + } + } + + PCPGMPAGE pPage = &pRam->aPages[0]; + while (GCPhys < pRam->GCPhysLast && RT_SUCCESS(rc)) + { + if ( PGM_PAGE_IS_ZERO(pPage) + || PGM_PAGE_IS_BALLOONED(pPage)) + { + if (fIncZeroPgs) + { + rc = RTFileWrite(hFile, abZeroPg, PAGE_SIZE, NULL); + if (RT_FAILURE(rc)) + DBGCCmdHlpPrintf(pCmdHlp, "error: RTFileWrite -> %Rrc at GCPhys=%RGp.\n", rc, GCPhys); + } + } + else + { + switch (PGM_PAGE_GET_TYPE(pPage)) + { + case PGMPAGETYPE_RAM: + case PGMPAGETYPE_ROM_SHADOW: /* trouble?? */ + case PGMPAGETYPE_ROM: + case PGMPAGETYPE_MMIO2: + { + void const *pvPage; + PGMPAGEMAPLOCK Lock; + rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, &pvPage, &Lock); + if (RT_SUCCESS(rc)) + { + rc = RTFileWrite(hFile, pvPage, PAGE_SIZE, NULL); + PGMPhysReleasePageMappingLock(pVM, &Lock); + if (RT_FAILURE(rc)) + DBGCCmdHlpPrintf(pCmdHlp, "error: RTFileWrite -> %Rrc at GCPhys=%RGp.\n", rc, GCPhys); + } + else + DBGCCmdHlpPrintf(pCmdHlp, "error: PGMPhysGCPhys2CCPtrReadOnly -> %Rrc at GCPhys=%RGp.\n", rc, GCPhys); + break; + } + + default: + AssertFailed(); + RT_FALL_THRU(); + case PGMPAGETYPE_MMIO: + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: + if (fIncZeroPgs) + { + rc = RTFileWrite(hFile, abZeroPg, PAGE_SIZE, NULL); + if (RT_FAILURE(rc)) + DBGCCmdHlpPrintf(pCmdHlp, "error: RTFileWrite -> %Rrc at GCPhys=%RGp.\n", rc, GCPhys); + } + break; + } + } + + + /* advance */ + GCPhys += PAGE_SIZE; + pPage++; + } + } + pgmUnlock(pVM); + + RTFileClose(hFile); + if (RT_SUCCESS(rc)) + return DBGCCmdHlpPrintf(pCmdHlp, "Successfully saved physical memory to '%s'.\n", paArgs[0].u.pszString); + return VINF_SUCCESS; +} + +#endif /* VBOX_WITH_DEBUGGER */ + +/** + * pvUser argument of the pgmR3CheckIntegrity*Node callbacks. + */ +typedef struct PGMCHECKINTARGS +{ + bool fLeftToRight; /**< true: left-to-right; false: right-to-left. */ + PPGMPHYSHANDLER pPrevPhys; +#ifdef VBOX_WITH_RAW_MODE + PPGMVIRTHANDLER pPrevVirt; + PPGMPHYS2VIRTHANDLER pPrevPhys2Virt; +#else + void *pvFiller1, *pvFiller2; +#endif + PVM pVM; +} PGMCHECKINTARGS, *PPGMCHECKINTARGS; + +/** + * Validate a node in the physical handler tree. + * + * @returns 0 on if ok, other wise 1. + * @param pNode The handler node. + * @param pvUser pVM. 
+ */ +static DECLCALLBACK(int) pgmR3CheckIntegrityPhysHandlerNode(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PPGMCHECKINTARGS pArgs = (PPGMCHECKINTARGS)pvUser; + PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)pNode; + AssertReleaseReturn(!((uintptr_t)pCur & 7), 1); + AssertReleaseMsg(pCur->Core.Key <= pCur->Core.KeyLast, + ("pCur=%p %RGp-%RGp %s\n", pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc)); + AssertReleaseMsg( !pArgs->pPrevPhys + || ( pArgs->fLeftToRight + ? pArgs->pPrevPhys->Core.KeyLast < pCur->Core.Key + : pArgs->pPrevPhys->Core.KeyLast > pCur->Core.Key), + ("pPrevPhys=%p %RGp-%RGp %s\n" + " pCur=%p %RGp-%RGp %s\n", + pArgs->pPrevPhys, pArgs->pPrevPhys->Core.Key, pArgs->pPrevPhys->Core.KeyLast, pArgs->pPrevPhys->pszDesc, + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc)); + pArgs->pPrevPhys = pCur; + return 0; +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Validate a node in the virtual handler tree. + * + * @returns 0 on if ok, other wise 1. + * @param pNode The handler node. + * @param pvUser pVM. + */ +static DECLCALLBACK(int) pgmR3CheckIntegrityVirtHandlerNode(PAVLROGCPTRNODECORE pNode, void *pvUser) +{ + PPGMCHECKINTARGS pArgs = (PPGMCHECKINTARGS)pvUser; + PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)pNode; + AssertReleaseReturn(!((uintptr_t)pCur & 7), 1); + AssertReleaseMsg(pCur->Core.Key <= pCur->Core.KeyLast,("pCur=%p %RGv-%RGv %s\n", pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc)); + AssertReleaseMsg( !pArgs->pPrevVirt + || (pArgs->fLeftToRight ? pArgs->pPrevVirt->Core.KeyLast < pCur->Core.Key : pArgs->pPrevVirt->Core.KeyLast > pCur->Core.Key), + ("pPrevVirt=%p %RGv-%RGv %s\n" + " pCur=%p %RGv-%RGv %s\n", + pArgs->pPrevVirt, pArgs->pPrevVirt->Core.Key, pArgs->pPrevVirt->Core.KeyLast, pArgs->pPrevVirt->pszDesc, + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc)); + for (unsigned iPage = 0; iPage < pCur->cPages; iPage++) + { + AssertReleaseMsg(pCur->aPhysToVirt[iPage].offVirtHandler == -(intptr_t)RT_UOFFSETOF_DYN(PGMVIRTHANDLER, aPhysToVirt[iPage]), + ("pCur=%p %RGv-%RGv %s\n" + "iPage=%d offVirtHandle=%#x expected %#x\n", + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc, + iPage, pCur->aPhysToVirt[iPage].offVirtHandler, -(intptr_t)RT_UOFFSETOF_DYN(PGMVIRTHANDLER, aPhysToVirt[iPage]))); + } + pArgs->pPrevVirt = pCur; + return 0; +} + + +/** + * Validate a node in the virtual handler tree. + * + * @returns 0 on if ok, other wise 1. + * @param pNode The handler node. + * @param pvUser pVM. + */ +static DECLCALLBACK(int) pgmR3CheckIntegrityPhysToVirtHandlerNode(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PPGMCHECKINTARGS pArgs = (PPGMCHECKINTARGS)pvUser; + PPGMPHYS2VIRTHANDLER pCur = (PPGMPHYS2VIRTHANDLER)pNode; + AssertReleaseMsgReturn(!((uintptr_t)pCur & 3), ("\n"), 1); + AssertReleaseMsgReturn(!(pCur->offVirtHandler & 3), ("\n"), 1); + AssertReleaseMsg(pCur->Core.Key <= pCur->Core.KeyLast,("pCur=%p %RGp-%RGp\n", pCur, pCur->Core.Key, pCur->Core.KeyLast)); + AssertReleaseMsg( !pArgs->pPrevPhys2Virt + || (pArgs->fLeftToRight ? pArgs->pPrevPhys2Virt->Core.KeyLast < pCur->Core.Key : pArgs->pPrevPhys2Virt->Core.KeyLast > pCur->Core.Key), + ("pPrevPhys2Virt=%p %RGp-%RGp\n" + " pCur=%p %RGp-%RGp\n", + pArgs->pPrevPhys2Virt, pArgs->pPrevPhys2Virt->Core.Key, pArgs->pPrevPhys2Virt->Core.KeyLast, + pCur, pCur->Core.Key, pCur->Core.KeyLast)); + AssertReleaseMsg( !pArgs->pPrevPhys2Virt + || (pArgs->fLeftToRight ? 
pArgs->pPrevPhys2Virt->Core.KeyLast < pCur->Core.Key : pArgs->pPrevPhys2Virt->Core.KeyLast > pCur->Core.Key), + ("pPrevPhys2Virt=%p %RGp-%RGp\n" + " pCur=%p %RGp-%RGp\n", + pArgs->pPrevPhys2Virt, pArgs->pPrevPhys2Virt->Core.Key, pArgs->pPrevPhys2Virt->Core.KeyLast, + pCur, pCur->Core.Key, pCur->Core.KeyLast)); + AssertReleaseMsg((pCur->offNextAlias & (PGMPHYS2VIRTHANDLER_IN_TREE | PGMPHYS2VIRTHANDLER_IS_HEAD)) == (PGMPHYS2VIRTHANDLER_IN_TREE | PGMPHYS2VIRTHANDLER_IS_HEAD), + ("pCur=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n", + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->offVirtHandler, pCur->offNextAlias)); + if (pCur->offNextAlias & PGMPHYS2VIRTHANDLER_OFF_MASK) + { + PPGMPHYS2VIRTHANDLER pCur2 = pCur; + for (;;) + { + pCur2 = (PPGMPHYS2VIRTHANDLER)((intptr_t)pCur + (pCur->offNextAlias & PGMPHYS2VIRTHANDLER_OFF_MASK)); + AssertReleaseMsg(pCur2 != pCur, + (" pCur=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n", + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->offVirtHandler, pCur->offNextAlias)); + AssertReleaseMsg((pCur2->offNextAlias & (PGMPHYS2VIRTHANDLER_IN_TREE | PGMPHYS2VIRTHANDLER_IS_HEAD)) == PGMPHYS2VIRTHANDLER_IN_TREE, + (" pCur=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n" + "pCur2=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n", + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->offVirtHandler, pCur->offNextAlias, + pCur2, pCur2->Core.Key, pCur2->Core.KeyLast, pCur2->offVirtHandler, pCur2->offNextAlias)); + AssertReleaseMsg((pCur2->Core.Key ^ pCur->Core.Key) < PAGE_SIZE, + (" pCur=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n" + "pCur2=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n", + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->offVirtHandler, pCur->offNextAlias, + pCur2, pCur2->Core.Key, pCur2->Core.KeyLast, pCur2->offVirtHandler, pCur2->offNextAlias)); + AssertReleaseMsg((pCur2->Core.KeyLast ^ pCur->Core.KeyLast) < PAGE_SIZE, + (" pCur=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n" + "pCur2=%p:{.Core.Key=%RGp, .Core.KeyLast=%RGp, .offVirtHandler=%#RX32, .offNextAlias=%#RX32}\n", + pCur, pCur->Core.Key, pCur->Core.KeyLast, pCur->offVirtHandler, pCur->offNextAlias, + pCur2, pCur2->Core.Key, pCur2->Core.KeyLast, pCur2->offVirtHandler, pCur2->offNextAlias)); + if (!(pCur2->offNextAlias & PGMPHYS2VIRTHANDLER_OFF_MASK)) + break; + } + } + + pArgs->pPrevPhys2Virt = pCur; + return 0; +} + +#endif /* VBOX_WITH_RAW_MODE */ + +/** + * Perform an integrity check on the PGM component. + * + * @returns VINF_SUCCESS if everything is fine. + * @returns VBox error status after asserting on integrity breach. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3CheckIntegrity(PVM pVM) +{ + AssertReleaseReturn(pVM->pgm.s.offVM, VERR_INTERNAL_ERROR); + + /* + * Check the trees. 
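+ *
+ * Each per-node callback above essentially verifies that an in-order walk of a
+ * tree yields well-formed, strictly ascending intervals. The core check,
+ * sketched over a flat array with illustrative types:
+ *
+ *      #include <stdint.h>
+ *      #include <stddef.h>
+ *
+ *      struct NodeLike { uint64_t Key, KeyLast; };          // an interval [Key, KeyLast]
+ *
+ *      // Left-to-right check: every node must start after the previous one ended.
+ *      static unsigned CheckOrdering(const struct NodeLike *paNodes, size_t cNodes)
+ *      {
+ *          unsigned cErrors = 0;
+ *          for (size_t i = 0; i < cNodes; i++)
+ *          {
+ *              if (paNodes[i].Key > paNodes[i].KeyLast)
+ *                  cErrors++;                               // malformed interval
+ *              if (i > 0 && paNodes[i - 1].KeyLast >= paNodes[i].Key)
+ *                  cErrors++;                               // overlap or wrong order
+ *          }
+ *          return cErrors;
+ *      }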
+ */ + int cErrors = 0; + const PGMCHECKINTARGS LeftToRight = { true, NULL, NULL, NULL, pVM }; + const PGMCHECKINTARGS RightToLeft = { false, NULL, NULL, NULL, pVM }; + PGMCHECKINTARGS Args = LeftToRight; + cErrors += RTAvlroGCPhysDoWithAll(&pVM->pgm.s.pTreesR3->PhysHandlers, true, pgmR3CheckIntegrityPhysHandlerNode, &Args); + Args = RightToLeft; + cErrors += RTAvlroGCPhysDoWithAll(&pVM->pgm.s.pTreesR3->PhysHandlers, false, pgmR3CheckIntegrityPhysHandlerNode, &Args); +#ifdef VBOX_WITH_RAW_MODE + Args = LeftToRight; + cErrors += RTAvlroGCPtrDoWithAll( &pVM->pgm.s.pTreesR3->VirtHandlers, true, pgmR3CheckIntegrityVirtHandlerNode, &Args); + Args = RightToLeft; + cErrors += RTAvlroGCPtrDoWithAll( &pVM->pgm.s.pTreesR3->VirtHandlers, false, pgmR3CheckIntegrityVirtHandlerNode, &Args); + Args = LeftToRight; + cErrors += RTAvlroGCPtrDoWithAll( &pVM->pgm.s.pTreesR3->HyperVirtHandlers, true, pgmR3CheckIntegrityVirtHandlerNode, &Args); + Args = RightToLeft; + cErrors += RTAvlroGCPtrDoWithAll( &pVM->pgm.s.pTreesR3->HyperVirtHandlers, false, pgmR3CheckIntegrityVirtHandlerNode, &Args); + Args = LeftToRight; + cErrors += RTAvlroGCPhysDoWithAll(&pVM->pgm.s.pTreesR3->PhysToVirtHandlers, true, pgmR3CheckIntegrityPhysToVirtHandlerNode, &Args); + Args = RightToLeft; + cErrors += RTAvlroGCPhysDoWithAll(&pVM->pgm.s.pTreesR3->PhysToVirtHandlers, false, pgmR3CheckIntegrityPhysToVirtHandlerNode, &Args); +#endif /* VBOX_WITH_RAW_MODE */ + + return !cErrors ? VINF_SUCCESS : VERR_INTERNAL_ERROR; +} + diff --git a/src/VBox/VMM/VMMR3/PGMDbg.cpp b/src/VBox/VMM/VMMR3/PGMDbg.cpp new file mode 100644 index 00000000..8cf09fe4 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMDbg.cpp @@ -0,0 +1,2856 @@ +/* $Id: PGMDbg.cpp $ */ +/** @file + * PGM - Page Manager and Monitor - Debugger & Debugging APIs. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM +#include +#include +#include "PGMInternal.h" +#include +#include +#include "PGMInline.h" +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The max needle size that we will bother searching for + * This must not be more than half a page! 
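+ *
+ * The half-a-page limit leaves room for the up-to cbNeedle - 1 bytes of partial
+ * match the scanner carries across a page boundary. Expressed as a compile-time
+ * check with illustrative constants (C++11 static_assert):
+ *
+ *      #include <stddef.h>
+ *
+ *      static const size_t SKETCH_PAGE_SIZE  = 4096;   // assumed x86 page size
+ *      static const size_t SKETCH_MAX_NEEDLE = 256;    // mirrors MAX_NEEDLE_SIZE below
+ *      static_assert(SKETCH_MAX_NEEDLE <= SKETCH_PAGE_SIZE / 2,
+ *                    "needle must not exceed half a page");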
*/ +#define MAX_NEEDLE_SIZE 256 + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * State structure for the paging hierarchy dumpers. + */ +typedef struct PGMR3DUMPHIERARCHYSTATE +{ + /** Pointer to the VM. */ + PVM pVM; + /** Output helpers. */ + PCDBGFINFOHLP pHlp; + /** Set if PSE, PAE or long mode is enabled. */ + bool fPse; + /** Set if PAE or long mode is enabled. */ + bool fPae; + /** Set if long mode is enabled. */ + bool fLme; + /** Set if nested paging. */ + bool fNp; + /** Set if EPT. */ + bool fEpt; + /** Set if NXE is enabled. */ + bool fNxe; + /** The number or chars the address needs. */ + uint8_t cchAddress; + /** The last reserved bit. */ + uint8_t uLastRsvdBit; + /** Dump the page info as well (shadow page summary / guest physical + * page summary). */ + bool fDumpPageInfo; + /** Whether or not to print the header. */ + bool fPrintHeader; + /** Whether to print the CR3 value */ + bool fPrintCr3; + /** Padding*/ + bool afReserved[5]; + /** The current address. */ + uint64_t u64Address; + /** The last address to dump structures for. */ + uint64_t u64FirstAddress; + /** The last address to dump structures for. */ + uint64_t u64LastAddress; + /** Mask with the high reserved bits set. */ + uint64_t u64HighReservedBits; + /** The number of leaf entries that we've printed. */ + uint64_t cLeaves; +} PGMR3DUMPHIERARCHYSTATE; +/** Pointer to the paging hierarchy dumper state. */ +typedef PGMR3DUMPHIERARCHYSTATE *PPGMR3DUMPHIERARCHYSTATE; + + +/** + * Assembly scanning function. + * + * @returns Pointer to possible match or NULL. + * @param pvHaystack Pointer to what we search in. + * @param cbHaystack Number of bytes to search. + * @param pvNeedle Pointer to what we search for. + * @param cbNeedle Size of what we're searching for. + */ + +typedef DECLCALLBACK(uint8_t const *) FNPGMR3DBGFIXEDMEMSCAN(void const *pvHaystack, uint32_t cbHaystack, + void const *pvNeedle, size_t cbNeedle); +/** Pointer to an fixed size and step assembly scanner function. */ +typedef FNPGMR3DBGFIXEDMEMSCAN *PFNPGMR3DBGFIXEDMEMSCAN; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +DECLASM(uint8_t const *) pgmR3DbgFixedMemScan8Wide8Step(void const *, uint32_t, void const *, size_t cbNeedle); +DECLASM(uint8_t const *) pgmR3DbgFixedMemScan4Wide4Step(void const *, uint32_t, void const *, size_t cbNeedle); +DECLASM(uint8_t const *) pgmR3DbgFixedMemScan2Wide2Step(void const *, uint32_t, void const *, size_t cbNeedle); +DECLASM(uint8_t const *) pgmR3DbgFixedMemScan1Wide1Step(void const *, uint32_t, void const *, size_t cbNeedle); +DECLASM(uint8_t const *) pgmR3DbgFixedMemScan4Wide1Step(void const *, uint32_t, void const *, size_t cbNeedle); +DECLASM(uint8_t const *) pgmR3DbgFixedMemScan8Wide1Step(void const *, uint32_t, void const *, size_t cbNeedle); + + +/** + * Converts a R3 pointer to a GC physical address. + * + * Only for the debugger. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, *pGCPhys is set. + * @retval VERR_INVALID_POINTER if the pointer is not within the GC physical memory. 
+ * + * @param pUVM The user mode VM handle. + * @param R3Ptr The R3 pointer to convert. + * @param pGCPhys Where to store the GC physical address on success. + */ +VMMR3DECL(int) PGMR3DbgR3Ptr2GCPhys(PUVM pUVM, RTR3PTR R3Ptr, PRTGCPHYS pGCPhys) +{ + NOREF(pUVM); NOREF(R3Ptr); + *pGCPhys = NIL_RTGCPHYS; + return VERR_NOT_IMPLEMENTED; +} + + +/** + * Converts a R3 pointer to a HC physical address. + * + * Only for the debugger. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, *pHCPhys is set. + * @retval VERR_PGM_PHYS_PAGE_RESERVED it it's a valid GC physical page but has no physical backing. + * @retval VERR_INVALID_POINTER if the pointer is not within the GC physical memory. + * + * @param pUVM The user mode VM handle. + * @param R3Ptr The R3 pointer to convert. + * @param pHCPhys Where to store the HC physical address on success. + */ +VMMR3DECL(int) PGMR3DbgR3Ptr2HCPhys(PUVM pUVM, RTR3PTR R3Ptr, PRTHCPHYS pHCPhys) +{ + NOREF(pUVM); NOREF(R3Ptr); + *pHCPhys = NIL_RTHCPHYS; + return VERR_NOT_IMPLEMENTED; +} + + +/** + * Converts a HC physical address to a GC physical address. + * + * Only for the debugger. + * + * @returns VBox status code + * @retval VINF_SUCCESS on success, *pGCPhys is set. + * @retval VERR_INVALID_POINTER if the HC physical address is not within the GC physical memory. + * + * @param pUVM The user mode VM handle. + * @param HCPhys The HC physical address to convert. + * @param pGCPhys Where to store the GC physical address on success. + */ +VMMR3DECL(int) PGMR3DbgHCPhys2GCPhys(PUVM pUVM, RTHCPHYS HCPhys, PRTGCPHYS pGCPhys) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + /* + * Validate and adjust the input a bit. + */ + if (HCPhys == NIL_RTHCPHYS) + return VERR_INVALID_POINTER; + unsigned off = HCPhys & PAGE_OFFSET_MASK; + HCPhys &= X86_PTE_PAE_PG_MASK; + if (HCPhys == 0) + return VERR_INVALID_POINTER; + + for (PPGMRAMRANGE pRam = pUVM->pVM->pgm.s.CTX_SUFF(pRamRangesX); + pRam; + pRam = pRam->CTX_SUFF(pNext)) + { + uint32_t iPage = pRam->cb >> PAGE_SHIFT; + while (iPage-- > 0) + if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys) + { + *pGCPhys = pRam->GCPhys + (iPage << PAGE_SHIFT) + off; + return VINF_SUCCESS; + } + } + return VERR_INVALID_POINTER; +} + + +/** + * Read physical memory API for the debugger, similar to + * PGMPhysSimpleReadGCPhys. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pvDst Where to store what's read. + * @param GCPhysSrc Where to start reading from. + * @param cb The number of bytes to attempt reading. + * @param fFlags Flags, MBZ. + * @param pcbRead For store the actual number of bytes read, pass NULL if + * partial reads are unwanted. + * @todo Unused? + */ +VMMR3_INT_DECL(int) PGMR3DbgReadGCPhys(PVM pVM, void *pvDst, RTGCPHYS GCPhysSrc, size_t cb, uint32_t fFlags, size_t *pcbRead) +{ + /* validate */ + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + AssertReturn(pVM, VERR_INVALID_PARAMETER); + + /* try simple first. */ + int rc = PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc, cb); + if (RT_SUCCESS(rc) || !pcbRead) + return rc; + + /* partial read that failed, chop it up in pages. 
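+ *
+ * The chunking keeps each partial read within a single page; the chunk size
+ * calculation, sketched standalone with illustrative names:
+ *
+ *      #include <stdint.h>
+ *      #include <stddef.h>
+ *
+ *      static const uint64_t SKETCH_PAGE_SIZE        = 4096;
+ *      static const uint64_t SKETCH_PAGE_OFFSET_MASK = SKETCH_PAGE_SIZE - 1;
+ *
+ *      // Size of the next chunk so that [off, off + chunk) never crosses a page boundary.
+ *      static size_t NextChunk(uint64_t off, size_t cbLeft)
+ *      {
+ *          size_t cbChunk = (size_t)(SKETCH_PAGE_SIZE - (off & SKETCH_PAGE_OFFSET_MASK));
+ *          return cbChunk < cbLeft ? cbChunk : cbLeft;
+ *      }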
*/ + *pcbRead = 0; + rc = VINF_SUCCESS; + while (cb > 0) + { + size_t cbChunk = PAGE_SIZE; + cbChunk -= GCPhysSrc & PAGE_OFFSET_MASK; + if (cbChunk > cb) + cbChunk = cb; + + rc = PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc, cbChunk); + + /* advance */ + if (RT_FAILURE(rc)) + break; + *pcbRead += cbChunk; + cb -= cbChunk; + GCPhysSrc += cbChunk; + pvDst = (uint8_t *)pvDst + cbChunk; + } + + return *pcbRead && RT_FAILURE(rc) ? -rc : rc; +} + + +/** + * Write physical memory API for the debugger, similar to + * PGMPhysSimpleWriteGCPhys. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param GCPhysDst Where to start writing. + * @param pvSrc What to write. + * @param cb The number of bytes to attempt writing. + * @param fFlags Flags, MBZ. + * @param pcbWritten For store the actual number of bytes written, pass NULL + * if partial writes are unwanted. + * @todo Unused? + */ +VMMR3_INT_DECL(int) PGMR3DbgWriteGCPhys(PVM pVM, RTGCPHYS GCPhysDst, const void *pvSrc, size_t cb, uint32_t fFlags, size_t *pcbWritten) +{ + /* validate */ + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + AssertReturn(pVM, VERR_INVALID_PARAMETER); + + /* try simple first. */ + int rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysDst, pvSrc, cb); + if (RT_SUCCESS(rc) || !pcbWritten) + return rc; + + /* partial write that failed, chop it up in pages. */ + *pcbWritten = 0; + rc = VINF_SUCCESS; + while (cb > 0) + { + size_t cbChunk = PAGE_SIZE; + cbChunk -= GCPhysDst & PAGE_OFFSET_MASK; + if (cbChunk > cb) + cbChunk = cb; + + rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysDst, pvSrc, cbChunk); + + /* advance */ + if (RT_FAILURE(rc)) + break; + *pcbWritten += cbChunk; + cb -= cbChunk; + GCPhysDst += cbChunk; + pvSrc = (uint8_t const *)pvSrc + cbChunk; + } + + return *pcbWritten && RT_FAILURE(rc) ? -rc : rc; + +} + + +/** + * Read virtual memory API for the debugger, similar to PGMPhysSimpleReadGCPtr. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pvDst Where to store what's read. + * @param GCPtrSrc Where to start reading from. + * @param cb The number of bytes to attempt reading. + * @param fFlags Flags, MBZ. + * @param pcbRead For store the actual number of bytes read, pass NULL if + * partial reads are unwanted. + * @todo Unused? + */ +VMMR3_INT_DECL(int) PGMR3DbgReadGCPtr(PVM pVM, void *pvDst, RTGCPTR GCPtrSrc, size_t cb, uint32_t fFlags, size_t *pcbRead) +{ + /* validate */ + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + AssertReturn(pVM, VERR_INVALID_PARAMETER); + + /** @todo SMP support! */ + PVMCPU pVCpu = &pVM->aCpus[0]; + +/** @todo deal with HMA */ + /* try simple first. */ + int rc = PGMPhysSimpleReadGCPtr(pVCpu, pvDst, GCPtrSrc, cb); + if (RT_SUCCESS(rc) || !pcbRead) + return rc; + + /* partial read that failed, chop it up in pages. */ + *pcbRead = 0; + rc = VINF_SUCCESS; + while (cb > 0) + { + size_t cbChunk = PAGE_SIZE; + cbChunk -= GCPtrSrc & PAGE_OFFSET_MASK; + if (cbChunk > cb) + cbChunk = cb; + + rc = PGMPhysSimpleReadGCPtr(pVCpu, pvDst, GCPtrSrc, cbChunk); + + /* advance */ + if (RT_FAILURE(rc)) + break; + *pcbRead += cbChunk; + cb -= cbChunk; + GCPtrSrc += cbChunk; + pvDst = (uint8_t *)pvDst + cbChunk; + } + + return *pcbRead && RT_FAILURE(rc) ? -rc : rc; + +} + + +/** + * Write virtual memory API for the debugger, similar to + * PGMPhysSimpleWriteGCPtr. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param GCPtrDst Where to start writing. + * @param pvSrc What to write. 
+ * @param cb The number of bytes to attempt writing. + * @param fFlags Flags, MBZ. + * @param pcbWritten For store the actual number of bytes written, pass NULL + * if partial writes are unwanted. + * @todo Unused? + */ +VMMR3_INT_DECL(int) PGMR3DbgWriteGCPtr(PVM pVM, RTGCPTR GCPtrDst, void const *pvSrc, size_t cb, uint32_t fFlags, size_t *pcbWritten) +{ + /* validate */ + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + AssertReturn(pVM, VERR_INVALID_PARAMETER); + + /** @todo SMP support! */ + PVMCPU pVCpu = &pVM->aCpus[0]; + +/** @todo deal with HMA */ + /* try simple first. */ + int rc = PGMPhysSimpleWriteGCPtr(pVCpu, GCPtrDst, pvSrc, cb); + if (RT_SUCCESS(rc) || !pcbWritten) + return rc; + + /* partial write that failed, chop it up in pages. */ + *pcbWritten = 0; + rc = VINF_SUCCESS; + while (cb > 0) + { + size_t cbChunk = PAGE_SIZE; + cbChunk -= GCPtrDst & PAGE_OFFSET_MASK; + if (cbChunk > cb) + cbChunk = cb; + + rc = PGMPhysSimpleWriteGCPtr(pVCpu, GCPtrDst, pvSrc, cbChunk); + + /* advance */ + if (RT_FAILURE(rc)) + break; + *pcbWritten += cbChunk; + cb -= cbChunk; + GCPtrDst += cbChunk; + pvSrc = (uint8_t const *)pvSrc + cbChunk; + } + + return *pcbWritten && RT_FAILURE(rc) ? -rc : rc; + +} + + +/** + * memchr() with alignment considerations. + * + * @returns Pointer to matching byte, NULL if none found. + * @param pb Where to search. Aligned. + * @param b What to search for. + * @param cb How much to search . + * @param uAlign The alignment restriction of the result. + */ +static const uint8_t *pgmR3DbgAlignedMemChr(const uint8_t *pb, uint8_t b, size_t cb, uint32_t uAlign) +{ + const uint8_t *pbRet; + if (uAlign <= 32) + { + pbRet = (const uint8_t *)memchr(pb, b, cb); + if ((uintptr_t)pbRet & (uAlign - 1)) + { + do + { + pbRet++; + size_t cbLeft = cb - (pbRet - pb); + if (!cbLeft) + { + pbRet = NULL; + break; + } + pbRet = (const uint8_t *)memchr(pbRet, b, cbLeft); + } while ((uintptr_t)pbRet & (uAlign - 1)); + } + } + else + { + pbRet = NULL; + if (cb) + { + for (;;) + { + if (*pb == b) + { + pbRet = pb; + break; + } + if (cb <= uAlign) + break; + cb -= uAlign; + pb += uAlign; + } + } + } + return pbRet; +} + + +/** + * Scans a page for a byte string, keeping track of potential + * cross page matches. + * + * @returns true and *poff on match. + * false on mismatch. + * @param pbPage Pointer to the current page. + * @param poff Input: The offset into the page (aligned). + * Output: The page offset of the match on success. + * @param cb The number of bytes to search, starting of *poff. + * @param uAlign The needle alignment. This is of course less than a page. + * @param pabNeedle The byte string to search for. + * @param cbNeedle The length of the byte string. + * @param pfnFixedMemScan Pointer to assembly scan function, if available for + * the given needle and alignment combination. + * @param pabPrev The buffer that keeps track of a partial match that we + * bring over from the previous page. This buffer must be + * at least cbNeedle - 1 big. + * @param pcbPrev Input: The number of partial matching bytes from the previous page. + * Output: The number of partial matching bytes from this page. + * Initialize to 0 before the first call to this function. + */ +static bool pgmR3DbgScanPage(const uint8_t *pbPage, int32_t *poff, uint32_t cb, uint32_t uAlign, + const uint8_t *pabNeedle, size_t cbNeedle, PFNPGMR3DBGFIXEDMEMSCAN pfnFixedMemScan, + uint8_t *pabPrev, size_t *pcbPrev) +{ + /* + * Try complete any partial match from the previous page. 
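+ *
+ * Sketched standalone, the completion test for a match carried over from the
+ * previous page is (illustrative helper; cbPrev is the number of needle bytes
+ * already matched at the end of the previous page):
+ *
+ *      #include <string.h>
+ *      #include <stddef.h>
+ *
+ *      // The match completes when the remaining cbNeedle - cbPrev bytes
+ *      // appear right at the start of the new page.
+ *      static bool CompletesCarriedMatch(const unsigned char *pbPage, size_t cbPage,
+ *                                        const unsigned char *pabNeedle, size_t cbNeedle,
+ *                                        size_t cbPrev)
+ *      {
+ *          size_t cbTail = cbNeedle - cbPrev;
+ *          return cbTail <= cbPage && memcmp(pbPage, pabNeedle + cbPrev, cbTail) == 0;
+ *      }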
+ */ + if (*pcbPrev > 0) + { + size_t cbPrev = *pcbPrev; + Assert(!*poff); + Assert(cbPrev < cbNeedle); + if (!memcmp(pbPage, pabNeedle + cbPrev, cbNeedle - cbPrev)) + { + if (cbNeedle - cbPrev > cb) + return false; + *poff = -(int32_t)cbPrev; + return true; + } + + /* check out the remainder of the previous page. */ + const uint8_t *pb = pabPrev; + for (;;) + { + if (cbPrev <= uAlign) + break; + cbPrev -= uAlign; + pb = pgmR3DbgAlignedMemChr(pb + uAlign, *pabNeedle, cbPrev, uAlign); + if (!pb) + break; + cbPrev = *pcbPrev - (pb - pabPrev); + if ( !memcmp(pb + 1, &pabNeedle[1], cbPrev - 1) + && !memcmp(pbPage, pabNeedle + cbPrev, cbNeedle - cbPrev)) + { + if (cbNeedle - cbPrev > cb) + return false; + *poff = -(int32_t)cbPrev; + return true; + } + } + + *pcbPrev = 0; + } + + /* + * Match the body of the page. + */ + const uint8_t *pb = pbPage + *poff; + const uint8_t * const pbEnd = pb + cb; + for (;;) + { + AssertMsg(((uintptr_t)pb & (uAlign - 1)) == 0, ("%#p %#x\n", pb, uAlign)); + if (pfnFixedMemScan) + pb = pfnFixedMemScan(pb, cb, pabNeedle, cbNeedle); + else + pb = pgmR3DbgAlignedMemChr(pb, *pabNeedle, cb, uAlign); + if (!pb) + break; + cb = pbEnd - pb; + if (cb >= cbNeedle) + { + /* match? */ + if (!memcmp(pb + 1, &pabNeedle[1], cbNeedle - 1)) + { + *poff = pb - pbPage; + return true; + } + } + else + { + /* partial match at the end of the page? */ + if (!memcmp(pb + 1, &pabNeedle[1], cb - 1)) + { + /* We're copying one byte more that we really need here, but wtf. */ + memcpy(pabPrev, pb, cb); + *pcbPrev = cb; + return false; + } + } + + /* no match, skip ahead. */ + if (cb <= uAlign) + break; + pb += uAlign; + cb -= uAlign; + } + + return false; +} + + +static void pgmR3DbgSelectMemScanFunction(PFNPGMR3DBGFIXEDMEMSCAN *ppfnMemScan, uint32_t GCPhysAlign, size_t cbNeedle) +{ + *ppfnMemScan = NULL; + switch (GCPhysAlign) + { + case 1: + if (cbNeedle >= 8) + *ppfnMemScan = pgmR3DbgFixedMemScan8Wide1Step; + else if (cbNeedle >= 4) + *ppfnMemScan = pgmR3DbgFixedMemScan4Wide1Step; + else + *ppfnMemScan = pgmR3DbgFixedMemScan1Wide1Step; + break; + case 2: + if (cbNeedle >= 2) + *ppfnMemScan = pgmR3DbgFixedMemScan2Wide2Step; + break; + case 4: + if (cbNeedle >= 4) + *ppfnMemScan = pgmR3DbgFixedMemScan4Wide4Step; + break; + case 8: + if (cbNeedle >= 8) + *ppfnMemScan = pgmR3DbgFixedMemScan8Wide8Step; + break; + } +} + + + +/** + * Scans guest physical memory for a byte string. + * + * @returns VBox status codes: + * @retval VINF_SUCCESS and *pGCPtrHit on success. + * @retval VERR_DBGF_MEM_NOT_FOUND if not found. + * @retval VERR_INVALID_POINTER if any of the pointer arguments are invalid. + * @retval VERR_INVALID_ARGUMENT if any other arguments are invalid. + * + * @param pVM The cross context VM structure. + * @param GCPhys Where to start searching. + * @param cbRange The number of bytes to search. + * @param GCPhysAlign The alignment of the needle. Must be a power of two + * and less or equal to 4GB. + * @param pabNeedle The byte string to search for. + * @param cbNeedle The length of the byte string. Max 256 bytes. + * @param pGCPhysHit Where to store the address of the first occurrence on success. + */ +VMMR3_INT_DECL(int) PGMR3DbgScanPhysical(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cbRange, RTGCPHYS GCPhysAlign, + const uint8_t *pabNeedle, size_t cbNeedle, PRTGCPHYS pGCPhysHit) +{ + /* + * Validate and adjust the input a bit. 
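+ *
+ * The alignment fix-up below rounds the start address up to the requested
+ * power-of-two alignment and shrinks the range accordingly. A standalone
+ * sketch (illustrative name; folds the already-aligned case into the same
+ * expression) that returns false when the adjustment exhausts the range or
+ * overflows:
+ *
+ *      #include <stdint.h>
+ *
+ *      static bool AlignSearchStart(uint64_t &addr, uint64_t &cbRange, uint64_t align)
+ *      {
+ *          uint64_t adj = (align - (addr & (align - 1))) & (align - 1);
+ *          if (adj >= cbRange || addr + adj < addr)
+ *              return false;
+ *          addr    += adj;
+ *          cbRange -= adj;
+ *          return true;
+ *      }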
+ */ + if (!VALID_PTR(pGCPhysHit)) + return VERR_INVALID_POINTER; + *pGCPhysHit = NIL_RTGCPHYS; + + if ( !VALID_PTR(pabNeedle) + || GCPhys == NIL_RTGCPHYS) + return VERR_INVALID_POINTER; + if (!cbNeedle) + return VERR_INVALID_PARAMETER; + if (cbNeedle > MAX_NEEDLE_SIZE) + return VERR_INVALID_PARAMETER; + + if (!cbRange) + return VERR_DBGF_MEM_NOT_FOUND; + if (GCPhys + cbNeedle - 1 < GCPhys) + return VERR_DBGF_MEM_NOT_FOUND; + + if (!GCPhysAlign) + return VERR_INVALID_PARAMETER; + if (GCPhysAlign > UINT32_MAX) + return VERR_NOT_POWER_OF_TWO; + if (GCPhysAlign & (GCPhysAlign - 1)) + return VERR_INVALID_PARAMETER; + + if (GCPhys & (GCPhysAlign - 1)) + { + RTGCPHYS Adj = GCPhysAlign - (GCPhys & (GCPhysAlign - 1)); + if ( cbRange <= Adj + || GCPhys + Adj < GCPhys) + return VERR_DBGF_MEM_NOT_FOUND; + GCPhys += Adj; + cbRange -= Adj; + } + + const bool fAllZero = ASMMemIsZero(pabNeedle, cbNeedle); + const uint32_t cIncPages = GCPhysAlign <= PAGE_SIZE + ? 1 + : GCPhysAlign >> PAGE_SHIFT; + const RTGCPHYS GCPhysLast = GCPhys + cbRange - 1 >= GCPhys + ? GCPhys + cbRange - 1 + : ~(RTGCPHYS)0; + + PFNPGMR3DBGFIXEDMEMSCAN pfnMemScan; + pgmR3DbgSelectMemScanFunction(&pfnMemScan, (uint32_t)GCPhysAlign, cbNeedle); + + /* + * Search the memory - ignore MMIO and zero pages, also don't + * bother to match across ranges. + */ + pgmLock(pVM); + for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX); + pRam; + pRam = pRam->CTX_SUFF(pNext)) + { + /* + * If the search range starts prior to the current ram range record, + * adjust the search range and possibly conclude the search. + */ + RTGCPHYS off; + if (GCPhys < pRam->GCPhys) + { + if (GCPhysLast < pRam->GCPhys) + break; + GCPhys = pRam->GCPhys; + off = 0; + } + else + off = GCPhys - pRam->GCPhys; + if (off < pRam->cb) + { + /* + * Iterate the relevant pages. + */ + uint8_t abPrev[MAX_NEEDLE_SIZE]; + size_t cbPrev = 0; + const uint32_t cPages = pRam->cb >> PAGE_SHIFT; + uint32_t iPage = off >> PAGE_SHIFT; + uint32_t offPage = GCPhys & PAGE_OFFSET_MASK; + GCPhys &= ~(RTGCPHYS)PAGE_OFFSET_MASK; + for (;; offPage = 0) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + if ( ( !PGM_PAGE_IS_ZERO(pPage) + || fAllZero) + && !PGM_PAGE_IS_MMIO_OR_ALIAS(pPage) + && !PGM_PAGE_IS_BALLOONED(pPage)) + { + void const *pvPage; + PGMPAGEMAPLOCK Lock; + int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, &pvPage, &Lock); + if (RT_SUCCESS(rc)) + { + int32_t offHit = offPage; + bool fRc; + if (GCPhysAlign < PAGE_SIZE) + { + uint32_t cbSearch = (GCPhys ^ GCPhysLast) & ~(RTGCPHYS)PAGE_OFFSET_MASK + ? PAGE_SIZE - (uint32_t)offPage + : (GCPhysLast & PAGE_OFFSET_MASK) + 1 - (uint32_t)offPage; + fRc = pgmR3DbgScanPage((uint8_t const *)pvPage, &offHit, cbSearch, (uint32_t)GCPhysAlign, + pabNeedle, cbNeedle, pfnMemScan, &abPrev[0], &cbPrev); + } + else + fRc = memcmp(pvPage, pabNeedle, cbNeedle) == 0 + && (GCPhysLast - GCPhys) >= cbNeedle; + PGMPhysReleasePageMappingLock(pVM, &Lock); + if (fRc) + { + *pGCPhysHit = GCPhys + offHit; + pgmUnlock(pVM); + return VINF_SUCCESS; + } + } + else + cbPrev = 0; /* ignore error. */ + } + else + cbPrev = 0; + + /* advance to the next page. */ + GCPhys += (RTGCPHYS)cIncPages << PAGE_SHIFT; + if (GCPhys >= GCPhysLast) /* (may not always hit, but we're run out of ranges.) */ + { + pgmUnlock(pVM); + return VERR_DBGF_MEM_NOT_FOUND; + } + iPage += cIncPages; + if ( iPage < cIncPages + || iPage >= cPages) + break; + } + } + } + pgmUnlock(pVM); + return VERR_DBGF_MEM_NOT_FOUND; +} + + +/** + * Scans (guest) virtual memory for a byte string. 
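[Editor's note, not part of the patch] A hedged usage sketch for PGMR3DbgScanPhysical as defined above: scan the first 16 MiB of guest physical memory for a 4-byte, 4-byte-aligned signature. The wrapper name and the needle bytes are made up for illustration; only the PGMR3DbgScanPhysical call itself comes from this file, and the usual VMM internal headers (PVM, RTGCPHYS, _1M) are assumed to be in scope.

static int scanGuestRamForSignature(PVM pVM, PRTGCPHYS pGCPhysHit)
{
    static const uint8_t s_abNeedle[4] = { 0xde, 0xad, 0xbe, 0xef };
    return PGMR3DbgScanPhysical(pVM,
                                0,                    /* GCPhys: start of guest physical space */
                                16 * _1M,             /* cbRange */
                                4,                    /* GCPhysAlign */
                                s_abNeedle, sizeof(s_abNeedle),
                                pGCPhysHit);          /* VINF_SUCCESS or VERR_DBGF_MEM_NOT_FOUND */
}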
+ * + * @returns VBox status codes: + * @retval VINF_SUCCESS and *pGCPtrHit on success. + * @retval VERR_DBGF_MEM_NOT_FOUND if not found. + * @retval VERR_INVALID_POINTER if any of the pointer arguments are invalid. + * @retval VERR_INVALID_ARGUMENT if any other arguments are invalid. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the CPU + * context to search from. + * @param GCPtr Where to start searching. + * @param GCPtrAlign The alignment of the needle. Must be a power of two + * and less or equal to 4GB. + * @param cbRange The number of bytes to search. Max 256 bytes. + * @param pabNeedle The byte string to search for. + * @param cbNeedle The length of the byte string. + * @param pGCPtrHit Where to store the address of the first occurrence on success. + */ +VMMR3_INT_DECL(int) PGMR3DbgScanVirtual(PVM pVM, PVMCPU pVCpu, RTGCPTR GCPtr, RTGCPTR cbRange, RTGCPTR GCPtrAlign, + const uint8_t *pabNeedle, size_t cbNeedle, PRTGCUINTPTR pGCPtrHit) +{ + VMCPU_ASSERT_EMT(pVCpu); + + /* + * Validate and adjust the input a bit. + */ + if (!VALID_PTR(pGCPtrHit)) + return VERR_INVALID_POINTER; + *pGCPtrHit = 0; + + if (!VALID_PTR(pabNeedle)) + return VERR_INVALID_POINTER; + if (!cbNeedle) + return VERR_INVALID_PARAMETER; + if (cbNeedle > MAX_NEEDLE_SIZE) + return VERR_INVALID_PARAMETER; + + if (!cbRange) + return VERR_DBGF_MEM_NOT_FOUND; + if (GCPtr + cbNeedle - 1 < GCPtr) + return VERR_DBGF_MEM_NOT_FOUND; + + if (!GCPtrAlign) + return VERR_INVALID_PARAMETER; + if (GCPtrAlign > UINT32_MAX) + return VERR_NOT_POWER_OF_TWO; + if (GCPtrAlign & (GCPtrAlign - 1)) + return VERR_INVALID_PARAMETER; + + if (GCPtr & (GCPtrAlign - 1)) + { + RTGCPTR Adj = GCPtrAlign - (GCPtr & (GCPtrAlign - 1)); + if ( cbRange <= Adj + || GCPtr + Adj < GCPtr) + return VERR_DBGF_MEM_NOT_FOUND; + GCPtr += Adj; + cbRange -= Adj; + } + + /* Only paged protected mode or long mode here, use the physical scan for + the other modes. */ + PGMMODE enmMode = PGMGetGuestMode(pVCpu); + AssertReturn(PGMMODE_WITH_PAGING(enmMode), VERR_PGM_NOT_USED_IN_MODE); + + /* + * Search the memory - ignore MMIO, zero and not-present pages. + */ + const bool fAllZero = ASMMemIsZero(pabNeedle, cbNeedle); + RTGCPTR GCPtrMask = PGMMODE_IS_LONG_MODE(enmMode) ? UINT64_MAX : UINT32_MAX; + uint8_t abPrev[MAX_NEEDLE_SIZE]; + size_t cbPrev = 0; + const uint32_t cIncPages = GCPtrAlign <= PAGE_SIZE + ? 1 + : GCPtrAlign >> PAGE_SHIFT; + const RTGCPTR GCPtrLast = GCPtr + cbRange - 1 >= GCPtr + ? (GCPtr + cbRange - 1) & GCPtrMask + : GCPtrMask; + RTGCPTR cPages = (((GCPtrLast - GCPtr) + (GCPtr & PAGE_OFFSET_MASK)) >> PAGE_SHIFT) + 1; + uint32_t offPage = GCPtr & PAGE_OFFSET_MASK; + GCPtr &= ~(RTGCPTR)PAGE_OFFSET_MASK; + + PFNPGMR3DBGFIXEDMEMSCAN pfnMemScan; + pgmR3DbgSelectMemScanFunction(&pfnMemScan, (uint32_t)GCPtrAlign, cbNeedle); + + VMSTATE enmVMState = pVM->enmVMState; + uint32_t const cYieldCountDownReload = VMSTATE_IS_RUNNING(enmVMState) ? 4096 : 65536; + uint32_t cYieldCountDown = cYieldCountDownReload; + RTGCPHYS GCPhysPrev = NIL_RTGCPHYS; + bool fFullWalk = true; + PGMPTWALKGST Walk; + RT_ZERO(Walk); + + pgmLock(pVM); + for (;; offPage = 0) + { + int rc; + if (fFullWalk) + rc = pgmGstPtWalk(pVCpu, GCPtr, &Walk); + else + rc = pgmGstPtWalkNext(pVCpu, GCPtr, &Walk); + if (RT_SUCCESS(rc) && Walk.u.Core.fSucceeded) + { + fFullWalk = false; + + /* Skip if same page as previous one (W10 optimization). 
*/ + if ( Walk.u.Core.GCPhys != GCPhysPrev + || cbPrev != 0) + { + PPGMPAGE pPage = pgmPhysGetPage(pVM, Walk.u.Core.GCPhys); + if ( pPage + && ( !PGM_PAGE_IS_ZERO(pPage) + || fAllZero) + && !PGM_PAGE_IS_MMIO_OR_ALIAS(pPage) + && !PGM_PAGE_IS_BALLOONED(pPage)) + { + GCPhysPrev = Walk.u.Core.GCPhys; + void const *pvPage; + PGMPAGEMAPLOCK Lock; + rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, Walk.u.Core.GCPhys, &pvPage, &Lock); + if (RT_SUCCESS(rc)) + { + int32_t offHit = offPage; + bool fRc; + if (GCPtrAlign < PAGE_SIZE) + { + uint32_t cbSearch = cPages > 0 + ? PAGE_SIZE - (uint32_t)offPage + : (GCPtrLast & PAGE_OFFSET_MASK) + 1 - (uint32_t)offPage; + fRc = pgmR3DbgScanPage((uint8_t const *)pvPage, &offHit, cbSearch, (uint32_t)GCPtrAlign, + pabNeedle, cbNeedle, pfnMemScan, &abPrev[0], &cbPrev); + } + else + fRc = memcmp(pvPage, pabNeedle, cbNeedle) == 0 + && (GCPtrLast - GCPtr) >= cbNeedle; + PGMPhysReleasePageMappingLock(pVM, &Lock); + if (fRc) + { + *pGCPtrHit = GCPtr + offHit; + pgmUnlock(pVM); + return VINF_SUCCESS; + } + } + else + cbPrev = 0; /* ignore error. */ + } + else + cbPrev = 0; + } + else + cbPrev = 0; + } + else + { + Assert(Walk.enmType != PGMPTWALKGSTTYPE_INVALID); + Assert(!Walk.u.Core.fSucceeded); + cbPrev = 0; /* ignore error. */ + + /* + * Try skip as much as possible. No need to figure out that a PDE + * is not present 512 times! + */ + uint64_t cPagesCanSkip; + switch (Walk.u.Core.uLevel) + { + case 1: + /* page level, use cIncPages */ + cPagesCanSkip = 1; + break; + case 2: + if (Walk.enmType == PGMPTWALKGSTTYPE_32BIT) + { + cPagesCanSkip = X86_PG_ENTRIES - ((GCPtr >> X86_PT_SHIFT) & X86_PT_MASK); + Assert(!((GCPtr + ((RTGCPTR)cPagesCanSkip << X86_PT_PAE_SHIFT)) & (RT_BIT_64(X86_PD_SHIFT) - 1))); + } + else + { + cPagesCanSkip = X86_PG_PAE_ENTRIES - ((GCPtr >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK); + Assert(!((GCPtr + ((RTGCPTR)cPagesCanSkip << X86_PT_PAE_SHIFT)) & (RT_BIT_64(X86_PD_PAE_SHIFT) - 1))); + } + break; + case 3: + cPagesCanSkip = (X86_PG_PAE_ENTRIES - ((GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK)) * X86_PG_PAE_ENTRIES + - ((GCPtr >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK); + Assert(!((GCPtr + ((RTGCPTR)cPagesCanSkip << X86_PT_PAE_SHIFT)) & (RT_BIT_64(X86_PDPT_SHIFT) - 1))); + break; + case 4: + cPagesCanSkip = (X86_PG_PAE_ENTRIES - ((GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64)) + * X86_PG_PAE_ENTRIES * X86_PG_PAE_ENTRIES + - ((((GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK)) * X86_PG_PAE_ENTRIES) + - (( GCPtr >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK); + Assert(!((GCPtr + ((RTGCPTR)cPagesCanSkip << X86_PT_PAE_SHIFT)) & (RT_BIT_64(X86_PML4_SHIFT) - 1))); + break; + case 8: + /* The CR3 value is bad, forget the whole search. */ + cPagesCanSkip = cPages; + break; + default: + AssertMsgFailed(("%d\n", Walk.u.Core.uLevel)); + cPagesCanSkip = 0; + break; + } + if (cPages <= cPagesCanSkip) + break; + fFullWalk = true; + if (cPagesCanSkip >= cIncPages) + { + cPages -= cPagesCanSkip; + GCPtr += (RTGCPTR)cPagesCanSkip << X86_PT_PAE_SHIFT; + continue; + } + } + + /* advance to the next page. */ + if (cPages <= cIncPages) + break; + cPages -= cIncPages; + GCPtr += (RTGCPTR)cIncPages << X86_PT_PAE_SHIFT; + + /* Yield the PGM lock every now and then. */ + if (!--cYieldCountDown) + { + fFullWalk = PDMR3CritSectYield(&pVM->pgm.s.CritSectX); + cYieldCountDown = cYieldCountDownReload; + } + } + pgmUnlock(pVM); + return VERR_DBGF_MEM_NOT_FOUND; +} + + +/** + * Initializes the dumper state. + * + * @param pState The state to initialize. + * @param pVM The cross context VM structure. 
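[Editor's note, not part of the patch] When a guest page-table walk fails, the virtual scanner above does not re-walk every page: it computes how many pages the missing structure covers (cPagesCanSkip) and skips them in one step. A standalone restatement of the level-2, not-present 32-bit PDE case; the constants mirror X86_PG_ENTRIES and X86_PT_SHIFT and the helper name is illustrative:

#include <stdint.h>

/* Pages that can be skipped when the 32-bit PDE covering GCPtr is not
 * present: the remaining entries of the page table it would have mapped,
 * i.e. everything up to the next 4 MiB boundary. */
static uint32_t pagesSkippableFor32BitPde(uint32_t GCPtr)
{
    const uint32_t cPtEntries = 1024;                               /* X86_PG_ENTRIES */
    const uint32_t iPte       = (GCPtr >> 12) & (cPtEntries - 1);   /* X86_PT_SHIFT / X86_PT_MASK */
    return cPtEntries - iPte;
}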
+ * @param fFlags The flags. + * @param u64FirstAddr The first address. + * @param u64LastAddr The last address. + * @param pHlp The output helpers. + */ +static void pgmR3DumpHierarchyInitState(PPGMR3DUMPHIERARCHYSTATE pState, PVM pVM, uint32_t fFlags, + uint64_t u64FirstAddr, uint64_t u64LastAddr, PCDBGFINFOHLP pHlp) +{ + pState->pVM = pVM; + pState->pHlp = pHlp ? pHlp : DBGFR3InfoLogHlp(); + pState->fPse = !!(fFlags & (DBGFPGDMP_FLAGS_PSE | DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_LME)); + pState->fPae = !!(fFlags & (DBGFPGDMP_FLAGS_PAE | DBGFPGDMP_FLAGS_LME)); + pState->fLme = !!(fFlags & DBGFPGDMP_FLAGS_LME); + pState->fNp = !!(fFlags & DBGFPGDMP_FLAGS_NP); + pState->fEpt = !!(fFlags & DBGFPGDMP_FLAGS_EPT); + pState->fNxe = !!(fFlags & DBGFPGDMP_FLAGS_NXE); + pState->cchAddress = pState->fLme ? 16 : 8; + pState->uLastRsvdBit = pState->fNxe ? 62 : 63; + pState->fDumpPageInfo = !!(fFlags & DBGFPGDMP_FLAGS_PAGE_INFO); + pState->fPrintHeader = !!(fFlags & DBGFPGDMP_FLAGS_HEADER); + pState->fPrintCr3 = !!(fFlags & DBGFPGDMP_FLAGS_PRINT_CR3); + pState->afReserved[0] = false; + pState->afReserved[1] = false; + pState->afReserved[2] = false; + pState->afReserved[3] = false; + pState->afReserved[4] = false; + pState->u64Address = u64FirstAddr; + pState->u64FirstAddress = u64FirstAddr; + pState->u64LastAddress = u64LastAddr; + pState->u64HighReservedBits = pState->uLastRsvdBit == 62 ? UINT64_C(0x7ff) << 52 : UINT64_C(0xfff) << 52; + pState->cLeaves = 0; +} + + +/** + * The simple way out, too tired to think of a more elegant solution. + * + * @returns The base address of this page table/directory/whatever. + * @param pState The state where we get the current address. + * @param cShift The shift count for the table entries. + * @param cEntries The number of table entries. + * @param piFirst Where to return the table index of the first + * entry to dump. + * @param piLast Where to return the table index of the last + * entry. + */ +static uint64_t pgmR3DumpHierarchyCalcRange(PPGMR3DUMPHIERARCHYSTATE pState, uint32_t cShift, uint32_t cEntries, + uint32_t *piFirst, uint32_t *piLast) +{ + const uint64_t iBase = (pState->u64Address >> cShift) & ~(uint64_t)(cEntries - 1); + const uint64_t iFirst = pState->u64FirstAddress >> cShift; + const uint64_t iLast = pState->u64LastAddress >> cShift; + + if ( iBase >= iFirst + && iBase + cEntries - 1 <= iLast) + { + /* full range. */ + *piFirst = 0; + *piLast = cEntries - 1; + } + else if ( iBase + cEntries - 1 < iFirst + || iBase > iLast) + { + /* no match */ + *piFirst = cEntries; + *piLast = 0; + } + else + { + /* partial overlap */ + *piFirst = iBase <= iFirst + ? iFirst - iBase + : 0; + *piLast = iBase + cEntries - 1 <= iLast + ? cEntries - 1 + : iLast - iBase; + } + + return iBase << cShift; +} + + +/** + * Maps/finds the shadow page. + * + * @returns VBox status code. + * @param pState The dumper state. + * @param HCPhys The physical address of the shadow page. + * @param pszDesc The description. + * @param fIsMapping Set if it's a mapping. + * @param ppv Where to return the pointer. + */ +static int pgmR3DumpHierarchyShwMapPage(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, const char *pszDesc, + bool fIsMapping, void const **ppv) +{ + void *pvPage; + if (!fIsMapping) + { + int rc = MMPagePhys2PageTry(pState->pVM, HCPhys, &pvPage); + if (RT_FAILURE(rc)) + { + pState->pHlp->pfnPrintf(pState->pHlp, "%0*llx error! 
%s at HCPhys=%RHp was not found in the page pool!\n", + pState->cchAddress, pState->u64Address, pszDesc, HCPhys); + return rc; + } + } + else + { + pvPage = NULL; + for (PPGMMAPPING pMap = pState->pVM->pgm.s.pMappingsR3; pMap; pMap = pMap->pNextR3) + { + uint64_t off = pState->u64Address - pMap->GCPtr; + if (off < pMap->cb) + { + const int iPDE = (uint32_t)(off >> X86_PD_SHIFT); + const int iSub = (int)((off >> X86_PD_PAE_SHIFT) & 1); /* MSC is a pain sometimes */ + if ((iSub ? pMap->aPTs[iPDE].HCPhysPaePT1 : pMap->aPTs[iPDE].HCPhysPaePT0) != HCPhys) + pState->pHlp->pfnPrintf(pState->pHlp, + "%0*llx error! Mapping error! PT %d has HCPhysPT=%RHp not %RHp is in the PD.\n", + pState->cchAddress, pState->u64Address, iPDE, + iSub ? pMap->aPTs[iPDE].HCPhysPaePT1 : pMap->aPTs[iPDE].HCPhysPaePT0, HCPhys); + pvPage = &pMap->aPTs[iPDE].paPaePTsR3[iSub]; + break; + } + } + if (!pvPage) + { + pState->pHlp->pfnPrintf(pState->pHlp, "%0*llx error! PT mapping %s at HCPhys=%RHp was not found in the page pool!\n", + pState->cchAddress, pState->u64Address, pszDesc, HCPhys); + return VERR_INVALID_PARAMETER; + } + } + *ppv = pvPage; + return VINF_SUCCESS; +} + + +/** + * Dumps the a shadow page summary or smth. + * + * @param pState The dumper state. + * @param HCPhys The page address. + */ +static void pgmR3DumpHierarchyShwTablePageInfo(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys) +{ + pgmLock(pState->pVM); + char szPage[80]; + PPGMPOOLPAGE pPage = pgmPoolQueryPageForDbg(pState->pVM->pgm.s.CTX_SUFF(pPool), HCPhys); + if (pPage) + RTStrPrintf(szPage, sizeof(szPage), " idx=0i%u", pPage->idx); + else + { + /* probably a mapping */ + strcpy(szPage, " not found"); + for (PPGMMAPPING pMap = pState->pVM->pgm.s.pMappingsR3; pMap; pMap = pMap->pNextR3) + { + uint64_t off = pState->u64Address - pMap->GCPtr; + if (off < pMap->cb) + { + const int iPDE = (uint32_t)(off >> X86_PD_SHIFT); + if (pMap->aPTs[iPDE].HCPhysPT == HCPhys) + RTStrPrintf(szPage, sizeof(szPage), " #%u: %s", iPDE, pMap->pszDesc); + else if (pMap->aPTs[iPDE].HCPhysPaePT0 == HCPhys) + RTStrPrintf(szPage, sizeof(szPage), " #%u/0: %s", iPDE, pMap->pszDesc); + else if (pMap->aPTs[iPDE].HCPhysPaePT1 == HCPhys) + RTStrPrintf(szPage, sizeof(szPage), " #%u/1: %s", iPDE, pMap->pszDesc); + else + continue; + break; + } + } + } + pgmUnlock(pState->pVM); + pState->pHlp->pfnPrintf(pState->pHlp, "%s", szPage); +} + + +/** + * Figures out which guest page this is and dumps a summary. + * + * @param pState The dumper state. + * @param HCPhys The page address. + * @param cbPage The page size. + */ +static void pgmR3DumpHierarchyShwGuestPageInfo(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, uint32_t cbPage) +{ + char szPage[80]; + RTGCPHYS GCPhys; + int rc = PGMR3DbgHCPhys2GCPhys(pState->pVM->pUVM, HCPhys, &GCPhys); + if (RT_SUCCESS(rc)) + { + pgmLock(pState->pVM); + PCPGMPAGE pPage = pgmPhysGetPage(pState->pVM, GCPhys); + if (pPage) + RTStrPrintf(szPage, sizeof(szPage), "%R[pgmpage]", pPage); + else + strcpy(szPage, "not found"); + pgmUnlock(pState->pVM); + pState->pHlp->pfnPrintf(pState->pHlp, " -> %RGp %s", GCPhys, szPage); + } + else + { + /* check the heap */ + uint32_t cbAlloc; + rc = MMR3HyperQueryInfoFromHCPhys(pState->pVM, HCPhys, szPage, sizeof(szPage), &cbAlloc); + if (RT_SUCCESS(rc)) + pState->pHlp->pfnPrintf(pState->pHlp, " %s %#x bytes", szPage, cbAlloc); + else + pState->pHlp->pfnPrintf(pState->pHlp, " not found"); + } + NOREF(cbPage); +} + + +/** + * Dumps a PAE shadow page table. + * + * @returns VBox status code (VINF_SUCCESS). 
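[Editor's note, not part of the patch] pgmR3DumpHierarchyCalcRange above clamps a table's entry indices to the requested dump window. A standalone restatement of the same clamping, taking the table base directly instead of deriving it from the current address (illustrative only):

#include <stdint.h>

/* Clamp a table of cEntries entries, each covering 1 << cShift bytes and
 * based at u64Base, to the dump window [u64First, u64Last]; returns the
 * first and last entry indices to visit (first > last means no overlap). */
static void clampTableRangeSketch(uint64_t u64Base, uint32_t cShift, uint32_t cEntries,
                                  uint64_t u64First, uint64_t u64Last,
                                  uint32_t *piFirst, uint32_t *piLast)
{
    uint64_t u64TblLast = u64Base + ((uint64_t)cEntries << cShift) - 1;
    if (u64TblLast < u64First || u64Base > u64Last)
    {
        *piFirst = cEntries;    /* empty range */
        *piLast  = 0;
        return;
    }
    *piFirst = u64First > u64Base    ? (uint32_t)((u64First - u64Base) >> cShift) : 0;
    *piLast  = u64Last  < u64TblLast ? (uint32_t)((u64Last  - u64Base) >> cShift) : cEntries - 1;
}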
+ * @param pState The dumper state. + * @param HCPhys The page table address. + * @param fIsMapping Whether it is a mapping. + */ +static int pgmR3DumpHierarchyShwPaePT(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, bool fIsMapping) +{ + PCPGMSHWPTPAE pPT; + int rc = pgmR3DumpHierarchyShwMapPage(pState, HCPhys, "Page table", fIsMapping, (void const **)&pPT); + if (RT_FAILURE(rc)) + return rc; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PT_PAE_SHIFT, X86_PG_PAE_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + if (PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_P) + { + pState->u64Address = u64BaseAddress + ((uint64_t)i << X86_PT_PAE_SHIFT); + if (PGMSHWPTEPAE_IS_P(pPT->a[i])) + { + X86PTEPAE Pte; + Pte.u = PGMSHWPTEPAE_GET_U(pPT->a[i]); + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme /*P R S A D G WT CD AT NX 4M a p ? */ + ? "%016llx 3 | P %c %c %c %c %c %s %s %s %s 4K %c%c%c %016llx" + : "%08llx 2 | P %c %c %c %c %c %s %s %s %s 4K %c%c%c %016llx", + pState->u64Address, + Pte.n.u1Write ? 'W' : 'R', + Pte.n.u1User ? 'U' : 'S', + Pte.n.u1Accessed ? 'A' : '-', + Pte.n.u1Dirty ? 'D' : '-', + Pte.n.u1Global ? 'G' : '-', + Pte.n.u1WriteThru ? "WT" : "--", + Pte.n.u1CacheDisable? "CD" : "--", + Pte.n.u1PAT ? "AT" : "--", + Pte.n.u1NoExecute ? "NX" : "--", + Pte.u & PGM_PTFLAGS_TRACK_DIRTY ? 'd' : '-', + Pte.u & RT_BIT(10) ? '1' : '0', + Pte.u & PGM_PTFLAGS_CSAM_VALIDATED? 'v' : '-', + Pte.u & X86_PTE_PAE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwGuestPageInfo(pState, Pte.u & X86_PTE_PAE_PG_MASK, _4K); + if ((Pte.u >> 52) & 0x7ff) + pState->pHlp->pfnPrintf(pState->pHlp, " 62:52=%03llx%s", (Pte.u >> 52) & 0x7ff, pState->fLme ? "" : "!"); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + } + else if ( (PGMSHWPTEPAE_GET_U(pPT->a[i]) & (pState->pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX)) + == (pState->pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX)) + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme + ? "%016llx 3 | invalid / MMIO optimization\n" + : "%08llx 2 | invalid / MMIO optimization\n", + pState->u64Address); + else + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme + ? "%016llx 3 | invalid: %RX64\n" + : "%08llx 2 | invalid: %RX64\n", + pState->u64Address, PGMSHWPTEPAE_GET_U(pPT->a[i])); + pState->cLeaves++; + } + return VINF_SUCCESS; +} + + +/** + * Dumps a PAE shadow page directory table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param HCPhys The physical address of the page directory table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyShwPaePD(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, unsigned cMaxDepth) +{ + PCX86PDPAE pPD; + int rc = pgmR3DumpHierarchyShwMapPage(pState, HCPhys, "Page directory", false, (void const **)&pPD); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth > 0); + cMaxDepth--; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PD_PAE_SHIFT, X86_PG_PAE_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PDEPAE Pde = pPD->a[i]; + if (Pde.n.u1Present) + { + pState->u64Address = u64BaseAddress + ((uint64_t)i << X86_PD_PAE_SHIFT); + if (Pde.b.u1Size) + { + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme /*P R S A D G WT CD AT NX 2M a p ? phys*/ + ? 
"%016llx 2 | P %c %c %c %c %c %s %s %s %s 2M %c%c%c %016llx" + : "%08llx 1 | P %c %c %c %c %c %s %s %s %s 2M %c%c%c %016llx", + pState->u64Address, + Pde.b.u1Write ? 'W' : 'R', + Pde.b.u1User ? 'U' : 'S', + Pde.b.u1Accessed ? 'A' : '-', + Pde.b.u1Dirty ? 'D' : '-', + Pde.b.u1Global ? 'G' : '-', + Pde.b.u1WriteThru ? "WT" : "--", + Pde.b.u1CacheDisable? "CD" : "--", + Pde.b.u1PAT ? "AT" : "--", + Pde.b.u1NoExecute ? "NX" : "--", + Pde.u & PGM_PDFLAGS_BIG_PAGE ? 'b' : '-', + Pde.u & PGM_PDFLAGS_MAPPING ? 'm' : '-', + Pde.u & PGM_PDFLAGS_TRACK_DIRTY ? 'd' : '-', + Pde.u & X86_PDE2M_PAE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwGuestPageInfo(pState, Pde.u & X86_PDE2M_PAE_PG_MASK, _2M); + if ((Pde.u >> 52) & 0x7ff) + pState->pHlp->pfnPrintf(pState->pHlp, " 62:52=%03llx%s", (Pde.u >> 52) & 0x7ff, pState->fLme ? "" : "!"); + if ((Pde.u >> 13) & 0xff) + pState->pHlp->pfnPrintf(pState->pHlp, " 20:13=%02llx%s", (Pde.u >> 13) & 0x0ff, pState->fLme ? "" : "!"); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + pState->cLeaves++; + } + else + { + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme /*P R S A D G WT CD AT NX 4M a p ? phys */ + ? "%016llx 2 | P %c %c %c %c %c %s %s .. %s .. %c%c%c %016llx" + : "%08llx 1 | P %c %c %c %c %c %s %s .. %s .. %c%c%c %016llx", + pState->u64Address, + Pde.n.u1Write ? 'W' : 'R', + Pde.n.u1User ? 'U' : 'S', + Pde.n.u1Accessed ? 'A' : '-', + Pde.n.u1Reserved0 ? '?' : '.', /* ignored */ + Pde.n.u1Reserved1 ? '?' : '.', /* ignored */ + Pde.n.u1WriteThru ? "WT" : "--", + Pde.n.u1CacheDisable? "CD" : "--", + Pde.n.u1NoExecute ? "NX" : "--", + Pde.u & PGM_PDFLAGS_BIG_PAGE ? 'b' : '-', + Pde.u & PGM_PDFLAGS_MAPPING ? 'm' : '-', + Pde.u & PGM_PDFLAGS_TRACK_DIRTY ? 'd' : '-', + Pde.u & X86_PDE_PAE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwTablePageInfo(pState, Pde.u & X86_PDE_PAE_PG_MASK); + if ((Pde.u >> 52) & 0x7ff) + pState->pHlp->pfnPrintf(pState->pHlp, " 62:52=%03llx!", (Pde.u >> 52) & 0x7ff); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyShwPaePT(pState, Pde.u & X86_PDE_PAE_PG_MASK, !!(Pde.u & PGM_PDFLAGS_MAPPING)); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + } + return rc; +} + + +/** + * Dumps a PAE shadow page directory pointer table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param HCPhys The physical address of the page directory pointer table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyShwPaePDPT(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, unsigned cMaxDepth) +{ + /* Fend of addresses that are out of range in PAE mode - simplifies the code below. */ + if (!pState->fLme && pState->u64Address >= _4G) + return VINF_SUCCESS; + + PCX86PDPT pPDPT; + int rc = pgmR3DumpHierarchyShwMapPage(pState, HCPhys, "Page directory pointer table", false, (void const **)&pPDPT); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth > 0); + cMaxDepth--; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PDPT_SHIFT, + pState->fLme ? X86_PG_AMD64_PDPE_ENTRIES : X86_PG_PAE_PDPE_ENTRIES, + &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PDPE Pdpe = pPDPT->a[i]; + if (Pdpe.n.u1Present) + { + pState->u64Address = u64BaseAddress + ((uint64_t)i << X86_PDPT_SHIFT); + if (pState->fLme) + { + pState->pHlp->pfnPrintf(pState->pHlp, /*P R S A D G WT CD AT NX .. a p ? 
*/ + "%016llx 1 | P %c %c %c %c %c %s %s %s %s .. %c%c%c %016llx", + pState->u64Address, + Pdpe.lm.u1Write ? 'W' : 'R', + Pdpe.lm.u1User ? 'U' : 'S', + Pdpe.lm.u1Accessed ? 'A' : '-', + Pdpe.lm.u3Reserved & 1? '?' : '.', /* ignored */ + Pdpe.lm.u3Reserved & 4? '!' : '.', /* mbz */ + Pdpe.lm.u1WriteThru ? "WT" : "--", + Pdpe.lm.u1CacheDisable? "CD" : "--", + Pdpe.lm.u3Reserved & 2? "!" : "..",/* mbz */ + Pdpe.lm.u1NoExecute ? "NX" : "--", + Pdpe.u & RT_BIT(9) ? '1' : '0', + Pdpe.u & PGM_PLXFLAGS_PERMANENT ? 'p' : '-', + Pdpe.u & RT_BIT(11) ? '1' : '0', + Pdpe.u & X86_PDPE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwTablePageInfo(pState, Pdpe.u & X86_PDPE_PG_MASK); + if ((Pdpe.u >> 52) & 0x7ff) + pState->pHlp->pfnPrintf(pState->pHlp, " 62:52=%03llx", (Pdpe.u >> 52) & 0x7ff); + } + else + { + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX .. a p ? */ + "%08llx 0 | P %c %c %c %c %c %s %s %s %s .. %c%c%c %016llx", + pState->u64Address, + Pdpe.n.u2Reserved & 1? '!' : '.', /* mbz */ + Pdpe.n.u2Reserved & 2? '!' : '.', /* mbz */ + Pdpe.n.u4Reserved & 1? '!' : '.', /* mbz */ + Pdpe.n.u4Reserved & 2? '!' : '.', /* mbz */ + Pdpe.n.u4Reserved & 8? '!' : '.', /* mbz */ + Pdpe.n.u1WriteThru ? "WT" : "--", + Pdpe.n.u1CacheDisable? "CD" : "--", + Pdpe.n.u4Reserved & 2? "!" : "..",/* mbz */ + Pdpe.lm.u1NoExecute ? "!!" : "..",/* mbz */ + Pdpe.u & RT_BIT(9) ? '1' : '0', + Pdpe.u & PGM_PLXFLAGS_PERMANENT ? 'p' : '-', + Pdpe.u & RT_BIT(11) ? '1' : '0', + Pdpe.u & X86_PDPE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwTablePageInfo(pState, Pdpe.u & X86_PDPE_PG_MASK); + if ((Pdpe.u >> 52) & 0xfff) + pState->pHlp->pfnPrintf(pState->pHlp, " 63:52=%03llx!", (Pdpe.u >> 52) & 0xfff); + } + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyShwPaePD(pState, Pdpe.u & X86_PDPE_PG_MASK, cMaxDepth); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + return rc; +} + + +/** + * Dumps a 32-bit shadow page table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param HCPhys The physical address of the table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyShwPaePML4(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, unsigned cMaxDepth) +{ + PCX86PML4 pPML4; + int rc = pgmR3DumpHierarchyShwMapPage(pState, HCPhys, "Page map level 4", false, (void const **)&pPML4); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth); + cMaxDepth--; + + /* + * This is a bit tricky as we're working on unsigned addresses while the + * AMD64 spec uses signed tricks. + */ + uint32_t iFirst = (pState->u64FirstAddress >> X86_PML4_SHIFT) & X86_PML4_MASK; + uint32_t iLast = (pState->u64LastAddress >> X86_PML4_SHIFT) & X86_PML4_MASK; + if ( pState->u64LastAddress <= UINT64_C(0x00007fffffffffff) + || pState->u64FirstAddress >= UINT64_C(0xffff800000000000)) + { /* Simple, nothing to adjust */ } + else if (pState->u64FirstAddress <= UINT64_C(0x00007fffffffffff)) + iLast = X86_PG_AMD64_ENTRIES / 2 - 1; + else if (pState->u64LastAddress >= UINT64_C(0xffff800000000000)) + iFirst = X86_PG_AMD64_ENTRIES / 2; + else + iFirst = X86_PG_AMD64_ENTRIES; /* neither address is canonical */ + + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PML4E Pml4e = pPML4->a[i]; + if (Pml4e.n.u1Present) + { + pState->u64Address = ((uint64_t)i << X86_PML4_SHIFT) + | (i >= RT_ELEMENTS(pPML4->a) / 2 ? 
UINT64_C(0xffff000000000000) : 0); + pState->pHlp->pfnPrintf(pState->pHlp, /*P R S A D G WT CD AT NX 4M a p ? */ + "%016llx 0 | P %c %c %c %c %c %s %s %s %s .. %c%c%c %016llx", + pState->u64Address, + Pml4e.n.u1Write ? 'W' : 'R', + Pml4e.n.u1User ? 'U' : 'S', + Pml4e.n.u1Accessed ? 'A' : '-', + Pml4e.n.u3Reserved & 1? '?' : '.', /* ignored */ + Pml4e.n.u3Reserved & 4? '!' : '.', /* mbz */ + Pml4e.n.u1WriteThru ? "WT" : "--", + Pml4e.n.u1CacheDisable? "CD" : "--", + Pml4e.n.u3Reserved & 2? "!" : "..",/* mbz */ + Pml4e.n.u1NoExecute ? "NX" : "--", + Pml4e.u & RT_BIT(9) ? '1' : '0', + Pml4e.u & PGM_PLXFLAGS_PERMANENT ? 'p' : '-', + Pml4e.u & RT_BIT(11) ? '1' : '0', + Pml4e.u & X86_PML4E_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwTablePageInfo(pState, Pml4e.u & X86_PML4E_PG_MASK); + if ((Pml4e.u >> 52) & 0x7ff) + pState->pHlp->pfnPrintf(pState->pHlp, " 62:52=%03llx!", (Pml4e.u >> 52) & 0x7ff); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyShwPaePDPT(pState, Pml4e.u & X86_PML4E_PG_MASK, cMaxDepth); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + return rc; +} + + +/** + * Dumps a 32-bit shadow page table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param HCPhys The physical address of the table. + * @param fMapping Set if it's a guest mapping. + */ +static int pgmR3DumpHierarchyShw32BitPT(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, bool fMapping) +{ + PCX86PT pPT; + int rc = pgmR3DumpHierarchyShwMapPage(pState, HCPhys, "Page table", fMapping, (void const **)&pPT); + if (RT_FAILURE(rc)) + return rc; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PT_SHIFT, X86_PG_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PTE Pte = pPT->a[i]; + if (Pte.n.u1Present) + { + pState->u64Address = u64BaseAddress + (i << X86_PT_SHIFT); + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX 4M a m d */ + "%08llx 1 | P %c %c %c %c %c %s %s %s .. 4K %c%c%c %08x", + pState->u64Address, + Pte.n.u1Write ? 'W' : 'R', + Pte.n.u1User ? 'U' : 'S', + Pte.n.u1Accessed ? 'A' : '-', + Pte.n.u1Dirty ? 'D' : '-', + Pte.n.u1Global ? 'G' : '-', + Pte.n.u1WriteThru ? "WT" : "--", + Pte.n.u1CacheDisable? "CD" : "--", + Pte.n.u1PAT ? "AT" : "--", + Pte.u & PGM_PTFLAGS_TRACK_DIRTY ? 'd' : '-', + Pte.u & RT_BIT(10) ? '1' : '0', + Pte.u & PGM_PTFLAGS_CSAM_VALIDATED ? 'v' : '-', + Pte.u & X86_PDE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwGuestPageInfo(pState, Pte.u & X86_PDE_PG_MASK, _4K); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + } + } + return VINF_SUCCESS; +} + + +/** + * Dumps a 32-bit shadow page directory and page tables. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param HCPhys The physical address of the table. + * @param cMaxDepth The maximum depth. 
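[Editor's note, not part of the patch] pgmR3DumpHierarchyShwPaePML4 above restricts the PML4 indices it visits so the walk never crosses the non-canonical hole between the low and high halves of the AMD64 address space. A standalone restatement of that index clamping (512 entries, shift 39; illustrative only):

#include <stdint.h>

/* PML4 indices [*piFirst, *piLast] to visit for [u64First, u64Last],
 * skipping the non-canonical hole; *piFirst > *piLast means visit nothing. */
static void pml4IndexRangeSketch(uint64_t u64First, uint64_t u64Last,
                                 uint32_t *piFirst, uint32_t *piLast)
{
    *piFirst = (uint32_t)((u64First >> 39) & 0x1ff);
    *piLast  = (uint32_t)((u64Last  >> 39) & 0x1ff);
    if (   u64Last  <= UINT64_C(0x00007fffffffffff)
        || u64First >= UINT64_C(0xffff800000000000))
        return;                 /* window entirely below or above the hole */
    if (u64First <= UINT64_C(0x00007fffffffffff))
        *piLast = 255;          /* clamp to the low (canonical) half */
    else if (u64Last >= UINT64_C(0xffff800000000000))
        *piFirst = 256;         /* clamp to the high half */
    else
        *piFirst = 512;         /* neither end is canonical: visit nothing */
}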
+ */ +static int pgmR3DumpHierarchyShw32BitPD(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS HCPhys, unsigned cMaxDepth) +{ + if (pState->u64Address >= _4G) + return VINF_SUCCESS; + + PCX86PD pPD; + int rc = pgmR3DumpHierarchyShwMapPage(pState, HCPhys, "Page directory", false, (void const **)&pPD); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth > 0); + cMaxDepth--; + + uint32_t iFirst, iLast; + pgmR3DumpHierarchyCalcRange(pState, X86_PD_SHIFT, X86_PG_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PDE Pde = pPD->a[i]; + if (Pde.n.u1Present) + { + pState->u64Address = (uint32_t)i << X86_PD_SHIFT; + if (Pde.b.u1Size && pState->fPse) + { + uint64_t u64Phys = ((uint64_t)(Pde.u & X86_PDE4M_PG_HIGH_MASK) << X86_PDE4M_PG_HIGH_SHIFT) + | (Pde.u & X86_PDE4M_PG_MASK); + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX 4M a m d phys */ + "%08llx 0 | P %c %c %c %c %c %s %s %s .. 4M %c%c%c %08llx", + pState->u64Address, + Pde.b.u1Write ? 'W' : 'R', + Pde.b.u1User ? 'U' : 'S', + Pde.b.u1Accessed ? 'A' : '-', + Pde.b.u1Dirty ? 'D' : '-', + Pde.b.u1Global ? 'G' : '-', + Pde.b.u1WriteThru ? "WT" : "--", + Pde.b.u1CacheDisable? "CD" : "--", + Pde.b.u1PAT ? "AT" : "--", + Pde.u & PGM_PDFLAGS_BIG_PAGE ? 'b' : '-', + Pde.u & PGM_PDFLAGS_MAPPING ? 'm' : '-', + Pde.u & PGM_PDFLAGS_TRACK_DIRTY ? 'd' : '-', + u64Phys); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwGuestPageInfo(pState, u64Phys, _4M); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + pState->cLeaves++; + } + else + { + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX 4M a m d phys */ + "%08llx 0 | P %c %c %c %c %c %s %s .. .. 4K %c%c%c %08x", + pState->u64Address, + Pde.n.u1Write ? 'W' : 'R', + Pde.n.u1User ? 'U' : 'S', + Pde.n.u1Accessed ? 'A' : '-', + Pde.n.u1Reserved0 ? '?' : '.', /* ignored */ + Pde.n.u1Reserved1 ? '?' : '.', /* ignored */ + Pde.n.u1WriteThru ? "WT" : "--", + Pde.n.u1CacheDisable? "CD" : "--", + Pde.u & PGM_PDFLAGS_BIG_PAGE ? 'b' : '-', + Pde.u & PGM_PDFLAGS_MAPPING ? 'm' : '-', + Pde.u & PGM_PDFLAGS_TRACK_DIRTY ? 'd' : '-', + Pde.u & X86_PDE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwTablePageInfo(pState, Pde.u & X86_PDE_PG_MASK); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyShw32BitPT(pState, Pde.u & X86_PDE_PG_MASK, !!(Pde.u & PGM_PDFLAGS_MAPPING)); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + } + + return rc; +} + + +/** + * Internal worker that initiates the actual dump. + * + * @returns VBox status code. + * @param pState The dumper state. + * @param cr3 The CR3 value. + * @param cMaxDepth The max depth. + */ +static int pgmR3DumpHierarchyShwDoIt(PPGMR3DUMPHIERARCHYSTATE pState, uint64_t cr3, unsigned cMaxDepth) +{ + int rc; + unsigned const cch = pState->cchAddress; + uint64_t const cr3Mask = pState->fEpt ? X86_CR3_AMD64_PAGE_MASK + : pState->fLme ? X86_CR3_AMD64_PAGE_MASK + : pState->fPae ? X86_CR3_PAE_PAGE_MASK + : X86_CR3_PAGE_MASK; + if (pState->fPrintCr3) + { + const char * const pszMode = pState->fEpt ? "Extended Page Tables" + : pState->fLme ? "Long Mode" + : pState->fPae ? "PAE Mode" + : pState->fPse ? "32-bit w/ PSE" + : "32-bit"; + pState->pHlp->pfnPrintf(pState->pHlp, "cr3=%0*llx", cch, cr3); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyShwTablePageInfo(pState, cr3 & X86_CR3_AMD64_PAGE_MASK); + pState->pHlp->pfnPrintf(pState->pHlp, " %s%s%s\n", + pszMode, + pState->fNp ? " + Nested Paging" : "", + pState->fNxe ? 
" + NX" : ""); + } + + + if (pState->fEpt) + { + if (pState->fPrintHeader) + pState->pHlp->pfnPrintf(pState->pHlp, + "%-*s R - Readable\n" + "%-*s | W - Writeable\n" + "%-*s | | X - Executable\n" + "%-*s | | | EMT - EPT memory type\n" + "%-*s | | | | PAT - Ignored PAT?\n" + "%-*s | | | | | AVL1 - 4 available bits\n" + "%-*s | | | | | | AVL2 - 12 available bits\n" + "%-*s Level | | | | | | | page \n" + /* xxxx n **** R W X EMT PAT AVL1 AVL2 xxxxxxxxxxxxx + R W X 7 0 f fff 0123456701234567 */ + , + cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "Address"); + + pState->pHlp->pfnPrintf(pState->pHlp, "EPT dumping is not yet implemented, sorry.\n"); + /** @todo implemented EPT dumping. */ + rc = VERR_NOT_IMPLEMENTED; + } + else + { + if (pState->fPrintHeader) + pState->pHlp->pfnPrintf(pState->pHlp, + "%-*s P - Present\n" + "%-*s | R/W - Read (0) / Write (1)\n" + "%-*s | | U/S - User (1) / Supervisor (0)\n" + "%-*s | | | A - Accessed\n" + "%-*s | | | | D - Dirty\n" + "%-*s | | | | | G - Global\n" + "%-*s | | | | | | WT - Write thru\n" + "%-*s | | | | | | | CD - Cache disable\n" + "%-*s | | | | | | | | AT - Attribute table (PAT)\n" + "%-*s | | | | | | | | | NX - No execute (K8)\n" + "%-*s | | | | | | | | | | 4K/4M/2M - Page size.\n" + "%-*s | | | | | | | | | | | AVL - a=allocated; m=mapping; d=track dirty;\n" + "%-*s | | | | | | | | | | | | p=permanent; v=validated;\n" + "%-*s Level | | | | | | | | | | | | Page\n" + /* xxxx n **** P R S A D G WT CD AT NX 4M AVL xxxxxxxxxxxxx + - W U - - - -- -- -- -- -- 010 */ + , + cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", + cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "Address"); + if (pState->fLme) + rc = pgmR3DumpHierarchyShwPaePML4(pState, cr3 & cr3Mask, cMaxDepth); + else if (pState->fPae) + rc = pgmR3DumpHierarchyShwPaePDPT(pState, cr3 & cr3Mask, cMaxDepth); + else + rc = pgmR3DumpHierarchyShw32BitPD(pState, cr3 & cr3Mask, cMaxDepth); + } + + if (!pState->cLeaves) + pState->pHlp->pfnPrintf(pState->pHlp, "not present\n"); + return rc; +} + + +/** + * dbgfR3PagingDumpEx worker. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cr3 The CR3 register value. + * @param fFlags The flags, DBGFPGDMP_FLAGS_XXX. + * @param u64FirstAddr The start address. + * @param u64LastAddr The address to stop after. + * @param cMaxDepth The max depth. + * @param pHlp The output callbacks. Defaults to log if NULL. + * + * @internal + */ +VMMR3_INT_DECL(int) PGMR3DumpHierarchyShw(PVM pVM, uint64_t cr3, uint32_t fFlags, uint64_t u64FirstAddr, uint64_t u64LastAddr, + uint32_t cMaxDepth, PCDBGFINFOHLP pHlp) +{ + /* Minimal validation as we're only supposed to service DBGF. */ + AssertReturn(~(fFlags & ~DBGFPGDMP_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER); + AssertReturn(!(fFlags & (DBGFPGDMP_FLAGS_CURRENT_MODE | DBGFPGDMP_FLAGS_CURRENT_CR3)), VERR_INVALID_PARAMETER); + AssertReturn(fFlags & DBGFPGDMP_FLAGS_SHADOW, VERR_INVALID_PARAMETER); + + PGMR3DUMPHIERARCHYSTATE State; + pgmR3DumpHierarchyInitState(&State, pVM, fFlags, u64FirstAddr, u64LastAddr, pHlp); + return pgmR3DumpHierarchyShwDoIt(&State, cr3, cMaxDepth); +} + + +/** + * Dumps a page table hierarchy use only physical addresses and cr4/lm flags. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pVM The cross context VM structure. + * @param cr3 The root of the hierarchy. + * @param cr4 The cr4, only PAE and PSE is currently used. + * @param fLongMode Set if long mode, false if not long mode. 
+ * @param cMaxDepth Number of levels to dump. + * @param pHlp Pointer to the output functions. + * + * @deprecated Use DBGFR3PagingDumpEx. + */ +VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint64_t cr3, uint64_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp) +{ + if (!cMaxDepth) + return VINF_SUCCESS; + + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + uint32_t fFlags = DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3 | DBGFPGDMP_FLAGS_PAGE_INFO | DBGFPGDMP_FLAGS_SHADOW; + fFlags |= cr4 & (X86_CR4_PAE | X86_CR4_PSE); + if (fLongMode) + fFlags |= DBGFPGDMP_FLAGS_LME; + + return DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, fFlags, cr3, 0, fLongMode ? UINT64_MAX : UINT32_MAX, cMaxDepth, pHlp); +} + + +/** + * Maps the guest page. + * + * @returns VBox status code. + * @param pState The dumper state. + * @param GCPhys The physical address of the guest page. + * @param pszDesc The description. + * @param ppv Where to return the pointer. + * @param pLock Where to return the mapping lock. Hand this to + * PGMPhysReleasePageMappingLock when done. + */ +static int pgmR3DumpHierarchyGstMapPage(PPGMR3DUMPHIERARCHYSTATE pState, RTGCPHYS GCPhys, const char *pszDesc, + void const **ppv, PPGMPAGEMAPLOCK pLock) +{ + int rc = PGMPhysGCPhys2CCPtrReadOnly(pState->pVM, GCPhys, ppv, pLock); + if (RT_FAILURE(rc)) + { + pState->pHlp->pfnPrintf(pState->pHlp, "%0*llx error! Failed to map %s at GCPhys=%RGp: %Rrc!\n", + pState->cchAddress, pState->u64Address, pszDesc, GCPhys, rc); + return rc; + } + return VINF_SUCCESS; +} + + +/** + * Figures out which guest page this is and dumps a summary. + * + * @param pState The dumper state. + * @param GCPhys The page address. + * @param cbPage The page size. + */ +static void pgmR3DumpHierarchyGstPageInfo(PPGMR3DUMPHIERARCHYSTATE pState, RTGCPHYS GCPhys, uint32_t cbPage) +{ + char szPage[80]; + pgmLock(pState->pVM); + PCPGMPAGE pPage = pgmPhysGetPage(pState->pVM, GCPhys); + if (pPage) + RTStrPrintf(szPage, sizeof(szPage), " %R[pgmpage]", pPage); + else + strcpy(szPage, " not found"); + pgmUnlock(pState->pVM); + pState->pHlp->pfnPrintf(pState->pHlp, "%s", szPage); + NOREF(cbPage); +} + + +/** + * Checks the entry for reserved bits. + * + * @param pState The dumper state. + * @param u64Entry The entry to check. + */ +static void pgmR3DumpHierarchyGstCheckReservedHighBits(PPGMR3DUMPHIERARCHYSTATE pState, uint64_t u64Entry) +{ + uint32_t uRsvd = (u64Entry & pState->u64HighReservedBits) >> 52; + if (uRsvd) + pState->pHlp->pfnPrintf(pState->pHlp, " %u:52=%03x%s", + pState->uLastRsvdBit, uRsvd, pState->fLme ? "" : "!"); + /** @todo check the valid physical bits as well. */ +} + + +/** + * Dumps a PAE shadow page table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param GCPhys The page table address. + */ +static int pgmR3DumpHierarchyGstPaePT(PPGMR3DUMPHIERARCHYSTATE pState, RTGCPHYS GCPhys) +{ + PCX86PTPAE pPT; + PGMPAGEMAPLOCK Lock; + int rc = pgmR3DumpHierarchyGstMapPage(pState, GCPhys, "Page table", (void const **)&pPT, &Lock); + if (RT_FAILURE(rc)) + return rc; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PT_PAE_SHIFT, X86_PG_PAE_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PTEPAE Pte = pPT->a[i]; + if (Pte.n.u1Present) + { + pState->u64Address = u64BaseAddress + ((uint64_t)i << X86_PT_PAE_SHIFT); + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme /*P R S A D G WT CD AT NX 4M a p ? */ + ? 
"%016llx 3 | P %c %c %c %c %c %s %s %s %s 4K %c%c%c %016llx" + : "%08llx 2 | P %c %c %c %c %c %s %s %s %s 4K %c%c%c %016llx", + pState->u64Address, + Pte.n.u1Write ? 'W' : 'R', + Pte.n.u1User ? 'U' : 'S', + Pte.n.u1Accessed ? 'A' : '-', + Pte.n.u1Dirty ? 'D' : '-', + Pte.n.u1Global ? 'G' : '-', + Pte.n.u1WriteThru ? "WT" : "--", + Pte.n.u1CacheDisable? "CD" : "--", + Pte.n.u1PAT ? "AT" : "--", + Pte.n.u1NoExecute ? "NX" : "--", + Pte.u & RT_BIT(9) ? '1' : '0', + Pte.u & RT_BIT(10) ? '1' : '0', + Pte.u & RT_BIT(11) ? '1' : '0', + Pte.u & X86_PTE_PAE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pte.u & X86_PTE_PAE_PG_MASK, _4K); + pgmR3DumpHierarchyGstCheckReservedHighBits(pState, Pte.u); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + pState->cLeaves++; + } + } + + PGMPhysReleasePageMappingLock(pState->pVM, &Lock); + return VINF_SUCCESS; +} + + +/** + * Dumps a PAE shadow page directory table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param GCPhys The physical address of the table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyGstPaePD(PPGMR3DUMPHIERARCHYSTATE pState, RTGCPHYS GCPhys, unsigned cMaxDepth) +{ + PCX86PDPAE pPD; + PGMPAGEMAPLOCK Lock; + int rc = pgmR3DumpHierarchyGstMapPage(pState, GCPhys, "Page directory", (void const **)&pPD, &Lock); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth > 0); + cMaxDepth--; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PD_PAE_SHIFT, X86_PG_PAE_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PDEPAE Pde = pPD->a[i]; + if (Pde.n.u1Present) + { + pState->u64Address = u64BaseAddress + ((uint64_t)i << X86_PD_PAE_SHIFT); + if (Pde.b.u1Size) + { + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme /*P R S A D G WT CD AT NX 2M a p ? phys*/ + ? "%016llx 2 | P %c %c %c %c %c %s %s %s %s 2M %c%c%c %016llx" + : "%08llx 1 | P %c %c %c %c %c %s %s %s %s 2M %c%c%c %016llx", + pState->u64Address, + Pde.b.u1Write ? 'W' : 'R', + Pde.b.u1User ? 'U' : 'S', + Pde.b.u1Accessed ? 'A' : '-', + Pde.b.u1Dirty ? 'D' : '-', + Pde.b.u1Global ? 'G' : '-', + Pde.b.u1WriteThru ? "WT" : "--", + Pde.b.u1CacheDisable ? "CD" : "--", + Pde.b.u1PAT ? "AT" : "--", + Pde.b.u1NoExecute ? "NX" : "--", + Pde.u & RT_BIT_64(9) ? '1' : '0', + Pde.u & RT_BIT_64(10) ? '1' : '0', + Pde.u & RT_BIT_64(11) ? '1' : '0', + Pde.u & X86_PDE2M_PAE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pde.u & X86_PDE2M_PAE_PG_MASK, _2M); + pgmR3DumpHierarchyGstCheckReservedHighBits(pState, Pde.u); + if ((Pde.u >> 13) & 0xff) + pState->pHlp->pfnPrintf(pState->pHlp, " 20:13=%02llx%s", (Pde.u >> 13) & 0x0ff, pState->fLme ? "" : "!"); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + pState->cLeaves++; + } + else + { + pState->pHlp->pfnPrintf(pState->pHlp, + pState->fLme /*P R S A D G WT CD AT NX 4M a p ? phys */ + ? "%016llx 2 | P %c %c %c %c %c %s %s .. %s .. %c%c%c %016llx" + : "%08llx 1 | P %c %c %c %c %c %s %s .. %s .. %c%c%c %016llx", + pState->u64Address, + Pde.n.u1Write ? 'W' : 'R', + Pde.n.u1User ? 'U' : 'S', + Pde.n.u1Accessed ? 'A' : '-', + Pde.n.u1Reserved0 ? '?' : '.', /* ignored */ + Pde.n.u1Reserved1 ? '?' : '.', /* ignored */ + Pde.n.u1WriteThru ? "WT" : "--", + Pde.n.u1CacheDisable ? "CD" : "--", + Pde.n.u1NoExecute ? "NX" : "--", + Pde.u & RT_BIT_64(9) ? '1' : '0', + Pde.u & RT_BIT_64(10) ? '1' : '0', + Pde.u & RT_BIT_64(11) ? 
'1' : '0', + Pde.u & X86_PDE_PAE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pde.u & X86_PDE_PAE_PG_MASK, _4K); + pgmR3DumpHierarchyGstCheckReservedHighBits(pState, Pde.u); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyGstPaePT(pState, Pde.u & X86_PDE_PAE_PG_MASK); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + } + + PGMPhysReleasePageMappingLock(pState->pVM, &Lock); + return rc; +} + + +/** + * Dumps a PAE shadow page directory pointer table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param GCPhys The physical address of the table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyGstPaePDPT(PPGMR3DUMPHIERARCHYSTATE pState, RTGCPHYS GCPhys, unsigned cMaxDepth) +{ + /* Fend of addresses that are out of range in PAE mode - simplifies the code below. */ + if (!pState->fLme && pState->u64Address >= _4G) + return VINF_SUCCESS; + + PCX86PDPT pPDPT; + PGMPAGEMAPLOCK Lock; + int rc = pgmR3DumpHierarchyGstMapPage(pState, GCPhys, "Page directory pointer table", (void const **)&pPDPT, &Lock); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth > 0); + cMaxDepth--; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PDPT_SHIFT, + pState->fLme ? X86_PG_AMD64_PDPE_ENTRIES : X86_PG_PAE_PDPE_ENTRIES, + &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PDPE Pdpe = pPDPT->a[i]; + if (Pdpe.n.u1Present) + { + pState->u64Address = u64BaseAddress + ((uint64_t)i << X86_PDPT_SHIFT); + if (pState->fLme) + { + /** @todo Do 1G pages. */ + pState->pHlp->pfnPrintf(pState->pHlp, /*P R S A D G WT CD AT NX .. a p ? */ + "%016llx 1 | P %c %c %c %c %c %s %s %s %s .. %c%c%c %016llx", + pState->u64Address, + Pdpe.lm.u1Write ? 'W' : 'R', + Pdpe.lm.u1User ? 'U' : 'S', + Pdpe.lm.u1Accessed ? 'A' : '-', + Pdpe.lm.u3Reserved & 1 ? '?' : '.', /* ignored */ + Pdpe.lm.u3Reserved & 4 ? '!' : '.', /* mbz */ + Pdpe.lm.u1WriteThru ? "WT" : "--", + Pdpe.lm.u1CacheDisable ? "CD" : "--", + Pdpe.lm.u3Reserved & 2 ? "!" : "..",/* mbz */ + Pdpe.lm.u1NoExecute ? "NX" : "--", + Pdpe.u & RT_BIT_64(9) ? '1' : '0', + Pdpe.u & RT_BIT_64(10) ? '1' : '0', + Pdpe.u & RT_BIT_64(11) ? '1' : '0', + Pdpe.u & X86_PDPE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pdpe.u & X86_PDPE_PG_MASK, _4K); + pgmR3DumpHierarchyGstCheckReservedHighBits(pState, Pdpe.u); + } + else + { + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX .. a p ? */ + "%08llx 0 | P %c %c %c %c %c %s %s %s %s .. %c%c%c %016llx", + pState->u64Address, + Pdpe.n.u2Reserved & 1 ? '!' : '.', /* mbz */ + Pdpe.n.u2Reserved & 2 ? '!' : '.', /* mbz */ + Pdpe.n.u4Reserved & 1 ? '!' : '.', /* mbz */ + Pdpe.n.u4Reserved & 2 ? '!' : '.', /* mbz */ + Pdpe.n.u4Reserved & 8 ? '!' : '.', /* mbz */ + Pdpe.n.u1WriteThru ? "WT" : "--", + Pdpe.n.u1CacheDisable ? "CD" : "--", + Pdpe.n.u4Reserved & 2 ? "!" : "..", /* mbz */ + Pdpe.lm.u1NoExecute ? "!!" : "..",/* mbz */ + Pdpe.u & RT_BIT_64(9) ? '1' : '0', + Pdpe.u & RT_BIT_64(10) ? '1' : '0', + Pdpe.u & RT_BIT_64(11) ? 
'1' : '0', + Pdpe.u & X86_PDPE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pdpe.u & X86_PDPE_PG_MASK, _4K); + pgmR3DumpHierarchyGstCheckReservedHighBits(pState, Pdpe.u); + } + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyGstPaePD(pState, Pdpe.u & X86_PDPE_PG_MASK, cMaxDepth); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + + PGMPhysReleasePageMappingLock(pState->pVM, &Lock); + return rc; +} + + +/** + * Dumps a 32-bit shadow page table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param GCPhys The physical address of the table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyGstPaePML4(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS GCPhys, unsigned cMaxDepth) +{ + PCX86PML4 pPML4; + PGMPAGEMAPLOCK Lock; + int rc = pgmR3DumpHierarchyGstMapPage(pState, GCPhys, "Page map level 4", (void const **)&pPML4, &Lock); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth); + cMaxDepth--; + + /* + * This is a bit tricky as we're working on unsigned addresses while the + * AMD64 spec uses signed tricks. + */ + uint32_t iFirst = (pState->u64FirstAddress >> X86_PML4_SHIFT) & X86_PML4_MASK; + uint32_t iLast = (pState->u64LastAddress >> X86_PML4_SHIFT) & X86_PML4_MASK; + if ( pState->u64LastAddress <= UINT64_C(0x00007fffffffffff) + || pState->u64FirstAddress >= UINT64_C(0xffff800000000000)) + { /* Simple, nothing to adjust */ } + else if (pState->u64FirstAddress <= UINT64_C(0x00007fffffffffff)) + iLast = X86_PG_AMD64_ENTRIES / 2 - 1; + else if (pState->u64LastAddress >= UINT64_C(0xffff800000000000)) + iFirst = X86_PG_AMD64_ENTRIES / 2; + else + iFirst = X86_PG_AMD64_ENTRIES; /* neither address is canonical */ + + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PML4E Pml4e = pPML4->a[i]; + if (Pml4e.n.u1Present) + { + pState->u64Address = ((uint64_t)i << X86_PML4_SHIFT) + | (i >= RT_ELEMENTS(pPML4->a) / 2 ? UINT64_C(0xffff000000000000) : 0); + pState->pHlp->pfnPrintf(pState->pHlp, /*P R S A D G WT CD AT NX 4M a p ? */ + "%016llx 0 | P %c %c %c %c %c %s %s %s %s .. %c%c%c %016llx", + pState->u64Address, + Pml4e.n.u1Write ? 'W' : 'R', + Pml4e.n.u1User ? 'U' : 'S', + Pml4e.n.u1Accessed ? 'A' : '-', + Pml4e.n.u3Reserved & 1 ? '?' : '.', /* ignored */ + Pml4e.n.u3Reserved & 4 ? '!' : '.', /* mbz */ + Pml4e.n.u1WriteThru ? "WT" : "--", + Pml4e.n.u1CacheDisable ? "CD" : "--", + Pml4e.n.u3Reserved & 2 ? "!" : "..",/* mbz */ + Pml4e.n.u1NoExecute ? "NX" : "--", + Pml4e.u & RT_BIT_64(9) ? '1' : '0', + Pml4e.u & RT_BIT_64(10) ? '1' : '0', + Pml4e.u & RT_BIT_64(11) ? '1' : '0', + Pml4e.u & X86_PML4E_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pml4e.u & X86_PML4E_PG_MASK, _4K); + pgmR3DumpHierarchyGstCheckReservedHighBits(pState, Pml4e.u); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyGstPaePDPT(pState, Pml4e.u & X86_PML4E_PG_MASK, cMaxDepth); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + + PGMPhysReleasePageMappingLock(pState->pVM, &Lock); + return rc; +} + + +/** + * Dumps a 32-bit shadow page table. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param GCPhys The physical address of the table. 
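[Editor's note, not part of the patch] pgmR3DumpHierarchyGstCheckReservedHighBits, used by the guest dumpers above, flags reserved high bits that are set in an entry: bits 62:52 when NX is enabled (bit 63 is then the NX bit), 63:52 otherwise, matching u64HighReservedBits in the state init. A standalone restatement (illustrative only):

#include <stdbool.h>
#include <stdint.h>

/* Non-zero reserved high bits of a guest paging entry. */
static uint32_t reservedHighBitsSketch(uint64_t u64Entry, bool fNxe)
{
    const uint64_t fMask = (fNxe ? UINT64_C(0x7ff) : UINT64_C(0xfff)) << 52;
    return (uint32_t)((u64Entry & fMask) >> 52);
}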
+ */ +static int pgmR3DumpHierarchyGst32BitPT(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS GCPhys) +{ + PCX86PT pPT; + PGMPAGEMAPLOCK Lock; + int rc = pgmR3DumpHierarchyGstMapPage(pState, GCPhys, "Page table", (void const **)&pPT, &Lock); + if (RT_FAILURE(rc)) + return rc; + + uint32_t iFirst, iLast; + uint64_t u64BaseAddress = pgmR3DumpHierarchyCalcRange(pState, X86_PT_SHIFT, X86_PG_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PTE Pte = pPT->a[i]; + if (Pte.n.u1Present) + { + pState->u64Address = u64BaseAddress + (i << X86_PT_SHIFT); + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX 4M a m d */ + "%08llx 1 | P %c %c %c %c %c %s %s %s .. 4K %c%c%c %08x", + pState->u64Address, + Pte.n.u1Write ? 'W' : 'R', + Pte.n.u1User ? 'U' : 'S', + Pte.n.u1Accessed ? 'A' : '-', + Pte.n.u1Dirty ? 'D' : '-', + Pte.n.u1Global ? 'G' : '-', + Pte.n.u1WriteThru ? "WT" : "--", + Pte.n.u1CacheDisable ? "CD" : "--", + Pte.n.u1PAT ? "AT" : "--", + Pte.u & RT_BIT_32(9) ? '1' : '0', + Pte.u & RT_BIT_32(10) ? '1' : '0', + Pte.u & RT_BIT_32(11) ? '1' : '0', + Pte.u & X86_PDE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pte.u & X86_PDE_PG_MASK, _4K); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + } + } + + PGMPhysReleasePageMappingLock(pState->pVM, &Lock); + return VINF_SUCCESS; +} + + +/** + * Dumps a 32-bit shadow page directory and page tables. + * + * @returns VBox status code (VINF_SUCCESS). + * @param pState The dumper state. + * @param GCPhys The physical address of the table. + * @param cMaxDepth The maximum depth. + */ +static int pgmR3DumpHierarchyGst32BitPD(PPGMR3DUMPHIERARCHYSTATE pState, RTHCPHYS GCPhys, unsigned cMaxDepth) +{ + if (pState->u64Address >= _4G) + return VINF_SUCCESS; + + PCX86PD pPD; + PGMPAGEMAPLOCK Lock; + int rc = pgmR3DumpHierarchyGstMapPage(pState, GCPhys, "Page directory", (void const **)&pPD, &Lock); + if (RT_FAILURE(rc)) + return rc; + + Assert(cMaxDepth > 0); + cMaxDepth--; + + uint32_t iFirst, iLast; + pgmR3DumpHierarchyCalcRange(pState, X86_PD_SHIFT, X86_PG_ENTRIES, &iFirst, &iLast); + for (uint32_t i = iFirst; i <= iLast; i++) + { + X86PDE Pde = pPD->a[i]; + if (Pde.n.u1Present) + { + pState->u64Address = (uint32_t)i << X86_PD_SHIFT; + if (Pde.b.u1Size && pState->fPse) + { + uint64_t u64Phys = ((uint64_t)(Pde.u & X86_PDE4M_PG_HIGH_MASK) << X86_PDE4M_PG_HIGH_SHIFT) + | (Pde.u & X86_PDE4M_PG_MASK); + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX 4M a m d phys */ + "%08llx 0 | P %c %c %c %c %c %s %s %s .. 4M %c%c%c %08llx", + pState->u64Address, + Pde.b.u1Write ? 'W' : 'R', + Pde.b.u1User ? 'U' : 'S', + Pde.b.u1Accessed ? 'A' : '-', + Pde.b.u1Dirty ? 'D' : '-', + Pde.b.u1Global ? 'G' : '-', + Pde.b.u1WriteThru ? "WT" : "--", + Pde.b.u1CacheDisable ? "CD" : "--", + Pde.b.u1PAT ? "AT" : "--", + Pde.u & RT_BIT_32(9) ? '1' : '0', + Pde.u & RT_BIT_32(10) ? '1' : '0', + Pde.u & RT_BIT_32(11) ? '1' : '0', + u64Phys); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, u64Phys, _4M); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + pState->cLeaves++; + } + else + { + pState->pHlp->pfnPrintf(pState->pHlp,/*P R S A D G WT CD AT NX 4M a m d phys */ + "%08llx 0 | P %c %c %c %c %c %s %s .. .. .. %c%c%c %08x", + pState->u64Address, + Pde.n.u1Write ? 'W' : 'R', + Pde.n.u1User ? 'U' : 'S', + Pde.n.u1Accessed ? 'A' : '-', + Pde.n.u1Reserved0 ? '?' : '.', /* ignored */ + Pde.n.u1Reserved1 ? '?' : '.', /* ignored */ + Pde.n.u1WriteThru ? "WT" : "--", + Pde.n.u1CacheDisable ? 
"CD" : "--", + Pde.u & RT_BIT_32(9) ? '1' : '0', + Pde.u & RT_BIT_32(10) ? '1' : '0', + Pde.u & RT_BIT_32(11) ? '1' : '0', + Pde.u & X86_PDE_PG_MASK); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, Pde.u & X86_PDE_PG_MASK, _4K); + pState->pHlp->pfnPrintf(pState->pHlp, "\n"); + + if (cMaxDepth) + { + int rc2 = pgmR3DumpHierarchyGst32BitPT(pState, Pde.u & X86_PDE_PG_MASK); + if (rc2 < rc && RT_SUCCESS(rc)) + rc = rc2; + } + else + pState->cLeaves++; + } + } + } + + PGMPhysReleasePageMappingLock(pState->pVM, &Lock); + return rc; +} + + +/** + * Internal worker that initiates the actual dump. + * + * @returns VBox status code. + * @param pState The dumper state. + * @param cr3 The CR3 value. + * @param cMaxDepth The max depth. + */ +static int pgmR3DumpHierarchyGstDoIt(PPGMR3DUMPHIERARCHYSTATE pState, uint64_t cr3, unsigned cMaxDepth) +{ + int rc; + unsigned const cch = pState->cchAddress; + uint64_t const cr3Mask = pState->fEpt ? X86_CR3_AMD64_PAGE_MASK + : pState->fLme ? X86_CR3_AMD64_PAGE_MASK + : pState->fPae ? X86_CR3_PAE_PAGE_MASK + : X86_CR3_PAGE_MASK; + if (pState->fPrintCr3) + { + const char * const pszMode = pState->fEpt ? "Extended Page Tables" + : pState->fLme ? "Long Mode" + : pState->fPae ? "PAE Mode" + : pState->fPse ? "32-bit w/ PSE" + : "32-bit"; + pState->pHlp->pfnPrintf(pState->pHlp, "cr3=%0*llx", cch, cr3); + if (pState->fDumpPageInfo) + pgmR3DumpHierarchyGstPageInfo(pState, cr3 & X86_CR3_AMD64_PAGE_MASK, _4K); + pState->pHlp->pfnPrintf(pState->pHlp, " %s%s%s\n", + pszMode, + pState->fNp ? " + Nested Paging" : "", + pState->fNxe ? " + NX" : ""); + } + + + if (pState->fEpt) + { + if (pState->fPrintHeader) + pState->pHlp->pfnPrintf(pState->pHlp, + "%-*s R - Readable\n" + "%-*s | W - Writeable\n" + "%-*s | | X - Executable\n" + "%-*s | | | EMT - EPT memory type\n" + "%-*s | | | | PAT - Ignored PAT?\n" + "%-*s | | | | | AVL1 - 4 available bits\n" + "%-*s | | | | | | AVL2 - 12 available bits\n" + "%-*s Level | | | | | | | page \n" + /* xxxx n **** R W X EMT PAT AVL1 AVL2 xxxxxxxxxxxxx + R W X 7 0 f fff 0123456701234567 */ + , + cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "Address"); + + pState->pHlp->pfnPrintf(pState->pHlp, "EPT dumping is not yet implemented, sorry.\n"); + /** @todo implemented EPT dumping. 
*/ + rc = VERR_NOT_IMPLEMENTED; + } + else + { + if (pState->fPrintHeader) + pState->pHlp->pfnPrintf(pState->pHlp, + "%-*s P - Present\n" + "%-*s | R/W - Read (0) / Write (1)\n" + "%-*s | | U/S - User (1) / Supervisor (0)\n" + "%-*s | | | A - Accessed\n" + "%-*s | | | | D - Dirty\n" + "%-*s | | | | | G - Global\n" + "%-*s | | | | | | WT - Write thru\n" + "%-*s | | | | | | | CD - Cache disable\n" + "%-*s | | | | | | | | AT - Attribute table (PAT)\n" + "%-*s | | | | | | | | | NX - No execute (K8)\n" + "%-*s | | | | | | | | | | 4K/4M/2M - Page size.\n" + "%-*s | | | | | | | | | | | AVL - 3 available bits.\n" + "%-*s Level | | | | | | | | | | | | Page\n" + /* xxxx n **** P R S A D G WT CD AT NX 4M AVL xxxxxxxxxxxxx + - W U - - - -- -- -- -- -- 010 */ + , + cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", cch, "", + cch, "", cch, "", cch, "", cch, "", cch, "", cch, "Address"); + if (pState->fLme) + rc = pgmR3DumpHierarchyGstPaePML4(pState, cr3 & cr3Mask, cMaxDepth); + else if (pState->fPae) + rc = pgmR3DumpHierarchyGstPaePDPT(pState, cr3 & cr3Mask, cMaxDepth); + else + rc = pgmR3DumpHierarchyGst32BitPD(pState, cr3 & cr3Mask, cMaxDepth); + } + + if (!pState->cLeaves) + pState->pHlp->pfnPrintf(pState->pHlp, "not present\n"); + return rc; +} + + +/** + * dbgfR3PagingDumpEx worker. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param cr3 The CR3 register value. + * @param fFlags The flags, DBGFPGDMP_FLAGS_XXX. + * @param FirstAddr The start address. + * @param LastAddr The address to stop after. + * @param cMaxDepth The max depth. + * @param pHlp The output callbacks. Defaults to log if NULL. + * + * @internal + */ +VMMR3_INT_DECL(int) PGMR3DumpHierarchyGst(PVM pVM, uint64_t cr3, uint32_t fFlags, RTGCPTR FirstAddr, RTGCPTR LastAddr, + uint32_t cMaxDepth, PCDBGFINFOHLP pHlp) +{ + /* Minimal validation as we're only supposed to service DBGF. */ + AssertReturn(~(fFlags & ~DBGFPGDMP_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER); + AssertReturn(!(fFlags & (DBGFPGDMP_FLAGS_CURRENT_MODE | DBGFPGDMP_FLAGS_CURRENT_CR3)), VERR_INVALID_PARAMETER); + AssertReturn(fFlags & DBGFPGDMP_FLAGS_GUEST, VERR_INVALID_PARAMETER); + + PGMR3DUMPHIERARCHYSTATE State; + pgmR3DumpHierarchyInitState(&State, pVM, fFlags, FirstAddr, LastAddr, pHlp); + return pgmR3DumpHierarchyGstDoIt(&State, cr3, cMaxDepth); +} + + +/** + * For aiding with reset problems and similar. + * + * @param pVM The cross context VM handle. + */ +void pgmLogState(PVM pVM) +{ +#if 0 + RTLogRelPrintf("\npgmLogState pgmLogState pgmLogState pgmLogState pgmLogState\n"); + + /* + * Per CPU stuff. 
+ */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PPGMCPU pPgmCpu = &pVM->aCpus[iCpu].pgm.s; + RTLogRelPrintf("pgmLogState: CPU #%u\n", iCpu); +# define LOG_PGMCPU_MEMBER(aFmt, aMember) RTLogRelPrintf(" %32s: %" aFmt "\n", #aMember, pPgmCpu->aMember) + LOG_PGMCPU_MEMBER("#RX32", offVM); + LOG_PGMCPU_MEMBER("#RX32", offVCpu); + LOG_PGMCPU_MEMBER("#RX32", offPGM); + LOG_PGMCPU_MEMBER("RGp", GCPhysA20Mask); + LOG_PGMCPU_MEMBER("RTbool", fA20Enabled); + LOG_PGMCPU_MEMBER("RTbool", fNoExecuteEnabled); + LOG_PGMCPU_MEMBER("#RX32", fSyncFlags); + LOG_PGMCPU_MEMBER("d", enmShadowMode); + LOG_PGMCPU_MEMBER("d", enmGuestMode); + LOG_PGMCPU_MEMBER("RGp", GCPhysCR3); + + LOG_PGMCPU_MEMBER("p", pGst32BitPdR3); +# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + LOG_PGMCPU_MEMBER("p", pGst32BitPdR0); +# endif + LOG_PGMCPU_MEMBER("RRv", pGst32BitPdRC); + LOG_PGMCPU_MEMBER("#RX32", fGst32BitMbzBigPdeMask); + LOG_PGMCPU_MEMBER("RTbool", fGst32BitPageSizeExtension); + + LOG_PGMCPU_MEMBER("p", pGstPaePdptR3); +# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + LOG_PGMCPU_MEMBER("p", pGstPaePdptR0); +# endif + LOG_PGMCPU_MEMBER("RRv", pGstPaePdptRC); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR3[0]); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR3[1]); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR3[2]); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR3[3]); +# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + LOG_PGMCPU_MEMBER("p", apGstPaePDsR0[0]); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR0[1]); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR0[2]); + LOG_PGMCPU_MEMBER("p", apGstPaePDsR0[3]); +# endif + LOG_PGMCPU_MEMBER("RRv", apGstPaePDsR0[0]); + LOG_PGMCPU_MEMBER("RRv", apGstPaePDsR0[1]); + LOG_PGMCPU_MEMBER("RRv", apGstPaePDsR0[2]); + LOG_PGMCPU_MEMBER("RRv", apGstPaePDsR0[3]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDs[0]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDs[1]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDs[2]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDs[3]); + LOG_PGMCPU_MEMBER("#RX64", aGstPaePdpeRegs[0].u); + LOG_PGMCPU_MEMBER("#RX64", aGstPaePdpeRegs[1].u); + LOG_PGMCPU_MEMBER("#RX64", aGstPaePdpeRegs[2].u); + LOG_PGMCPU_MEMBER("#RX64", aGstPaePdpeRegs[3].u); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDsMonitored[0]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDsMonitored[1]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDsMonitored[2]); + LOG_PGMCPU_MEMBER("RGp", aGCPhysGstPaePDsMonitored[3]); + LOG_PGMCPU_MEMBER("#RX64", fGstPaeMbzPteMask); + LOG_PGMCPU_MEMBER("#RX64", fGstPaeMbzPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstPaeMbzBigPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstPaeMbzBigPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstPaeMbzPdpeMask); + + LOG_PGMCPU_MEMBER("p", pGstAmd64Pml4R3); +# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + LOG_PGMCPU_MEMBER("p", pGstAmd64Pml4R0); +# endif + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64MbzPteMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64MbzPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64MbzBigPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64MbzPdpeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64MbzBigPdpeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64MbzPml4eMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64ShadowedPdpeMask); + LOG_PGMCPU_MEMBER("#RX64", fGstAmd64ShadowedPml4eMask); + LOG_PGMCPU_MEMBER("#RX64", fGst64ShadowedPteMask); + LOG_PGMCPU_MEMBER("#RX64", fGst64ShadowedPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGst64ShadowedBigPdeMask); + LOG_PGMCPU_MEMBER("#RX64", fGst64ShadowedBigPde4PteMask); + + LOG_PGMCPU_MEMBER("p", pShwPageCR3R3); + LOG_PGMCPU_MEMBER("p", pShwPageCR3R0); + LOG_PGMCPU_MEMBER("RRv", pShwPageCR3RC); + + LOG_PGMCPU_MEMBER("p", 
pfnR3ShwRelocate); + LOG_PGMCPU_MEMBER("p", pfnR3ShwExit); + LOG_PGMCPU_MEMBER("p", pfnR3ShwGetPage); + LOG_PGMCPU_MEMBER("p", pfnR3ShwModifyPage); + LOG_PGMCPU_MEMBER("p", pfnR0ShwGetPage); + LOG_PGMCPU_MEMBER("p", pfnR0ShwModifyPage); + LOG_PGMCPU_MEMBER("p", pfnR3GstRelocate); + LOG_PGMCPU_MEMBER("p", pfnR3GstExit); + LOG_PGMCPU_MEMBER("p", pfnR3GstGetPage); + LOG_PGMCPU_MEMBER("p", pfnR3GstModifyPage); + LOG_PGMCPU_MEMBER("p", pfnR0GstGetPage); + LOG_PGMCPU_MEMBER("p", pfnR0GstModifyPage); + LOG_PGMCPU_MEMBER("p", pfnR3BthRelocate); + LOG_PGMCPU_MEMBER("p", pfnR3BthInvalidatePage); + LOG_PGMCPU_MEMBER("p", pfnR3BthSyncCR3); + LOG_PGMCPU_MEMBER("p", pfnR3BthPrefetchPage); + LOG_PGMCPU_MEMBER("p", pfnR3BthMapCR3); + LOG_PGMCPU_MEMBER("p", pfnR3BthUnmapCR3); + LOG_PGMCPU_MEMBER("p", pfnR0BthMapCR3); + LOG_PGMCPU_MEMBER("p", pfnR0BthUnmapCR3); + LOG_PGMCPU_MEMBER("#RX64", cNetwareWp0Hacks); + LOG_PGMCPU_MEMBER("#RX64", cPoolAccessHandler); + + } + + /* + * PGM globals. + */ + RTLogRelPrintf("PGM globals\n"); + PPGM pPgm = &pVM->pgm.s; +# define LOG_PGM_MEMBER(aFmt, aMember) RTLogRelPrintf(" %32s: %" aFmt "\n", #aMember, pPgm->aMember) + LOG_PGM_MEMBER("#RX32", offVM); + LOG_PGM_MEMBER("#RX32", offVCpuPGM); + LOG_PGM_MEMBER("RTbool", fRamPreAlloc); + LOG_PGM_MEMBER("RTbool", fPhysWriteMonitoringEngaged); + LOG_PGM_MEMBER("RTbool", fLessThan52PhysicalAddressBits); + LOG_PGM_MEMBER("RTbool", fNestedPaging); + LOG_PGM_MEMBER("d", enmHostMode); + LOG_PGM_MEMBER("RTbool", fNoMorePhysWrites); + LOG_PGM_MEMBER("RTbool", fPageFusionAllowed); + LOG_PGM_MEMBER("RTbool", fPciPassthrough); + LOG_PGM_MEMBER("#x", cMmio2Regions); + LOG_PGM_MEMBER("RTbool", fRestoreRomPagesOnReset); + LOG_PGM_MEMBER("RTbool", fZeroRamPagesOnReset); + LOG_PGM_MEMBER("RTbool", fFinalizedMappings); + LOG_PGM_MEMBER("RTbool", fMappingsFixed); + LOG_PGM_MEMBER("RTbool", fMappingsFixedRestored); + LOG_PGM_MEMBER("%#x", cbMappingFixed); + LOG_PGM_MEMBER("%#x", idRamRangesGen); + LOG_PGM_MEMBER("#RGv", GCPtrMappingFixed); + LOG_PGM_MEMBER("#RGv", GCPtrPrevRamRangeMapping); + LOG_PGM_MEMBER("%#x", hRomPhysHandlerType); + LOG_PGM_MEMBER("#RGp", GCPhys4MBPSEMask); + LOG_PGM_MEMBER("#RGp", GCPhysInvAddrMask); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[0]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[1]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[2]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[3]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[4]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[5]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[6]); + LOG_PGM_MEMBER("p", apRamRangesTlbR3[7]); + LOG_PGM_MEMBER("p", pRamRangesXR3); + LOG_PGM_MEMBER("p", pRamRangeTreeR3); + LOG_PGM_MEMBER("p", pTreesR3); + LOG_PGM_MEMBER("p", pLastPhysHandlerR3); + LOG_PGM_MEMBER("p", pPoolR3); + LOG_PGM_MEMBER("p", pMappingsR3); + LOG_PGM_MEMBER("p", pRomRangesR3); + LOG_PGM_MEMBER("p", pRegMmioRangesR3); + LOG_PGM_MEMBER("p", paModeData); + LOG_PGM_MEMBER("p", apMmio2RangesR3[0]); + LOG_PGM_MEMBER("p", apMmio2RangesR3[1]); + LOG_PGM_MEMBER("p", apMmio2RangesR3[2]); + LOG_PGM_MEMBER("p", apMmio2RangesR3[3]); + LOG_PGM_MEMBER("p", apMmio2RangesR3[4]); + LOG_PGM_MEMBER("p", apMmio2RangesR3[5]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[0]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[1]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[2]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[3]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[4]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[5]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[6]); + LOG_PGM_MEMBER("p", apRamRangesTlbR0[7]); + LOG_PGM_MEMBER("p", pRamRangesXR0); + 
LOG_PGM_MEMBER("p", pRamRangeTreeR0); + LOG_PGM_MEMBER("p", pTreesR0); + LOG_PGM_MEMBER("p", pLastPhysHandlerR0); + LOG_PGM_MEMBER("p", pPoolR0); + LOG_PGM_MEMBER("p", pMappingsR0); + LOG_PGM_MEMBER("p", pRomRangesR0); + LOG_PGM_MEMBER("p", apMmio2RangesR0[0]); + LOG_PGM_MEMBER("p", apMmio2RangesR0[1]); + LOG_PGM_MEMBER("p", apMmio2RangesR0[2]); + LOG_PGM_MEMBER("p", apMmio2RangesR0[3]); + LOG_PGM_MEMBER("p", apMmio2RangesR0[4]); + LOG_PGM_MEMBER("p", apMmio2RangesR0[5]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[0]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[1]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[2]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[3]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[4]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[5]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[6]); + LOG_PGM_MEMBER("RRv", apRamRangesTlbRC[7]); + LOG_PGM_MEMBER("RRv", pRamRangesXRC); + LOG_PGM_MEMBER("RRv", pRamRangeTreeRC); + LOG_PGM_MEMBER("RRv", pTreesRC); + LOG_PGM_MEMBER("RRv", pLastPhysHandlerRC); + LOG_PGM_MEMBER("RRv", pPoolRC); + LOG_PGM_MEMBER("RRv", pMappingsRC); + LOG_PGM_MEMBER("RRv", pRomRangesRC); + LOG_PGM_MEMBER("RRv", paDynPageMap32BitPTEsGC); + LOG_PGM_MEMBER("RRv", paDynPageMapPaePTEsGC); + + LOG_PGM_MEMBER("#RGv", GCPtrCR3Mapping); + LOG_PGM_MEMBER("p", pInterPD); + LOG_PGM_MEMBER("p", apInterPTs[0]); + LOG_PGM_MEMBER("p", apInterPTs[1]); + LOG_PGM_MEMBER("p", apInterPaePTs[0]); + LOG_PGM_MEMBER("p", apInterPaePTs[1]); + LOG_PGM_MEMBER("p", apInterPaePDs[0]); + LOG_PGM_MEMBER("p", apInterPaePDs[1]); + LOG_PGM_MEMBER("p", apInterPaePDs[2]); + LOG_PGM_MEMBER("p", apInterPaePDs[3]); + LOG_PGM_MEMBER("p", pInterPaePDPT); + LOG_PGM_MEMBER("p", pInterPaePML4); + LOG_PGM_MEMBER("p", pInterPaePDPT64); + LOG_PGM_MEMBER("#RHp", HCPhysInterPD); + LOG_PGM_MEMBER("#RHp", HCPhysInterPaePDPT); + LOG_PGM_MEMBER("#RHp", HCPhysInterPaePML4); + LOG_PGM_MEMBER("RRv", pbDynPageMapBaseGC); + LOG_PGM_MEMBER("RRv", pRCDynMap); + LOG_PGM_MEMBER("p", pvR0DynMapUsed); + LOG_PGM_MEMBER("%#x", cDeprecatedPageLocks); + + /** + * Data associated with managing the ring-3 mappings of the allocation chunks. + */ + LOG_PGM_MEMBER("p", ChunkR3Map.pTree); + //LOG_PGM_MEMBER(PGMCHUNKR3MAPTLB ChunkR3Map.Tlb); + LOG_PGM_MEMBER("%#x", ChunkR3Map.c); + LOG_PGM_MEMBER("%#x", ChunkR3Map.cMax); + LOG_PGM_MEMBER("%#x", ChunkR3Map.iNow); + //LOG_PGM_MEMBER(PGMPAGER3MAPTLB PhysTlbHC); + + LOG_PGM_MEMBER("#RHp", HCPhysZeroPg); + LOG_PGM_MEMBER("p", pvZeroPgR3); + LOG_PGM_MEMBER("p", pvZeroPgR0); + LOG_PGM_MEMBER("RRv", pvZeroPgRC); + LOG_PGM_MEMBER("#RHp", HCPhysMmioPg); + LOG_PGM_MEMBER("#RHp", HCPhysInvMmioPg); + LOG_PGM_MEMBER("p", pvMmioPgR3); + LOG_PGM_MEMBER("RTbool", fErrInjHandyPages); + + /* + * PGM page pool. 
+ */ + PPGMPOOL pPool = pVM->pgm.s.pPoolR3; + RTLogRelPrintf("PGM Page Pool\n"); +# define LOG_PGMPOOL_MEMBER(aFmt, aMember) RTLogRelPrintf(" %32s: %" aFmt "\n", #aMember, pPool->aMember) + LOG_PGMPOOL_MEMBER("p", pVMR3); + LOG_PGMPOOL_MEMBER("p", pVMR0); + LOG_PGMPOOL_MEMBER("RRv", pVMRC); + LOG_PGMPOOL_MEMBER("#x", cMaxPages); + LOG_PGMPOOL_MEMBER("#x", cCurPages); + LOG_PGMPOOL_MEMBER("#x", iFreeHead); + LOG_PGMPOOL_MEMBER("#x", u16Padding); + LOG_PGMPOOL_MEMBER("#x", iUserFreeHead); + LOG_PGMPOOL_MEMBER("#x", cMaxUsers); + LOG_PGMPOOL_MEMBER("#x", cPresent); + LOG_PGMPOOL_MEMBER("RRv", paUsersRC); + LOG_PGMPOOL_MEMBER("p", paUsersR3); + LOG_PGMPOOL_MEMBER("p", paUsersR0); + LOG_PGMPOOL_MEMBER("#x", iPhysExtFreeHead); + LOG_PGMPOOL_MEMBER("#x", cMaxPhysExts); + LOG_PGMPOOL_MEMBER("RRv", paPhysExtsRC); + LOG_PGMPOOL_MEMBER("p", paPhysExtsR3); + LOG_PGMPOOL_MEMBER("p", paPhysExtsR0); + for (uint32_t i = 0; i < RT_ELEMENTS(pPool->aiHash); i++) + RTLogRelPrintf(" aiHash[%u]: %#x\n", i, pPool->aiHash[i]); + LOG_PGMPOOL_MEMBER("#x", iAgeHead); + LOG_PGMPOOL_MEMBER("#x", iAgeTail); + LOG_PGMPOOL_MEMBER("RTbool", fCacheEnabled); + LOG_PGMPOOL_MEMBER("RTbool", afPadding1[0]); + LOG_PGMPOOL_MEMBER("RTbool", afPadding1[1]); + LOG_PGMPOOL_MEMBER("RTbool", afPadding1[2]); + LOG_PGMPOOL_MEMBER("#x", iModifiedHead); + LOG_PGMPOOL_MEMBER("#x", cModifiedPages); + LOG_PGMPOOL_MEMBER("#x", hAccessHandlerType); + LOG_PGMPOOL_MEMBER("#x", idxFreeDirtyPage); + LOG_PGMPOOL_MEMBER("#x", cDirtyPages); + for (uint32_t i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++) + RTLogRelPrintf(" aDirtyPages[%u].uIdx: %#x\n", i, pPool->aDirtyPages[i].uIdx); + LOG_PGMPOOL_MEMBER("#x", cUsedPages); + LOG_PGMPOOL_MEMBER("#x", HCPhysTree); + for (uint32_t i = 0; i < pPool->cCurPages; i++) + { + PPGMPOOLPAGE pPage = &pPool->aPages[i]; +# define LOG_PAGE_MEMBER(aFmt, aMember) RTLogRelPrintf(" %3u:%-32s: %" aFmt "\n", i, #aMember, pPage->aMember) + RTLogRelPrintf("%3u:%-32s: %p\n", i, "", pPage); + LOG_PAGE_MEMBER("RHp", Core.Key); + LOG_PAGE_MEMBER("p", pvPageR3); + LOG_PAGE_MEMBER("RGp", GCPhys); + LOG_PAGE_MEMBER("d", enmKind); + LOG_PAGE_MEMBER("d", enmAccess); + LOG_PAGE_MEMBER("RTbool", fA20Enabled); + LOG_PAGE_MEMBER("RTbool", fZeroed); + LOG_PAGE_MEMBER("RTbool", fSeenNonGlobal); + LOG_PAGE_MEMBER("RTbool", fMonitored); + LOG_PAGE_MEMBER("RTbool", fCached); + LOG_PAGE_MEMBER("RTbool", fReusedFlushPending); + LOG_PAGE_MEMBER("RTbool", fDirty); + LOG_PAGE_MEMBER("RTbool", fPadding1); + LOG_PAGE_MEMBER("RTbool", fPadding2); + LOG_PAGE_MEMBER("#x", idx); + LOG_PAGE_MEMBER("#x", iNext); + LOG_PAGE_MEMBER("#x", iUserHead); + LOG_PAGE_MEMBER("#x", cPresent); + LOG_PAGE_MEMBER("#x", iFirstPresent); + LOG_PAGE_MEMBER("#x", cModifications); + LOG_PAGE_MEMBER("#x", iModifiedNext); + LOG_PAGE_MEMBER("#x", iModifiedPrev); + LOG_PAGE_MEMBER("#x", iMonitoredNext); + LOG_PAGE_MEMBER("#x", iMonitoredPrev); + LOG_PAGE_MEMBER("#x", iAgeNext); + LOG_PAGE_MEMBER("#x", iAgePrev); + LOG_PAGE_MEMBER("#x", idxDirtyEntry); + LOG_PAGE_MEMBER("RGv", GCPtrLastAccessHandlerRip); + LOG_PAGE_MEMBER("RGv", GCPtrLastAccessHandlerFault); + LOG_PAGE_MEMBER("#RX64", cLastAccessHandler); + LOG_PAGE_MEMBER("#RX32", cLocked); +# ifdef VBOX_STRICT + LOG_PAGE_MEMBER("RGv", GCPtrDirtyFault); +# endif + if ( pPage->enmKind == PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT + || pPage->enmKind == PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB + || pPage->enmKind == PGMPOOLKIND_32BIT_PD + || pPage->enmKind == PGMPOOLKIND_32BIT_PD_PHYS) + { + uint32_t const *pu32Page = (uint32_t const 
*)pPage->pvPageR3; + for (uint32_t i = 0; i < 1024/2; i += 4) + RTLogRelPrintf(" %#05x: %RX32 %RX32 %RX32 %RX32\n", i, pu32Page[i], pu32Page[i+1], pu32Page[i+2], pu32Page[i+3]); + } + else if ( pPage->enmKind != PGMPOOLKIND_FREE + && pPage->enmKind != PGMPOOLKIND_INVALID) + { + uint64_t const *pu64Page = (uint64_t const *)pPage->pvPageR3; + for (uint32_t i = 0; i < 512/2; i += 2) + RTLogRelPrintf(" %#05x: %RX64 %RX64\n", i, pu64Page[i], pu64Page[i+1]); + } + } + + RTLogRelPrintf("pgmLogState pgmLogState pgmLogState pgmLogState pgmLogState\n\n"); +#else + RT_NOREF(pVM); +#endif +} + diff --git a/src/VBox/VMM/VMMR3/PGMHandler.cpp b/src/VBox/VMM/VMMR3/PGMHandler.cpp new file mode 100644 index 00000000..dd2c779c --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMHandler.cpp @@ -0,0 +1,862 @@ +/* $Id: PGMHandler.cpp $ */ +/** @file + * PGM - Page Manager / Monitor, Access Handlers. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include "PGMInternal.h" +#include +#include "PGMInline.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) pgmR3HandlerPhysicalOneClear(PAVLROGCPHYSNODECORE pNode, void *pvUser); +static DECLCALLBACK(int) pgmR3HandlerPhysicalOneSet(PAVLROGCPHYSNODECORE pNode, void *pvUser); +static DECLCALLBACK(int) pgmR3InfoHandlersPhysicalOne(PAVLROGCPHYSNODECORE pNode, void *pvUser); +#ifdef VBOX_WITH_RAW_MODE +static DECLCALLBACK(int) pgmR3InfoHandlersVirtualOne(PAVLROGCPTRNODECORE pNode, void *pvUser); +#endif + + + + +/** + * Register a physical page access handler type, extended version. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmKind The kind of access handler. + * @param pfnHandlerR3 Pointer to the ring-3 handler callback. + * @param pfnHandlerR0 Pointer to the ring-0 handler callback. + * @param pfnPfHandlerR0 Pointer to the ring-0 \#PF handler callback. + * @param pfnHandlerRC Pointer to the raw-mode context handler callback. + * @param pfnPfHandlerRC Pointer to the raw-mode context \#PF handler + * callback. + * @param pszDesc The type description. + * @param phType Where to return the type handle (cross context + * safe). 
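+ *
+ * @note Illustrative sketch only, not part of the original sources: ring-3 code
+ *       normally calls PGMR3HandlerPhysicalTypeRegister() below, which resolves
+ *       the ring-0 / raw-mode symbol names lazily and then invokes this worker
+ *       with the resolved addresses. A direct call would look roughly like this
+ *       (the callback variables are assumed to already hold resolved addresses):
+ * @code
+ *      PGMPHYSHANDLERTYPE hType;
+ *      rc = PGMR3HandlerPhysicalTypeRegisterEx(pVM, PGMPHYSHANDLERKIND_WRITE,
+ *                                              pfnMyHandlerR3,
+ *                                              pfnMyHandlerR0, pfnMyPfHandlerR0,
+ *                                              pfnMyHandlerRC, pfnMyPfHandlerRC,
+ *                                              "My write monitor", &hType);
+ * @endcode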
+ */ +VMMR3_INT_DECL(int) PGMR3HandlerPhysicalTypeRegisterEx(PVM pVM, PGMPHYSHANDLERKIND enmKind, + PFNPGMPHYSHANDLER pfnHandlerR3, + R0PTRTYPE(PFNPGMPHYSHANDLER) pfnHandlerR0, + R0PTRTYPE(PFNPGMRZPHYSPFHANDLER) pfnPfHandlerR0, + RCPTRTYPE(PFNPGMPHYSHANDLER) pfnHandlerRC, + RCPTRTYPE(PFNPGMRZPHYSPFHANDLER) pfnPfHandlerRC, + const char *pszDesc, PPGMPHYSHANDLERTYPE phType) +{ + AssertPtrReturn(pfnHandlerR3, VERR_INVALID_POINTER); + AssertReturn(pfnHandlerR0 != NIL_RTR0PTR, VERR_INVALID_POINTER); + AssertReturn(pfnPfHandlerR0 != NIL_RTR0PTR, VERR_INVALID_POINTER); + AssertReturn(pfnHandlerRC != NIL_RTRCPTR || !VM_IS_RAW_MODE_ENABLED(pVM), VERR_INVALID_POINTER); + AssertReturn(pfnPfHandlerRC != NIL_RTRCPTR || !VM_IS_RAW_MODE_ENABLED(pVM), VERR_INVALID_POINTER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertReturn( enmKind == PGMPHYSHANDLERKIND_WRITE + || enmKind == PGMPHYSHANDLERKIND_ALL + || enmKind == PGMPHYSHANDLERKIND_MMIO, + VERR_INVALID_PARAMETER); + + PPGMPHYSHANDLERTYPEINT pType; + int rc = MMHyperAlloc(pVM, sizeof(*pType), 0, MM_TAG_PGM_HANDLER_TYPES, (void **)&pType); + if (RT_SUCCESS(rc)) + { + pType->u32Magic = PGMPHYSHANDLERTYPEINT_MAGIC; + pType->cRefs = 1; + pType->enmKind = enmKind; + pType->uState = enmKind == PGMPHYSHANDLERKIND_WRITE + ? PGM_PAGE_HNDL_PHYS_STATE_WRITE : PGM_PAGE_HNDL_PHYS_STATE_ALL; + pType->pfnHandlerR3 = pfnHandlerR3; + pType->pfnHandlerR0 = pfnHandlerR0; + pType->pfnPfHandlerR0 = pfnPfHandlerR0; + pType->pfnHandlerRC = pfnHandlerRC; + pType->pfnPfHandlerRC = pfnPfHandlerRC; + pType->pszDesc = pszDesc; + + pgmLock(pVM); + RTListOff32Append(&pVM->pgm.s.CTX_SUFF(pTrees)->HeadPhysHandlerTypes, &pType->ListNode); + pgmUnlock(pVM); + + *phType = MMHyperHeapPtrToOffset(pVM, pType); + LogFlow(("PGMR3HandlerPhysicalTypeRegisterEx: %p/%#x: enmKind=%d pfnHandlerR3=%RHv pfnHandlerR0=%RHv pfnHandlerRC=%RRv pszDesc=%s\n", + pType, *phType, enmKind, pfnHandlerR3, pfnPfHandlerR0, pfnPfHandlerRC, pszDesc)); + return VINF_SUCCESS; + } + *phType = NIL_PGMPHYSHANDLERTYPE; + return rc; +} + + +/** + * Register a physical page access handler type. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmKind The kind of access handler. + * @param pfnHandlerR3 Pointer to the ring-3 handler callback. + * @param pszModR0 The name of the ring-0 module, NULL is an alias for + * the main ring-0 module. + * @param pszHandlerR0 The name of the ring-0 handler, NULL if the ring-3 + * handler should be called. + * @param pszPfHandlerR0 The name of the ring-0 \#PF handler, NULL if the + * ring-3 handler should be called. + * @param pszModRC The name of the raw-mode context module, NULL is an + * alias for the main RC module. + * @param pszHandlerRC The name of the raw-mode context handler, NULL if + * the ring-3 handler should be called. + * @param pszPfHandlerRC The name of the raw-mode context \#PF handler, NULL + * if the ring-3 handler should be called. + * @param pszDesc The type description. + * @param phType Where to return the type handle (cross context + * safe). 
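+ *
+ * @note A minimal usage sketch (illustrative; the ring-3 callback and the
+ *       description string are assumptions, not taken from this patch).
+ *       Passing NULL for the module and handler names makes the ring-0 and
+ *       raw-mode contexts redirect to the ring-3 callback, as described for
+ *       the individual parameters above:
+ * @code
+ *      PGMPHYSHANDLERTYPE hType;
+ *      int rc = PGMR3HandlerPhysicalTypeRegister(pVM, PGMPHYSHANDLERKIND_WRITE,
+ *                                                myDeviceWriteHandlerR3,
+ *                                                NULL, NULL, NULL,  /* ring-0: use defaults */
+ *                                                NULL, NULL, NULL,  /* raw-mode: use defaults */
+ *                                                "My device", &hType);
+ * @endcode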
+ */ +VMMR3DECL(int) PGMR3HandlerPhysicalTypeRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, + R3PTRTYPE(PFNPGMPHYSHANDLER) pfnHandlerR3, + const char *pszModR0, const char *pszHandlerR0, const char *pszPfHandlerR0, + const char *pszModRC, const char *pszHandlerRC, const char *pszPfHandlerRC, + const char *pszDesc, PPGMPHYSHANDLERTYPE phType) +{ + LogFlow(("PGMR3HandlerPhysicalTypeRegister: enmKind=%d pfnHandlerR3=%RHv pszModR0=%s pszHandlerR0=%s pszPfHandlerR0=%s pszModRC=%s pszHandlerRC=%s pszPfHandlerRC=%s pszDesc=%s\n", + enmKind, pfnHandlerR3, pszModR0, pszHandlerR0, pszPfHandlerR0, pszModRC, pszHandlerRC, pszPfHandlerRC, pszDesc)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnHandlerR3, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszModR0, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszHandlerR0, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszPfHandlerR0, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszModRC, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszHandlerRC, VERR_INVALID_POINTER); + AssertPtrNullReturn(pszPfHandlerRC, VERR_INVALID_POINTER); + + /* + * Resolve the R0 handlers. + */ + R0PTRTYPE(PFNPGMPHYSHANDLER) pfnHandlerR0 = NIL_RTR0PTR; + int rc = PDMR3LdrGetSymbolR0Lazy(pVM, pszHandlerR0 ? pszModR0 : NULL, NULL /*pszSearchPath*/, + pszHandlerR0 ? pszHandlerR0 : "pgmPhysHandlerRedirectToHC", &pfnHandlerR0); + if (RT_SUCCESS(rc)) + { + R0PTRTYPE(PFNPGMR0PHYSPFHANDLER) pfnPfHandlerR0 = NIL_RTR0PTR; + rc = PDMR3LdrGetSymbolR0Lazy(pVM, pszPfHandlerR0 ? pszModR0 : NULL, NULL /*pszSearchPath*/, + pszPfHandlerR0 ? pszPfHandlerR0 : "pgmPhysPfHandlerRedirectToHC", &pfnPfHandlerR0); + if (RT_SUCCESS(rc)) + { + /* + * Resolve the GC handler. + */ + RTRCPTR pfnHandlerRC = NIL_RTRCPTR; + RTRCPTR pfnPfHandlerRC = NIL_RTRCPTR; + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + rc = PDMR3LdrGetSymbolRCLazy(pVM, pszHandlerRC ? pszModRC : NULL, NULL /*pszSearchPath*/, + pszHandlerRC ? pszHandlerRC : "pgmPhysHandlerRedirectToHC", &pfnHandlerRC); + if (RT_SUCCESS(rc)) + { + rc = PDMR3LdrGetSymbolRCLazy(pVM, pszPfHandlerRC ? pszModRC : NULL, NULL /*pszSearchPath*/, + pszPfHandlerRC ? pszPfHandlerRC : "pgmPhysPfHandlerRedirectToHC", &pfnPfHandlerRC); + AssertMsgRC(rc, ("Failed to resolve %s.%s, rc=%Rrc.\n", pszPfHandlerRC ? pszModRC : VMMRC_MAIN_MODULE_NAME, + pszPfHandlerRC ? pszPfHandlerRC : "pgmPhysPfHandlerRedirectToHC", rc)); + } + else + AssertMsgFailed(("Failed to resolve %s.%s, rc=%Rrc.\n", pszHandlerRC ? pszModRC : VMMRC_MAIN_MODULE_NAME, + pszHandlerRC ? pszHandlerRC : "pgmPhysHandlerRedirectToHC", rc)); + + } + if (RT_SUCCESS(rc)) + return PGMR3HandlerPhysicalTypeRegisterEx(pVM, enmKind, pfnHandlerR3, + pfnHandlerR0, pfnPfHandlerR0, + pfnHandlerRC, pfnPfHandlerRC, + pszDesc, phType); + } + else + AssertMsgFailed(("Failed to resolve %s.%s, rc=%Rrc.\n", pszPfHandlerR0 ? pszModR0 : VMMR0_MAIN_MODULE_NAME, + pszPfHandlerR0 ? pszPfHandlerR0 : "pgmPhysHandlerRedirectToHC", rc)); + } + else + AssertMsgFailed(("Failed to resolve %s.%s, rc=%Rrc.\n", pszHandlerR0 ? pszModR0 : VMMR0_MAIN_MODULE_NAME, + pszHandlerR0 ? pszHandlerR0 : "pgmPhysHandlerRedirectToHC", rc)); + + return rc; +} + + +/** + * Updates the physical page access handlers. + * + * @param pVM The cross context VM structure. + * @remark Only used when restoring a saved state. + */ +void pgmR3HandlerPhysicalUpdateAll(PVM pVM) +{ + LogFlow(("pgmHandlerPhysicalUpdateAll:\n")); + + /* + * Clear and set. 
+ * (the right -> left on the setting pass is just bird speculating on cache hits) + */ + pgmLock(pVM); + RTAvlroGCPhysDoWithAll(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, true, pgmR3HandlerPhysicalOneClear, pVM); + RTAvlroGCPhysDoWithAll(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, false, pgmR3HandlerPhysicalOneSet, pVM); + pgmUnlock(pVM); +} + + +/** + * Clears all the page level flags for one physical handler range. + * + * @returns 0 + * @param pNode Pointer to a PGMPHYSHANDLER. + * @param pvUser Pointer to the VM. + */ +static DECLCALLBACK(int) pgmR3HandlerPhysicalOneClear(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)pNode; + PPGMRAMRANGE pRamHint = NULL; + RTGCPHYS GCPhys = pCur->Core.Key; + RTUINT cPages = pCur->cPages; + PVM pVM = (PVM)pvUser; + for (;;) + { + PPGMPAGE pPage; + int rc = pgmPhysGetPageWithHintEx(pVM, GCPhys, &pPage, &pRamHint); + if (RT_SUCCESS(rc)) + { + PGM_PAGE_SET_HNDL_PHYS_STATE(pPage, PGM_PAGE_HNDL_PHYS_STATE_NONE); + + /* Tell NEM about the protection change. */ + if (VM_IS_NEM_ENABLED(pVM)) + { + uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage); + PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage); + NEMHCNotifyPhysPageProtChanged(pVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage), + pgmPhysPageCalcNemProtection(pPage, enmType), enmType, &u2State); + PGM_PAGE_SET_NEM_STATE(pPage, u2State); + } + } + else + AssertRC(rc); + + if (--cPages == 0) + return 0; + GCPhys += PAGE_SIZE; + } +} + + +/** + * Sets all the page level flags for one physical handler range. + * + * @returns 0 + * @param pNode Pointer to a PGMPHYSHANDLER. + * @param pvUser Pointer to the VM. + */ +static DECLCALLBACK(int) pgmR3HandlerPhysicalOneSet(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PVM pVM = (PVM)pvUser; + PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)pNode; + PPGMPHYSHANDLERTYPEINT pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur); + unsigned uState = pCurType->uState; + PPGMRAMRANGE pRamHint = NULL; + RTGCPHYS GCPhys = pCur->Core.Key; + RTUINT cPages = pCur->cPages; + for (;;) + { + PPGMPAGE pPage; + int rc = pgmPhysGetPageWithHintEx(pVM, GCPhys, &pPage, &pRamHint); + if (RT_SUCCESS(rc)) + { + PGM_PAGE_SET_HNDL_PHYS_STATE(pPage, uState); + + /* Tell NEM about the protection change. */ + if (VM_IS_NEM_ENABLED(pVM)) + { + uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage); + PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage); + NEMHCNotifyPhysPageProtChanged(pVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage), + pgmPhysPageCalcNemProtection(pPage, enmType), enmType, &u2State); + PGM_PAGE_SET_NEM_STATE(pPage, u2State); + } + } + else + AssertRC(rc); + + if (--cPages == 0) + return 0; + GCPhys += PAGE_SIZE; + } +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Register a virtual page access handler type, extended version. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmKind The kind of access handler. + * @param fRelocUserRC Whether the pvUserRC argument should be + * automatically relocated or not. + * @param pfnInvalidateR3 Pointer to the ring-3 invalidation handler callback. + * Warning! This callback stopped working in VBox v1.2! + * @param pfnHandlerR3 Pointer to the ring-3 handler callback. + * @param pfnHandlerRC Pointer to the raw-mode context handler callback. + * @param pfnPfHandlerRC Pointer to the raw-mode context \#PF handler + * callback. + * @param pszDesc The type description. + * @param phType Where to return the type handle (cross context + * safe). 
+ * @remarks No virtual handlers when executing using HM (i.e. ring-0). + */ +VMMR3_INT_DECL(int) PGMR3HandlerVirtualTypeRegisterEx(PVM pVM, PGMVIRTHANDLERKIND enmKind, bool fRelocUserRC, + PFNPGMR3VIRTINVALIDATE pfnInvalidateR3, + PFNPGMVIRTHANDLER pfnHandlerR3, + RCPTRTYPE(FNPGMVIRTHANDLER) pfnHandlerRC, + RCPTRTYPE(FNPGMRCVIRTPFHANDLER) pfnPfHandlerRC, + const char *pszDesc, PPGMVIRTHANDLERTYPE phType) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_NOT_AVAILABLE); /* Only supported/relevant for raw-mode. */ + AssertReturn( enmKind == PGMVIRTHANDLERKIND_WRITE + || enmKind == PGMVIRTHANDLERKIND_ALL + || enmKind == PGMVIRTHANDLERKIND_HYPERVISOR, + VERR_INVALID_PARAMETER); + if (enmKind != PGMVIRTHANDLERKIND_HYPERVISOR) + { + AssertPtrNullReturn(pfnInvalidateR3, VERR_INVALID_POINTER); + AssertPtrReturn(pfnHandlerR3, VERR_INVALID_POINTER); + AssertReturn(pfnHandlerRC != NIL_RTRCPTR, VERR_INVALID_POINTER); + } + else + { + AssertReturn(pfnInvalidateR3 == NULL, VERR_INVALID_POINTER); + AssertReturn(pfnHandlerR3 == NULL, VERR_INVALID_POINTER); + AssertReturn(pfnHandlerRC == NIL_RTR0PTR, VERR_INVALID_POINTER); + } + AssertReturn(pfnPfHandlerRC != NIL_RTRCPTR, VERR_INVALID_POINTER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + + PPGMVIRTHANDLERTYPEINT pType; + int rc = MMHyperAlloc(pVM, sizeof(*pType), 0, MM_TAG_PGM_HANDLER_TYPES, (void **)&pType); + if (RT_SUCCESS(rc)) + { + pType->u32Magic = PGMVIRTHANDLERTYPEINT_MAGIC; + pType->cRefs = 1; + pType->enmKind = enmKind; + pType->fRelocUserRC = fRelocUserRC; + pType->uState = enmKind == PGMVIRTHANDLERKIND_ALL + ? PGM_PAGE_HNDL_VIRT_STATE_ALL : PGM_PAGE_HNDL_VIRT_STATE_WRITE; + pType->pfnInvalidateR3 = pfnInvalidateR3; + pType->pfnHandlerR3 = pfnHandlerR3; + pType->pfnHandlerRC = pfnHandlerRC; + pType->pfnPfHandlerRC = pfnPfHandlerRC; + pType->pszDesc = pszDesc; + + pgmLock(pVM); + RTListOff32Append(&pVM->pgm.s.CTX_SUFF(pTrees)->HeadVirtHandlerTypes, &pType->ListNode); + pgmUnlock(pVM); + + *phType = MMHyperHeapPtrToOffset(pVM, pType); + LogFlow(("PGMR3HandlerVirtualTypeRegisterEx: %p/%#x: enmKind=%d pfnInvalidateR3=%RHv pfnHandlerR3=%RHv pfnPfHandlerRC=%RRv pszDesc=%s\n", + pType, *phType, enmKind, pfnInvalidateR3, pfnHandlerR3, pfnPfHandlerRC, pszDesc)); + return VINF_SUCCESS; + } + *phType = NIL_PGMVIRTHANDLERTYPE; + return rc; +} + + +/** + * Register a physical page access handler type. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmKind The kind of access handler. + * @param fRelocUserRC Whether the pvUserRC argument should be + * automatically relocated or not. + * @param pfnInvalidateR3 Pointer to the ring-3 invalidateion callback + * (optional, can be NULL). + * @param pfnHandlerR3 Pointer to the ring-3 handler callback. + * @param pszHandlerRC The name of the raw-mode context handler callback + * (in VMMRC.rc). + * @param pszPfHandlerRC The name of the raw-mode context \#PF handler (in + * VMMRC.rc). + * @param pszDesc The type description. + * @param phType Where to return the type handle (cross context + * safe). + * @remarks No virtual handlers when executing using HM (i.e. ring-0). 
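+ *
+ * @note Illustrative sketch only; the callback, the VMMRC.rc symbol names and
+ *       the range variables are assumptions. A raw-mode component would
+ *       register the type once and then attach ranges to it with
+ *       PGMR3HandlerVirtualRegister() further down in this file:
+ * @code
+ *      PGMVIRTHANDLERTYPE hType;
+ *      rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE,
+ *                                           false /*fRelocUserRC*/,
+ *                                           NULL /*pfnInvalidateR3 - optional*/,
+ *                                           myVirtWriteHandlerR3,
+ *                                           "MyVirtWriteHandlerRC", "MyVirtWritePfHandlerRC",
+ *                                           "My guest structure monitor", &hType);
+ *      if (RT_SUCCESS(rc))
+ *          rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, hType, GCPtrStart, GCPtrStart + cb - 1,
+ *                                           pvMyUserR3, NIL_RTRCPTR, NULL /*pszDesc*/);
+ * @endcode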
+ */ +VMMR3_INT_DECL(int) PGMR3HandlerVirtualTypeRegister(PVM pVM, PGMVIRTHANDLERKIND enmKind, bool fRelocUserRC, + PFNPGMR3VIRTINVALIDATE pfnInvalidateR3, + PFNPGMVIRTHANDLER pfnHandlerR3, + const char *pszHandlerRC, const char *pszPfHandlerRC, const char *pszDesc, + PPGMVIRTHANDLERTYPE phType) +{ + LogFlow(("PGMR3HandlerVirtualTypeRegister: enmKind=%d pfnInvalidateR3=%RHv pfnHandlerR3=%RHv pszPfHandlerRC=%s pszDesc=%s\n", + enmKind, pfnInvalidateR3, pfnHandlerR3, pszPfHandlerRC, pszDesc)); + + /* + * Validate input. + */ + AssertPtrNullReturn(pszHandlerRC, VERR_INVALID_POINTER); + AssertPtrReturn(pszPfHandlerRC, VERR_INVALID_POINTER); + + /* + * Resolve the GC handler. + */ + RTRCPTR pfnHandlerRC = NIL_RTRCPTR; + int rc = VINF_SUCCESS; + if (pszHandlerRC) + rc = PDMR3LdrGetSymbolRCLazy(pVM, VMMRC_MAIN_MODULE_NAME, NULL /*pszSearchPath*/, pszHandlerRC, &pfnHandlerRC); + if (RT_SUCCESS(rc)) + { + RTRCPTR pfnPfHandlerRC = NIL_RTRCPTR; + rc = PDMR3LdrGetSymbolRCLazy(pVM, VMMRC_MAIN_MODULE_NAME, NULL /*pszSearchPath*/, pszPfHandlerRC, &pfnPfHandlerRC); + if (RT_SUCCESS(rc)) + return PGMR3HandlerVirtualTypeRegisterEx(pVM, enmKind, fRelocUserRC, + pfnInvalidateR3, pfnHandlerR3, + pfnHandlerRC, pfnPfHandlerRC, + pszDesc, phType); + + AssertMsgFailed(("Failed to resolve %s.%s, rc=%Rrc.\n", VMMRC_MAIN_MODULE_NAME, pszPfHandlerRC, rc)); + } + else + AssertMsgFailed(("Failed to resolve %s.%s, rc=%Rrc.\n", VMMRC_MAIN_MODULE_NAME, pszHandlerRC, rc)); + return rc; +} + + +/** + * Register a access handler for a virtual range. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the + * calling EMT. + * @param hType The handler type. + * @param GCPtr Start address. + * @param GCPtrLast Last address (inclusive). + * @param pvUserR3 The ring-3 context user argument. + * @param pvUserRC The raw-mode context user argument. Whether this is + * automatically relocated or not depends on the type. + * @param pszDesc Pointer to description string. This must not be freed. + */ +VMMR3_INT_DECL(int) PGMR3HandlerVirtualRegister(PVM pVM, PVMCPU pVCpu, PGMVIRTHANDLERTYPE hType, RTGCPTR GCPtr, RTGCPTR GCPtrLast, + void *pvUserR3, RTRCPTR pvUserRC, const char *pszDesc) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_NOT_AVAILABLE); /* Only supported/relevant for raw-mode. */ + PPGMVIRTHANDLERTYPEINT pType = PGMVIRTHANDLERTYPEINT_FROM_HANDLE(pVM, hType); + Log(("PGMR3HandlerVirtualRegister: GCPhys=%RGp GCPhysLast=%RGp pvUserR3=%RHv pvUserGC=%RRv hType=%#x (%d, %s) pszDesc=%RHv:%s\n", + GCPtr, GCPtrLast, pvUserR3, pvUserRC, hType, pType->enmKind, R3STRING(pType->pszDesc), pszDesc, R3STRING(pszDesc))); + + /* + * Validate input. + */ + AssertReturn(pType->u32Magic == PGMVIRTHANDLERTYPEINT_MAGIC, VERR_INVALID_HANDLE); + AssertMsgReturn(GCPtr < GCPtrLast, ("GCPtr >= GCPtrLast (%RGp >= %RGp)\n", GCPtr, GCPtrLast), VERR_INVALID_PARAMETER); + switch (pType->enmKind) + { + case PGMVIRTHANDLERKIND_ALL: + /* Simplification for PGMPhysRead and others: Full pages. 
*/ + AssertReleaseMsgReturn( (GCPtr & PAGE_OFFSET_MASK) == 0 + && (GCPtrLast & PAGE_OFFSET_MASK) == PAGE_OFFSET_MASK, + ("PGMVIRTHANDLERKIND_ALL: GCPtr=%RGv GCPtrLast=%RGv\n", GCPtr, GCPtrLast), + VERR_NOT_IMPLEMENTED); + break; + case PGMVIRTHANDLERKIND_WRITE: + case PGMVIRTHANDLERKIND_HYPERVISOR: + break; + default: + AssertMsgFailedReturn(("Invalid enmKind=%d!\n", pType->enmKind), VERR_INVALID_PARAMETER); + } + AssertMsgReturn( (RTRCUINTPTR)pvUserRC < 0x10000 + || MMHyperR3ToRC(pVM, MMHyperRCToR3(pVM, pvUserRC)) == pvUserRC, + ("Not RC pointer! pvUserRC=%RRv\n", pvUserRC), + VERR_INVALID_PARAMETER); + + /* + * Allocate and initialize a new entry. + */ + unsigned cPages = (RT_ALIGN(GCPtrLast + 1, PAGE_SIZE) - (GCPtr & PAGE_BASE_GC_MASK)) >> PAGE_SHIFT; + PPGMVIRTHANDLER pNew; + int rc = MMHyperAlloc(pVM, RT_UOFFSETOF_DYN(PGMVIRTHANDLER, aPhysToVirt[cPages]), 0, MM_TAG_PGM_HANDLERS, (void **)&pNew); /** @todo r=bird: incorrect member name PhysToVirt? */ + if (RT_FAILURE(rc)) + return rc; + + pNew->Core.Key = GCPtr; + pNew->Core.KeyLast = GCPtrLast; + + pNew->hType = hType; + pNew->pvUserRC = pvUserRC; + pNew->pvUserR3 = pvUserR3; + pNew->pszDesc = pszDesc ? pszDesc : pType->pszDesc; + pNew->cb = GCPtrLast - GCPtr + 1; + pNew->cPages = cPages; + /* Will be synced at next guest execution attempt. */ + while (cPages-- > 0) + { + pNew->aPhysToVirt[cPages].Core.Key = NIL_RTGCPHYS; + pNew->aPhysToVirt[cPages].Core.KeyLast = NIL_RTGCPHYS; + pNew->aPhysToVirt[cPages].offVirtHandler = -(int32_t)RT_UOFFSETOF_DYN(PGMVIRTHANDLER, aPhysToVirt[cPages]); + pNew->aPhysToVirt[cPages].offNextAlias = 0; + } + + /* + * Try to insert it into the tree. + * + * The current implementation doesn't allow multiple handlers for + * the same range this makes everything much simpler and faster. + */ + AVLROGCPTRTREE *pRoot = pType->enmKind != PGMVIRTHANDLERKIND_HYPERVISOR + ? &pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers + : &pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers; + pgmLock(pVM); + if (*pRoot != 0) + { + PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrGetBestFit(pRoot, pNew->Core.Key, true); + if ( !pCur + || GCPtr > pCur->Core.KeyLast + || GCPtrLast < pCur->Core.Key) + pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrGetBestFit(pRoot, pNew->Core.Key, false); + if ( pCur + && GCPtr <= pCur->Core.KeyLast + && GCPtrLast >= pCur->Core.Key) + { + /* + * The LDT sometimes conflicts with the IDT and LDT ranges while being + * updated on linux. So, we don't assert simply log it. + */ + Log(("PGMR3HandlerVirtualRegister: Conflict with existing range %RGv-%RGv (%s), req. %RGv-%RGv (%s)\n", + pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc, GCPtr, GCPtrLast, pszDesc)); + MMHyperFree(pVM, pNew); + pgmUnlock(pVM); + return VERR_PGM_HANDLER_VIRTUAL_CONFLICT; + } + } + if (RTAvlroGCPtrInsert(pRoot, &pNew->Core)) + { + if (pType->enmKind != PGMVIRTHANDLERKIND_HYPERVISOR) + { + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL | PGM_SYNC_CLEAR_PGM_POOL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + PGMHandlerVirtualTypeRetain(pVM, hType); + pgmUnlock(pVM); + +#ifdef VBOX_WITH_STATISTICS + rc = STAMR3RegisterF(pVM, &pNew->Stat, STAMTYPE_PROFILE, STAMVISIBILITY_USED, STAMUNIT_TICKS_PER_CALL, pNew->pszDesc, + "/PGM/VirtHandler/Calls/%RGv-%RGv", pNew->Core.Key, pNew->Core.KeyLast); + AssertRC(rc); +#endif + return VINF_SUCCESS; + } + + pgmUnlock(pVM); + AssertFailed(); + MMHyperFree(pVM, pNew); + return VERR_PGM_HANDLER_VIRTUAL_CONFLICT; + +} + + +/** + * Changes the type of a virtual handler. 
+ * + * The new and old type must have the same access kind. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtr Start address of the virtual handler. + * @param hNewType The new handler type. + */ +VMMR3_INT_DECL(int) PGMHandlerVirtualChangeType(PVM pVM, RTGCPTR GCPtr, PGMVIRTHANDLERTYPE hNewType) +{ + PPGMVIRTHANDLERTYPEINT pNewType = PGMVIRTHANDLERTYPEINT_FROM_HANDLE(pVM, hNewType); + AssertReturn(pNewType->u32Magic == PGMVIRTHANDLERTYPEINT_MAGIC, VERR_INVALID_HANDLE); + + pgmLock(pVM); + PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrGet(&pVM->pgm.s.pTreesR3->VirtHandlers, GCPtr); + if (pCur) + { + PGMVIRTHANDLERTYPE hOldType = pCur->hType; + PPGMVIRTHANDLERTYPEINT pOldType = PGMVIRTHANDLERTYPEINT_FROM_HANDLE(pVM, hOldType); + if (pOldType != pNewType) + { + AssertReturnStmt(pNewType->enmKind == pOldType->enmKind, pgmUnlock(pVM), VERR_ACCESS_DENIED); + PGMHandlerVirtualTypeRetain(pVM, hNewType); + pCur->hType = hNewType; + PGMHandlerVirtualTypeRelease(pVM, hOldType); + } + pgmUnlock(pVM); + return VINF_SUCCESS; + } + pgmUnlock(pVM); + AssertMsgFailed(("Range %#x not found!\n", GCPtr)); + return VERR_INVALID_PARAMETER; +} + + +/** + * Deregister an access handler for a virtual range. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling + * EMT. + * @param GCPtr Start address. + * @param fHypervisor Set if PGMVIRTHANDLERKIND_HYPERVISOR, false if not. + * @thread EMT(pVCpu) + */ +VMMR3_INT_DECL(int) PGMHandlerVirtualDeregister(PVM pVM, PVMCPU pVCpu, RTGCPTR GCPtr, bool fHypervisor) +{ + pgmLock(pVM); + + PPGMVIRTHANDLER pCur; + if (!fHypervisor) + { + /* + * Normal guest handler. + */ + pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRemove(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, GCPtr); + AssertMsgReturnStmt(pCur, ("GCPtr=%RGv\n", GCPtr), pgmUnlock(pVM), VERR_INVALID_PARAMETER); + Assert(PGMVIRTANDLER_GET_TYPE(pVM, pCur)->enmKind != PGMVIRTHANDLERKIND_HYPERVISOR); + + Log(("PGMHandlerVirtualDeregister: Removing Virtual (%d) Range %RGv-%RGv %s\n", + PGMVIRTANDLER_GET_TYPE(pVM, pCur)->enmKind, pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc)); + + /* Reset the flags and remove phys2virt nodes. */ + for (uint32_t iPage = 0; iPage < pCur->cPages; iPage++) + if (pCur->aPhysToVirt[iPage].offNextAlias & PGMPHYS2VIRTHANDLER_IN_TREE) + pgmHandlerVirtualClearPage(pVM, pCur, iPage); + + /* Schedule CR3 sync. */ + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL | PGM_SYNC_CLEAR_PGM_POOL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + else + { + /* + * Hypervisor one (hypervisor relocation or termination only). + */ + pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRemove(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, GCPtr); + AssertMsgReturnStmt(pCur, ("GCPtr=%RGv\n", GCPtr), pgmUnlock(pVM), VERR_INVALID_PARAMETER); + Assert(PGMVIRTANDLER_GET_TYPE(pVM, pCur)->enmKind == PGMVIRTHANDLERKIND_HYPERVISOR); + + Log(("PGMHandlerVirtualDeregister: Removing Hyper Virtual Range %RGv-%RGv %s\n", + pCur->Core.Key, pCur->Core.KeyLast, pCur->pszDesc)); + } + + pgmUnlock(pVM); + + /* + * Free it. 
+ */ +#ifdef VBOX_WITH_STATISTICS + STAMR3DeregisterF(pVM->pUVM, "/PGM/VirtHandler/Calls/%RGv-%RGv", pCur->Core.Key, pCur->Core.KeyLast); +#endif + PGMHandlerVirtualTypeRelease(pVM, pCur->hType); + MMHyperFree(pVM, pCur); + + return VINF_SUCCESS; +} + +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Arguments for pgmR3InfoHandlersPhysicalOne and pgmR3InfoHandlersVirtualOne. + */ +typedef struct PGMHANDLERINFOARG +{ + /** The output helpers.*/ + PCDBGFINFOHLP pHlp; + /** Pointer to the cross context VM handle. */ + PVM pVM; + /** Set if statistics should be dumped. */ + bool fStats; +} PGMHANDLERINFOARG, *PPGMHANDLERINFOARG; + + +/** + * Info callback for 'pgmhandlers'. + * + * @param pVM The cross context VM structure. + * @param pHlp The output helpers. + * @param pszArgs The arguments. phys or virt. + */ +DECLCALLBACK(void) pgmR3InfoHandlers(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /* + * Test input. + */ + PGMHANDLERINFOARG Args = { pHlp, pVM, /* .fStats = */ true }; + bool fPhysical = !pszArgs || !*pszArgs; + bool fVirtual = fPhysical; + bool fHyper = fPhysical; + if (!fPhysical) + { + bool fAll = strstr(pszArgs, "all") != NULL; + fPhysical = fAll || strstr(pszArgs, "phys") != NULL; + fVirtual = fAll || strstr(pszArgs, "virt") != NULL; + fHyper = fAll || strstr(pszArgs, "hyper")!= NULL; + Args.fStats = strstr(pszArgs, "nost") == NULL; + } + + /* + * Dump the handlers. + */ + if (fPhysical) + { + pHlp->pfnPrintf(pHlp, + "Physical handlers: (PhysHandlers=%d (%#x))\n" + "%*s %*s %*s %*s HandlerGC UserGC Type Description\n", + pVM->pgm.s.pTreesR3->PhysHandlers, pVM->pgm.s.pTreesR3->PhysHandlers, + - (int)sizeof(RTGCPHYS) * 2, "From", + - (int)sizeof(RTGCPHYS) * 2 - 3, "- To (incl)", + - (int)sizeof(RTHCPTR) * 2 - 1, "HandlerHC", + - (int)sizeof(RTHCPTR) * 2 - 1, "UserHC"); + RTAvlroGCPhysDoWithAll(&pVM->pgm.s.pTreesR3->PhysHandlers, true, pgmR3InfoHandlersPhysicalOne, &Args); + } + +#ifdef VBOX_WITH_RAW_MODE + if (fVirtual) + { + pHlp->pfnPrintf(pHlp, + "Virtual handlers:\n" + "%*s %*s %*s %*s Type Description\n", + - (int)sizeof(RTGCPTR) * 2, "From", + - (int)sizeof(RTGCPTR) * 2 - 3, "- To (excl)", + - (int)sizeof(RTHCPTR) * 2 - 1, "HandlerHC", + - (int)sizeof(RTRCPTR) * 2 - 1, "HandlerGC"); + RTAvlroGCPtrDoWithAll(&pVM->pgm.s.pTreesR3->VirtHandlers, true, pgmR3InfoHandlersVirtualOne, &Args); + } + + if (fHyper) + { + pHlp->pfnPrintf(pHlp, + "Hypervisor Virtual handlers:\n" + "%*s %*s %*s %*s Type Description\n", + - (int)sizeof(RTGCPTR) * 2, "From", + - (int)sizeof(RTGCPTR) * 2 - 3, "- To (excl)", + - (int)sizeof(RTHCPTR) * 2 - 1, "HandlerHC", + - (int)sizeof(RTRCPTR) * 2 - 1, "HandlerGC"); + RTAvlroGCPtrDoWithAll(&pVM->pgm.s.pTreesR3->HyperVirtHandlers, true, pgmR3InfoHandlersVirtualOne, &Args); + } +#endif +} + + +/** + * Displays one physical handler range. + * + * @returns 0 + * @param pNode Pointer to a PGMPHYSHANDLER. + * @param pvUser Pointer to command helper functions. 
+ */ +static DECLCALLBACK(int) pgmR3InfoHandlersPhysicalOne(PAVLROGCPHYSNODECORE pNode, void *pvUser) +{ + PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)pNode; + PPGMHANDLERINFOARG pArgs = (PPGMHANDLERINFOARG)pvUser; + PCDBGFINFOHLP pHlp = pArgs->pHlp; + PPGMPHYSHANDLERTYPEINT pCurType = PGMPHYSHANDLER_GET_TYPE(pArgs->pVM, pCur); + const char *pszType; + switch (pCurType->enmKind) + { + case PGMPHYSHANDLERKIND_MMIO: pszType = "MMIO "; break; + case PGMPHYSHANDLERKIND_WRITE: pszType = "Write "; break; + case PGMPHYSHANDLERKIND_ALL: pszType = "All "; break; + default: pszType = "????"; break; + } + pHlp->pfnPrintf(pHlp, + "%RGp - %RGp %RHv %RHv %RRv %RRv %s %s\n", + pCur->Core.Key, pCur->Core.KeyLast, pCurType->pfnHandlerR3, pCur->pvUserR3, pCurType->pfnPfHandlerRC, pCur->pvUserRC, + pszType, pCur->pszDesc); +#ifdef VBOX_WITH_STATISTICS + if (pArgs->fStats) + pHlp->pfnPrintf(pHlp, " cPeriods: %9RU64 cTicks: %11RU64 Min: %11RU64 Avg: %11RU64 Max: %11RU64\n", + pCur->Stat.cPeriods, pCur->Stat.cTicks, pCur->Stat.cTicksMin, + pCur->Stat.cPeriods ? pCur->Stat.cTicks / pCur->Stat.cPeriods : 0, pCur->Stat.cTicksMax); +#endif + return 0; +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Displays one virtual handler range. + * + * @returns 0 + * @param pNode Pointer to a PGMVIRTHANDLER. + * @param pvUser Pointer to command helper functions. + */ +static DECLCALLBACK(int) pgmR3InfoHandlersVirtualOne(PAVLROGCPTRNODECORE pNode, void *pvUser) +{ + PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)pNode; + PPGMHANDLERINFOARG pArgs = (PPGMHANDLERINFOARG)pvUser; + PCDBGFINFOHLP pHlp = pArgs->pHlp; + PPGMVIRTHANDLERTYPEINT pCurType = PGMVIRTANDLER_GET_TYPE(pArgs->pVM, pCur); + const char *pszType; + switch (pCurType->enmKind) + { + case PGMVIRTHANDLERKIND_WRITE: pszType = "Write "; break; + case PGMVIRTHANDLERKIND_ALL: pszType = "All "; break; + case PGMVIRTHANDLERKIND_HYPERVISOR: pszType = "WriteHyp "; break; + default: pszType = "????"; break; + } + pHlp->pfnPrintf(pHlp, "%RGv - %RGv %RHv %RRv %s %s\n", + pCur->Core.Key, pCur->Core.KeyLast, pCurType->pfnHandlerR3, pCurType->pfnPfHandlerRC, pszType, pCur->pszDesc); +# ifdef VBOX_WITH_STATISTICS + if (pArgs->fStats) + pHlp->pfnPrintf(pHlp, " cPeriods: %9RU64 cTicks: %11RU64 Min: %11RU64 Avg: %11RU64 Max: %11RU64\n", + pCur->Stat.cPeriods, pCur->Stat.cTicks, pCur->Stat.cTicksMin, + pCur->Stat.cPeriods ? pCur->Stat.cTicks / pCur->Stat.cPeriods : 0, pCur->Stat.cTicksMax); +# endif + return 0; +} +#endif /* VBOX_WITH_RAW_MODE */ + diff --git a/src/VBox/VMM/VMMR3/PGMMap.cpp b/src/VBox/VMM/VMMR3/PGMMap.cpp new file mode 100644 index 00000000..949070a5 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMMap.cpp @@ -0,0 +1,1470 @@ +/* $Id: PGMMap.cpp $ */ +/** @file + * PGM - Page Manager, Guest Context Mappings. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM +#include +#include +#include "PGMInternal.h" +#include +#include "PGMInline.h" + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifndef PGM_WITHOUT_MAPPINGS +static void pgmR3MapClearPDEs(PVM pVM, PPGMMAPPING pMap, unsigned iOldPDE); +static void pgmR3MapSetPDEs(PVM pVM, PPGMMAPPING pMap, unsigned iNewPDE); +static int pgmR3MapIntermediateCheckOne(PVM pVM, uintptr_t uAddress, unsigned cPages, PX86PT pPTDefault, PX86PTPAE pPTPaeDefault); +static void pgmR3MapIntermediateDoOne(PVM pVM, uintptr_t uAddress, RTHCPHYS HCPhys, unsigned cPages, PX86PT pPTDefault, PX86PTPAE pPTPaeDefault); +#else +# define pgmR3MapClearPDEs(pVM, pMap, iNewPDE) do { } while (0) +# define pgmR3MapSetPDEs(pVM, pMap, iNewPDE) do { } while (0) +#endif + + +/** + * Creates a page table based mapping in GC. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtr Virtual Address. (Page table aligned!) + * @param cb Size of the range. Must be a 4MB aligned! + * @param fFlags PGMR3MAPPT_FLAGS_UNMAPPABLE or 0. + * @param pfnRelocate Relocation callback function. + * @param pvUser User argument to the callback. + * @param pszDesc Pointer to description string. This must not be freed. + */ +VMMR3DECL(int) PGMR3MapPT(PVM pVM, RTGCPTR GCPtr, uint32_t cb, uint32_t fFlags, PFNPGMRELOCATE pfnRelocate, void *pvUser, const char *pszDesc) +{ + LogFlow(("PGMR3MapPT: GCPtr=%#x cb=%d fFlags=%#x pfnRelocate=%p pvUser=%p pszDesc=%s\n", GCPtr, cb, fFlags, pfnRelocate, pvUser, pszDesc)); + AssertMsg(pVM->pgm.s.pInterPD, ("Paging isn't initialized, init order problems!\n")); + + /* + * Validate input. + * Note! The lower limit (1 MB) matches how pgmR3PhysMMIOExCreate works. + */ + Assert(!fFlags || fFlags == PGMR3MAPPT_FLAGS_UNMAPPABLE); + AssertMsgReturn(cb >= _1M && cb <= _64M, ("Seriously? cb=%d (%#x)\n", cb, cb), VERR_OUT_OF_RANGE); + + cb = RT_ALIGN_32(cb, _4M); + RTGCPTR GCPtrLast = GCPtr + cb - 1; + + AssertMsgReturn(GCPtrLast >= GCPtr, ("Range wraps! GCPtr=%x GCPtrLast=%x\n", GCPtr, GCPtrLast), + VERR_INVALID_PARAMETER); + AssertMsgReturn(!pVM->pgm.s.fMappingsFixed, ("Mappings are fixed! It's not possible to add new mappings at this time!\n"), + VERR_PGM_MAPPINGS_FIXED); + AssertPtrReturn(pfnRelocate, VERR_INVALID_PARAMETER); + + /* + * Find list location. + */ + PPGMMAPPING pPrev = NULL; + PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; + while (pCur) + { + if (pCur->GCPtrLast >= GCPtr && pCur->GCPtr <= GCPtrLast) + { + AssertMsgFailed(("Address is already in use by %s. req %#x-%#x take %#x-%#x\n", + pCur->pszDesc, GCPtr, GCPtrLast, pCur->GCPtr, pCur->GCPtrLast)); + LogRel(("VERR_PGM_MAPPING_CONFLICT: Address is already in use by %s. 
req %#x-%#x take %#x-%#x\n", + pCur->pszDesc, GCPtr, GCPtrLast, pCur->GCPtr, pCur->GCPtrLast)); + return VERR_PGM_MAPPING_CONFLICT; + } + if (pCur->GCPtr > GCPtr) + break; + pPrev = pCur; + pCur = pCur->pNextR3; + } + + /* + * Check for conflicts with intermediate mappings. + */ + const unsigned iPageDir = GCPtr >> X86_PD_SHIFT; + const unsigned cPTs = cb >> X86_PD_SHIFT; + if (pVM->pgm.s.fFinalizedMappings) + { + for (unsigned i = 0; i < cPTs; i++) + if (pVM->pgm.s.pInterPD->a[iPageDir + i].n.u1Present) + { + AssertMsgFailed(("Address %#x is already in use by an intermediate mapping.\n", GCPtr + (i << PAGE_SHIFT))); + LogRel(("VERR_PGM_MAPPING_CONFLICT: Address %#x is already in use by an intermediate mapping.\n", GCPtr + (i << PAGE_SHIFT))); + return VERR_PGM_MAPPING_CONFLICT; + } + /** @todo AMD64: add check in PAE structures too, so we can remove all the 32-Bit paging stuff there. */ + } + + /* + * Allocate and initialize the new list node. + */ + PPGMMAPPING pNew; + int rc; + if (fFlags & PGMR3MAPPT_FLAGS_UNMAPPABLE) + rc = MMHyperAlloc( pVM, RT_UOFFSETOF_DYN(PGMMAPPING, aPTs[cPTs]), 0, MM_TAG_PGM_MAPPINGS, (void **)&pNew); + else + rc = MMR3HyperAllocOnceNoRel(pVM, RT_UOFFSETOF_DYN(PGMMAPPING, aPTs[cPTs]), 0, MM_TAG_PGM_MAPPINGS, (void **)&pNew); + if (RT_FAILURE(rc)) + return rc; + pNew->GCPtr = GCPtr; + pNew->GCPtrLast = GCPtrLast; + pNew->cb = cb; + pNew->pfnRelocate = pfnRelocate; + pNew->pvUser = pvUser; + pNew->pszDesc = pszDesc; + pNew->cPTs = cPTs; + + /* + * Allocate page tables and insert them into the page directories. + * (One 32-bit PT and two PAE PTs.) + */ + uint8_t *pbPTs; + if (fFlags & PGMR3MAPPT_FLAGS_UNMAPPABLE) + rc = MMHyperAlloc( pVM, PAGE_SIZE * 3 * cPTs, PAGE_SIZE, MM_TAG_PGM_MAPPINGS, (void **)&pbPTs); + else + rc = MMR3HyperAllocOnceNoRel(pVM, PAGE_SIZE * 3 * cPTs, PAGE_SIZE, MM_TAG_PGM_MAPPINGS, (void **)&pbPTs); + if (RT_FAILURE(rc)) + { + MMHyperFree(pVM, pNew); + return VERR_NO_MEMORY; + } + + /* + * Init the page tables and insert them into the page directories. + */ + Log4(("PGMR3MapPT: GCPtr=%RGv cPTs=%u pbPTs=%p\n", GCPtr, cPTs, pbPTs)); + for (unsigned i = 0; i < cPTs; i++) + { + /* + * 32-bit. + */ + pNew->aPTs[i].pPTR3 = (PX86PT)pbPTs; + pNew->aPTs[i].pPTRC = MMHyperR3ToRC(pVM, pNew->aPTs[i].pPTR3); + pNew->aPTs[i].pPTR0 = MMHyperR3ToR0(pVM, pNew->aPTs[i].pPTR3); + pNew->aPTs[i].HCPhysPT = MMR3HyperHCVirt2HCPhys(pVM, pNew->aPTs[i].pPTR3); + pbPTs += PAGE_SIZE; + Log4(("PGMR3MapPT: i=%d: pPTR3=%RHv pPTRC=%RRv pPRTR0=%RHv HCPhysPT=%RHp\n", + i, pNew->aPTs[i].pPTR3, pNew->aPTs[i].pPTRC, pNew->aPTs[i].pPTR0, pNew->aPTs[i].HCPhysPT)); + + /* + * PAE. + */ + pNew->aPTs[i].HCPhysPaePT0 = MMR3HyperHCVirt2HCPhys(pVM, pbPTs); + pNew->aPTs[i].HCPhysPaePT1 = MMR3HyperHCVirt2HCPhys(pVM, pbPTs + PAGE_SIZE); + pNew->aPTs[i].paPaePTsR3 = (PPGMSHWPTPAE)pbPTs; + pNew->aPTs[i].paPaePTsRC = MMHyperR3ToRC(pVM, pbPTs); + pNew->aPTs[i].paPaePTsR0 = MMHyperR3ToR0(pVM, pbPTs); + pbPTs += PAGE_SIZE * 2; + Log4(("PGMR3MapPT: i=%d: paPaePTsR#=%RHv paPaePTsRC=%RRv paPaePTsR#=%RHv HCPhysPaePT0=%RHp HCPhysPaePT1=%RHp\n", + i, pNew->aPTs[i].paPaePTsR3, pNew->aPTs[i].paPaePTsRC, pNew->aPTs[i].paPaePTsR0, pNew->aPTs[i].HCPhysPaePT0, pNew->aPTs[i].HCPhysPaePT1)); + } + if (pVM->pgm.s.fFinalizedMappings) + pgmR3MapSetPDEs(pVM, pNew, iPageDir); + /* else PGMR3FinalizeMappings() */ + + /* + * Insert the new mapping. + */ + pNew->pNextR3 = pCur; + pNew->pNextRC = pCur ? MMHyperR3ToRC(pVM, pCur) : NIL_RTRCPTR; + pNew->pNextR0 = pCur ? 
MMHyperR3ToR0(pVM, pCur) : NIL_RTR0PTR; + if (pPrev) + { + pPrev->pNextR3 = pNew; + pPrev->pNextRC = MMHyperR3ToRC(pVM, pNew); + pPrev->pNextR0 = MMHyperR3ToR0(pVM, pNew); + } + else + { + pVM->pgm.s.pMappingsR3 = pNew; + pVM->pgm.s.pMappingsRC = MMHyperR3ToRC(pVM, pNew); + pVM->pgm.s.pMappingsR0 = MMHyperR3ToR0(pVM, pNew); + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + return VINF_SUCCESS; +} + +#ifdef VBOX_WITH_UNUSED_CODE + +/** + * Removes a page table based mapping. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtr Virtual Address. (Page table aligned!) + * + * @remarks Don't call this without passing PGMR3MAPPT_FLAGS_UNMAPPABLE to + * PGMR3MapPT or you'll burn in the heap. + */ +VMMR3DECL(int) PGMR3UnmapPT(PVM pVM, RTGCPTR GCPtr) +{ + LogFlow(("PGMR3UnmapPT: GCPtr=%#x\n", GCPtr)); + AssertReturn(pVM->pgm.s.fFinalizedMappings, VERR_WRONG_ORDER); + + /* + * Find it. + */ + PPGMMAPPING pPrev = NULL; + PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; + while (pCur) + { + if (pCur->GCPtr == GCPtr) + { + /* + * Unlink it. + */ + if (pPrev) + { + pPrev->pNextR3 = pCur->pNextR3; + pPrev->pNextRC = pCur->pNextRC; + pPrev->pNextR0 = pCur->pNextR0; + } + else + { + pVM->pgm.s.pMappingsR3 = pCur->pNextR3; + pVM->pgm.s.pMappingsRC = pCur->pNextRC; + pVM->pgm.s.pMappingsR0 = pCur->pNextR0; + } + + /* + * Free the page table memory, clear page directory entries + * and free the page tables and node memory. + */ + MMHyperFree(pVM, pCur->aPTs[0].pPTR3); + if (pCur->GCPtr != NIL_RTGCPTR) + pgmR3MapClearPDEs(pVM, pCur, pCur->GCPtr >> X86_PD_SHIFT); + MMHyperFree(pVM, pCur); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + return VINF_SUCCESS; + } + + /* done? */ + if (pCur->GCPtr > GCPtr) + break; + + /* next */ + pPrev = pCur; + pCur = pCur->pNextR3; + } + + AssertMsgFailed(("No mapping for %#x found!\n", GCPtr)); + return VERR_INVALID_PARAMETER; +} + +#endif /* unused */ + + +/** + * Checks whether a range of PDEs in the intermediate + * memory context are unused. + * + * We're talking 32-bit PDEs here. + * + * @returns true/false. + * @param pVM The cross context VM structure. + * @param iPD The first PDE in the range. + * @param cPTs The number of PDEs in the range. + */ +DECLINLINE(bool) pgmR3AreIntermediatePDEsUnused(PVM pVM, unsigned iPD, unsigned cPTs) +{ + if (pVM->pgm.s.pInterPD->a[iPD].n.u1Present) + return false; + while (cPTs > 1) + { + iPD++; + if (pVM->pgm.s.pInterPD->a[iPD].n.u1Present) + return false; + cPTs--; + } + return true; +} + + +/** + * Unlinks the mapping. + * + * The mapping *must* be in the list. + * + * @param pVM The cross context VM structure. + * @param pMapping The mapping to unlink. + */ +static void pgmR3MapUnlink(PVM pVM, PPGMMAPPING pMapping) +{ + PPGMMAPPING pAfterThis = pVM->pgm.s.pMappingsR3; + if (pAfterThis == pMapping) + { + /* head */ + pVM->pgm.s.pMappingsR3 = pMapping->pNextR3; + pVM->pgm.s.pMappingsRC = pMapping->pNextRC; + pVM->pgm.s.pMappingsR0 = pMapping->pNextR0; + } + else + { + /* in the list */ + while (pAfterThis->pNextR3 != pMapping) + { + pAfterThis = pAfterThis->pNextR3; + AssertReleaseReturnVoid(pAfterThis); + } + + pAfterThis->pNextR3 = pMapping->pNextR3; + pAfterThis->pNextRC = pMapping->pNextRC; + pAfterThis->pNextR0 = pMapping->pNextR0; + } +} + + +/** + * Links the mapping. + * + * @param pVM The cross context VM structure. 
+ * @param pMapping The mapping to linked. + */ +static void pgmR3MapLink(PVM pVM, PPGMMAPPING pMapping) +{ + /* + * Find the list location (it's sorted by GCPhys) and link it in. + */ + if ( !pVM->pgm.s.pMappingsR3 + || pVM->pgm.s.pMappingsR3->GCPtr > pMapping->GCPtr) + { + /* head */ + pMapping->pNextR3 = pVM->pgm.s.pMappingsR3; + pMapping->pNextRC = pVM->pgm.s.pMappingsRC; + pMapping->pNextR0 = pVM->pgm.s.pMappingsR0; + pVM->pgm.s.pMappingsR3 = pMapping; + pVM->pgm.s.pMappingsRC = MMHyperR3ToRC(pVM, pMapping); + pVM->pgm.s.pMappingsR0 = MMHyperR3ToR0(pVM, pMapping); + } + else + { + /* in the list */ + PPGMMAPPING pAfterThis = pVM->pgm.s.pMappingsR3; + PPGMMAPPING pBeforeThis = pAfterThis->pNextR3; + while (pBeforeThis && pBeforeThis->GCPtr <= pMapping->GCPtr) + { + pAfterThis = pBeforeThis; + pBeforeThis = pBeforeThis->pNextR3; + } + + pMapping->pNextR3 = pAfterThis->pNextR3; + pMapping->pNextRC = pAfterThis->pNextRC; + pMapping->pNextR0 = pAfterThis->pNextR0; + pAfterThis->pNextR3 = pMapping; + pAfterThis->pNextRC = MMHyperR3ToRC(pVM, pMapping); + pAfterThis->pNextR0 = MMHyperR3ToR0(pVM, pMapping); + } +} + + +/** + * Finalizes the intermediate context. + * + * This is called at the end of the ring-3 init and will construct the + * intermediate paging structures, relocating all the mappings in the process. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @thread EMT(0) + */ +VMMR3DECL(int) PGMR3FinalizeMappings(PVM pVM) +{ + AssertReturn(!pVM->pgm.s.fFinalizedMappings, VERR_WRONG_ORDER); + pVM->pgm.s.fFinalizedMappings = true; + + /* + * Loop until all mappings have been finalized. + */ +#if 0 + unsigned iPDNext = UINT32_C(0xc0000000) >> X86_PD_SHIFT; /* makes CSAM/PATM freak out booting linux. :-/ */ +#elif 0 + unsigned iPDNext = MM_HYPER_AREA_ADDRESS >> X86_PD_SHIFT; +#else + unsigned iPDNext = 1 << X86_PD_SHIFT; /* no hint, map them from the top. */ +#endif + + PPGMMAPPING pCur; + do + { + pCur = pVM->pgm.s.pMappingsR3; + while (pCur) + { + if (!pCur->fFinalized) + { + /* + * Find a suitable location. + */ + RTGCPTR const GCPtrOld = pCur->GCPtr; + const unsigned cPTs = pCur->cPTs; + unsigned iPDNew = iPDNext; + if ( iPDNew + cPTs >= X86_PG_ENTRIES /* exclude the last PD */ + || !pgmR3AreIntermediatePDEsUnused(pVM, iPDNew, cPTs) + || !pCur->pfnRelocate(pVM, GCPtrOld, (RTGCPTR)iPDNew << X86_PD_SHIFT, PGMRELOCATECALL_SUGGEST, pCur->pvUser)) + { + /* No luck, just scan down from 4GB-4MB, giving up at 4MB. */ + iPDNew = X86_PG_ENTRIES - cPTs - 1; + while ( iPDNew > 0 + && ( !pgmR3AreIntermediatePDEsUnused(pVM, iPDNew, cPTs) + || !pCur->pfnRelocate(pVM, GCPtrOld, (RTGCPTR)iPDNew << X86_PD_SHIFT, PGMRELOCATECALL_SUGGEST, pCur->pvUser)) + ) + iPDNew--; + AssertLogRelReturn(iPDNew != 0, VERR_PGM_INTERMEDIATE_PAGING_CONFLICT); + } + + /* + * Relocate it (something akin to pgmR3MapRelocate). + */ + pgmR3MapSetPDEs(pVM, pCur, iPDNew); + + /* unlink the mapping, update the entry and relink it. */ + pgmR3MapUnlink(pVM, pCur); + + RTGCPTR const GCPtrNew = (RTGCPTR)iPDNew << X86_PD_SHIFT; + pCur->GCPtr = GCPtrNew; + pCur->GCPtrLast = GCPtrNew + pCur->cb - 1; + pCur->fFinalized = true; + + pgmR3MapLink(pVM, pCur); + + /* Finally work the callback. */ + pCur->pfnRelocate(pVM, GCPtrOld, GCPtrNew, PGMRELOCATECALL_RELOCATE, pCur->pvUser); + + /* + * The list order might have changed, start from the beginning again. 
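+                 * (pgmR3MapUnlink/pgmR3MapLink above may have moved pCur, so the inner
+                 *  iteration state is stale; the outer loop rescans from the list head.)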
+ */ + iPDNext = iPDNew + cPTs; + break; + } + + /* next */ + pCur = pCur->pNextR3; + } + } while (pCur); + + return VINF_SUCCESS; +} + + +/** + * Gets the size of the current guest mappings if they were to be + * put next to one another. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pcb Where to store the size. + */ +VMMR3DECL(int) PGMR3MappingsSize(PVM pVM, uint32_t *pcb) +{ + RTGCPTR cb = 0; +#ifndef PGM_WITHOUT_MAPPINGS + for (PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; pCur; pCur = pCur->pNextR3) + cb += pCur->cb; +#else + RT_NOREF(pVM); +#endif + + *pcb = cb; + AssertReturn(*pcb == cb, VERR_NUMBER_TOO_BIG); + Log(("PGMR3MappingsSize: return %d (%#x) bytes\n", cb, cb)); + return VINF_SUCCESS; +} + + +/** + * Fixates the guest context mappings in a range reserved from the Guest OS. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtrBase The address of the reserved range of guest memory. + * @param cb The size of the range starting at GCPtrBase. + */ +VMMR3DECL(int) PGMR3MappingsFix(PVM pVM, RTGCPTR GCPtrBase, uint32_t cb) +{ + Log(("PGMR3MappingsFix: GCPtrBase=%RGv cb=%#x (fMappingsFixed=%RTbool MappingEnabled=%RTbool)\n", + GCPtrBase, cb, pVM->pgm.s.fMappingsFixed, pgmMapAreMappingsEnabled(pVM))); + +#ifndef PGM_WITHOUT_MAPPINGS + if (pgmMapAreMappingsEnabled(pVM)) + { + /* + * Only applies to VCPU 0 as we don't support SMP guests with raw mode. + */ + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = &pVM->aCpus[0]; + + /* + * Before we do anything we'll do a forced PD sync to try make sure any + * pending relocations because of these mappings have been resolved. + */ + PGMSyncCR3(pVCpu, CPUMGetGuestCR0(pVCpu), CPUMGetGuestCR3(pVCpu), CPUMGetGuestCR4(pVCpu), true); + + return pgmR3MappingsFixInternal(pVM, GCPtrBase, cb); + } + +#else /* PGM_WITHOUT_MAPPINGS */ + RT_NOREF(pVM, GCPtrBase, cb); +#endif /* PGM_WITHOUT_MAPPINGS */ + + Assert(!VM_IS_RAW_MODE_ENABLED(pVM)); + return VINF_SUCCESS; +} + + +#ifndef PGM_WITHOUT_MAPPINGS +/** + * Internal worker for PGMR3MappingsFix and pgmR3Load. + * + * (This does not perform a SyncCR3 before the fixation like PGMR3MappingsFix.) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtrBase The address of the reserved range of guest memory. + * @param cb The size of the range starting at GCPtrBase. + */ +int pgmR3MappingsFixInternal(PVM pVM, RTGCPTR GCPtrBase, uint32_t cb) +{ + /* + * Check input arguments and pre-conditions. + */ + AssertMsgReturn(!(GCPtrBase & X86_PAGE_4M_OFFSET_MASK), ("GCPtrBase (%#x) has to be aligned on a 4MB address!\n", GCPtrBase), + VERR_INVALID_PARAMETER); + AssertMsgReturn(cb && !(cb & X86_PAGE_4M_OFFSET_MASK), ("cb (%#x) is 0 or not aligned on a 4MB address!\n", cb), + VERR_INVALID_PARAMETER); + AssertReturn(pgmMapAreMappingsEnabled(pVM), VERR_PGM_MAPPINGS_DISABLED); + AssertReturn(pVM->cCpus == 1, VERR_PGM_MAPPINGS_SMP); + + /* + * Check that it's not conflicting with a core code mapping in the intermediate page table. + */ + unsigned iPDNew = GCPtrBase >> X86_PD_SHIFT; + unsigned i = cb >> X86_PD_SHIFT; + while (i-- > 0) + { + if (pVM->pgm.s.pInterPD->a[iPDNew + i].n.u1Present) + { + /* Check that it's not one or our mappings. 
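+               (The containment test below relies on unsigned wrap-around: PDE indexes
+                below pCur->GCPtr yield a huge difference and are therefore rejected.)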
*/ + PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; + while (pCur) + { + if (iPDNew + i - (pCur->GCPtr >> X86_PD_SHIFT) < (pCur->cb >> X86_PD_SHIFT)) + break; + pCur = pCur->pNextR3; + } + if (!pCur) + { + LogRel(("PGMR3MappingsFix: Conflicts with intermediate PDE %#x (GCPtrBase=%RGv cb=%#zx). The guest should retry.\n", + iPDNew + i, GCPtrBase, cb)); + return VERR_PGM_MAPPINGS_FIX_CONFLICT; + } + } + } + + /* + * In PAE / PAE mode, make sure we don't cross page directories. + */ + PVMCPU pVCpu = &pVM->aCpus[0]; + if ( ( pVCpu->pgm.s.enmGuestMode == PGMMODE_PAE + || pVCpu->pgm.s.enmGuestMode == PGMMODE_PAE_NX) + && ( pVCpu->pgm.s.enmShadowMode == PGMMODE_PAE + || pVCpu->pgm.s.enmShadowMode == PGMMODE_PAE_NX)) + { + unsigned iPdptBase = GCPtrBase >> X86_PDPT_SHIFT; + unsigned iPdptLast = (GCPtrBase + cb - 1) >> X86_PDPT_SHIFT; + if (iPdptBase != iPdptLast) + { + LogRel(("PGMR3MappingsFix: Crosses PD boundary; iPdptBase=%#x iPdptLast=%#x (GCPtrBase=%RGv cb=%#zx). The guest should retry.\n", + iPdptBase, iPdptLast, GCPtrBase, cb)); + return VERR_PGM_MAPPINGS_FIX_CONFLICT; + } + } + + /* + * Loop the mappings and check that they all agree on their new locations. + */ + RTGCPTR GCPtrCur = GCPtrBase; + PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; + while (pCur) + { + if (!pCur->pfnRelocate(pVM, pCur->GCPtr, GCPtrCur, PGMRELOCATECALL_SUGGEST, pCur->pvUser)) + { + AssertMsgFailed(("The suggested fixed address %#x was rejected by '%s'!\n", GCPtrCur, pCur->pszDesc)); + return VERR_PGM_MAPPINGS_FIX_REJECTED; + } + /* next */ + GCPtrCur += pCur->cb; + pCur = pCur->pNextR3; + } + if (GCPtrCur > GCPtrBase + cb) + { + AssertMsgFailed(("cb (%#x) is less than the required range %#x!\n", cb, GCPtrCur - GCPtrBase)); + return VERR_PGM_MAPPINGS_FIX_TOO_SMALL; + } + + /* + * Loop the table assigning the mappings to the passed in memory + * and call their relocator callback. + */ + GCPtrCur = GCPtrBase; + pCur = pVM->pgm.s.pMappingsR3; + while (pCur) + { + RTGCPTR const GCPtrOld = pCur->GCPtr; + + /* + * Relocate the page table(s). + */ + if (pCur->GCPtr != NIL_RTGCPTR) + pgmR3MapClearPDEs(pVM, pCur, GCPtrOld >> X86_PD_SHIFT); + pgmR3MapSetPDEs(pVM, pCur, GCPtrCur >> X86_PD_SHIFT); + + /* + * Update the entry. + */ + pCur->GCPtr = GCPtrCur; + pCur->GCPtrLast = GCPtrCur + pCur->cb - 1; + + /* + * Callback to execute the relocation. + */ + pCur->pfnRelocate(pVM, GCPtrOld, GCPtrCur, PGMRELOCATECALL_RELOCATE, pCur->pvUser); + + /* + * Advance. + */ + GCPtrCur += pCur->cb; + pCur = pCur->pNextR3; + } + + /* + * Mark the mappings as fixed at this new location and return. + */ + pVM->pgm.s.fMappingsFixed = true; + pVM->pgm.s.fMappingsFixedRestored = false; + pVM->pgm.s.GCPtrMappingFixed = GCPtrBase; + pVM->pgm.s.cbMappingFixed = cb; + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + pVM->aCpus[idCpu].pgm.s.fSyncFlags &= ~PGM_SYNC_MONITOR_CR3; + VMCPU_FF_SET(&pVM->aCpus[idCpu], VMCPU_FF_PGM_SYNC_CR3); + } + return VINF_SUCCESS; +} +#endif /*!PGM_WITHOUT_MAPPINGS*/ + + +/** + * Unfixes the mappings. + * + * Unless PGMR3MappingsDisable is in effect, mapping conflict detection will be + * enabled after this call. If the mappings are fixed, a full CR3 resync will + * take place afterwards. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
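+ *
+ * Typical use together with PGMR3MappingsSize and PGMR3MappingsFix is sketched
+ * below; the surrounding caller code and the GCPtrReserved value are purely
+ * illustrative (the range must be reserved from the guest and 4 MB aligned):
+ * @code
+ *     uint32_t cbNeeded = 0;
+ *     int rc = PGMR3MappingsSize(pVM, &cbNeeded);
+ *     if (RT_SUCCESS(rc) && cbNeeded)
+ *         rc = PGMR3MappingsFix(pVM, GCPtrReserved, cbNeeded);
+ *     // ... later, to let the mappings float again (and conflict detection resume):
+ *     rc = PGMR3MappingsUnfix(pVM);
+ * @endcode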
+ */
+VMMR3DECL(int) PGMR3MappingsUnfix(PVM pVM)
+{
+    Log(("PGMR3MappingsUnfix: fMappingsFixed=%RTbool MappingsEnabled=%RTbool\n", pVM->pgm.s.fMappingsFixed, pgmMapAreMappingsEnabled(pVM)));
+    if (   pgmMapAreMappingsEnabled(pVM)
+        && (   pVM->pgm.s.fMappingsFixed
+            || pVM->pgm.s.fMappingsFixedRestored)
+       )
+    {
+        bool const fResyncCR3 = pVM->pgm.s.fMappingsFixed;
+
+        pVM->pgm.s.fMappingsFixed = false;
+        pVM->pgm.s.fMappingsFixedRestored = false;
+        pVM->pgm.s.GCPtrMappingFixed = 0;
+        pVM->pgm.s.cbMappingFixed = 0;
+
+        if (fResyncCR3)
+            for (VMCPUID i = 0; i < pVM->cCpus; i++)
+                VMCPU_FF_SET(&pVM->aCpus[i], VMCPU_FF_PGM_SYNC_CR3);
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if the mappings need re-fixing after a restore.
+ *
+ * @returns true if they need, false if not.
+ * @param pVM The cross context VM structure.
+ */
+VMMR3DECL(bool) PGMR3MappingsNeedReFixing(PVM pVM)
+{
+    VM_ASSERT_VALID_EXT_RETURN(pVM, false);
+    return pVM->pgm.s.fMappingsFixedRestored;
+}
+
+#ifndef PGM_WITHOUT_MAPPINGS
+
+/**
+ * Map pages into the intermediate context (switcher code).
+ *
+ * These pages are mapped at both the given virtual address and at the physical
+ * address (for identity mapping).
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param Addr Intermediate context address of the mapping.
+ * @param HCPhys Start of the range of physical pages. This must be entirely below 4GB!
+ * @param cbPages Number of bytes to map.
+ *
+ * @remark This API shall not be used for anything but mapping the switcher code.
+ */
+VMMR3DECL(int) PGMR3MapIntermediate(PVM pVM, RTUINTPTR Addr, RTHCPHYS HCPhys, unsigned cbPages)
+{
+    LogFlow(("PGMR3MapIntermediate: Addr=%RTptr HCPhys=%RHp cbPages=%#x\n", Addr, HCPhys, cbPages));
+
+    /*
+     * Adjust input.
+     */
+    cbPages += (uint32_t)HCPhys & PAGE_OFFSET_MASK;
+    cbPages = RT_ALIGN(cbPages, PAGE_SIZE);
+    HCPhys &= X86_PTE_PAE_PG_MASK;
+    Addr &= PAGE_BASE_MASK;
+    /* We only care about the first 4GB, because on AMD64 we'll be repeating them all over the address space. */
+    uint32_t uAddress = (uint32_t)Addr;
+
+    /*
+     * Assert input and state.
+     */
+    AssertMsg(pVM->pgm.s.offVM, ("Bad init order\n"));
+    AssertMsg(pVM->pgm.s.pInterPD, ("Bad init order, paging.\n"));
+    AssertMsg(cbPages <= (512 << PAGE_SHIFT), ("The mapping is too big %d bytes\n", cbPages));
+    AssertMsg(HCPhys < _4G && HCPhys + cbPages < _4G, ("Addr=%RTptr HCPhys=%RHp cbPages=%d\n", Addr, HCPhys, cbPages));
+    AssertReturn(!pVM->pgm.s.fFinalizedMappings, VERR_WRONG_ORDER);
+
+    /*
+     * Check for internal conflicts between the virtual address and the physical address.
+     * A 1:1 mapping is fine, but partial overlapping is a no-no.
+     */
+    if (   uAddress != HCPhys
+        && (   uAddress < HCPhys
+             ? HCPhys - uAddress < cbPages
+             : uAddress - HCPhys < cbPages
+           )
+       )
+        AssertLogRelMsgFailedReturn(("Addr=%RTptr HCPhys=%RHp cbPages=%d\n", Addr, HCPhys, cbPages),
+                                    VERR_PGM_INTERMEDIATE_PAGING_CONFLICT);
+
+    const unsigned cPages = cbPages >> PAGE_SHIFT;
+    int rc = pgmR3MapIntermediateCheckOne(pVM, uAddress, cPages, pVM->pgm.s.apInterPTs[0], pVM->pgm.s.apInterPaePTs[0]);
+    if (RT_FAILURE(rc))
+        return rc;
+    rc = pgmR3MapIntermediateCheckOne(pVM, (uintptr_t)HCPhys, cPages, pVM->pgm.s.apInterPTs[1], pVM->pgm.s.apInterPaePTs[1]);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /*
+     * Everything's fine, do the mapping.
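+     * (The range is installed twice below: once at uAddress and once 1:1 at HCPhys,
+     *  so the switcher code stays addressable while paging/CR3 is being switched.)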
+ */ + pgmR3MapIntermediateDoOne(pVM, uAddress, HCPhys, cPages, pVM->pgm.s.apInterPTs[0], pVM->pgm.s.apInterPaePTs[0]); + pgmR3MapIntermediateDoOne(pVM, (uintptr_t)HCPhys, HCPhys, cPages, pVM->pgm.s.apInterPTs[1], pVM->pgm.s.apInterPaePTs[1]); + + return VINF_SUCCESS; +} + + +/** + * Validates that there are no conflicts for this mapping into the intermediate context. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param uAddress Address of the mapping. + * @param cPages Number of pages. + * @param pPTDefault Pointer to the default page table for this mapping. + * @param pPTPaeDefault Pointer to the default page table for this mapping. + */ +static int pgmR3MapIntermediateCheckOne(PVM pVM, uintptr_t uAddress, unsigned cPages, PX86PT pPTDefault, PX86PTPAE pPTPaeDefault) +{ + AssertMsg((uAddress >> X86_PD_SHIFT) + cPages <= 1024, ("64-bit fixme uAddress=%RGv cPages=%u\n", uAddress, cPages)); + + /* + * Check that the ranges are available. + * (This code doesn't have to be fast.) + */ + while (cPages > 0) + { + /* + * 32-Bit. + */ + unsigned iPDE = (uAddress >> X86_PD_SHIFT) & X86_PD_MASK; + unsigned iPTE = (uAddress >> X86_PT_SHIFT) & X86_PT_MASK; + PX86PT pPT = pPTDefault; + if (pVM->pgm.s.pInterPD->a[iPDE].u) + { + RTHCPHYS HCPhysPT = pVM->pgm.s.pInterPD->a[iPDE].u & X86_PDE_PG_MASK; + if (HCPhysPT == MMPage2Phys(pVM, pVM->pgm.s.apInterPTs[0])) + pPT = pVM->pgm.s.apInterPTs[0]; + else if (HCPhysPT == MMPage2Phys(pVM, pVM->pgm.s.apInterPTs[1])) + pPT = pVM->pgm.s.apInterPTs[1]; + else + { + /** @todo this must be handled with a relocation of the conflicting mapping! + * Which of course cannot be done because we're in the middle of the initialization. bad design! */ + AssertLogRelMsgFailedReturn(("Conflict between core code and PGMR3Mapping(). uAddress=%RHv\n", uAddress), + VERR_PGM_INTERMEDIATE_PAGING_CONFLICT); + } + } + if (pPT->a[iPTE].u) + AssertLogRelMsgFailedReturn(("Conflict iPTE=%#x iPDE=%#x uAddress=%RHv pPT->a[iPTE].u=%RX32\n", iPTE, iPDE, uAddress, pPT->a[iPTE].u), + VERR_PGM_INTERMEDIATE_PAGING_CONFLICT); + + /* + * PAE. + */ + const unsigned iPDPE= (uAddress >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE; + iPDE = (uAddress >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + iPTE = (uAddress >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK; + Assert(iPDPE < 4); + Assert(pVM->pgm.s.apInterPaePDs[iPDPE]); + PX86PTPAE pPTPae = pPTPaeDefault; + if (pVM->pgm.s.apInterPaePDs[iPDPE]->a[iPDE].u) + { + RTHCPHYS HCPhysPT = pVM->pgm.s.apInterPaePDs[iPDPE]->a[iPDE].u & X86_PDE_PAE_PG_MASK; + if (HCPhysPT == MMPage2Phys(pVM, pVM->pgm.s.apInterPaePTs[0])) + pPTPae = pVM->pgm.s.apInterPaePTs[0]; + else if (HCPhysPT == MMPage2Phys(pVM, pVM->pgm.s.apInterPaePTs[0])) + pPTPae = pVM->pgm.s.apInterPaePTs[1]; + else + { + /** @todo this must be handled with a relocation of the conflicting mapping! + * Which of course cannot be done because we're in the middle of the initialization. bad design! */ + AssertLogRelMsgFailedReturn(("Conflict between core code and PGMR3Mapping(). uAddress=%RHv\n", uAddress), + VERR_PGM_INTERMEDIATE_PAGING_CONFLICT); + } + } + if (pPTPae->a[iPTE].u) + AssertLogRelMsgFailedReturn(("Conflict iPTE=%#x iPDE=%#x uAddress=%RHv pPTPae->a[iPTE].u=%#RX64\n", iPTE, iPDE, uAddress, pPTPae->a[iPTE].u), + VERR_PGM_INTERMEDIATE_PAGING_CONFLICT); + + /* next */ + uAddress += PAGE_SIZE; + cPages--; + } + + return VINF_SUCCESS; +} + + + +/** + * Sets up the intermediate page tables for a verified mapping. + * + * @param pVM The cross context VM structure. 
+ * @param uAddress Address of the mapping. + * @param HCPhys The physical address of the page range. + * @param cPages Number of pages. + * @param pPTDefault Pointer to the default page table for this mapping. + * @param pPTPaeDefault Pointer to the default page table for this mapping. + */ +static void pgmR3MapIntermediateDoOne(PVM pVM, uintptr_t uAddress, RTHCPHYS HCPhys, unsigned cPages, PX86PT pPTDefault, PX86PTPAE pPTPaeDefault) +{ + while (cPages > 0) + { + /* + * 32-Bit. + */ + unsigned iPDE = (uAddress >> X86_PD_SHIFT) & X86_PD_MASK; + unsigned iPTE = (uAddress >> X86_PT_SHIFT) & X86_PT_MASK; + PX86PT pPT; + if (pVM->pgm.s.pInterPD->a[iPDE].u) + pPT = (PX86PT)MMPagePhys2Page(pVM, pVM->pgm.s.pInterPD->a[iPDE].u & X86_PDE_PG_MASK); + else + { + pVM->pgm.s.pInterPD->a[iPDE].u = X86_PDE_P | X86_PDE_A | X86_PDE_RW + | (uint32_t)MMPage2Phys(pVM, pPTDefault); + pPT = pPTDefault; + } + pPT->a[iPTE].u = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D | (uint32_t)HCPhys; + + /* + * PAE + */ + const unsigned iPDPE= (uAddress >> X86_PDPT_SHIFT) & X86_PDPT_MASK_PAE; + iPDE = (uAddress >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + iPTE = (uAddress >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK; + Assert(iPDPE < 4); + Assert(pVM->pgm.s.apInterPaePDs[iPDPE]); + PX86PTPAE pPTPae; + if (pVM->pgm.s.apInterPaePDs[iPDPE]->a[iPDE].u) + pPTPae = (PX86PTPAE)MMPagePhys2Page(pVM, pVM->pgm.s.apInterPaePDs[iPDPE]->a[iPDE].u & X86_PDE_PAE_PG_MASK); + else + { + pPTPae = pPTPaeDefault; + pVM->pgm.s.apInterPaePDs[iPDPE]->a[iPDE].u = X86_PDE_P | X86_PDE_A | X86_PDE_RW + | MMPage2Phys(pVM, pPTPaeDefault); + } + pPTPae->a[iPTE].u = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D | HCPhys; + + /* next */ + cPages--; + HCPhys += PAGE_SIZE; + uAddress += PAGE_SIZE; + } +} + + +/** + * Clears all PDEs involved with the mapping in the shadow and intermediate page tables. + * + * @param pVM The cross context VM structure. + * @param pMap Pointer to the mapping in question. + * @param iOldPDE The index of the 32-bit PDE corresponding to the base of the mapping. + */ +static void pgmR3MapClearPDEs(PVM pVM, PPGMMAPPING pMap, unsigned iOldPDE) +{ + unsigned i = pMap->cPTs; + PVMCPU pVCpu = VMMGetCpu(pVM); + pgmLock(pVM); /* to avoid assertions */ + + pgmMapClearShadowPDEs(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pMap, iOldPDE, false /*fDeactivateCR3*/); + + iOldPDE += i; + while (i-- > 0) + { + iOldPDE--; + + /* + * 32-bit. + */ + pVM->pgm.s.pInterPD->a[iOldPDE].u = 0; + + /* + * PAE. + */ + const unsigned iPD = iOldPDE / 256; /* iOldPDE * 2 / 512; iOldPDE is in 4 MB pages */ + unsigned iPDE = iOldPDE * 2 % 512; + pVM->pgm.s.apInterPaePDs[iPD]->a[iPDE].u = 0; + iPDE++; + AssertFatal(iPDE < 512); + pVM->pgm.s.apInterPaePDs[iPD]->a[iPDE].u = 0; + } + + pgmUnlock(pVM); +} + + +/** + * Sets all PDEs involved with the mapping in the shadow and intermediate page tables. + * + * @param pVM The cross context VM structure. + * @param pMap Pointer to the mapping in question. + * @param iNewPDE The index of the 32-bit PDE corresponding to the base of the mapping. + */ +static void pgmR3MapSetPDEs(PVM pVM, PPGMMAPPING pMap, unsigned iNewPDE) +{ + PPGM pPGM = &pVM->pgm.s; +#ifdef VBOX_STRICT + PVMCPU pVCpu = VMMGetCpu(pVM); +#endif + pgmLock(pVM); /* to avoid assertions */ + + Assert(!pgmMapAreMappingsEnabled(pVM) || PGMGetGuestMode(pVCpu) <= PGMMODE_PAE_NX); + + pgmMapSetShadowPDEs(pVM, pMap, iNewPDE); + + /* + * Init the page tables and insert them into the page directories. 
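+     * (Each 32-bit PDE covers 4 MB and corresponds to two 512-entry PAE PDEs; for a
+     *  hypothetical iNewPDE of 0x301 this is PAE page directory 3, entries 2 and 3.)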
+ */ + unsigned i = pMap->cPTs; + iNewPDE += i; + while (i-- > 0) + { + iNewPDE--; + + /* + * 32-bit. + */ + X86PDE Pde; + /* Default mapping page directory flags are read/write and supervisor; individual page attributes determine the final flags */ + Pde.u = PGM_PDFLAGS_MAPPING | X86_PDE_P | X86_PDE_A | X86_PDE_RW | X86_PDE_US | (uint32_t)pMap->aPTs[i].HCPhysPT; + pPGM->pInterPD->a[iNewPDE] = Pde; + + /* + * PAE. + */ + const unsigned iPD = iNewPDE / 256; + unsigned iPDE = iNewPDE * 2 % 512; + X86PDEPAE PdePae0; + PdePae0.u = PGM_PDFLAGS_MAPPING | X86_PDE_P | X86_PDE_A | X86_PDE_RW | X86_PDE_US | pMap->aPTs[i].HCPhysPaePT0; + pPGM->apInterPaePDs[iPD]->a[iPDE] = PdePae0; + iPDE++; + AssertFatal(iPDE < 512); + X86PDEPAE PdePae1; + PdePae1.u = PGM_PDFLAGS_MAPPING | X86_PDE_P | X86_PDE_A | X86_PDE_RW | X86_PDE_US | pMap->aPTs[i].HCPhysPaePT1; + pPGM->apInterPaePDs[iPD]->a[iPDE] = PdePae1; + } + + pgmUnlock(pVM); +} + + +/** + * Relocates a mapping to a new address. + * + * @param pVM The cross context VM structure. + * @param pMapping The mapping to relocate. + * @param GCPtrOldMapping The address of the start of the old mapping. + * NIL_RTGCPTR if not currently mapped. + * @param GCPtrNewMapping The address of the start of the new mapping. + */ +static void pgmR3MapRelocate(PVM pVM, PPGMMAPPING pMapping, RTGCPTR GCPtrOldMapping, RTGCPTR GCPtrNewMapping) +{ + Log(("PGM: Relocating %s from %RGv to %RGv\n", pMapping->pszDesc, GCPtrOldMapping, GCPtrNewMapping)); + AssertMsg(GCPtrOldMapping == pMapping->GCPtr, ("%RGv vs %RGv\n", GCPtrOldMapping, pMapping->GCPtr)); + AssertMsg((GCPtrOldMapping >> X86_PD_SHIFT) < X86_PG_ENTRIES, ("%RGv\n", GCPtrOldMapping)); + AssertMsg((GCPtrNewMapping >> X86_PD_SHIFT) < X86_PG_ENTRIES, ("%RGv\n", GCPtrOldMapping)); + + /* + * Relocate the page table(s). + */ + if (GCPtrOldMapping != NIL_RTGCPTR) + pgmR3MapClearPDEs(pVM, pMapping, GCPtrOldMapping >> X86_PD_SHIFT); + pgmR3MapSetPDEs(pVM, pMapping, GCPtrNewMapping >> X86_PD_SHIFT); + + /* + * Update and resort the mapping list. + */ + + /* Find previous mapping for pMapping, put result into pPrevMap. */ + PPGMMAPPING pPrevMap = NULL; + PPGMMAPPING pCur = pVM->pgm.s.pMappingsR3; + while (pCur && pCur != pMapping) + { + /* next */ + pPrevMap = pCur; + pCur = pCur->pNextR3; + } + Assert(pCur); + + /* Find mapping which >= than pMapping. */ + RTGCPTR GCPtrNew = GCPtrNewMapping; + PPGMMAPPING pPrev = NULL; + pCur = pVM->pgm.s.pMappingsR3; + while (pCur && pCur->GCPtr < GCPtrNew) + { + /* next */ + pPrev = pCur; + pCur = pCur->pNextR3; + } + + if (pCur != pMapping && pPrev != pMapping) + { + /* + * Unlink. + */ + if (pPrevMap) + { + pPrevMap->pNextR3 = pMapping->pNextR3; + pPrevMap->pNextRC = pMapping->pNextRC; + pPrevMap->pNextR0 = pMapping->pNextR0; + } + else + { + pVM->pgm.s.pMappingsR3 = pMapping->pNextR3; + pVM->pgm.s.pMappingsRC = pMapping->pNextRC; + pVM->pgm.s.pMappingsR0 = pMapping->pNextR0; + } + + /* + * Link + */ + pMapping->pNextR3 = pCur; + if (pPrev) + { + pMapping->pNextRC = pPrev->pNextRC; + pMapping->pNextR0 = pPrev->pNextR0; + pPrev->pNextR3 = pMapping; + pPrev->pNextRC = MMHyperR3ToRC(pVM, pMapping); + pPrev->pNextR0 = MMHyperR3ToR0(pVM, pMapping); + } + else + { + pMapping->pNextRC = pVM->pgm.s.pMappingsRC; + pMapping->pNextR0 = pVM->pgm.s.pMappingsR0; + pVM->pgm.s.pMappingsR3 = pMapping; + pVM->pgm.s.pMappingsRC = MMHyperR3ToRC(pVM, pMapping); + pVM->pgm.s.pMappingsR0 = MMHyperR3ToR0(pVM, pMapping); + } + } + + /* + * Update the entry. 
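+     * (GCPtrLast is inclusive, hence the - 1 below.)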
+ */ + pMapping->GCPtr = GCPtrNew; + pMapping->GCPtrLast = GCPtrNew + pMapping->cb - 1; + + /* + * Callback to execute the relocation. + */ + pMapping->pfnRelocate(pVM, GCPtrOldMapping, GCPtrNewMapping, PGMRELOCATECALL_RELOCATE, pMapping->pvUser); +} + + +/** + * Checks if a new mapping address wasn't previously used and caused a clash with guest mappings. + * + * @returns VBox status code. + * @param pMapping The mapping which conflicts. + * @param GCPtr New mapping address to try + */ +bool pgmR3MapIsKnownConflictAddress(PPGMMAPPING pMapping, RTGCPTR GCPtr) +{ + for (unsigned i = 0; i < RT_ELEMENTS(pMapping->aGCPtrConflicts); i++) + { + if (GCPtr == pMapping->aGCPtrConflicts[i]) + return true; + } + return false; +} + + +/** + * Resolves a conflict between a page table based GC mapping and + * the Guest OS page tables. (32 bits version) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pMapping The mapping which conflicts. + * @param pPDSrc The page directory of the guest OS. + * @param GCPtrOldMapping The address of the start of the current mapping. + */ +int pgmR3SyncPTResolveConflict(PVM pVM, PPGMMAPPING pMapping, PX86PD pPDSrc, RTGCPTR GCPtrOldMapping) +{ + STAM_REL_COUNTER_INC(&pVM->pgm.s.cRelocations); + STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatR3ResolveConflict, a); + + /* Raw mode only which implies one VCPU. */ + Assert(pVM->cCpus == 1); + + pMapping->aGCPtrConflicts[pMapping->cConflicts & (PGMMAPPING_CONFLICT_MAX-1)] = GCPtrOldMapping; + pMapping->cConflicts++; + + /* + * Scan for free page directory entries. + * + * Note that we do not support mappings at the very end of the + * address space since that will break our GCPtrEnd assumptions. + */ + const unsigned cPTs = pMapping->cPTs; + unsigned iPDNew = RT_ELEMENTS(pPDSrc->a) - cPTs; /* (+ 1 - 1) */ + while (iPDNew-- > 0) + { + if (pPDSrc->a[iPDNew].n.u1Present) + continue; + + if (pgmR3MapIsKnownConflictAddress(pMapping, iPDNew << X86_PD_SHIFT)) + continue; + + if (cPTs > 1) + { + bool fOk = true; + for (unsigned i = 1; fOk && i < cPTs; i++) + if (pPDSrc->a[iPDNew + i].n.u1Present) + fOk = false; + if (!fOk) + continue; + } + + /* + * Check that it's not conflicting with an intermediate page table mapping. + */ + bool fOk = true; + unsigned i = cPTs; + while (fOk && i-- > 0) + fOk = !pVM->pgm.s.pInterPD->a[iPDNew + i].n.u1Present; + if (!fOk) + continue; + /** @todo AMD64 should check the PAE directories and skip the 32bit stuff. */ + + /* + * Ask for the mapping. + */ + RTGCPTR GCPtrNewMapping = (RTGCPTR32)iPDNew << X86_PD_SHIFT; + + if (pMapping->pfnRelocate(pVM, GCPtrOldMapping, GCPtrNewMapping, PGMRELOCATECALL_SUGGEST, pMapping->pvUser)) + { + pgmR3MapRelocate(pVM, pMapping, GCPtrOldMapping, GCPtrNewMapping); + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatR3ResolveConflict, a); + return VINF_SUCCESS; + } + } + + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatR3ResolveConflict, a); +#ifdef DEBUG_bird + /* + * Ended up here frequently recently with an NT4.0 VM (using SMP kernel). + * + * The problem is when enabling large pages (i.e. updating CR4) using the + * _Ki386EnableCurrentLargePage@8 assembly routine (address 0x801c97ad-9). + * The routine loads a sparsely popuplated page tables with identiy mappings + * of its own code, most entries are whatever ExAllocatePool returned, which + * is documented as undefined but all 0xffffffff in this case. 
Once loaded, + * it jumps to the physical code address, disables paging, set CR4.PSE=1, + * re-enables paging, restore the original page table and returns successfully. + * + * Theory: if CSAM/PATM patches the pushf;cli;mov eax,cr3; sequence, at the + * start of that function we're apparently in trouble, if CSAM/PATM doesn't + * we're switching back to REM and doing disabling of paging there instead. + * + * Normal PD: CR3=00030000; Problematic identity mapped PD: CR3=0x5fa000. + */ + DBGFSTOP(pVM); +#endif + AssertMsgFailed(("Failed to relocate page table mapping '%s' from %#x! (cPTs=%d)\n", pMapping->pszDesc, GCPtrOldMapping, cPTs)); + return VERR_PGM_NO_HYPERVISOR_ADDRESS; +} + + +/** + * Resolves a conflict between a page table based GC mapping and + * the Guest OS page tables. (PAE bits version) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pMapping The mapping which conflicts. + * @param GCPtrOldMapping The address of the start of the current mapping. + */ +int pgmR3SyncPTResolveConflictPAE(PVM pVM, PPGMMAPPING pMapping, RTGCPTR GCPtrOldMapping) +{ + STAM_REL_COUNTER_INC(&pVM->pgm.s.cRelocations); + STAM_PROFILE_START(&pVM->pgm.s.StatR3ResolveConflict, a); + + /* Raw mode only which implies one VCPU. */ + Assert(pVM->cCpus == 1); + PVMCPU pVCpu = VMMGetCpu(pVM); + + pMapping->aGCPtrConflicts[pMapping->cConflicts & (PGMMAPPING_CONFLICT_MAX-1)] = GCPtrOldMapping; + pMapping->cConflicts++; + + for (int iPDPTE = X86_PG_PAE_PDPE_ENTRIES - 1; iPDPTE >= 0; iPDPTE--) + { + unsigned iPDSrc; + PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, (RTGCPTR32)iPDPTE << X86_PDPT_SHIFT, &iPDSrc, NULL); + + /* + * Scan for free page directory entries. + * + * Note that we do not support mappings at the very end of the + * address space since that will break our GCPtrEnd assumptions. + * Nor do we support mappings crossing page directories. + */ + const unsigned cPTs = pMapping->cb >> X86_PD_PAE_SHIFT; + unsigned iPDNew = RT_ELEMENTS(pPDSrc->a) - cPTs; /* (+ 1 - 1) */ + + while (iPDNew-- > 0) + { + /* Ugly assumption that mappings start on a 4 MB boundary. */ + if (iPDNew & 1) + continue; + + if (pgmR3MapIsKnownConflictAddress(pMapping, ((RTGCPTR32)iPDPTE << X86_PDPT_SHIFT) + (iPDNew << X86_PD_PAE_SHIFT))) + continue; + + if (pPDSrc) + { + if (pPDSrc->a[iPDNew].n.u1Present) + continue; + if (cPTs > 1) + { + bool fOk = true; + for (unsigned i = 1; fOk && i < cPTs; i++) + if (pPDSrc->a[iPDNew + i].n.u1Present) + fOk = false; + if (!fOk) + continue; + } + } + /* + * Check that it's not conflicting with an intermediate page table mapping. + */ + bool fOk = true; + unsigned i = cPTs; + while (fOk && i-- > 0) + fOk = !pVM->pgm.s.apInterPaePDs[iPDPTE]->a[iPDNew + i].n.u1Present; + if (!fOk) + continue; + + /* + * Ask for the mapping. + */ + RTGCPTR GCPtrNewMapping = ((RTGCPTR32)iPDPTE << X86_PDPT_SHIFT) + ((RTGCPTR32)iPDNew << X86_PD_PAE_SHIFT); + + if (pMapping->pfnRelocate(pVM, GCPtrOldMapping, GCPtrNewMapping, PGMRELOCATECALL_SUGGEST, pMapping->pvUser)) + { + pgmR3MapRelocate(pVM, pMapping, GCPtrOldMapping, GCPtrNewMapping); + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatR3ResolveConflict, a); + return VINF_SUCCESS; + } + } + } + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatR3ResolveConflict, a); + AssertMsgFailed(("Failed to relocate page table mapping '%s' from %#x! 
(cPTs=%d)\n", pMapping->pszDesc, GCPtrOldMapping, pMapping->cb >> X86_PD_PAE_SHIFT)); + return VERR_PGM_NO_HYPERVISOR_ADDRESS; +} + +#endif /* !PGM_WITHOUT_MAPPINGS */ + +/** + * Read memory from the guest mappings. + * + * This will use the page tables associated with the mappings to + * read the memory. This means that not all kind of memory is readable + * since we don't necessarily know how to convert that physical address + * to a HC virtual one. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvDst The destination address (HC of course). + * @param GCPtrSrc The source address (GC virtual address). + * @param cb Number of bytes to read. + * + * @remarks The is indirectly for DBGF only. + * @todo Consider renaming it to indicate it's special usage, or just + * reimplement it in MMR3HyperReadGCVirt. + */ +VMMR3DECL(int) PGMR3MapRead(PVM pVM, void *pvDst, RTGCPTR GCPtrSrc, size_t cb) +{ + /* + * Simplicity over speed... Chop the request up into chunks + * which don't cross pages. + */ + if (cb + (GCPtrSrc & PAGE_OFFSET_MASK) > PAGE_SIZE) + { + for (;;) + { + size_t cbRead = RT_MIN(cb, PAGE_SIZE - (GCPtrSrc & PAGE_OFFSET_MASK)); + int rc = PGMR3MapRead(pVM, pvDst, GCPtrSrc, cbRead); + if (RT_FAILURE(rc)) + return rc; + cb -= cbRead; + if (!cb) + break; + pvDst = (char *)pvDst + cbRead; + GCPtrSrc += cbRead; + } + return VINF_SUCCESS; + } + + /* + * Find the mapping. + */ + PPGMMAPPING pCur = pVM->pgm.s.CTX_SUFF(pMappings); + while (pCur) + { + RTGCPTR off = GCPtrSrc - pCur->GCPtr; + if (off < pCur->cb) + { + if (off + cb > pCur->cb) + { + AssertMsgFailed(("Invalid page range %RGv LB%#x. mapping '%s' %RGv to %RGv\n", + GCPtrSrc, cb, pCur->pszDesc, pCur->GCPtr, pCur->GCPtrLast)); + return VERR_INVALID_PARAMETER; + } + + unsigned iPT = off >> X86_PD_SHIFT; + unsigned iPTE = (off >> PAGE_SHIFT) & X86_PT_MASK; + while (cb > 0 && iPTE < RT_ELEMENTS(CTXALLSUFF(pCur->aPTs[iPT].pPT)->a)) + { + PCPGMSHWPTEPAE pPte = &pCur->aPTs[iPT].CTXALLSUFF(paPaePTs)[iPTE / 512].a[iPTE % 512]; + if (!PGMSHWPTEPAE_IS_P(*pPte)) + return VERR_PAGE_NOT_PRESENT; + RTHCPHYS HCPhys = PGMSHWPTEPAE_GET_HCPHYS(*pPte); + + /* + * Get the virtual page from the physical one. + */ + void *pvPage; + int rc = MMR3HCPhys2HCVirt(pVM, HCPhys, &pvPage); + if (RT_FAILURE(rc)) + return rc; + + memcpy(pvDst, (char *)pvPage + (GCPtrSrc & PAGE_OFFSET_MASK), cb); + return VINF_SUCCESS; + } + } + + /* next */ + pCur = CTXALLSUFF(pCur->pNext); + } + + return VERR_INVALID_POINTER; +} + + +/** + * Info callback for 'pgmhandlers'. + * + * @param pVM The cross context VM structure. + * @param pHlp The output helpers. + * @param pszArgs The arguments. phys or virt. 
+ */ +DECLCALLBACK(void) pgmR3MapInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + if (!pgmMapAreMappingsEnabled(pVM)) + pHlp->pfnPrintf(pHlp, "\nThe mappings are DISABLED.\n"); + else if (pVM->pgm.s.fMappingsFixed) + pHlp->pfnPrintf(pHlp, "\nThe mappings are FIXED: %RGv-%RGv\n", + pVM->pgm.s.GCPtrMappingFixed, pVM->pgm.s.GCPtrMappingFixed + pVM->pgm.s.cbMappingFixed - 1); + else if (pVM->pgm.s.fMappingsFixedRestored) + pHlp->pfnPrintf(pHlp, "\nThe mappings are FLOATING-RESTORED-FIXED: %RGv-%RGv\n", + pVM->pgm.s.GCPtrMappingFixed, pVM->pgm.s.GCPtrMappingFixed + pVM->pgm.s.cbMappingFixed - 1); + else + pHlp->pfnPrintf(pHlp, "\nThe mappings are FLOATING.\n"); + + PPGMMAPPING pCur; + for (pCur = pVM->pgm.s.pMappingsR3; pCur; pCur = pCur->pNextR3) + { + pHlp->pfnPrintf(pHlp, "%RGv - %RGv %s\n", pCur->GCPtr, pCur->GCPtrLast, pCur->pszDesc); + if (pCur->cConflicts > 0) + { + pHlp->pfnPrintf(pHlp, " %u conflict%s: ", pCur->cConflicts, pCur->cConflicts == 1 ? "" : "s"); + uint32_t cLeft = RT_MIN(pCur->cConflicts, RT_ELEMENTS(pCur->aGCPtrConflicts)); + uint32_t i = pCur->cConflicts; + while (cLeft-- > 0) + { + i = (i - 1) & (PGMMAPPING_CONFLICT_MAX - 1); + pHlp->pfnPrintf(pHlp, cLeft ? "%RGv, " : "%RGv\n", pCur->aGCPtrConflicts[i]); + } + } + } +} + diff --git a/src/VBox/VMM/VMMR3/PGMPhys.cpp b/src/VBox/VMM/VMMR3/PGMPhys.cpp new file mode 100644 index 00000000..03e1da21 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMPhys.cpp @@ -0,0 +1,5498 @@ +/* $Id: PGMPhys.cpp $ */ +/** @file + * PGM - Page Manager and Monitor, Physical Memory Addressing. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM_PHYS +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include "PGMInternal.h" +#include +#include +#include "PGMInline.h" +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_STRICT +# include +#endif +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The number of pages to free in one batch. 
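+ * (Sizes the GMMR3FreePagesPrepare requests used by pgmR3PhysFreePageRange and
+ * the memory balloon rendezvous further down.)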
*/ +#define PGMPHYS_FREE_PAGE_BATCH_SIZE 128 + + +/* + * PGMR3PhysReadU8-64 + * PGMR3PhysWriteU8-64 + */ +#define PGMPHYSFN_READNAME PGMR3PhysReadU8 +#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU8 +#define PGMPHYS_DATASIZE 1 +#define PGMPHYS_DATATYPE uint8_t +#include "PGMPhysRWTmpl.h" + +#define PGMPHYSFN_READNAME PGMR3PhysReadU16 +#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU16 +#define PGMPHYS_DATASIZE 2 +#define PGMPHYS_DATATYPE uint16_t +#include "PGMPhysRWTmpl.h" + +#define PGMPHYSFN_READNAME PGMR3PhysReadU32 +#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU32 +#define PGMPHYS_DATASIZE 4 +#define PGMPHYS_DATATYPE uint32_t +#include "PGMPhysRWTmpl.h" + +#define PGMPHYSFN_READNAME PGMR3PhysReadU64 +#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU64 +#define PGMPHYS_DATASIZE 8 +#define PGMPHYS_DATATYPE uint64_t +#include "PGMPhysRWTmpl.h" + + +/** + * EMT worker for PGMR3PhysReadExternal. + */ +static DECLCALLBACK(int) pgmR3PhysReadExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, void *pvBuf, size_t cbRead, + PGMACCESSORIGIN enmOrigin) +{ + VBOXSTRICTRC rcStrict = PGMPhysRead(pVM, *pGCPhys, pvBuf, cbRead, enmOrigin); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + return VINF_SUCCESS; +} + + +/** + * Read from physical memory, external users. + * + * @returns VBox status code. + * @retval VINF_SUCCESS. + * + * @param pVM The cross context VM structure. + * @param GCPhys Physical address to read from. + * @param pvBuf Where to read into. + * @param cbRead How many bytes to read. + * @param enmOrigin Who is calling. + * + * @thread Any but EMTs. + */ +VMMR3DECL(int) PGMR3PhysReadExternal(PVM pVM, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead, PGMACCESSORIGIN enmOrigin) +{ + VM_ASSERT_OTHER_THREAD(pVM); + + AssertMsgReturn(cbRead > 0, ("don't even think about reading zero bytes!\n"), VINF_SUCCESS); + LogFlow(("PGMR3PhysReadExternal: %RGp %d\n", GCPhys, cbRead)); + + pgmLock(pVM); + + /* + * Copy loop on ram ranges. + */ + PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys); + for (;;) + { + /* Inside range or not? */ + if (pRam && GCPhys >= pRam->GCPhys) + { + /* + * Must work our way thru this page by page. + */ + RTGCPHYS off = GCPhys - pRam->GCPhys; + while (off < pRam->cb) + { + unsigned iPage = off >> PAGE_SHIFT; + PPGMPAGE pPage = &pRam->aPages[iPage]; + + /* + * If the page has an ALL access handler, we'll have to + * delegate the job to EMT. + */ + if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage) + || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage)) + { + pgmUnlock(pVM); + + return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysReadExternalEMT, 5, + pVM, &GCPhys, pvBuf, cbRead, enmOrigin); + } + Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)); + + /* + * Simple stuff, go ahead. + */ + size_t cb = PAGE_SIZE - (off & PAGE_OFFSET_MASK); + if (cb > cbRead) + cb = cbRead; + PGMPAGEMAPLOCK PgMpLck; + const void *pvSrc; + int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, pRam->GCPhys + off, &pvSrc, &PgMpLck); + if (RT_SUCCESS(rc)) + { + memcpy(pvBuf, pvSrc, cb); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + else + { + AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternalReadOnly failed on %RGp / %R[pgmpage] -> %Rrc\n", + pRam->GCPhys + off, pPage, rc)); + memset(pvBuf, 0xff, cb); + } + + /* next page */ + if (cb >= cbRead) + { + pgmUnlock(pVM); + return VINF_SUCCESS; + } + cbRead -= cb; + off += cb; + GCPhys += cb; + pvBuf = (char *)pvBuf + cb; + } /* walk pages in ram range. 
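+                (Reads that fall outside any RAM range are handled in the else branch
+                 below and return all 0xff bytes.)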
*/ + } + else + { + LogFlow(("PGMPhysRead: Unassigned %RGp size=%u\n", GCPhys, cbRead)); + + /* + * Unassigned address space. + */ + size_t cb = pRam ? pRam->GCPhys - GCPhys : ~(size_t)0; + if (cb >= cbRead) + { + memset(pvBuf, 0xff, cbRead); + break; + } + memset(pvBuf, 0xff, cb); + + cbRead -= cb; + pvBuf = (char *)pvBuf + cb; + GCPhys += cb; + } + + /* Advance range if necessary. */ + while (pRam && GCPhys > pRam->GCPhysLast) + pRam = pRam->CTX_SUFF(pNext); + } /* Ram range walk */ + + pgmUnlock(pVM); + + return VINF_SUCCESS; +} + + +/** + * EMT worker for PGMR3PhysWriteExternal. + */ +static DECLCALLBACK(int) pgmR3PhysWriteExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, const void *pvBuf, size_t cbWrite, + PGMACCESSORIGIN enmOrigin) +{ + /** @todo VERR_EM_NO_MEMORY */ + VBOXSTRICTRC rcStrict = PGMPhysWrite(pVM, *pGCPhys, pvBuf, cbWrite, enmOrigin); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + return VINF_SUCCESS; +} + + +/** + * Write to physical memory, external users. + * + * @returns VBox status code. + * @retval VINF_SUCCESS. + * @retval VERR_EM_NO_MEMORY. + * + * @param pVM The cross context VM structure. + * @param GCPhys Physical address to write to. + * @param pvBuf What to write. + * @param cbWrite How many bytes to write. + * @param enmOrigin Who is calling. + * + * @thread Any but EMTs. + */ +VMMDECL(int) PGMR3PhysWriteExternal(PVM pVM, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite, PGMACCESSORIGIN enmOrigin) +{ + VM_ASSERT_OTHER_THREAD(pVM); + + AssertMsg(!pVM->pgm.s.fNoMorePhysWrites, + ("Calling PGMR3PhysWriteExternal after pgmR3Save()! GCPhys=%RGp cbWrite=%#x enmOrigin=%d\n", + GCPhys, cbWrite, enmOrigin)); + AssertMsgReturn(cbWrite > 0, ("don't even think about writing zero bytes!\n"), VINF_SUCCESS); + LogFlow(("PGMR3PhysWriteExternal: %RGp %d\n", GCPhys, cbWrite)); + + pgmLock(pVM); + + /* + * Copy loop on ram ranges, stop when we hit something difficult. + */ + PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys); + for (;;) + { + /* Inside range or not? */ + if (pRam && GCPhys >= pRam->GCPhys) + { + /* + * Must work our way thru this page by page. + */ + RTGCPTR off = GCPhys - pRam->GCPhys; + while (off < pRam->cb) + { + RTGCPTR iPage = off >> PAGE_SHIFT; + PPGMPAGE pPage = &pRam->aPages[iPage]; + + /* + * Is the page problematic, we have to do the work on the EMT. + * + * Allocating writable pages and access handlers are + * problematic, write monitored pages are simple and can be + * dealt with here. + */ + if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) + || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED + || PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(pPage)) + { + if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED + && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)) + pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys); + else + { + pgmUnlock(pVM); + + return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysWriteExternalEMT, 5, + pVM, &GCPhys, pvBuf, cbWrite, enmOrigin); + } + } + Assert(!PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)); + + /* + * Simple stuff, go ahead. 
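+                 * (At this point the page is allocated, not write monitored and has no
+                 *  active handlers, so the backing memory can be written directly.)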
+ */ + size_t cb = PAGE_SIZE - (off & PAGE_OFFSET_MASK); + if (cb > cbWrite) + cb = cbWrite; + PGMPAGEMAPLOCK PgMpLck; + void *pvDst; + int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, pRam->GCPhys + off, &pvDst, &PgMpLck); + if (RT_SUCCESS(rc)) + { + memcpy(pvDst, pvBuf, cb); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + else + AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternal failed on %RGp / %R[pgmpage] -> %Rrc\n", + pRam->GCPhys + off, pPage, rc)); + + /* next page */ + if (cb >= cbWrite) + { + pgmUnlock(pVM); + return VINF_SUCCESS; + } + + cbWrite -= cb; + off += cb; + GCPhys += cb; + pvBuf = (const char *)pvBuf + cb; + } /* walk pages in ram range */ + } + else + { + /* + * Unassigned address space, skip it. + */ + if (!pRam) + break; + size_t cb = pRam->GCPhys - GCPhys; + if (cb >= cbWrite) + break; + cbWrite -= cb; + pvBuf = (const char *)pvBuf + cb; + GCPhys += cb; + } + + /* Advance range if necessary. */ + while (pRam && GCPhys > pRam->GCPhysLast) + pRam = pRam->CTX_SUFF(pNext); + } /* Ram range walk */ + + pgmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * VMR3ReqCall worker for PGMR3PhysGCPhys2CCPtrExternal to make pages writable. + * + * @returns see PGMR3PhysGCPhys2CCPtrExternal + * @param pVM The cross context VM structure. + * @param pGCPhys Pointer to the guest physical address. + * @param ppv Where to store the mapping address. + * @param pLock Where to store the lock. + */ +static DECLCALLBACK(int) pgmR3PhysGCPhys2CCPtrDelegated(PVM pVM, PRTGCPHYS pGCPhys, void **ppv, PPGMPAGEMAPLOCK pLock) +{ + /* + * Just hand it to PGMPhysGCPhys2CCPtr and check that it's not a page with + * an access handler after it succeeds. + */ + int rc = pgmLock(pVM); + AssertRCReturn(rc, rc); + + rc = PGMPhysGCPhys2CCPtr(pVM, *pGCPhys, ppv, pLock); + if (RT_SUCCESS(rc)) + { + PPGMPAGEMAPTLBE pTlbe; + int rc2 = pgmPhysPageQueryTlbe(pVM, *pGCPhys, &pTlbe); + AssertFatalRC(rc2); + PPGMPAGE pPage = pTlbe->pPage; + if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)) + { + PGMPhysReleasePageMappingLock(pVM, pLock); + rc = VERR_PGM_PHYS_PAGE_RESERVED; + } + else if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) +#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT + || pgmPoolIsDirtyPage(pVM, *pGCPhys) +#endif + ) + { + /* We *must* flush any corresponding pgm pool page here, otherwise we'll + * not be informed about writes and keep bogus gst->shw mappings around. + */ + pgmPoolFlushPageByGCPhys(pVM, *pGCPhys); + Assert(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)); + /** @todo r=bird: return VERR_PGM_PHYS_PAGE_RESERVED here if it still has + * active handlers, see the PGMR3PhysGCPhys2CCPtrExternal docs. */ + } + } + + pgmUnlock(pVM); + return rc; +} + + +/** + * Requests the mapping of a guest page into ring-3, external threads. + * + * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to + * release it. + * + * This API will assume your intention is to write to the page, and will + * therefore replace shared and zero pages. If you do not intend to modify the + * page, use the PGMR3PhysGCPhys2CCPtrReadOnlyExternal() API. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success. + * @retval VERR_PGM_PHYS_PAGE_RESERVED it it's a valid page but has no physical + * backing or if the page has any active access handlers. The caller + * must fall back on using PGMR3PhysWriteExternal. + * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address. + * + * @param pVM The cross context VM structure. 
+ * @param GCPhys The guest physical address of the page that should be mapped.
+ * @param ppv Where to store the address corresponding to GCPhys.
+ * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
+ *
+ * @remark Avoid calling this API from within critical sections (other than the
+ *         PGM one) because of the deadlock risk when we have to delegate the
+ *         task to an EMT.
+ * @thread Any.
+ */
+VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrExternal(PVM pVM, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
+{
+    AssertPtr(ppv);
+    AssertPtr(pLock);
+
+    Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
+
+    int rc = pgmLock(pVM);
+    AssertRCReturn(rc, rc);
+
+    /*
+     * Query the Physical TLB entry for the page (may fail).
+     */
+    PPGMPAGEMAPTLBE pTlbe;
+    rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
+    if (RT_SUCCESS(rc))
+    {
+        PPGMPAGE pPage = pTlbe->pPage;
+        if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage))
+            rc = VERR_PGM_PHYS_PAGE_RESERVED;
+        else
+        {
+            /*
+             * If the page is shared, the zero page, or being write monitored
+             * it must be converted to a page that's writable if possible.
+             * We can only deal with write monitored pages here, the rest have
+             * to be on an EMT.
+             */
+            if (   PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
+                || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
+#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
+                || pgmPoolIsDirtyPage(pVM, GCPhys)
+#endif
+               )
+            {
+                if (   PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
+                    && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
+#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
+                    && !pgmPoolIsDirtyPage(pVM, GCPhys)
+#endif
+                   )
+                    pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage, GCPhys);
+                else
+                {
+                    pgmUnlock(pVM);
+
+                    return VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
+                                                   pVM, &GCPhys, ppv, pLock);
+                }
+            }
+
+            /*
+             * Now, just perform the locking and calculate the return address.
+             */
+            PPGMPAGEMAP pMap = pTlbe->pMap;
+            if (pMap)
+                pMap->cRefs++;
+
+            unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
+            if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
+            {
+                if (cLocks == 0)
+                    pVM->pgm.s.cWriteLockedPages++;
+                PGM_PAGE_INC_WRITE_LOCKS(pPage);
+            }
+            else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
+            {
+                PGM_PAGE_INC_WRITE_LOCKS(pPage);
+                AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", GCPhys, pPage));
+                if (pMap)
+                    pMap->cRefs++; /* Extra ref to prevent it from going away. */
+            }
+
+            *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
+            pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
+            pLock->pvMap = pMap;
+        }
+    }
+
+    pgmUnlock(pVM);
+    return rc;
+}
+
+
+/**
+ * Requests the mapping of a guest page into ring-3, external threads.
+ *
+ * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
+ * release it.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
+ *         backing or if the page has an active ALL access handler. The caller
+ *         must fall back on using PGMPhysRead.
+ * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
+ *
+ * @param pVM The cross context VM structure.
+ * @param GCPhys The guest physical address of the page that should be mapped.
+ * @param ppv Where to store the address corresponding to GCPhys.
+ * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
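+ *
+ * A minimal usage sketch (illustrative only; the buffer and the fallback
+ * access origin are made-up example values):
+ * @code
+ *     uint8_t        abBuf[64];
+ *     void const    *pv;
+ *     PGMPAGEMAPLOCK Lock;
+ *     int rc = PGMR3PhysGCPhys2CCPtrReadOnlyExternal(pVM, GCPhys, &pv, &Lock);
+ *     if (RT_SUCCESS(rc))
+ *     {
+ *         memcpy(abBuf, pv, sizeof(abBuf)); // stay within the mapped page
+ *         PGMPhysReleasePageMappingLock(pVM, &Lock);
+ *     }
+ *     else // MMIO or special page, fall back on the read API
+ *         rc = PGMR3PhysReadExternal(pVM, GCPhys, abBuf, sizeof(abBuf), PGMACCESSORIGIN_DEVICE);
+ * @endcode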
+ * + * @remark Avoid calling this API from within critical sections (other than + * the PGM one) because of the deadlock risk. + * @thread Any. + */ +VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrReadOnlyExternal(PVM pVM, RTGCPHYS GCPhys, void const **ppv, PPGMPAGEMAPLOCK pLock) +{ + int rc = pgmLock(pVM); + AssertRCReturn(rc, rc); + + /* + * Query the Physical TLB entry for the page (may fail). + */ + PPGMPAGEMAPTLBE pTlbe; + rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe); + if (RT_SUCCESS(rc)) + { + PPGMPAGE pPage = pTlbe->pPage; +#if 1 + /* MMIO pages doesn't have any readable backing. */ + if (PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(pPage)) + rc = VERR_PGM_PHYS_PAGE_RESERVED; +#else + if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) + rc = VERR_PGM_PHYS_PAGE_RESERVED; +#endif + else + { + /* + * Now, just perform the locking and calculate the return address. + */ + PPGMPAGEMAP pMap = pTlbe->pMap; + if (pMap) + pMap->cRefs++; + + unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage); + if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1)) + { + if (cLocks == 0) + pVM->pgm.s.cReadLockedPages++; + PGM_PAGE_INC_READ_LOCKS(pPage); + } + else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage)) + { + PGM_PAGE_INC_READ_LOCKS(pPage); + AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", GCPhys, pPage)); + if (pMap) + pMap->cRefs++; /* Extra ref to prevent it from going away. */ + } + + *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK)); + pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ; + pLock->pvMap = pMap; + } + } + + pgmUnlock(pVM); + return rc; +} + + +#define MAKE_LEAF(a_pNode) \ + do { \ + (a_pNode)->pLeftR3 = NIL_RTR3PTR; \ + (a_pNode)->pRightR3 = NIL_RTR3PTR; \ + (a_pNode)->pLeftR0 = NIL_RTR0PTR; \ + (a_pNode)->pRightR0 = NIL_RTR0PTR; \ + (a_pNode)->pLeftRC = NIL_RTRCPTR; \ + (a_pNode)->pRightRC = NIL_RTRCPTR; \ + } while (0) + +#define INSERT_LEFT(a_pParent, a_pNode) \ + do { \ + (a_pParent)->pLeftR3 = (a_pNode); \ + (a_pParent)->pLeftR0 = (a_pNode)->pSelfR0; \ + (a_pParent)->pLeftRC = (a_pNode)->pSelfRC; \ + } while (0) +#define INSERT_RIGHT(a_pParent, a_pNode) \ + do { \ + (a_pParent)->pRightR3 = (a_pNode); \ + (a_pParent)->pRightR0 = (a_pNode)->pSelfR0; \ + (a_pParent)->pRightRC = (a_pNode)->pSelfRC; \ + } while (0) + + +/** + * Recursive tree builder. + * + * @param ppRam Pointer to the iterator variable. + * @param iDepth The current depth. Inserts a leaf node if 0. + */ +static PPGMRAMRANGE pgmR3PhysRebuildRamRangeSearchTreesRecursively(PPGMRAMRANGE *ppRam, int iDepth) +{ + PPGMRAMRANGE pRam; + if (iDepth <= 0) + { + /* + * Leaf node. + */ + pRam = *ppRam; + if (pRam) + { + *ppRam = pRam->pNextR3; + MAKE_LEAF(pRam); + } + } + else + { + + /* + * Intermediate node. + */ + PPGMRAMRANGE pLeft = pgmR3PhysRebuildRamRangeSearchTreesRecursively(ppRam, iDepth - 1); + + pRam = *ppRam; + if (!pRam) + return pLeft; + *ppRam = pRam->pNextR3; + MAKE_LEAF(pRam); + INSERT_LEFT(pRam, pLeft); + + PPGMRAMRANGE pRight = pgmR3PhysRebuildRamRangeSearchTreesRecursively(ppRam, iDepth - 1); + if (pRight) + INSERT_RIGHT(pRam, pRight); + } + return pRam; +} + + +/** + * Rebuilds the RAM range search trees. + * + * @param pVM The cross context VM structure. + */ +static void pgmR3PhysRebuildRamRangeSearchTrees(PVM pVM) +{ + + /* + * Create the reasonably balanced tree in a sequential fashion. + * For simplicity (laziness) we use standard recursion here. 
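+     * (The tree grows toward 1, 3, 7, 15, ... nodes: on each pass the tree built so
+     *  far becomes the left subtree of the next range in the list and a fresh
+     *  subtree of the same depth is built for the right side.)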
+ */ + int iDepth = 0; + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + PPGMRAMRANGE pRoot = pgmR3PhysRebuildRamRangeSearchTreesRecursively(&pRam, 0); + while (pRam) + { + PPGMRAMRANGE pLeft = pRoot; + + pRoot = pRam; + pRam = pRam->pNextR3; + MAKE_LEAF(pRoot); + INSERT_LEFT(pRoot, pLeft); + + PPGMRAMRANGE pRight = pgmR3PhysRebuildRamRangeSearchTreesRecursively(&pRam, iDepth); + if (pRight) + INSERT_RIGHT(pRoot, pRight); + /** @todo else: rotate the tree. */ + + iDepth++; + } + + pVM->pgm.s.pRamRangeTreeR3 = pRoot; + pVM->pgm.s.pRamRangeTreeR0 = pRoot ? pRoot->pSelfR0 : NIL_RTR0PTR; + pVM->pgm.s.pRamRangeTreeRC = pRoot ? pRoot->pSelfRC : NIL_RTRCPTR; + +#ifdef VBOX_STRICT + /* + * Verify that the above code works. + */ + unsigned cRanges = 0; + for (pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + cRanges++; + Assert(cRanges > 0); + + unsigned cMaxDepth = ASMBitLastSetU32(cRanges); + if ((1U << cMaxDepth) < cRanges) + cMaxDepth++; + + for (pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + { + unsigned cDepth = 0; + PPGMRAMRANGE pRam2 = pVM->pgm.s.pRamRangeTreeR3; + for (;;) + { + if (pRam == pRam2) + break; + Assert(pRam2); + if (pRam->GCPhys < pRam2->GCPhys) + pRam2 = pRam2->pLeftR3; + else + pRam2 = pRam2->pRightR3; + } + AssertMsg(cDepth <= cMaxDepth, ("cDepth=%d cMaxDepth=%d\n", cDepth, cMaxDepth)); + } +#endif /* VBOX_STRICT */ +} + +#undef MAKE_LEAF +#undef INSERT_LEFT +#undef INSERT_RIGHT + +/** + * Relinks the RAM ranges using the pSelfRC and pSelfR0 pointers. + * + * Called when anything was relocated. + * + * @param pVM The cross context VM structure. + */ +void pgmR3PhysRelinkRamRanges(PVM pVM) +{ + PPGMRAMRANGE pCur; + +#ifdef VBOX_STRICT + for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + { + Assert((pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pCur->pSelfR0 == MMHyperCCToR0(pVM, pCur)); + Assert((pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pCur->pSelfRC == MMHyperCCToRC(pVM, pCur)); + Assert((pCur->GCPhys & PAGE_OFFSET_MASK) == 0); + Assert((pCur->GCPhysLast & PAGE_OFFSET_MASK) == PAGE_OFFSET_MASK); + Assert((pCur->cb & PAGE_OFFSET_MASK) == 0); + Assert(pCur->cb == pCur->GCPhysLast - pCur->GCPhys + 1); + for (PPGMRAMRANGE pCur2 = pVM->pgm.s.pRamRangesXR3; pCur2; pCur2 = pCur2->pNextR3) + Assert( pCur2 == pCur + || strcmp(pCur2->pszDesc, pCur->pszDesc)); /** @todo fix MMIO ranges!! */ + } +#endif + + pCur = pVM->pgm.s.pRamRangesXR3; + if (pCur) + { + pVM->pgm.s.pRamRangesXR0 = pCur->pSelfR0; + pVM->pgm.s.pRamRangesXRC = pCur->pSelfRC; + + for (; pCur->pNextR3; pCur = pCur->pNextR3) + { + pCur->pNextR0 = pCur->pNextR3->pSelfR0; + pCur->pNextRC = pCur->pNextR3->pSelfRC; + } + + Assert(pCur->pNextR0 == NIL_RTR0PTR); + Assert(pCur->pNextRC == NIL_RTRCPTR); + } + else + { + Assert(pVM->pgm.s.pRamRangesXR0 == NIL_RTR0PTR); + Assert(pVM->pgm.s.pRamRangesXRC == NIL_RTRCPTR); + } + ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen); + + pgmR3PhysRebuildRamRangeSearchTrees(pVM); +} + + +/** + * Links a new RAM range into the list. + * + * @param pVM The cross context VM structure. + * @param pNew Pointer to the new list entry. + * @param pPrev Pointer to the previous list entry. If NULL, insert as head. 
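+ *
+ * @note pPrev must be chosen so that the list stays sorted by GCPhys; the RAM
+ *       range search trees are rebuilt at the end of the call.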
+ */ +static void pgmR3PhysLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, PPGMRAMRANGE pPrev) +{ + AssertMsg(pNew->pszDesc, ("%RGp-%RGp\n", pNew->GCPhys, pNew->GCPhysLast)); + Assert((pNew->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pNew->pSelfR0 == MMHyperCCToR0(pVM, pNew)); + Assert((pNew->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pNew->pSelfRC == MMHyperCCToRC(pVM, pNew)); + + pgmLock(pVM); + + PPGMRAMRANGE pRam = pPrev ? pPrev->pNextR3 : pVM->pgm.s.pRamRangesXR3; + pNew->pNextR3 = pRam; + pNew->pNextR0 = pRam ? pRam->pSelfR0 : NIL_RTR0PTR; + pNew->pNextRC = pRam ? pRam->pSelfRC : NIL_RTRCPTR; + + if (pPrev) + { + pPrev->pNextR3 = pNew; + pPrev->pNextR0 = pNew->pSelfR0; + pPrev->pNextRC = pNew->pSelfRC; + } + else + { + pVM->pgm.s.pRamRangesXR3 = pNew; + pVM->pgm.s.pRamRangesXR0 = pNew->pSelfR0; + pVM->pgm.s.pRamRangesXRC = pNew->pSelfRC; + } + ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen); + + pgmR3PhysRebuildRamRangeSearchTrees(pVM); + pgmUnlock(pVM); +} + + +/** + * Unlink an existing RAM range from the list. + * + * @param pVM The cross context VM structure. + * @param pRam Pointer to the new list entry. + * @param pPrev Pointer to the previous list entry. If NULL, insert as head. + */ +static void pgmR3PhysUnlinkRamRange2(PVM pVM, PPGMRAMRANGE pRam, PPGMRAMRANGE pPrev) +{ + Assert(pPrev ? pPrev->pNextR3 == pRam : pVM->pgm.s.pRamRangesXR3 == pRam); + Assert((pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pRam->pSelfR0 == MMHyperCCToR0(pVM, pRam)); + Assert((pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pRam->pSelfRC == MMHyperCCToRC(pVM, pRam)); + + pgmLock(pVM); + + PPGMRAMRANGE pNext = pRam->pNextR3; + if (pPrev) + { + pPrev->pNextR3 = pNext; + pPrev->pNextR0 = pNext ? pNext->pSelfR0 : NIL_RTR0PTR; + pPrev->pNextRC = pNext ? pNext->pSelfRC : NIL_RTRCPTR; + } + else + { + Assert(pVM->pgm.s.pRamRangesXR3 == pRam); + pVM->pgm.s.pRamRangesXR3 = pNext; + pVM->pgm.s.pRamRangesXR0 = pNext ? pNext->pSelfR0 : NIL_RTR0PTR; + pVM->pgm.s.pRamRangesXRC = pNext ? pNext->pSelfRC : NIL_RTRCPTR; + } + ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen); + + pgmR3PhysRebuildRamRangeSearchTrees(pVM); + pgmUnlock(pVM); +} + + +/** + * Unlink an existing RAM range from the list. + * + * @param pVM The cross context VM structure. + * @param pRam Pointer to the new list entry. + */ +static void pgmR3PhysUnlinkRamRange(PVM pVM, PPGMRAMRANGE pRam) +{ + pgmLock(pVM); + + /* find prev. */ + PPGMRAMRANGE pPrev = NULL; + PPGMRAMRANGE pCur = pVM->pgm.s.pRamRangesXR3; + while (pCur != pRam) + { + pPrev = pCur; + pCur = pCur->pNextR3; + } + AssertFatal(pCur); + + pgmR3PhysUnlinkRamRange2(pVM, pRam, pPrev); + pgmUnlock(pVM); +} + + +/** + * Frees a range of pages, replacing them with ZERO pages of the specified type. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pRam The RAM range in which the pages resides. + * @param GCPhys The address of the first page. + * @param GCPhysLast The address of the last page. + * @param enmType The page type to replace then with. + */ +static int pgmR3PhysFreePageRange(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, PGMPAGETYPE enmType) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + uint32_t cPendingPages = 0; + PGMMFREEPAGESREQ pReq; + int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE); + AssertLogRelRCReturn(rc, rc); + + /* Iterate the pages. 
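+       (pgmPhysFreePage queues the pages on pReq; anything still pending when the
+        loop completes is handed to GMMR3FreePagesPerform below.)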
*/ + PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> PAGE_SHIFT) + 1; + while (cPagesLeft-- > 0) + { + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys, enmType); + AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */ + + PGM_PAGE_SET_TYPE(pVM, pPageDst, enmType); + + GCPhys += PAGE_SIZE; + pPageDst++; + } + + if (cPendingPages) + { + rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages); + AssertLogRelRCReturn(rc, rc); + } + GMMR3FreePagesCleanup(pReq); + + return rc; +} + +#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)) + +/** + * Rendezvous callback used by PGMR3ChangeMemBalloon that changes the memory balloon size + * + * This is only called on one of the EMTs while the other ones are waiting for + * it to complete this function. + * + * @returns VINF_SUCCESS (VBox strict status code). + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused. + * @param pvUser User parameter + */ +static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysChangeMemBalloonRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + uintptr_t *paUser = (uintptr_t *)pvUser; + bool fInflate = !!paUser[0]; + unsigned cPages = paUser[1]; + RTGCPHYS *paPhysPage = (RTGCPHYS *)paUser[2]; + uint32_t cPendingPages = 0; + PGMMFREEPAGESREQ pReq; + int rc; + + Log(("pgmR3PhysChangeMemBalloonRendezvous: %s %x pages\n", (fInflate) ? "inflate" : "deflate", cPages)); + pgmLock(pVM); + + if (fInflate) + { + /* Flush the PGM pool cache as we might have stale references to pages that we just freed. */ + pgmR3PoolClearAllRendezvous(pVM, pVCpu, NULL); + + /* Replace pages with ZERO pages. */ + rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE); + if (RT_FAILURE(rc)) + { + pgmUnlock(pVM); + AssertLogRelRC(rc); + return rc; + } + + /* Iterate the pages. */ + for (unsigned i = 0; i < cPages; i++) + { + PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]); + if ( pPage == NULL + || PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM) + { + Log(("pgmR3PhysChangeMemBalloonRendezvous: invalid physical page %RGp pPage->u3Type=%d\n", paPhysPage[i], pPage ? PGM_PAGE_GET_TYPE(pPage) : 0)); + break; + } + + LogFlow(("balloon page: %RGp\n", paPhysPage[i])); + + /* Flush the shadow PT if this page was previously used as a guest page table. */ + pgmPoolFlushPageByGCPhys(pVM, paPhysPage[i]); + + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, paPhysPage[i], (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage)); + if (RT_FAILURE(rc)) + { + pgmUnlock(pVM); + AssertLogRelRC(rc); + return rc; + } + Assert(PGM_PAGE_IS_ZERO(pPage)); + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED); + } + + if (cPendingPages) + { + rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages); + if (RT_FAILURE(rc)) + { + pgmUnlock(pVM); + AssertLogRelRC(rc); + return rc; + } + } + GMMR3FreePagesCleanup(pReq); + } + else + { + /* Iterate the pages. */ + for (unsigned i = 0; i < cPages; i++) + { + PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]); + AssertBreak(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM); + + LogFlow(("Free ballooned page: %RGp\n", paPhysPage[i])); + + Assert(PGM_PAGE_IS_BALLOONED(pPage)); + + /* Change back to zero page. (NEM does not need to be informed.) 
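+                 * No backing is allocated here; like any other zero page it
+                 * only gets a real page again on the first write to it.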
*/ + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO); + } + + /* Note that we currently do not map any ballooned pages in our shadow page tables, so no need to flush the pgm pool. */ + } + + /* Notify GMM about the balloon change. */ + rc = GMMR3BalloonedPages(pVM, (fInflate) ? GMMBALLOONACTION_INFLATE : GMMBALLOONACTION_DEFLATE, cPages); + if (RT_SUCCESS(rc)) + { + if (!fInflate) + { + Assert(pVM->pgm.s.cBalloonedPages >= cPages); + pVM->pgm.s.cBalloonedPages -= cPages; + } + else + pVM->pgm.s.cBalloonedPages += cPages; + } + + pgmUnlock(pVM); + + /* Flush the recompiler's TLB as well. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + CPUMSetChangedFlags(&pVM->aCpus[i], CPUM_CHANGED_GLOBAL_TLB_FLUSH); + + AssertLogRelRC(rc); + return rc; +} + + +/** + * Frees a range of ram pages, replacing them with ZERO pages; helper for PGMR3PhysFreeRamPages + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param fInflate Inflate or deflate memory balloon + * @param cPages Number of pages to free + * @param paPhysPage Array of guest physical addresses + */ +static DECLCALLBACK(void) pgmR3PhysChangeMemBalloonHelper(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage) +{ + uintptr_t paUser[3]; + + paUser[0] = fInflate; + paUser[1] = cPages; + paUser[2] = (uintptr_t)paPhysPage; + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser); + AssertRC(rc); + + /* Made a copy in PGMR3PhysFreeRamPages; free it here. */ + RTMemFree(paPhysPage); +} + +#endif /* 64-bit host && (Windows || Solaris || Linux || FreeBSD) */ + +/** + * Inflate or deflate a memory balloon + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param fInflate Inflate or deflate memory balloon + * @param cPages Number of pages to free + * @param paPhysPage Array of guest physical addresses + */ +VMMR3DECL(int) PGMR3PhysChangeMemBalloon(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage) +{ + /* This must match GMMR0Init; currently we only support memory ballooning on all 64-bit hosts except Mac OS X */ +#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)) + int rc; + + /* Older additions (ancient non-functioning balloon code) pass wrong physical addresses. */ + AssertReturn(!(paPhysPage[0] & 0xfff), VERR_INVALID_PARAMETER); + + /* We own the IOM lock here and could cause a deadlock by waiting for another VCPU that is blocking on the IOM lock. + * In the SMP case we post a request packet to postpone the job. + */ + if (pVM->cCpus > 1) + { + unsigned cbPhysPage = cPages * sizeof(paPhysPage[0]); + RTGCPHYS *paPhysPageCopy = (RTGCPHYS *)RTMemAlloc(cbPhysPage); + AssertReturn(paPhysPageCopy, VERR_NO_MEMORY); + + memcpy(paPhysPageCopy, paPhysPage, cbPhysPage); + + rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysChangeMemBalloonHelper, 4, pVM, fInflate, cPages, paPhysPageCopy); + AssertRC(rc); + } + else + { + uintptr_t paUser[3]; + + paUser[0] = fInflate; + paUser[1] = cPages; + paUser[2] = (uintptr_t)paPhysPage; + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser); + AssertRC(rc); + } + return rc; + +#else + NOREF(pVM); NOREF(fInflate); NOREF(cPages); NOREF(paPhysPage); + return VERR_NOT_IMPLEMENTED; +#endif +} + + +/** + * Rendezvous callback used by PGMR3WriteProtectRAM that write protects all + * physical RAM. 
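+ * Allocated pages are put back under write monitoring; pages written to since
+ * the last pass are additionally flagged as FT dirty for the next sync.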
+ * + * This is only called on one of the EMTs while the other ones are waiting for + * it to complete this function. + * + * @returns VINF_SUCCESS (VBox strict status code). + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused. + * @param pvUser User parameter, unused. + */ +static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysWriteProtectRAMRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + int rc = VINF_SUCCESS; + NOREF(pvUser); NOREF(pVCpu); + + pgmLock(pVM); +#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT + pgmPoolResetDirtyPages(pVM); +#endif + + /** @todo pointless to write protect the physical page pointed to by RSP. */ + + for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX); + pRam; + pRam = pRam->CTX_SUFF(pNext)) + { + uint32_t cPages = pRam->cb >> PAGE_SHIFT; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + PGMPAGETYPE enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage); + + if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM) + || enmPageType == PGMPAGETYPE_MMIO2) + { + /* + * A RAM page. + */ + switch (PGM_PAGE_GET_STATE(pPage)) + { + case PGM_PAGE_STATE_ALLOCATED: + /** @todo Optimize this: Don't always re-enable write + * monitoring if the page is known to be very busy. */ + if (PGM_PAGE_IS_WRITTEN_TO(pPage)) + { + PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage); + /* Remember this dirty page for the next (memory) sync. */ + PGM_PAGE_SET_FT_DIRTY(pPage); + } + + pgmPhysPageWriteMonitor(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT)); + break; + + case PGM_PAGE_STATE_SHARED: + AssertFailed(); + break; + + case PGM_PAGE_STATE_WRITE_MONITORED: /* nothing to change. */ + default: + break; + } + } + } + } + pgmR3PoolWriteProtectPages(pVM); + PGM_INVL_ALL_VCPU_TLBS(pVM); + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH); + + pgmUnlock(pVM); + return rc; +} + +/** + * Protect all physical RAM to monitor writes + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3PhysWriteProtectRAM(PVM pVM) +{ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysWriteProtectRAMRendezvous, NULL); + AssertRC(rc); + return rc; +} + +/** + * Enumerate all dirty FT pages. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pfnEnum Enumerate callback handler. + * @param pvUser Enumerate callback handler parameter. + */ +VMMR3DECL(int) PGMR3PhysEnumDirtyFTPages(PVM pVM, PFNPGMENUMDIRTYFTPAGES pfnEnum, void *pvUser) +{ + int rc = VINF_SUCCESS; + + pgmLock(pVM); + for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX); + pRam; + pRam = pRam->CTX_SUFF(pNext)) + { + uint32_t cPages = pRam->cb >> PAGE_SHIFT; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + PGMPAGETYPE enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage); + + if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM) + || enmPageType == PGMPAGETYPE_MMIO2) + { + /* + * A RAM page. + */ + switch (PGM_PAGE_GET_STATE(pPage)) + { + case PGM_PAGE_STATE_ALLOCATED: + case PGM_PAGE_STATE_WRITE_MONITORED: + if ( !PGM_PAGE_IS_WRITTEN_TO(pPage) /* not very recently updated? 
*/ + && PGM_PAGE_IS_FT_DIRTY(pPage)) + { + uint32_t cbPageRange = PAGE_SIZE; + uint32_t iPageClean = iPage + 1; + RTGCPHYS GCPhysPage = pRam->GCPhys + iPage * PAGE_SIZE; + uint8_t *pu8Page = NULL; + PGMPAGEMAPLOCK Lock; + + /* Find the next clean page, so we can merge adjacent dirty pages. */ + for (; iPageClean < cPages; iPageClean++) + { + PPGMPAGE pPageNext = &pRam->aPages[iPageClean]; + if ( RT_UNLIKELY(PGM_PAGE_GET_TYPE(pPageNext) != PGMPAGETYPE_RAM) + || PGM_PAGE_GET_STATE(pPageNext) != PGM_PAGE_STATE_ALLOCATED + || PGM_PAGE_IS_WRITTEN_TO(pPageNext) + || !PGM_PAGE_IS_FT_DIRTY(pPageNext) + /* Crossing a chunk boundary? */ + || (GCPhysPage & GMM_PAGEID_IDX_MASK) != ((GCPhysPage + cbPageRange) & GMM_PAGEID_IDX_MASK) + ) + break; + + cbPageRange += PAGE_SIZE; + } + + rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhysPage, (const void **)&pu8Page, &Lock); + if (RT_SUCCESS(rc)) + { + /** @todo this is risky; the range might be changed, but little choice as the sync + * costs a lot of time. */ + pgmUnlock(pVM); + pfnEnum(pVM, GCPhysPage, pu8Page, cbPageRange, pvUser); + pgmLock(pVM); + PGMPhysReleasePageMappingLock(pVM, &Lock); + } + + for (uint32_t iTmp = iPage; iTmp < iPageClean; iTmp++) + PGM_PAGE_CLEAR_FT_DIRTY(&pRam->aPages[iTmp]); + } + break; + } + } + } + } + pgmUnlock(pVM); + return rc; +} + + +/** + * Gets the number of ram ranges. + * + * @returns Number of ram ranges. Returns UINT32_MAX if @a pVM is invalid. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(uint32_t) PGMR3PhysGetRamRangeCount(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX); + + pgmLock(pVM); + uint32_t cRamRanges = 0; + for (PPGMRAMRANGE pCur = pVM->pgm.s.CTX_SUFF(pRamRangesX); pCur; pCur = pCur->CTX_SUFF(pNext)) + cRamRanges++; + pgmUnlock(pVM); + return cRamRanges; +} + + +/** + * Get information about a range. + * + * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE. + * @param pVM The cross context VM structure. + * @param iRange The ordinal of the range. + * @param pGCPhysStart Where to return the start of the range. Optional. + * @param pGCPhysLast Where to return the address of the last byte in the + * range. Optional. + * @param ppszDesc Where to return the range description. Optional. + * @param pfIsMmio Where to indicate that this is a pure MMIO range. + * Optional. + */ +VMMR3DECL(int) PGMR3PhysGetRange(PVM pVM, uint32_t iRange, PRTGCPHYS pGCPhysStart, PRTGCPHYS pGCPhysLast, + const char **ppszDesc, bool *pfIsMmio) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + pgmLock(pVM); + uint32_t iCurRange = 0; + for (PPGMRAMRANGE pCur = pVM->pgm.s.CTX_SUFF(pRamRangesX); pCur; pCur = pCur->CTX_SUFF(pNext), iCurRange++) + if (iCurRange == iRange) + { + if (pGCPhysStart) + *pGCPhysStart = pCur->GCPhys; + if (pGCPhysLast) + *pGCPhysLast = pCur->GCPhysLast; + if (ppszDesc) + *ppszDesc = pCur->pszDesc; + if (pfIsMmio) + *pfIsMmio = !!(pCur->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO); + + pgmUnlock(pVM); + return VINF_SUCCESS; + } + pgmUnlock(pVM); + return VERR_OUT_OF_RANGE; +} + + +/** + * Query the amount of free memory inside VMMR0 + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pcbAllocMem Where to return the amount of memory allocated + * by VMs. + * @param pcbFreeMem Where to return the amount of memory that is + * allocated from the host but not currently used + * by any VMs. + * @param pcbBallonedMem Where to return the sum of memory that is + * currently ballooned by the VMs. 
+ * @param pcbSharedMem Where to return the amount of memory that is + * currently shared. + */ +VMMR3DECL(int) PGMR3QueryGlobalMemoryStats(PUVM pUVM, uint64_t *pcbAllocMem, uint64_t *pcbFreeMem, + uint64_t *pcbBallonedMem, uint64_t *pcbSharedMem) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + uint64_t cAllocPages = 0; + uint64_t cFreePages = 0; + uint64_t cBalloonPages = 0; + uint64_t cSharedPages = 0; + int rc = GMMR3QueryHypervisorMemoryStats(pUVM->pVM, &cAllocPages, &cFreePages, &cBalloonPages, &cSharedPages); + AssertRCReturn(rc, rc); + + if (pcbAllocMem) + *pcbAllocMem = cAllocPages * _4K; + + if (pcbFreeMem) + *pcbFreeMem = cFreePages * _4K; + + if (pcbBallonedMem) + *pcbBallonedMem = cBalloonPages * _4K; + + if (pcbSharedMem) + *pcbSharedMem = cSharedPages * _4K; + + Log(("PGMR3QueryVMMMemoryStats: all=%llx free=%llx ballooned=%llx shared=%llx\n", + cAllocPages, cFreePages, cBalloonPages, cSharedPages)); + return VINF_SUCCESS; +} + + +/** + * Query memory stats for the VM. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pcbTotalMem Where to return total amount memory the VM may + * possibly use. + * @param pcbPrivateMem Where to return the amount of private memory + * currently allocated. + * @param pcbSharedMem Where to return the amount of actually shared + * memory currently used by the VM. + * @param pcbZeroMem Where to return the amount of memory backed by + * zero pages. + * + * @remarks The total mem is normally larger than the sum of the three + * components. There are two reasons for this, first the amount of + * shared memory is what we're sure is shared instead of what could + * possibly be shared with someone. Secondly, because the total may + * include some pure MMIO pages that doesn't go into any of the three + * sub-counts. + * + * @todo Why do we return reused shared pages instead of anything that could + * potentially be shared? Doesn't this mean the first VM gets a much + * lower number of shared pages? + */ +VMMR3DECL(int) PGMR3QueryMemoryStats(PUVM pUVM, uint64_t *pcbTotalMem, uint64_t *pcbPrivateMem, + uint64_t *pcbSharedMem, uint64_t *pcbZeroMem) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + if (pcbTotalMem) + *pcbTotalMem = (uint64_t)pVM->pgm.s.cAllPages * PAGE_SIZE; + + if (pcbPrivateMem) + *pcbPrivateMem = (uint64_t)pVM->pgm.s.cPrivatePages * PAGE_SIZE; + + if (pcbSharedMem) + *pcbSharedMem = (uint64_t)pVM->pgm.s.cReusedSharedPages * PAGE_SIZE; + + if (pcbZeroMem) + *pcbZeroMem = (uint64_t)pVM->pgm.s.cZeroPages * PAGE_SIZE; + + Log(("PGMR3QueryMemoryStats: all=%x private=%x reused=%x zero=%x\n", pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cReusedSharedPages, pVM->pgm.s.cZeroPages)); + return VINF_SUCCESS; +} + + +/** + * PGMR3PhysRegisterRam worker that initializes and links a RAM range. + * + * @param pVM The cross context VM structure. + * @param pNew The new RAM range. + * @param GCPhys The address of the RAM range. + * @param GCPhysLast The last address of the RAM range. + * @param RCPtrNew The RC address if the range is floating. NIL_RTRCPTR + * if in HMA. + * @param R0PtrNew Ditto for R0. + * @param pszDesc The description. + * @param pPrev The previous RAM range (for linking). 
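+ *
+ * @remarks Illustrative call only, mirroring what PGMR3PhysRegisterRam below
+ *          does for a non-floating (hyper heap allocated) range:
+ * @code
+ *      pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhysLast,
+ *                                   NIL_RTRCPTR, NIL_RTR0PTR, pszDesc, pPrev);
+ * @endcode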
+ */ +static void pgmR3PhysInitAndLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, + RTRCPTR RCPtrNew, RTR0PTR R0PtrNew, const char *pszDesc, PPGMRAMRANGE pPrev) +{ + /* + * Initialize the range. + */ + pNew->pSelfR0 = R0PtrNew != NIL_RTR0PTR ? R0PtrNew : MMHyperCCToR0(pVM, pNew); + pNew->pSelfRC = RCPtrNew != NIL_RTRCPTR ? RCPtrNew : MMHyperCCToRC(pVM, pNew); + pNew->GCPhys = GCPhys; + pNew->GCPhysLast = GCPhysLast; + pNew->cb = GCPhysLast - GCPhys + 1; + pNew->pszDesc = pszDesc; + pNew->fFlags = RCPtrNew != NIL_RTRCPTR ? PGM_RAM_RANGE_FLAGS_FLOATING : 0; + pNew->pvR3 = NULL; + pNew->paLSPages = NULL; + + uint32_t const cPages = pNew->cb >> PAGE_SHIFT; + RTGCPHYS iPage = cPages; + while (iPage-- > 0) + PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_RAM); + + /* Update the page count stats. */ + pVM->pgm.s.cZeroPages += cPages; + pVM->pgm.s.cAllPages += cPages; + + /* + * Link it. + */ + pgmR3PhysLinkRamRange(pVM, pNew, pPrev); +} + + +/** + * @callback_method_impl{FNPGMRELOCATE, Relocate a floating RAM range.} + * @sa pgmR3PhysMMIO2ExRangeRelocate + */ +static DECLCALLBACK(bool) pgmR3PhysRamRangeRelocate(PVM pVM, RTGCPTR GCPtrOld, RTGCPTR GCPtrNew, + PGMRELOCATECALL enmMode, void *pvUser) +{ + PPGMRAMRANGE pRam = (PPGMRAMRANGE)pvUser; + Assert(pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING); + Assert(pRam->pSelfRC == GCPtrOld + PAGE_SIZE); RT_NOREF_PV(GCPtrOld); + + switch (enmMode) + { + case PGMRELOCATECALL_SUGGEST: + return true; + + case PGMRELOCATECALL_RELOCATE: + { + /* + * Update myself, then relink all the ranges and flush the RC TLB. + */ + pgmLock(pVM); + + pRam->pSelfRC = (RTRCPTR)(GCPtrNew + PAGE_SIZE); + + pgmR3PhysRelinkRamRanges(pVM); + for (unsigned i = 0; i < PGM_RAMRANGE_TLB_ENTRIES; i++) + pVM->pgm.s.apRamRangesTlbRC[i] = NIL_RTRCPTR; + + pgmUnlock(pVM); + return true; + } + + default: + AssertFailedReturn(false); + } +} + + +/** + * PGMR3PhysRegisterRam worker that registers a high chunk. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhys The address of the RAM. + * @param cRamPages The number of RAM pages to register. + * @param cbChunk The size of the PGMRAMRANGE guest mapping. + * @param iChunk The chunk number. + * @param pszDesc The RAM range description. + * @param ppPrev Previous RAM range pointer. In/Out. + */ +static int pgmR3PhysRegisterHighRamChunk(PVM pVM, RTGCPHYS GCPhys, uint32_t cRamPages, + uint32_t cbChunk, uint32_t iChunk, const char *pszDesc, + PPGMRAMRANGE *ppPrev) +{ + const char *pszDescChunk = iChunk == 0 + ? pszDesc + : MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s (#%u)", pszDesc, iChunk + 1); + AssertReturn(pszDescChunk, VERR_NO_MEMORY); + + /* + * Allocate memory for the new chunk. + */ + size_t const cChunkPages = RT_ALIGN_Z(RT_UOFFSETOF_DYN(PGMRAMRANGE, aPages[cRamPages]), PAGE_SIZE) >> PAGE_SHIFT; + PSUPPAGE paChunkPages = (PSUPPAGE)RTMemTmpAllocZ(sizeof(SUPPAGE) * cChunkPages); + AssertReturn(paChunkPages, VERR_NO_TMP_MEMORY); + RTR0PTR R0PtrChunk = NIL_RTR0PTR; + void *pvChunk = NULL; + int rc = SUPR3PageAllocEx(cChunkPages, 0 /*fFlags*/, &pvChunk, +#if defined(VBOX_WITH_MORE_RING0_MEM_MAPPINGS) + &R0PtrChunk, +#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE) + VM_IS_HM_OR_NEM_ENABLED(pVM) ? 
&R0PtrChunk : NULL, +#else + NULL, +#endif + paChunkPages); + if (RT_SUCCESS(rc)) + { +#if defined(VBOX_WITH_MORE_RING0_MEM_MAPPINGS) + Assert(R0PtrChunk != NIL_RTR0PTR); +#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE) + if (!VM_IS_HM_OR_NEM_ENABLED(pVM)) + R0PtrChunk = NIL_RTR0PTR; +#else + R0PtrChunk = (uintptr_t)pvChunk; +#endif + memset(pvChunk, 0, cChunkPages << PAGE_SHIFT); + + PPGMRAMRANGE pNew = (PPGMRAMRANGE)pvChunk; + + /* + * Create a mapping and map the pages into it. + * We push these in below the HMA. + */ + RTGCPTR GCPtrChunkMap = pVM->pgm.s.GCPtrPrevRamRangeMapping - cbChunk; + rc = PGMR3MapPT(pVM, GCPtrChunkMap, cbChunk, 0 /*fFlags*/, pgmR3PhysRamRangeRelocate, pNew, pszDescChunk); + if (RT_SUCCESS(rc)) + { + pVM->pgm.s.GCPtrPrevRamRangeMapping = GCPtrChunkMap; + + RTGCPTR const GCPtrChunk = GCPtrChunkMap + PAGE_SIZE; + RTGCPTR GCPtrPage = GCPtrChunk; + for (uint32_t iPage = 0; iPage < cChunkPages && RT_SUCCESS(rc); iPage++, GCPtrPage += PAGE_SIZE) + rc = PGMMap(pVM, GCPtrPage, paChunkPages[iPage].Phys, PAGE_SIZE, 0); + if (RT_SUCCESS(rc)) + { + /* + * Ok, init and link the range. + */ + pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhys + ((RTGCPHYS)cRamPages << PAGE_SHIFT) - 1, + (RTRCPTR)GCPtrChunk, R0PtrChunk, pszDescChunk, *ppPrev); + *ppPrev = pNew; + } + } + + if (RT_FAILURE(rc)) + SUPR3PageFreeEx(pvChunk, cChunkPages); + } + + RTMemTmpFree(paChunkPages); + return rc; +} + + +/** + * Sets up a range RAM. + * + * This will check for conflicting registrations, make a resource + * reservation for the memory (with GMM), and setup the per-page + * tracking structures (PGMPAGE). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhys The physical address of the RAM. + * @param cb The size of the RAM. + * @param pszDesc The description - not copied, so, don't free or change it. + */ +VMMR3DECL(int) PGMR3PhysRegisterRam(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc) +{ + /* + * Validate input. + */ + Log(("PGMR3PhysRegisterRam: GCPhys=%RGp cb=%RGp pszDesc=%s\n", GCPhys, cb, pszDesc)); + AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER); + AssertReturn(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER); + AssertReturn(cb > 0, VERR_INVALID_PARAMETER); + RTGCPHYS GCPhysLast = GCPhys + (cb - 1); + AssertMsgReturn(GCPhysLast > GCPhys, ("The range wraps! GCPhys=%RGp cb=%RGp\n", GCPhys, cb), VERR_INVALID_PARAMETER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + + pgmLock(pVM); + + /* + * Find range location and check for conflicts. + * (We don't lock here because the locking by EMT is only required on update.) + */ + PPGMRAMRANGE pPrev = NULL; + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + while (pRam && GCPhysLast >= pRam->GCPhys) + { + if ( GCPhysLast >= pRam->GCPhys + && GCPhys <= pRam->GCPhysLast) + AssertLogRelMsgFailedReturn(("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n", + GCPhys, GCPhysLast, pszDesc, + pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc), + VERR_PGM_RAM_CONFLICT); + + /* next */ + pPrev = pRam; + pRam = pRam->pNextR3; + } + + /* + * Register it with GMM (the API bitches). + */ + const RTGCPHYS cPages = cb >> PAGE_SHIFT; + int rc = MMR3IncreaseBaseReservation(pVM, cPages); + if (RT_FAILURE(rc)) + { + pgmUnlock(pVM); + return rc; + } + + if ( GCPhys >= _4G + && cPages > 256) + { + /* + * The PGMRAMRANGE structures for the high memory can get very big. 
+ * In order to avoid SUPR3PageAllocEx allocation failures due to the + * allocation size limit there and also to avoid being unable to find + * guest mapping space for them, we split this memory up into 4MB in + * (potential) raw-mode configs and 16MB chunks in forced AMD-V/VT-x + * mode. + * + * The first and last page of each mapping are guard pages and marked + * not-present. So, we've got 4186112 and 16769024 bytes available for + * the PGMRAMRANGE structure. + * + * Note! The sizes used here will influence the saved state. + */ + uint32_t cbChunk; + uint32_t cPagesPerChunk; + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + cbChunk = 16U*_1M; + cPagesPerChunk = 1048048; /* max ~1048059 */ + AssertCompile(sizeof(PGMRAMRANGE) + sizeof(PGMPAGE) * 1048048 < 16U*_1M - PAGE_SIZE * 2); + } + else + { + cbChunk = 4U*_1M; + cPagesPerChunk = 261616; /* max ~261627 */ + AssertCompile(sizeof(PGMRAMRANGE) + sizeof(PGMPAGE) * 261616 < 4U*_1M - PAGE_SIZE * 2); + } + AssertRelease(RT_UOFFSETOF_DYN(PGMRAMRANGE, aPages[cPagesPerChunk]) + PAGE_SIZE * 2 <= cbChunk); + + RTGCPHYS cPagesLeft = cPages; + RTGCPHYS GCPhysChunk = GCPhys; + uint32_t iChunk = 0; + while (cPagesLeft > 0) + { + uint32_t cPagesInChunk = cPagesLeft; + if (cPagesInChunk > cPagesPerChunk) + cPagesInChunk = cPagesPerChunk; + + rc = pgmR3PhysRegisterHighRamChunk(pVM, GCPhysChunk, cPagesInChunk, cbChunk, iChunk, pszDesc, &pPrev); + AssertRCReturn(rc, rc); + + /* advance */ + GCPhysChunk += (RTGCPHYS)cPagesInChunk << PAGE_SHIFT; + cPagesLeft -= cPagesInChunk; + iChunk++; + } + } + else + { + /* + * Allocate, initialize and link the new RAM range. + */ + const size_t cbRamRange = RT_UOFFSETOF_DYN(PGMRAMRANGE, aPages[cPages]); + PPGMRAMRANGE pNew; + rc = MMR3HyperAllocOnceNoRel(pVM, cbRamRange, 0, MM_TAG_PGM_PHYS, (void **)&pNew); + AssertLogRelMsgRCReturn(rc, ("cbRamRange=%zu\n", cbRamRange), rc); + + pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhysLast, NIL_RTRCPTR, NIL_RTR0PTR, pszDesc, pPrev); + } + pgmPhysInvalidatePageMapTLB(pVM); + + /* + * Notify NEM while holding the lock (experimental) and REM without (like always). + */ + rc = NEMR3NotifyPhysRamRegister(pVM, GCPhys, cb); + pgmUnlock(pVM); +#ifdef VBOX_WITH_REM + REMR3NotifyPhysRamRegister(pVM, GCPhys, cb, REM_NOTIFY_PHYS_RAM_FLAGS_RAM); +#endif + return rc; +} + + +/** + * Worker called by PGMR3InitFinalize if we're configured to pre-allocate RAM. + * + * We do this late in the init process so that all the ROM and MMIO ranges have + * been registered already and we don't go wasting memory on them. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + */ +int pgmR3PhysRamPreAllocate(PVM pVM) +{ + Assert(pVM->pgm.s.fRamPreAlloc); + Log(("pgmR3PhysRamPreAllocate: enter\n")); + + /* + * Walk the RAM ranges and allocate all RAM pages, halt at + * the first allocation error. 
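+     * Only pages still in the ZERO state need a real backing page; ballooned,
+     * allocated, write monitored and shared pages are left untouched.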
+ */ + uint64_t cPages = 0; + uint64_t NanoTS = RTTimeNanoTS(); + pgmLock(pVM); + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + { + PPGMPAGE pPage = &pRam->aPages[0]; + RTGCPHYS GCPhys = pRam->GCPhys; + uint32_t cLeft = pRam->cb >> PAGE_SHIFT; + while (cLeft-- > 0) + { + if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM) + { + switch (PGM_PAGE_GET_STATE(pPage)) + { + case PGM_PAGE_STATE_ZERO: + { + int rc = pgmPhysAllocPage(pVM, pPage, GCPhys); + if (RT_FAILURE(rc)) + { + LogRel(("PGM: RAM Pre-allocation failed at %RGp (in %s) with rc=%Rrc\n", GCPhys, pRam->pszDesc, rc)); + pgmUnlock(pVM); + return rc; + } + cPages++; + break; + } + + case PGM_PAGE_STATE_BALLOONED: + case PGM_PAGE_STATE_ALLOCATED: + case PGM_PAGE_STATE_WRITE_MONITORED: + case PGM_PAGE_STATE_SHARED: + /* nothing to do here. */ + break; + } + } + + /* next */ + pPage++; + GCPhys += PAGE_SIZE; + } + } + pgmUnlock(pVM); + NanoTS = RTTimeNanoTS() - NanoTS; + + LogRel(("PGM: Pre-allocated %llu pages in %llu ms\n", cPages, NanoTS / 1000000)); + Log(("pgmR3PhysRamPreAllocate: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Checks shared page checksums. + * + * @param pVM The cross context VM structure. + */ +void pgmR3PhysAssertSharedPageChecksums(PVM pVM) +{ +#ifdef VBOX_STRICT + pgmLock(pVM); + + if (pVM->pgm.s.cSharedPages > 0) + { + /* + * Walk the ram ranges. + */ + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + { + uint32_t iPage = pRam->cb >> PAGE_SHIFT; + AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb)); + + while (iPage-- > 0) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + if (PGM_PAGE_IS_SHARED(pPage)) + { + uint32_t u32Checksum = pPage->s.u2Unused0/* | ((uint32_t)pPage->s.u2Unused1 << 8)*/; + if (!u32Checksum) + { + RTGCPHYS GCPhysPage = pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + void const *pvPage; + int rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhysPage, &pvPage); + if (RT_SUCCESS(rc)) + { + uint32_t u32Checksum2 = RTCrc32(pvPage, PAGE_SIZE); +# if 0 + AssertMsg((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum, ("GCPhysPage=%RGp\n", GCPhysPage)); +# else + if ((u32Checksum2 & /*UINT32_C(0x00000303)*/ 0x3) == u32Checksum) + LogFlow(("shpg %#x @ %RGp %#x [OK]\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2)); + else + AssertMsgFailed(("shpg %#x @ %RGp %#x\n", PGM_PAGE_GET_PAGEID(pPage), GCPhysPage, u32Checksum2)); +# endif + } + else + AssertRC(rc); + } + } + + } /* for each page */ + + } /* for each ram range */ + } + + pgmUnlock(pVM); +#endif /* VBOX_STRICT */ + NOREF(pVM); +} + + +/** + * Resets the physical memory state. + * + * ASSUMES that the caller owns the PGM lock. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pgmR3PhysRamReset(PVM pVM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + + /* Reset the memory balloon. */ + int rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0); + AssertRC(rc); + +#ifdef VBOX_WITH_PAGE_SHARING + /* Clear all registered shared modules. */ + pgmR3PhysAssertSharedPageChecksums(pVM); + rc = GMMR3ResetSharedModules(pVM); + AssertRC(rc); +#endif + /* Reset counters. */ + pVM->pgm.s.cReusedSharedPages = 0; + pVM->pgm.s.cBalloonedPages = 0; + + return VINF_SUCCESS; +} + + +/** + * Resets (zeros) the RAM after all devices and components have been reset. + * + * ASSUMES that the caller owns the PGM lock. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + */ +int pgmR3PhysRamZeroAll(PVM pVM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + + /* + * We batch up pages that should be freed instead of calling GMM for + * each and every one of them. + */ + uint32_t cPendingPages = 0; + PGMMFREEPAGESREQ pReq; + int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE); + AssertLogRelRCReturn(rc, rc); + + /* + * Walk the ram ranges. + */ + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + { + uint32_t iPage = pRam->cb >> PAGE_SHIFT; + AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb)); + + if ( !pVM->pgm.s.fRamPreAlloc + && pVM->pgm.s.fZeroRamPagesOnReset) + { + /* Replace all RAM pages by ZERO pages. */ + while (iPage-- > 0) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + switch (PGM_PAGE_GET_TYPE(pPage)) + { + case PGMPAGETYPE_RAM: + /* Do not replace pages part of a 2 MB continuous range + with zero pages, but zero them instead. */ + if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE + || PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED) + { + void *pvPage; + rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pvPage); + AssertLogRelRCReturn(rc, rc); + ASMMemZeroPage(pvPage); + } + else if (PGM_PAGE_IS_BALLOONED(pPage)) + { + /* Turn into a zero page; the balloon status is lost when the VM reboots. */ + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO); + } + else if (!PGM_PAGE_IS_ZERO(pPage)) + { + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), + PGMPAGETYPE_RAM); + AssertLogRelRCReturn(rc, rc); + } + break; + + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. */ + pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), + true /*fDoAccounting*/); + break; + + case PGMPAGETYPE_MMIO2: + case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */ + case PGMPAGETYPE_ROM: + case PGMPAGETYPE_MMIO: + break; + default: + AssertFailed(); + } + } /* for each page */ + } + else + { + /* Zero the memory. */ + while (iPage-- > 0) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + switch (PGM_PAGE_GET_TYPE(pPage)) + { + case PGMPAGETYPE_RAM: + switch (PGM_PAGE_GET_STATE(pPage)) + { + case PGM_PAGE_STATE_ZERO: + break; + + case PGM_PAGE_STATE_BALLOONED: + /* Turn into a zero page; the balloon status is lost when the VM reboots. */ + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO); + break; + + case PGM_PAGE_STATE_SHARED: + case PGM_PAGE_STATE_WRITE_MONITORED: + rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT)); + AssertLogRelRCReturn(rc, rc); + RT_FALL_THRU(); + + case PGM_PAGE_STATE_ALLOCATED: + if (pVM->pgm.s.fZeroRamPagesOnReset) + { + void *pvPage; + rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pvPage); + AssertLogRelRCReturn(rc, rc); + ASMMemZeroPage(pvPage); + } + break; + } + break; + + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: /** @todo perhaps leave the special page alone? I don't think VT-x copes with this code. 
*/ + pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), + true /*fDoAccounting*/); + break; + + case PGMPAGETYPE_MMIO2: + case PGMPAGETYPE_ROM_SHADOW: + case PGMPAGETYPE_ROM: + case PGMPAGETYPE_MMIO: + break; + default: + AssertFailed(); + + } + } /* for each page */ + } + + } + + /* + * Finish off any pages pending freeing. + */ + if (cPendingPages) + { + rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages); + AssertLogRelRCReturn(rc, rc); + } + GMMR3FreePagesCleanup(pReq); + return VINF_SUCCESS; +} + + +/** + * Frees all RAM during VM termination + * + * ASSUMES that the caller owns the PGM lock. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int pgmR3PhysRamTerm(PVM pVM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + + /* Reset the memory balloon. */ + int rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0); + AssertRC(rc); + +#ifdef VBOX_WITH_PAGE_SHARING + /* + * Clear all registered shared modules. + */ + pgmR3PhysAssertSharedPageChecksums(pVM); + rc = GMMR3ResetSharedModules(pVM); + AssertRC(rc); + + /* + * Flush the handy pages updates to make sure no shared pages are hiding + * in there. (No unlikely if the VM shuts down, apparently.) + */ + rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_FLUSH_HANDY_PAGES, 0, NULL); +#endif + + /* + * We batch up pages that should be freed instead of calling GMM for + * each and every one of them. + */ + uint32_t cPendingPages = 0; + PGMMFREEPAGESREQ pReq; + rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE); + AssertLogRelRCReturn(rc, rc); + + /* + * Walk the ram ranges. + */ + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + { + uint32_t iPage = pRam->cb >> PAGE_SHIFT; + AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb)); + + while (iPage-- > 0) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + switch (PGM_PAGE_GET_TYPE(pPage)) + { + case PGMPAGETYPE_RAM: + /* Free all shared pages. Private pages are automatically freed during GMM VM cleanup. */ + /** @todo change this to explicitly free private pages here. */ + if (PGM_PAGE_IS_SHARED(pPage)) + { + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), + PGMPAGETYPE_RAM); + AssertLogRelRCReturn(rc, rc); + } + break; + + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: + case PGMPAGETYPE_MMIO2: + case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */ + case PGMPAGETYPE_ROM: + case PGMPAGETYPE_MMIO: + break; + default: + AssertFailed(); + } + } /* for each page */ + } + + /* + * Finish off any pages pending freeing. + */ + if (cPendingPages) + { + rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages); + AssertLogRelRCReturn(rc, rc); + } + GMMR3FreePagesCleanup(pReq); + return VINF_SUCCESS; +} + + +/** + * This is the interface IOM is using to register an MMIO region. + * + * It will check for conflicts and ensure that a RAM range structure + * is present before calling the PGMR3HandlerPhysicalRegister API to + * register the callbacks. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param GCPhys The start of the MMIO region. + * @param cb The size of the MMIO region. + * @param hType The physical access handler type registration. + * @param pvUserR3 The user argument for R3. + * @param pvUserR0 The user argument for R0. + * @param pvUserRC The user argument for RC. 
+ * @param pszDesc The description of the MMIO region. + */ +VMMR3DECL(int) PGMR3PhysMMIORegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMPHYSHANDLERTYPE hType, + RTR3PTR pvUserR3, RTR0PTR pvUserR0, RTRCPTR pvUserRC, const char *pszDesc) +{ + /* + * Assert on some assumption. + */ + VM_ASSERT_EMT(pVM); + AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertReturn(*pszDesc, VERR_INVALID_PARAMETER); + Assert(((PPGMPHYSHANDLERTYPEINT)MMHyperHeapOffsetToPtr(pVM, hType))->enmKind == PGMPHYSHANDLERKIND_MMIO); + + int rc = pgmLock(pVM); + AssertRCReturn(rc, rc); + + /* + * Make sure there's a RAM range structure for the region. + */ + RTGCPHYS GCPhysLast = GCPhys + (cb - 1); + bool fRamExists = false; + PPGMRAMRANGE pRamPrev = NULL; + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + while (pRam && GCPhysLast >= pRam->GCPhys) + { + if ( GCPhysLast >= pRam->GCPhys + && GCPhys <= pRam->GCPhysLast) + { + /* Simplification: all within the same range. */ + AssertLogRelMsgReturnStmt( GCPhys >= pRam->GCPhys + && GCPhysLast <= pRam->GCPhysLast, + ("%RGp-%RGp (MMIO/%s) falls partly outside %RGp-%RGp (%s)\n", + GCPhys, GCPhysLast, pszDesc, + pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc), + pgmUnlock(pVM), + VERR_PGM_RAM_CONFLICT); + + /* Check that it's all RAM or MMIO pages. */ + PCPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + uint32_t cLeft = cb >> PAGE_SHIFT; + while (cLeft-- > 0) + { + AssertLogRelMsgReturnStmt( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO, + ("%RGp-%RGp (MMIO/%s): %RGp is not a RAM or MMIO page - type=%d desc=%s\n", + GCPhys, GCPhysLast, pszDesc, pRam->GCPhys, PGM_PAGE_GET_TYPE(pPage), pRam->pszDesc), + pgmUnlock(pVM), + VERR_PGM_RAM_CONFLICT); + pPage++; + } + + /* Looks good. */ + fRamExists = true; + break; + } + + /* next */ + pRamPrev = pRam; + pRam = pRam->pNextR3; + } + PPGMRAMRANGE pNew; + if (fRamExists) + { + pNew = NULL; + + /* + * Make all the pages in the range MMIO/ZERO pages, freeing any + * RAM pages currently mapped here. This might not be 100% correct + * for PCI memory, but we're doing the same thing for MMIO2 pages. + */ + rc = pgmR3PhysFreePageRange(pVM, pRam, GCPhys, GCPhysLast, PGMPAGETYPE_MMIO); + AssertRCReturnStmt(rc, pgmUnlock(pVM), rc); + + /* Force a PGM pool flush as guest ram references have been changed. */ + /** @todo not entirely SMP safe; assuming for now the guest takes + * care of this internally (not touch mapped mmio while changing the + * mapping). */ + PVMCPU pVCpu = VMMGetCpu(pVM); + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + else + { + + /* + * No RAM range, insert an ad hoc one. + * + * Note that we don't have to tell REM about this range because + * PGMHandlerPhysicalRegisterEx will do that for us. + */ + Log(("PGMR3PhysMMIORegister: Adding ad hoc MMIO range for %RGp-%RGp %s\n", GCPhys, GCPhysLast, pszDesc)); + + const uint32_t cPages = cb >> PAGE_SHIFT; + const size_t cbRamRange = RT_UOFFSETOF_DYN(PGMRAMRANGE, aPages[cPages]); + rc = MMHyperAlloc(pVM, RT_UOFFSETOF_DYN(PGMRAMRANGE, aPages[cPages]), 16, MM_TAG_PGM_PHYS, (void **)&pNew); + AssertLogRelMsgRCReturnStmt(rc, ("cbRamRange=%zu\n", cbRamRange), pgmUnlock(pVM), rc); + + /* Initialize the range. 
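+         * All pages start out as zero pages of the MMIO type; the access
+         * handler registered further down supplies the actual MMIO semantics.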
*/ + pNew->pSelfR0 = MMHyperCCToR0(pVM, pNew); + pNew->pSelfRC = MMHyperCCToRC(pVM, pNew); + pNew->GCPhys = GCPhys; + pNew->GCPhysLast = GCPhysLast; + pNew->cb = cb; + pNew->pszDesc = pszDesc; + pNew->fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO; + pNew->pvR3 = NULL; + pNew->paLSPages = NULL; + + uint32_t iPage = cPages; + while (iPage-- > 0) + PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_MMIO); + Assert(PGM_PAGE_GET_TYPE(&pNew->aPages[0]) == PGMPAGETYPE_MMIO); + + /* update the page count stats. */ + pVM->pgm.s.cPureMmioPages += cPages; + pVM->pgm.s.cAllPages += cPages; + + /* link it */ + pgmR3PhysLinkRamRange(pVM, pNew, pRamPrev); + } + + /* + * Register the access handler. + */ + rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, hType, pvUserR3, pvUserR0, pvUserRC, pszDesc); + if ( RT_FAILURE(rc) + && !fRamExists) + { + pVM->pgm.s.cPureMmioPages -= cb >> PAGE_SHIFT; + pVM->pgm.s.cAllPages -= cb >> PAGE_SHIFT; + + /* remove the ad hoc range. */ + pgmR3PhysUnlinkRamRange2(pVM, pNew, pRamPrev); + pNew->cb = pNew->GCPhys = pNew->GCPhysLast = NIL_RTGCPHYS; + MMHyperFree(pVM, pRam); + } + pgmPhysInvalidatePageMapTLB(pVM); + + pgmUnlock(pVM); + return rc; +} + + +/** + * This is the interface IOM is using to register an MMIO region. + * + * It will take care of calling PGMHandlerPhysicalDeregister and clean up + * any ad hoc PGMRAMRANGE left behind. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhys The start of the MMIO region. + * @param cb The size of the MMIO region. + */ +VMMR3DECL(int) PGMR3PhysMMIODeregister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb) +{ + VM_ASSERT_EMT(pVM); + + int rc = pgmLock(pVM); + AssertRCReturn(rc, rc); + + /* + * First deregister the handler, then check if we should remove the ram range. + */ + rc = PGMHandlerPhysicalDeregister(pVM, GCPhys); + if (RT_SUCCESS(rc)) + { + RTGCPHYS GCPhysLast = GCPhys + (cb - 1); + PPGMRAMRANGE pRamPrev = NULL; + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + while (pRam && GCPhysLast >= pRam->GCPhys) + { + /** @todo We're being a bit too careful here. rewrite. */ + if ( GCPhysLast == pRam->GCPhysLast + && GCPhys == pRam->GCPhys) + { + Assert(pRam->cb == cb); + + /* + * See if all the pages are dead MMIO pages. + */ + uint32_t const cPages = cb >> PAGE_SHIFT; + bool fAllMMIO = true; + uint32_t iPage = 0; + uint32_t cLeft = cPages; + while (cLeft-- > 0) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + if ( !PGM_PAGE_IS_MMIO_OR_ALIAS(pPage) + /*|| not-out-of-action later */) + { + fAllMMIO = false; + AssertMsgFailed(("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage)); + break; + } + Assert( PGM_PAGE_IS_ZERO(pPage) + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO2_ALIAS_MMIO + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_SPECIAL_ALIAS_MMIO); + pPage++; + } + if (fAllMMIO) + { + /* + * Ad-hoc range, unlink and free it. + */ + Log(("PGMR3PhysMMIODeregister: Freeing ad hoc MMIO range for %RGp-%RGp %s\n", + GCPhys, GCPhysLast, pRam->pszDesc)); + + pVM->pgm.s.cAllPages -= cPages; + pVM->pgm.s.cPureMmioPages -= cPages; + + pgmR3PhysUnlinkRamRange2(pVM, pRam, pRamPrev); + pRam->cb = pRam->GCPhys = pRam->GCPhysLast = NIL_RTGCPHYS; + MMHyperFree(pVM, pRam); + break; + } + } + + /* + * Range match? It will all be within one range (see PGMAllHandler.cpp). + */ + if ( GCPhysLast >= pRam->GCPhys + && GCPhys <= pRam->GCPhysLast) + { + Assert(GCPhys >= pRam->GCPhys); + Assert(GCPhysLast <= pRam->GCPhysLast); + + /* + * Turn the pages back into RAM pages. 
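+             * Only the page type is flipped back to RAM here; the pages are
+             * expected to be zero or alias pages, so no backing is touched.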
+ */ + uint32_t iPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT; + uint32_t cLeft = cb >> PAGE_SHIFT; + while (cLeft--) + { + PPGMPAGE pPage = &pRam->aPages[iPage]; + AssertMsg( (PGM_PAGE_IS_MMIO(pPage) && PGM_PAGE_IS_ZERO(pPage)) + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO2_ALIAS_MMIO + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_SPECIAL_ALIAS_MMIO, + ("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage)); + if (PGM_PAGE_IS_MMIO_OR_ALIAS(pPage)) + PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_RAM); + } + break; + } + + /* next */ + pRamPrev = pRam; + pRam = pRam->pNextR3; + } + } + + /* Force a PGM pool flush as guest ram references have been changed. */ + /** @todo Not entirely SMP safe; assuming for now the guest takes care of + * this internally (not touch mapped mmio while changing the mapping). */ + PVMCPU pVCpu = VMMGetCpu(pVM); + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + + pgmPhysInvalidatePageMapTLB(pVM); + pgmPhysInvalidRamRangeTlbs(pVM); + pgmUnlock(pVM); + return rc; +} + + +/** + * Locate a MMIO2 range. + * + * @returns Pointer to the MMIO2 range. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number. + * @param iRegion The region. + */ +DECLINLINE(PPGMREGMMIORANGE) pgmR3PhysMMIOExFind(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion) +{ + /* + * Search the list. There shouldn't be many entries. + */ + /** @todo Optimize this lookup! There may now be many entries and it'll + * become really slow when doing MMR3HyperMapMMIO2 and similar. */ + for (PPGMREGMMIORANGE pCur = pVM->pgm.s.pRegMmioRangesR3; pCur; pCur = pCur->pNextR3) + if ( pCur->pDevInsR3 == pDevIns + && pCur->iRegion == iRegion + && pCur->iSubDev == iSubDev) + return pCur; + return NULL; +} + + +/** + * @callback_method_impl{FNPGMRELOCATE, Relocate a floating MMIO/MMIO2 range.} + * @sa pgmR3PhysRamRangeRelocate + */ +static DECLCALLBACK(bool) pgmR3PhysMMIOExRangeRelocate(PVM pVM, RTGCPTR GCPtrOld, RTGCPTR GCPtrNew, + PGMRELOCATECALL enmMode, void *pvUser) +{ + PPGMREGMMIORANGE pMmio = (PPGMREGMMIORANGE)pvUser; + Assert(pMmio->RamRange.fFlags & PGM_RAM_RANGE_FLAGS_FLOATING); + Assert(pMmio->RamRange.pSelfRC == GCPtrOld + PAGE_SIZE + RT_UOFFSETOF(PGMREGMMIORANGE, RamRange)); RT_NOREF_PV(GCPtrOld); + + switch (enmMode) + { + case PGMRELOCATECALL_SUGGEST: + return true; + + case PGMRELOCATECALL_RELOCATE: + { + /* + * Update myself, then relink all the ranges and flush the RC TLB. + */ + pgmLock(pVM); + + pMmio->RamRange.pSelfRC = (RTRCPTR)(GCPtrNew + PAGE_SIZE + RT_UOFFSETOF(PGMREGMMIORANGE, RamRange)); + + pgmR3PhysRelinkRamRanges(pVM); + for (unsigned i = 0; i < PGM_RAMRANGE_TLB_ENTRIES; i++) + pVM->pgm.s.apRamRangesTlbRC[i] = NIL_RTRCPTR; + + pgmUnlock(pVM); + return true; + } + + default: + AssertFailedReturn(false); + } +} + + +/** + * Calculates the number of chunks + * + * @returns Number of registration chunk needed. + * @param pVM The cross context VM structure. + * @param cb The size of the MMIO/MMIO2 range. + * @param pcPagesPerChunk Where to return the number of pages tracked by each + * chunk. Optional. + * @param pcbChunk Where to return the guest mapping size for a chunk. 
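+ *
+ * @remarks Worked example (figures only, not additional API): in a non
+ *          raw-mode (HM/NEM) configuration a chunk tracks 1048048 pages, so
+ *          an 8 GB region (2097152 pages) needs
+ *          ceil(2097152 / 1048048) = 3 registration chunks.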
+ */ +static uint16_t pgmR3PhysMMIOExCalcChunkCount(PVM pVM, RTGCPHYS cb, uint32_t *pcPagesPerChunk, uint32_t *pcbChunk) +{ + RT_NOREF_PV(pVM); /* without raw mode */ + + /* + * This is the same calculation as PGMR3PhysRegisterRam does, except we'll be + * needing a few bytes extra the PGMREGMMIORANGE structure. + * + * Note! In additions, we've got a 24 bit sub-page range for MMIO2 ranges, leaving + * us with an absolute maximum of 16777215 pages per chunk (close to 64 GB). + */ + uint32_t cbChunk; + uint32_t cPagesPerChunk; + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + cbChunk = 16U*_1M; + cPagesPerChunk = 1048048; /* max ~1048059 */ + AssertCompile(sizeof(PGMREGMMIORANGE) + sizeof(PGMPAGE) * 1048048 < 16U*_1M - PAGE_SIZE * 2); + } + else + { + cbChunk = 4U*_1M; + cPagesPerChunk = 261616; /* max ~261627 */ + AssertCompile(sizeof(PGMREGMMIORANGE) + sizeof(PGMPAGE) * 261616 < 4U*_1M - PAGE_SIZE * 2); + } + AssertRelease(cPagesPerChunk <= PGM_MMIO2_MAX_PAGE_COUNT); /* See above note. */ + AssertRelease(RT_UOFFSETOF_DYN(PGMREGMMIORANGE, RamRange.aPages[cPagesPerChunk]) + PAGE_SIZE * 2 <= cbChunk); + if (pcbChunk) + *pcbChunk = cbChunk; + if (pcPagesPerChunk) + *pcPagesPerChunk = cPagesPerChunk; + + /* Calc the number of chunks we need. */ + RTGCPHYS const cPages = cb >> X86_PAGE_SHIFT; + uint16_t cChunks = (uint16_t)((cPages + cPagesPerChunk - 1) / cPagesPerChunk); + AssertRelease((RTGCPHYS)cChunks * cPagesPerChunk >= cPages); + return cChunks; +} + + +/** + * Worker for PGMR3PhysMMIOExPreRegister & PGMR3PhysMMIO2Register that allocates + * and the PGMREGMMIORANGE structures and does basic initialization. + * + * Caller must set type specfic members and initialize the PGMPAGE structures. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number (internal PCI config number). + * @param iRegion The region number. If the MMIO2 memory is a PCI + * I/O region this number has to be the number of that + * region. Otherwise it can be any number safe + * UINT8_MAX. + * @param cb The size of the region. Must be page aligned. + * @param pszDesc The description. + * @param ppHeadRet Where to return the pointer to the first + * registration chunk. + * + * @thread EMT + */ +static int pgmR3PhysMMIOExCreate(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cb, + const char *pszDesc, PPGMREGMMIORANGE *ppHeadRet) +{ + /* + * Figure out how many chunks we need and of which size. + */ + uint32_t cPagesPerChunk; + uint16_t cChunks = pgmR3PhysMMIOExCalcChunkCount(pVM, cb, &cPagesPerChunk, NULL); + AssertReturn(cChunks, VERR_PGM_PHYS_MMIO_EX_IPE); + + /* + * Allocate the chunks. + */ + PPGMREGMMIORANGE *ppNext = ppHeadRet; + *ppNext = NULL; + + int rc = VINF_SUCCESS; + uint32_t cPagesLeft = cb >> X86_PAGE_SHIFT; + for (uint16_t iChunk = 0; iChunk < cChunks && RT_SUCCESS(rc); iChunk++) + { + /* + * We currently do a single RAM range for the whole thing. This will + * probably have to change once someone needs really large MMIO regions, + * as we will be running into SUPR3PageAllocEx limitations and such. + */ + const uint32_t cPagesTrackedByChunk = RT_MIN(cPagesLeft, cPagesPerChunk); + const size_t cbRange = RT_UOFFSETOF_DYN(PGMREGMMIORANGE, RamRange.aPages[cPagesTrackedByChunk]); + PPGMREGMMIORANGE pNew = NULL; + if ( iChunk + 1 < cChunks + || cbRange >= _1M) + { + /* + * Allocate memory for the registration structure. 
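+             * Large chunks are backed directly by SUPR3PageAllocEx rather than
+             * the hyper heap; like the high RAM chunks, cbChunk below reserves
+             * one page on each side of the guest mapping as a guard page.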
+ */ + size_t const cChunkPages = RT_ALIGN_Z(cbRange, PAGE_SIZE) >> PAGE_SHIFT; + size_t const cbChunk = (1 + cChunkPages + 1) << PAGE_SHIFT; + AssertLogRelBreakStmt(cbChunk == (uint32_t)cbChunk, rc = VERR_OUT_OF_RANGE); + PSUPPAGE paChunkPages = (PSUPPAGE)RTMemTmpAllocZ(sizeof(SUPPAGE) * cChunkPages); + AssertBreakStmt(paChunkPages, rc = VERR_NO_TMP_MEMORY); + RTR0PTR R0PtrChunk = NIL_RTR0PTR; + void *pvChunk = NULL; + rc = SUPR3PageAllocEx(cChunkPages, 0 /*fFlags*/, &pvChunk, +#if defined(VBOX_WITH_MORE_RING0_MEM_MAPPINGS) + &R0PtrChunk, +#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE) + VM_IS_HM_OR_NEM_ENABLED(pVM) ? &R0PtrChunk : NULL, +#else + NULL, +#endif + paChunkPages); + AssertLogRelMsgRCBreakStmt(rc, ("rc=%Rrc, cChunkPages=%#zx\n", rc, cChunkPages), RTMemTmpFree(paChunkPages)); + +#if defined(VBOX_WITH_MORE_RING0_MEM_MAPPINGS) + Assert(R0PtrChunk != NIL_RTR0PTR); +#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE) + if (!VM_IS_HM_OR_NEM_ENABLED(pVM)) + R0PtrChunk = NIL_RTR0PTR; +#else + R0PtrChunk = (uintptr_t)pvChunk; +#endif + memset(pvChunk, 0, cChunkPages << PAGE_SHIFT); + + pNew = (PPGMREGMMIORANGE)pvChunk; + pNew->RamRange.fFlags = PGM_RAM_RANGE_FLAGS_FLOATING; + pNew->RamRange.pSelfR0 = R0PtrChunk + RT_UOFFSETOF(PGMREGMMIORANGE, RamRange); + + /* + * If we might end up in raw-mode, make a HMA mapping of the range, + * just like we do for memory above 4GB. + */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + pNew->RamRange.pSelfRC = NIL_RTRCPTR; + else + { + RTGCPTR GCPtrChunkMap = pVM->pgm.s.GCPtrPrevRamRangeMapping - RT_ALIGN_Z(cbChunk, _4M); + RTGCPTR const GCPtrChunk = GCPtrChunkMap + PAGE_SIZE; + rc = PGMR3MapPT(pVM, GCPtrChunkMap, (uint32_t)cbChunk, 0 /*fFlags*/, pgmR3PhysMMIOExRangeRelocate, pNew, pszDesc); + if (RT_SUCCESS(rc)) + { + pVM->pgm.s.GCPtrPrevRamRangeMapping = GCPtrChunkMap; + + RTGCPTR GCPtrPage = GCPtrChunk; + for (uint32_t iPage = 0; iPage < cChunkPages && RT_SUCCESS(rc); iPage++, GCPtrPage += PAGE_SIZE) + rc = PGMMap(pVM, GCPtrPage, paChunkPages[iPage].Phys, PAGE_SIZE, 0); + } + if (RT_FAILURE(rc)) + { + SUPR3PageFreeEx(pvChunk, cChunkPages); + break; + } + pNew->RamRange.pSelfRC = GCPtrChunk + RT_UOFFSETOF(PGMREGMMIORANGE, RamRange); + } + } + /* + * Not so big, do a one time hyper allocation. + */ + else + { + rc = MMR3HyperAllocOnceNoRel(pVM, cbRange, 0, MM_TAG_PGM_PHYS, (void **)&pNew); + AssertLogRelMsgRCBreak(rc, ("cbRange=%zu\n", cbRange)); + + /* + * Initialize allocation specific items. + */ + //pNew->RamRange.fFlags = 0; + pNew->RamRange.pSelfR0 = MMHyperCCToR0(pVM, &pNew->RamRange); + pNew->RamRange.pSelfRC = MMHyperCCToRC(pVM, &pNew->RamRange); + } + + /* + * Initialize the registration structure (caller does specific bits). 
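+         * The commented out assignments below are fields the code relies on
+         * being zero already from the zeroed allocation.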
+ */ + pNew->pDevInsR3 = pDevIns; + //pNew->pvR3 = NULL; + //pNew->pNext = NULL; + //pNew->fFlags = 0; + if (iChunk == 0) + pNew->fFlags |= PGMREGMMIORANGE_F_FIRST_CHUNK; + if (iChunk + 1 == cChunks) + pNew->fFlags |= PGMREGMMIORANGE_F_LAST_CHUNK; + pNew->iSubDev = iSubDev; + pNew->iRegion = iRegion; + pNew->idSavedState = UINT8_MAX; + pNew->idMmio2 = UINT8_MAX; + //pNew->pPhysHandlerR3 = NULL; + //pNew->paLSPages = NULL; + pNew->RamRange.GCPhys = NIL_RTGCPHYS; + pNew->RamRange.GCPhysLast = NIL_RTGCPHYS; + pNew->RamRange.pszDesc = pszDesc; + pNew->RamRange.cb = pNew->cbReal = (RTGCPHYS)cPagesTrackedByChunk << X86_PAGE_SHIFT; + pNew->RamRange.fFlags |= PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO_EX; + //pNew->RamRange.pvR3 = NULL; + //pNew->RamRange.paLSPages = NULL; + + *ppNext = pNew; + ASMCompilerBarrier(); + cPagesLeft -= cPagesTrackedByChunk; + ppNext = &pNew->pNextR3; + } + Assert(cPagesLeft == 0); + + if (RT_SUCCESS(rc)) + { + Assert((*ppHeadRet)->fFlags & PGMREGMMIORANGE_F_FIRST_CHUNK); + return VINF_SUCCESS; + } + + /* + * Free floating ranges. + */ + while (*ppHeadRet) + { + PPGMREGMMIORANGE pFree = *ppHeadRet; + *ppHeadRet = pFree->pNextR3; + + if (pFree->RamRange.fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) + { + const size_t cbRange = RT_UOFFSETOF_DYN(PGMREGMMIORANGE, RamRange.aPages[pFree->RamRange.cb >> X86_PAGE_SHIFT]); + size_t const cChunkPages = RT_ALIGN_Z(cbRange, PAGE_SIZE) >> PAGE_SHIFT; + SUPR3PageFreeEx(pFree, cChunkPages); + } + } + + return rc; +} + + +/** + * Common worker PGMR3PhysMMIOExPreRegister & PGMR3PhysMMIO2Register that links + * a complete registration entry into the lists and lookup tables. + * + * @param pVM The cross context VM structure. + * @param pNew The new MMIO / MMIO2 registration to link. + */ +static void pgmR3PhysMMIOExLink(PVM pVM, PPGMREGMMIORANGE pNew) +{ + /* + * Link it into the list (order doesn't matter, so insert it at the head). + * + * Note! The range we're link may consist of multiple chunks, so we have to + * find the last one. + */ + PPGMREGMMIORANGE pLast = pNew; + for (pLast = pNew; ; pLast = pLast->pNextR3) + { + if (pLast->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + Assert(pLast->pNextR3); + Assert(pLast->pNextR3->pDevInsR3 == pNew->pDevInsR3); + Assert(pLast->pNextR3->iSubDev == pNew->iSubDev); + Assert(pLast->pNextR3->iRegion == pNew->iRegion); + Assert((pLast->pNextR3->fFlags & PGMREGMMIORANGE_F_MMIO2) == (pNew->fFlags & PGMREGMMIORANGE_F_MMIO2)); + Assert(pLast->pNextR3->idMmio2 == (pLast->fFlags & PGMREGMMIORANGE_F_MMIO2 ? pNew->idMmio2 + 1 : UINT8_MAX)); + } + + pgmLock(pVM); + + /* Link in the chain of ranges at the head of the list. */ + pLast->pNextR3 = pVM->pgm.s.pRegMmioRangesR3; + pVM->pgm.s.pRegMmioRangesR3 = pNew; + + /* If MMIO, insert the MMIO2 range/page IDs. */ + uint8_t idMmio2 = pNew->idMmio2; + if (idMmio2 != UINT8_MAX) + { + for (;;) + { + Assert(pNew->fFlags & PGMREGMMIORANGE_F_MMIO2); + Assert(pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] == NULL); + Assert(pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] == NIL_RTR0PTR); + pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] = pNew; + pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] = pNew->RamRange.pSelfR0 - RT_UOFFSETOF(PGMREGMMIORANGE, RamRange); + if (pNew->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + pNew = pNew->pNextR3; + } + } + else + Assert(!(pNew->fFlags & PGMREGMMIORANGE_F_MMIO2)); + + pgmPhysInvalidatePageMapTLB(pVM); + pgmUnlock(pVM); +} + + +/** + * Allocate and pre-register an MMIO region. + * + * This is currently the way to deal with large MMIO regions. 
It may in the + * future be extended to be the way we deal with all MMIO regions, but that + * means we'll have to do something about the simple list based approach we take + * to tracking the registrations. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the + * memory. + * @retval VERR_ALREADY_EXISTS if the region already exists. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number. + * @param iRegion The region number. If the MMIO2 memory is a PCI + * I/O region this number has to be the number of that + * region. Otherwise it can be any number safe + * UINT8_MAX. + * @param cbRegion The size of the region. Must be page aligned. + * @param hType The physical handler callback type. + * @param pvUserR3 User parameter for ring-3 context callbacks. + * @param pvUserR0 User parameter for ring-0 context callbacks. + * @param pvUserRC User parameter for raw-mode context callbacks. + * @param pszDesc The description. + * + * @thread EMT + * + * @sa PGMR3PhysMMIORegister, PGMR3PhysMMIO2Register, + * PGMR3PhysMMIOExMap, PGMR3PhysMMIOExUnmap, PGMR3PhysMMIOExDeregister. + */ +VMMR3DECL(int) PGMR3PhysMMIOExPreRegister(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cbRegion, + PGMPHYSHANDLERTYPE hType, RTR3PTR pvUserR3, RTR0PTR pvUserR0, RTRCPTR pvUserRC, + const char *pszDesc) +{ + /* + * Validate input. + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertReturn(*pszDesc, VERR_INVALID_PARAMETER); + AssertReturn(pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion) == NULL, VERR_ALREADY_EXISTS); + AssertReturn(!(cbRegion & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + AssertReturn(cbRegion, VERR_INVALID_PARAMETER); + + const uint32_t cPages = cbRegion >> PAGE_SHIFT; + AssertLogRelReturn(((RTGCPHYS)cPages << PAGE_SHIFT) == cbRegion, VERR_INVALID_PARAMETER); + AssertLogRelReturn(cPages <= (MM_MMIO_64_MAX >> X86_PAGE_SHIFT), VERR_OUT_OF_RANGE); + + /* + * For the 2nd+ instance, mangle the description string so it's unique. + */ + if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */ + { + pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance); + if (!pszDesc) + return VERR_NO_MEMORY; + } + + /* + * Register the MMIO callbacks. + */ + PPGMPHYSHANDLER pPhysHandler; + int rc = pgmHandlerPhysicalExCreate(pVM, hType, pvUserR3, pvUserR0, pvUserRC, pszDesc, &pPhysHandler); + if (RT_SUCCESS(rc)) + { + /* + * Create the registered MMIO range record for it. + */ + PPGMREGMMIORANGE pNew; + rc = pgmR3PhysMMIOExCreate(pVM, pDevIns, iSubDev, iRegion, cbRegion, pszDesc, &pNew); + if (RT_SUCCESS(rc)) + { + Assert(!(pNew->fFlags & PGMREGMMIORANGE_F_MMIO2)); + + /* + * Intialize the page structures and set up physical handlers (one for each chunk). 
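+             * (The first chunk takes ownership of the handler created above; every further
+             * chunk gets its own duplicate via pgmHandlerPhysicalExDup, since the access
+             * handler code deals with one range at a time and each chunk is registered
+             * separately when the region gets mapped.)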
+ */ + for (PPGMREGMMIORANGE pCur = pNew; pCur != NULL && RT_SUCCESS(rc); pCur = pCur->pNextR3) + { + if (pCur == pNew) + pCur->pPhysHandlerR3 = pPhysHandler; + else + rc = pgmHandlerPhysicalExDup(pVM, pPhysHandler, &pCur->pPhysHandlerR3); + + uint32_t iPage = pCur->RamRange.cb >> X86_PAGE_SHIFT; + while (iPage-- > 0) + PGM_PAGE_INIT_ZERO(&pCur->RamRange.aPages[iPage], pVM, PGMPAGETYPE_MMIO); + } + if (RT_SUCCESS(rc)) + { + /* + * Update the page count stats, link the registration and we're done. + */ + pVM->pgm.s.cAllPages += cPages; + pVM->pgm.s.cPureMmioPages += cPages; + + pgmR3PhysMMIOExLink(pVM, pNew); + return VINF_SUCCESS; + } + + /* + * Clean up in case we're out of memory for extra access handlers. + */ + while (pNew != NULL) + { + PPGMREGMMIORANGE pFree = pNew; + pNew = pFree->pNextR3; + + if (pFree->pPhysHandlerR3) + { + pgmHandlerPhysicalExDestroy(pVM, pFree->pPhysHandlerR3); + pFree->pPhysHandlerR3 = NULL; + } + + if (pFree->RamRange.fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) + { + const size_t cbRange = RT_UOFFSETOF_DYN(PGMREGMMIORANGE, RamRange.aPages[pFree->RamRange.cb >> X86_PAGE_SHIFT]); + size_t const cChunkPages = RT_ALIGN_Z(cbRange, PAGE_SIZE) >> PAGE_SHIFT; + SUPR3PageFreeEx(pFree, cChunkPages); + } + } + } + else + pgmHandlerPhysicalExDestroy(pVM, pPhysHandler); + } + return rc; +} + + +/** + * Allocate and register an MMIO2 region. + * + * As mentioned elsewhere, MMIO2 is just RAM spelled differently. It's RAM + * associated with a device. It is also non-shared memory with a permanent + * ring-3 mapping and page backing (presently). + * + * A MMIO2 range may overlap with base memory if a lot of RAM is configured for + * the VM, in which case we'll drop the base memory pages. Presently we will + * make no attempt to preserve anything that happens to be present in the base + * memory that is replaced, this is of course incorrect but it's too much + * effort. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the + * memory. + * @retval VERR_ALREADY_EXISTS if the region already exists. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number. + * @param iRegion The region number. If the MMIO2 memory is a PCI + * I/O region this number has to be the number of that + * region. Otherwise it can be any number safe + * UINT8_MAX. + * @param cb The size of the region. Must be page aligned. + * @param fFlags Reserved for future use, must be zero. + * @param ppv Where to store the pointer to the ring-3 mapping of + * the memory. + * @param pszDesc The description. + * @thread EMT + */ +VMMR3DECL(int) PGMR3PhysMMIO2Register(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cb, + uint32_t fFlags, void **ppv, const char *pszDesc) +{ + /* + * Validate input. 
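+     * (Same rules as PGMR3PhysMMIOExPreRegister above, plus ppv must be a valid
+     * output pointer and fFlags is reserved and must be zero.)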
+ */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertPtrReturn(ppv, VERR_INVALID_POINTER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertReturn(*pszDesc, VERR_INVALID_PARAMETER); + AssertReturn(pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion) == NULL, VERR_ALREADY_EXISTS); + AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + AssertReturn(cb, VERR_INVALID_PARAMETER); + AssertReturn(!fFlags, VERR_INVALID_PARAMETER); + + const uint32_t cPages = cb >> PAGE_SHIFT; + AssertLogRelReturn(((RTGCPHYS)cPages << PAGE_SHIFT) == cb, VERR_INVALID_PARAMETER); + AssertLogRelReturn(cPages <= (MM_MMIO_64_MAX >> X86_PAGE_SHIFT), VERR_OUT_OF_RANGE); + + /* + * For the 2nd+ instance, mangle the description string so it's unique. + */ + if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */ + { + pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance); + if (!pszDesc) + return VERR_NO_MEMORY; + } + + /* + * Allocate an MMIO2 range ID (not freed on failure). + * + * The zero ID is not used as it could be confused with NIL_GMM_PAGEID, so + * the IDs goes from 1 thru PGM_MMIO2_MAX_RANGES. + */ + unsigned cChunks = pgmR3PhysMMIOExCalcChunkCount(pVM, cb, NULL, NULL); + pgmLock(pVM); + uint8_t idMmio2 = pVM->pgm.s.cMmio2Regions + 1; + unsigned cNewMmio2Regions = pVM->pgm.s.cMmio2Regions + cChunks; + if (cNewMmio2Regions > PGM_MMIO2_MAX_RANGES) + { + pgmUnlock(pVM); + AssertLogRelFailedReturn(VERR_PGM_TOO_MANY_MMIO2_RANGES); + } + pVM->pgm.s.cMmio2Regions = cNewMmio2Regions; + pgmUnlock(pVM); + + /* + * Try reserve and allocate the backing memory first as this is what is + * most likely to fail. + */ + int rc = MMR3AdjustFixedReservation(pVM, cPages, pszDesc); + if (RT_SUCCESS(rc)) + { + PSUPPAGE paPages = (PSUPPAGE)RTMemTmpAlloc(cPages * sizeof(SUPPAGE)); + if (RT_SUCCESS(rc)) + { + void *pvPages; + rc = SUPR3PageAllocEx(cPages, 0 /*fFlags*/, &pvPages, NULL /*pR0Ptr*/, paPages); + if (RT_SUCCESS(rc)) + { + memset(pvPages, 0, cPages * PAGE_SIZE); + + /* + * Create the registered MMIO range record for it. + */ + PPGMREGMMIORANGE pNew; + rc = pgmR3PhysMMIOExCreate(pVM, pDevIns, iSubDev, iRegion, cb, pszDesc, &pNew); + if (RT_SUCCESS(rc)) + { + uint32_t iSrcPage = 0; + uint8_t *pbCurPages = (uint8_t *)pvPages; + for (PPGMREGMMIORANGE pCur = pNew; pCur; pCur = pCur->pNextR3) + { + pCur->pvR3 = pbCurPages; + pCur->RamRange.pvR3 = pbCurPages; + pCur->idMmio2 = idMmio2; + pCur->fFlags |= PGMREGMMIORANGE_F_MMIO2; + + uint32_t iDstPage = pCur->RamRange.cb >> X86_PAGE_SHIFT; + while (iDstPage-- > 0) + { + PGM_PAGE_INIT(&pNew->RamRange.aPages[iDstPage], + paPages[iDstPage + iSrcPage].Phys, + PGM_MMIO2_PAGEID_MAKE(idMmio2, iDstPage), + PGMPAGETYPE_MMIO2, PGM_PAGE_STATE_ALLOCATED); + } + + /* advance. */ + iSrcPage += pCur->RamRange.cb >> X86_PAGE_SHIFT; + pbCurPages += pCur->RamRange.cb; + idMmio2++; + } + + RTMemTmpFree(paPages); + + /* + * Update the page count stats, link the registration and we're done. 
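+                     * (MMIO2 backing pages are accounted as private pages here, whereas the
+                     * pre-registered MMIO case above counts them as pure MMIO pages.)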
+ */ + pVM->pgm.s.cAllPages += cPages; + pVM->pgm.s.cPrivatePages += cPages; + + pgmR3PhysMMIOExLink(pVM, pNew); + + *ppv = pvPages; + return VINF_SUCCESS; + } + + SUPR3PageFreeEx(pvPages, cPages); + } + } + RTMemTmpFree(paPages); + MMR3AdjustFixedReservation(pVM, -(int32_t)cPages, pszDesc); + } + if (pDevIns->iInstance > 0) + MMR3HeapFree((void *)pszDesc); + return rc; +} + + +/** + * Deregisters and frees an MMIO2 region or a pre-registered MMIO region + * + * Any physical (and virtual) access handlers registered for the region must + * be deregistered before calling this function. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number. Pass UINT32_MAX for wildcard + * matching. + * @param iRegion The region. Pass UINT32_MAX for wildcard matching. + */ +VMMR3DECL(int) PGMR3PhysMMIOExDeregister(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion) +{ + /* + * Validate input. + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX || iSubDev == UINT32_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX || iRegion == UINT32_MAX, VERR_INVALID_PARAMETER); + + /* + * The loop here scanning all registrations will make sure that multi-chunk ranges + * get properly deregistered, though it's original purpose was the wildcard iRegion. + */ + pgmLock(pVM); + int rc = VINF_SUCCESS; + unsigned cFound = 0; + PPGMREGMMIORANGE pPrev = NULL; + PPGMREGMMIORANGE pCur = pVM->pgm.s.pRegMmioRangesR3; + while (pCur) + { + if ( pCur->pDevInsR3 == pDevIns + && ( iRegion == UINT32_MAX + || pCur->iRegion == iRegion) + && ( iSubDev == UINT32_MAX + || pCur->iSubDev == iSubDev) ) + { + cFound++; + + /* + * Unmap it if it's mapped. + */ + if (pCur->fFlags & PGMREGMMIORANGE_F_MAPPED) + { + int rc2 = PGMR3PhysMMIOExUnmap(pVM, pCur->pDevInsR3, pCur->iSubDev, pCur->iRegion, pCur->RamRange.GCPhys); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + + /* + * Must tell IOM about MMIO (first one only). + */ + if ((pCur->fFlags & (PGMREGMMIORANGE_F_MMIO2 | PGMREGMMIORANGE_F_FIRST_CHUNK)) == PGMREGMMIORANGE_F_MMIO2) + IOMR3MmioExNotifyDeregistered(pVM, pCur->pPhysHandlerR3->pvUserR3); + + /* + * Unlink it + */ + PPGMREGMMIORANGE pNext = pCur->pNextR3; + if (pPrev) + pPrev->pNextR3 = pNext; + else + pVM->pgm.s.pRegMmioRangesR3 = pNext; + pCur->pNextR3 = NULL; + + uint8_t idMmio2 = pCur->idMmio2; + if (idMmio2 != UINT8_MAX) + { + Assert(pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] == pCur); + pVM->pgm.s.apMmio2RangesR3[idMmio2 - 1] = NULL; + pVM->pgm.s.apMmio2RangesR0[idMmio2 - 1] = NIL_RTR0PTR; + } + + /* + * Free the memory. + */ + uint32_t const cPages = pCur->cbReal >> PAGE_SHIFT; + if (pCur->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + int rc2 = SUPR3PageFreeEx(pCur->pvR3, cPages); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + + rc2 = MMR3AdjustFixedReservation(pVM, -(int32_t)cPages, pCur->RamRange.pszDesc); + AssertRC(rc2); + if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) + rc = rc2; + } + + /* we're leaking hyper memory here if done at runtime. 
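+               The non-floating case was allocated with MMR3HyperAllocOnceNoRel, which has
+               no working free (see the commented-out MMHyperFree below), so deregistration
+               is only expected during VM construction or teardown; the strict-build
+               assertion below checks the VM state accordingly.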
*/ +#ifdef VBOX_STRICT + VMSTATE const enmState = VMR3GetState(pVM); + AssertMsg( enmState == VMSTATE_POWERING_OFF + || enmState == VMSTATE_POWERING_OFF_LS + || enmState == VMSTATE_OFF + || enmState == VMSTATE_OFF_LS + || enmState == VMSTATE_DESTROYING + || enmState == VMSTATE_TERMINATED + || enmState == VMSTATE_CREATING + , ("%s\n", VMR3GetStateName(enmState))); +#endif + + const bool fIsMmio2 = RT_BOOL(pCur->fFlags & PGMREGMMIORANGE_F_MMIO2); + if (pCur->RamRange.fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) + { + const size_t cbRange = RT_UOFFSETOF_DYN(PGMREGMMIORANGE, RamRange.aPages[cPages]); + size_t const cChunkPages = RT_ALIGN_Z(cbRange, PAGE_SIZE) >> PAGE_SHIFT; + SUPR3PageFreeEx(pCur, cChunkPages); + } + /*else + { + rc = MMHyperFree(pVM, pCur); - does not work, see the alloc call. + AssertRCReturn(rc, rc); + } */ + + + /* update page count stats */ + pVM->pgm.s.cAllPages -= cPages; + if (fIsMmio2) + pVM->pgm.s.cPrivatePages -= cPages; + else + pVM->pgm.s.cPureMmioPages -= cPages; + + /* next */ + pCur = pNext; + } + else + { + pPrev = pCur; + pCur = pCur->pNextR3; + } + } + pgmPhysInvalidatePageMapTLB(pVM); + pgmUnlock(pVM); + return !cFound && iRegion != UINT32_MAX && iSubDev != UINT32_MAX ? VERR_NOT_FOUND : rc; +} + + +/** + * Maps a MMIO2 region or a pre-registered MMIO region. + * + * This is done when a guest / the bios / state loading changes the + * PCI config. The replacing of base memory has the same restrictions + * as during registration, of course. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number of the registered region. + * @param iRegion The index of the registered region. + * @param GCPhys The guest-physical address to be remapped. + */ +VMMR3DECL(int) PGMR3PhysMMIOExMap(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS GCPhys) +{ + /* + * Validate input. + * + * Note! It's safe to walk the MMIO/MMIO2 list since registrations only + * happens during VM construction. + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER); + AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER); + AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + + PPGMREGMMIORANGE pFirstMmio = pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion); + AssertReturn(pFirstMmio, VERR_NOT_FOUND); + Assert(pFirstMmio->fFlags & PGMREGMMIORANGE_F_FIRST_CHUNK); + + PPGMREGMMIORANGE pLastMmio = pFirstMmio; + RTGCPHYS cbRange = 0; + for (;;) + { + AssertReturn(!(pLastMmio->fFlags & PGMREGMMIORANGE_F_MAPPED), VERR_WRONG_ORDER); + Assert(pLastMmio->RamRange.GCPhys == NIL_RTGCPHYS); + Assert(pLastMmio->RamRange.GCPhysLast == NIL_RTGCPHYS); + Assert(pLastMmio->pDevInsR3 == pFirstMmio->pDevInsR3); + Assert(pLastMmio->iSubDev == pFirstMmio->iSubDev); + Assert(pLastMmio->iRegion == pFirstMmio->iRegion); + cbRange += pLastMmio->RamRange.cb; + if (pLastMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + pLastMmio = pLastMmio->pNextR3; + } + + RTGCPHYS GCPhysLast = GCPhys + cbRange - 1; + AssertLogRelReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER); + + /* + * Find our location in the ram range list, checking for restriction + * we don't bother implementing yet (partially overlapping, multiple + * ram ranges). 
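+     * Two layouts are handled: either the whole MMIO/MMIO2 range falls completely
+     * inside a single existing RAM range (fRamExists), or it does not touch RAM at
+     * all and the ranges prepared at registration time are linked in as new ad-hoc
+     * RAM ranges further down.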
+ */ + pgmLock(pVM); + + AssertReturnStmt(!(pFirstMmio->fFlags & PGMREGMMIORANGE_F_MAPPED), pgmUnlock(pVM), VERR_WRONG_ORDER); + + bool fRamExists = false; + PPGMRAMRANGE pRamPrev = NULL; + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + while (pRam && GCPhysLast >= pRam->GCPhys) + { + if ( GCPhys <= pRam->GCPhysLast + && GCPhysLast >= pRam->GCPhys) + { + /* Completely within? */ + AssertLogRelMsgReturnStmt( GCPhys >= pRam->GCPhys + && GCPhysLast <= pRam->GCPhysLast, + ("%RGp-%RGp (MMIOEx/%s) falls partly outside %RGp-%RGp (%s)\n", + GCPhys, GCPhysLast, pFirstMmio->RamRange.pszDesc, + pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc), + pgmUnlock(pVM), + VERR_PGM_RAM_CONFLICT); + + /* Check that all the pages are RAM pages. */ + PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + uint32_t cPagesLeft = cbRange >> PAGE_SHIFT; + while (cPagesLeft-- > 0) + { + AssertLogRelMsgReturnStmt(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM, + ("%RGp isn't a RAM page (%d) - mapping %RGp-%RGp (MMIO2/%s).\n", + GCPhys, PGM_PAGE_GET_TYPE(pPage), GCPhys, GCPhysLast, pFirstMmio->RamRange.pszDesc), + pgmUnlock(pVM), + VERR_PGM_RAM_CONFLICT); + pPage++; + } + + /* There can only be one MMIO/MMIO2 chunk matching here! */ + AssertLogRelMsgReturnStmt(pFirstMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK, + ("%RGp-%RGp (MMIOEx/%s, flags %#X) consists of multiple chunks whereas the RAM somehow doesn't!\n", + GCPhys, GCPhysLast, pFirstMmio->RamRange.pszDesc, pFirstMmio->fFlags), + pgmUnlock(pVM), + VERR_PGM_PHYS_MMIO_EX_IPE); + + fRamExists = true; + break; + } + + /* next */ + pRamPrev = pRam; + pRam = pRam->pNextR3; + } + Log(("PGMR3PhysMMIOExMap: %RGp-%RGp fRamExists=%RTbool %s\n", GCPhys, GCPhysLast, fRamExists, pFirstMmio->RamRange.pszDesc)); + + + /* + * Make the changes. + */ + RTGCPHYS GCPhysCur = GCPhys; + for (PPGMREGMMIORANGE pCurMmio = pFirstMmio; ; pCurMmio = pCurMmio->pNextR3) + { + pCurMmio->RamRange.GCPhys = GCPhysCur; + pCurMmio->RamRange.GCPhysLast = GCPhysCur + pCurMmio->RamRange.cb - 1; + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + { + Assert(pCurMmio->RamRange.GCPhysLast == GCPhysLast); + break; + } + GCPhysCur += pCurMmio->RamRange.cb; + } + + if (fRamExists) + { + /* + * Make all the pages in the range MMIO/ZERO pages, freeing any + * RAM pages currently mapped here. This might not be 100% correct + * for PCI memory, but we're doing the same thing for MMIO2 pages. + * + * We replace this MMIO/ZERO pages with real pages in the MMIO2 case. + */ + Assert(pFirstMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK); /* Only one chunk */ + + int rc = pgmR3PhysFreePageRange(pVM, pRam, GCPhys, GCPhysLast, PGMPAGETYPE_MMIO); + AssertRCReturnStmt(rc, pgmUnlock(pVM), rc); + + if (pFirstMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + /* replace the pages, freeing all present RAM pages. 
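+               Each RAM page below is overwritten with the HCPhys and page ID of the
+               corresponding MMIO2 backing page and marked allocated, and the zero
+               page count is adjusted to match.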
*/ + PPGMPAGE pPageSrc = &pFirstMmio->RamRange.aPages[0]; + PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + uint32_t cPagesLeft = pFirstMmio->RamRange.cb >> PAGE_SHIFT; + while (cPagesLeft-- > 0) + { + Assert(PGM_PAGE_IS_MMIO(pPageDst)); + + RTHCPHYS const HCPhys = PGM_PAGE_GET_HCPHYS(pPageSrc); + uint32_t const idPage = PGM_PAGE_GET_PAGEID(pPageSrc); + PGM_PAGE_SET_PAGEID(pVM, pPageDst, idPage); + PGM_PAGE_SET_HCPHYS(pVM, pPageDst, HCPhys); + PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO2); + PGM_PAGE_SET_STATE(pVM, pPageDst, PGM_PAGE_STATE_ALLOCATED); + PGM_PAGE_SET_PDE_TYPE(pVM, pPageDst, PGM_PAGE_PDE_TYPE_DONTCARE); + PGM_PAGE_SET_PTE_INDEX(pVM, pPageDst, 0); + PGM_PAGE_SET_TRACKING(pVM, pPageDst, 0); + /* (We tell NEM at the end of the function.) */ + + pVM->pgm.s.cZeroPages--; + GCPhys += PAGE_SIZE; + pPageSrc++; + pPageDst++; + } + } + + /* Flush physical page map TLB. */ + pgmPhysInvalidatePageMapTLB(pVM); + + /* Force a PGM pool flush as guest ram references have been changed. */ + /** @todo not entirely SMP safe; assuming for now the guest takes care of + * this internally (not touch mapped mmio while changing the mapping). */ + PVMCPU pVCpu = VMMGetCpu(pVM); + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + else + { + /* + * No RAM range, insert the ones prepared during registration. + */ + for (PPGMREGMMIORANGE pCurMmio = pFirstMmio; ; pCurMmio = pCurMmio->pNextR3) + { + /* Clear the tracking data of pages we're going to reactivate. */ + PPGMPAGE pPageSrc = &pCurMmio->RamRange.aPages[0]; + uint32_t cPagesLeft = pCurMmio->RamRange.cb >> PAGE_SHIFT; + while (cPagesLeft-- > 0) + { + PGM_PAGE_SET_TRACKING(pVM, pPageSrc, 0); + PGM_PAGE_SET_PTE_INDEX(pVM, pPageSrc, 0); + pPageSrc++; + } + + /* link in the ram range */ + pgmR3PhysLinkRamRange(pVM, &pCurMmio->RamRange, pRamPrev); + + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + { + Assert(pCurMmio->RamRange.GCPhysLast == GCPhysLast); + break; + } + pRamPrev = &pCurMmio->RamRange; + } + } + + /* + * Register the access handler if plain MMIO. + * + * We must register access handlers for each range since the access handler + * code refuses to deal with multiple ranges (and we can). + */ + if (!(pFirstMmio->fFlags & PGMREGMMIORANGE_F_MMIO2)) + { + int rc = VINF_SUCCESS; + for (PPGMREGMMIORANGE pCurMmio = pFirstMmio; ; pCurMmio = pCurMmio->pNextR3) + { + Assert(!(pCurMmio->fFlags & PGMREGMMIORANGE_F_MAPPED)); + rc = pgmHandlerPhysicalExRegister(pVM, pCurMmio->pPhysHandlerR3, pCurMmio->RamRange.GCPhys, + pCurMmio->RamRange.GCPhysLast); + if (RT_FAILURE(rc)) + break; + pCurMmio->fFlags |= PGMREGMMIORANGE_F_MAPPED; /* Use this to mark that the handler is registered. */ + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + { + rc = IOMR3MmioExNotifyMapped(pVM, pFirstMmio->pPhysHandlerR3->pvUserR3, GCPhys); + break; + } + } + if (RT_FAILURE(rc)) + { + /* Almost impossible, but try clean up properly and get out of here. 
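+               Undo in reverse order: deregister the handlers we managed to register,
+               unlink the ad-hoc RAM ranges (or restore ZERO pages when overlapping
+               existing RAM), and reset the GCPhys members to NIL again.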
*/ + for (PPGMREGMMIORANGE pCurMmio = pFirstMmio; ; pCurMmio = pCurMmio->pNextR3) + { + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_MAPPED) + { + pCurMmio->fFlags &= ~PGMREGMMIORANGE_F_MAPPED; + pgmHandlerPhysicalExDeregister(pVM, pCurMmio->pPhysHandlerR3, fRamExists); + } + + if (!fRamExists) + pgmR3PhysUnlinkRamRange(pVM, &pCurMmio->RamRange); + else + { + Assert(pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK); /* Only one chunk */ + + uint32_t cPagesLeft = pCurMmio->RamRange.cb >> PAGE_SHIFT; + PPGMPAGE pPageDst = &pRam->aPages[(pCurMmio->RamRange.GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + while (cPagesLeft-- > 0) + { + PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM); + pPageDst++; + } + } + + pCurMmio->RamRange.GCPhys = NIL_RTGCPHYS; + pCurMmio->RamRange.GCPhysLast = NIL_RTGCPHYS; + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + } + + pgmUnlock(pVM); + return rc; + } + } + + /* + * We're good, set the flags and invalid the mapping TLB. + */ + for (PPGMREGMMIORANGE pCurMmio = pFirstMmio; ; pCurMmio = pCurMmio->pNextR3) + { + pCurMmio->fFlags |= PGMREGMMIORANGE_F_MAPPED; + if (fRamExists) + pCurMmio->fFlags |= PGMREGMMIORANGE_F_OVERLAPPING; + else + pCurMmio->fFlags &= ~PGMREGMMIORANGE_F_OVERLAPPING; + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + } + pgmPhysInvalidatePageMapTLB(pVM); + + /* + * Notify NEM while holding the lock (experimental) and REM without (like always). + */ + uint32_t const fNemNotify = (pFirstMmio->fFlags & PGMREGMMIORANGE_F_MMIO2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) + | (pFirstMmio->fFlags & PGMREGMMIORANGE_F_OVERLAPPING ? NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE : 0); + int rc = NEMR3NotifyPhysMmioExMap(pVM, GCPhys, cbRange, fNemNotify, pFirstMmio->pvR3); + + pgmUnlock(pVM); + +#ifdef VBOX_WITH_REM + if (!fRamExists && (pFirstMmio->fFlags & PGMREGMMIORANGE_F_MMIO2)) /** @todo this doesn't look right. */ + REMR3NotifyPhysRamRegister(pVM, GCPhys, cbRange, REM_NOTIFY_PHYS_RAM_FLAGS_MMIO2); +#endif + return rc; +} + + +/** + * Unmaps a MMIO2 or a pre-registered MMIO region. + * + * This is done when a guest / the bios / state loading changes the + * PCI config. The replacing of base memory has the same restrictions + * as during registration, of course. 
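+ *
+ * A rough usage sketch from a PCI mapping callback (GCPhysOld and GCPhysNew are
+ * illustrative names):
+ * @code
+ *      if (GCPhysOld != NIL_RTGCPHYS)
+ *          rc = PGMR3PhysMMIOExUnmap(pVM, pDevIns, iSubDev, iRegion, GCPhysOld);
+ *      if (RT_SUCCESS(rc) && GCPhysNew != NIL_RTGCPHYS)
+ *          rc = PGMR3PhysMMIOExMap(pVM, pDevIns, iSubDev, iRegion, GCPhysNew);
+ * @endcode
+ *
+ * @returns VBox status code.
+ *
+ * @param   pVM         The cross context VM structure.
+ * @param   pDevIns     The device instance owning the region.
+ * @param   iSubDev     The sub-device number of the registered region.
+ * @param   iRegion     The index of the registered region.
+ * @param   GCPhys      The guest-physical address the region is currently mapped at.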
+ */ +VMMR3DECL(int) PGMR3PhysMMIOExUnmap(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS GCPhys) +{ + /* + * Validate input + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER); + AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER); + AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + + PPGMREGMMIORANGE pFirstMmio = pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion); + AssertReturn(pFirstMmio, VERR_NOT_FOUND); + Assert(pFirstMmio->fFlags & PGMREGMMIORANGE_F_FIRST_CHUNK); + + PPGMREGMMIORANGE pLastMmio = pFirstMmio; + RTGCPHYS cbRange = 0; + for (;;) + { + AssertReturn(pLastMmio->fFlags & PGMREGMMIORANGE_F_MAPPED, VERR_WRONG_ORDER); + AssertReturn(pLastMmio->RamRange.GCPhys == GCPhys + cbRange, VERR_INVALID_PARAMETER); + Assert(pLastMmio->pDevInsR3 == pFirstMmio->pDevInsR3); + Assert(pLastMmio->iSubDev == pFirstMmio->iSubDev); + Assert(pLastMmio->iRegion == pFirstMmio->iRegion); + cbRange += pLastMmio->RamRange.cb; + if (pLastMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + pLastMmio = pLastMmio->pNextR3; + } + + Log(("PGMR3PhysMMIOExUnmap: %RGp-%RGp %s\n", + pFirstMmio->RamRange.GCPhys, pLastMmio->RamRange.GCPhysLast, pFirstMmio->RamRange.pszDesc)); + + int rc = pgmLock(pVM); + AssertRCReturn(rc, rc); + uint16_t const fOldFlags = pFirstMmio->fFlags; + AssertReturnStmt(fOldFlags & PGMREGMMIORANGE_F_MAPPED, pgmUnlock(pVM), VERR_WRONG_ORDER); + + /* + * If plain MMIO, we must deregister the handlers first. + */ + if (!(fOldFlags & PGMREGMMIORANGE_F_MMIO2)) + { + PPGMREGMMIORANGE pCurMmio = pFirstMmio; + rc = pgmHandlerPhysicalExDeregister(pVM, pFirstMmio->pPhysHandlerR3, RT_BOOL(fOldFlags & PGMREGMMIORANGE_F_OVERLAPPING)); + AssertRCReturnStmt(rc, pgmUnlock(pVM), rc); + while (!(pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK)) + { + pCurMmio = pCurMmio->pNextR3; + rc = pgmHandlerPhysicalExDeregister(pVM, pCurMmio->pPhysHandlerR3, RT_BOOL(fOldFlags & PGMREGMMIORANGE_F_OVERLAPPING)); + AssertRCReturnStmt(rc, pgmUnlock(pVM), VERR_PGM_PHYS_MMIO_EX_IPE); + } + + IOMR3MmioExNotifyUnmapped(pVM, pFirstMmio->pPhysHandlerR3->pvUserR3, GCPhys); + } + + /* + * Unmap it. + */ + RTGCPHYS const GCPhysRangeNotify = pFirstMmio->RamRange.GCPhys; + if (fOldFlags & PGMREGMMIORANGE_F_OVERLAPPING) + { + /* + * We've replaced RAM, replace with zero pages. + * + * Note! This is where we might differ a little from a real system, because + * it's likely to just show the RAM pages as they were before the + * MMIO/MMIO2 region was mapped here. + */ + /* Only one chunk allowed when overlapping! */ + Assert(fOldFlags & PGMREGMMIORANGE_F_LAST_CHUNK); + + /* Restore the RAM pages we've replaced. */ + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + while (pRam->GCPhys > pFirstMmio->RamRange.GCPhysLast) + pRam = pRam->pNextR3; + + uint32_t cPagesLeft = pFirstMmio->RamRange.cb >> PAGE_SHIFT; + if (fOldFlags & PGMREGMMIORANGE_F_MMIO2) + pVM->pgm.s.cZeroPages += cPagesLeft; + + PPGMPAGE pPageDst = &pRam->aPages[(pFirstMmio->RamRange.GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + while (cPagesLeft-- > 0) + { + PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM); + pPageDst++; + } + + /* Flush physical page map TLB. */ + pgmPhysInvalidatePageMapTLB(pVM); + + /* Update range state. 
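+            Reset GCPhys/GCPhysLast to NIL and clear the MAPPED and OVERLAPPING flags
+            so the range can be mapped again later.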
*/ + pFirstMmio->RamRange.GCPhys = NIL_RTGCPHYS; + pFirstMmio->RamRange.GCPhysLast = NIL_RTGCPHYS; + pFirstMmio->fFlags &= ~(PGMREGMMIORANGE_F_OVERLAPPING | PGMREGMMIORANGE_F_MAPPED); + } + else + { + /* + * Unlink the chunks related to the MMIO/MMIO2 region. + */ + for (PPGMREGMMIORANGE pCurMmio = pFirstMmio; ; pCurMmio = pCurMmio->pNextR3) + { + pgmR3PhysUnlinkRamRange(pVM, &pCurMmio->RamRange); + pCurMmio->RamRange.GCPhys = NIL_RTGCPHYS; + pCurMmio->RamRange.GCPhysLast = NIL_RTGCPHYS; + pCurMmio->fFlags &= ~(PGMREGMMIORANGE_F_OVERLAPPING | PGMREGMMIORANGE_F_MAPPED); + if (pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK) + break; + } + } + + /* Force a PGM pool flush as guest ram references have been changed. */ + /** @todo not entirely SMP safe; assuming for now the guest takes care + * of this internally (not touch mapped mmio while changing the + * mapping). */ + PVMCPU pVCpu = VMMGetCpu(pVM); + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + + pgmPhysInvalidatePageMapTLB(pVM); + pgmPhysInvalidRamRangeTlbs(pVM); + + /* + * Notify NEM while holding the lock (experimental) and REM without (like always). + */ + uint32_t const fNemFlags = (fOldFlags & PGMREGMMIORANGE_F_MMIO2 ? NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2 : 0) + | (fOldFlags & PGMREGMMIORANGE_F_OVERLAPPING ? NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE : 0); + rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhysRangeNotify, cbRange, fNemFlags); + pgmUnlock(pVM); +#ifdef VBOX_WITH_REM + if ((fOldFlags & (PGMREGMMIORANGE_F_OVERLAPPING | PGMREGMMIORANGE_F_MMIO2)) == PGMREGMMIORANGE_F_MMIO2) + REMR3NotifyPhysRamDeregister(pVM, GCPhysRangeNotify, cbRange); +#endif + return rc; +} + + +/** + * Reduces the mapping size of a MMIO2 or pre-registered MMIO region. + * + * This is mainly for dealing with old saved states after changing the default + * size of a mapping region. See PGMDevHlpMMIOExReduce and + * PDMPCIDEV::pfnRegionLoadChangeHookR3. + * + * The region must not currently be mapped when making this call. The VM state + * must be state restore or VM construction. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the region. + * @param iSubDev The sub-device number of the registered region. + * @param iRegion The index of the registered region. + * @param cbRegion The new mapping size. + */ +VMMR3_INT_DECL(int) PGMR3PhysMMIOExReduce(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, RTGCPHYS cbRegion) +{ + /* + * Validate input + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(cbRegion >= X86_PAGE_SIZE, VERR_INVALID_PARAMETER); + AssertReturn(!(cbRegion & X86_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT); + VMSTATE enmVmState = VMR3GetState(pVM); + AssertLogRelMsgReturn( enmVmState == VMSTATE_CREATING + || enmVmState == VMSTATE_LOADING, + ("enmVmState=%d (%s)\n", enmVmState, VMR3GetStateName(enmVmState)), + VERR_VM_INVALID_VM_STATE); + + int rc = pgmLock(pVM); + AssertRCReturn(rc, rc); + + PPGMREGMMIORANGE pFirstMmio = pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion); + if (pFirstMmio) + { + Assert(pFirstMmio->fFlags & PGMREGMMIORANGE_F_FIRST_CHUNK); + if (!(pFirstMmio->fFlags & PGMREGMMIORANGE_F_MAPPED)) + { + /* + * NOTE! Current implementation does not support multiple ranges. 
+ * Implement when there is a real world need and thus a testcase. + */ + AssertLogRelMsgStmt(pFirstMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK, + ("%s: %#x\n", pFirstMmio->RamRange.pszDesc, pFirstMmio->fFlags), + rc = VERR_NOT_SUPPORTED); + if (RT_SUCCESS(rc)) + { + /* + * Make the change. + */ + Log(("PGMR3PhysMMIOExReduce: %s changes from %RGp bytes (%RGp) to %RGp bytes.\n", + pFirstMmio->RamRange.pszDesc, pFirstMmio->RamRange.cb, pFirstMmio->cbReal, cbRegion)); + + AssertLogRelMsgStmt(cbRegion <= pFirstMmio->cbReal, + ("%s: cbRegion=%#RGp cbReal=%#RGp\n", pFirstMmio->RamRange.pszDesc, cbRegion, pFirstMmio->cbReal), + rc = VERR_OUT_OF_RANGE); + if (RT_SUCCESS(rc)) + { + pFirstMmio->RamRange.cb = cbRegion; + } + } + } + else + rc = VERR_WRONG_ORDER; + } + else + rc = VERR_NOT_FOUND; + + pgmUnlock(pVM); + return rc; +} + + +/** + * Checks if the given address is an MMIO2 or pre-registered MMIO base address + * or not. + * + * @returns true/false accordingly. + * @param pVM The cross context VM structure. + * @param pDevIns The owner of the memory, optional. + * @param GCPhys The address to check. + */ +VMMR3DECL(bool) PGMR3PhysMMIOExIsBase(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys) +{ + /* + * Validate input + */ + VM_ASSERT_EMT_RETURN(pVM, false); + AssertPtrReturn(pDevIns, false); + AssertReturn(GCPhys != NIL_RTGCPHYS, false); + AssertReturn(GCPhys != 0, false); + AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), false); + + /* + * Search the list. + */ + pgmLock(pVM); + for (PPGMREGMMIORANGE pCurMmio = pVM->pgm.s.pRegMmioRangesR3; pCurMmio; pCurMmio = pCurMmio->pNextR3) + if (pCurMmio->RamRange.GCPhys == GCPhys) + { + Assert(pCurMmio->fFlags & PGMREGMMIORANGE_F_MAPPED); + bool fRet = RT_BOOL(pCurMmio->fFlags & PGMREGMMIORANGE_F_FIRST_CHUNK); + pgmUnlock(pVM); + return fRet; + } + pgmUnlock(pVM); + return false; +} + + +/** + * Gets the HC physical address of a page in the MMIO2 region. + * + * This is API is intended for MMHyper and shouldn't be called + * by anyone else... + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The owner of the memory, optional. + * @param iSubDev Sub-device number. + * @param iRegion The region. + * @param off The page expressed an offset into the MMIO2 region. + * @param pHCPhys Where to store the result. + */ +VMMR3_INT_DECL(int) PGMR3PhysMMIO2GetHCPhys(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, + RTGCPHYS off, PRTHCPHYS pHCPhys) +{ + /* + * Validate input + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + + pgmLock(pVM); + PPGMREGMMIORANGE pCurMmio = pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion); + AssertReturn(pCurMmio, VERR_NOT_FOUND); + AssertReturn(pCurMmio->fFlags & (PGMREGMMIORANGE_F_MMIO2 | PGMREGMMIORANGE_F_FIRST_CHUNK), VERR_WRONG_TYPE); + + while ( off >= pCurMmio->RamRange.cb + && !(pCurMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK)) + { + off -= pCurMmio->RamRange.cb; + pCurMmio = pCurMmio->pNextR3; + } + AssertReturn(off < pCurMmio->RamRange.cb, VERR_INVALID_PARAMETER); + + PCPGMPAGE pPage = &pCurMmio->RamRange.aPages[off >> PAGE_SHIFT]; + *pHCPhys = PGM_PAGE_GET_HCPHYS(pPage); + pgmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * Maps a portion of an MMIO2 region into kernel space (host). 
+ * + * The kernel mapping will become invalid when the MMIO2 memory is deregistered + * or the VM is terminated. + * + * @return VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns The device owning the MMIO2 memory. + * @param iSubDev The sub-device number. + * @param iRegion The region. + * @param off The offset into the region. Must be page aligned. + * @param cb The number of bytes to map. Must be page aligned. + * @param pszDesc Mapping description. + * @param pR0Ptr Where to store the R0 address. + */ +VMMR3_INT_DECL(int) PGMR3PhysMMIO2MapKernel(PVM pVM, PPDMDEVINS pDevIns, uint32_t iSubDev, uint32_t iRegion, + RTGCPHYS off, RTGCPHYS cb, const char *pszDesc, PRTR0PTR pR0Ptr) +{ + /* + * Validate input. + */ + VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT); + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(iSubDev <= UINT8_MAX, VERR_INVALID_PARAMETER); + AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER); + + PPGMREGMMIORANGE pFirstRegMmio = pgmR3PhysMMIOExFind(pVM, pDevIns, iSubDev, iRegion); + AssertReturn(pFirstRegMmio, VERR_NOT_FOUND); + AssertReturn(pFirstRegMmio->fFlags & (PGMREGMMIORANGE_F_MMIO2 | PGMREGMMIORANGE_F_FIRST_CHUNK), VERR_WRONG_TYPE); + AssertReturn(off < pFirstRegMmio->RamRange.cb, VERR_INVALID_PARAMETER); + AssertReturn(cb <= pFirstRegMmio->RamRange.cb, VERR_INVALID_PARAMETER); + AssertReturn(off + cb <= pFirstRegMmio->RamRange.cb, VERR_INVALID_PARAMETER); + NOREF(pszDesc); + + /* + * Pass the request on to the support library/driver. + */ +#if defined(RT_OS_WINDOWS) || defined(RT_OS_LINUX) || defined(RT_OS_OS2) /** @todo Fully implement RTR0MemObjMapKernelEx everywhere. */ + AssertLogRelReturn(off == 0, VERR_NOT_SUPPORTED); + AssertLogRelReturn(pFirstRegMmio->fFlags & PGMREGMMIORANGE_F_LAST_CHUNK, VERR_NOT_SUPPORTED); + int rc = SUPR3PageMapKernel(pFirstRegMmio->pvR3, 0 /*off*/, pFirstRegMmio->RamRange.cb, 0 /*fFlags*/, pR0Ptr); +#else + int rc = SUPR3PageMapKernel(pFirstRegMmio->pvR3, off, cb, 0 /*fFlags*/, pR0Ptr); +#endif + + return rc; +} + + +/** + * Worker for PGMR3PhysRomRegister. + * + * This is here to simplify lock management, i.e. the caller does all the + * locking and we can simply return without needing to remember to unlock + * anything first. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the ROM. + * @param GCPhys First physical address in the range. + * Must be page aligned! + * @param cb The size of the range (in bytes). + * Must be page aligned! + * @param pvBinary Pointer to the binary data backing the ROM image. + * @param cbBinary The size of the binary data pvBinary points to. + * This must be less or equal to @a cb. + * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED + * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY. + * @param pszDesc Pointer to description string. This must not be freed. + */ +static int pgmR3PhysRomRegisterLocked(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb, + const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc) +{ + /* + * Validate input. 
+ */ + AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER); + AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER); + AssertReturn(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER); + RTGCPHYS GCPhysLast = GCPhys + (cb - 1); + AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER); + AssertPtrReturn(pvBinary, VERR_INVALID_PARAMETER); + AssertPtrReturn(pszDesc, VERR_INVALID_POINTER); + AssertReturn(!(fFlags & ~(PGMPHYS_ROM_FLAGS_SHADOWED | PGMPHYS_ROM_FLAGS_PERMANENT_BINARY)), VERR_INVALID_PARAMETER); + VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); + + const uint32_t cPages = cb >> PAGE_SHIFT; + + /* + * Find the ROM location in the ROM list first. + */ + PPGMROMRANGE pRomPrev = NULL; + PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; + while (pRom && GCPhysLast >= pRom->GCPhys) + { + if ( GCPhys <= pRom->GCPhysLast + && GCPhysLast >= pRom->GCPhys) + AssertLogRelMsgFailedReturn(("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n", + GCPhys, GCPhysLast, pszDesc, + pRom->GCPhys, pRom->GCPhysLast, pRom->pszDesc), + VERR_PGM_RAM_CONFLICT); + /* next */ + pRomPrev = pRom; + pRom = pRom->pNextR3; + } + + /* + * Find the RAM location and check for conflicts. + * + * Conflict detection is a bit different than for RAM + * registration since a ROM can be located within a RAM + * range. So, what we have to check for is other memory + * types (other than RAM that is) and that we don't span + * more than one RAM range (layz). + */ + bool fRamExists = false; + PPGMRAMRANGE pRamPrev = NULL; + PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + while (pRam && GCPhysLast >= pRam->GCPhys) + { + if ( GCPhys <= pRam->GCPhysLast + && GCPhysLast >= pRam->GCPhys) + { + /* completely within? */ + AssertLogRelMsgReturn( GCPhys >= pRam->GCPhys + && GCPhysLast <= pRam->GCPhysLast, + ("%RGp-%RGp (%s) falls partly outside %RGp-%RGp (%s)\n", + GCPhys, GCPhysLast, pszDesc, + pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc), + VERR_PGM_RAM_CONFLICT); + fRamExists = true; + break; + } + + /* next */ + pRamPrev = pRam; + pRam = pRam->pNextR3; + } + if (fRamExists) + { + PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + uint32_t cPagesLeft = cPages; + while (cPagesLeft-- > 0) + { + AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM, + ("%RGp (%R[pgmpage]) isn't a RAM page - registering %RGp-%RGp (%s).\n", + pRam->GCPhys + ((RTGCPHYS)(uintptr_t)(pPage - &pRam->aPages[0]) << PAGE_SHIFT), + pPage, GCPhys, GCPhysLast, pszDesc), VERR_PGM_RAM_CONFLICT); + Assert(PGM_PAGE_IS_ZERO(pPage)); + pPage++; + } + } + + /* + * Update the base memory reservation if necessary. + */ + uint32_t cExtraBaseCost = fRamExists ? 0 : cPages; + if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + cExtraBaseCost += cPages; + if (cExtraBaseCost) + { + int rc = MMR3IncreaseBaseReservation(pVM, cExtraBaseCost); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Allocate memory for the virgin copy of the RAM. 
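+     * The pages come from GMM in one batch: GMMR3AllocatePagesPrepare sets up a
+     * request with an entry per ROM page, GMMR3AllocatePagesPerform executes it,
+     * and GMMR3AllocatePagesCleanup (plus GMMR3FreeAllocatedPages on failure)
+     * releases the request again at the end of this function.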
+ */ + PGMMALLOCATEPAGESREQ pReq; + int rc = GMMR3AllocatePagesPrepare(pVM, &pReq, cPages, GMMACCOUNT_BASE); + AssertRCReturn(rc, rc); + + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + pReq->aPages[iPage].HCPhysGCPhys = GCPhys + (iPage << PAGE_SHIFT); + pReq->aPages[iPage].idPage = NIL_GMM_PAGEID; + pReq->aPages[iPage].idSharedPage = NIL_GMM_PAGEID; + } + + rc = GMMR3AllocatePagesPerform(pVM, pReq); + if (RT_FAILURE(rc)) + { + GMMR3AllocatePagesCleanup(pReq); + return rc; + } + + /* + * Allocate the new ROM range and RAM range (if necessary). + */ + PPGMROMRANGE pRomNew; + rc = MMHyperAlloc(pVM, RT_UOFFSETOF_DYN(PGMROMRANGE, aPages[cPages]), 0, MM_TAG_PGM_PHYS, (void **)&pRomNew); + if (RT_SUCCESS(rc)) + { + PPGMRAMRANGE pRamNew = NULL; + if (!fRamExists) + rc = MMHyperAlloc(pVM, RT_UOFFSETOF_DYN(PGMRAMRANGE, aPages[cPages]), sizeof(PGMPAGE), MM_TAG_PGM_PHYS, (void **)&pRamNew); + if (RT_SUCCESS(rc)) + { + /* + * Initialize and insert the RAM range (if required). + */ + PPGMROMPAGE pRomPage = &pRomNew->aPages[0]; + if (!fRamExists) + { + pRamNew->pSelfR0 = MMHyperCCToR0(pVM, pRamNew); + pRamNew->pSelfRC = MMHyperCCToRC(pVM, pRamNew); + pRamNew->GCPhys = GCPhys; + pRamNew->GCPhysLast = GCPhysLast; + pRamNew->cb = cb; + pRamNew->pszDesc = pszDesc; + pRamNew->fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_ROM; + pRamNew->pvR3 = NULL; + pRamNew->paLSPages = NULL; + + PPGMPAGE pPage = &pRamNew->aPages[0]; + for (uint32_t iPage = 0; iPage < cPages; iPage++, pPage++, pRomPage++) + { + PGM_PAGE_INIT(pPage, + pReq->aPages[iPage].HCPhysGCPhys, + pReq->aPages[iPage].idPage, + PGMPAGETYPE_ROM, + PGM_PAGE_STATE_ALLOCATED); + + pRomPage->Virgin = *pPage; + } + + pVM->pgm.s.cAllPages += cPages; + pgmR3PhysLinkRamRange(pVM, pRamNew, pRamPrev); + } + else + { + PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT]; + for (uint32_t iPage = 0; iPage < cPages; iPage++, pPage++, pRomPage++) + { + PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_ROM); + PGM_PAGE_SET_HCPHYS(pVM, pPage, pReq->aPages[iPage].HCPhysGCPhys); + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED); + PGM_PAGE_SET_PAGEID(pVM, pPage, pReq->aPages[iPage].idPage); + PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE); + PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0); + PGM_PAGE_SET_TRACKING(pVM, pPage, 0); + + pRomPage->Virgin = *pPage; + } + + pRamNew = pRam; + + pVM->pgm.s.cZeroPages -= cPages; + } + pVM->pgm.s.cPrivatePages += cPages; + + /* Flush physical page map TLB. */ + pgmPhysInvalidatePageMapTLB(pVM); + + + /* Notify NEM before we register handlers. */ + uint32_t const fNemNotify = (fRamExists ? NEM_NOTIFY_PHYS_ROM_F_REPLACE : 0) + | (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? NEM_NOTIFY_PHYS_ROM_F_SHADOW : 0); + rc = NEMR3NotifyPhysRomRegisterEarly(pVM, GCPhys, cb, fNemNotify); + + /* + * !HACK ALERT! REM + (Shadowed) ROM ==> mess. + * + * If it's shadowed we'll register the handler after the ROM notification + * so we get the access handler callbacks that we should. If it isn't + * shadowed we'll do it the other way around to make REM use the built-in + * ROM behavior and not the handler behavior (which is to route all access + * to PGM atm). 
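+     * In short: shadowed ROM -> notify REM first, then register the handler;
+     * plain ROM -> register the handler first, then notify REM.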
+ */ + if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + { +#ifdef VBOX_WITH_REM + REMR3NotifyPhysRomRegister(pVM, GCPhys, cb, NULL, true /* fShadowed */); +#endif + if (RT_SUCCESS(rc)) + rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, pVM->pgm.s.hRomPhysHandlerType, + pRomNew, MMHyperCCToR0(pVM, pRomNew), MMHyperCCToRC(pVM, pRomNew), + pszDesc); + } + else + { + if (RT_SUCCESS(rc)) + rc = PGMHandlerPhysicalRegister(pVM, GCPhys, GCPhysLast, pVM->pgm.s.hRomPhysHandlerType, + pRomNew, MMHyperCCToR0(pVM, pRomNew), MMHyperCCToRC(pVM, pRomNew), + pszDesc); +#ifdef VBOX_WITH_REM + REMR3NotifyPhysRomRegister(pVM, GCPhys, cb, NULL, false /* fShadowed */); +#endif + } + if (RT_SUCCESS(rc)) + { + /* + * Copy the image over to the virgin pages. + * This must be done after linking in the RAM range. + */ + size_t cbBinaryLeft = cbBinary; + PPGMPAGE pRamPage = &pRamNew->aPages[(GCPhys - pRamNew->GCPhys) >> PAGE_SHIFT]; + for (uint32_t iPage = 0; iPage < cPages; iPage++, pRamPage++) + { + void *pvDstPage; + rc = pgmPhysPageMap(pVM, pRamPage, GCPhys + (iPage << PAGE_SHIFT), &pvDstPage); + if (RT_FAILURE(rc)) + { + VMSetError(pVM, rc, RT_SRC_POS, "Failed to map virgin ROM page at %RGp", GCPhys); + break; + } + if (cbBinaryLeft >= PAGE_SIZE) + { + memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << PAGE_SHIFT), PAGE_SIZE); + cbBinaryLeft -= PAGE_SIZE; + } + else + { + ASMMemZeroPage(pvDstPage); /* (shouldn't be necessary, but can't hurt either) */ + if (cbBinaryLeft > 0) + { + memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << PAGE_SHIFT), cbBinaryLeft); + cbBinaryLeft = 0; + } + } + } + if (RT_SUCCESS(rc)) + { + /* + * Initialize the ROM range. + * Note that the Virgin member of the pages has already been initialized above. + */ + pRomNew->GCPhys = GCPhys; + pRomNew->GCPhysLast = GCPhysLast; + pRomNew->cb = cb; + pRomNew->fFlags = fFlags; + pRomNew->idSavedState = UINT8_MAX; + pRomNew->cbOriginal = cbBinary; + pRomNew->pszDesc = pszDesc; + pRomNew->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY + ? pvBinary : RTMemDup(pvBinary, cbBinary); + if (pRomNew->pvOriginal) + { + for (unsigned iPage = 0; iPage < cPages; iPage++) + { + PPGMROMPAGE pPage = &pRomNew->aPages[iPage]; + pPage->enmProt = PGMROMPROT_READ_ROM_WRITE_IGNORE; + PGM_PAGE_INIT_ZERO(&pPage->Shadow, pVM, PGMPAGETYPE_ROM_SHADOW); + } + + /* update the page count stats for the shadow pages. */ + if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + { + pVM->pgm.s.cZeroPages += cPages; + pVM->pgm.s.cAllPages += cPages; + } + + /* + * Insert the ROM range, tell REM and return successfully. + */ + pRomNew->pNextR3 = pRom; + pRomNew->pNextR0 = pRom ? MMHyperCCToR0(pVM, pRom) : NIL_RTR0PTR; + pRomNew->pNextRC = pRom ? MMHyperCCToRC(pVM, pRom) : NIL_RTRCPTR; + + if (pRomPrev) + { + pRomPrev->pNextR3 = pRomNew; + pRomPrev->pNextR0 = MMHyperCCToR0(pVM, pRomNew); + pRomPrev->pNextRC = MMHyperCCToRC(pVM, pRomNew); + } + else + { + pVM->pgm.s.pRomRangesR3 = pRomNew; + pVM->pgm.s.pRomRangesR0 = MMHyperCCToR0(pVM, pRomNew); + pVM->pgm.s.pRomRangesRC = MMHyperCCToRC(pVM, pRomNew); + } + + pgmPhysInvalidatePageMapTLB(pVM); + GMMR3AllocatePagesCleanup(pReq); + + /* Notify NEM again. 
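+                       The early notification further up went out before the access handler
+                       was registered; this late one lets the NEM backend see the final
+                       state of the ROM range.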
*/ + return NEMR3NotifyPhysRomRegisterLate(pVM, GCPhys, cb, fNemNotify); + } + + /* bail out */ + rc = VERR_NO_MEMORY; + } + + int rc2 = PGMHandlerPhysicalDeregister(pVM, GCPhys); + AssertRC(rc2); + } + + if (!fRamExists) + { + pgmR3PhysUnlinkRamRange2(pVM, pRamNew, pRamPrev); + MMHyperFree(pVM, pRamNew); + } + } + MMHyperFree(pVM, pRomNew); + } + + /** @todo Purge the mapping cache or something... */ + GMMR3FreeAllocatedPages(pVM, pReq); + GMMR3AllocatePagesCleanup(pReq); + return rc; +} + + +/** + * Registers a ROM image. + * + * Shadowed ROM images requires double the amount of backing memory, so, + * don't use that unless you have to. Shadowing of ROM images is process + * where we can select where the reads go and where the writes go. On real + * hardware the chipset provides means to configure this. We provide + * PGMR3PhysProtectROM() for this purpose. + * + * A read-only copy of the ROM image will always be kept around while we + * will allocate RAM pages for the changes on demand (unless all memory + * is configured to be preallocated). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns The device instance owning the ROM. + * @param GCPhys First physical address in the range. + * Must be page aligned! + * @param cb The size of the range (in bytes). + * Must be page aligned! + * @param pvBinary Pointer to the binary data backing the ROM image. + * @param cbBinary The size of the binary data pvBinary points to. + * This must be less or equal to @a cb. + * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED + * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY. + * @param pszDesc Pointer to description string. This must not be freed. + * + * @remark There is no way to remove the rom, automatically on device cleanup or + * manually from the device yet. This isn't difficult in any way, it's + * just not something we expect to be necessary for a while. + */ +VMMR3DECL(int) PGMR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb, + const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc) +{ + Log(("PGMR3PhysRomRegister: pDevIns=%p GCPhys=%RGp(-%RGp) cb=%RGp pvBinary=%p cbBinary=%#x fFlags=%#x pszDesc=%s\n", + pDevIns, GCPhys, GCPhys + cb, cb, pvBinary, cbBinary, fFlags, pszDesc)); + pgmLock(pVM); + int rc = pgmR3PhysRomRegisterLocked(pVM, pDevIns, GCPhys, cb, pvBinary, cbBinary, fFlags, pszDesc); + pgmUnlock(pVM); + return rc; +} + + +/** + * Called by PGMR3MemSetup to reset the shadow, switch to the virgin, and verify + * that the virgin part is untouched. + * + * This is done after the normal memory has been cleared. + * + * ASSUMES that the caller owns the PGM lock. + * + * @param pVM The cross context VM structure. + */ +int pgmR3PhysRomReset(PVM pVM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + const uint32_t cPages = pRom->cb >> PAGE_SHIFT; + + if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + { + /* + * Reset the physical handler. + */ + int rc = PGMR3PhysRomProtect(pVM, pRom->GCPhys, pRom->cb, PGMROMPROT_READ_ROM_WRITE_IGNORE); + AssertRCReturn(rc, rc); + + /* + * What we do with the shadow pages depends on the memory + * preallocation option. If not enabled, we'll just throw + * out all the dirty pages and replace them by the zero page. + */ + if (!pVM->pgm.s.fRamPreAlloc) + { + /* Free the dirty pages. 
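+                   This uses the usual batched pattern: GMMR3FreePagesPrepare creates the
+                   request, pgmPhysFreePage queues each allocated shadow page into it, and
+                   any remainder is flushed with GMMR3FreePagesPerform before the cleanup.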
*/ + uint32_t cPendingPages = 0; + PGMMFREEPAGESREQ pReq; + rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE); + AssertRCReturn(rc, rc); + + for (uint32_t iPage = 0; iPage < cPages; iPage++) + if ( !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow) + && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow)) + { + Assert(PGM_PAGE_GET_STATE(&pRom->aPages[iPage].Shadow) == PGM_PAGE_STATE_ALLOCATED); + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &pRom->aPages[iPage].Shadow, + pRom->GCPhys + (iPage << PAGE_SHIFT), + (PGMPAGETYPE)PGM_PAGE_GET_TYPE(&pRom->aPages[iPage].Shadow)); + AssertLogRelRCReturn(rc, rc); + } + + if (cPendingPages) + { + rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages); + AssertLogRelRCReturn(rc, rc); + } + GMMR3FreePagesCleanup(pReq); + } + else + { + /* clear all the shadow pages. */ + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + if (PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow)) + continue; + Assert(!PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow)); + void *pvDstPage; + const RTGCPHYS GCPhys = pRom->GCPhys + (iPage << PAGE_SHIFT); + rc = pgmPhysPageMakeWritableAndMap(pVM, &pRom->aPages[iPage].Shadow, GCPhys, &pvDstPage); + if (RT_FAILURE(rc)) + break; + ASMMemZeroPage(pvDstPage); + } + AssertRCReturn(rc, rc); + } + } + + /* + * Restore the original ROM pages after a saved state load. + * Also, in strict builds check that ROM pages remain unmodified. + */ +#ifndef VBOX_STRICT + if (pVM->pgm.s.fRestoreRomPagesOnReset) +#endif + { + size_t cbSrcLeft = pRom->cbOriginal; + uint8_t const *pbSrcPage = (uint8_t const *)pRom->pvOriginal; + uint32_t cRestored = 0; + for (uint32_t iPage = 0; iPage < cPages && cbSrcLeft > 0; iPage++, pbSrcPage += PAGE_SIZE) + { + const RTGCPHYS GCPhys = pRom->GCPhys + (iPage << PAGE_SHIFT); + void const *pvDstPage; + int rc = pgmPhysPageMapReadOnly(pVM, &pRom->aPages[iPage].Virgin, GCPhys, &pvDstPage); + if (RT_FAILURE(rc)) + break; + + if (memcmp(pvDstPage, pbSrcPage, RT_MIN(cbSrcLeft, PAGE_SIZE))) + { + if (pVM->pgm.s.fRestoreRomPagesOnReset) + { + void *pvDstPageW; + rc = pgmPhysPageMap(pVM, &pRom->aPages[iPage].Virgin, GCPhys, &pvDstPageW); + AssertLogRelRCReturn(rc, rc); + memcpy(pvDstPageW, pbSrcPage, RT_MIN(cbSrcLeft, PAGE_SIZE)); + cRestored++; + } + else + LogRel(("pgmR3PhysRomReset: %RGp: ROM page changed (%s)\n", GCPhys, pRom->pszDesc)); + } + cbSrcLeft -= RT_MIN(cbSrcLeft, PAGE_SIZE); + } + if (cRestored > 0) + LogRel(("PGM: ROM \"%s\": Reloaded %u of %u pages.\n", pRom->pszDesc, cRestored, cPages)); + } + } + + /* Clear the ROM restore flag now as we only need to do this once after + loading saved state. */ + pVM->pgm.s.fRestoreRomPagesOnReset = false; + + return VINF_SUCCESS; +} + + +/** + * Called by PGMR3Term to free resources. + * + * ASSUMES that the caller owns the PGM lock. + * + * @param pVM The cross context VM structure. + */ +void pgmR3PhysRomTerm(PVM pVM) +{ + /* + * Free the heap copy of the original bits. + */ + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + if ( pRom->pvOriginal + && !(pRom->fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY)) + { + RTMemFree((void *)pRom->pvOriginal); + pRom->pvOriginal = NULL; + } + } +} + + +/** + * Change the shadowing of a range of ROM pages. + * + * This is intended for implementing chipset specific memory registers + * and will not be very strict about the input. It will silently ignore + * any pages that are not the part of a shadowed ROM. + * + * @returns VBox status code. 
+ * @retval VINF_PGM_SYNC_CR3 + * + * @param pVM The cross context VM structure. + * @param GCPhys Where to start. Page aligned. + * @param cb How much to change. Page aligned. + * @param enmProt The new ROM protection. + */ +VMMR3DECL(int) PGMR3PhysRomProtect(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMROMPROT enmProt) +{ + /* + * Check input + */ + if (!cb) + return VINF_SUCCESS; + AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER); + RTGCPHYS GCPhysLast = GCPhys + (cb - 1); + AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER); + AssertReturn(enmProt >= PGMROMPROT_INVALID && enmProt <= PGMROMPROT_END, VERR_INVALID_PARAMETER); + + /* + * Process the request. + */ + pgmLock(pVM); + int rc = VINF_SUCCESS; + bool fFlushTLB = false; + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + if ( GCPhys <= pRom->GCPhysLast + && GCPhysLast >= pRom->GCPhys + && (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)) + { + /* + * Iterate the relevant pages and make necessary the changes. + */ + bool fChanges = false; + uint32_t const cPages = pRom->GCPhysLast <= GCPhysLast + ? pRom->cb >> PAGE_SHIFT + : (GCPhysLast - pRom->GCPhys + 1) >> PAGE_SHIFT; + for (uint32_t iPage = (GCPhys - pRom->GCPhys) >> PAGE_SHIFT; + iPage < cPages; + iPage++) + { + PPGMROMPAGE pRomPage = &pRom->aPages[iPage]; + if (PGMROMPROT_IS_ROM(pRomPage->enmProt) != PGMROMPROT_IS_ROM(enmProt)) + { + fChanges = true; + + /* flush references to the page. */ + PPGMPAGE pRamPage = pgmPhysGetPage(pVM, pRom->GCPhys + (iPage << PAGE_SHIFT)); + int rc2 = pgmPoolTrackUpdateGCPhys(pVM, pRom->GCPhys + (iPage << PAGE_SHIFT), pRamPage, + true /*fFlushPTEs*/, &fFlushTLB); + if (rc2 != VINF_SUCCESS && (rc == VINF_SUCCESS || RT_FAILURE(rc2))) + rc = rc2; + uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pRamPage); + + PPGMPAGE pOld = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow; + PPGMPAGE pNew = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin; + + *pOld = *pRamPage; + *pRamPage = *pNew; + /** @todo preserve the volatile flags (handlers) when these have been moved out of HCPhys! */ + + /* Tell NEM about the backing and protection change. */ + if (VM_IS_NEM_ENABLED(pVM)) + { + PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pNew); + NEMHCNotifyPhysPageChanged(pVM, GCPhys, PGM_PAGE_GET_HCPHYS(pOld), PGM_PAGE_GET_HCPHYS(pNew), + pgmPhysPageCalcNemProtection(pRamPage, enmType), enmType, &u2State); + PGM_PAGE_SET_NEM_STATE(pRamPage, u2State); + } + } + pRomPage->enmProt = enmProt; + } + + /* + * Reset the access handler if we made changes, no need + * to optimize this. + */ + if (fChanges) + { + int rc2 = PGMHandlerPhysicalReset(pVM, pRom->GCPhys); + if (RT_FAILURE(rc2)) + { + pgmUnlock(pVM); + AssertRC(rc); + return rc2; + } + } + + /* Advance - cb isn't updated. */ + GCPhys = pRom->GCPhys + (cPages << PAGE_SHIFT); + } + } + pgmUnlock(pVM); + if (fFlushTLB) + PGM_INVL_ALL_VCPU_TLBS(pVM); + + return rc; +} + + +/** + * Sets the Address Gate 20 state. + * + * @param pVCpu The cross context virtual CPU structure. + * @param fEnable True if the gate should be enabled. + * False if the gate should be disabled. 
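+ *
+ * @remarks When the gate is disabled, GCPhysA20Mask is set to ~((RTGCPHYS)1 << 20)
+ *          so that bit 20 of guest-physical addresses is masked off and accesses
+ *          wrap at 1 MB like on real hardware; enabling it restores an all-ones mask.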
+ */ +VMMDECL(void) PGMR3PhysSetA20(PVMCPU pVCpu, bool fEnable) +{ + LogFlow(("PGMR3PhysSetA20 %d (was %d)\n", fEnable, pVCpu->pgm.s.fA20Enabled)); + if (pVCpu->pgm.s.fA20Enabled != fEnable) + { +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + if ( CPUMIsGuestInVmxRootMode(pCtx) + && !fEnable) + { + Log(("Cannot enter A20M mode while in VMX root mode\n")); + return; + } +#endif + pVCpu->pgm.s.fA20Enabled = fEnable; + pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!fEnable << 20); +#ifdef VBOX_WITH_REM + REMR3A20Set(pVCpu->pVMR3, pVCpu, fEnable); +#endif + NEMR3NotifySetA20(pVCpu, fEnable); +#ifdef PGM_WITH_A20 + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + pgmR3RefreshShadowModeAfterA20Change(pVCpu); + HMFlushTlb(pVCpu); +#endif + IEMTlbInvalidateAllPhysical(pVCpu); + STAM_REL_COUNTER_INC(&pVCpu->pgm.s.cA20Changes); + } +} + + +/** + * Tree enumeration callback for dealing with age rollover. + * It will perform a simple compression of the current age. + */ +static DECLCALLBACK(int) pgmR3PhysChunkAgeingRolloverCallback(PAVLU32NODECORE pNode, void *pvUser) +{ + /* Age compression - ASSUMES iNow == 4. */ + PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode; + if (pChunk->iLastUsed >= UINT32_C(0xffffff00)) + pChunk->iLastUsed = 3; + else if (pChunk->iLastUsed >= UINT32_C(0xfffff000)) + pChunk->iLastUsed = 2; + else if (pChunk->iLastUsed) + pChunk->iLastUsed = 1; + else /* iLastUsed = 0 */ + pChunk->iLastUsed = 4; + + NOREF(pvUser); + return 0; +} + + +/** + * The structure passed in the pvUser argument of pgmR3PhysChunkUnmapCandidateCallback(). + */ +typedef struct PGMR3PHYSCHUNKUNMAPCB +{ + PVM pVM; /**< Pointer to the VM. */ + PPGMCHUNKR3MAP pChunk; /**< The chunk to unmap. */ +} PGMR3PHYSCHUNKUNMAPCB, *PPGMR3PHYSCHUNKUNMAPCB; + + +/** + * Callback used to find the mapping that's been unused for + * the longest time. + */ +static DECLCALLBACK(int) pgmR3PhysChunkUnmapCandidateCallback(PAVLU32NODECORE pNode, void *pvUser) +{ + PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode; + PPGMR3PHYSCHUNKUNMAPCB pArg = (PPGMR3PHYSCHUNKUNMAPCB)pvUser; + + /* + * Check for locks and compare when last used. + */ + if (pChunk->cRefs) + return 0; + if (pChunk->cPermRefs) + return 0; + if ( pArg->pChunk + && pChunk->iLastUsed >= pArg->pChunk->iLastUsed) + return 0; + + /* + * Check that it's not in any of the TLBs. + */ + PVM pVM = pArg->pVM; + if ( pVM->pgm.s.ChunkR3Map.Tlb.aEntries[PGM_CHUNKR3MAPTLB_IDX(pChunk->Core.Key)].idChunk + == pChunk->Core.Key) + { + pChunk = NULL; + return 0; + } +#ifdef VBOX_STRICT + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++) + { + Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk != pChunk); + Assert(pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk != pChunk->Core.Key); + } +#endif + + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.PhysTlbHC.aEntries); i++) + if (pVM->pgm.s.PhysTlbHC.aEntries[i].pMap == pChunk) + return 0; + + pArg->pChunk = pChunk; + return 0; +} + + +/** + * Finds a good candidate for unmapping when the ring-3 mapping cache is full. + * + * The candidate will not be part of any TLBs, so no need to flush + * anything afterwards. + * + * @returns Chunk id. + * @param pVM The cross context VM structure. + */ +static int32_t pgmR3PhysChunkFindUnmapCandidate(PVM pVM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + + /* + * Enumerate the age tree starting with the left most node. 
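The GCPhysA20Mask assignment in PGMR3PhysSetA20 above folds the whole gate into one expression: with the gate enabled the mask keeps every bit, with it disabled bit 20 is cleared from every guest physical address. A tiny worked example, using uint64_t in place of RTGCPHYS:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        int fEnable = 1;   /* gate enabled: !fEnable == 0, the mask keeps every bit */
        assert(~((uint64_t)!fEnable << 20) == UINT64_MAX);

        fEnable = 0;       /* gate disabled: !fEnable == 1, the mask drops bit 20   */
        assert(~((uint64_t)!fEnable << 20) == ~(UINT64_C(1) << 20));
        assert((UINT64_C(0x100000) & ~((uint64_t)!fEnable << 20)) == 0);  /* 1 MiB wraps to 0 */
        return 0;
    }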
+ */ + STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a); + PGMR3PHYSCHUNKUNMAPCB Args; + Args.pVM = pVM; + Args.pChunk = NULL; + RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkUnmapCandidateCallback, &Args); + Assert(Args.pChunk); + if (Args.pChunk) + { + Assert(Args.pChunk->cRefs == 0); + Assert(Args.pChunk->cPermRefs == 0); + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a); + return Args.pChunk->Core.Key; + } + + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a); + return INT32_MAX; +} + + +/** + * Rendezvous callback used by pgmR3PhysUnmapChunk that unmaps a chunk + * + * This is only called on one of the EMTs while the other ones are waiting for + * it to complete this function. + * + * @returns VINF_SUCCESS (VBox strict status code). + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused. + * @param pvUser User pointer. Unused + * + */ +static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysUnmapChunkRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + int rc = VINF_SUCCESS; + pgmLock(pVM); + NOREF(pVCpu); NOREF(pvUser); + + if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax) + { + /* Flush the pgm pool cache; call the internal rendezvous handler as we're already in a rendezvous handler here. */ + /** @todo also not really efficient to unmap a chunk that contains PD + * or PT pages. */ + pgmR3PoolClearAllRendezvous(pVM, &pVM->aCpus[0], NULL /* no need to flush the REM TLB as we already did that above */); + + /* + * Request the ring-0 part to unmap a chunk to make space in the mapping cache. + */ + GMMMAPUNMAPCHUNKREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pvR3 = NULL; + Req.idChunkMap = NIL_GMM_CHUNKID; + Req.idChunkUnmap = pgmR3PhysChunkFindUnmapCandidate(pVM); + if (Req.idChunkUnmap != INT32_MAX) + { + STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkUnmap, a); + rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr); + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkUnmap, a); + if (RT_SUCCESS(rc)) + { + /* + * Remove the unmapped one. + */ + PPGMCHUNKR3MAP pUnmappedChunk = (PPGMCHUNKR3MAP)RTAvlU32Remove(&pVM->pgm.s.ChunkR3Map.pTree, Req.idChunkUnmap); + AssertRelease(pUnmappedChunk); + AssertRelease(!pUnmappedChunk->cRefs); + AssertRelease(!pUnmappedChunk->cPermRefs); + pUnmappedChunk->pv = NULL; + pUnmappedChunk->Core.Key = UINT32_MAX; +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + MMR3HeapFree(pUnmappedChunk); +#else + MMR3UkHeapFree(pVM, pUnmappedChunk, MM_TAG_PGM_CHUNK_MAPPING); +#endif + pVM->pgm.s.ChunkR3Map.c--; + pVM->pgm.s.cUnmappedChunks++; + + /* + * Flush dangling PGM pointers (R3 & R0 ptrs to GC physical addresses). + */ + /** @todo We should not flush chunks which include cr3 mappings. */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PPGMCPU pPGM = &pVM->aCpus[idCpu].pgm.s; + + pPGM->pGst32BitPdR3 = NULL; + pPGM->pGstPaePdptR3 = NULL; + pPGM->pGstAmd64Pml4R3 = NULL; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pPGM->pGst32BitPdR0 = NIL_RTR0PTR; + pPGM->pGstPaePdptR0 = NIL_RTR0PTR; + pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR; +#endif + for (unsigned i = 0; i < RT_ELEMENTS(pPGM->apGstPaePDsR3); i++) + { + pPGM->apGstPaePDsR3[i] = NULL; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR; +#endif + } + + /* Flush REM TLBs. 
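The unmap path above, and the map path further down, talk to ring-0 through the same request-packet convention: fill in a magic value and the packet size in a small header, then pass the packet to the ring-0 call, which validates the header before trusting the body. A stripped-down sketch of that convention; MYREQHDR, MYMAPUNMAPREQ, callRing0 and the magic value are made up for illustration:

    #include <stdint.h>

    #define MYREQHDR_MAGIC 0x19283746u   /* invented magic, stands in for SUPVMMR0REQHDR_MAGIC */

    typedef struct MYREQHDR { uint32_t u32Magic; uint32_t cbReq; } MYREQHDR;
    typedef struct MYMAPUNMAPREQ
    {
        MYREQHDR Hdr;            /* header always comes first              */
        uint32_t idChunkMap;     /* chunk to map, or UINT32_MAX for none   */
        uint32_t idChunkUnmap;   /* chunk to unmap, or UINT32_MAX for none */
    } MYMAPUNMAPREQ;

    /* Hypothetical ring-0 entry point: checks the header before touching the body. */
    static int callRing0(unsigned uOperation, MYREQHDR *pHdr)
    {
        if (!pHdr || pHdr->u32Magic != MYREQHDR_MAGIC || pHdr->cbReq < sizeof(*pHdr))
            return -1;
        (void)uOperation;
        return 0;
    }

    static int requestUnmap(uint32_t idChunkUnmap)
    {
        MYMAPUNMAPREQ Req;
        Req.Hdr.u32Magic = MYREQHDR_MAGIC;   /* same shape as GMMMAPUNMAPCHUNKREQ above */
        Req.Hdr.cbReq    = sizeof(Req);
        Req.idChunkMap   = UINT32_MAX;       /* nothing to map */
        Req.idChunkUnmap = idChunkUnmap;
        return callRing0(7 /* invented operation id */, &Req.Hdr);
    }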
*/ + CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH); + } +#ifdef VBOX_WITH_REM + /* Flush REM translation blocks. */ + REMFlushTBs(pVM); +#endif + } + } + } + pgmUnlock(pVM); + return rc; +} + +/** + * Unmap a chunk to free up virtual address space (request packet handler for pgmR3PhysChunkMap) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +void pgmR3PhysUnmapChunk(PVM pVM) +{ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysUnmapChunkRendezvous, NULL); + AssertRC(rc); +} + + +/** + * Maps the given chunk into the ring-3 mapping cache. + * + * This will call ring-0. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param idChunk The chunk in question. + * @param ppChunk Where to store the chunk tracking structure. + * + * @remarks Called from within the PGM critical section. + * @remarks Can be called from any thread! + */ +int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk) +{ + int rc; + + PGM_LOCK_ASSERT_OWNER(pVM); + + /* + * Move the chunk time forward. + */ + pVM->pgm.s.ChunkR3Map.iNow++; + if (pVM->pgm.s.ChunkR3Map.iNow == 0) + { + pVM->pgm.s.ChunkR3Map.iNow = 4; + RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingRolloverCallback, NULL); + } + + /* + * Allocate a new tracking structure first. + */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3HeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk)); +#else + PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3UkHeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk), NULL); +#endif + AssertReturn(pChunk, VERR_NO_MEMORY); + pChunk->Core.Key = idChunk; + pChunk->iLastUsed = pVM->pgm.s.ChunkR3Map.iNow; + + /* + * Request the ring-0 part to map the chunk in question. + */ + GMMMAPUNMAPCHUNKREQ Req; + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.Hdr.cbReq = sizeof(Req); + Req.pvR3 = NULL; + Req.idChunkMap = idChunk; + Req.idChunkUnmap = NIL_GMM_CHUNKID; + + /* Must be callable from any thread, so can't use VMMR3CallR0. */ + STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkMap, a); + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr); + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkMap, a); + if (RT_SUCCESS(rc)) + { + pChunk->pv = Req.pvR3; + + /* + * If we're running out of virtual address space, then we should + * unmap another chunk. + * + * Currently, an unmap operation requires that all other virtual CPUs + * are idling and not by chance making use of the memory we're + * unmapping. So, we create an async unmap operation here. + * + * Now, when creating or restoring a saved state this wont work very + * well since we may want to restore all guest RAM + a little something. + * So, we have to do the unmap synchronously. Fortunately for us + * though, during these operations the other virtual CPUs are inactive + * and it should be safe to do this. + */ + /** @todo Eventually we should lock all memory when used and do + * map+unmap as one kernel call without any rendezvous or + * other precautions. 
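pgmR3PhysChunkMap below stamps every chunk with the current iNow value on use, the unmap-candidate callback earlier prefers the smallest stamp, and the rollover callback compresses old stamps into a few buckets when the counter wraps so the ordering survives. A compact sketch of that stamp-and-compress scheme; CHUNKMAP and the helpers are hypothetical, while the bucket thresholds mirror the rollover callback, which assumes iNow == 4 afterwards:

    #include <stdint.h>

    typedef struct CHUNKMAP { uint32_t iLastUsed; } CHUNKMAP;   /* hypothetical */

    static uint32_t g_iNow = 4;   /* values 1..4 double as the compressed age buckets */

    static void touchChunk(CHUNKMAP *pChunk)
    {
        pChunk->iLastUsed = g_iNow;   /* freshly used chunks carry the newest stamp */
    }

    /* On counter wrap, squash all existing stamps into coarse age buckets so that
       "older" still compares smaller; afterwards the counter restarts at 4. */
    static void compressAge(CHUNKMAP *pChunk)
    {
        if (pChunk->iLastUsed >= UINT32_C(0xffffff00))      pChunk->iLastUsed = 3;
        else if (pChunk->iLastUsed >= UINT32_C(0xfffff000)) pChunk->iLastUsed = 2;
        else if (pChunk->iLastUsed)                         pChunk->iLastUsed = 1;
        else                                                pChunk->iLastUsed = 4;
    }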
*/ + if (pVM->pgm.s.ChunkR3Map.c + 1 >= pVM->pgm.s.ChunkR3Map.cMax) + { + switch (VMR3GetState(pVM)) + { + case VMSTATE_LOADING: + case VMSTATE_SAVING: + { + PVMCPU pVCpu = VMMGetCpu(pVM); + if ( pVCpu + && pVM->pgm.s.cDeprecatedPageLocks == 0) + { + pgmR3PhysUnmapChunkRendezvous(pVM, pVCpu, NULL); + break; + } + } + RT_FALL_THRU(); + default: + rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM); + AssertRC(rc); + break; + } + } + + /* + * Update the tree. We must do this after any unmapping to make sure + * the chunk we're going to return isn't unmapped by accident. + */ + AssertPtr(Req.pvR3); + bool fRc = RTAvlU32Insert(&pVM->pgm.s.ChunkR3Map.pTree, &pChunk->Core); + AssertRelease(fRc); + pVM->pgm.s.ChunkR3Map.c++; + pVM->pgm.s.cMappedChunks++; + } + else + { + /** @todo this may fail because of /proc/sys/vm/max_map_count, so we + * should probably restrict ourselves on linux. */ + AssertRC(rc); +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + MMR3HeapFree(pChunk); +#else + MMR3UkHeapFree(pVM, pChunk, MM_TAG_PGM_CHUNK_MAPPING); +#endif + pChunk = NULL; + } + + *ppChunk = pChunk; + return rc; +} + + +/** + * For VMMCALLRING3_PGM_MAP_CHUNK, considered internal. + * + * @returns see pgmR3PhysChunkMap. + * @param pVM The cross context VM structure. + * @param idChunk The chunk to map. + */ +VMMR3DECL(int) PGMR3PhysChunkMap(PVM pVM, uint32_t idChunk) +{ + PPGMCHUNKR3MAP pChunk; + int rc; + + pgmLock(pVM); + rc = pgmR3PhysChunkMap(pVM, idChunk, &pChunk); + pgmUnlock(pVM); + return rc; +} + + +/** + * Invalidates the TLB for the ring-3 mapping cache. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) PGMR3PhysChunkInvalidateTLB(PVM pVM) +{ + pgmLock(pVM); + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++) + { + pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID; + pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk = NULL; + } + /* The page map TLB references chunks, so invalidate that one too. */ + pgmPhysInvalidatePageMapTLB(pVM); + pgmUnlock(pVM); +} + + +/** + * Response to VMMCALLRING3_PGM_ALLOCATE_LARGE_HANDY_PAGE to allocate a large + * (2MB) page for use with a nested paging PDE. + * + * @returns The following VBox status codes. + * @retval VINF_SUCCESS on success. + * @retval VINF_EM_NO_MEMORY if we're out of memory. + * + * @param pVM The cross context VM structure. + * @param GCPhys GC physical start address of the 2 MB range + */ +VMMR3DECL(int) PGMR3PhysAllocateLargeHandyPage(PVM pVM, RTGCPHYS GCPhys) +{ +#ifdef PGM_WITH_LARGE_PAGES + uint64_t u64TimeStamp1, u64TimeStamp2; + + pgmLock(pVM); + + STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatAllocLargePage, a); + u64TimeStamp1 = RTTimeMilliTS(); + int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_LARGE_HANDY_PAGE, 0, NULL); + u64TimeStamp2 = RTTimeMilliTS(); + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatAllocLargePage, a); + if (RT_SUCCESS(rc)) + { + Assert(pVM->pgm.s.cLargeHandyPages == 1); + + uint32_t idPage = pVM->pgm.s.aLargeHandyPage[0].idPage; + RTHCPHYS HCPhys = pVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys; + + void *pv; + + /* Map the large page into our address space. 
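The cache-full branch above has to evict differently depending on the VM state: while loading or saving a saved state the other EMTs are quiescent, so the rendezvous body can be run directly, otherwise the unmap is queued as an asynchronous request. A skeletal restatement of that decision with invented helper names (unmapChunkNow, queueUnmapRequest):

    /* Invented helpers standing in for the rendezvous body and the async request. */
    static void unmapChunkNow(void)     { /* run the unmap directly on this EMT */ }
    static void queueUnmapRequest(void) { /* defer to an asynchronous request   */ }

    typedef enum MYVMSTATE { MYVMSTATE_LOADING, MYVMSTATE_SAVING, MYVMSTATE_RUNNING } MYVMSTATE;

    static void maybeUnmapOneChunk(MYVMSTATE enmState, int fNoOutstandingPageLocks)
    {
        switch (enmState)
        {
            case MYVMSTATE_LOADING:
            case MYVMSTATE_SAVING:
                if (fNoOutstandingPageLocks)
                {
                    unmapChunkNow();        /* the other virtual CPUs are quiescent here */
                    break;
                }
                /* fall through */
            default:
                queueUnmapRequest();        /* wait for a rendezvous at a safe point */
                break;
        }
    }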
+ * + * Note: assuming that within the 2 MB range: + * - GCPhys + PAGE_SIZE = HCPhys + PAGE_SIZE (whole point of this exercise) + * - user space mapping is continuous as well + * - page id (GCPhys) + 1 = page id (GCPhys + PAGE_SIZE) + */ + rc = pgmPhysPageMapByPageID(pVM, idPage, HCPhys, &pv); + AssertLogRelMsg(RT_SUCCESS(rc), ("idPage=%#x HCPhysGCPhys=%RHp rc=%Rrc\n", idPage, HCPhys, rc)); + + if (RT_SUCCESS(rc)) + { + /* + * Clear the pages. + */ + STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatClearLargePage, b); + for (unsigned i = 0; i < _2M/PAGE_SIZE; i++) + { + ASMMemZeroPage(pv); + + PPGMPAGE pPage; + rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage); + AssertRC(rc); + + Assert(PGM_PAGE_IS_ZERO(pPage)); + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatRZPageReplaceZero); + pVM->pgm.s.cZeroPages--; + + /* + * Do the PGMPAGE modifications. + */ + pVM->pgm.s.cPrivatePages++; + PGM_PAGE_SET_HCPHYS(pVM, pPage, HCPhys); + PGM_PAGE_SET_PAGEID(pVM, pPage, idPage); + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED); + PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE); + PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0); + PGM_PAGE_SET_TRACKING(pVM, pPage, 0); + + /* Somewhat dirty assumption that page ids are increasing. */ + idPage++; + + HCPhys += PAGE_SIZE; + GCPhys += PAGE_SIZE; + + pv = (void *)((uintptr_t)pv + PAGE_SIZE); + + Log3(("PGMR3PhysAllocateLargePage: idPage=%#x HCPhys=%RGp\n", idPage, HCPhys)); + } + STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatClearLargePage, b); + + /* Flush all TLBs. */ + PGM_INVL_ALL_VCPU_TLBS(pVM); + pgmPhysInvalidatePageMapTLB(pVM); + } + pVM->pgm.s.cLargeHandyPages = 0; + } + + if (RT_SUCCESS(rc)) + { + static uint32_t cTimeOut = 0; + uint64_t u64TimeStampDelta = u64TimeStamp2 - u64TimeStamp1; + + if (u64TimeStampDelta > 100) + { + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatLargePageOverflow); + if ( ++cTimeOut > 10 + || u64TimeStampDelta > 1000 /* more than one second forces an early retirement from allocating large pages. */) + { + /* If repeated attempts to allocate a large page takes more than 100 ms, then we fall back to normal 4k pages. + * E.g. Vista 64 tries to move memory around, which takes a huge amount of time. + */ + LogRel(("PGMR3PhysAllocateLargePage: allocating large pages takes too long (last attempt %d ms; nr of timeouts %d); DISABLE\n", u64TimeStampDelta, cTimeOut)); + PGMSetLargePageUsage(pVM, false); + } + } + else + if (cTimeOut > 0) + cTimeOut--; + } + + pgmUnlock(pVM); + return rc; +#else + RT_NOREF(pVM, GCPhys); + return VERR_NOT_IMPLEMENTED; +#endif /* PGM_WITH_LARGE_PAGES */ +} + + +/** + * Response to VM_FF_PGM_NEED_HANDY_PAGES and VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES. + * + * This function will also work the VM_FF_PGM_NO_MEMORY force action flag, to + * signal and clear the out of memory condition. When contracted, this API is + * used to try clear the condition when the user wants to resume. + * + * @returns The following VBox status codes. + * @retval VINF_SUCCESS on success. FFs cleared. + * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is not cleared in + * this case and it gets accompanied by VM_FF_PGM_NO_MEMORY. + * + * @param pVM The cross context VM structure. + * + * @remarks The VINF_EM_NO_MEMORY status is for the benefit of the FF processing + * in EM.cpp and shouldn't be propagated outside TRPM, HM, EM and + * pgmPhysEnsureHandyPage. There is one exception to this in the \#PF + * handler. 
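The clearing loop in PGMR3PhysAllocateLargeHandyPage above leans on the contiguity assumptions spelled out in the note: within the 2 MB range the guest physical address, the host physical address and the page id all advance in lockstep, one 4 KB page at a time, for _2M/PAGE_SIZE = 512 iterations. A bare-bones sketch of that carve-up with made-up types (SUBPAGE, MY_PAGE_SIZE):

    #include <stdint.h>

    #define MY_PAGE_SIZE 4096u
    #define MY_2M        (2u * 1024u * 1024u)

    typedef struct SUBPAGE { uint64_t GCPhys, HCPhys; uint32_t idPage; } SUBPAGE;   /* invented */

    /* Split one 2 MB allocation into 512 consecutive 4 KB page descriptors. */
    static void carveUpLargePage(SUBPAGE *paOut, uint64_t GCPhys, uint64_t HCPhys, uint32_t idPage)
    {
        for (uint32_t i = 0; i < MY_2M / MY_PAGE_SIZE; i++)    /* 512 iterations */
        {
            paOut[i].GCPhys = GCPhys + (uint64_t)i * MY_PAGE_SIZE;
            paOut[i].HCPhys = HCPhys + (uint64_t)i * MY_PAGE_SIZE;
            paOut[i].idPage = idPage + i;    /* page ids assumed to increase with the address */
        }
    }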
+ */ +VMMR3DECL(int) PGMR3PhysAllocateHandyPages(PVM pVM) +{ + pgmLock(pVM); + + /* + * Allocate more pages, noting down the index of the first new page. + */ + uint32_t iClear = pVM->pgm.s.cHandyPages; + AssertMsgReturn(iClear <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), ("%d", iClear), VERR_PGM_HANDY_PAGE_IPE); + Log(("PGMR3PhysAllocateHandyPages: %d -> %d\n", iClear, RT_ELEMENTS(pVM->pgm.s.aHandyPages))); + int rcAlloc = VINF_SUCCESS; + int rcSeed = VINF_SUCCESS; + int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL); + while (rc == VERR_GMM_SEED_ME) + { + void *pvChunk; + rcAlloc = rc = SUPR3PageAlloc(GMM_CHUNK_SIZE >> PAGE_SHIFT, &pvChunk); + if (RT_SUCCESS(rc)) + { + rcSeed = rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_SEED_CHUNK, (uintptr_t)pvChunk, NULL); + if (RT_FAILURE(rc)) + SUPR3PageFree(pvChunk, GMM_CHUNK_SIZE >> PAGE_SHIFT); + } + if (RT_SUCCESS(rc)) + rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL); + } + + /** @todo we should split this up into an allocate and flush operation. sometimes you want to flush and not allocate more (which will trigger the vm account limit error) */ + if ( rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT + && pVM->pgm.s.cHandyPages > 0) + { + /* Still handy pages left, so don't panic. */ + rc = VINF_SUCCESS; + } + + if (RT_SUCCESS(rc)) + { + AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc)); + Assert(pVM->pgm.s.cHandyPages > 0); + VM_FF_CLEAR(pVM, VM_FF_PGM_NEED_HANDY_PAGES); + VM_FF_CLEAR(pVM, VM_FF_PGM_NO_MEMORY); + +#ifdef VBOX_STRICT + uint32_t i; + for (i = iClear; i < pVM->pgm.s.cHandyPages; i++) + if ( pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID + || pVM->pgm.s.aHandyPages[i].idSharedPage != NIL_GMM_PAGEID + || (pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & PAGE_OFFSET_MASK)) + break; + if (i != pVM->pgm.s.cHandyPages) + { + RTAssertMsg1Weak(NULL, __LINE__, __FILE__, __FUNCTION__); + RTAssertMsg2Weak("i=%d iClear=%d cHandyPages=%d\n", i, iClear, pVM->pgm.s.cHandyPages); + for (uint32_t j = iClear; j < pVM->pgm.s.cHandyPages; j++) + RTAssertMsg2Add("%03d: idPage=%d HCPhysGCPhys=%RHp idSharedPage=%d%\n", j, + pVM->pgm.s.aHandyPages[j].idPage, + pVM->pgm.s.aHandyPages[j].HCPhysGCPhys, + pVM->pgm.s.aHandyPages[j].idSharedPage, + j == i ? " <---" : ""); + RTAssertPanic(); + } +#endif + /* + * Clear the pages. + */ + while (iClear < pVM->pgm.s.cHandyPages) + { + PGMMPAGEDESC pPage = &pVM->pgm.s.aHandyPages[iClear]; + void *pv; + rc = pgmPhysPageMapByPageID(pVM, pPage->idPage, pPage->HCPhysGCPhys, &pv); + AssertLogRelMsgBreak(RT_SUCCESS(rc), + ("%u/%u: idPage=%#x HCPhysGCPhys=%RHp rc=%Rrc\n", + iClear, pVM->pgm.s.cHandyPages, pPage->idPage, pPage->HCPhysGCPhys, rc)); + ASMMemZeroPage(pv); + iClear++; + Log3(("PGMR3PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys)); + } + } + else + { + uint64_t cAllocPages, cMaxPages, cBalloonPages; + + /* + * We should never get here unless there is a genuine shortage of + * memory (or some internal error). Flag the error so the VM can be + * suspended ASAP and the user informed. If we're totally out of + * handy pages we will return failure. + */ + /* Report the failure. 
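The VERR_GMM_SEED_ME loop above is a retry protocol: when ring-0 reports it has nothing to allocate from, ring-3 allocates one chunk of backing memory, seeds it into GMM, and retries the original allocation. A reduced sketch of the pattern; the status codes and the four helpers are invented stubs, not the real SUPR3/VMMR3 APIs:

    #include <stddef.h>

    enum { MY_OK = 0, MY_ERR_SEED_ME = -1, MY_ERR_NO_MEMORY = -2 };  /* invented status codes */

    /* Stub stand-ins for the ring-0 allocation, the seed call and the ring-3 chunk alloc. */
    static int   allocHandyPagesR0(void)    { return MY_OK; }
    static int   seedChunkR0(void *pvChunk) { (void)pvChunk; return MY_OK; }
    static void *allocChunkR3(size_t cb)    { (void)cb; return (void *)0; }
    static void  freeChunkR3(void *pv)      { (void)pv; }

    static int allocateHandyPages(void)
    {
        int rc = allocHandyPagesR0();
        while (rc == MY_ERR_SEED_ME)             /* ring-0 has no chunk to carve pages from  */
        {
            void *pvChunk = allocChunkR3(2u * 1024u * 1024u);   /* one chunk of backing RAM  */
            if (!pvChunk)
                return MY_ERR_NO_MEMORY;
            rc = seedChunkR0(pvChunk);           /* hand the chunk to the allocator          */
            if (rc != MY_OK)
            {
                freeChunkR3(pvChunk);            /* seeding failed, give the memory back     */
                return rc;
            }
            rc = allocHandyPagesR0();            /* retry now that ring-0 has been fed       */
        }
        return rc;
    }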
*/ + LogRel(("PGM: Failed to procure handy pages; rc=%Rrc rcAlloc=%Rrc rcSeed=%Rrc cHandyPages=%#x\n" + " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n", + rc, rcAlloc, rcSeed, + pVM->pgm.s.cHandyPages, + pVM->pgm.s.cAllPages, + pVM->pgm.s.cPrivatePages, + pVM->pgm.s.cSharedPages, + pVM->pgm.s.cZeroPages)); + + if (GMMR3QueryMemoryStats(pVM, &cAllocPages, &cMaxPages, &cBalloonPages) == VINF_SUCCESS) + { + LogRel(("GMM: Statistics:\n" + " Allocated pages: %RX64\n" + " Maximum pages: %RX64\n" + " Ballooned pages: %RX64\n", cAllocPages, cMaxPages, cBalloonPages)); + } + + if ( rc != VERR_NO_MEMORY + && rc != VERR_NO_PHYS_MEMORY + && rc != VERR_LOCK_FAILED) + { + for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++) + { + LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n", + i, pVM->pgm.s.aHandyPages[i].HCPhysGCPhys, pVM->pgm.s.aHandyPages[i].idPage, + pVM->pgm.s.aHandyPages[i].idSharedPage)); + uint32_t const idPage = pVM->pgm.s.aHandyPages[i].idPage; + if (idPage != NIL_GMM_PAGEID) + { + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; + pRam; + pRam = pRam->pNextR3) + { + uint32_t const cPages = pRam->cb >> PAGE_SHIFT; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + if (PGM_PAGE_GET_PAGEID(&pRam->aPages[iPage]) == idPage) + LogRel(("PGM: Used by %RGp %R[pgmpage] (%s)\n", + pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pRam->aPages[iPage], pRam->pszDesc)); + } + } + } + } + + if (rc == VERR_NO_MEMORY) + { + uint64_t cbHostRamAvail = 0; + int rc2 = RTSystemQueryAvailableRam(&cbHostRamAvail); + if (RT_SUCCESS(rc2)) + LogRel(("Host RAM: %RU64MB available\n", cbHostRamAvail / _1M)); + else + LogRel(("Cannot determine the amount of available host memory\n")); + } + + /* Set the FFs and adjust rc. */ + VM_FF_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES); + VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY); + if ( rc == VERR_NO_MEMORY + || rc == VERR_NO_PHYS_MEMORY + || rc == VERR_LOCK_FAILED) + rc = VINF_EM_NO_MEMORY; + } + + pgmUnlock(pVM); + return rc; +} + + +/** + * Frees the specified RAM page and replaces it with the ZERO page. + * + * This is used by ballooning, remapping MMIO2, RAM reset and state loading. + * + * @param pVM The cross context VM structure. + * @param pReq Pointer to the request. + * @param pcPendingPages Where the number of pages waiting to be freed are + * kept. This will normally be incremented. + * @param pPage Pointer to the page structure. + * @param GCPhys The guest physical address of the page, if applicable. + * @param enmNewType New page type for NEM notification, since several + * callers will change the type upon successful return. + * + * @remarks The caller must own the PGM lock. + */ +int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys, + PGMPAGETYPE enmNewType) +{ + /* + * Assert sanity. + */ + PGM_LOCK_ASSERT_OWNER(pVM); + if (RT_UNLIKELY( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM + && PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_ROM_SHADOW)) + { + AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage)); + return VMSetError(pVM, VERR_PGM_PHYS_NOT_RAM, RT_SRC_POS, "GCPhys=%RGp type=%d", GCPhys, PGM_PAGE_GET_TYPE(pPage)); + } + + /** @todo What about ballooning of large pages??! 
*/ + Assert( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE + && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED); + + if ( PGM_PAGE_IS_ZERO(pPage) + || PGM_PAGE_IS_BALLOONED(pPage)) + return VINF_SUCCESS; + + const uint32_t idPage = PGM_PAGE_GET_PAGEID(pPage); + Log3(("pgmPhysFreePage: idPage=%#x GCPhys=%RGp pPage=%R[pgmpage]\n", idPage, GCPhys, pPage)); + if (RT_UNLIKELY( idPage == NIL_GMM_PAGEID + || idPage > GMM_PAGEID_LAST + || PGM_PAGE_GET_CHUNKID(pPage) == NIL_GMM_CHUNKID)) + { + AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage)); + return VMSetError(pVM, VERR_PGM_PHYS_INVALID_PAGE_ID, RT_SRC_POS, "GCPhys=%RGp idPage=%#x", GCPhys, pPage); + } + const RTHCPHYS HCPhysPrev = PGM_PAGE_GET_HCPHYS(pPage); + + /* update page count stats. */ + if (PGM_PAGE_IS_SHARED(pPage)) + pVM->pgm.s.cSharedPages--; + else + pVM->pgm.s.cPrivatePages--; + pVM->pgm.s.cZeroPages++; + + /* Deal with write monitored pages. */ + if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED) + { + PGM_PAGE_SET_WRITTEN_TO(pVM, pPage); + pVM->pgm.s.cWrittenToPages++; + } + + /* + * pPage = ZERO page. + */ + PGM_PAGE_SET_HCPHYS(pVM, pPage, pVM->pgm.s.HCPhysZeroPg); + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO); + PGM_PAGE_SET_PAGEID(pVM, pPage, NIL_GMM_PAGEID); + PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE); + PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0); + PGM_PAGE_SET_TRACKING(pVM, pPage, 0); + + /* Flush physical page map TLB entry. */ + pgmPhysInvalidatePageMapTLBEntry(pVM, GCPhys); + + /* Notify NEM. */ + /** @todo consider doing batch NEM notifications. */ + if (VM_IS_NEM_ENABLED(pVM)) + { + uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage); + NEMHCNotifyPhysPageChanged(pVM, GCPhys, HCPhysPrev, pVM->pgm.s.HCPhysZeroPg, + pgmPhysPageCalcNemProtection(pPage, enmNewType), enmNewType, &u2State); + PGM_PAGE_SET_NEM_STATE(pPage, u2State); + } + + /* + * Make sure it's not in the handy page array. + */ + for (uint32_t i = pVM->pgm.s.cHandyPages; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++) + { + if (pVM->pgm.s.aHandyPages[i].idPage == idPage) + { + pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID; + break; + } + if (pVM->pgm.s.aHandyPages[i].idSharedPage == idPage) + { + pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID; + break; + } + } + + /* + * Push it onto the page array. + */ + uint32_t iPage = *pcPendingPages; + Assert(iPage < PGMPHYS_FREE_PAGE_BATCH_SIZE); + *pcPendingPages += 1; + + pReq->aPages[iPage].idPage = idPage; + + if (iPage + 1 < PGMPHYS_FREE_PAGE_BATCH_SIZE) + return VINF_SUCCESS; + + /* + * Flush the pages. + */ + int rc = GMMR3FreePagesPerform(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE); + if (RT_SUCCESS(rc)) + { + GMMR3FreePagesRePrep(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE); + *pcPendingPages = 0; + } + return rc; +} + + +/** + * Converts a GC physical address to a HC ring-3 pointer, with some + * additional checks. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success. + * @retval VINF_PGM_PHYS_TLB_CATCH_WRITE and *ppv set if the page has a write + * access handler of some kind. + * @retval VERR_PGM_PHYS_TLB_CATCH_ALL if the page has a handler catching all + * accesses or is odd in any way. + * @retval VERR_PGM_PHYS_TLB_UNASSIGNED if the page doesn't exist. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address to convert. Since this is only + * used for filling the REM TLB, the A20 mask must be + * applied before calling this API. 
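pgmPhysFreePage above only queues a page id and flushes when a batch fills; the caller drives the surrounding protocol, as the ROM reset code earlier in this file does: prepare a request, queue pages, perform the final partial batch, clean up. A schematic of that caller-side sequence with invented names (MYFREEREQ and batchPrepare/batchPerform/batchCleanup stand in for the GMMR3FreePages* calls):

    #include <stdint.h>

    #define MY_BATCH_SIZE 128u   /* invented, stands in for PGMPHYS_FREE_PAGE_BATCH_SIZE */

    typedef struct MYFREEREQ { uint32_t aidPages[MY_BATCH_SIZE]; } MYFREEREQ;

    /* Stub batch API mirroring the prepare / perform / cleanup shape. */
    static int  batchPrepare(MYFREEREQ **ppReq)                { static MYFREEREQ s_Req; *ppReq = &s_Req; return 0; }
    static int  batchPerform(MYFREEREQ *pReq, uint32_t cPages) { (void)pReq; (void)cPages; return 0; }
    static void batchCleanup(MYFREEREQ *pReq)                  { (void)pReq; }

    /* Queue one page id; flush automatically when the batch fills up. */
    static int queueFreePage(MYFREEREQ *pReq, uint32_t *pcPending, uint32_t idPage)
    {
        pReq->aidPages[(*pcPending)++] = idPage;
        if (*pcPending < MY_BATCH_SIZE)
            return 0;
        int rc = batchPerform(pReq, MY_BATCH_SIZE);
        *pcPending = 0;
        return rc;
    }

    static int freePages(const uint32_t *paidPages, uint32_t cPages)
    {
        MYFREEREQ *pReq;
        uint32_t   cPending = 0;
        int        rc = batchPrepare(&pReq);
        for (uint32_t i = 0; i < cPages && rc == 0; i++)
            rc = queueFreePage(pReq, &cPending, paidPages[i]);
        if (rc == 0 && cPending)
            rc = batchPerform(pReq, cPending);   /* flush the final partial batch */
        batchCleanup(pReq);
        return rc;
    }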
+ * @param fWritable Whether write access is required. + * @param ppv Where to store the pointer corresponding to GCPhys on + * success. + */ +VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable, void **ppv) +{ + pgmLock(pVM); + PGM_A20_ASSERT_MASKED(VMMGetCpu(pVM), GCPhys); + + PPGMRAMRANGE pRam; + PPGMPAGE pPage; + int rc = pgmPhysGetPageAndRangeEx(pVM, GCPhys, &pPage, &pRam); + if (RT_SUCCESS(rc)) + { + if (PGM_PAGE_IS_BALLOONED(pPage)) + rc = VINF_PGM_PHYS_TLB_CATCH_WRITE; + else if (!PGM_PAGE_HAS_ANY_HANDLERS(pPage)) + rc = VINF_SUCCESS; + else + { + if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) /* catches MMIO */ + rc = VERR_PGM_PHYS_TLB_CATCH_ALL; + else if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)) + { + /** @todo Handle TLB loads of virtual handlers so ./test.sh can be made to work + * in -norawr0 mode. */ + if (fWritable) + rc = VINF_PGM_PHYS_TLB_CATCH_WRITE; + } + else + { + /* Temporarily disabled physical handler(s), since the recompiler + doesn't get notified when it's reset we'll have to pretend it's + operating normally. */ + if (pgmHandlerPhysicalIsAll(pVM, GCPhys)) + rc = VERR_PGM_PHYS_TLB_CATCH_ALL; + else + rc = VINF_PGM_PHYS_TLB_CATCH_WRITE; + } + } + if (RT_SUCCESS(rc)) + { + int rc2; + + /* Make sure what we return is writable. */ + if (fWritable) + switch (PGM_PAGE_GET_STATE(pPage)) + { + case PGM_PAGE_STATE_ALLOCATED: + break; + case PGM_PAGE_STATE_BALLOONED: + AssertFailed(); + break; + case PGM_PAGE_STATE_ZERO: + case PGM_PAGE_STATE_SHARED: + if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE) + break; + RT_FALL_THRU(); + case PGM_PAGE_STATE_WRITE_MONITORED: + rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK); + AssertLogRelRCReturn(rc2, rc2); + break; + } + + /* Get a ring-3 mapping of the address. */ + PPGMPAGER3MAPTLBE pTlbe; + rc2 = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe); + AssertLogRelRCReturn(rc2, rc2); + *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK)); + /** @todo mapping/locking hell; this isn't horribly efficient since + * pgmPhysPageLoadIntoTlb will repeat the lookup we've done here. */ + + Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage] *ppv=%p\n", GCPhys, rc, pPage, *ppv)); + } + else + Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage]\n", GCPhys, rc, pPage)); + + /* else: handler catching all access, no pointer returned. */ + } + else + rc = VERR_PGM_PHYS_TLB_UNASSIGNED; + + pgmUnlock(pVM); + return rc; +} + diff --git a/src/VBox/VMM/VMMR3/PGMPhysRWTmpl.h b/src/VBox/VMM/VMMR3/PGMPhysRWTmpl.h new file mode 100644 index 00000000..c23d75cf --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMPhysRWTmpl.h @@ -0,0 +1,61 @@ +/* $Id: PGMPhysRWTmpl.h $ */ +/** @file + * PGM - Page Manager and Monitor, Physical Memory Access Template. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** + * Read physical memory. (one byte/word/dword) + * + * This API respects access handlers and MMIO. Use PGMPhysSimpleReadGCPhys() if you + * want to ignore those. 
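PGMR3PhysTlbGCPhys2Ptr above composes the returned pointer from a page-aligned ring-3 mapping plus the low bits of the guest physical address; because the mapping is page aligned, OR-ing the offset in is the same as adding it. A short arithmetic illustration (the addresses are arbitrary example values):

    #include <stdint.h>
    #include <assert.h>

    #define MY_PAGE_OFFSET_MASK UINT64_C(0xfff)   /* 4 KB pages */

    int main(void)
    {
        uint64_t HCPageBase = UINT64_C(0x7f0000123000);   /* pretend page-aligned mapping address */
        uint64_t GCPhys     = UINT64_C(0x12345678);       /* arbitrary guest physical address     */
        uint64_t uPtr       = HCPageBase | (GCPhys & MY_PAGE_OFFSET_MASK);
        /* The mapping is page aligned, so OR-ing the offset equals adding it. */
        assert(uPtr == HCPageBase + (GCPhys & MY_PAGE_OFFSET_MASK));
        assert((uPtr & MY_PAGE_OFFSET_MASK) == (GCPhys & MY_PAGE_OFFSET_MASK));
        return 0;
    }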
+ * + * @param pVM The cross context VM structure. + * @param GCPhys Physical address start reading from. + * @param enmOrigin Who is calling. + */ +VMMDECL(PGMPHYS_DATATYPE) PGMPHYSFN_READNAME(PVM pVM, RTGCPHYS GCPhys, PGMACCESSORIGIN enmOrigin) +{ + Assert(VM_IS_EMT(pVM)); + PGMPHYS_DATATYPE val; + VBOXSTRICTRC rcStrict = PGMPhysRead(pVM, GCPhys, &val, sizeof(val), enmOrigin); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + return val; +} + + +/** + * Write to physical memory. (one byte/word/dword) + * + * This API respects access handlers and MMIO. Use PGMPhysSimpleReadGCPhys() if you + * want to ignore those. + * + * @param pVM The cross context VM structure. + * @param GCPhys Physical address to write to. + * @param val What to write. + * @param enmOrigin Who is calling. + */ +VMMDECL(void) PGMPHYSFN_WRITENAME(PVM pVM, RTGCPHYS GCPhys, PGMPHYS_DATATYPE val, PGMACCESSORIGIN enmOrigin) +{ + Assert(VM_IS_EMT(pVM)); + VBOXSTRICTRC rcStrict = PGMPhysWrite(pVM, GCPhys, &val, sizeof(val), enmOrigin); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); +} + +#undef PGMPHYSFN_READNAME +#undef PGMPHYSFN_WRITENAME +#undef PGMPHYS_DATATYPE +#undef PGMPHYS_DATASIZE + diff --git a/src/VBox/VMM/VMMR3/PGMPool.cpp b/src/VBox/VMM/VMMR3/PGMPool.cpp new file mode 100644 index 00000000..8a759dee --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMPool.cpp @@ -0,0 +1,982 @@ +/* $Id: PGMPool.cpp $ */ +/** @file + * PGM Shadow Page Pool. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_pgm_pool PGM Shadow Page Pool + * + * Motivations: + * -# Relationship between shadow page tables and physical guest pages. This + * should allow us to skip most of the global flushes now following access + * handler changes. The main expense is flushing shadow pages. + * -# Limit the pool size if necessary (default is kind of limitless). + * -# Allocate shadow pages from RC. We use to only do this in SyncCR3. + * -# Required for 64-bit guests. + * -# Combining the PD cache and page pool in order to simplify caching. + * + * + * @section sec_pgm_pool_outline Design Outline + * + * The shadow page pool tracks pages used for shadowing paging structures (i.e. + * page tables, page directory, page directory pointer table and page map + * level-4). Each page in the pool has an unique identifier. This identifier is + * used to link a guest physical page to a shadow PT. The identifier is a + * non-zero value and has a relativly low max value - say 14 bits. This makes it + * possible to fit it into the upper bits of the of the aHCPhys entries in the + * ram range. + * + * By restricting host physical memory to the first 48 bits (which is the + * announced physical memory range of the K8L chip (scheduled for 2008)), we + * can safely use the upper 16 bits for shadow page ID and reference counting. 
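The design note above counts on host physical addresses fitting in 48 bits, which frees the top 16 bits of a 64-bit per-page entry for a roughly 14-bit shadow-page id plus tracking state. A small sketch of that style of packing; the field split shown here is chosen for illustration and is not the actual PGMPAGE layout:

    #include <stdint.h>
    #include <assert.h>

    /* Illustrative split: low 48 bits carry the host physical address, the top 16 bits
       carry a 14-bit pool index plus two spare bits. */
    #define ADDR_MASK  UINT64_C(0x0000ffffffffffff)
    #define IDX_SHIFT  48
    #define IDX_MASK   UINT64_C(0x3fff)

    static uint64_t packEntry(uint64_t HCPhys, uint16_t idxPool)
    {
        return (HCPhys & ADDR_MASK) | ((uint64_t)(idxPool & IDX_MASK) << IDX_SHIFT);
    }

    int main(void)
    {
        uint64_t u = packEntry(UINT64_C(0x000012345678f000), 0x1abc);
        assert((u & ADDR_MASK)               == UINT64_C(0x000012345678f000));
        assert(((u >> IDX_SHIFT) & IDX_MASK) == 0x1abc);
        return 0;
    }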
+ * + * Update: The 48 bit assumption will be lifted with the new physical memory + * management (PGMPAGE), so we won't have any trouble when someone stuffs 2TB + * into a box in some years. + * + * Now, it's possible for a page to be aliased, i.e. mapped by more than one PT + * or PD. This is solved by creating a list of physical cross reference extents + * when ever this happens. Each node in the list (extent) is can contain 3 page + * pool indexes. The list it self is chained using indexes into the paPhysExt + * array. + * + * + * @section sec_pgm_pool_life Life Cycle of a Shadow Page + * + * -# The SyncPT function requests a page from the pool. + * The request includes the kind of page it is (PT/PD, PAE/legacy), the + * address of the page it's shadowing, and more. + * -# The pool responds to the request by allocating a new page. + * When the cache is enabled, it will first check if it's in the cache. + * Should the pool be exhausted, one of two things can be done: + * -# Flush the whole pool and current CR3. + * -# Use the cache to find a page which can be flushed (~age). + * -# The SyncPT function will sync one or more pages and insert it into the + * shadow PD. + * -# The SyncPage function may sync more pages on a later \#PFs. + * -# The page is freed / flushed in SyncCR3 (perhaps) and some other cases. + * When caching is enabled, the page isn't flush but remains in the cache. + * + * + * @section sec_pgm_pool_monitoring Monitoring + * + * We always monitor PAGE_SIZE chunks of memory. When we've got multiple shadow + * pages for the same PAGE_SIZE of guest memory (PAE and mixed PD/PT) the pages + * sharing the monitor get linked using the iMonitoredNext/Prev. The head page + * is the pvUser to the access handlers. + * + * + * @section sec_pgm_pool_impl Implementation + * + * The pool will take pages from the MM page pool. The tracking data + * (attributes, bitmaps and so on) are allocated from the hypervisor heap. The + * pool content can be accessed both by using the page id and the physical + * address (HC). The former is managed by means of an array, the latter by an + * offset based AVL tree. + * + * Flushing of a pool page means that we iterate the content (we know what kind + * it is) and updates the link information in the ram range. + * + * ... + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM_POOL +#include +#include +#include "PGMInternal.h" +#include +#include +#include "PGMInline.h" + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_DEBUGGER +static FNDBGCCMD pgmR3PoolCmdCheck; +#endif + +#ifdef VBOX_WITH_DEBUGGER +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax, ....pszDescription */ + { "pgmpoolcheck", 0, 0, NULL, 0, 0, pgmR3PoolCmdCheck, "", "Check the pgm pool pages." }, +}; +#endif + +/** + * Initializes the pool + * + * @returns VBox status code. 
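The aliasing paragraph above describes the physical cross-reference extents: each extent records up to three pool indexes, and the extents are chained through indexes into one flat array rather than through pointers (the paPhysExts initialisation further down has the same shape). A toy version of such an index-chained extent list; MYPHYSEXT and NIL_IDX are invented:

    #include <stdint.h>

    #define NIL_IDX  UINT16_C(0xffff)
    #define MAX_EXTS 64                   /* invented pool of extents */

    typedef struct MYPHYSEXT
    {
        uint16_t iNext;      /* index of the next extent in the chain, or NIL_IDX         */
        uint16_t aidx[3];    /* up to three shadow-page indexes referencing the same page */
    } MYPHYSEXT;

    static MYPHYSEXT g_aExts[MAX_EXTS];

    /* Walk a chain starting at iHead and count the references it records. */
    static unsigned countRefs(uint16_t iHead)
    {
        unsigned cRefs = 0;
        for (uint16_t i = iHead; i != NIL_IDX; i = g_aExts[i].iNext)
            for (unsigned j = 0; j < 3; j++)
                if (g_aExts[i].aidx[j] != NIL_IDX)
                    cRefs++;
        return cRefs;
    }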
+ * @param pVM The cross context VM structure. + */ +int pgmR3PoolInit(PVM pVM) +{ + int rc; + + AssertCompile(NIL_PGMPOOL_IDX == 0); + /* pPage->cLocked is an unsigned byte. */ + AssertCompile(VMM_MAX_CPU_COUNT <= 255); + + /* + * Query Pool config. + */ + PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/PGM/Pool"); + + /* Default pgm pool size is 1024 pages (4MB). */ + uint16_t cMaxPages = 1024; + + /* Adjust it up relative to the RAM size, using the nested paging formula. */ + uint64_t cbRam; + rc = CFGMR3QueryU64Def(CFGMR3GetRoot(pVM), "RamSize", &cbRam, 0); AssertRCReturn(rc, rc); + uint64_t u64MaxPages = (cbRam >> 9) + + (cbRam >> 18) + + (cbRam >> 27) + + 32 * PAGE_SIZE; + u64MaxPages >>= PAGE_SHIFT; + if (u64MaxPages > PGMPOOL_IDX_LAST) + cMaxPages = PGMPOOL_IDX_LAST; + else + cMaxPages = (uint16_t)u64MaxPages; + + /** @cfgm{/PGM/Pool/MaxPages, uint16_t, \#pages, 16, 0x3fff, F(ram-size)} + * The max size of the shadow page pool in pages. The pool will grow dynamically + * up to this limit. + */ + rc = CFGMR3QueryU16Def(pCfg, "MaxPages", &cMaxPages, cMaxPages); + AssertLogRelRCReturn(rc, rc); + AssertLogRelMsgReturn(cMaxPages <= PGMPOOL_IDX_LAST && cMaxPages >= RT_ALIGN(PGMPOOL_IDX_FIRST, 16), + ("cMaxPages=%u (%#x)\n", cMaxPages, cMaxPages), VERR_INVALID_PARAMETER); + cMaxPages = RT_ALIGN(cMaxPages, 16); + if (cMaxPages > PGMPOOL_IDX_LAST) + cMaxPages = PGMPOOL_IDX_LAST; + LogRel(("PGM: PGMPool: cMaxPages=%u (u64MaxPages=%llu)\n", cMaxPages, u64MaxPages)); + + /** @todo + * We need to be much more careful with our allocation strategy here. + * For nested paging we don't need pool user info nor extents at all, but + * we can't check for nested paging here (too early during init to get a + * confirmation it can be used). The default for large memory configs is a + * bit large for shadow paging, so I've restricted the extent maximum to 8k + * (8k * 16 = 128k of hyper heap). + * + * Also when large page support is enabled, we typically don't need so much, + * although that depends on the availability of 2 MB chunks on the host. + */ + + /** @cfgm{/PGM/Pool/MaxUsers, uint16_t, \#users, MaxUsers, 32K, MaxPages*2} + * The max number of shadow page user tracking records. Each shadow page has + * zero of other shadow pages (or CR3s) that references it, or uses it if you + * like. The structures describing these relationships are allocated from a + * fixed sized pool. This configuration variable defines the pool size. + */ + uint16_t cMaxUsers; + rc = CFGMR3QueryU16Def(pCfg, "MaxUsers", &cMaxUsers, cMaxPages * 2); + AssertLogRelRCReturn(rc, rc); + AssertLogRelMsgReturn(cMaxUsers >= cMaxPages && cMaxPages <= _32K, + ("cMaxUsers=%u (%#x)\n", cMaxUsers, cMaxUsers), VERR_INVALID_PARAMETER); + + /** @cfgm{/PGM/Pool/MaxPhysExts, uint16_t, \#extents, 16, MaxPages * 2, MIN(MaxPages*2\,8192)} + * The max number of extents for tracking aliased guest pages. + */ + uint16_t cMaxPhysExts; + rc = CFGMR3QueryU16Def(pCfg, "MaxPhysExts", &cMaxPhysExts, + RT_MIN(cMaxPages * 2, 8192 /* 8Ki max as this eat too much hyper heap */)); + AssertLogRelRCReturn(rc, rc); + AssertLogRelMsgReturn(cMaxPhysExts >= 16 && cMaxPhysExts <= PGMPOOL_IDX_LAST, + ("cMaxPhysExts=%u (%#x)\n", cMaxPhysExts, cMaxPhysExts), VERR_INVALID_PARAMETER); + + /** @cfgm{/PGM/Pool/ChacheEnabled, bool, true} + * Enables or disabling caching of shadow pages. Caching means that we will try + * reuse shadow pages instead of recreating them everything SyncCR3, SyncPT or + * SyncPage requests one. 
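For a feel of the nested-paging sizing formula above: with cbRam = 4 GiB the sum (cbRam >> 9) + (cbRam >> 18) + (cbRam >> 27) + 32*PAGE_SIZE comes to 8 MiB + 16 KiB + 32 B + 128 KiB, and shifting that right by PAGE_SHIFT gives 2084 pool pages, roughly 8 MiB, before the PGMPOOL_IDX_LAST clamp and the /PGM/Pool/MaxPages override are applied. The same arithmetic as a self-checking snippet:

    #include <stdint.h>
    #include <assert.h>

    #define MY_PAGE_SIZE  4096u
    #define MY_PAGE_SHIFT 12

    int main(void)
    {
        uint64_t cbRam       = UINT64_C(4) * 1024 * 1024 * 1024;   /* 4 GiB of guest RAM */
        uint64_t u64MaxPages = (cbRam >> 9)
                             + (cbRam >> 18)
                             + (cbRam >> 27)
                             + 32 * MY_PAGE_SIZE;
        u64MaxPages >>= MY_PAGE_SHIFT;
        assert(u64MaxPages == 2084);   /* about 8 MiB of shadow page pool for this guest */
        return 0;
    }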
When reusing a shadow page, we can save time + * reconstructing it and it's children. + */ + bool fCacheEnabled; + rc = CFGMR3QueryBoolDef(pCfg, "CacheEnabled", &fCacheEnabled, true); + AssertLogRelRCReturn(rc, rc); + + LogRel(("PGM: pgmR3PoolInit: cMaxPages=%#RX16 cMaxUsers=%#RX16 cMaxPhysExts=%#RX16 fCacheEnable=%RTbool\n", + cMaxPages, cMaxUsers, cMaxPhysExts, fCacheEnabled)); + + /* + * Allocate the data structures. + */ + uint32_t cb = RT_UOFFSETOF_DYN(PGMPOOL, aPages[cMaxPages]); + cb += cMaxUsers * sizeof(PGMPOOLUSER); + cb += cMaxPhysExts * sizeof(PGMPOOLPHYSEXT); + PPGMPOOL pPool; + rc = MMR3HyperAllocOnceNoRel(pVM, cb, 0, MM_TAG_PGM_POOL, (void **)&pPool); + if (RT_FAILURE(rc)) + return rc; + pVM->pgm.s.pPoolR3 = pPool; + pVM->pgm.s.pPoolR0 = MMHyperR3ToR0(pVM, pPool); + pVM->pgm.s.pPoolRC = MMHyperR3ToRC(pVM, pPool); + + /* + * Initialize it. + */ + pPool->pVMR3 = pVM; + pPool->pVMR0 = pVM->pVMR0; + pPool->pVMRC = pVM->pVMRC; + pPool->cMaxPages = cMaxPages; + pPool->cCurPages = PGMPOOL_IDX_FIRST; + pPool->iUserFreeHead = 0; + pPool->cMaxUsers = cMaxUsers; + PPGMPOOLUSER paUsers = (PPGMPOOLUSER)&pPool->aPages[pPool->cMaxPages]; + pPool->paUsersR3 = paUsers; + pPool->paUsersR0 = MMHyperR3ToR0(pVM, paUsers); + pPool->paUsersRC = MMHyperR3ToRC(pVM, paUsers); + for (unsigned i = 0; i < cMaxUsers; i++) + { + paUsers[i].iNext = i + 1; + paUsers[i].iUser = NIL_PGMPOOL_IDX; + paUsers[i].iUserTable = 0xfffffffe; + } + paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX; + pPool->iPhysExtFreeHead = 0; + pPool->cMaxPhysExts = cMaxPhysExts; + PPGMPOOLPHYSEXT paPhysExts = (PPGMPOOLPHYSEXT)&paUsers[cMaxUsers]; + pPool->paPhysExtsR3 = paPhysExts; + pPool->paPhysExtsR0 = MMHyperR3ToR0(pVM, paPhysExts); + pPool->paPhysExtsRC = MMHyperR3ToRC(pVM, paPhysExts); + for (unsigned i = 0; i < cMaxPhysExts; i++) + { + paPhysExts[i].iNext = i + 1; + paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX; + paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE; + paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX; + paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE; + paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX; + paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE; + } + paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX; + for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++) + pPool->aiHash[i] = NIL_PGMPOOL_IDX; + pPool->iAgeHead = NIL_PGMPOOL_IDX; + pPool->iAgeTail = NIL_PGMPOOL_IDX; + pPool->fCacheEnabled = fCacheEnabled; + + pPool->hAccessHandlerType = NIL_PGMPHYSHANDLERTYPE; + rc = PGMR3HandlerPhysicalTypeRegister(pVM, PGMPHYSHANDLERKIND_WRITE, + pgmPoolAccessHandler, + NULL, "pgmPoolAccessHandler", "pgmRZPoolAccessPfHandler", + NULL, "pgmPoolAccessHandler", "pgmRZPoolAccessPfHandler", + "Guest Paging Access Handler", + &pPool->hAccessHandlerType); + AssertLogRelRCReturn(rc, rc); + + pPool->HCPhysTree = 0; + + /* + * The NIL entry. + */ + Assert(NIL_PGMPOOL_IDX == 0); + pPool->aPages[NIL_PGMPOOL_IDX].enmKind = PGMPOOLKIND_INVALID; + pPool->aPages[NIL_PGMPOOL_IDX].idx = NIL_PGMPOOL_IDX; + pPool->aPages[NIL_PGMPOOL_IDX].Core.Key = NIL_RTHCPHYS; + pPool->aPages[NIL_PGMPOOL_IDX].GCPhys = NIL_RTGCPHYS; + pPool->aPages[NIL_PGMPOOL_IDX].iNext = NIL_PGMPOOL_IDX; + /* pPool->aPages[NIL_PGMPOOL_IDX].cLocked = INT32_MAX; - test this out... 
*/ + pPool->aPages[NIL_PGMPOOL_IDX].pvPageR3 = 0; + pPool->aPages[NIL_PGMPOOL_IDX].iUserHead = NIL_PGMPOOL_USER_INDEX; + pPool->aPages[NIL_PGMPOOL_IDX].iModifiedNext = NIL_PGMPOOL_IDX; + pPool->aPages[NIL_PGMPOOL_IDX].iModifiedPrev = NIL_PGMPOOL_IDX; + pPool->aPages[NIL_PGMPOOL_IDX].iMonitoredNext = NIL_PGMPOOL_IDX; + pPool->aPages[NIL_PGMPOOL_IDX].iMonitoredPrev = NIL_PGMPOOL_IDX; + pPool->aPages[NIL_PGMPOOL_IDX].iAgeNext = NIL_PGMPOOL_IDX; + pPool->aPages[NIL_PGMPOOL_IDX].iAgePrev = NIL_PGMPOOL_IDX; + + Assert(pPool->aPages[NIL_PGMPOOL_IDX].idx == NIL_PGMPOOL_IDX); + Assert(pPool->aPages[NIL_PGMPOOL_IDX].GCPhys == NIL_RTGCPHYS); + Assert(!pPool->aPages[NIL_PGMPOOL_IDX].fSeenNonGlobal); + Assert(!pPool->aPages[NIL_PGMPOOL_IDX].fMonitored); + Assert(!pPool->aPages[NIL_PGMPOOL_IDX].fCached); + Assert(!pPool->aPages[NIL_PGMPOOL_IDX].fZeroed); + Assert(!pPool->aPages[NIL_PGMPOOL_IDX].fReusedFlushPending); + +#ifdef VBOX_WITH_STATISTICS + /* + * Register statistics. + */ + STAM_REG(pVM, &pPool->cCurPages, STAMTYPE_U16, "/PGM/Pool/cCurPages", STAMUNIT_PAGES, "Current pool size."); + STAM_REG(pVM, &pPool->cMaxPages, STAMTYPE_U16, "/PGM/Pool/cMaxPages", STAMUNIT_PAGES, "Max pool size."); + STAM_REG(pVM, &pPool->cUsedPages, STAMTYPE_U16, "/PGM/Pool/cUsedPages", STAMUNIT_PAGES, "The number of pages currently in use."); + STAM_REG(pVM, &pPool->cUsedPagesHigh, STAMTYPE_U16_RESET, "/PGM/Pool/cUsedPagesHigh", STAMUNIT_PAGES, "The high watermark for cUsedPages."); + STAM_REG(pVM, &pPool->StatAlloc, STAMTYPE_PROFILE_ADV, "/PGM/Pool/Alloc", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolAlloc."); + STAM_REG(pVM, &pPool->StatClearAll, STAMTYPE_PROFILE, "/PGM/Pool/ClearAll", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmR3PoolClearAll."); + STAM_REG(pVM, &pPool->StatR3Reset, STAMTYPE_PROFILE, "/PGM/Pool/R3Reset", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmR3PoolReset."); + STAM_REG(pVM, &pPool->StatFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolFlushPage."); + STAM_REG(pVM, &pPool->StatFree, STAMTYPE_PROFILE, "/PGM/Pool/Free", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolFree."); + STAM_REG(pVM, &pPool->StatForceFlushPage, STAMTYPE_COUNTER, "/PGM/Pool/FlushForce", STAMUNIT_OCCURENCES, "Counting explicit flushes by PGMPoolFlushPage()."); + STAM_REG(pVM, &pPool->StatForceFlushDirtyPage, STAMTYPE_COUNTER, "/PGM/Pool/FlushForceDirty", STAMUNIT_OCCURENCES, "Counting explicit flushes of dirty pages by PGMPoolFlushPage()."); + STAM_REG(pVM, &pPool->StatForceFlushReused, STAMTYPE_COUNTER, "/PGM/Pool/FlushReused", STAMUNIT_OCCURENCES, "Counting flushes for reused pages."); + STAM_REG(pVM, &pPool->StatZeroPage, STAMTYPE_PROFILE, "/PGM/Pool/ZeroPage", STAMUNIT_TICKS_PER_CALL, "Profiling time spent zeroing pages. 
Overlaps with Alloc."); + STAM_REG(pVM, &pPool->cMaxUsers, STAMTYPE_U16, "/PGM/Pool/Track/cMaxUsers", STAMUNIT_COUNT, "Max user tracking records."); + STAM_REG(pVM, &pPool->cPresent, STAMTYPE_U32, "/PGM/Pool/Track/cPresent", STAMUNIT_COUNT, "Number of present page table entries."); + STAM_REG(pVM, &pPool->StatTrackDeref, STAMTYPE_PROFILE, "/PGM/Pool/Track/Deref", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackDeref."); + STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPT, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPT", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPT."); + STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPTs, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPTs", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPTs."); + STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPTsSlow, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPTsSlow", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPTsSlow."); + STAM_REG(pVM, &pPool->StatTrackFlushEntry, STAMTYPE_COUNTER, "/PGM/Pool/Track/Entry/Flush", STAMUNIT_COUNT, "Nr of flushed entries."); + STAM_REG(pVM, &pPool->StatTrackFlushEntryKeep, STAMTYPE_COUNTER, "/PGM/Pool/Track/Entry/Update", STAMUNIT_COUNT, "Nr of updated entries."); + STAM_REG(pVM, &pPool->StatTrackFreeUpOneUser, STAMTYPE_COUNTER, "/PGM/Pool/Track/FreeUpOneUser", STAMUNIT_TICKS_PER_CALL, "The number of times we were out of user tracking records."); + STAM_REG(pVM, &pPool->StatTrackDerefGCPhys, STAMTYPE_PROFILE, "/PGM/Pool/Track/DrefGCPhys", STAMUNIT_TICKS_PER_CALL, "Profiling deref activity related tracking GC physical pages."); + STAM_REG(pVM, &pPool->StatTrackLinearRamSearches, STAMTYPE_COUNTER, "/PGM/Pool/Track/LinearRamSearches", STAMUNIT_OCCURENCES, "The number of times we had to do linear ram searches."); + STAM_REG(pVM, &pPool->StamTrackPhysExtAllocFailures,STAMTYPE_COUNTER, "/PGM/Pool/Track/PhysExtAllocFailures", STAMUNIT_OCCURENCES, "The number of failing pgmPoolTrackPhysExtAlloc calls."); + + STAM_REG(pVM, &pPool->StatMonitorPfRZ, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/#PF", STAMUNIT_TICKS_PER_CALL, "Profiling the RC/R0 #PF access handler."); + STAM_REG(pVM, &pPool->StatMonitorPfRZEmulateInstr, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/#PF/EmulateInstr", STAMUNIT_OCCURENCES, "Times we've failed interpreting the instruction."); + STAM_REG(pVM, &pPool->StatMonitorPfRZFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/#PF/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the RC/R0 access handler."); + STAM_REG(pVM, &pPool->StatMonitorPfRZFlushReinit, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/#PF/FlushReinit", STAMUNIT_OCCURENCES, "Times we've detected a page table reinit."); + STAM_REG(pVM, &pPool->StatMonitorPfRZFlushModOverflow,STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/#PF/FlushOverflow", STAMUNIT_OCCURENCES, "Counting flushes for pages that are modified too often."); + STAM_REG(pVM, &pPool->StatMonitorPfRZFork, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/#PF/Fork", STAMUNIT_OCCURENCES, "Times we've detected fork()."); + STAM_REG(pVM, &pPool->StatMonitorPfRZHandled, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/#PF/Handled", STAMUNIT_TICKS_PER_CALL, "Profiling the RC/R0 #PF access we've handled (except REP STOSD)."); + STAM_REG(pVM, &pPool->StatMonitorPfRZIntrFailPatch1, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/#PF/IntrFailPatch1", STAMUNIT_OCCURENCES, "Times we've failed interpreting a patch code instruction."); + STAM_REG(pVM, &pPool->StatMonitorPfRZIntrFailPatch2, STAMTYPE_COUNTER, 
"/PGM/Pool/Monitor/RZ/#PF/IntrFailPatch2", STAMUNIT_OCCURENCES, "Times we've failed interpreting a patch code instruction during flushing."); + STAM_REG(pVM, &pPool->StatMonitorPfRZRepPrefix, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/#PF/RepPrefix", STAMUNIT_OCCURENCES, "The number of times we've seen rep prefixes we can't handle."); + STAM_REG(pVM, &pPool->StatMonitorPfRZRepStosd, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/#PF/RepStosd", STAMUNIT_TICKS_PER_CALL, "Profiling the REP STOSD cases we've handled."); + + STAM_REG(pVM, &pPool->StatMonitorRZ, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM", STAMUNIT_TICKS_PER_CALL, "Profiling the regular access handler."); + STAM_REG(pVM, &pPool->StatMonitorRZFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the regular access handler."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[0], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size01", STAMUNIT_OCCURENCES, "Number of 1 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[1], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size02", STAMUNIT_OCCURENCES, "Number of 2 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[2], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size03", STAMUNIT_OCCURENCES, "Number of 3 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[3], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size04", STAMUNIT_OCCURENCES, "Number of 4 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[4], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size05", STAMUNIT_OCCURENCES, "Number of 5 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[5], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size06", STAMUNIT_OCCURENCES, "Number of 6 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[6], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size07", STAMUNIT_OCCURENCES, "Number of 7 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[7], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size08", STAMUNIT_OCCURENCES, "Number of 8 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[8], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size09", STAMUNIT_OCCURENCES, "Number of 9 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[9], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size0a", STAMUNIT_OCCURENCES, "Number of 10 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[10], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size0b", STAMUNIT_OCCURENCES, "Number of 11 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[11], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size0c", STAMUNIT_OCCURENCES, "Number of 12 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[12], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size0d", STAMUNIT_OCCURENCES, "Number of 13 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[13], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size0e", STAMUNIT_OCCURENCES, "Number of 14 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[14], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size0f", STAMUNIT_OCCURENCES, "Number of 15 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[15], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size10", STAMUNIT_OCCURENCES, "Number of 16 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[16], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size11-2f", STAMUNIT_OCCURENCES, "Number of 17-31 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[17], 
STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size20-3f", STAMUNIT_OCCURENCES, "Number of 32-63 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZSizes[18], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Size40+", STAMUNIT_OCCURENCES, "Number of 64+ byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[0], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned1", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 1."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[1], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned2", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 2."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[2], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned3", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 3."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[3], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned4", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 4."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[4], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned5", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 5."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[5], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned6", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 6."); + STAM_REG(pVM, &pPool->aStatMonitorRZMisaligned[6], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/IEM/Misaligned7", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 7."); + + STAM_REG(pVM, &pPool->StatMonitorRZFaultPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PT", STAMUNIT_OCCURENCES, "Nr of handled PT faults."); + STAM_REG(pVM, &pPool->StatMonitorRZFaultPD, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PD", STAMUNIT_OCCURENCES, "Nr of handled PD faults."); + STAM_REG(pVM, &pPool->StatMonitorRZFaultPDPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PDPT", STAMUNIT_OCCURENCES, "Nr of handled PDPT faults."); + STAM_REG(pVM, &pPool->StatMonitorRZFaultPML4, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PML4", STAMUNIT_OCCURENCES, "Nr of handled PML4 faults."); + + STAM_REG(pVM, &pPool->StatMonitorR3, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3", STAMUNIT_TICKS_PER_CALL, "Profiling the R3 access handler."); + STAM_REG(pVM, &pPool->StatMonitorR3FlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the R3 access handler."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[0], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size01", STAMUNIT_OCCURENCES, "Number of 1 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[1], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size02", STAMUNIT_OCCURENCES, "Number of 2 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[2], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size03", STAMUNIT_OCCURENCES, "Number of 3 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[3], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size04", STAMUNIT_OCCURENCES, "Number of 4 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[4], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size05", STAMUNIT_OCCURENCES, "Number of 5 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[5], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size06", STAMUNIT_OCCURENCES, "Number of 6 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[6], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size07", STAMUNIT_OCCURENCES, "Number of 7 byte accesses (R3)."); + 
STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[7], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size08", STAMUNIT_OCCURENCES, "Number of 8 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[8], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size09", STAMUNIT_OCCURENCES, "Number of 9 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[9], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size0a", STAMUNIT_OCCURENCES, "Number of 10 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[10], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size0b", STAMUNIT_OCCURENCES, "Number of 11 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[11], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size0c", STAMUNIT_OCCURENCES, "Number of 12 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[12], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size0d", STAMUNIT_OCCURENCES, "Number of 13 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[13], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size0e", STAMUNIT_OCCURENCES, "Number of 14 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[14], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size0f", STAMUNIT_OCCURENCES, "Number of 15 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[15], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size10", STAMUNIT_OCCURENCES, "Number of 16 byte accesses (R3)."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[16], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size11-2f", STAMUNIT_OCCURENCES, "Number of 17-31 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[17], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size20-3f", STAMUNIT_OCCURENCES, "Number of 32-63 byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorR3Sizes[18], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Size40+", STAMUNIT_OCCURENCES, "Number of 64+ byte accesses."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[0], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned1", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 1 in R3."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[1], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned2", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 2 in R3."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[2], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned3", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 3 in R3."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[3], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned4", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 4 in R3."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[4], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned5", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 5 in R3."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[5], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned6", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 6 in R3."); + STAM_REG(pVM, &pPool->aStatMonitorR3Misaligned[6], STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Misaligned7", STAMUNIT_OCCURENCES, "Number of misaligned access with offset 7 in R3."); + + STAM_REG(pVM, &pPool->StatMonitorR3FaultPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PT", STAMUNIT_OCCURENCES, "Nr of handled PT faults."); + STAM_REG(pVM, &pPool->StatMonitorR3FaultPD, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PD", STAMUNIT_OCCURENCES, "Nr of handled PD faults."); + STAM_REG(pVM, &pPool->StatMonitorR3FaultPDPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PDPT", STAMUNIT_OCCURENCES, "Nr 
of handled PDPT faults."); + STAM_REG(pVM, &pPool->StatMonitorR3FaultPML4, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PML4", STAMUNIT_OCCURENCES, "Nr of handled PML4 faults."); + + STAM_REG(pVM, &pPool->cModifiedPages, STAMTYPE_U16, "/PGM/Pool/Monitor/cModifiedPages", STAMUNIT_PAGES, "The current cModifiedPages value."); + STAM_REG(pVM, &pPool->cModifiedPagesHigh, STAMTYPE_U16_RESET, "/PGM/Pool/Monitor/cModifiedPagesHigh", STAMUNIT_PAGES, "The high watermark for cModifiedPages."); + STAM_REG(pVM, &pPool->StatResetDirtyPages, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/Resets", STAMUNIT_OCCURENCES, "Times we've called pgmPoolResetDirtyPages (and there were dirty page)."); + STAM_REG(pVM, &pPool->StatDirtyPage, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/Pages", STAMUNIT_OCCURENCES, "Times we've called pgmPoolAddDirtyPage."); + STAM_REG(pVM, &pPool->StatDirtyPageDupFlush, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/FlushDup", STAMUNIT_OCCURENCES, "Times we've had to flush duplicates for dirty page management."); + STAM_REG(pVM, &pPool->StatDirtyPageOverFlowFlush, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/FlushOverflow",STAMUNIT_OCCURENCES, "Times we've had to flush because of overflow."); + STAM_REG(pVM, &pPool->StatCacheHits, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Hits", STAMUNIT_OCCURENCES, "The number of pgmPoolAlloc calls satisfied by the cache."); + STAM_REG(pVM, &pPool->StatCacheMisses, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Misses", STAMUNIT_OCCURENCES, "The number of pgmPoolAlloc calls not statisfied by the cache."); + STAM_REG(pVM, &pPool->StatCacheKindMismatches, STAMTYPE_COUNTER, "/PGM/Pool/Cache/KindMismatches", STAMUNIT_OCCURENCES, "The number of shadow page kind mismatches. (Better be low, preferably 0!)"); + STAM_REG(pVM, &pPool->StatCacheFreeUpOne, STAMTYPE_COUNTER, "/PGM/Pool/Cache/FreeUpOne", STAMUNIT_OCCURENCES, "The number of times the cache was asked to free up a page."); + STAM_REG(pVM, &pPool->StatCacheCacheable, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Cacheable", STAMUNIT_OCCURENCES, "The number of cacheable allocations."); + STAM_REG(pVM, &pPool->StatCacheUncacheable, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Uncacheable", STAMUNIT_OCCURENCES, "The number of uncacheable allocations."); +#endif /* VBOX_WITH_STATISTICS */ + +#ifdef VBOX_WITH_DEBUGGER + /* + * Debugger commands. + */ + static bool s_fRegisteredCmds = false; + if (!s_fRegisteredCmds) + { + rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + if (RT_SUCCESS(rc)) + s_fRegisteredCmds = true; + } +#endif + + return VINF_SUCCESS; +} + + +/** + * Relocate the page pool data. + * + * @param pVM The cross context VM structure. + */ +void pgmR3PoolRelocate(PVM pVM) +{ + pVM->pgm.s.pPoolRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3); + pVM->pgm.s.pPoolR3->pVMRC = pVM->pVMRC; + pVM->pgm.s.pPoolR3->paUsersRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3->paUsersR3); + pVM->pgm.s.pPoolR3->paPhysExtsRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3->paPhysExtsR3); +} + + +/** + * Grows the shadow page pool. + * + * I.e. adds more pages to it, assuming that hasn't reached cMaxPages yet. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3PoolGrow(PVM pVM) +{ + PPGMPOOL pPool = pVM->pgm.s.pPoolR3; + AssertReturn(pPool->cCurPages < pPool->cMaxPages, VERR_PGM_POOL_MAXED_OUT_ALREADY); + + /* With 32-bit guests and no EPT, the CR3 limits the root pages to low + (below 4 GB) memory. */ + /** @todo change the pool to handle ROOT page allocations specially when + * required. 
*/ + bool fCanUseHighMemory = HMIsNestedPagingActive(pVM) + && HMIsVmxActive(pVM); + + pgmLock(pVM); + + /* + * How much to grow it by? + */ + uint32_t cPages = pPool->cMaxPages - pPool->cCurPages; + cPages = RT_MIN(PGMPOOL_CFG_MAX_GROW, cPages); + LogFlow(("PGMR3PoolGrow: Growing the pool by %d (%#x) pages. fCanUseHighMemory=%RTbool\n", cPages, cPages, fCanUseHighMemory)); + + for (unsigned i = pPool->cCurPages; cPages-- > 0; i++) + { + PPGMPOOLPAGE pPage = &pPool->aPages[i]; + + if (fCanUseHighMemory) + pPage->pvPageR3 = MMR3PageAlloc(pVM); + else + pPage->pvPageR3 = MMR3PageAllocLow(pVM); + if (!pPage->pvPageR3) + { + Log(("We're out of memory!! i=%d fCanUseHighMemory=%RTbool\n", i, fCanUseHighMemory)); + pgmUnlock(pVM); + return i ? VINF_SUCCESS : VERR_NO_PAGE_MEMORY; + } + pPage->Core.Key = MMPage2Phys(pVM, pPage->pvPageR3); + AssertFatal(pPage->Core.Key < _4G || fCanUseHighMemory); + pPage->GCPhys = NIL_RTGCPHYS; + pPage->enmKind = PGMPOOLKIND_FREE; + pPage->idx = pPage - &pPool->aPages[0]; + LogFlow(("PGMR3PoolGrow: insert page #%#x - %RHp\n", pPage->idx, pPage->Core.Key)); + pPage->iNext = pPool->iFreeHead; + pPage->iUserHead = NIL_PGMPOOL_USER_INDEX; + pPage->iModifiedNext = NIL_PGMPOOL_IDX; + pPage->iModifiedPrev = NIL_PGMPOOL_IDX; + pPage->iMonitoredNext = NIL_PGMPOOL_IDX; + pPage->iMonitoredPrev = NIL_PGMPOOL_IDX; + pPage->iAgeNext = NIL_PGMPOOL_IDX; + pPage->iAgePrev = NIL_PGMPOOL_IDX; + /* commit it */ + bool fRc = RTAvloHCPhysInsert(&pPool->HCPhysTree, &pPage->Core); Assert(fRc); NOREF(fRc); + pPool->iFreeHead = i; + pPool->cCurPages = i + 1; + } + + pgmUnlock(pVM); + Assert(pPool->cCurPages <= pPool->cMaxPages); + return VINF_SUCCESS; +} + + +/** + * Rendezvous callback used by pgmR3PoolClearAll that clears all shadow pages + * and all modification counters. + * + * This is only called on one of the EMTs while the other ones are waiting for + * it to complete this function. + * + * @returns VINF_SUCCESS (VBox strict status code). + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. Unused. + * @param fpvFlushRemTlb When not NULL, we'll flush the REM TLB as well. + * (This is the pvUser, so it has to be void *.) + * + */ +DECLCALLBACK(VBOXSTRICTRC) pgmR3PoolClearAllRendezvous(PVM pVM, PVMCPU pVCpu, void *fpvFlushRemTlb) +{ + PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); + STAM_PROFILE_START(&pPool->StatClearAll, c); + NOREF(pVCpu); + + pgmLock(pVM); + Log(("pgmR3PoolClearAllRendezvous: cUsedPages=%d fpvFlushRemTlb=%RTbool\n", pPool->cUsedPages, !!fpvFlushRemTlb)); + + /* + * Iterate all the pages until we've encountered all that are in use. + * This is a simple but not quite optimal solution. + */ + unsigned cModifiedPages = 0; NOREF(cModifiedPages); + unsigned cLeft = pPool->cUsedPages; + uint32_t iPage = pPool->cCurPages; + while (--iPage >= PGMPOOL_IDX_FIRST) + { + PPGMPOOLPAGE pPage = &pPool->aPages[iPage]; + if (pPage->GCPhys != NIL_RTGCPHYS) + { + switch (pPage->enmKind) + { + /* + * We only care about shadow page tables that reference physical memory + */ +#ifdef PGM_WITH_LARGE_PAGES + case PGMPOOLKIND_EPT_PD_FOR_PHYS: /* Large pages reference 2 MB of physical memory, so we must clear them. 
*/ + if (pPage->cPresent) + { + PX86PDPAE pShwPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage); + for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++) + { + if ( pShwPD->a[i].n.u1Present + && pShwPD->a[i].b.u1Size) + { + Assert(!(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)); + pShwPD->a[i].u = 0; + Assert(pPage->cPresent); + pPage->cPresent--; + } + } + if (pPage->cPresent == 0) + pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX; + } + goto default_case; + + case PGMPOOLKIND_PAE_PD_PHYS: /* Large pages reference 2 MB of physical memory, so we must clear them. */ + if (pPage->cPresent) + { + PEPTPD pShwPD = (PEPTPD)PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage); + for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++) + { + Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0); + if ( pShwPD->a[i].n.u1Present + && pShwPD->a[i].b.u1Size) + { + Assert(!(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)); + pShwPD->a[i].u = 0; + Assert(pPage->cPresent); + pPage->cPresent--; + } + } + if (pPage->cPresent == 0) + pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX; + } + goto default_case; +#endif /* PGM_WITH_LARGE_PAGES */ + + case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT: + case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB: + case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT: + case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB: + case PGMPOOLKIND_PAE_PT_FOR_PAE_PT: + case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB: + case PGMPOOLKIND_32BIT_PT_FOR_PHYS: + case PGMPOOLKIND_PAE_PT_FOR_PHYS: + case PGMPOOLKIND_EPT_PT_FOR_PHYS: + { + if (pPage->cPresent) + { + void *pvShw = PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage); + STAM_PROFILE_START(&pPool->StatZeroPage, z); +#if 0 + /* Useful check for leaking references; *very* expensive though. */ + switch (pPage->enmKind) + { + case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT: + case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB: + case PGMPOOLKIND_PAE_PT_FOR_PAE_PT: + case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB: + case PGMPOOLKIND_PAE_PT_FOR_PHYS: + { + bool fFoundFirst = false; + PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)pvShw; + for (unsigned ptIndex = 0; ptIndex < RT_ELEMENTS(pPT->a); ptIndex++) + { + if (pPT->a[ptIndex].u) + { + if (!fFoundFirst) + { + AssertFatalMsg(pPage->iFirstPresent <= ptIndex, ("ptIndex = %d first present = %d\n", ptIndex, pPage->iFirstPresent)); + if (pPage->iFirstPresent != ptIndex) + Log(("ptIndex = %d first present = %d\n", ptIndex, pPage->iFirstPresent)); + fFoundFirst = true; + } + if (PGMSHWPTEPAE_IS_P(pPT->a[ptIndex])) + { + pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pPT->a[ptIndex]), NIL_RTGCPHYS); + if (pPage->iFirstPresent == ptIndex) + pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX; + } + } + } + AssertFatalMsg(pPage->cPresent == 0, ("cPresent = %d pPage = %RGv\n", pPage->cPresent, pPage->GCPhys)); + break; + } + default: + break; + } +#endif + ASMMemZeroPage(pvShw); + STAM_PROFILE_STOP(&pPool->StatZeroPage, z); + pPage->cPresent = 0; + pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX; + } + } + RT_FALL_THRU(); +#ifdef PGM_WITH_LARGE_PAGES + default_case: +#endif + default: + Assert(!pPage->cModifications || ++cModifiedPages); + Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications); + Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications); + pPage->iModifiedNext = NIL_PGMPOOL_IDX; + pPage->iModifiedPrev = NIL_PGMPOOL_IDX; + pPage->cModifications = 0; + break; + + } + if (!--cLeft) + break; + } + } + +#ifndef DEBUG_michael + AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, 
pPool->cModifiedPages)); +#endif + pPool->iModifiedHead = NIL_PGMPOOL_IDX; + pPool->cModifiedPages = 0; + + /* + * Clear all the GCPhys links and rebuild the phys ext free list. + */ + for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX); + pRam; + pRam = pRam->CTX_SUFF(pNext)) + { + iPage = pRam->cb >> PAGE_SHIFT; + while (iPage-- > 0) + PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0); + } + + pPool->iPhysExtFreeHead = 0; + PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts); + const unsigned cMaxPhysExts = pPool->cMaxPhysExts; + for (unsigned i = 0; i < cMaxPhysExts; i++) + { + paPhysExts[i].iNext = i + 1; + paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX; + paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE; + paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX; + paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE; + paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX; + paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE; + } + paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX; + + +#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT + /* Reset all dirty pages to reactivate the page monitoring. */ + /* Note: we must do this *after* clearing all page references and shadow page tables as there might be stale references to + * recently removed MMIO ranges around that might otherwise end up asserting in pgmPoolTracDerefGCPhysHint + */ + for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++) + { + PPGMPOOLPAGE pPage; + unsigned idxPage; + + if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX) + continue; + + idxPage = pPool->aDirtyPages[i].uIdx; + AssertRelease(idxPage != NIL_PGMPOOL_IDX); + pPage = &pPool->aPages[idxPage]; + Assert(pPage->idx == idxPage); + Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX); + + AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, i)); + + Log(("Reactivate dirty page %RGp\n", pPage->GCPhys)); + + /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */ + int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK); + AssertRCSuccess(rc); + pPage->fDirty = false; + + pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX; + } + + /* Clear all dirty pages. */ + pPool->idxFreeDirtyPage = 0; + pPool->cDirtyPages = 0; +#endif + + /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + pVM->aCpus[idCpu].pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL; + + /* Flush job finished. */ + VM_FF_CLEAR(pVM, VM_FF_PGM_POOL_FLUSH_PENDING); + pPool->cPresent = 0; + pgmUnlock(pVM); + + PGM_INVL_ALL_VCPU_TLBS(pVM); + + if (fpvFlushRemTlb) + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH); + + STAM_PROFILE_STOP(&pPool->StatClearAll, c); + return VINF_SUCCESS; +} + + +/** + * Clears the shadow page pool. + * + * @param pVM The cross context VM structure. + * @param fFlushRemTlb When set, the REM TLB is scheduled for flushing as + * well. + */ +void pgmR3PoolClearAll(PVM pVM, bool fFlushRemTlb) +{ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PoolClearAllRendezvous, &fFlushRemTlb); + AssertRC(rc); +} + + +/** + * Protect all pgm pool page table entries to monitor writes + * + * @param pVM The cross context VM structure. + * + * @remarks ASSUMES the caller will flush all TLBs!! 
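pgmR3PoolWriteProtectPages below walks every used shadow page table and clears the write permission on each present entry, so the next guest write faults into the access handler. The per-entry operation, shown here for a plain 32-bit page table as an illustrative sketch (bit masks per the x86 PTE layout; this is not VirtualBox code):

    #include <cstdint>
    #include <cstddef>

    /* Sketch: write-protect every present entry of a 32-bit page table. */
    static void writeProtectPageTable(uint32_t *paPte, size_t cEntries)
    {
        const uint32_t fPresent  = UINT32_C(0x1);   /* bit 0: present    */
        const uint32_t fWritable = UINT32_C(0x2);   /* bit 1: read/write */
        for (size_t i = 0; i < cEntries; i++)
            if (paPte[i] & fPresent)
                paPte[i] &= ~fWritable;             /* next write faults */
    }

The PAE and EPT cases in the real function do the same thing through PGMSHWPTEPAE_SET_RO and by clearing the EPT entry's u1Write bit.
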
+ */ +void pgmR3PoolWriteProtectPages(PVM pVM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); + unsigned cLeft = pPool->cUsedPages; + unsigned iPage = pPool->cCurPages; + while (--iPage >= PGMPOOL_IDX_FIRST) + { + PPGMPOOLPAGE pPage = &pPool->aPages[iPage]; + if ( pPage->GCPhys != NIL_RTGCPHYS + && pPage->cPresent) + { + union + { + void *pv; + PX86PT pPT; + PPGMSHWPTPAE pPTPae; + PEPTPT pPTEpt; + } uShw; + uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage); + + switch (pPage->enmKind) + { + /* + * We only care about shadow page tables. + */ + case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT: + case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB: + case PGMPOOLKIND_32BIT_PT_FOR_PHYS: + for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPT->a); iShw++) + { + if (uShw.pPT->a[iShw].n.u1Present) + uShw.pPT->a[iShw].n.u1Write = 0; + } + break; + + case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT: + case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB: + case PGMPOOLKIND_PAE_PT_FOR_PAE_PT: + case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB: + case PGMPOOLKIND_PAE_PT_FOR_PHYS: + for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPTPae->a); iShw++) + { + if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw])) + PGMSHWPTEPAE_SET_RO(uShw.pPTPae->a[iShw]); + } + break; + + case PGMPOOLKIND_EPT_PT_FOR_PHYS: + for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPTEpt->a); iShw++) + { + if (uShw.pPTEpt->a[iShw].n.u1Present) + uShw.pPTEpt->a[iShw].n.u1Write = 0; + } + break; + + default: + break; + } + if (!--cLeft) + break; + } + } +} + +#ifdef VBOX_WITH_DEBUGGER +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmpoolcheck' command.} + */ +static DECLCALLBACK(int) pgmR3PoolCmdCheck(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, -1, cArgs == 0); + uint32_t cErrors = 0; + NOREF(paArgs); + + PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); + for (unsigned i = 0; i < pPool->cCurPages; i++) + { + PPGMPOOLPAGE pPage = &pPool->aPages[i]; + bool fFirstMsg = true; + + /** @todo cover other paging modes too. */ + if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) + { + PPGMSHWPTPAE pShwPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage); + { + PX86PTPAE pGstPT; + PGMPAGEMAPLOCK LockPage; + int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, pPage->GCPhys, (const void **)&pGstPT, &LockPage); AssertReleaseRC(rc); + + /* Check if any PTEs are out of sync. 
*/ + for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++) + { + if (PGMSHWPTEPAE_IS_P(pShwPT->a[j])) + { + RTHCPHYS HCPhys = NIL_RTHCPHYS; + rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[j].u & X86_PTE_PAE_PG_MASK, &HCPhys); + if ( rc != VINF_SUCCESS + || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[j]) != HCPhys) + { + if (fFirstMsg) + { + DBGCCmdHlpPrintf(pCmdHlp, "Check pool page %RGp\n", pPage->GCPhys); + fFirstMsg = false; + } + DBGCCmdHlpPrintf(pCmdHlp, "Mismatch HCPhys: rc=%Rrc idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, j, pGstPT->a[j].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), HCPhys); + cErrors++; + } + else if ( PGMSHWPTEPAE_IS_RW(pShwPT->a[j]) + && !pGstPT->a[j].n.u1Write) + { + if (fFirstMsg) + { + DBGCCmdHlpPrintf(pCmdHlp, "Check pool page %RGp\n", pPage->GCPhys); + fFirstMsg = false; + } + DBGCCmdHlpPrintf(pCmdHlp, "Mismatch r/w gst/shw: idx=%d guest %RX64 shw=%RX64 vs %RHp\n", j, pGstPT->a[j].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), HCPhys); + cErrors++; + } + } + } + PGMPhysReleasePageMappingLock(pVM, &LockPage); + } + + /* Make sure this page table can't be written to from any shadow mapping. */ + RTHCPHYS HCPhysPT = NIL_RTHCPHYS; + int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT); + AssertMsgRC(rc, ("PGMPhysGCPhys2HCPhys failed with rc=%d for %RGp\n", rc, pPage->GCPhys)); + if (rc == VINF_SUCCESS) + { + for (unsigned j = 0; j < pPool->cCurPages; j++) + { + PPGMPOOLPAGE pTempPage = &pPool->aPages[j]; + + if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) + { + PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pTempPage); + + for (unsigned k = 0; k < RT_ELEMENTS(pShwPT->a); k++) + { + if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[k]) +# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT + && !pPage->fDirty +# endif + && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[k]) == HCPhysPT) + { + if (fFirstMsg) + { + DBGCCmdHlpPrintf(pCmdHlp, "Check pool page %RGp\n", pPage->GCPhys); + fFirstMsg = false; + } + DBGCCmdHlpPrintf(pCmdHlp, "Mismatch: r/w: GCPhys=%RGp idx=%d shw %RX64 %RX64\n", pTempPage->GCPhys, k, PGMSHWPTEPAE_GET_LOG(pShwPT->a[k]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[k])); + cErrors++; + } + } + } + } + } + } + } + if (cErrors > 0) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "Found %#x errors", cErrors); + return VINF_SUCCESS; +} +#endif /* VBOX_WITH_DEBUGGER */ diff --git a/src/VBox/VMM/VMMR3/PGMR3DbgA.asm b/src/VBox/VMM/VMMR3/PGMR3DbgA.asm new file mode 100644 index 00000000..48205e6e --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMR3DbgA.asm @@ -0,0 +1,475 @@ +; $Id: PGMR3DbgA.asm $ +;; @file +; PGM - Page Manager and Monitor - Debugger & Debugging API Optimizations. +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + + +;******************************************************************************* +;* Header Files * +;******************************************************************************* +%define RT_ASM_WITH_SEH64 +%include "VBox/asmdefs.mac" + +BEGINCODE ;; Doesn't end up in code seg on 64-bit darwin. weird. + + +; +; Common to all code below. 
+; +%ifdef ASM_CALL64_MSC + %define pvNeedle r8 + %define cbNeedle r9d + %define bTmp dl +%elifdef ASM_CALL64_GCC + %define pvNeedle rdx + %define cbNeedle esi + %define bTmp r9b +%elifdef RT_ARCH_X86 + %define pvNeedle dword [esp + 8h] + %define cbNeedle dword [esp + 10h] +%else + %error "Unsupported arch!" +%endif + +;; +; Searches for a 8 byte needle in steps of 8. +; +; In 32-bit mode, this will only actually search for a 8 byte needle. +; +; @param pbHaystack [msc:rcx, gcc:rdi, x86:ebp+08h] What to search thru. +; @param cbHaystack [msc:edx, gcc:rsi, x86:ebp+0ch] The amount of hay to search. +; @param pvNeedle [msc:r8, gcc:rdx, x86:ebp+10h] What we're searching for +; @param cbNeedle [msc:r9, gcc:rcx, x86:esp+10h] Size of what we're searcing for. Currently ignored. +; +; @remarks ASSUMES pbHaystack is aligned at uAlign. +; +BEGINPROC pgmR3DbgFixedMemScan8Wide8Step +%ifdef ASM_CALL64_MSC + mov r10, rdi ; save it + mov rdi, rcx ; rdi=pbHaystack + mov ecx, edx ; rcx=cbHaystack + mov rax, [r8] ; *(uint64_t *)pvNeedle +%elifdef ASM_CALL64_GCC + xchg rcx, rsi ; rcx=cbHaystack, rsi=cbNeedle + mov rax, [rdx] ; *(uint64_t *)pvNeedle +%elifdef RT_ARCH_X86 + push ebp + mov ebp, esp + push edi ; save it + mov edi, [ebp + 08h] ; pbHaystack + mov ecx, [ebp + 0ch] ; cbHaystack + mov eax, [ebp + 10h] ; pvNeedle + mov edx, [eax + 4] ; ((uint32_t *)pvNeedle)[1] + mov eax, [eax] ; ((uint32_t *)pvNeedle)[0] +%else + %error "Unsupported arch!" +%endif +SEH64_END_PROLOGUE + +%ifdef RT_ARCH_X86 + ; + ; No string instruction to help us here. Do a simple tight loop instead. + ; + shr ecx, 3 + jz .return_null +.again: + cmp [edi], eax + je .needle_check +.continue: + add edi, 8 + dec ecx + jnz .again + jmp .return_null + + ; Check the needle 2nd dword, caller can do the rest. +.needle_check: + cmp edx, [edi + 4] + jne .continue + +.return_edi: + mov eax, edi + +%else ; RT_ARCH_AMD64 + cmp ecx, 8 + jb .return_null +.continue: + shr ecx, 3 + repne scasq + jne .return_null + ; check more of the needle if we can. + mov r11d, 8 + shl ecx, 3 +.needle_check: + cmp cbNeedle, r11d + je .return_edi + cmp ecx, r11d + jb .return_edi ; returns success here as we've might've lost stuff while shifting ecx around. + mov bTmp, [pvNeedle + r11] + cmp bTmp, [xDI + r11 - 8] + jne .continue + inc r11d + jmp .needle_check + +.return_edi: + lea xAX, [xDI - 8] +%endif ; RT_ARCH_AMD64 + +.return: +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + pop edi + leave +%endif + ret + +.return_null: + xor eax, eax + jmp .return +ENDPROC pgmR3DbgFixedMemScan8Wide8Step + + +;; +; Searches for a 4 byte needle in steps of 4. +; +; @param pbHaystack [msc:rcx, gcc:rdi, x86:esp+04h] What to search thru. +; @param cbHaystack [msc:edx, gcc:rsi, x86:esp+08h] The amount of hay to search. +; @param pvNeedle [msc:r8, gcc:rdx, x86:esp+0ch] What we're searching for +; @param cbNeedle [msc:r9, gcc:rcx, x86:esp+10h] Size of what we're searcing for. Currently ignored. +; +; @remarks ASSUMES pbHaystack is aligned at uAlign. 
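All of the pgmR3DbgFixedMemScan*Wide*Step routines implement the same idea: step through an aligned haystack at a fixed stride, compare the first machine word of the needle, and only on a candidate hit verify the remaining needle bytes. A rough, self-contained C++ equivalent of the 4-byte-stride variant, as a sketch assuming cbNeedle >= 4 (it is not meant to match the assembly byte for byte):

    #include <cstdint>
    #include <cstring>
    #include <cstddef>

    /* Sketch: scan pbHaystack in 4-byte steps for a needle of at least 4 bytes. */
    static const uint8_t *fixedMemScan4Wide4Step(const uint8_t *pbHaystack, size_t cbHaystack,
                                                 const void *pvNeedle, size_t cbNeedle)
    {
        uint32_t uNeedle;
        std::memcpy(&uNeedle, pvNeedle, sizeof(uNeedle));       /* first 32 bits of the needle */
        for (size_t off = 0; off + cbNeedle <= cbHaystack; off += 4)
        {
            uint32_t uHay;
            std::memcpy(&uHay, pbHaystack + off, sizeof(uHay));
            if (   uHay == uNeedle
                && std::memcmp(pbHaystack + off, pvNeedle, cbNeedle) == 0)
                return pbHaystack + off;                        /* hit */
        }
        return nullptr;                                         /* no match */
    }

The assembly variants get the stride and the first-word comparison essentially for free from repne scasd/scasq, which is why the wide-step routines insist on an aligned haystack.
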
+; +BEGINPROC pgmR3DbgFixedMemScan4Wide4Step +%ifdef ASM_CALL64_MSC + mov r10, rdi ; save it + mov rdi, rcx ; rdi=pbHaystack + mov ecx, edx ; rcx=cbHaystack + mov eax, [r8] ; *(uint32_t *)pvNeedle +%elifdef ASM_CALL64_GCC + xchg rcx, rsi ; rcx=cbHaystack, rsi=cbNeedle + mov eax, [rdx] ; *(uint32_t *)pvNeedle +%elifdef RT_ARCH_X86 + mov edx, edi ; save it + mov edi, [esp + 04h] ; pbHaystack + mov ecx, [esp + 08h] ; cbHaystack + mov eax, [esp + 0ch] ; pvNeedle + mov eax, [eax] ; *(uint32_t *)pvNeedle +%else + %error "Unsupported arch!" +%endif +SEH64_END_PROLOGUE + +.continue: + cmp ecx, 4 + jb .return_null + shr ecx, 2 + repne scasd + jne .return_null + +%ifdef RT_ARCH_AMD64 + ; check more of the needle if we can. + mov r11d, 4 +.needle_check: + cmp cbNeedle, r11d + je .return_edi + cmp ecx, r11d ; don't bother converting ecx to bytes. + jb .return_edi + mov bTmp, [pvNeedle + r11] + cmp bTmp, [xDI + r11 - 4] + jne .continue + inc r11d + jmp .needle_check +%endif + +.return_edi: + lea xAX, [xDI - 4] +.return: +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret + +.return_null: + xor eax, eax + jmp .return +ENDPROC pgmR3DbgFixedMemScan4Wide4Step + + +;; +; Searches for a 2 byte needle in steps of 2. +; +; @param pbHaystack [msc:rcx, gcc:rdi, x86:esp+04h] What to search thru. +; @param cbHaystack [msc:edx, gcc:rsi, x86:esp+08h] The amount of hay to search. +; @param pvNeedle [msc:r8, gcc:rdx, x86:esp+0ch] What we're searching for +; @param cbNeedle [msc:r9, gcc:rcx, x86:esp+10h] Size of what we're searcing for. Currently ignored. +; +; @remarks ASSUMES pbHaystack is aligned at uAlign. +; +BEGINPROC pgmR3DbgFixedMemScan2Wide2Step +%ifdef ASM_CALL64_MSC + mov r10, rdi ; save it + mov rdi, rcx ; rdi=pbHaystack + mov ecx, edx ; rcx=cbHaystack + mov ax, [r8] ; *(uint16_t *)pvNeedle +%elifdef ASM_CALL64_GCC + xchg rcx, rsi ; rcx=cbHaystack, rsi=cbNeedle + mov ax, [rdx] ; *(uint16_t *)pvNeedle +%elifdef RT_ARCH_X86 + mov edx, edi ; save it + mov edi, [esp + 04h] ; pbHaystack + mov ecx, [esp + 08h] ; cbHaystack + mov eax, [esp + 0ch] ; pvNeedle + mov ax, [eax] ; *(uint16_t *)pvNeedle +%else + %error "Unsupported arch!" +%endif +SEH64_END_PROLOGUE + +.continue: + cmp ecx, 2 + jb .return_null + shr ecx, 1 + repne scasw + jne .return_null + +%ifdef RT_ARCH_AMD64 + ; check more of the needle if we can. + mov r11d, 2 +.needle_check: + cmp cbNeedle, r11d + je .return_edi + cmp ecx, r11d ; don't bother converting ecx to bytes. + jb .return_edi + mov bTmp, [pvNeedle + r11] + cmp bTmp, [xDI + r11 - 2] + jne .continue + inc r11d + jmp .needle_check +%endif + +.return_edi: + lea xAX, [xDI - 2] +.return: +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret + +.return_null: + xor eax, eax + jmp .return +ENDPROC pgmR3DbgFixedMemScan2Wide2Step + + +;; +; Searches for a 1 byte needle in steps of 1. +; +; @param pbHaystack [msc:rcx, gcc:rdi, x86:esp+04h] What to search thru. +; @param cbHaystack [msc:edx, gcc:rsi, x86:esp+08h] The amount of hay to search. +; @param pvNeedle [msc:r8, gcc:rdx, x86:esp+0ch] What we're searching for +; @param cbNeedle [msc:r9, gcc:rcx, x86:esp+10h] Size of what we're searcing for. Currently ignored. 
+; +BEGINPROC pgmR3DbgFixedMemScan1Wide1Step +%ifdef ASM_CALL64_MSC + mov r10, rdi ; save it + mov rdi, rcx ; rdi=pbHaystack + mov ecx, edx ; rcx=cbHaystack + mov al, [r8] ; *(uint8_t *)pvNeedle +%elifdef ASM_CALL64_GCC + xchg rcx, rsi ; rcx=cbHaystack, rsi=cbNeedle + mov al, [rdx] ; *(uint8_t *)pvNeedle +%elifdef RT_ARCH_X86 + mov edx, edi ; save it + mov edi, [esp + 04h] ; pbHaystack + mov ecx, [esp + 08h] ; cbHaystack + mov eax, [esp + 0ch] ; pvNeedle + mov al, [eax] ; *(uint8_t *)pvNeedle +%else + %error "Unsupported arch!" +%endif +SEH64_END_PROLOGUE + + cmp ecx, 1 + jb .return_null +.continue: + repne scasb + jne .return_null + +%ifdef RT_ARCH_AMD64 + ; check more of the needle if we can. + mov r11d, 1 +.needle_check: + cmp cbNeedle, r11d + je .return_edi + cmp ecx, r11d + jb .return_edi + mov bTmp, [pvNeedle + r11] + cmp bTmp, [xDI + r11 - 1] + jne .continue + inc r11d + jmp .needle_check +%endif + +.return_edi: + lea xAX, [xDI - 1] +.return: +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret + +.return_null: + xor eax, eax +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret +ENDPROC pgmR3DbgFixedMemScan1Wide1Step + + +;; +; Searches for a 4 byte needle in steps of 1. +; +; @param pbHaystack [msc:rcx, gcc:rdi, x86:esp+04h] What to search thru. +; @param cbHaystack [msc:edx, gcc:rsi, x86:esp+08h] The amount of hay to search. +; @param pvNeedle [msc:r8, gcc:rdx, x86:esp+0ch] What we're searching for +; @param cbNeedle [msc:r9, gcc:rcx, x86:esp+10h] Size of what we're searcing for. Currently ignored. +; +BEGINPROC pgmR3DbgFixedMemScan4Wide1Step +%ifdef ASM_CALL64_MSC + mov r10, rdi ; save it + mov rdi, rcx ; rdi=pbHaystack + mov ecx, edx ; rcx=cbHaystack + mov eax, [r8] ; *(uint32_t *)pvNeedle +%elifdef ASM_CALL64_GCC + xchg rcx, rsi ; rcx=cbHaystack, rsi=cbNeedle + mov eax, [rdx] ; *(uint32_t *)pvNeedle +%elifdef RT_ARCH_X86 + mov edx, edi ; save it + mov edi, [esp + 04h] ; pbHaystack + mov ecx, [esp + 08h] ; cbHaystack + mov eax, [esp + 0ch] ; pvNeedle + mov eax, [eax] ; *(uint32_t *)pvNeedle +%else + %error "Unsupported arch!" +%endif +SEH64_END_PROLOGUE + + cmp ecx, 1 + jb .return_null +.continue: + repne scasb + jne .return_null + cmp ecx, 3 + jb .return_null + cmp eax, [xDI - 1] + jne .continue + +.return_edi: + lea xAX, [xDI - 1] +.return: +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret + +.return_null: + xor eax, eax +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret +ENDPROC pgmR3DbgFixedMemScan4Wide1Step + +;; +; Searches for a 8 byte needle in steps of 1. +; +; @param pbHaystack [msc:rcx, gcc:rdi, x86:esp+04h] What to search thru. +; @param cbHaystack [msc:edx, gcc:rsi, x86:esp+08h] The amount of hay to search. +; @param pvNeedle [msc:r8, gcc:rdx, x86:esp+0ch] What we're searching for +; @param cbNeedle [msc:r9, gcc:rcx, x86:esp+10h] Size of what we're searcing for. Currently ignored. +; +; @remarks The 32-bit version is currently identical to pgmR3DbgFixedMemScan4Wide1Step. 
+; +BEGINPROC pgmR3DbgFixedMemScan8Wide1Step +%ifdef ASM_CALL64_MSC + mov r10, rdi ; save it + mov rdi, rcx ; rdi=pbHaystack + mov ecx, edx ; rcx=cbHaystack + mov rax, [r8] ; *(uint64_t *)pvNeedle +%elifdef ASM_CALL64_GCC + xchg rcx, rsi ; rcx=cbHaystack, rsi=cbNeedle + mov rax, [rdx] ; *(uint64_t *)pvNeedle +%elifdef RT_ARCH_X86 + mov edx, edi ; save it + mov edi, [esp + 04h] ; pbHaystack + mov ecx, [esp + 08h] ; cbHaystack + mov eax, [esp + 0ch] ; pvNeedle + mov eax, [eax] ; *(uint32_t *)pvNeedle +%else + %error "Unsupported arch!" +%endif +SEH64_END_PROLOGUE + + cmp ecx, 1 + jb .return_null +.continue: + repne scasb + jne .return_null +%ifdef RT_ARCH_AMD64 + cmp ecx, 7 + jb .check_smaller + cmp rax, [xDI - 1] + jne .continue + jmp .return_edi +.check_smaller: +%endif + cmp ecx, 3 + jb .return_null + cmp eax, [xDI - 1] + jne .continue + +.return_edi: + lea xAX, [xDI - 1] +.return: +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret + +.return_null: + xor eax, eax +%ifdef ASM_CALL64_MSC + mov rdi, r10 +%elifdef RT_ARCH_X86 + mov edi, edx +%endif + ret +ENDPROC pgmR3DbgFixedMemScan8Wide1Step + diff --git a/src/VBox/VMM/VMMR3/PGMSavedState.cpp b/src/VBox/VMM/VMMR3/PGMSavedState.cpp new file mode 100644 index 00000000..05b13b77 --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMSavedState.cpp @@ -0,0 +1,3328 @@ +/* $Id: PGMSavedState.cpp $ */ +/** @file + * PGM - Page Manager and Monitor, The Saved State Part. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM +#include +#include +#include +#include +#include +#include "PGMInternal.h" +#include +#include "PGMInline.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Saved state data unit version. */ +#define PGM_SAVED_STATE_VERSION 14 +/** Saved state data unit version before the PAE PDPE registers. */ +#define PGM_SAVED_STATE_VERSION_PRE_PAE 13 +/** Saved state data unit version after this includes ballooned page flags in + * the state (see @bugref{5515}). */ +#define PGM_SAVED_STATE_VERSION_BALLOON_BROKEN 12 +/** Saved state before the balloon change. */ +#define PGM_SAVED_STATE_VERSION_PRE_BALLOON 11 +/** Saved state data unit version used during 3.1 development, misses the RAM + * config. */ +#define PGM_SAVED_STATE_VERSION_NO_RAM_CFG 10 +/** Saved state data unit version for 3.0 (pre teleportation). 
*/ +#define PGM_SAVED_STATE_VERSION_3_0_0 9 +/** Saved state data unit version for 2.2.2 and later. */ +#define PGM_SAVED_STATE_VERSION_2_2_2 8 +/** Saved state data unit version for 2.2.0. */ +#define PGM_SAVED_STATE_VERSION_RR_DESC 7 +/** Saved state data unit version. */ +#define PGM_SAVED_STATE_VERSION_OLD_PHYS_CODE 6 + + +/** @name Sparse state record types + * @{ */ +/** Zero page. No data. */ +#define PGM_STATE_REC_RAM_ZERO UINT8_C(0x00) +/** Raw page. */ +#define PGM_STATE_REC_RAM_RAW UINT8_C(0x01) +/** Raw MMIO2 page. */ +#define PGM_STATE_REC_MMIO2_RAW UINT8_C(0x02) +/** Zero MMIO2 page. */ +#define PGM_STATE_REC_MMIO2_ZERO UINT8_C(0x03) +/** Virgin ROM page. Followed by protection (8-bit) and the raw bits. */ +#define PGM_STATE_REC_ROM_VIRGIN UINT8_C(0x04) +/** Raw shadowed ROM page. The protection (8-bit) precedes the raw bits. */ +#define PGM_STATE_REC_ROM_SHW_RAW UINT8_C(0x05) +/** Zero shadowed ROM page. The protection (8-bit) is the only payload. */ +#define PGM_STATE_REC_ROM_SHW_ZERO UINT8_C(0x06) +/** ROM protection (8-bit). */ +#define PGM_STATE_REC_ROM_PROT UINT8_C(0x07) +/** Ballooned page. No data. */ +#define PGM_STATE_REC_RAM_BALLOONED UINT8_C(0x08) +/** The last record type. */ +#define PGM_STATE_REC_LAST PGM_STATE_REC_RAM_BALLOONED +/** End marker. */ +#define PGM_STATE_REC_END UINT8_C(0xff) +/** Flag indicating that the data is preceded by the page address. + * For RAW pages this is a RTGCPHYS. For MMIO2 and ROM pages this is a 8-bit + * range ID and a 32-bit page index. + */ +#define PGM_STATE_REC_FLAG_ADDR UINT8_C(0x80) +/** @} */ + +/** The CRC-32 for a zero page. */ +#define PGM_STATE_CRC32_ZERO_PAGE UINT32_C(0xc71c0011) +/** The CRC-32 for a zero half page. */ +#define PGM_STATE_CRC32_ZERO_HALF_PAGE UINT32_C(0xf1e8ba9e) + + + +/** @name Old Page types used in older saved states. + * @{ */ +/** Old saved state: The usual invalid zero entry. */ +#define PGMPAGETYPE_OLD_INVALID 0 +/** Old saved state: RAM page. (RWX) */ +#define PGMPAGETYPE_OLD_RAM 1 +/** Old saved state: MMIO2 page. (RWX) */ +#define PGMPAGETYPE_OLD_MMIO2 1 +/** Old saved state: MMIO2 page aliased over an MMIO page. (RWX) + * See PGMHandlerPhysicalPageAlias(). */ +#define PGMPAGETYPE_OLD_MMIO2_ALIAS_MMIO 2 +/** Old saved state: Shadowed ROM. (RWX) */ +#define PGMPAGETYPE_OLD_ROM_SHADOW 3 +/** Old saved state: ROM page. (R-X) */ +#define PGMPAGETYPE_OLD_ROM 4 +/** Old saved state: MMIO page. (---) */ +#define PGMPAGETYPE_OLD_MMIO 5 +/** @} */ + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** For loading old saved states. (pre-smp) */ +typedef struct +{ + /** If set no conflict checks are required. (boolean) */ + bool fMappingsFixed; + /** Size of fixed mapping */ + uint32_t cbMappingFixed; + /** Base address (GC) of fixed mapping */ + RTGCPTR GCPtrMappingFixed; + /** A20 gate mask. + * Our current approach to A20 emulation is to let REM do it and don't bother + * anywhere else. The interesting guests will be operating with it enabled anyway. + * But should the need arise, we'll subject physical addresses to this mask. */ + RTGCPHYS GCPhysA20Mask; + /** A20 gate state - boolean! */ + bool fA20Enabled; + /** The guest paging mode. 
*/ + PGMMODE enmGuestMode; +} PGMOLD; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** PGM fields to save/load. */ + +static const SSMFIELD s_aPGMFields[] = +{ + SSMFIELD_ENTRY( PGM, fMappingsFixed), + SSMFIELD_ENTRY_GCPTR( PGM, GCPtrMappingFixed), + SSMFIELD_ENTRY( PGM, cbMappingFixed), + SSMFIELD_ENTRY( PGM, cBalloonedPages), + SSMFIELD_ENTRY_TERM() +}; + +static const SSMFIELD s_aPGMFieldsPreBalloon[] = +{ + SSMFIELD_ENTRY( PGM, fMappingsFixed), + SSMFIELD_ENTRY_GCPTR( PGM, GCPtrMappingFixed), + SSMFIELD_ENTRY( PGM, cbMappingFixed), + SSMFIELD_ENTRY_TERM() +}; + +static const SSMFIELD s_aPGMCpuFields[] = +{ + SSMFIELD_ENTRY( PGMCPU, fA20Enabled), + SSMFIELD_ENTRY_GCPHYS( PGMCPU, GCPhysA20Mask), + SSMFIELD_ENTRY( PGMCPU, enmGuestMode), + SSMFIELD_ENTRY( PGMCPU, aGCPhysGstPaePDs[0]), + SSMFIELD_ENTRY( PGMCPU, aGCPhysGstPaePDs[1]), + SSMFIELD_ENTRY( PGMCPU, aGCPhysGstPaePDs[2]), + SSMFIELD_ENTRY( PGMCPU, aGCPhysGstPaePDs[3]), + SSMFIELD_ENTRY_TERM() +}; + +static const SSMFIELD s_aPGMCpuFieldsPrePae[] = +{ + SSMFIELD_ENTRY( PGMCPU, fA20Enabled), + SSMFIELD_ENTRY_GCPHYS( PGMCPU, GCPhysA20Mask), + SSMFIELD_ENTRY( PGMCPU, enmGuestMode), + SSMFIELD_ENTRY_TERM() +}; + +static const SSMFIELD s_aPGMFields_Old[] = +{ + SSMFIELD_ENTRY( PGMOLD, fMappingsFixed), + SSMFIELD_ENTRY_GCPTR( PGMOLD, GCPtrMappingFixed), + SSMFIELD_ENTRY( PGMOLD, cbMappingFixed), + SSMFIELD_ENTRY( PGMOLD, fA20Enabled), + SSMFIELD_ENTRY_GCPHYS( PGMOLD, GCPhysA20Mask), + SSMFIELD_ENTRY( PGMOLD, enmGuestMode), + SSMFIELD_ENTRY_TERM() +}; + + +/** + * Find the ROM tracking structure for the given page. + * + * @returns Pointer to the ROM page structure. NULL if the caller didn't check + * that it's a ROM page. + * @param pVM The cross context VM structure. + * @param GCPhys The address of the ROM page. + */ +static PPGMROMPAGE pgmR3GetRomPage(PVM pVM, RTGCPHYS GCPhys) /** @todo change this to take a hint. */ +{ + for (PPGMROMRANGE pRomRange = pVM->pgm.s.CTX_SUFF(pRomRanges); + pRomRange; + pRomRange = pRomRange->CTX_SUFF(pNext)) + { + RTGCPHYS off = GCPhys - pRomRange->GCPhys; + if (GCPhys - pRomRange->GCPhys < pRomRange->cb) + return &pRomRange->aPages[off >> PAGE_SHIFT]; + } + return NULL; +} + + +/** + * Prepares the ROM pages for a live save. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int pgmR3PrepRomPages(PVM pVM) +{ + /* + * Initialize the live save tracking in the ROM page descriptors. 
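pgmR3GetRomPage above locates the ROM range containing GCPhys with a single unsigned comparison: because the subtraction wraps around for addresses below the range start, (GCPhys - base) < size rejects both too-low and too-high addresses at once, and the same difference then doubles as the byte offset into the range. A small self-contained sketch of the idiom, with simplified types and hypothetical names:

    #include <cstdint>

    struct Range { uint64_t base; uint64_t size; Range *next; };

    /* Sketch: find the range containing addr using the unsigned wrap-around trick. */
    static Range *findRange(Range *pHead, uint64_t addr)
    {
        for (Range *p = pHead; p; p = p->next)
            if (addr - p->base < p->size)   /* one test covers addr < base and addr >= base + size */
                return p;                   /* (addr - p->base) is also the offset into the range  */
        return nullptr;
    }
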
+ */ + pgmLock(pVM); + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + PPGMRAMRANGE pRamHint = NULL;; + uint32_t const cPages = pRom->cb >> PAGE_SHIFT; + + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + pRom->aPages[iPage].LiveSave.u8Prot = (uint8_t)PGMROMPROT_INVALID; + pRom->aPages[iPage].LiveSave.fWrittenTo = false; + pRom->aPages[iPage].LiveSave.fDirty = true; + pRom->aPages[iPage].LiveSave.fDirtiedRecently = true; + if (!(pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)) + { + if (PGMROMPROT_IS_ROM(pRom->aPages[iPage].enmProt)) + pRom->aPages[iPage].LiveSave.fWrittenTo = !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow) && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow); + else + { + RTGCPHYS GCPhys = pRom->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + PPGMPAGE pPage; + int rc = pgmPhysGetPageWithHintEx(pVM, GCPhys, &pPage, &pRamHint); + AssertLogRelMsgRC(rc, ("%Rrc GCPhys=%RGp\n", rc, GCPhys)); + if (RT_SUCCESS(rc)) + pRom->aPages[iPage].LiveSave.fWrittenTo = !PGM_PAGE_IS_ZERO(pPage) && !PGM_PAGE_IS_BALLOONED(pPage); + else + pRom->aPages[iPage].LiveSave.fWrittenTo = !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow) && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow); + } + } + } + + pVM->pgm.s.LiveSave.Rom.cDirtyPages += cPages; + if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + pVM->pgm.s.LiveSave.Rom.cDirtyPages += cPages; + } + pgmUnlock(pVM); + + return VINF_SUCCESS; +} + + +/** + * Assigns IDs to the ROM ranges and saves them. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM Saved state handle. + */ +static int pgmR3SaveRomRanges(PVM pVM, PSSMHANDLE pSSM) +{ + pgmLock(pVM); + uint8_t id = 1; + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3, id++) + { + pRom->idSavedState = id; + SSMR3PutU8(pSSM, id); + SSMR3PutStrZ(pSSM, ""); /* device name */ + SSMR3PutU32(pSSM, 0); /* device instance */ + SSMR3PutU8(pSSM, 0); /* region */ + SSMR3PutStrZ(pSSM, pRom->pszDesc); + SSMR3PutGCPhys(pSSM, pRom->GCPhys); + int rc = SSMR3PutGCPhys(pSSM, pRom->cb); + if (RT_FAILURE(rc)) + break; + } + pgmUnlock(pVM); + return SSMR3PutU8(pSSM, UINT8_MAX); +} + + +/** + * Loads the ROM range ID assignments. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int pgmR3LoadRomRanges(PVM pVM, PSSMHANDLE pSSM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + pRom->idSavedState = UINT8_MAX; + + for (;;) + { + /* + * Read the data. + */ + uint8_t id; + int rc = SSMR3GetU8(pSSM, &id); + if (RT_FAILURE(rc)) + return rc; + if (id == UINT8_MAX) + { + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + AssertLogRelMsg(pRom->idSavedState != UINT8_MAX, + ("The \"%s\" ROM was not found in the saved state. 
Probably due to some misconfiguration\n", + pRom->pszDesc)); + return VINF_SUCCESS; /* the end */ + } + AssertLogRelReturn(id != 0, VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + char szDevName[RT_SIZEOFMEMB(PDMDEVREG, szName)]; + rc = SSMR3GetStrZ(pSSM, szDevName, sizeof(szDevName)); + AssertLogRelRCReturn(rc, rc); + + uint32_t uInstance; + SSMR3GetU32(pSSM, &uInstance); + uint8_t iRegion; + SSMR3GetU8(pSSM, &iRegion); + + char szDesc[64]; + rc = SSMR3GetStrZ(pSSM, szDesc, sizeof(szDesc)); + AssertLogRelRCReturn(rc, rc); + + RTGCPHYS GCPhys; + SSMR3GetGCPhys(pSSM, &GCPhys); + RTGCPHYS cb; + rc = SSMR3GetGCPhys(pSSM, &cb); + if (RT_FAILURE(rc)) + return rc; + AssertLogRelMsgReturn(!(GCPhys & PAGE_OFFSET_MASK), ("GCPhys=%RGp %s\n", GCPhys, szDesc), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelMsgReturn(!(cb & PAGE_OFFSET_MASK), ("cb=%RGp %s\n", cb, szDesc), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + /* + * Locate a matching ROM range. + */ + AssertLogRelMsgReturn( uInstance == 0 + && iRegion == 0 + && szDevName[0] == '\0', + ("GCPhys=%RGp %s\n", GCPhys, szDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + PPGMROMRANGE pRom; + for (pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + if ( pRom->idSavedState == UINT8_MAX + && !strcmp(pRom->pszDesc, szDesc)) + { + pRom->idSavedState = id; + break; + } + } + if (!pRom) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("ROM at %RGp by the name '%s' was not found"), GCPhys, szDesc); + } /* forever */ +} + + +/** + * Scan ROM pages. + * + * @param pVM The cross context VM structure. + */ +static void pgmR3ScanRomPages(PVM pVM) +{ + /* + * The shadow ROMs. + */ + pgmLock(pVM); + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + { + uint32_t const cPages = pRom->cb >> PAGE_SHIFT; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + PPGMROMPAGE pRomPage = &pRom->aPages[iPage]; + if (pRomPage->LiveSave.fWrittenTo) + { + pRomPage->LiveSave.fWrittenTo = false; + if (!pRomPage->LiveSave.fDirty) + { + pRomPage->LiveSave.fDirty = true; + pVM->pgm.s.LiveSave.Rom.cReadyPages--; + pVM->pgm.s.LiveSave.Rom.cDirtyPages++; + } + pRomPage->LiveSave.fDirtiedRecently = true; + } + else + pRomPage->LiveSave.fDirtiedRecently = false; + } + } + } + pgmUnlock(pVM); +} + + +/** + * Takes care of the virgin ROM pages in the first pass. + * + * This is an attempt at simplifying the handling of ROM pages a little bit. + * This ASSUMES that no new ROM ranges will be added and that they won't be + * relinked in any way. + * + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param fLiveSave Whether we're in a live save or not. + */ +static int pgmR3SaveRomVirginPages(PVM pVM, PSSMHANDLE pSSM, bool fLiveSave) +{ + if (FTMIsDeltaLoadSaveActive(pVM)) + return VINF_SUCCESS; /* nothing to do as nothing has changed here */ + + pgmLock(pVM); + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + uint32_t const cPages = pRom->cb >> PAGE_SHIFT; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + RTGCPHYS GCPhys = pRom->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + PGMROMPROT enmProt = pRom->aPages[iPage].enmProt; + + /* Get the virgin page descriptor. */ + PPGMPAGE pPage; + if (PGMROMPROT_IS_ROM(enmProt)) + pPage = pgmPhysGetPage(pVM, GCPhys); + else + pPage = &pRom->aPages[iPage].Virgin; + + /* Get the page bits. (Cannot use pgmPhysGCPhys2CCPtrInternalReadOnly here!) 
*/ + int rc = VINF_SUCCESS; + char abPage[PAGE_SIZE]; + if ( !PGM_PAGE_IS_ZERO(pPage) + && !PGM_PAGE_IS_BALLOONED(pPage)) + { + void const *pvPage; + rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhys, &pvPage); + if (RT_SUCCESS(rc)) + memcpy(abPage, pvPage, PAGE_SIZE); + } + else + ASMMemZeroPage(abPage); + pgmUnlock(pVM); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc GCPhys=%RGp\n", rc, GCPhys), rc); + + /* Save it. */ + if (iPage > 0) + SSMR3PutU8(pSSM, PGM_STATE_REC_ROM_VIRGIN); + else + { + SSMR3PutU8(pSSM, PGM_STATE_REC_ROM_VIRGIN | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutU8(pSSM, pRom->idSavedState); + SSMR3PutU32(pSSM, iPage); + } + SSMR3PutU8(pSSM, (uint8_t)enmProt); + rc = SSMR3PutMem(pSSM, abPage, PAGE_SIZE); + if (RT_FAILURE(rc)) + return rc; + + /* Update state. */ + pgmLock(pVM); + pRom->aPages[iPage].LiveSave.u8Prot = (uint8_t)enmProt; + if (fLiveSave) + { + pVM->pgm.s.LiveSave.Rom.cDirtyPages--; + pVM->pgm.s.LiveSave.Rom.cReadyPages++; + pVM->pgm.s.LiveSave.cSavedPages++; + } + } + } + pgmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * Saves dirty pages in the shadowed ROM ranges. + * + * Used by pgmR3LiveExecPart2 and pgmR3SaveExecMemory. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param fLiveSave Whether it's a live save or not. + * @param fFinalPass Whether this is the final pass or not. + */ +static int pgmR3SaveShadowedRomPages(PVM pVM, PSSMHANDLE pSSM, bool fLiveSave, bool fFinalPass) +{ + if (FTMIsDeltaLoadSaveActive(pVM)) + return VINF_SUCCESS; /* nothing to do as we deal with those pages separately */ + + /* + * The Shadowed ROMs. + * + * ASSUMES that the ROM ranges are fixed. + * ASSUMES that all the ROM ranges are mapped. + */ + pgmLock(pVM); + for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + { + if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED) + { + uint32_t const cPages = pRom->cb >> PAGE_SHIFT; + uint32_t iPrevPage = cPages; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + PPGMROMPAGE pRomPage = &pRom->aPages[iPage]; + if ( !fLiveSave + || ( pRomPage->LiveSave.fDirty + && ( ( !pRomPage->LiveSave.fDirtiedRecently + && !pRomPage->LiveSave.fWrittenTo) + || fFinalPass + ) + ) + ) + { + uint8_t abPage[PAGE_SIZE]; + PGMROMPROT enmProt = pRomPage->enmProt; + RTGCPHYS GCPhys = pRom->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + PPGMPAGE pPage = PGMROMPROT_IS_ROM(enmProt) ? &pRomPage->Shadow : pgmPhysGetPage(pVM, GCPhys); + bool fZero = PGM_PAGE_IS_ZERO(pPage) || PGM_PAGE_IS_BALLOONED(pPage); Assert(!PGM_PAGE_IS_BALLOONED(pPage)); /* Shouldn't be ballooned. */ + int rc = VINF_SUCCESS; + if (!fZero) + { + void const *pvPage; + rc = pgmPhysPageMapReadOnly(pVM, pPage, GCPhys, &pvPage); + if (RT_SUCCESS(rc)) + memcpy(abPage, pvPage, PAGE_SIZE); + } + if (fLiveSave && RT_SUCCESS(rc)) + { + pRomPage->LiveSave.u8Prot = (uint8_t)enmProt; + pRomPage->LiveSave.fDirty = false; + pVM->pgm.s.LiveSave.Rom.cReadyPages++; + pVM->pgm.s.LiveSave.Rom.cDirtyPages--; + pVM->pgm.s.LiveSave.cSavedPages++; + } + pgmUnlock(pVM); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc GCPhys=%RGp\n", rc, GCPhys), rc); + + if (iPage - 1U == iPrevPage && iPage > 0) + SSMR3PutU8(pSSM, (fZero ? PGM_STATE_REC_ROM_SHW_ZERO : PGM_STATE_REC_ROM_SHW_RAW)); + else + { + SSMR3PutU8(pSSM, (fZero ? 
PGM_STATE_REC_ROM_SHW_ZERO : PGM_STATE_REC_ROM_SHW_RAW) | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutU8(pSSM, pRom->idSavedState); + SSMR3PutU32(pSSM, iPage); + } + rc = SSMR3PutU8(pSSM, (uint8_t)enmProt); + if (!fZero) + rc = SSMR3PutMem(pSSM, abPage, PAGE_SIZE); + if (RT_FAILURE(rc)) + return rc; + + pgmLock(pVM); + iPrevPage = iPage; + } + /* + * In the final pass, make sure the protection is in sync. + */ + else if ( fFinalPass + && pRomPage->LiveSave.u8Prot != pRomPage->enmProt) + { + PGMROMPROT enmProt = pRomPage->enmProt; + pRomPage->LiveSave.u8Prot = (uint8_t)enmProt; + pgmUnlock(pVM); + + if (iPage - 1U == iPrevPage && iPage > 0) + SSMR3PutU8(pSSM, PGM_STATE_REC_ROM_PROT); + else + { + SSMR3PutU8(pSSM, PGM_STATE_REC_ROM_PROT | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutU8(pSSM, pRom->idSavedState); + SSMR3PutU32(pSSM, iPage); + } + int rc = SSMR3PutU8(pSSM, (uint8_t)enmProt); + if (RT_FAILURE(rc)) + return rc; + + pgmLock(pVM); + iPrevPage = iPage; + } + } + } + } + pgmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * Cleans up ROM pages after a live save. + * + * @param pVM The cross context VM structure. + */ +static void pgmR3DoneRomPages(PVM pVM) +{ + NOREF(pVM); +} + + +/** + * Prepares the MMIO2 pages for a live save. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int pgmR3PrepMmio2Pages(PVM pVM) +{ + /* + * Initialize the live save tracking in the MMIO2 ranges. + * ASSUME nothing changes here. + */ + pgmLock(pVM); + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + { + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + uint32_t const cPages = pRegMmio->RamRange.cb >> PAGE_SHIFT; + pgmUnlock(pVM); + + PPGMLIVESAVEMMIO2PAGE paLSPages = (PPGMLIVESAVEMMIO2PAGE)MMR3HeapAllocZ(pVM, MM_TAG_PGM, sizeof(PGMLIVESAVEMMIO2PAGE) * cPages); + if (!paLSPages) + return VERR_NO_MEMORY; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + /* Initialize it as a dirty zero page. */ + paLSPages[iPage].fDirty = true; + paLSPages[iPage].cUnchangedScans = 0; + paLSPages[iPage].fZero = true; + paLSPages[iPage].u32CrcH1 = PGM_STATE_CRC32_ZERO_HALF_PAGE; + paLSPages[iPage].u32CrcH2 = PGM_STATE_CRC32_ZERO_HALF_PAGE; + } + + pgmLock(pVM); + pRegMmio->paLSPages = paLSPages; + pVM->pgm.s.LiveSave.Mmio2.cDirtyPages += cPages; + } + } + pgmUnlock(pVM); + return VINF_SUCCESS; +} + + +/** + * Assigns IDs to the MMIO2 ranges and saves them. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM Saved state handle. + */ +static int pgmR3SaveMmio2Ranges(PVM pVM, PSSMHANDLE pSSM) +{ + pgmLock(pVM); + uint8_t id = 1; + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + { + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + pRegMmio->idSavedState = id; + SSMR3PutU8(pSSM, id); + SSMR3PutStrZ(pSSM, pRegMmio->pDevInsR3->pReg->szName); + SSMR3PutU32(pSSM, pRegMmio->pDevInsR3->iInstance); + SSMR3PutU8(pSSM, pRegMmio->iRegion); + SSMR3PutStrZ(pSSM, pRegMmio->RamRange.pszDesc); + int rc = SSMR3PutGCPhys(pSSM, pRegMmio->RamRange.cb); + if (RT_FAILURE(rc)) + break; + id++; + } + } + pgmUnlock(pVM); + return SSMR3PutU8(pSSM, UINT8_MAX); +} + + +/** + * Loads the MMIO2 range ID assignments. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. 
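The ROM and MMIO2 savers keep the stream compact by exploiting locality: a page that immediately follows the previously saved page is written as a bare record-type byte, while any other page sets PGM_STATE_REC_FLAG_ADDR and is prefixed with its 8-bit range ID and 32-bit page index. A simplified sketch of that header logic, writing into a plain byte vector with a made-up little-endian layout rather than the real SSM stream:

    #include <cstdint>
    #include <vector>

    /* Sketch: emit a page record header, omitting the address for consecutive pages. */
    static void putPageRecordHeader(std::vector<uint8_t> &stream, uint8_t bType,
                                    uint8_t idRange, uint32_t iPage, uint32_t iPrevPage)
    {
        if (iPage > 0 && iPage == iPrevPage + 1)
            stream.push_back(bType);                    /* address is implicit */
        else
        {
            stream.push_back(bType | UINT8_C(0x80));    /* PGM_STATE_REC_FLAG_ADDR */
            stream.push_back(idRange);
            for (unsigned i = 0; i < 4; i++)            /* 32-bit page index */
                stream.push_back((uint8_t)(iPage >> (8 * i)));
        }
    }
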
+ */ +static int pgmR3LoadMmio2Ranges(PVM pVM, PSSMHANDLE pSSM) +{ + PGM_LOCK_ASSERT_OWNER(pVM); + + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + pRegMmio->idSavedState = UINT8_MAX; + + for (;;) + { + /* + * Read the data. + */ + uint8_t id; + int rc = SSMR3GetU8(pSSM, &id); + if (RT_FAILURE(rc)) + return rc; + if (id == UINT8_MAX) + { + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + AssertLogRelMsg( pRegMmio->idSavedState != UINT8_MAX + || !(pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2), + ("%s\n", pRegMmio->RamRange.pszDesc)); + return VINF_SUCCESS; /* the end */ + } + AssertLogRelReturn(id != 0, VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + char szDevName[RT_SIZEOFMEMB(PDMDEVREG, szName)]; + rc = SSMR3GetStrZ(pSSM, szDevName, sizeof(szDevName)); + AssertLogRelRCReturn(rc, rc); + + uint32_t uInstance; + SSMR3GetU32(pSSM, &uInstance); + uint8_t iRegion; + SSMR3GetU8(pSSM, &iRegion); + + char szDesc[64]; + rc = SSMR3GetStrZ(pSSM, szDesc, sizeof(szDesc)); + AssertLogRelRCReturn(rc, rc); + + RTGCPHYS cb; + rc = SSMR3GetGCPhys(pSSM, &cb); + AssertLogRelMsgReturn(!(cb & PAGE_OFFSET_MASK), ("cb=%RGp %s\n", cb, szDesc), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + /* + * Locate a matching MMIO2 range. + */ + PPGMREGMMIORANGE pRegMmio; + for (pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + { + if ( pRegMmio->idSavedState == UINT8_MAX + && pRegMmio->iRegion == iRegion + && pRegMmio->pDevInsR3->iInstance == uInstance + && (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + && !strcmp(pRegMmio->pDevInsR3->pReg->szName, szDevName)) + { + pRegMmio->idSavedState = id; + break; + } + } + if (!pRegMmio) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Failed to locate a MMIO2 range called '%s' owned by %s/%u, region %d"), + szDesc, szDevName, uInstance, iRegion); + + /* + * Validate the configuration, the size of the MMIO2 region should be + * the same. + */ + if (cb != pRegMmio->RamRange.cb) + { + LogRel(("PGM: MMIO2 region \"%s\" size mismatch: saved=%RGp config=%RGp\n", + pRegMmio->RamRange.pszDesc, cb, pRegMmio->RamRange.cb)); + if (cb > pRegMmio->RamRange.cb) /* bad idea? */ + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("MMIO2 region \"%s\" size mismatch: saved=%RGp config=%RGp"), + pRegMmio->RamRange.pszDesc, cb, pRegMmio->RamRange.cb); + } + } /* forever */ +} + + +/** + * Scans one MMIO2 page. + * + * @returns True if changed, false if unchanged. + * + * @param pVM The cross context VM structure. + * @param pbPage The page bits. + * @param pLSPage The live save tracking structure for the page. + * + */ +DECLINLINE(bool) pgmR3ScanMmio2Page(PVM pVM, uint8_t const *pbPage, PPGMLIVESAVEMMIO2PAGE pLSPage) +{ + /* + * Special handling of zero pages. + */ + bool const fZero = pLSPage->fZero; + if (fZero) + { + if (ASMMemIsZeroPage(pbPage)) + { + /* Not modified. */ + if (pLSPage->fDirty) + pLSPage->cUnchangedScans++; + return false; + } + + pLSPage->fZero = false; + pLSPage->u32CrcH1 = RTCrc32(pbPage, PAGE_SIZE / 2); + } + else + { + /* + * CRC the first half, if it doesn't match the page is dirty and + * we won't check the 2nd half (we'll do that next time). + */ + uint32_t u32CrcH1 = RTCrc32(pbPage, PAGE_SIZE / 2); + if (u32CrcH1 == pLSPage->u32CrcH1) + { + uint32_t u32CrcH2 = RTCrc32(pbPage + PAGE_SIZE / 2, PAGE_SIZE / 2); + if (u32CrcH2 == pLSPage->u32CrcH2) + { + /* Probably not modified. 
*/ + if (pLSPage->fDirty) + pLSPage->cUnchangedScans++; + return false; + } + + pLSPage->u32CrcH2 = u32CrcH2; + } + else + { + pLSPage->u32CrcH1 = u32CrcH1; + if ( u32CrcH1 == PGM_STATE_CRC32_ZERO_HALF_PAGE + && ASMMemIsZeroPage(pbPage)) + { + pLSPage->u32CrcH2 = PGM_STATE_CRC32_ZERO_HALF_PAGE; + pLSPage->fZero = true; + } + } + } + + /* dirty page path */ + pLSPage->cUnchangedScans = 0; + if (!pLSPage->fDirty) + { + pLSPage->fDirty = true; + pVM->pgm.s.LiveSave.Mmio2.cReadyPages--; + pVM->pgm.s.LiveSave.Mmio2.cDirtyPages++; + if (fZero) + pVM->pgm.s.LiveSave.Mmio2.cZeroPages--; + } + return true; +} + + +/** + * Scan for MMIO2 page modifications. + * + * @param pVM The cross context VM structure. + * @param uPass The pass number. + */ +static void pgmR3ScanMmio2Pages(PVM pVM, uint32_t uPass) +{ + /* + * Since this is a bit expensive we lower the scan rate after a little while. + */ + if ( ( (uPass & 3) != 0 + && uPass > 10) + || uPass == SSM_PASS_FINAL) + return; + + pgmLock(pVM); /* paranoia */ + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + PPGMLIVESAVEMMIO2PAGE paLSPages = pRegMmio->paLSPages; + uint32_t cPages = pRegMmio->RamRange.cb >> PAGE_SHIFT; + pgmUnlock(pVM); + + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + uint8_t const *pbPage = (uint8_t const *)pRegMmio->pvR3 + iPage * PAGE_SIZE; + pgmR3ScanMmio2Page(pVM, pbPage, &paLSPages[iPage]); + } + + pgmLock(pVM); + } + pgmUnlock(pVM); + +} + + +/** + * Save quiescent MMIO2 pages. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param fLiveSave Whether it's a live save or not. + * @param uPass The pass number. + */ +static int pgmR3SaveMmio2Pages(PVM pVM, PSSMHANDLE pSSM, bool fLiveSave, uint32_t uPass) +{ + /** @todo implement live saving of MMIO2 pages. (Need some way of telling the + * device that we wish to know about changes.) */ + + int rc = VINF_SUCCESS; + if (uPass == SSM_PASS_FINAL) + { + /* + * The mop up round. + */ + pgmLock(pVM); + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; + pRegMmio && RT_SUCCESS(rc); + pRegMmio = pRegMmio->pNextR3) + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + PPGMLIVESAVEMMIO2PAGE paLSPages = pRegMmio->paLSPages; + uint8_t const *pbPage = (uint8_t const *)pRegMmio->RamRange.pvR3; + uint32_t cPages = pRegMmio->RamRange.cb >> PAGE_SHIFT; + uint32_t iPageLast = cPages; + for (uint32_t iPage = 0; iPage < cPages; iPage++, pbPage += PAGE_SIZE) + { + uint8_t u8Type; + if (!fLiveSave) + u8Type = ASMMemIsZeroPage(pbPage) ? PGM_STATE_REC_MMIO2_ZERO : PGM_STATE_REC_MMIO2_RAW; + else + { + /* Try figure if it's a clean page, compare the SHA-1 to be really sure. */ + if ( !paLSPages[iPage].fDirty + && !pgmR3ScanMmio2Page(pVM, pbPage, &paLSPages[iPage])) + { + if (paLSPages[iPage].fZero) + continue; + + uint8_t abSha1Hash[RTSHA1_HASH_SIZE]; + RTSha1(pbPage, PAGE_SIZE, abSha1Hash); + if (!memcmp(abSha1Hash, paLSPages[iPage].abSha1Saved, sizeof(abSha1Hash))) + continue; + } + u8Type = paLSPages[iPage].fZero ? 
PGM_STATE_REC_MMIO2_ZERO : PGM_STATE_REC_MMIO2_RAW; + pVM->pgm.s.LiveSave.cSavedPages++; + } + + if (iPage != 0 && iPage == iPageLast + 1) + rc = SSMR3PutU8(pSSM, u8Type); + else + { + SSMR3PutU8(pSSM, u8Type | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutU8(pSSM, pRegMmio->idSavedState); + rc = SSMR3PutU32(pSSM, iPage); + } + if (u8Type == PGM_STATE_REC_MMIO2_RAW) + rc = SSMR3PutMem(pSSM, pbPage, PAGE_SIZE); + if (RT_FAILURE(rc)) + break; + iPageLast = iPage; + } + } + pgmUnlock(pVM); + } + /* + * Reduce the rate after a little while since the current MMIO2 approach is + * a bit expensive. + * We position it two passes after the scan pass to avoid saving busy pages. + */ + else if ( uPass <= 10 + || (uPass & 3) == 2) + { + pgmLock(pVM); + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; + pRegMmio && RT_SUCCESS(rc); + pRegMmio = pRegMmio->pNextR3) + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + PPGMLIVESAVEMMIO2PAGE paLSPages = pRegMmio->paLSPages; + uint8_t const *pbPage = (uint8_t const *)pRegMmio->RamRange.pvR3; + uint32_t cPages = pRegMmio->RamRange.cb >> PAGE_SHIFT; + uint32_t iPageLast = cPages; + pgmUnlock(pVM); + + for (uint32_t iPage = 0; iPage < cPages; iPage++, pbPage += PAGE_SIZE) + { + /* Skip clean pages and pages which hasn't quiesced. */ + if (!paLSPages[iPage].fDirty) + continue; + if (paLSPages[iPage].cUnchangedScans < 3) + continue; + if (pgmR3ScanMmio2Page(pVM, pbPage, &paLSPages[iPage])) + continue; + + /* Save it. */ + bool const fZero = paLSPages[iPage].fZero; + uint8_t abPage[PAGE_SIZE]; + if (!fZero) + { + memcpy(abPage, pbPage, PAGE_SIZE); + RTSha1(abPage, PAGE_SIZE, paLSPages[iPage].abSha1Saved); + } + + uint8_t u8Type = paLSPages[iPage].fZero ? PGM_STATE_REC_MMIO2_ZERO : PGM_STATE_REC_MMIO2_RAW; + if (iPage != 0 && iPage == iPageLast + 1) + rc = SSMR3PutU8(pSSM, u8Type); + else + { + SSMR3PutU8(pSSM, u8Type | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutU8(pSSM, pRegMmio->idSavedState); + rc = SSMR3PutU32(pSSM, iPage); + } + if (u8Type == PGM_STATE_REC_MMIO2_RAW) + rc = SSMR3PutMem(pSSM, abPage, PAGE_SIZE); + if (RT_FAILURE(rc)) + break; + + /* Housekeeping. */ + paLSPages[iPage].fDirty = false; + pVM->pgm.s.LiveSave.Mmio2.cDirtyPages--; + pVM->pgm.s.LiveSave.Mmio2.cReadyPages++; + if (u8Type == PGM_STATE_REC_MMIO2_ZERO) + pVM->pgm.s.LiveSave.Mmio2.cZeroPages++; + pVM->pgm.s.LiveSave.cSavedPages++; + iPageLast = iPage; + } + + pgmLock(pVM); + } + pgmUnlock(pVM); + } + + return rc; +} + + +/** + * Cleans up MMIO2 pages after a live save. + * + * @param pVM The cross context VM structure. + */ +static void pgmR3DoneMmio2Pages(PVM pVM) +{ + /* + * Free the tracking structures for the MMIO2 pages. + * We do the freeing outside the lock in case the VM is running. + */ + pgmLock(pVM); + for (PPGMREGMMIORANGE pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + if (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2) + { + void *pvMmio2ToFree = pRegMmio->paLSPages; + if (pvMmio2ToFree) + { + pRegMmio->paLSPages = NULL; + pgmUnlock(pVM); + MMR3HeapFree(pvMmio2ToFree); + pgmLock(pVM); + } + } + pgmUnlock(pVM); +} + + +/** + * Prepares the RAM pages for a live save. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int pgmR3PrepRamPages(PVM pVM) +{ + + /* + * Try allocating tracking structures for the ram ranges. + * + * To avoid lock contention, we leave the lock every time we're allocating + * a new array. 
This means we'll have to ditch the allocation and start + * all over again if the RAM range list changes in-between. + * + * Note! pgmR3SaveDone will always be called and it is therefore responsible + * for cleaning up. + */ + PPGMRAMRANGE pCur; + pgmLock(pVM); + do + { + for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + { + if ( !pCur->paLSPages + && !PGM_RAM_RANGE_IS_AD_HOC(pCur)) + { + uint32_t const idRamRangesGen = pVM->pgm.s.idRamRangesGen; + uint32_t const cPages = pCur->cb >> PAGE_SHIFT; + pgmUnlock(pVM); + PPGMLIVESAVERAMPAGE paLSPages = (PPGMLIVESAVERAMPAGE)MMR3HeapAllocZ(pVM, MM_TAG_PGM, cPages * sizeof(PGMLIVESAVERAMPAGE)); + if (!paLSPages) + return VERR_NO_MEMORY; + pgmLock(pVM); + if (pVM->pgm.s.idRamRangesGen != idRamRangesGen) + { + pgmUnlock(pVM); + MMR3HeapFree(paLSPages); + pgmLock(pVM); + break; /* try again */ + } + pCur->paLSPages = paLSPages; + + /* + * Initialize the array. + */ + uint32_t iPage = cPages; + while (iPage-- > 0) + { + /** @todo yield critsect! (after moving this away from EMT0) */ + PCPGMPAGE pPage = &pCur->aPages[iPage]; + paLSPages[iPage].cDirtied = 0; + paLSPages[iPage].fDirty = 1; /* everything is dirty at this time */ + paLSPages[iPage].fWriteMonitored = 0; + paLSPages[iPage].fWriteMonitoredJustNow = 0; + paLSPages[iPage].u2Reserved = 0; + switch (PGM_PAGE_GET_TYPE(pPage)) + { + case PGMPAGETYPE_RAM: + if ( PGM_PAGE_IS_ZERO(pPage) + || PGM_PAGE_IS_BALLOONED(pPage)) + { + paLSPages[iPage].fZero = 1; + paLSPages[iPage].fShared = 0; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = PGM_STATE_CRC32_ZERO_PAGE; +#endif + } + else if (PGM_PAGE_IS_SHARED(pPage)) + { + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 1; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; +#endif + } + else + { + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 0; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; +#endif + } + paLSPages[iPage].fIgnore = 0; + pVM->pgm.s.LiveSave.Ram.cDirtyPages++; + break; + + case PGMPAGETYPE_ROM_SHADOW: + case PGMPAGETYPE_ROM: + { + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 0; + paLSPages[iPage].fDirty = 0; + paLSPages[iPage].fIgnore = 1; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; +#endif + pVM->pgm.s.LiveSave.cIgnoredPages++; + break; + } + + default: + AssertMsgFailed(("%R[pgmpage]", pPage)); + RT_FALL_THRU(); + case PGMPAGETYPE_MMIO2: + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 0; + paLSPages[iPage].fDirty = 0; + paLSPages[iPage].fIgnore = 1; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; +#endif + pVM->pgm.s.LiveSave.cIgnoredPages++; + break; + + case PGMPAGETYPE_MMIO: + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 0; + paLSPages[iPage].fDirty = 0; + paLSPages[iPage].fIgnore = 1; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; +#endif + pVM->pgm.s.LiveSave.cIgnoredPages++; + break; + } + } + } + } + } while (pCur); + pgmUnlock(pVM); + + return VINF_SUCCESS; +} + + +/** + * Saves the RAM configuration. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. 
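+ *
+ * @remarks Only two fields are written: the CFGM "RamHoleSize" value as a
+ *          uint32_t followed by the CFGM "RamSize" value as a uint64_t.
+ *          pgmR3LoadRamConfig compares both against the current
+ *          configuration and refuses to load the state on a mismatch.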
+ */ +static int pgmR3SaveRamConfig(PVM pVM, PSSMHANDLE pSSM) +{ + uint32_t cbRamHole = 0; + int rc = CFGMR3QueryU32Def(CFGMR3GetRoot(pVM), "RamHoleSize", &cbRamHole, MM_RAM_HOLE_SIZE_DEFAULT); + AssertRCReturn(rc, rc); + + uint64_t cbRam = 0; + rc = CFGMR3QueryU64Def(CFGMR3GetRoot(pVM), "RamSize", &cbRam, 0); + AssertRCReturn(rc, rc); + + SSMR3PutU32(pSSM, cbRamHole); + return SSMR3PutU64(pSSM, cbRam); +} + + +/** + * Loads and verifies the RAM configuration. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int pgmR3LoadRamConfig(PVM pVM, PSSMHANDLE pSSM) +{ + uint32_t cbRamHoleCfg = 0; + int rc = CFGMR3QueryU32Def(CFGMR3GetRoot(pVM), "RamHoleSize", &cbRamHoleCfg, MM_RAM_HOLE_SIZE_DEFAULT); + AssertRCReturn(rc, rc); + + uint64_t cbRamCfg = 0; + rc = CFGMR3QueryU64Def(CFGMR3GetRoot(pVM), "RamSize", &cbRamCfg, 0); + AssertRCReturn(rc, rc); + + uint32_t cbRamHoleSaved; + SSMR3GetU32(pSSM, &cbRamHoleSaved); + + uint64_t cbRamSaved; + rc = SSMR3GetU64(pSSM, &cbRamSaved); + AssertRCReturn(rc, rc); + + if ( cbRamHoleCfg != cbRamHoleSaved + || cbRamCfg != cbRamSaved) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, N_("Ram config mismatch: saved=%RX64/%RX32 config=%RX64/%RX32 (RAM/Hole)"), + cbRamSaved, cbRamHoleSaved, cbRamCfg, cbRamHoleCfg); + return VINF_SUCCESS; +} + +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + +/** + * Calculates the CRC-32 for a RAM page and updates the live save page tracking + * info with it. + * + * @param pVM The cross context VM structure. + * @param pCur The current RAM range. + * @param paLSPages The current array of live save page tracking + * structures. + * @param iPage The page index. + */ +static void pgmR3StateCalcCrc32ForRamPage(PVM pVM, PPGMRAMRANGE pCur, PPGMLIVESAVERAMPAGE paLSPages, uint32_t iPage) +{ + RTGCPHYS GCPhys = pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + PGMPAGEMAPLOCK PgMpLck; + void const *pvPage; + int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, &pCur->aPages[iPage], GCPhys, &pvPage, &PgMpLck); + if (RT_SUCCESS(rc)) + { + paLSPages[iPage].u32Crc = RTCrc32(pvPage, PAGE_SIZE); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + else + paLSPages[iPage].u32Crc = UINT32_MAX; /* Invalid */ +} + + +/** + * Verifies the CRC-32 for a page given it's raw bits. + * + * @param pvPage The page bits. + * @param pCur The current RAM range. + * @param paLSPages The current array of live save page tracking + * structures. + * @param iPage The page index. + */ +static void pgmR3StateVerifyCrc32ForPage(void const *pvPage, PPGMRAMRANGE pCur, PPGMLIVESAVERAMPAGE paLSPages, uint32_t iPage, const char *pszWhere) +{ + if (paLSPages[iPage].u32Crc != UINT32_MAX) + { + uint32_t u32Crc = RTCrc32(pvPage, PAGE_SIZE); + Assert( ( !PGM_PAGE_IS_ZERO(&pCur->aPages[iPage]) + && !PGM_PAGE_IS_BALLOONED(&pCur->aPages[iPage])) + || u32Crc == PGM_STATE_CRC32_ZERO_PAGE); + AssertMsg(paLSPages[iPage].u32Crc == u32Crc, + ("%08x != %08x for %RGp %R[pgmpage] %s\n", paLSPages[iPage].u32Crc, u32Crc, + pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pCur->aPages[iPage], pszWhere)); + } +} + + +/** + * Verifies the CRC-32 for a RAM page. + * + * @param pVM The cross context VM structure. + * @param pCur The current RAM range. + * @param paLSPages The current array of live save page tracking + * structures. + * @param iPage The page index. 
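+ * @param pszWhere Caller tag included in the assertion message
+ *                 (e.g. "scan" or "save#1").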
+ */ +static void pgmR3StateVerifyCrc32ForRamPage(PVM pVM, PPGMRAMRANGE pCur, PPGMLIVESAVERAMPAGE paLSPages, uint32_t iPage, const char *pszWhere) +{ + if (paLSPages[iPage].u32Crc != UINT32_MAX) + { + RTGCPHYS GCPhys = pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + PGMPAGEMAPLOCK PgMpLck; + void const *pvPage; + int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, &pCur->aPages[iPage], GCPhys, &pvPage, &PgMpLck); + if (RT_SUCCESS(rc)) + { + pgmR3StateVerifyCrc32ForPage(pvPage, pCur, paLSPages, iPage, pszWhere); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + } +} + +#endif /* PGMLIVESAVERAMPAGE_WITH_CRC32 */ + +/** + * Scan for RAM page modifications and reprotect them. + * + * @param pVM The cross context VM structure. + * @param fFinalPass Whether this is the final pass or not. + */ +static void pgmR3ScanRamPages(PVM pVM, bool fFinalPass) +{ + /* + * The RAM. + */ + RTGCPHYS GCPhysCur = 0; + PPGMRAMRANGE pCur; + pgmLock(pVM); + do + { + uint32_t const idRamRangesGen = pVM->pgm.s.idRamRangesGen; + for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + { + if ( pCur->GCPhysLast > GCPhysCur + && !PGM_RAM_RANGE_IS_AD_HOC(pCur)) + { + PPGMLIVESAVERAMPAGE paLSPages = pCur->paLSPages; + uint32_t cPages = pCur->cb >> PAGE_SHIFT; + uint32_t iPage = GCPhysCur <= pCur->GCPhys ? 0 : (GCPhysCur - pCur->GCPhys) >> PAGE_SHIFT; + GCPhysCur = 0; + for (; iPage < cPages; iPage++) + { + /* Do yield first. */ + if ( !fFinalPass +#ifndef PGMLIVESAVERAMPAGE_WITH_CRC32 + && (iPage & 0x7ff) == 0x100 +#endif + && PDMR3CritSectYield(&pVM->pgm.s.CritSectX) + && pVM->pgm.s.idRamRangesGen != idRamRangesGen) + { + GCPhysCur = pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + break; /* restart */ + } + + /* Skip already ignored pages. */ + if (paLSPages[iPage].fIgnore) + continue; + + if (RT_LIKELY(PGM_PAGE_GET_TYPE(&pCur->aPages[iPage]) == PGMPAGETYPE_RAM)) + { + /* + * A RAM page. + */ + switch (PGM_PAGE_GET_STATE(&pCur->aPages[iPage])) + { + case PGM_PAGE_STATE_ALLOCATED: + /** @todo Optimize this: Don't always re-enable write + * monitoring if the page is known to be very busy. 
*/ + if (PGM_PAGE_IS_WRITTEN_TO(&pCur->aPages[iPage])) + { + AssertMsg(paLSPages[iPage].fWriteMonitored, + ("%RGp %R[pgmpage]\n", pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pCur->aPages[iPage])); + PGM_PAGE_CLEAR_WRITTEN_TO(pVM, &pCur->aPages[iPage]); + Assert(pVM->pgm.s.cWrittenToPages > 0); + pVM->pgm.s.cWrittenToPages--; + } + else + { + AssertMsg(!paLSPages[iPage].fWriteMonitored, + ("%RGp %R[pgmpage]\n", pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pCur->aPages[iPage])); + pVM->pgm.s.LiveSave.Ram.cMonitoredPages++; + } + + if (!paLSPages[iPage].fDirty) + { + pVM->pgm.s.LiveSave.Ram.cReadyPages--; + if (paLSPages[iPage].fZero) + pVM->pgm.s.LiveSave.Ram.cZeroPages--; + pVM->pgm.s.LiveSave.Ram.cDirtyPages++; + if (++paLSPages[iPage].cDirtied > PGMLIVSAVEPAGE_MAX_DIRTIED) + paLSPages[iPage].cDirtied = PGMLIVSAVEPAGE_MAX_DIRTIED; + } + + pgmPhysPageWriteMonitor(pVM, &pCur->aPages[iPage], + pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT)); + paLSPages[iPage].fWriteMonitored = 1; + paLSPages[iPage].fWriteMonitoredJustNow = 1; + paLSPages[iPage].fDirty = 1; + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 0; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; /* invalid */ +#endif + break; + + case PGM_PAGE_STATE_WRITE_MONITORED: + Assert(paLSPages[iPage].fWriteMonitored); + if (PGM_PAGE_GET_WRITE_LOCKS(&pCur->aPages[iPage]) == 0) + { +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + if (paLSPages[iPage].fWriteMonitoredJustNow) + pgmR3StateCalcCrc32ForRamPage(pVM, pCur, paLSPages, iPage); + else + pgmR3StateVerifyCrc32ForRamPage(pVM, pCur, paLSPages, iPage, "scan"); +#endif + paLSPages[iPage].fWriteMonitoredJustNow = 0; + } + else + { + paLSPages[iPage].fWriteMonitoredJustNow = 1; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = UINT32_MAX; /* invalid */ +#endif + if (!paLSPages[iPage].fDirty) + { + pVM->pgm.s.LiveSave.Ram.cReadyPages--; + pVM->pgm.s.LiveSave.Ram.cDirtyPages++; + if (++paLSPages[iPage].cDirtied > PGMLIVSAVEPAGE_MAX_DIRTIED) + paLSPages[iPage].cDirtied = PGMLIVSAVEPAGE_MAX_DIRTIED; + } + } + break; + + case PGM_PAGE_STATE_ZERO: + case PGM_PAGE_STATE_BALLOONED: + if (!paLSPages[iPage].fZero) + { + if (!paLSPages[iPage].fDirty) + { + paLSPages[iPage].fDirty = 1; + pVM->pgm.s.LiveSave.Ram.cReadyPages--; + pVM->pgm.s.LiveSave.Ram.cDirtyPages++; + } + paLSPages[iPage].fZero = 1; + paLSPages[iPage].fShared = 0; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + paLSPages[iPage].u32Crc = PGM_STATE_CRC32_ZERO_PAGE; +#endif + } + break; + + case PGM_PAGE_STATE_SHARED: + if (!paLSPages[iPage].fShared) + { + if (!paLSPages[iPage].fDirty) + { + paLSPages[iPage].fDirty = 1; + pVM->pgm.s.LiveSave.Ram.cReadyPages--; + if (paLSPages[iPage].fZero) + pVM->pgm.s.LiveSave.Ram.cZeroPages--; + pVM->pgm.s.LiveSave.Ram.cDirtyPages++; + } + paLSPages[iPage].fZero = 0; + paLSPages[iPage].fShared = 1; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + pgmR3StateCalcCrc32ForRamPage(pVM, pCur, paLSPages, iPage); +#endif + } + break; + } + } + else + { + /* + * All other types => Ignore the page. + */ + Assert(!paLSPages[iPage].fIgnore); /* skipped before switch */ + paLSPages[iPage].fIgnore = 1; + if (paLSPages[iPage].fWriteMonitored) + { + /** @todo this doesn't hold water when we start monitoring MMIO2 and ROM shadow + * pages! */ + if (RT_UNLIKELY(PGM_PAGE_GET_STATE(&pCur->aPages[iPage]) == PGM_PAGE_STATE_WRITE_MONITORED)) + { + AssertMsgFailed(("%R[pgmpage]", &pCur->aPages[iPage])); /* shouldn't happen. 
*/ + PGM_PAGE_SET_STATE(pVM, &pCur->aPages[iPage], PGM_PAGE_STATE_ALLOCATED); + Assert(pVM->pgm.s.cMonitoredPages > 0); + pVM->pgm.s.cMonitoredPages--; + } + if (PGM_PAGE_IS_WRITTEN_TO(&pCur->aPages[iPage])) + { + PGM_PAGE_CLEAR_WRITTEN_TO(pVM, &pCur->aPages[iPage]); + Assert(pVM->pgm.s.cWrittenToPages > 0); + pVM->pgm.s.cWrittenToPages--; + } + pVM->pgm.s.LiveSave.Ram.cMonitoredPages--; + } + + /** @todo the counting doesn't quite work out here. fix later? */ + if (paLSPages[iPage].fDirty) + pVM->pgm.s.LiveSave.Ram.cDirtyPages--; + else + { + pVM->pgm.s.LiveSave.Ram.cReadyPages--; + if (paLSPages[iPage].fZero) + pVM->pgm.s.LiveSave.Ram.cZeroPages--; + } + pVM->pgm.s.LiveSave.cIgnoredPages++; + } + } /* for each page in range */ + + if (GCPhysCur != 0) + break; /* Yield + ramrange change */ + GCPhysCur = pCur->GCPhysLast; + } + } /* for each range */ + } while (pCur); + pgmUnlock(pVM); +} + + +/** + * Save quiescent RAM pages. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param fLiveSave Whether it's a live save or not. + * @param uPass The pass number. + */ +static int pgmR3SaveRamPages(PVM pVM, PSSMHANDLE pSSM, bool fLiveSave, uint32_t uPass) +{ + NOREF(fLiveSave); + + /* + * The RAM. + */ + RTGCPHYS GCPhysLast = NIL_RTGCPHYS; + RTGCPHYS GCPhysCur = 0; + PPGMRAMRANGE pCur; + bool fFTMDeltaSaveActive = FTMIsDeltaLoadSaveActive(pVM); + + pgmLock(pVM); + do + { + uint32_t const idRamRangesGen = pVM->pgm.s.idRamRangesGen; + for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + { + if ( pCur->GCPhysLast > GCPhysCur + && !PGM_RAM_RANGE_IS_AD_HOC(pCur)) + { + PPGMLIVESAVERAMPAGE paLSPages = pCur->paLSPages; + uint32_t cPages = pCur->cb >> PAGE_SHIFT; + uint32_t iPage = GCPhysCur <= pCur->GCPhys ? 0 : (GCPhysCur - pCur->GCPhys) >> PAGE_SHIFT; + GCPhysCur = 0; + for (; iPage < cPages; iPage++) + { + /* Do yield first. */ + if ( uPass != SSM_PASS_FINAL + && (iPage & 0x7ff) == 0x100 + && PDMR3CritSectYield(&pVM->pgm.s.CritSectX) + && pVM->pgm.s.idRamRangesGen != idRamRangesGen) + { + GCPhysCur = pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + break; /* restart */ + } + + PPGMPAGE pCurPage = &pCur->aPages[iPage]; + + /* + * Only save pages that haven't changed since last scan and are dirty. + */ + if ( uPass != SSM_PASS_FINAL + && paLSPages) + { + if (!paLSPages[iPage].fDirty) + continue; + if (paLSPages[iPage].fWriteMonitoredJustNow) + continue; + if (paLSPages[iPage].fIgnore) + continue; + if (PGM_PAGE_GET_TYPE(pCurPage) != PGMPAGETYPE_RAM) /* in case of recent remappings */ + continue; + if ( PGM_PAGE_GET_STATE(pCurPage) + != ( paLSPages[iPage].fZero + ? PGM_PAGE_STATE_ZERO + : paLSPages[iPage].fShared + ? PGM_PAGE_STATE_SHARED + : PGM_PAGE_STATE_WRITE_MONITORED)) + continue; + if (PGM_PAGE_GET_WRITE_LOCKS(&pCur->aPages[iPage]) > 0) + continue; + } + else + { + if ( paLSPages + && !paLSPages[iPage].fDirty + && !paLSPages[iPage].fIgnore) + { +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + if (PGM_PAGE_GET_TYPE(pCurPage) != PGMPAGETYPE_RAM) + pgmR3StateVerifyCrc32ForRamPage(pVM, pCur, paLSPages, iPage, "save#1"); +#endif + continue; + } + if (PGM_PAGE_GET_TYPE(pCurPage) != PGMPAGETYPE_RAM) + continue; + } + + /* + * Do the saving outside the PGM critsect since SSM may block on I/O. 
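+ *
+ * For pages with bits the content is first copied into a stack buffer while
+ * the lock is held; the lock is then dropped around the SSMR3Put* calls and
+ * re-acquired afterwards. If the RAM range generation changes while the lock
+ * is released, the walk is restarted from the current address.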
+ */ + int rc; + RTGCPHYS GCPhys = pCur->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + bool fZero = PGM_PAGE_IS_ZERO(pCurPage); + bool fBallooned = PGM_PAGE_IS_BALLOONED(pCurPage); + bool fSkipped = false; + + if (!fZero && !fBallooned) + { + /* + * Copy the page and then save it outside the lock (since any + * SSM call may block). + */ + uint8_t abPage[PAGE_SIZE]; + PGMPAGEMAPLOCK PgMpLck; + void const *pvPage; + rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pCurPage, GCPhys, &pvPage, &PgMpLck); + if (RT_SUCCESS(rc)) + { + memcpy(abPage, pvPage, PAGE_SIZE); +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + if (paLSPages) + pgmR3StateVerifyCrc32ForPage(abPage, pCur, paLSPages, iPage, "save#3"); +#endif + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + pgmUnlock(pVM); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc GCPhys=%RGp\n", rc, GCPhys), rc); + + /* Try save some memory when restoring. */ + if (!ASMMemIsZeroPage(pvPage)) + { + if (fFTMDeltaSaveActive) + { + if ( PGM_PAGE_IS_WRITTEN_TO(pCurPage) + || PGM_PAGE_IS_FT_DIRTY(pCurPage)) + { + if (GCPhys == GCPhysLast + PAGE_SIZE) + SSMR3PutU8(pSSM, PGM_STATE_REC_RAM_RAW); + else + { + SSMR3PutU8(pSSM, PGM_STATE_REC_RAM_RAW | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutGCPhys(pSSM, GCPhys); + } + rc = SSMR3PutMem(pSSM, abPage, PAGE_SIZE); + PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pCurPage); + PGM_PAGE_CLEAR_FT_DIRTY(pCurPage); + } + /* else nothing changed, so skip it. */ + else + fSkipped = true; + } + else + { + if (GCPhys == GCPhysLast + PAGE_SIZE) + SSMR3PutU8(pSSM, PGM_STATE_REC_RAM_RAW); + else + { + SSMR3PutU8(pSSM, PGM_STATE_REC_RAM_RAW | PGM_STATE_REC_FLAG_ADDR); + SSMR3PutGCPhys(pSSM, GCPhys); + } + rc = SSMR3PutMem(pSSM, abPage, PAGE_SIZE); + } + } + else + { + if (GCPhys == GCPhysLast + PAGE_SIZE) + rc = SSMR3PutU8(pSSM, PGM_STATE_REC_RAM_ZERO); + else + { + SSMR3PutU8(pSSM, PGM_STATE_REC_RAM_ZERO | PGM_STATE_REC_FLAG_ADDR); + rc = SSMR3PutGCPhys(pSSM, GCPhys); + } + } + } + else + { + /* + * Dirty zero or ballooned page. + */ +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + if (paLSPages) + pgmR3StateVerifyCrc32ForRamPage(pVM, pCur, paLSPages, iPage, "save#2"); +#endif + pgmUnlock(pVM); + + uint8_t u8RecType = fBallooned ? PGM_STATE_REC_RAM_BALLOONED : PGM_STATE_REC_RAM_ZERO; + if (GCPhys == GCPhysLast + PAGE_SIZE) + rc = SSMR3PutU8(pSSM, u8RecType); + else + { + SSMR3PutU8(pSSM, u8RecType | PGM_STATE_REC_FLAG_ADDR); + rc = SSMR3PutGCPhys(pSSM, GCPhys); + } + } + if (RT_FAILURE(rc)) + return rc; + + pgmLock(pVM); + if (!fSkipped) + GCPhysLast = GCPhys; + if (paLSPages) + { + paLSPages[iPage].fDirty = 0; + pVM->pgm.s.LiveSave.Ram.cReadyPages++; + if (fZero) + pVM->pgm.s.LiveSave.Ram.cZeroPages++; + pVM->pgm.s.LiveSave.Ram.cDirtyPages--; + pVM->pgm.s.LiveSave.cSavedPages++; + } + if (idRamRangesGen != pVM->pgm.s.idRamRangesGen) + { + GCPhysCur = GCPhys | PAGE_OFFSET_MASK; + break; /* restart */ + } + + } /* for each page in range */ + + if (GCPhysCur != 0) + break; /* Yield + ramrange change */ + GCPhysCur = pCur->GCPhysLast; + } + } /* for each range */ + } while (pCur); + + pgmUnlock(pVM); + + return VINF_SUCCESS; +} + + +/** + * Cleans up RAM pages after a live save. + * + * @param pVM The cross context VM structure. + */ +static void pgmR3DoneRamPages(PVM pVM) +{ + /* + * Free the tracking arrays and disable write monitoring. + * + * Play nice with the PGM lock in case we're called while the VM is still + * running. 
This means we have to delay the freeing since we wish to use + * paLSPages as an indicator of which RAM ranges which we need to scan for + * write monitored pages. + */ + void *pvToFree = NULL; + PPGMRAMRANGE pCur; + uint32_t cMonitoredPages = 0; + pgmLock(pVM); + do + { + for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3) + { + if (pCur->paLSPages) + { + if (pvToFree) + { + uint32_t idRamRangesGen = pVM->pgm.s.idRamRangesGen; + pgmUnlock(pVM); + MMR3HeapFree(pvToFree); + pvToFree = NULL; + pgmLock(pVM); + if (idRamRangesGen != pVM->pgm.s.idRamRangesGen) + break; /* start over again. */ + } + + pvToFree = pCur->paLSPages; + pCur->paLSPages = NULL; + + uint32_t iPage = pCur->cb >> PAGE_SHIFT; + while (iPage--) + { + PPGMPAGE pPage = &pCur->aPages[iPage]; + PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage); + if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED) + { + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED); + cMonitoredPages++; + } + } + } + } + } while (pCur); + + Assert(pVM->pgm.s.cMonitoredPages >= cMonitoredPages); + if (pVM->pgm.s.cMonitoredPages < cMonitoredPages) + pVM->pgm.s.cMonitoredPages = 0; + else + pVM->pgm.s.cMonitoredPages -= cMonitoredPages; + + pgmUnlock(pVM); + + MMR3HeapFree(pvToFree); + pvToFree = NULL; +} + + +/** + * @callback_method_impl{FNSSMINTLIVEEXEC} + */ +static DECLCALLBACK(int) pgmR3LiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + int rc; + + /* + * Save the MMIO2 and ROM range IDs in pass 0. + */ + if (uPass == 0) + { + rc = pgmR3SaveRamConfig(pVM, pSSM); + if (RT_FAILURE(rc)) + return rc; + rc = pgmR3SaveRomRanges(pVM, pSSM); + if (RT_FAILURE(rc)) + return rc; + rc = pgmR3SaveMmio2Ranges(pVM, pSSM); + if (RT_FAILURE(rc)) + return rc; + } + /* + * Reset the page-per-second estimate to avoid inflation by the initial + * load of zero pages. pgmR3LiveVote ASSUMES this is done at pass 7. + */ + else if (uPass == 7) + { + pVM->pgm.s.LiveSave.cSavedPages = 0; + pVM->pgm.s.LiveSave.uSaveStartNS = RTTimeNanoTS(); + } + + /* + * Do the scanning. + */ + pgmR3ScanRomPages(pVM); + pgmR3ScanMmio2Pages(pVM, uPass); + pgmR3ScanRamPages(pVM, false /*fFinalPass*/); + pgmR3PoolClearAll(pVM, true /*fFlushRemTlb*/); /** @todo this could perhaps be optimized a bit. */ + + /* + * Save the pages. + */ + if (uPass == 0) + rc = pgmR3SaveRomVirginPages( pVM, pSSM, true /*fLiveSave*/); + else + rc = VINF_SUCCESS; + if (RT_SUCCESS(rc)) + rc = pgmR3SaveShadowedRomPages(pVM, pSSM, true /*fLiveSave*/, false /*fFinalPass*/); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveMmio2Pages( pVM, pSSM, true /*fLiveSave*/, uPass); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveRamPages( pVM, pSSM, true /*fLiveSave*/, uPass); + SSMR3PutU8(pSSM, PGM_STATE_REC_END); /* (Ignore the rc, SSM takes care of it.) */ + + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTLIVEVOTE} + */ +static DECLCALLBACK(int) pgmR3LiveVote(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + /* + * Update and calculate parameters used in the decision making. + */ + const uint32_t cHistoryEntries = RT_ELEMENTS(pVM->pgm.s.LiveSave.acDirtyPagesHistory); + + /* update history. 
*/ + pgmLock(pVM); + uint32_t const cWrittenToPages = pVM->pgm.s.cWrittenToPages; + pgmUnlock(pVM); + uint32_t const cDirtyNow = pVM->pgm.s.LiveSave.Rom.cDirtyPages + + pVM->pgm.s.LiveSave.Mmio2.cDirtyPages + + pVM->pgm.s.LiveSave.Ram.cDirtyPages + + cWrittenToPages; + uint32_t i = pVM->pgm.s.LiveSave.iDirtyPagesHistory; + pVM->pgm.s.LiveSave.acDirtyPagesHistory[i] = cDirtyNow; + pVM->pgm.s.LiveSave.iDirtyPagesHistory = (i + 1) % cHistoryEntries; + + /* calc shortterm average (4 passes). */ + AssertCompile(RT_ELEMENTS(pVM->pgm.s.LiveSave.acDirtyPagesHistory) > 4); + uint64_t cTotal = pVM->pgm.s.LiveSave.acDirtyPagesHistory[i]; + cTotal += pVM->pgm.s.LiveSave.acDirtyPagesHistory[(i + cHistoryEntries - 1) % cHistoryEntries]; + cTotal += pVM->pgm.s.LiveSave.acDirtyPagesHistory[(i + cHistoryEntries - 2) % cHistoryEntries]; + cTotal += pVM->pgm.s.LiveSave.acDirtyPagesHistory[(i + cHistoryEntries - 3) % cHistoryEntries]; + uint32_t const cDirtyPagesShort = cTotal / 4; + pVM->pgm.s.LiveSave.cDirtyPagesShort = cDirtyPagesShort; + + /* calc longterm average. */ + cTotal = 0; + if (uPass < cHistoryEntries) + for (i = 0; i < cHistoryEntries && i <= uPass; i++) + cTotal += pVM->pgm.s.LiveSave.acDirtyPagesHistory[i]; + else + for (i = 0; i < cHistoryEntries; i++) + cTotal += pVM->pgm.s.LiveSave.acDirtyPagesHistory[i]; + uint32_t const cDirtyPagesLong = cTotal / cHistoryEntries; + pVM->pgm.s.LiveSave.cDirtyPagesLong = cDirtyPagesLong; + + /* estimate the speed */ + uint64_t cNsElapsed = RTTimeNanoTS() - pVM->pgm.s.LiveSave.uSaveStartNS; + uint32_t cPagesPerSecond = (uint32_t)( pVM->pgm.s.LiveSave.cSavedPages + / ((long double)cNsElapsed / 1000000000.0) ); + pVM->pgm.s.LiveSave.cPagesPerSecond = cPagesPerSecond; + + /* + * Try make a decision. + */ + if ( cDirtyPagesShort <= cDirtyPagesLong + && ( cDirtyNow <= cDirtyPagesShort + || cDirtyNow - cDirtyPagesShort < RT_MIN(cDirtyPagesShort / 8, 16) + ) + ) + { + if (uPass > 10) + { + uint32_t cMsLeftShort = (uint32_t)(cDirtyPagesShort / (long double)cPagesPerSecond * 1000.0); + uint32_t cMsLeftLong = (uint32_t)(cDirtyPagesLong / (long double)cPagesPerSecond * 1000.0); + uint32_t cMsMaxDowntime = SSMR3HandleMaxDowntime(pSSM); + if (cMsMaxDowntime < 32) + cMsMaxDowntime = 32; + if ( ( cMsLeftLong <= cMsMaxDowntime + && cMsLeftShort < cMsMaxDowntime) + || cMsLeftShort < cMsMaxDowntime / 2 + ) + { + Log(("pgmR3LiveVote: VINF_SUCCESS - pass=%d cDirtyPagesShort=%u|%ums cDirtyPagesLong=%u|%ums cMsMaxDowntime=%u\n", + uPass, cDirtyPagesShort, cMsLeftShort, cDirtyPagesLong, cMsLeftLong, cMsMaxDowntime)); + return VINF_SUCCESS; + } + } + else + { + if ( ( cDirtyPagesShort <= 128 + && cDirtyPagesLong <= 1024) + || cDirtyPagesLong <= 256 + ) + { + Log(("pgmR3LiveVote: VINF_SUCCESS - pass=%d cDirtyPagesShort=%u cDirtyPagesLong=%u\n", uPass, cDirtyPagesShort, cDirtyPagesLong)); + return VINF_SUCCESS; + } + } + } + + /* + * Come up with a completion percentage. Currently this is a simple + * dirty page (long term) vs. total pages ratio + some pass trickery. 
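+ * The reported value is additionally clamped to uPass * 2, so early passes
+ * never report near-completion regardless of how few dirty pages remain.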
+ */ + unsigned uPctDirty = (unsigned)( (long double)cDirtyPagesLong + / (pVM->pgm.s.cAllPages - pVM->pgm.s.LiveSave.cIgnoredPages - pVM->pgm.s.cZeroPages) ); + if (uPctDirty <= 100) + SSMR3HandleReportLivePercent(pSSM, RT_MIN(100 - uPctDirty, uPass * 2)); + else + AssertMsgFailed(("uPctDirty=%u cDirtyPagesLong=%#x cAllPages=%#x cIgnoredPages=%#x cZeroPages=%#x\n", + uPctDirty, cDirtyPagesLong, pVM->pgm.s.cAllPages, pVM->pgm.s.LiveSave.cIgnoredPages, pVM->pgm.s.cZeroPages)); + + return VINF_SSM_VOTE_FOR_ANOTHER_PASS; +} + + +/** + * @callback_method_impl{FNSSMINTLIVEPREP} + * + * This will attempt to allocate and initialize the tracking structures. It + * will also prepare for write monitoring of pages and initialize PGM::LiveSave. + * pgmR3SaveDone will do the cleanups. + */ +static DECLCALLBACK(int) pgmR3LivePrep(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Indicate that we will be using the write monitoring. + */ + pgmLock(pVM); + /** @todo find a way of mediating this when more users are added. */ + if (pVM->pgm.s.fPhysWriteMonitoringEngaged) + { + pgmUnlock(pVM); + AssertLogRelFailedReturn(VERR_PGM_WRITE_MONITOR_ENGAGED); + } + pVM->pgm.s.fPhysWriteMonitoringEngaged = true; + pgmUnlock(pVM); + + /* + * Initialize the statistics. + */ + pVM->pgm.s.LiveSave.Rom.cReadyPages = 0; + pVM->pgm.s.LiveSave.Rom.cDirtyPages = 0; + pVM->pgm.s.LiveSave.Mmio2.cReadyPages = 0; + pVM->pgm.s.LiveSave.Mmio2.cDirtyPages = 0; + pVM->pgm.s.LiveSave.Ram.cReadyPages = 0; + pVM->pgm.s.LiveSave.Ram.cDirtyPages = 0; + pVM->pgm.s.LiveSave.cIgnoredPages = 0; + pVM->pgm.s.LiveSave.fActive = true; + for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.LiveSave.acDirtyPagesHistory); i++) + pVM->pgm.s.LiveSave.acDirtyPagesHistory[i] = UINT32_MAX / 2; + pVM->pgm.s.LiveSave.iDirtyPagesHistory = 0; + pVM->pgm.s.LiveSave.cSavedPages = 0; + pVM->pgm.s.LiveSave.uSaveStartNS = RTTimeNanoTS(); + pVM->pgm.s.LiveSave.cPagesPerSecond = 8192; + + /* + * Per page type. + */ + int rc = pgmR3PrepRomPages(pVM); + if (RT_SUCCESS(rc)) + rc = pgmR3PrepMmio2Pages(pVM); + if (RT_SUCCESS(rc)) + rc = pgmR3PrepRamPages(pVM); + + NOREF(pSSM); + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTSAVEEXEC} + */ +static DECLCALLBACK(int) pgmR3SaveExec(PVM pVM, PSSMHANDLE pSSM) +{ + int rc = VINF_SUCCESS; + PPGM pPGM = &pVM->pgm.s; + + /* + * Lock PGM and set the no-more-writes indicator. + */ + pgmLock(pVM); + pVM->pgm.s.fNoMorePhysWrites = true; + + /* + * Save basic data (required / unaffected by relocation). + */ + bool const fMappingsFixed = pVM->pgm.s.fMappingsFixed; + pVM->pgm.s.fMappingsFixed |= pVM->pgm.s.fMappingsFixedRestored; + SSMR3PutStruct(pSSM, pPGM, &s_aPGMFields[0]); + pVM->pgm.s.fMappingsFixed = fMappingsFixed; + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + rc = SSMR3PutStruct(pSSM, &pVM->aCpus[idCpu].pgm.s, &s_aPGMCpuFields[0]); + + /* + * Save the (remainder of the) memory. 
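+ *
+ * When a live save is active only the still-dirty shadowed ROM, MMIO2 and
+ * RAM pages need flushing here; otherwise the RAM config, the ROM and MMIO2
+ * range tables and all the pages are written in one go. Either way the unit
+ * is terminated with a PGM_STATE_REC_END record.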
+ */ + if (RT_SUCCESS(rc)) + { + if (pVM->pgm.s.LiveSave.fActive) + { + pgmR3ScanRomPages(pVM); + pgmR3ScanMmio2Pages(pVM, SSM_PASS_FINAL); + pgmR3ScanRamPages(pVM, true /*fFinalPass*/); + + rc = pgmR3SaveShadowedRomPages( pVM, pSSM, true /*fLiveSave*/, true /*fFinalPass*/); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveMmio2Pages( pVM, pSSM, true /*fLiveSave*/, SSM_PASS_FINAL); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveRamPages( pVM, pSSM, true /*fLiveSave*/, SSM_PASS_FINAL); + } + else + { + rc = pgmR3SaveRamConfig(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveRomRanges(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveMmio2Ranges(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveRomVirginPages( pVM, pSSM, false /*fLiveSave*/); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveShadowedRomPages(pVM, pSSM, false /*fLiveSave*/, true /*fFinalPass*/); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveMmio2Pages( pVM, pSSM, false /*fLiveSave*/, SSM_PASS_FINAL); + if (RT_SUCCESS(rc)) + rc = pgmR3SaveRamPages( pVM, pSSM, false /*fLiveSave*/, SSM_PASS_FINAL); + } + SSMR3PutU8(pSSM, PGM_STATE_REC_END); /* (Ignore the rc, SSM takes of it.) */ + } + + pgmUnlock(pVM); + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTSAVEDONE} + */ +static DECLCALLBACK(int) pgmR3SaveDone(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Do per page type cleanups first. + */ + if (pVM->pgm.s.LiveSave.fActive) + { + pgmR3DoneRomPages(pVM); + pgmR3DoneMmio2Pages(pVM); + pgmR3DoneRamPages(pVM); + } + + /* + * Clear the live save indicator and disengage write monitoring. + */ + pgmLock(pVM); + pVM->pgm.s.LiveSave.fActive = false; + /** @todo this is blindly assuming that we're the only user of write + * monitoring. Fix this when more users are added. */ + pVM->pgm.s.fPhysWriteMonitoringEngaged = false; + pgmUnlock(pVM); + + NOREF(pSSM); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMINTLOADPREP} + */ +static DECLCALLBACK(int) pgmR3LoadPrep(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Call the reset function to make sure all the memory is cleared. + */ + PGMR3Reset(pVM); + pVM->pgm.s.LiveSave.fActive = false; + NOREF(pSSM); + return VINF_SUCCESS; +} + + +/** + * Load an ignored page. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + */ +static int pgmR3LoadPageToDevNullOld(PSSMHANDLE pSSM) +{ + uint8_t abPage[PAGE_SIZE]; + return SSMR3GetMem(pSSM, &abPage[0], sizeof(abPage)); +} + + +/** + * Compares a page with an old save type value. + * + * @returns true if equal, false if not. + * @param pPage The page to compare. + * @param uOldType The old type value from the saved state. + */ +DECLINLINE(bool) pgmR3CompareNewAndOldPageTypes(PPGMPAGE pPage, uint8_t uOldType) +{ + uint8_t uOldPageType; + switch (PGM_PAGE_GET_TYPE(pPage)) + { + case PGMPAGETYPE_INVALID: uOldPageType = PGMPAGETYPE_OLD_INVALID; break; + case PGMPAGETYPE_RAM: uOldPageType = PGMPAGETYPE_OLD_RAM; break; + case PGMPAGETYPE_MMIO2: uOldPageType = PGMPAGETYPE_OLD_MMIO2; break; + case PGMPAGETYPE_MMIO2_ALIAS_MMIO: uOldPageType = PGMPAGETYPE_OLD_MMIO2_ALIAS_MMIO; break; + case PGMPAGETYPE_ROM_SHADOW: uOldPageType = PGMPAGETYPE_OLD_ROM_SHADOW; break; + case PGMPAGETYPE_ROM: uOldPageType = PGMPAGETYPE_OLD_ROM; break; + case PGMPAGETYPE_SPECIAL_ALIAS_MMIO: RT_FALL_THRU(); + case PGMPAGETYPE_MMIO: uOldPageType = PGMPAGETYPE_OLD_MMIO; break; + default: + AssertFailed(); + uOldPageType = PGMPAGETYPE_OLD_INVALID; + break; + } + return uOldPageType == uOldType; +} + + +/** + * Loads a page without any bits in the saved state, i.e. making sure it's + * really zero. 
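+ * No page data is read from the stream; the function only verifies that the
+ * page type matches and that the page is already zero or ballooned.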
+ * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param uOldType The page type or PGMPAGETYPE_OLD_INVALID (old saved + * state). + * @param pPage The guest page tracking structure. + * @param GCPhys The page address. + * @param pRam The ram range (logging). + */ +static int pgmR3LoadPageZeroOld(PVM pVM, uint8_t uOldType, PPGMPAGE pPage, RTGCPHYS GCPhys, PPGMRAMRANGE pRam) +{ + if ( uOldType != PGMPAGETYPE_OLD_INVALID + && !pgmR3CompareNewAndOldPageTypes(pPage, uOldType)) + return VERR_SSM_UNEXPECTED_DATA; + + /* I think this should be sufficient. */ + if ( !PGM_PAGE_IS_ZERO(pPage) + && !PGM_PAGE_IS_BALLOONED(pPage)) + return VERR_SSM_UNEXPECTED_DATA; + + NOREF(pVM); + NOREF(GCPhys); + NOREF(pRam); + return VINF_SUCCESS; +} + + +/** + * Loads a page from the saved state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uOldType The page type or PGMPAGETYPE_OLD_INVALID (old saved + * state). + * @param pPage The guest page tracking structure. + * @param GCPhys The page address. + * @param pRam The ram range (logging). + */ +static int pgmR3LoadPageBitsOld(PVM pVM, PSSMHANDLE pSSM, uint8_t uOldType, PPGMPAGE pPage, RTGCPHYS GCPhys, PPGMRAMRANGE pRam) +{ + /* + * Match up the type, dealing with MMIO2 aliases (dropped). + */ + AssertLogRelMsgReturn( uOldType == PGMPAGETYPE_INVALID + || pgmR3CompareNewAndOldPageTypes(pPage, uOldType) + /* kudge for the expanded PXE bios (r67885) - @bugref{5687}: */ + || ( uOldType == PGMPAGETYPE_OLD_RAM + && GCPhys >= 0xed000 + && GCPhys <= 0xeffff + && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_ROM) + , + ("pPage=%R[pgmpage] GCPhys=%#x %s\n", pPage, GCPhys, pRam->pszDesc), + VERR_SSM_UNEXPECTED_DATA); + + /* + * Load the page. + */ + PGMPAGEMAPLOCK PgMpLck; + void *pvPage; + int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, &pvPage, &PgMpLck); + if (RT_SUCCESS(rc)) + { + rc = SSMR3GetMem(pSSM, pvPage, PAGE_SIZE); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + + return rc; +} + + +/** + * Loads a page (counter part to pgmR3SavePage). + * + * @returns VBox status code, fully bitched errors. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uOldType The page type. + * @param pPage The page. + * @param GCPhys The page address. + * @param pRam The RAM range (for error messages). + */ +static int pgmR3LoadPageOld(PVM pVM, PSSMHANDLE pSSM, uint8_t uOldType, PPGMPAGE pPage, RTGCPHYS GCPhys, PPGMRAMRANGE pRam) +{ + uint8_t uState; + int rc = SSMR3GetU8(pSSM, &uState); + AssertLogRelMsgRCReturn(rc, ("pPage=%R[pgmpage] GCPhys=%#x %s rc=%Rrc\n", pPage, GCPhys, pRam->pszDesc, rc), rc); + if (uState == 0 /* zero */) + rc = pgmR3LoadPageZeroOld(pVM, uOldType, pPage, GCPhys, pRam); + else if (uState == 1) + rc = pgmR3LoadPageBitsOld(pVM, pSSM, uOldType, pPage, GCPhys, pRam); + else + rc = VERR_PGM_INVALID_SAVED_PAGE_STATE; + AssertLogRelMsgRCReturn(rc, ("pPage=%R[pgmpage] uState=%d uOldType=%d GCPhys=%RGp %s rc=%Rrc\n", + pPage, uState, uOldType, GCPhys, pRam->pszDesc, rc), + rc); + return VINF_SUCCESS; +} + + +/** + * Loads a shadowed ROM page. + * + * @returns VBox status code, errors are fully bitched. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param pPage The page. + * @param GCPhys The page address. + * @param pRam The RAM range (for error messages). 
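+ *
+ * @remarks Which of the two saved pages is the active one depends on the
+ *          restored protection: while the ROM is mapped (PGMROMPROT_IS_ROM)
+ *          the virgin page is active and the shadow page passive, otherwise
+ *          the roles are reversed.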
+ */ +static int pgmR3LoadShadowedRomPageOld(PVM pVM, PSSMHANDLE pSSM, PPGMPAGE pPage, RTGCPHYS GCPhys, PPGMRAMRANGE pRam) +{ + /* + * Load and set the protection first, then load the two pages, the first + * one is the active the other is the passive. + */ + PPGMROMPAGE pRomPage = pgmR3GetRomPage(pVM, GCPhys); + AssertLogRelMsgReturn(pRomPage, ("GCPhys=%RGp %s\n", GCPhys, pRam->pszDesc), VERR_PGM_SAVED_ROM_PAGE_NOT_FOUND); + + uint8_t uProt; + int rc = SSMR3GetU8(pSSM, &uProt); + AssertLogRelMsgRCReturn(rc, ("pPage=%R[pgmpage] GCPhys=%#x %s\n", pPage, GCPhys, pRam->pszDesc), rc); + PGMROMPROT enmProt = (PGMROMPROT)uProt; + AssertLogRelMsgReturn( enmProt >= PGMROMPROT_INVALID + && enmProt < PGMROMPROT_END, + ("enmProt=%d pPage=%R[pgmpage] GCPhys=%#x %s\n", enmProt, pPage, GCPhys, pRam->pszDesc), + VERR_SSM_UNEXPECTED_DATA); + + if (pRomPage->enmProt != enmProt) + { + rc = PGMR3PhysRomProtect(pVM, GCPhys, PAGE_SIZE, enmProt); + AssertLogRelRCReturn(rc, rc); + AssertLogRelReturn(pRomPage->enmProt == enmProt, VERR_PGM_SAVED_ROM_PAGE_PROT); + } + + PPGMPAGE pPageActive = PGMROMPROT_IS_ROM(enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow; + PPGMPAGE pPagePassive = PGMROMPROT_IS_ROM(enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin; + uint8_t u8ActiveType = PGMROMPROT_IS_ROM(enmProt) ? PGMPAGETYPE_ROM : PGMPAGETYPE_ROM_SHADOW; + uint8_t u8PassiveType= PGMROMPROT_IS_ROM(enmProt) ? PGMPAGETYPE_ROM_SHADOW : PGMPAGETYPE_ROM; + + /** @todo this isn't entirely correct as long as pgmPhysGCPhys2CCPtrInternal is + * used down the line (will the 2nd page will be written to the first + * one because of a false TLB hit since the TLB is using GCPhys and + * doesn't check the HCPhys of the desired page). */ + rc = pgmR3LoadPageOld(pVM, pSSM, u8ActiveType, pPage, GCPhys, pRam); + if (RT_SUCCESS(rc)) + { + *pPageActive = *pPage; + rc = pgmR3LoadPageOld(pVM, pSSM, u8PassiveType, pPagePassive, GCPhys, pRam); + } + return rc; +} + +/** + * Ram range flags and bits for older versions of the saved state. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uVersion The saved state version. + */ +static int pgmR3LoadMemoryOld(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion) +{ + PPGM pPGM = &pVM->pgm.s; + + /* + * Ram range flags and bits. + */ + uint32_t i = 0; + for (PPGMRAMRANGE pRam = pPGM->pRamRangesXR3; ; pRam = pRam->pNextR3, i++) + { + /* Check the sequence number / separator. */ + uint32_t u32Sep; + int rc = SSMR3GetU32(pSSM, &u32Sep); + if (RT_FAILURE(rc)) + return rc; + if (u32Sep == ~0U) + break; + if (u32Sep != i) + { + AssertMsgFailed(("u32Sep=%#x (last)\n", u32Sep)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + AssertLogRelReturn(pRam, VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + /* Get the range details. */ + RTGCPHYS GCPhys; + SSMR3GetGCPhys(pSSM, &GCPhys); + RTGCPHYS GCPhysLast; + SSMR3GetGCPhys(pSSM, &GCPhysLast); + RTGCPHYS cb; + SSMR3GetGCPhys(pSSM, &cb); + uint8_t fHaveBits; + rc = SSMR3GetU8(pSSM, &fHaveBits); + if (RT_FAILURE(rc)) + return rc; + if (fHaveBits & ~1) + { + AssertMsgFailed(("u32Sep=%#x (last)\n", u32Sep)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + size_t cchDesc = 0; + char szDesc[256]; + szDesc[0] = '\0'; + if (uVersion >= PGM_SAVED_STATE_VERSION_RR_DESC) + { + rc = SSMR3GetStrZ(pSSM, szDesc, sizeof(szDesc)); + if (RT_FAILURE(rc)) + return rc; + /* Since we've modified the description strings in r45878, only compare + them if the saved state is more recent. 
*/ + if (uVersion != PGM_SAVED_STATE_VERSION_RR_DESC) + cchDesc = strlen(szDesc); + } + + /* + * Match it up with the current range. + * + * Note there is a hack for dealing with the high BIOS mapping + * in the old saved state format, this means we might not have + * a 1:1 match on success. + */ + if ( ( GCPhys != pRam->GCPhys + || GCPhysLast != pRam->GCPhysLast + || cb != pRam->cb + || ( cchDesc + && strcmp(szDesc, pRam->pszDesc)) ) + /* Hack for PDMDevHlpPhysReserve(pDevIns, 0xfff80000, 0x80000, "High ROM Region"); */ + && ( uVersion != PGM_SAVED_STATE_VERSION_OLD_PHYS_CODE + || GCPhys != UINT32_C(0xfff80000) + || GCPhysLast != UINT32_C(0xffffffff) + || pRam->GCPhysLast != GCPhysLast + || pRam->GCPhys < GCPhys + || !fHaveBits) + ) + { + LogRel(("Ram range: %RGp-%RGp %RGp bytes %s %s\n" + "State : %RGp-%RGp %RGp bytes %s %s\n", + pRam->GCPhys, pRam->GCPhysLast, pRam->cb, pRam->pvR3 ? "bits" : "nobits", pRam->pszDesc, + GCPhys, GCPhysLast, cb, fHaveBits ? "bits" : "nobits", szDesc)); + /* + * If we're loading a state for debugging purpose, don't make a fuss if + * the MMIO and ROM stuff isn't 100% right, just skip the mismatches. + */ + if ( SSMR3HandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT + || GCPhys < 8 * _1M) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, + N_("RAM range mismatch; saved={%RGp-%RGp %RGp bytes %s %s} config={%RGp-%RGp %RGp bytes %s %s}"), + GCPhys, GCPhysLast, cb, fHaveBits ? "bits" : "nobits", szDesc, + pRam->GCPhys, pRam->GCPhysLast, pRam->cb, pRam->pvR3 ? "bits" : "nobits", pRam->pszDesc); + + AssertMsgFailed(("debug skipping not implemented, sorry\n")); + continue; + } + + uint32_t cPages = (GCPhysLast - GCPhys + 1) >> PAGE_SHIFT; + if (uVersion >= PGM_SAVED_STATE_VERSION_RR_DESC) + { + /* + * Load the pages one by one. + */ + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + RTGCPHYS const GCPhysPage = ((RTGCPHYS)iPage << PAGE_SHIFT) + pRam->GCPhys; + PPGMPAGE pPage = &pRam->aPages[iPage]; + uint8_t uOldType; + rc = SSMR3GetU8(pSSM, &uOldType); + AssertLogRelMsgRCReturn(rc, ("pPage=%R[pgmpage] iPage=%#x GCPhysPage=%#x %s\n", pPage, iPage, GCPhysPage, pRam->pszDesc), rc); + if (uOldType == PGMPAGETYPE_OLD_ROM_SHADOW) + rc = pgmR3LoadShadowedRomPageOld(pVM, pSSM, pPage, GCPhysPage, pRam); + else + rc = pgmR3LoadPageOld(pVM, pSSM, uOldType, pPage, GCPhysPage, pRam); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc iPage=%#x GCPhysPage=%#x %s\n", rc, iPage, GCPhysPage, pRam->pszDesc), rc); + } + } + else + { + /* + * Old format. + */ + + /* Of the page flags, pick up MMIO2 and ROM/RESERVED for the !fHaveBits case. + The rest is generally irrelevant and wrong since the stuff have to match registrations. */ + uint32_t fFlags = 0; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + uint16_t u16Flags; + rc = SSMR3GetU16(pSSM, &u16Flags); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc iPage=%#x GCPhys=%#x %s\n", rc, iPage, pRam->GCPhys, pRam->pszDesc), rc); + fFlags |= u16Flags; + } + + /* Load the bits */ + if ( !fHaveBits + && GCPhysLast < UINT32_C(0xe0000000)) + { + /* + * Dynamic chunks. 
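+ * The old format stores this memory in 1 MB chunks, each preceded by a
+ * boolean byte indicating whether the chunk is present; pages belonging to
+ * absent chunks are expected to (still) be zero.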
+ */ + const uint32_t cPagesInChunk = (1*1024*1024) >> PAGE_SHIFT; + AssertLogRelMsgReturn(cPages % cPagesInChunk == 0, + ("cPages=%#x cPagesInChunk=%#x GCPhys=%RGp %s\n", cPages, cPagesInChunk, pRam->GCPhys, pRam->pszDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + for (uint32_t iPage = 0; iPage < cPages; /* incremented by inner loop */ ) + { + uint8_t fPresent; + rc = SSMR3GetU8(pSSM, &fPresent); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc iPage=%#x GCPhys=%#x %s\n", rc, iPage, pRam->GCPhys, pRam->pszDesc), rc); + AssertLogRelMsgReturn(fPresent == (uint8_t)true || fPresent == (uint8_t)false, + ("fPresent=%#x iPage=%#x GCPhys=%#x %s\n", fPresent, iPage, pRam->GCPhys, pRam->pszDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + for (uint32_t iChunkPage = 0; iChunkPage < cPagesInChunk; iChunkPage++, iPage++) + { + RTGCPHYS const GCPhysPage = ((RTGCPHYS)iPage << PAGE_SHIFT) + pRam->GCPhys; + PPGMPAGE pPage = &pRam->aPages[iPage]; + if (fPresent) + { + if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_SPECIAL_ALIAS_MMIO) + rc = pgmR3LoadPageToDevNullOld(pSSM); + else + rc = pgmR3LoadPageBitsOld(pVM, pSSM, PGMPAGETYPE_INVALID, pPage, GCPhysPage, pRam); + } + else + rc = pgmR3LoadPageZeroOld(pVM, PGMPAGETYPE_INVALID, pPage, GCPhysPage, pRam); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc iPage=%#x GCPhysPage=%#x %s\n", rc, iPage, GCPhysPage, pRam->pszDesc), rc); + } + } + } + else if (pRam->pvR3) + { + /* + * MMIO2. + */ + AssertLogRelMsgReturn((fFlags & 0x0f) == RT_BIT(3) /*MM_RAM_FLAGS_MMIO2*/, + ("fFlags=%#x GCPhys=%#x %s\n", fFlags, pRam->GCPhys, pRam->pszDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelMsgReturn(pRam->pvR3, + ("GCPhys=%#x %s\n", pRam->GCPhys, pRam->pszDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + rc = SSMR3GetMem(pSSM, pRam->pvR3, pRam->cb); + AssertLogRelMsgRCReturn(rc, ("GCPhys=%#x %s\n", pRam->GCPhys, pRam->pszDesc), rc); + } + else if (GCPhysLast < UINT32_C(0xfff80000)) + { + /* + * PCI MMIO, no pages saved. + */ + } + else + { + /* + * Load the 0xfff80000..0xffffffff BIOS range. + * It starts with X reserved pages that we have to skip over since + * the RAMRANGE create by the new code won't include those. + */ + AssertLogRelMsgReturn( !(fFlags & RT_BIT(3) /*MM_RAM_FLAGS_MMIO2*/) + && (fFlags & RT_BIT(0) /*MM_RAM_FLAGS_RESERVED*/), + ("fFlags=%#x GCPhys=%#x %s\n", fFlags, pRam->GCPhys, pRam->pszDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelMsgReturn(GCPhys == UINT32_C(0xfff80000), + ("GCPhys=%RGp pRamRange{GCPhys=%#x %s}\n", GCPhys, pRam->GCPhys, pRam->pszDesc), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + /* Skip wasted reserved pages before the ROM. */ + while (GCPhys < pRam->GCPhys) + { + rc = pgmR3LoadPageToDevNullOld(pSSM); + GCPhys += PAGE_SIZE; + } + + /* Load the bios pages. */ + cPages = pRam->cb >> PAGE_SHIFT; + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + RTGCPHYS const GCPhysPage = ((RTGCPHYS)iPage << PAGE_SHIFT) + pRam->GCPhys; + PPGMPAGE pPage = &pRam->aPages[iPage]; + + AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_ROM, + ("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, GCPhys), + VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + rc = pgmR3LoadPageBitsOld(pVM, pSSM, PGMPAGETYPE_ROM, pPage, GCPhysPage, pRam); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc iPage=%#x GCPhys=%#x %s\n", rc, iPage, pRam->GCPhys, pRam->pszDesc), rc); + } + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Worker for pgmR3Load and pgmR3LoadLocked. + * + * @returns VBox status code. 
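+ *
+ * @remarks Each record starts with a type byte. When PGM_STATE_REC_FLAG_ADDR
+ *          is set, an explicit address follows (an RTGCPHYS for RAM records,
+ *          a range ID byte plus a 32-bit page index for ROM and MMIO2
+ *          records); otherwise the record applies to the page immediately
+ *          following the previous one. The stream is terminated by a
+ *          PGM_STATE_REC_END record.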
+ * + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uVersion The PGM saved state unit version. + * @param uPass The pass number. + * + * @todo This needs splitting up if more record types or code twists are + * added... + */ +static int pgmR3LoadMemory(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + NOREF(uPass); + + /* + * Process page records until we hit the terminator. + */ + RTGCPHYS GCPhys = NIL_RTGCPHYS; + PPGMRAMRANGE pRamHint = NULL; + uint8_t id = UINT8_MAX; + uint32_t iPage = UINT32_MAX - 10; + PPGMROMRANGE pRom = NULL; + PPGMREGMMIORANGE pRegMmio = NULL; + + /* + * We batch up pages that should be freed instead of calling GMM for + * each and every one of them. Note that we'll lose the pages in most + * failure paths - this should probably be addressed one day. + */ + uint32_t cPendingPages = 0; + PGMMFREEPAGESREQ pReq; + int rc = GMMR3FreePagesPrepare(pVM, &pReq, 128 /* batch size */, GMMACCOUNT_BASE); + AssertLogRelRCReturn(rc, rc); + + for (;;) + { + /* + * Get the record type and flags. + */ + uint8_t u8; + rc = SSMR3GetU8(pSSM, &u8); + if (RT_FAILURE(rc)) + return rc; + if (u8 == PGM_STATE_REC_END) + { + /* + * Finish off any pages pending freeing. + */ + if (cPendingPages) + { + Log(("pgmR3LoadMemory: GMMR3FreePagesPerform pVM=%p cPendingPages=%u\n", pVM, cPendingPages)); + rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages); + AssertLogRelRCReturn(rc, rc); + } + GMMR3FreePagesCleanup(pReq); + return VINF_SUCCESS; + } + AssertLogRelMsgReturn((u8 & ~PGM_STATE_REC_FLAG_ADDR) <= PGM_STATE_REC_LAST, ("%#x\n", u8), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + switch (u8 & ~PGM_STATE_REC_FLAG_ADDR) + { + /* + * RAM page. + */ + case PGM_STATE_REC_RAM_ZERO: + case PGM_STATE_REC_RAM_RAW: + case PGM_STATE_REC_RAM_BALLOONED: + { + /* + * Get the address and resolve it into a page descriptor. + */ + if (!(u8 & PGM_STATE_REC_FLAG_ADDR)) + GCPhys += PAGE_SIZE; + else + { + rc = SSMR3GetGCPhys(pSSM, &GCPhys); + if (RT_FAILURE(rc)) + return rc; + } + AssertLogRelMsgReturn(!(GCPhys & PAGE_OFFSET_MASK), ("%RGp\n", GCPhys), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + PPGMPAGE pPage; + rc = pgmPhysGetPageWithHintEx(pVM, GCPhys, &pPage, &pRamHint); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc %RGp\n", rc, GCPhys), rc); + + /* + * Take action according to the record type. + */ + switch (u8 & ~PGM_STATE_REC_FLAG_ADDR) + { + case PGM_STATE_REC_RAM_ZERO: + { + if (PGM_PAGE_IS_ZERO(pPage)) + break; + + /* Ballooned pages must be unmarked (live snapshot and + teleportation scenarios). */ + if (PGM_PAGE_IS_BALLOONED(pPage)) + { + Assert(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM); + if (uVersion == PGM_SAVED_STATE_VERSION_BALLOON_BROKEN) + break; + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO); + break; + } + + AssertLogRelMsgReturn(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED, ("GCPhys=%RGp %R[pgmpage]\n", GCPhys, pPage), VERR_PGM_UNEXPECTED_PAGE_STATE); + + /* If this is a ROM page, we must clear it and not try to + * free it. Ditto if the VM is using RamPreAlloc (see + * @bugref{6318}). 
*/ + if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_ROM + || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_ROM_SHADOW + || pVM->pgm.s.fRamPreAlloc) + { + PGMPAGEMAPLOCK PgMpLck; + void *pvDstPage; + rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, &pvDstPage, &PgMpLck); + AssertLogRelMsgRCReturn(rc, ("GCPhys=%RGp %R[pgmpage] rc=%Rrc\n", GCPhys, pPage, rc), rc); + + ASMMemZeroPage(pvDstPage); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + } + /* Free it only if it's not part of a previously + allocated large page (no need to clear the page). */ + else if ( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE + && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED) + { + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, GCPhys, (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage)); + AssertRCReturn(rc, rc); + } + /** @todo handle large pages (see @bugref{5545}) */ + break; + } + + case PGM_STATE_REC_RAM_BALLOONED: + { + Assert(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM); + if (PGM_PAGE_IS_BALLOONED(pPage)) + break; + + /* We don't map ballooned pages in our shadow page tables, let's + just free it if allocated and mark as ballooned. See @bugref{5515}. */ + if (PGM_PAGE_IS_ALLOCATED(pPage)) + { + /** @todo handle large pages + ballooning when it works. (see @bugref{5515}, + * @bugref{5545}). */ + AssertLogRelMsgReturn( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE + && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED, + ("GCPhys=%RGp %R[pgmpage]\n", GCPhys, pPage), VERR_PGM_LOAD_UNEXPECTED_PAGE_TYPE); + + rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, GCPhys, (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage)); + AssertRCReturn(rc, rc); + } + Assert(PGM_PAGE_IS_ZERO(pPage)); + PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED); + break; + } + + case PGM_STATE_REC_RAM_RAW: + { + PGMPAGEMAPLOCK PgMpLck; + void *pvDstPage; + rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, &pvDstPage, &PgMpLck); + AssertLogRelMsgRCReturn(rc, ("GCPhys=%RGp %R[pgmpage] rc=%Rrc\n", GCPhys, pPage, rc), rc); + rc = SSMR3GetMem(pSSM, pvDstPage, PAGE_SIZE); + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + if (RT_FAILURE(rc)) + return rc; + break; + } + + default: + AssertMsgFailedReturn(("%#x\n", u8), VERR_PGM_SAVED_REC_TYPE); + } + id = UINT8_MAX; + break; + } + + /* + * MMIO2 page. + */ + case PGM_STATE_REC_MMIO2_RAW: + case PGM_STATE_REC_MMIO2_ZERO: + { + /* + * Get the ID + page number and resolved that into a MMIO2 page. + */ + if (!(u8 & PGM_STATE_REC_FLAG_ADDR)) + iPage++; + else + { + SSMR3GetU8(pSSM, &id); + rc = SSMR3GetU32(pSSM, &iPage); + if (RT_FAILURE(rc)) + return rc; + } + if ( !pRegMmio + || pRegMmio->idSavedState != id) + { + for (pRegMmio = pVM->pgm.s.pRegMmioRangesR3; pRegMmio; pRegMmio = pRegMmio->pNextR3) + if ( pRegMmio->idSavedState == id + && (pRegMmio->fFlags & PGMREGMMIORANGE_F_MMIO2)) + break; + AssertLogRelMsgReturn(pRegMmio, ("id=%#u iPage=%#x\n", id, iPage), VERR_PGM_SAVED_MMIO2_RANGE_NOT_FOUND); + } + AssertLogRelMsgReturn(iPage < (pRegMmio->RamRange.cb >> PAGE_SHIFT), ("iPage=%#x cb=%RGp %s\n", iPage, pRegMmio->RamRange.cb, pRegMmio->RamRange.pszDesc), VERR_PGM_SAVED_MMIO2_PAGE_NOT_FOUND); + void *pvDstPage = (uint8_t *)pRegMmio->RamRange.pvR3 + ((size_t)iPage << PAGE_SHIFT); + + /* + * Load the page bits. 
+ */ + if ((u8 & ~PGM_STATE_REC_FLAG_ADDR) == PGM_STATE_REC_MMIO2_ZERO) + ASMMemZeroPage(pvDstPage); + else + { + rc = SSMR3GetMem(pSSM, pvDstPage, PAGE_SIZE); + if (RT_FAILURE(rc)) + return rc; + } + GCPhys = NIL_RTGCPHYS; + break; + } + + /* + * ROM pages. + */ + case PGM_STATE_REC_ROM_VIRGIN: + case PGM_STATE_REC_ROM_SHW_RAW: + case PGM_STATE_REC_ROM_SHW_ZERO: + case PGM_STATE_REC_ROM_PROT: + { + /* + * Get the ID + page number and resolved that into a ROM page descriptor. + */ + if (!(u8 & PGM_STATE_REC_FLAG_ADDR)) + iPage++; + else + { + SSMR3GetU8(pSSM, &id); + rc = SSMR3GetU32(pSSM, &iPage); + if (RT_FAILURE(rc)) + return rc; + } + if ( !pRom + || pRom->idSavedState != id) + { + for (pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3) + if (pRom->idSavedState == id) + break; + AssertLogRelMsgReturn(pRom, ("id=%#u iPage=%#x\n", id, iPage), VERR_PGM_SAVED_ROM_RANGE_NOT_FOUND); + } + AssertLogRelMsgReturn(iPage < (pRom->cb >> PAGE_SHIFT), ("iPage=%#x cb=%RGp %s\n", iPage, pRom->cb, pRom->pszDesc), VERR_PGM_SAVED_ROM_PAGE_NOT_FOUND); + PPGMROMPAGE pRomPage = &pRom->aPages[iPage]; + GCPhys = pRom->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT); + + /* + * Get and set the protection. + */ + uint8_t u8Prot; + rc = SSMR3GetU8(pSSM, &u8Prot); + if (RT_FAILURE(rc)) + return rc; + PGMROMPROT enmProt = (PGMROMPROT)u8Prot; + AssertLogRelMsgReturn(enmProt > PGMROMPROT_INVALID && enmProt < PGMROMPROT_END, ("GCPhys=%RGp enmProt=%d\n", GCPhys, enmProt), VERR_PGM_SAVED_ROM_PAGE_PROT); + + if (enmProt != pRomPage->enmProt) + { + if (RT_UNLIKELY(!(pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, + N_("Protection change of unshadowed ROM page: GCPhys=%RGp enmProt=%d %s"), + GCPhys, enmProt, pRom->pszDesc); + rc = PGMR3PhysRomProtect(pVM, GCPhys, PAGE_SIZE, enmProt); + AssertLogRelMsgRCReturn(rc, ("GCPhys=%RGp rc=%Rrc\n", GCPhys, rc), rc); + AssertLogRelReturn(pRomPage->enmProt == enmProt, VERR_PGM_SAVED_ROM_PAGE_PROT); + } + if ((u8 & ~PGM_STATE_REC_FLAG_ADDR) == PGM_STATE_REC_ROM_PROT) + break; /* done */ + + /* + * Get the right page descriptor. + */ + PPGMPAGE pRealPage; + switch (u8 & ~PGM_STATE_REC_FLAG_ADDR) + { + case PGM_STATE_REC_ROM_VIRGIN: + if (!PGMROMPROT_IS_ROM(enmProt)) + pRealPage = &pRomPage->Virgin; + else + pRealPage = NULL; + break; + + case PGM_STATE_REC_ROM_SHW_RAW: + case PGM_STATE_REC_ROM_SHW_ZERO: + if (RT_UNLIKELY(!(pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))) + return SSMR3SetCfgError(pSSM, RT_SRC_POS, + N_("Shadowed / non-shadowed page type mismatch: GCPhys=%RGp enmProt=%d %s"), + GCPhys, enmProt, pRom->pszDesc); + if (PGMROMPROT_IS_ROM(enmProt)) + pRealPage = &pRomPage->Shadow; + else + pRealPage = NULL; + break; + + default: AssertLogRelFailedReturn(VERR_IPE_NOT_REACHED_DEFAULT_CASE); /* shut up gcc */ + } + if (!pRealPage) + { + rc = pgmPhysGetPageWithHintEx(pVM, GCPhys, &pRealPage, &pRamHint); + AssertLogRelMsgRCReturn(rc, ("rc=%Rrc %RGp\n", rc, GCPhys), rc); + } + + /* + * Make it writable and map it (if necessary). + */ + void *pvDstPage = NULL; + switch (u8 & ~PGM_STATE_REC_FLAG_ADDR) + { + case PGM_STATE_REC_ROM_SHW_ZERO: + if ( PGM_PAGE_IS_ZERO(pRealPage) + || PGM_PAGE_IS_BALLOONED(pRealPage)) + break; + /** @todo implement zero page replacing. */ + RT_FALL_THRU(); + case PGM_STATE_REC_ROM_VIRGIN: + case PGM_STATE_REC_ROM_SHW_RAW: + { + rc = pgmPhysPageMakeWritableAndMap(pVM, pRealPage, GCPhys, &pvDstPage); + AssertLogRelMsgRCReturn(rc, ("GCPhys=%RGp rc=%Rrc\n", GCPhys, rc), rc); + break; + } + } + + /* + * Load the bits. 
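+             * A ROM_SHW_ZERO record only clears the page when one was actually
+             * mapped above; ROM_VIRGIN and ROM_SHW_RAW records are followed by
+             * PAGE_SIZE bytes of page data.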
+ */ + switch (u8 & ~PGM_STATE_REC_FLAG_ADDR) + { + case PGM_STATE_REC_ROM_SHW_ZERO: + if (pvDstPage) + ASMMemZeroPage(pvDstPage); + break; + + case PGM_STATE_REC_ROM_VIRGIN: + case PGM_STATE_REC_ROM_SHW_RAW: + rc = SSMR3GetMem(pSSM, pvDstPage, PAGE_SIZE); + if (RT_FAILURE(rc)) + return rc; + break; + } + GCPhys = NIL_RTGCPHYS; + break; + } + + /* + * Unknown type. + */ + default: + AssertLogRelMsgFailedReturn(("%#x\n", u8), VERR_PGM_SAVED_REC_TYPE); + } + } /* forever */ +} + + +/** + * Worker for pgmR3Load. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uVersion The saved state version. + */ +static int pgmR3LoadFinalLocked(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion) +{ + PPGM pPGM = &pVM->pgm.s; + int rc; + uint32_t u32Sep; + + /* + * Load basic data (required / unaffected by relocation). + */ + if (uVersion >= PGM_SAVED_STATE_VERSION_3_0_0) + { + if (uVersion > PGM_SAVED_STATE_VERSION_PRE_BALLOON) + rc = SSMR3GetStruct(pSSM, pPGM, &s_aPGMFields[0]); + else + rc = SSMR3GetStruct(pSSM, pPGM, &s_aPGMFieldsPreBalloon[0]); + + AssertLogRelRCReturn(rc, rc); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + if (uVersion <= PGM_SAVED_STATE_VERSION_PRE_PAE) + rc = SSMR3GetStruct(pSSM, &pVM->aCpus[i].pgm.s, &s_aPGMCpuFieldsPrePae[0]); + else + rc = SSMR3GetStruct(pSSM, &pVM->aCpus[i].pgm.s, &s_aPGMCpuFields[0]); + AssertLogRelRCReturn(rc, rc); + } + } + else if (uVersion >= PGM_SAVED_STATE_VERSION_RR_DESC) + { + AssertRelease(pVM->cCpus == 1); + + PGMOLD pgmOld; + rc = SSMR3GetStruct(pSSM, &pgmOld, &s_aPGMFields_Old[0]); + AssertLogRelRCReturn(rc, rc); + + pPGM->fMappingsFixed = pgmOld.fMappingsFixed; + pPGM->GCPtrMappingFixed = pgmOld.GCPtrMappingFixed; + pPGM->cbMappingFixed = pgmOld.cbMappingFixed; + + pVM->aCpus[0].pgm.s.fA20Enabled = pgmOld.fA20Enabled; + pVM->aCpus[0].pgm.s.GCPhysA20Mask = pgmOld.GCPhysA20Mask; + pVM->aCpus[0].pgm.s.enmGuestMode = pgmOld.enmGuestMode; + } + else + { + AssertRelease(pVM->cCpus == 1); + + SSMR3GetBool(pSSM, &pPGM->fMappingsFixed); + SSMR3GetGCPtr(pSSM, &pPGM->GCPtrMappingFixed); + SSMR3GetU32(pSSM, &pPGM->cbMappingFixed); + + uint32_t cbRamSizeIgnored; + rc = SSMR3GetU32(pSSM, &cbRamSizeIgnored); + if (RT_FAILURE(rc)) + return rc; + SSMR3GetGCPhys(pSSM, &pVM->aCpus[0].pgm.s.GCPhysA20Mask); + + uint32_t u32 = 0; + SSMR3GetUInt(pSSM, &u32); + pVM->aCpus[0].pgm.s.fA20Enabled = !!u32; + SSMR3GetUInt(pSSM, &pVM->aCpus[0].pgm.s.fSyncFlags); + RTUINT uGuestMode; + SSMR3GetUInt(pSSM, &uGuestMode); + pVM->aCpus[0].pgm.s.enmGuestMode = (PGMMODE)uGuestMode; + + /* check separator. */ + SSMR3GetU32(pSSM, &u32Sep); + if (RT_FAILURE(rc)) + return rc; + if (u32Sep != (uint32_t)~0) + { + AssertMsgFailed(("u32Sep=%#x (first)\n", u32Sep)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + } + + /* + * Fix the A20 mask. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + pVCpu->pgm.s.GCPhysA20Mask = ~((RTGCPHYS)!pVCpu->pgm.s.fA20Enabled << 20); + pgmR3RefreshShadowModeAfterA20Change(pVCpu); + } + + /* + * The guest mappings - skipped now, see re-fixation in the caller. 
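+     * Pre-PAE saved states stored a sequence number, a description string,
+     * the GC pointer and the page table count for each mapping.  The records
+     * are read and thrown away here (terminated by a ~0 sequence number);
+     * the actual re-fixation is done by the caller after loading completes.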
+ */ + if (uVersion <= PGM_SAVED_STATE_VERSION_PRE_PAE) + { + for (uint32_t i = 0; ; i++) + { + rc = SSMR3GetU32(pSSM, &u32Sep); /* sequence number */ + if (RT_FAILURE(rc)) + return rc; + if (u32Sep == ~0U) + break; + AssertMsgReturn(u32Sep == i, ("u32Sep=%#x i=%#x\n", u32Sep, i), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + + char szDesc[256]; + rc = SSMR3GetStrZ(pSSM, szDesc, sizeof(szDesc)); + if (RT_FAILURE(rc)) + return rc; + RTGCPTR GCPtrIgnore; + SSMR3GetGCPtr(pSSM, &GCPtrIgnore); /* GCPtr */ + rc = SSMR3GetGCPtr(pSSM, &GCPtrIgnore); /* cPTs */ + if (RT_FAILURE(rc)) + return rc; + } + } + + /* + * Load the RAM contents. + */ + if (uVersion > PGM_SAVED_STATE_VERSION_3_0_0) + { + if (!pVM->pgm.s.LiveSave.fActive) + { + if (uVersion > PGM_SAVED_STATE_VERSION_NO_RAM_CFG) + { + rc = pgmR3LoadRamConfig(pVM, pSSM); + if (RT_FAILURE(rc)) + return rc; + } + rc = pgmR3LoadRomRanges(pVM, pSSM); + if (RT_FAILURE(rc)) + return rc; + rc = pgmR3LoadMmio2Ranges(pVM, pSSM); + if (RT_FAILURE(rc)) + return rc; + } + + rc = pgmR3LoadMemory(pVM, pSSM, uVersion, SSM_PASS_FINAL); + } + else + rc = pgmR3LoadMemoryOld(pVM, pSSM, uVersion); + + /* Refresh balloon accounting. */ + if (pVM->pgm.s.cBalloonedPages) + { + Log(("pgmR3LoadFinalLocked: pVM=%p cBalloonedPages=%#x\n", pVM, pVM->pgm.s.cBalloonedPages)); + rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_INFLATE, pVM->pgm.s.cBalloonedPages); + AssertRCReturn(rc, rc); + } + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTLOADEXEC} + */ +static DECLCALLBACK(int) pgmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + int rc; + + /* + * Validate version. + */ + if ( ( uPass != SSM_PASS_FINAL + && uVersion != PGM_SAVED_STATE_VERSION + && uVersion != PGM_SAVED_STATE_VERSION_PRE_PAE + && uVersion != PGM_SAVED_STATE_VERSION_BALLOON_BROKEN + && uVersion != PGM_SAVED_STATE_VERSION_PRE_BALLOON + && uVersion != PGM_SAVED_STATE_VERSION_NO_RAM_CFG) + || ( uVersion != PGM_SAVED_STATE_VERSION + && uVersion != PGM_SAVED_STATE_VERSION_PRE_PAE + && uVersion != PGM_SAVED_STATE_VERSION_BALLOON_BROKEN + && uVersion != PGM_SAVED_STATE_VERSION_PRE_BALLOON + && uVersion != PGM_SAVED_STATE_VERSION_NO_RAM_CFG + && uVersion != PGM_SAVED_STATE_VERSION_3_0_0 + && uVersion != PGM_SAVED_STATE_VERSION_2_2_2 + && uVersion != PGM_SAVED_STATE_VERSION_RR_DESC + && uVersion != PGM_SAVED_STATE_VERSION_OLD_PHYS_CODE) + ) + { + AssertMsgFailed(("pgmR3Load: Invalid version uVersion=%d (current %d)!\n", uVersion, PGM_SAVED_STATE_VERSION)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Do the loading while owning the lock because a bunch of the functions + * we're using requires this. + */ + if (uPass != SSM_PASS_FINAL) + { + pgmLock(pVM); + if (uPass != 0) + rc = pgmR3LoadMemory(pVM, pSSM, uVersion, uPass); + else + { + pVM->pgm.s.LiveSave.fActive = true; + if (uVersion > PGM_SAVED_STATE_VERSION_NO_RAM_CFG) + rc = pgmR3LoadRamConfig(pVM, pSSM); + else + rc = VINF_SUCCESS; + if (RT_SUCCESS(rc)) + rc = pgmR3LoadRomRanges(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = pgmR3LoadMmio2Ranges(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = pgmR3LoadMemory(pVM, pSSM, uVersion, uPass); + } + pgmUnlock(pVM); + } + else + { + pgmLock(pVM); + rc = pgmR3LoadFinalLocked(pVM, pSSM, uVersion); + pVM->pgm.s.LiveSave.fActive = false; + pgmUnlock(pVM); + if (RT_SUCCESS(rc)) + { + /* + * We require a full resync now. 
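+             * Both CR3 sync force-action flags are raised on every VCPU so that
+             * the shadow paging structures are rebuilt before guest code runs
+             * again.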
+ */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL); + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL; + /** @todo For guest PAE, we might get the wrong + * aGCPhysGstPaePDs values now. We should used the + * saved ones... Postponing this since it nothing new + * and PAE/PDPTR needs some general readjusting, see + * @bugref{5880}. */ + } + + pgmR3HandlerPhysicalUpdateAll(pVM); + + /* + * Change the paging mode (indirectly restores PGMCPU::GCPhysCR3). + * (Requires the CPUM state to be restored already!) + */ + if (CPUMR3IsStateRestorePending(pVM)) + return SSMR3SetLoadError(pSSM, VERR_WRONG_ORDER, RT_SRC_POS, + N_("PGM was unexpectedly restored before CPUM")); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + rc = PGMHCChangeMode(pVM, pVCpu, pVCpu->pgm.s.enmGuestMode); + AssertLogRelRCReturn(rc, rc); + + /* Update the PSE, NX flags and validity masks. */ + pVCpu->pgm.s.fGst32BitPageSizeExtension = CPUMIsGuestPageSizeExtEnabled(pVCpu); + PGMNotifyNxeChanged(pVCpu, CPUMIsGuestNXEnabled(pVCpu)); + } + + /* + * Try re-fixate the guest mappings. + */ + pVM->pgm.s.fMappingsFixedRestored = false; + if ( pVM->pgm.s.fMappingsFixed + && pgmMapAreMappingsEnabled(pVM)) + { +#ifndef PGM_WITHOUT_MAPPINGS + RTGCPTR GCPtrFixed = pVM->pgm.s.GCPtrMappingFixed; + uint32_t cbFixed = pVM->pgm.s.cbMappingFixed; + pVM->pgm.s.fMappingsFixed = false; + + uint32_t cbRequired; + int rc2 = PGMR3MappingsSize(pVM, &cbRequired); AssertRC(rc2); + if ( RT_SUCCESS(rc2) + && cbRequired > cbFixed) + rc2 = VERR_OUT_OF_RANGE; + if (RT_SUCCESS(rc2)) + rc2 = pgmR3MappingsFixInternal(pVM, GCPtrFixed, cbFixed); + if (RT_FAILURE(rc2)) + { + LogRel(("PGM: Unable to re-fixate the guest mappings at %RGv-%RGv: rc=%Rrc (cbRequired=%#x)\n", + GCPtrFixed, GCPtrFixed + cbFixed, rc2, cbRequired)); + pVM->pgm.s.fMappingsFixed = false; + pVM->pgm.s.fMappingsFixedRestored = true; + pVM->pgm.s.GCPtrMappingFixed = GCPtrFixed; + pVM->pgm.s.cbMappingFixed = cbFixed; + } +#else + AssertFailed(); +#endif + } + else + { + /* We used to set fixed + disabled while we only use disabled now, + so wipe the state to avoid any confusion. */ + pVM->pgm.s.fMappingsFixed = false; + pVM->pgm.s.GCPtrMappingFixed = NIL_RTGCPTR; + pVM->pgm.s.cbMappingFixed = 0; + } + + /* + * If we have floating mappings, do a CR3 sync now to make sure the HMA + * doesn't conflict with guest code / data and thereby cause trouble + * when restoring other components like PATM. + */ + if (pgmMapAreMappingsFloating(pVM)) + { + PVMCPU pVCpu = &pVM->aCpus[0]; + rc = PGMSyncCR3(pVCpu, CPUMGetGuestCR0(pVCpu), CPUMGetGuestCR3(pVCpu), CPUMGetGuestCR4(pVCpu), true); + if (RT_FAILURE(rc)) + return SSMR3SetLoadError(pSSM, VERR_WRONG_ORDER, RT_SRC_POS, + N_("PGMSyncCR3 failed unexpectedly with rc=%Rrc"), rc); + + /* Make sure to re-sync before executing code. */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL); + VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); + pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL; + } + } + } + + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTLOADDONE} + */ +static DECLCALLBACK(int) pgmR3LoadDone(PVM pVM, PSSMHANDLE pSSM) +{ + pVM->pgm.s.fRestoreRomPagesOnReset = true; + NOREF(pSSM); + return VINF_SUCCESS; +} + + +/** + * Registers the saved state callbacks with SSM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. 
+ * @param cbRam The RAM size. + */ +int pgmR3InitSavedState(PVM pVM, uint64_t cbRam) +{ + return SSMR3RegisterInternal(pVM, "pgm", 1, PGM_SAVED_STATE_VERSION, (size_t)cbRam + sizeof(PGM), + pgmR3LivePrep, pgmR3LiveExec, pgmR3LiveVote, + NULL, pgmR3SaveExec, pgmR3SaveDone, + pgmR3LoadPrep, pgmR3Load, pgmR3LoadDone); +} + diff --git a/src/VBox/VMM/VMMR3/PGMSharedPage.cpp b/src/VBox/VMM/VMMR3/PGMSharedPage.cpp new file mode 100644 index 00000000..53206b6e --- /dev/null +++ b/src/VBox/VMM/VMMR3/PGMSharedPage.cpp @@ -0,0 +1,442 @@ +/* $Id: PGMSharedPage.cpp $ */ +/** @file + * PGM - Page Manager and Monitor, Shared page handling + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_PGM_SHARED +#include +#include +#include +#include "PGMInternal.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "PGMInline.h" + + +#ifdef VBOX_WITH_PAGE_SHARING + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +# ifdef VBOX_STRICT +/** Keep a copy of all registered shared modules for the .pgmcheckduppages debugger command. */ +static PGMMREGISTERSHAREDMODULEREQ g_apSharedModules[512] = {0}; +static unsigned g_cSharedModules = 0; +# endif /* VBOX_STRICT */ + + +/** + * Registers a new shared module for the VM + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmGuestOS Guest OS type. + * @param pszModuleName Module name. + * @param pszVersion Module version. + * @param GCBaseAddr Module base address. + * @param cbModule Module size. + * @param cRegions Number of shared region descriptors. + * @param paRegions Shared region(s). + * + * @todo This should be a GMMR3 call. No need to involve GMM here. + */ +VMMR3DECL(int) PGMR3SharedModuleRegister(PVM pVM, VBOXOSFAMILY enmGuestOS, char *pszModuleName, char *pszVersion, + RTGCPTR GCBaseAddr, uint32_t cbModule, uint32_t cRegions, + VMMDEVSHAREDREGIONDESC const *paRegions) +{ + Log(("PGMR3SharedModuleRegister family=%d name=%s version=%s base=%RGv size=%x cRegions=%d\n", + enmGuestOS, pszModuleName, pszVersion, GCBaseAddr, cbModule, cRegions)); + + /* + * Sanity check. + */ + AssertReturn(cRegions <= VMMDEVSHAREDREGIONDESC_MAX, VERR_INVALID_PARAMETER); + if (!pVM->pgm.s.fPageFusionAllowed) + return VERR_NOT_SUPPORTED; + + /* + * Allocate and initialize a GMM request. 
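+     * The request is a variable sized structure; RT_UOFFSETOF_DYN sizes the
+     * allocation so the trailing aRegions[] array holds all cRegions entries,
+     * i.e. RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions)
+     *      + cRegions * sizeof(pReq->aRegions[0]).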
+ */ + PGMMREGISTERSHAREDMODULEREQ pReq; + pReq = (PGMMREGISTERSHAREDMODULEREQ)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[cRegions])); + AssertReturn(pReq, VERR_NO_MEMORY); + + pReq->enmGuestOS = enmGuestOS; + pReq->GCBaseAddr = GCBaseAddr; + pReq->cbModule = cbModule; + pReq->cRegions = cRegions; + for (uint32_t i = 0; i < cRegions; i++) + pReq->aRegions[i] = paRegions[i]; + + int rc = RTStrCopy(pReq->szName, sizeof(pReq->szName), pszModuleName); + if (RT_SUCCESS(rc)) + { + rc = RTStrCopy(pReq->szVersion, sizeof(pReq->szVersion), pszVersion); + if (RT_SUCCESS(rc)) + { + /* + * Issue the request. In strict builds, do some local tracking. + */ + pgmR3PhysAssertSharedPageChecksums(pVM); + rc = GMMR3RegisterSharedModule(pVM, pReq); + if (RT_SUCCESS(rc)) + rc = pReq->rc; + AssertMsg(rc == VINF_SUCCESS || rc == VINF_GMM_SHARED_MODULE_ALREADY_REGISTERED, ("%Rrc\n", rc)); + +# ifdef VBOX_STRICT + if ( rc == VINF_SUCCESS + && g_cSharedModules < RT_ELEMENTS(g_apSharedModules)) + { + unsigned i; + for (i = 0; i < RT_ELEMENTS(g_apSharedModules); i++) + if (g_apSharedModules[i] == NULL) + { + + size_t const cbSharedModule = RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[cRegions]); + g_apSharedModules[i] = (PGMMREGISTERSHAREDMODULEREQ)RTMemDup(pReq, cbSharedModule); + g_cSharedModules++; + break; + } + Assert(i < RT_ELEMENTS(g_apSharedModules)); + } +# endif /* VBOX_STRICT */ + if (RT_SUCCESS(rc)) + rc = VINF_SUCCESS; + } + } + + RTMemFree(pReq); + return rc; +} + + +/** + * Unregisters a shared module for the VM + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszModuleName Module name. + * @param pszVersion Module version. + * @param GCBaseAddr Module base address. + * @param cbModule Module size. + * + * @todo This should be a GMMR3 call. No need to involve GMM here. + */ +VMMR3DECL(int) PGMR3SharedModuleUnregister(PVM pVM, char *pszModuleName, char *pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule) +{ + Log(("PGMR3SharedModuleUnregister name=%s version=%s base=%RGv size=%x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule)); + + AssertMsgReturn(cbModule > 0 && cbModule < _1G, ("%u\n", cbModule), VERR_OUT_OF_RANGE); + if (!pVM->pgm.s.fPageFusionAllowed) + return VERR_NOT_SUPPORTED; + + /* + * Forward the request to GMM (ring-0). + */ + PGMMUNREGISTERSHAREDMODULEREQ pReq = (PGMMUNREGISTERSHAREDMODULEREQ)RTMemAlloc(sizeof(*pReq)); + AssertReturn(pReq, VERR_NO_MEMORY); + + pReq->GCBaseAddr = GCBaseAddr; + pReq->u32Alignment = 0; + pReq->cbModule = cbModule; + + int rc = RTStrCopy(pReq->szName, sizeof(pReq->szName), pszModuleName); + if (RT_SUCCESS(rc)) + { + rc = RTStrCopy(pReq->szVersion, sizeof(pReq->szVersion), pszVersion); + if (RT_SUCCESS(rc)) + { + pgmR3PhysAssertSharedPageChecksums(pVM); + rc = GMMR3UnregisterSharedModule(pVM, pReq); + pgmR3PhysAssertSharedPageChecksums(pVM); + +# ifdef VBOX_STRICT + /* + * Update our local tracking. + */ + for (unsigned i = 0; i < g_cSharedModules; i++) + { + if ( g_apSharedModules[i] + && !strcmp(g_apSharedModules[i]->szName, pszModuleName) + && !strcmp(g_apSharedModules[i]->szVersion, pszVersion)) + { + RTMemFree(g_apSharedModules[i]); + g_apSharedModules[i] = NULL; + g_cSharedModules--; + break; + } + } +# endif /* VBOX_STRICT */ + } + } + + RTMemFree(pReq); + return rc; +} + + +/** + * Rendezvous callback that will be called once. + * + * @returns VBox strict status code. + * @param pVM The cross context VM structure. 
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Pointer to a VMCPUID with the requester's ID. + */ +static DECLCALLBACK(VBOXSTRICTRC) pgmR3SharedModuleRegRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + VMCPUID idCpu = *(VMCPUID *)pvUser; + + /* Execute on the VCPU that issued the original request to make sure we're in the right cr3 context. */ + if (pVCpu->idCpu != idCpu) + { + Assert(pVM->cCpus > 1); + return VINF_SUCCESS; + } + + + /* Flush all pending handy page operations before changing any shared page assignments. */ + int rc = PGMR3PhysAllocateHandyPages(pVM); + AssertRC(rc); + + /* + * Lock it here as we can't deal with busy locks in this ring-0 path. + */ + LogFlow(("pgmR3SharedModuleRegRendezvous: start (%d)\n", pVM->pgm.s.cSharedPages)); + + pgmLock(pVM); + pgmR3PhysAssertSharedPageChecksums(pVM); + rc = GMMR3CheckSharedModules(pVM); + pgmR3PhysAssertSharedPageChecksums(pVM); + pgmUnlock(pVM); + AssertLogRelRC(rc); + + LogFlow(("pgmR3SharedModuleRegRendezvous: done (%d)\n", pVM->pgm.s.cSharedPages)); + return rc; +} + +/** + * Shared module check helper (called on the way out). + * + * @param pVM The cross context VM structure. + * @param idCpu VCPU id. + */ +static DECLCALLBACK(void) pgmR3CheckSharedModulesHelper(PVM pVM, VMCPUID idCpu) +{ + /* We must stall other VCPUs as we'd otherwise have to send IPI flush commands for every single change we make. */ + STAM_REL_PROFILE_START(&pVM->pgm.s.StatShModCheck, a); + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE, pgmR3SharedModuleRegRendezvous, &idCpu); + AssertRCSuccess(rc); + STAM_REL_PROFILE_STOP(&pVM->pgm.s.StatShModCheck, a); +} + + +/** + * Check all registered modules for changes. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) PGMR3SharedModuleCheckAll(PVM pVM) +{ + if (!pVM->pgm.s.fPageFusionAllowed) + return VERR_NOT_SUPPORTED; + + /* Queue the actual registration as we are under the IOM lock right now. Perform this operation on the way out. */ + return VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3CheckSharedModulesHelper, 2, pVM, VMMGetCpuId(pVM)); +} + + +# ifdef DEBUG +/** + * Query the state of a page in a shared module + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPtrPage Page address. + * @param pfShared Shared status (out). + * @param pfPageFlags Page flags (out). + */ +VMMR3DECL(int) PGMR3SharedModuleGetPageState(PVM pVM, RTGCPTR GCPtrPage, bool *pfShared, uint64_t *pfPageFlags) +{ + /* Debug only API for the page fusion testcase. 
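+     * It translates GCPtrPage with PGMGstGetPage and reports whether the
+     * backing physical page is currently marked shared; not-present
+     * translations are reported back as not shared.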
*/ + RTGCPHYS GCPhys; + uint64_t fFlags; + + pgmLock(pVM); + + int rc = PGMGstGetPage(VMMGetCpu(pVM), GCPtrPage, &fFlags, &GCPhys); + switch (rc) + { + case VINF_SUCCESS: + { + PPGMPAGE pPage = pgmPhysGetPage(pVM, GCPhys); + if (pPage) + { + *pfShared = PGM_PAGE_IS_SHARED(pPage); + *pfPageFlags = fFlags; + } + else + rc = VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS; + break; + } + + case VERR_PAGE_NOT_PRESENT: + case VERR_PAGE_TABLE_NOT_PRESENT: + case VERR_PAGE_MAP_LEVEL4_NOT_PRESENT: + case VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT: + *pfShared = false; + *pfPageFlags = 0; + rc = VINF_SUCCESS; + break; + + default: + break; + } + + pgmUnlock(pVM); + return rc; +} +# endif /* DEBUG */ + +# ifdef VBOX_STRICT + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmcheckduppages' command.} + */ +DECLCALLBACK(int) pgmR3CmdCheckDuplicatePages(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + unsigned cBallooned = 0; + unsigned cShared = 0; + unsigned cZero = 0; + unsigned cUnique = 0; + unsigned cDuplicate = 0; + unsigned cAllocZero = 0; + unsigned cPages = 0; + NOREF(pCmd); NOREF(paArgs); NOREF(cArgs); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + pgmLock(pVM); + + for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3) + { + PPGMPAGE pPage = &pRam->aPages[0]; + RTGCPHYS GCPhys = pRam->GCPhys; + uint32_t cLeft = pRam->cb >> PAGE_SHIFT; + while (cLeft-- > 0) + { + if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM) + { + switch (PGM_PAGE_GET_STATE(pPage)) + { + case PGM_PAGE_STATE_ZERO: + cZero++; + break; + + case PGM_PAGE_STATE_BALLOONED: + cBallooned++; + break; + + case PGM_PAGE_STATE_SHARED: + cShared++; + break; + + case PGM_PAGE_STATE_ALLOCATED: + case PGM_PAGE_STATE_WRITE_MONITORED: + { + /* Check if the page was allocated, but completely zero. */ + PGMPAGEMAPLOCK PgMpLck; + const void *pvPage; + int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, GCPhys, &pvPage, &PgMpLck); + if ( RT_SUCCESS(rc) + && ASMMemIsZeroPage(pvPage)) + cAllocZero++; + else if (GMMR3IsDuplicatePage(pVM, PGM_PAGE_GET_PAGEID(pPage))) + cDuplicate++; + else + cUnique++; + if (RT_SUCCESS(rc)) + pgmPhysReleaseInternalPageMappingLock(pVM, &PgMpLck); + break; + } + + default: + AssertFailed(); + break; + } + } + + /* next */ + pPage++; + GCPhys += PAGE_SIZE; + cPages++; + /* Give some feedback for every processed megabyte. 
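+             * (The 0x7f mask below makes that one dot per 128 pages, i.e. every
+             *  512 KiB of RAM with 4 KiB pages.)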
*/ + if ((cPages & 0x7f) == 0) + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "."); + } + } + pgmUnlock(pVM); + + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "\nNumber of zero pages %08x (%d MB)\n", cZero, cZero / 256); + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Number of alloczero pages %08x (%d MB)\n", cAllocZero, cAllocZero / 256); + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Number of ballooned pages %08x (%d MB)\n", cBallooned, cBallooned / 256); + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Number of shared pages %08x (%d MB)\n", cShared, cShared / 256); + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Number of unique pages %08x (%d MB)\n", cUnique, cUnique / 256); + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Number of duplicate pages %08x (%d MB)\n", cDuplicate, cDuplicate / 256); + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNDBGCCMD, The '.pgmsharedmodules' command.} + */ +DECLCALLBACK(int) pgmR3CmdShowSharedModules(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + NOREF(pCmd); NOREF(paArgs); NOREF(cArgs); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + pgmLock(pVM); + for (unsigned i = 0; i < RT_ELEMENTS(g_apSharedModules); i++) + { + if (g_apSharedModules[i]) + { + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "Shared module %s (%s):\n", g_apSharedModules[i]->szName, g_apSharedModules[i]->szVersion); + for (unsigned j = 0; j < g_apSharedModules[i]->cRegions; j++) + pCmdHlp->pfnPrintf(pCmdHlp, NULL, "--- Region %d: base %RGv size %x\n", j, g_apSharedModules[i]->aRegions[j].GCRegionAddr, g_apSharedModules[i]->aRegions[j].cbRegion); + } + } + pgmUnlock(pVM); + + return VINF_SUCCESS; +} + +# endif /* VBOX_STRICT*/ +#endif /* VBOX_WITH_PAGE_SHARING */ diff --git a/src/VBox/VMM/VMMR3/SELM.cpp b/src/VBox/VMM/VMMR3/SELM.cpp new file mode 100644 index 00000000..cb318eee --- /dev/null +++ b/src/VBox/VMM/VMMR3/SELM.cpp @@ -0,0 +1,2715 @@ +/* $Id: SELM.cpp $ */ +/** @file + * SELM - The Selector Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_selm SELM - The Selector Manager + * + * SELM takes care of GDT, LDT and TSS shadowing in raw-mode, and the injection + * of a few hyper selector for the raw-mode context. In the hardware assisted + * virtualization mode its only task is to decode entries in the guest GDT or + * LDT once in a while. + * + * @see grp_selm + * + * + * @section seg_selm_shadowing Shadowing + * + * SELMR3UpdateFromCPUM() and SELMR3SyncTSS() does the bulk synchronization + * work. The three structures (GDT, LDT, TSS) are all shadowed wholesale atm. + * The idea is to do it in a more on-demand fashion when we get time. There + * also a whole bunch of issues with the current synchronization of all three + * tables, see notes and todos in the code. + * + * When the guest makes changes to the GDT we will try update the shadow copy + * without involving SELMR3UpdateFromCPUM(), see selmGCSyncGDTEntry(). 
+ * + * When the guest make LDT changes we'll trigger a full resync of the LDT + * (SELMR3UpdateFromCPUM()), which, needless to say, isn't optimal. + * + * The TSS shadowing is limited to the fields we need to care about, namely SS0 + * and ESP0. The Patch Manager makes use of these. We monitor updates to the + * guest TSS and will try keep our SS0 and ESP0 copies up to date this way + * rather than go the SELMR3SyncTSS() route. + * + * When in raw-mode SELM also injects a few extra GDT selectors which are used + * by the raw-mode (hyper) context. These start their life at the high end of + * the table and will be relocated when the guest tries to make use of them... + * Well, that was that idea at least, only the code isn't quite there yet which + * is why we have trouble with guests which actually have a full sized GDT. + * + * So, the summary of the current GDT, LDT and TSS shadowing is that there is a + * lot of relatively simple and enjoyable work to be done, see @bugref{3267}. + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_SELM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "SELMInternal.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "SELMInline.h" + + +/** SELM saved state version. */ +#define SELM_SAVED_STATE_VERSION 5 + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) selmR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) selmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(int) selmR3LoadDone(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(void) selmR3InfoGdt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) selmR3InfoGdtGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) selmR3InfoLdt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) selmR3InfoLdtGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +//static DECLCALLBACK(void) selmR3InfoTss(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +//static DECLCALLBACK(void) selmR3InfoTssGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#if defined(VBOX_WITH_RAW_MODE) && defined(LOG_ENABLED) +/** Segment register names. */ +static char const g_aszSRegNms[X86_SREG_COUNT][4] = { "ES", "CS", "SS", "DS", "FS", "GS" }; +#endif + + +/** + * Initializes the SELM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) SELMR3Init(PVM pVM) +{ + int rc; + LogFlow(("SELMR3Init\n")); + + /* + * Assert alignment and sizes. + * (The TSS block requires contiguous back.) 
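+     * I.e. the Tss and TssTrap08 members must not cross a page boundary,
+     * which is what the AssertRelease statements below verify.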
+ */ + AssertCompile(sizeof(pVM->selm.s) <= sizeof(pVM->selm.padding)); AssertRelease(sizeof(pVM->selm.s) <= sizeof(pVM->selm.padding)); + AssertCompileMemberAlignment(VM, selm.s, 32); AssertRelease(!(RT_UOFFSETOF(VM, selm.s) & 31)); +#if 0 /* doesn't work */ + AssertCompile((RT_OFFSETOF(VM, selm.s.Tss) & PAGE_OFFSET_MASK) <= PAGE_SIZE - sizeof(pVM->selm.s.Tss)); + AssertCompile((RT_OFFSETOF(VM, selm.s.TssTrap08) & PAGE_OFFSET_MASK) <= PAGE_SIZE - sizeof(pVM->selm.s.TssTrap08)); +#endif + AssertRelease((RT_UOFFSETOF(VM, selm.s.Tss) & PAGE_OFFSET_MASK) <= PAGE_SIZE - sizeof(pVM->selm.s.Tss)); + AssertRelease((RT_UOFFSETOF(VM, selm.s.TssTrap08) & PAGE_OFFSET_MASK) <= PAGE_SIZE - sizeof(pVM->selm.s.TssTrap08)); + AssertRelease(sizeof(pVM->selm.s.Tss.IntRedirBitmap) == 0x20); + + /* + * Init the structure. + */ + pVM->selm.s.offVM = RT_UOFFSETOF(VM, selm); + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] = (SELM_GDT_ELEMENTS - 0x1) << 3; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] = (SELM_GDT_ELEMENTS - 0x2) << 3; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] = (SELM_GDT_ELEMENTS - 0x3) << 3; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] = (SELM_GDT_ELEMENTS - 0x4) << 3; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] = (SELM_GDT_ELEMENTS - 0x5) << 3; + + if (VM_IS_RAW_MODE_ENABLED(pVM) || HMIsRawModeCtxNeeded(pVM)) + { + /* + * Allocate GDT table. + */ + rc = MMR3HyperAllocOnceNoRel(pVM, sizeof(pVM->selm.s.paGdtR3[0]) * SELM_GDT_ELEMENTS, + PAGE_SIZE, MM_TAG_SELM, (void **)&pVM->selm.s.paGdtR3); + AssertRCReturn(rc, rc); + + /* + * Allocate LDT area. + */ + rc = MMR3HyperAllocOnceNoRel(pVM, _64K + PAGE_SIZE, PAGE_SIZE, MM_TAG_SELM, &pVM->selm.s.pvLdtR3); + AssertRCReturn(rc, rc); + } + + /* + * Init Guest's and Shadow GDT, LDT, TSS changes control variables. + */ + pVM->selm.s.cbEffGuestGdtLimit = 0; + pVM->selm.s.GuestGdtr.pGdt = RTRCPTR_MAX; + pVM->selm.s.GCPtrGuestLdt = RTRCPTR_MAX; + pVM->selm.s.GCPtrGuestTss = RTRCPTR_MAX; + + pVM->selm.s.paGdtRC = NIL_RTRCPTR; /* Must be set in SELMR3Relocate because of monitoring. */ + pVM->selm.s.pvLdtRC = RTRCPTR_MAX; + pVM->selm.s.pvMonShwTssRC = RTRCPTR_MAX; + pVM->selm.s.GCSelTss = RTSEL_MAX; + + pVM->selm.s.fSyncTSSRing0Stack = false; + + /* The I/O bitmap starts right after the virtual interrupt redirection + bitmap. Outside the TSS on purpose; the CPU will not check it for + I/O operations. */ + pVM->selm.s.Tss.offIoBitmap = sizeof(VBOXTSS); + /* bit set to 1 means no redirection */ + memset(pVM->selm.s.Tss.IntRedirBitmap, 0xff, sizeof(pVM->selm.s.Tss.IntRedirBitmap)); + + /* + * Register the virtual access handlers. 
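+     * All handler types start out as NIL; the registrations below are only
+     * performed for raw-mode VMs.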
+ */ + pVM->selm.s.hShadowGdtWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; + pVM->selm.s.hShadowLdtWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; + pVM->selm.s.hShadowTssWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; + pVM->selm.s.hGuestGdtWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; + pVM->selm.s.hGuestLdtWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; + pVM->selm.s.hGuestTssWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { +# ifdef SELM_TRACK_SHADOW_GDT_CHANGES + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_HYPERVISOR, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, NULL /*pfnHandlerR3*/, + NULL /*pszHandlerRC*/, "selmRCShadowGDTWritePfHandler", + "Shadow GDT write access handler", &pVM->selm.s.hShadowGdtWriteHandlerType); + AssertRCReturn(rc, rc); +# endif +# ifdef SELM_TRACK_SHADOW_TSS_CHANGES + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_HYPERVISOR, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, NULL /*pfnHandlerR3*/, + NULL /*pszHandlerRC*/, "selmRCShadowTSSWritePfHandler", + "Shadow TSS write access handler", &pVM->selm.s.hShadowTssWriteHandlerType); + AssertRCReturn(rc, rc); +# endif +# ifdef SELM_TRACK_SHADOW_LDT_CHANGES + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_HYPERVISOR, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, NULL /*pfnHandlerR3*/, + NULL /*pszHandlerRC*/, "selmRCShadowLDTWritePfHandler", + "Shadow LDT write access handler", &pVM->selm.s.hShadowLdtWriteHandlerType); + AssertRCReturn(rc, rc); +# endif + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, selmGuestGDTWriteHandler, + "selmGuestGDTWriteHandler", "selmRCGuestGDTWritePfHandler", + "Guest GDT write access handler", &pVM->selm.s.hGuestGdtWriteHandlerType); + AssertRCReturn(rc, rc); + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, selmGuestLDTWriteHandler, + "selmGuestLDTWriteHandler", "selmRCGuestLDTWritePfHandler", + "Guest LDT write access handler", &pVM->selm.s.hGuestLdtWriteHandlerType); + AssertRCReturn(rc, rc); + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, selmGuestTSSWriteHandler, + "selmGuestTSSWriteHandler", "selmRCGuestTSSWritePfHandler", + "Guest TSS write access handler", &pVM->selm.s.hGuestTssWriteHandlerType); + AssertRCReturn(rc, rc); + } +#endif /* VBOX_WITH_RAW_MODE */ + + /* + * Register the saved state data unit. + */ + rc = SSMR3RegisterInternal(pVM, "selm", 1, SELM_SAVED_STATE_VERSION, sizeof(SELM), + NULL, NULL, NULL, + NULL, selmR3Save, NULL, + NULL, selmR3Load, selmR3LoadDone); + if (RT_FAILURE(rc)) + return rc; + + /* + * Statistics. 
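+     * The first batch of counters only applies to raw-mode VMs and is
+     * registered conditionally; the LoadHidSel counters further down are
+     * registered for all VMs.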
+ */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestGDTHandled, STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/GDTInt", STAMUNIT_OCCURENCES, "The number of handled writes to the Guest GDT."); + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestGDTUnhandled, STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/GDTEmu", STAMUNIT_OCCURENCES, "The number of unhandled writes to the Guest GDT."); + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestLDT, STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/LDT", STAMUNIT_OCCURENCES, "The number of writes to the Guest LDT was detected."); + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestTSSHandled, STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/TSSInt", STAMUNIT_OCCURENCES, "The number of handled writes to the Guest TSS."); + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestTSSRedir, STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/TSSRedir",STAMUNIT_OCCURENCES, "The number of handled redir bitmap writes to the Guest TSS."); + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestTSSHandledChanged,STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/TSSIntChg", STAMUNIT_OCCURENCES, "The number of handled writes to the Guest TSS where the R0 stack changed."); + STAM_REG(pVM, &pVM->selm.s.StatRCWriteGuestTSSUnhandled, STAMTYPE_COUNTER, "/SELM/GC/Write/Guest/TSSEmu", STAMUNIT_OCCURENCES, "The number of unhandled writes to the Guest TSS."); + STAM_REG(pVM, &pVM->selm.s.StatTSSSync, STAMTYPE_PROFILE, "/PROF/SELM/TSSSync", STAMUNIT_TICKS_PER_CALL, "Profiling of the SELMR3SyncTSS() body."); + STAM_REG(pVM, &pVM->selm.s.StatUpdateFromCPUM, STAMTYPE_PROFILE, "/PROF/SELM/UpdateFromCPUM", STAMUNIT_TICKS_PER_CALL, "Profiling of the SELMR3UpdateFromCPUM() body."); + + STAM_REL_REG(pVM, &pVM->selm.s.StatHyperSelsChanged, STAMTYPE_COUNTER, "/SELM/HyperSels/Changed", STAMUNIT_OCCURENCES, "The number of times we had to relocate our hypervisor selectors."); + STAM_REL_REG(pVM, &pVM->selm.s.StatScanForHyperSels, STAMTYPE_COUNTER, "/SELM/HyperSels/Scan", STAMUNIT_OCCURENCES, "The number of times we had find free hypervisor selectors."); + + STAM_REL_REG(pVM, &pVM->selm.s.aStatDetectedStaleSReg[X86_SREG_ES], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/DetectedStaleES", STAMUNIT_OCCURENCES, "Stale ES was detected in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatDetectedStaleSReg[X86_SREG_CS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/DetectedStaleCS", STAMUNIT_OCCURENCES, "Stale CS was detected in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatDetectedStaleSReg[X86_SREG_SS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/DetectedStaleSS", STAMUNIT_OCCURENCES, "Stale SS was detected in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatDetectedStaleSReg[X86_SREG_DS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/DetectedStaleDS", STAMUNIT_OCCURENCES, "Stale DS was detected in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatDetectedStaleSReg[X86_SREG_FS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/DetectedStaleFS", STAMUNIT_OCCURENCES, "Stale FS was detected in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatDetectedStaleSReg[X86_SREG_GS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/DetectedStaleGS", STAMUNIT_OCCURENCES, "Stale GS was detected in UpdateFromCPUM."); + + STAM_REL_REG(pVM, &pVM->selm.s.aStatAlreadyStaleSReg[X86_SREG_ES], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/AlreadyStaleES", STAMUNIT_OCCURENCES, "Already stale ES in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatAlreadyStaleSReg[X86_SREG_CS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/AlreadyStaleCS", STAMUNIT_OCCURENCES, "Already stale CS 
in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatAlreadyStaleSReg[X86_SREG_SS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/AlreadyStaleSS", STAMUNIT_OCCURENCES, "Already stale SS in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatAlreadyStaleSReg[X86_SREG_DS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/AlreadyStaleDS", STAMUNIT_OCCURENCES, "Already stale DS in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatAlreadyStaleSReg[X86_SREG_FS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/AlreadyStaleFS", STAMUNIT_OCCURENCES, "Already stale FS in UpdateFromCPUM."); + STAM_REL_REG(pVM, &pVM->selm.s.aStatAlreadyStaleSReg[X86_SREG_GS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/AlreadyStaleGS", STAMUNIT_OCCURENCES, "Already stale GS in UpdateFromCPUM."); + + STAM_REL_REG(pVM, &pVM->selm.s.StatStaleToUnstaleSReg, STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/StaleToUnstale", STAMUNIT_OCCURENCES, "Transitions from stale to unstale UpdateFromCPUM."); + + STAM_REG( pVM, &pVM->selm.s.aStatUpdatedSReg[X86_SREG_ES], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/UpdatedES", STAMUNIT_OCCURENCES, "Updated hidden ES values in UpdateFromCPUM."); + STAM_REG( pVM, &pVM->selm.s.aStatUpdatedSReg[X86_SREG_CS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/UpdatedCS", STAMUNIT_OCCURENCES, "Updated hidden CS values in UpdateFromCPUM."); + STAM_REG( pVM, &pVM->selm.s.aStatUpdatedSReg[X86_SREG_SS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/UpdatedSS", STAMUNIT_OCCURENCES, "Updated hidden SS values in UpdateFromCPUM."); + STAM_REG( pVM, &pVM->selm.s.aStatUpdatedSReg[X86_SREG_DS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/UpdatedDS", STAMUNIT_OCCURENCES, "Updated hidden DS values in UpdateFromCPUM."); + STAM_REG( pVM, &pVM->selm.s.aStatUpdatedSReg[X86_SREG_FS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/UpdatedFS", STAMUNIT_OCCURENCES, "Updated hidden FS values in UpdateFromCPUM."); + STAM_REG( pVM, &pVM->selm.s.aStatUpdatedSReg[X86_SREG_GS], STAMTYPE_COUNTER, "/SELM/UpdateFromCPUM/UpdatedGS", STAMUNIT_OCCURENCES, "Updated hidden GS values in UpdateFromCPUM."); + } + + STAM_REG( pVM, &pVM->selm.s.StatLoadHidSelGst, STAMTYPE_COUNTER, "/SELM/LoadHidSel/LoadedGuest", STAMUNIT_OCCURENCES, "SELMLoadHiddenSelectorReg: Loaded from guest tables."); + STAM_REG( pVM, &pVM->selm.s.StatLoadHidSelShw, STAMTYPE_COUNTER, "/SELM/LoadHidSel/LoadedShadow", STAMUNIT_OCCURENCES, "SELMLoadHiddenSelectorReg: Loaded from shadow tables."); + STAM_REL_REG(pVM, &pVM->selm.s.StatLoadHidSelReadErrors, STAMTYPE_COUNTER, "/SELM/LoadHidSel/GstReadErrors", STAMUNIT_OCCURENCES, "SELMLoadHiddenSelectorReg: Guest table read errors."); + STAM_REL_REG(pVM, &pVM->selm.s.StatLoadHidSelGstNoGood, STAMTYPE_COUNTER, "/SELM/LoadHidSel/NoGoodGuest", STAMUNIT_OCCURENCES, "SELMLoadHiddenSelectorReg: No good guest table entry."); + +#ifdef VBOX_WITH_RAW_MODE + /* + * Default action when entering raw mode for the first time + */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + PVMCPU pVCpu = &pVM->aCpus[0]; /* raw mode implies on VCPU */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); + } +#endif + + /* + * Register info handlers. + */ + if (VM_IS_RAW_MODE_ENABLED(pVM) || HMIsRawModeCtxNeeded(pVM)) + { + DBGFR3InfoRegisterInternal(pVM, "gdt", "Displays the shadow GDT. No arguments.", &selmR3InfoGdt); + DBGFR3InfoRegisterInternal(pVM, "ldt", "Displays the shadow LDT. No arguments.", &selmR3InfoLdt); + //DBGFR3InfoRegisterInternal(pVM, "tss", "Displays the shadow TSS. 
No arguments.", &selmR3InfoTss); + } + DBGFR3InfoRegisterInternalEx(pVM, "gdtguest", "Displays the guest GDT. No arguments.", &selmR3InfoGdtGuest, DBGFINFO_FLAGS_RUN_ON_EMT); + DBGFR3InfoRegisterInternalEx(pVM, "ldtguest", "Displays the guest LDT. No arguments.", &selmR3InfoLdtGuest, DBGFINFO_FLAGS_RUN_ON_EMT); + //DBGFR3InfoRegisterInternal(pVM, "tssguest", "Displays the guest TSS. No arguments.", &selmR3InfoTssGuest, DBGFINFO_FLAGS_RUN_ON_EMT); + + return rc; +} + + +/** + * Finalizes HMA page attributes. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) SELMR3InitFinalize(PVM pVM) +{ +#ifdef VBOX_WITH_RAW_MODE + /** @cfgm{/DoubleFault,bool,false} + * Enables catching of double faults in the raw-mode context VMM code. This can + * be used when the triple faults or hangs occur and one suspect an unhandled + * double fault. This is not enabled by default because it means making the + * hyper selectors writeable for all supervisor code, including the guest's. + * The double fault is a task switch and thus requires write access to the GDT + * of the TSS (to set it busy), to the old TSS (to store state), and to the Trap + * 8 TSS for the back link. + */ + bool f; +# if defined(DEBUG_bird) + int rc = CFGMR3QueryBoolDef(CFGMR3GetRoot(pVM), "DoubleFault", &f, true); +# else + int rc = CFGMR3QueryBoolDef(CFGMR3GetRoot(pVM), "DoubleFault", &f, false); +# endif + AssertLogRelRCReturn(rc, rc); + if (f && (VM_IS_RAW_MODE_ENABLED(pVM) || HMIsRawModeCtxNeeded(pVM))) + { + PX86DESC paGdt = pVM->selm.s.paGdtR3; + rc = PGMMapSetPage(pVM, MMHyperR3ToRC(pVM, &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] >> 3]), sizeof(paGdt[0]), + X86_PTE_RW | X86_PTE_P | X86_PTE_A | X86_PTE_D); + AssertRC(rc); + rc = PGMMapSetPage(pVM, MMHyperR3ToRC(pVM, &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] >> 3]), sizeof(paGdt[0]), + X86_PTE_RW | X86_PTE_P | X86_PTE_A | X86_PTE_D); + AssertRC(rc); + rc = PGMMapSetPage(pVM, VM_RC_ADDR(pVM, &pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS]), sizeof(pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS]), + X86_PTE_RW | X86_PTE_P | X86_PTE_A | X86_PTE_D); + AssertRC(rc); + rc = PGMMapSetPage(pVM, VM_RC_ADDR(pVM, &pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08]), sizeof(pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08]), + X86_PTE_RW | X86_PTE_P | X86_PTE_A | X86_PTE_D); + AssertRC(rc); + } +#else /* !VBOX_WITH_RAW_MODE */ + RT_NOREF(pVM); +#endif /* !VBOX_WITH_RAW_MODE */ + return VINF_SUCCESS; +} + + +/** + * Setup the hypervisor GDT selectors in our shadow table + * + * @param pVM The cross context VM structure. + */ +static void selmR3SetupHyperGDTSelectors(PVM pVM) +{ + PX86DESC paGdt = pVM->selm.s.paGdtR3; + + /* + * Set up global code and data descriptors for use in the guest context. 
+ * Both are wide open (base 0, limit 4GB) + */ + PX86DESC pDesc = &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] >> 3]; + pDesc->Gen.u16LimitLow = 0xffff; + pDesc->Gen.u4LimitHigh = 0xf; + pDesc->Gen.u16BaseLow = 0; + pDesc->Gen.u8BaseHigh1 = 0; + pDesc->Gen.u8BaseHigh2 = 0; + pDesc->Gen.u4Type = X86_SEL_TYPE_ER_ACC; + pDesc->Gen.u1DescType = 1; /* not system, but code/data */ + pDesc->Gen.u2Dpl = 0; /* supervisor */ + pDesc->Gen.u1Present = 1; + pDesc->Gen.u1Available = 0; + pDesc->Gen.u1Long = 0; + pDesc->Gen.u1DefBig = 1; /* def 32 bit */ + pDesc->Gen.u1Granularity = 1; /* 4KB limit */ + + /* data */ + pDesc = &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] >> 3]; + pDesc->Gen.u16LimitLow = 0xffff; + pDesc->Gen.u4LimitHigh = 0xf; + pDesc->Gen.u16BaseLow = 0; + pDesc->Gen.u8BaseHigh1 = 0; + pDesc->Gen.u8BaseHigh2 = 0; + pDesc->Gen.u4Type = X86_SEL_TYPE_RW_ACC; + pDesc->Gen.u1DescType = 1; /* not system, but code/data */ + pDesc->Gen.u2Dpl = 0; /* supervisor */ + pDesc->Gen.u1Present = 1; + pDesc->Gen.u1Available = 0; + pDesc->Gen.u1Long = 0; + pDesc->Gen.u1DefBig = 1; /* big */ + pDesc->Gen.u1Granularity = 1; /* 4KB limit */ + + /* 64-bit mode code (& data?) */ + pDesc = &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] >> 3]; + pDesc->Gen.u16LimitLow = 0xffff; + pDesc->Gen.u4LimitHigh = 0xf; + pDesc->Gen.u16BaseLow = 0; + pDesc->Gen.u8BaseHigh1 = 0; + pDesc->Gen.u8BaseHigh2 = 0; + pDesc->Gen.u4Type = X86_SEL_TYPE_ER_ACC; + pDesc->Gen.u1DescType = 1; /* not system, but code/data */ + pDesc->Gen.u2Dpl = 0; /* supervisor */ + pDesc->Gen.u1Present = 1; + pDesc->Gen.u1Available = 0; + pDesc->Gen.u1Long = 1; /* The Long (L) attribute bit. */ + pDesc->Gen.u1DefBig = 0; /* With L=1 this must be 0. */ + pDesc->Gen.u1Granularity = 1; /* 4KB limit */ + + /* + * TSS descriptor + */ + pDesc = &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] >> 3]; + RTRCPTR RCPtrTSS = VM_RC_ADDR(pVM, &pVM->selm.s.Tss); + pDesc->Gen.u16BaseLow = RT_LOWORD(RCPtrTSS); + pDesc->Gen.u8BaseHigh1 = RT_BYTE3(RCPtrTSS); + pDesc->Gen.u8BaseHigh2 = RT_BYTE4(RCPtrTSS); + pDesc->Gen.u16LimitLow = sizeof(VBOXTSS) - 1; + pDesc->Gen.u4LimitHigh = 0; + pDesc->Gen.u4Type = X86_SEL_TYPE_SYS_386_TSS_AVAIL; + pDesc->Gen.u1DescType = 0; /* system */ + pDesc->Gen.u2Dpl = 0; /* supervisor */ + pDesc->Gen.u1Present = 1; + pDesc->Gen.u1Available = 0; + pDesc->Gen.u1Long = 0; + pDesc->Gen.u1DefBig = 0; + pDesc->Gen.u1Granularity = 0; /* byte limit */ + + /* + * TSS descriptor for trap 08 + */ + pDesc = &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] >> 3]; + pDesc->Gen.u16LimitLow = sizeof(VBOXTSS) - 1; + pDesc->Gen.u4LimitHigh = 0; + RCPtrTSS = VM_RC_ADDR(pVM, &pVM->selm.s.TssTrap08); + pDesc->Gen.u16BaseLow = RT_LOWORD(RCPtrTSS); + pDesc->Gen.u8BaseHigh1 = RT_BYTE3(RCPtrTSS); + pDesc->Gen.u8BaseHigh2 = RT_BYTE4(RCPtrTSS); + pDesc->Gen.u4Type = X86_SEL_TYPE_SYS_386_TSS_AVAIL; + pDesc->Gen.u1DescType = 0; /* system */ + pDesc->Gen.u2Dpl = 0; /* supervisor */ + pDesc->Gen.u1Present = 1; + pDesc->Gen.u1Available = 0; + pDesc->Gen.u1Long = 0; + pDesc->Gen.u1DefBig = 0; + pDesc->Gen.u1Granularity = 0; /* byte limit */ +} + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM need to relocate it self inside the GC. + * + * @param pVM The cross context VM structure. 
+ */ +VMMR3DECL(void) SELMR3Relocate(PVM pVM) +{ + PX86DESC paGdt = pVM->selm.s.paGdtR3; + LogFlow(("SELMR3Relocate\n")); + + if (VM_IS_RAW_MODE_ENABLED(pVM) || HMIsRawModeCtxNeeded(pVM)) + { + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + /* + * Update GDTR and selector. + */ + CPUMSetHyperGDTR(pVCpu, MMHyperR3ToRC(pVM, paGdt), SELM_GDT_ELEMENTS * sizeof(paGdt[0]) - 1); + + /** @todo selector relocations should be a separate operation? */ + CPUMSetHyperCS(pVCpu, pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS]); + CPUMSetHyperDS(pVCpu, pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]); + CPUMSetHyperES(pVCpu, pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]); + CPUMSetHyperSS(pVCpu, pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]); + CPUMSetHyperTR(pVCpu, pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS]); + } + + selmR3SetupHyperGDTSelectors(pVM); + +/** @todo SELM must be called when any of the CR3s changes during a cpu mode change. */ +/** @todo PGM knows the proper CR3 values these days, not CPUM. */ + /* + * Update the TSSes. + */ + /* Only applies to raw mode which supports only 1 VCPU */ + PVMCPU pVCpu = &pVM->aCpus[0]; + + /* Current TSS */ + pVM->selm.s.Tss.cr3 = PGMGetHyperCR3(pVCpu); + pVM->selm.s.Tss.ss0 = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.Tss.esp0 = VMMGetStackRC(pVCpu); + pVM->selm.s.Tss.cs = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS]; + pVM->selm.s.Tss.ds = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.Tss.es = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.Tss.offIoBitmap = sizeof(VBOXTSS); + + /* trap 08 */ + pVM->selm.s.TssTrap08.cr3 = PGMGetInterRCCR3(pVM, pVCpu); /* this should give use better survival chances. */ + pVM->selm.s.TssTrap08.ss0 = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.TssTrap08.ss = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.TssTrap08.esp0 = VMMGetStackRC(pVCpu) - PAGE_SIZE / 2; /* upper half can be analysed this way. */ + pVM->selm.s.TssTrap08.esp = pVM->selm.s.TssTrap08.esp0; + pVM->selm.s.TssTrap08.ebp = pVM->selm.s.TssTrap08.esp0; + pVM->selm.s.TssTrap08.cs = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS]; + pVM->selm.s.TssTrap08.ds = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.TssTrap08.es = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.TssTrap08.fs = 0; + pVM->selm.s.TssTrap08.gs = 0; + pVM->selm.s.TssTrap08.selLdt = 0; + pVM->selm.s.TssTrap08.eflags = 0x2; /* all cleared */ + pVM->selm.s.TssTrap08.ecx = VM_RC_ADDR(pVM, &pVM->selm.s.Tss); /* setup ecx to normal Hypervisor TSS address. */ + pVM->selm.s.TssTrap08.edi = pVM->selm.s.TssTrap08.ecx; + pVM->selm.s.TssTrap08.eax = pVM->selm.s.TssTrap08.ecx; + pVM->selm.s.TssTrap08.edx = VM_RC_ADDR(pVM, pVM); /* setup edx VM address. */ + pVM->selm.s.TssTrap08.edi = pVM->selm.s.TssTrap08.edx; + pVM->selm.s.TssTrap08.ebx = pVM->selm.s.TssTrap08.edx; + pVM->selm.s.TssTrap08.offIoBitmap = sizeof(VBOXTSS); + /* TRPM will be updating the eip */ + } + + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + /* + * Update shadow GDT/LDT/TSS write access handlers. 
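+         * The previously registered ranges point at the pre-relocation RC
+         * addresses, so each tracked range is deregistered and re-registered
+         * at the newly mapped address.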
+ */ + PVMCPU pVCpu = VMMGetCpu(pVM); NOREF(pVCpu); + int rc; NOREF(rc); +#ifdef SELM_TRACK_SHADOW_GDT_CHANGES + if (pVM->selm.s.paGdtRC != NIL_RTRCPTR) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.paGdtRC, true /*fHypervisor*/); + AssertRC(rc); + } + pVM->selm.s.paGdtRC = MMHyperR3ToRC(pVM, paGdt); + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hShadowGdtWriteHandlerType, + pVM->selm.s.paGdtRC, + pVM->selm.s.paGdtRC + SELM_GDT_ELEMENTS * sizeof(paGdt[0]) - 1, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + AssertRC(rc); +#endif +#ifdef SELM_TRACK_SHADOW_TSS_CHANGES + if (pVM->selm.s.pvMonShwTssRC != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.pvMonShwTssRC, true /*fHypervisor*/); + AssertRC(rc); + } + pVM->selm.s.pvMonShwTssRC = VM_RC_ADDR(pVM, &pVM->selm.s.Tss); + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hShadowTssWriteHandlerType, + pVM->selm.s.pvMonShwTssRC, + pVM->selm.s.pvMonShwTssRC + sizeof(pVM->selm.s.Tss) - 1, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + AssertRC(rc); +#endif + + /* + * Update the GC LDT region handler and address. + */ +#ifdef SELM_TRACK_SHADOW_LDT_CHANGES + if (pVM->selm.s.pvLdtRC != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.pvLdtRC, true /*fHypervisor*/); + AssertRC(rc); + } +#endif + pVM->selm.s.pvLdtRC = MMHyperR3ToRC(pVM, pVM->selm.s.pvLdtR3); +#ifdef SELM_TRACK_SHADOW_LDT_CHANGES + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hShadowLdtWriteHandlerType, + pVM->selm.s.pvLdtRC, + pVM->selm.s.pvLdtRC + _64K + PAGE_SIZE - 1, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + AssertRC(rc); +#endif + } +} + + +/** + * Terminates the SELM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) SELMR3Term(PVM pVM) +{ + NOREF(pVM); + return VINF_SUCCESS; +} + + +/** + * The VM is being reset. + * + * For the SELM component this means that any GDT/LDT/TSS monitors + * needs to be removed. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) SELMR3Reset(PVM pVM) +{ + LogFlow(("SELMR3Reset:\n")); + VM_ASSERT_EMT(pVM); + + /* + * Uninstall guest GDT/LDT/TSS write access handlers. + */ + PVMCPU pVCpu = VMMGetCpu(pVM); NOREF(pVCpu); + if (pVM->selm.s.GuestGdtr.pGdt != RTRCPTR_MAX && pVM->selm.s.fGDTRangeRegistered) + { +#ifdef SELM_TRACK_GUEST_GDT_CHANGES + int rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GuestGdtr.pGdt, false /*fHypervisor*/); + AssertRC(rc); +#endif + pVM->selm.s.GuestGdtr.pGdt = RTRCPTR_MAX; + pVM->selm.s.GuestGdtr.cbGdt = 0; + } + pVM->selm.s.fGDTRangeRegistered = false; + if (pVM->selm.s.GCPtrGuestLdt != RTRCPTR_MAX) + { +#ifdef SELM_TRACK_GUEST_LDT_CHANGES + int rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GCPtrGuestLdt, false /*fHypervisor*/); + AssertRC(rc); +#endif + pVM->selm.s.GCPtrGuestLdt = RTRCPTR_MAX; + } + if (pVM->selm.s.GCPtrGuestTss != RTRCPTR_MAX) + { +#ifdef SELM_TRACK_GUEST_TSS_CHANGES + int rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GCPtrGuestTss, false /*fHypervisor*/); + AssertRC(rc); +#endif + pVM->selm.s.GCPtrGuestTss = RTRCPTR_MAX; + pVM->selm.s.GCSelTss = RTSEL_MAX; + } + + /* + * Re-initialize other members. 
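+     * These are presumably re-established by the next GDT/LDT/TSS sync,
+     * which the force-action flags set below request for raw-mode VMs.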
+ */ + pVM->selm.s.cbLdtLimit = 0; + pVM->selm.s.offLdtHyper = 0; + pVM->selm.s.cbMonitoredGuestTss = 0; + + pVM->selm.s.fSyncTSSRing0Stack = false; + +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + /* + * Default action when entering raw mode for the first time + */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); + } +#endif +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) selmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("selmR3Save:\n")); + + /* + * Save the basic bits - fortunately all the other things can be resynced on load. + */ + PSELM pSelm = &pVM->selm.s; + + SSMR3PutBool(pSSM, !VM_IS_RAW_MODE_ENABLED(pVM)); + SSMR3PutBool(pSSM, pSelm->fSyncTSSRing0Stack); + SSMR3PutSel(pSSM, pSelm->aHyperSel[SELM_HYPER_SEL_CS]); + SSMR3PutSel(pSSM, pSelm->aHyperSel[SELM_HYPER_SEL_DS]); + SSMR3PutSel(pSSM, pSelm->aHyperSel[SELM_HYPER_SEL_CS64]); + SSMR3PutSel(pSSM, pSelm->aHyperSel[SELM_HYPER_SEL_CS64]); /* reserved for DS64. */ + SSMR3PutSel(pSSM, pSelm->aHyperSel[SELM_HYPER_SEL_TSS]); + return SSMR3PutSel(pSSM, pSelm->aHyperSel[SELM_HYPER_SEL_TSS_TRAP08]); +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) selmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + LogFlow(("selmR3Load:\n")); + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Validate version. + */ + if (uVersion != SELM_SAVED_STATE_VERSION) + { + AssertMsgFailed(("selmR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Do a reset. + */ + SELMR3Reset(pVM); + + /* Get the monitoring flag. */ + bool fIgnored; + SSMR3GetBool(pSSM, &fIgnored); + + /* Get the TSS state flag. */ + SSMR3GetBool(pSSM, &pVM->selm.s.fSyncTSSRing0Stack); + + /* + * Get the selectors. + */ + RTSEL SelCS; + SSMR3GetSel(pSSM, &SelCS); + RTSEL SelDS; + SSMR3GetSel(pSSM, &SelDS); + RTSEL SelCS64; + SSMR3GetSel(pSSM, &SelCS64); + RTSEL SelDS64; + SSMR3GetSel(pSSM, &SelDS64); + RTSEL SelTSS; + SSMR3GetSel(pSSM, &SelTSS); + RTSEL SelTSSTrap08; + SSMR3GetSel(pSSM, &SelTSSTrap08); + + /* Copy the selectors; they will be checked during relocation. */ + PSELM pSelm = &pVM->selm.s; + pSelm->aHyperSel[SELM_HYPER_SEL_CS] = SelCS; + pSelm->aHyperSel[SELM_HYPER_SEL_DS] = SelDS; + pSelm->aHyperSel[SELM_HYPER_SEL_CS64] = SelCS64; + pSelm->aHyperSel[SELM_HYPER_SEL_TSS] = SelTSS; + pSelm->aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] = SelTSSTrap08; + + return VINF_SUCCESS; +} + + +/** + * Sync the GDT, LDT and TSS after loading the state. + * + * Just to play save, we set the FFs to force syncing before + * executing GC code. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) selmR3LoadDone(PVM pVM, PSSMHANDLE pSSM) +{ +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + PVMCPU pVCpu = VMMGetCpu(pVM); + + LogFlow(("selmR3LoadDone:\n")); + + /* + * Don't do anything if it's a load failure. 
+ */ + int rc = SSMR3HandleGetStatus(pSSM); + if (RT_FAILURE(rc)) + return VINF_SUCCESS; + + /* + * Do the syncing if we're in protected mode and using raw-mode. + */ + if (PGMGetGuestMode(pVCpu) != PGMMODE_REAL) + { + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + SELMR3UpdateFromCPUM(pVM, pVCpu); + } + + /* + * Flag everything for resync on next raw mode entry. + */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + } + +#else /* !VBOX_WITH_RAW_MODE */ + RT_NOREF(pVM, pSSM); +#endif /* !VBOX_WITH_RAW_MODE */ + return VINF_SUCCESS; +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Updates (syncs) the shadow GDT. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + */ +static int selmR3UpdateShadowGdt(PVM pVM, PVMCPU pVCpu) +{ + LogFlow(("selmR3UpdateShadowGdt\n")); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* + * Always assume the best... + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_SELM_SYNC_GDT); + + /* If the GDT was changed, then make sure the LDT is checked too */ + /** @todo only do this if the actual ldtr selector was changed; this is a bit excessive */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); + /* Same goes for the TSS selector */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + + /* + * Get the GDTR and check if there is anything to do (there usually is). + */ + VBOXGDTR GDTR; + CPUMGetGuestGDTR(pVCpu, &GDTR); + if (GDTR.cbGdt < sizeof(X86DESC)) + { + Log(("No GDT entries...\n")); + return VINF_SUCCESS; + } + + /* + * Read the Guest GDT. + * ASSUMES that the entire GDT is in memory. + */ + RTUINT cbEffLimit = GDTR.cbGdt; + PX86DESC pGDTE = &pVM->selm.s.paGdtR3[1]; + int rc = PGMPhysSimpleReadGCPtr(pVCpu, pGDTE, GDTR.pGdt + sizeof(X86DESC), cbEffLimit + 1 - sizeof(X86DESC)); + if (RT_FAILURE(rc)) + { + /* + * Read it page by page. + * + * Keep track of the last valid page and delay memsets and + * adjust cbEffLimit to reflect the effective size. The latter + * is something we do in the belief that the guest will probably + * never actually commit the last page, thus allowing us to keep + * our selectors in the high end of the GDT. + */ + RTUINT cbLeft = cbEffLimit + 1 - sizeof(X86DESC); + RTGCPTR GCPtrSrc = (RTGCPTR)GDTR.pGdt + sizeof(X86DESC); + uint8_t *pu8Dst = (uint8_t *)&pVM->selm.s.paGdtR3[1]; + uint8_t *pu8DstInvalid = pu8Dst; + + while (cbLeft) + { + RTUINT cb = PAGE_SIZE - (GCPtrSrc & PAGE_OFFSET_MASK); + cb = RT_MIN(cb, cbLeft); + rc = PGMPhysSimpleReadGCPtr(pVCpu, pu8Dst, GCPtrSrc, cb); + if (RT_SUCCESS(rc)) + { + if (pu8DstInvalid != pu8Dst) + RT_BZERO(pu8DstInvalid, pu8Dst - pu8DstInvalid); + GCPtrSrc += cb; + pu8Dst += cb; + pu8DstInvalid = pu8Dst; + } + else if ( rc == VERR_PAGE_NOT_PRESENT + || rc == VERR_PAGE_TABLE_NOT_PRESENT) + { + GCPtrSrc += cb; + pu8Dst += cb; + } + else + { + AssertLogRelMsgFailed(("Couldn't read GDT at %016RX64, rc=%Rrc!\n", GDTR.pGdt, rc)); + return VERR_SELM_GDT_READ_ERROR; + } + cbLeft -= cb; + } + + /* any invalid pages at the end? */ + if (pu8DstInvalid != pu8Dst) + { + cbEffLimit = pu8DstInvalid - (uint8_t *)pVM->selm.s.paGdtR3 - 1; + /* If any GDTEs was invalidated, zero them. 
*/ + if (cbEffLimit < pVM->selm.s.cbEffGuestGdtLimit) + RT_BZERO(pu8DstInvalid + cbEffLimit + 1, pVM->selm.s.cbEffGuestGdtLimit - cbEffLimit); + } + + /* keep track of the effective limit. */ + if (cbEffLimit != pVM->selm.s.cbEffGuestGdtLimit) + { + Log(("SELMR3UpdateFromCPUM: cbEffGuestGdtLimit=%#x -> %#x (actual %#x)\n", + pVM->selm.s.cbEffGuestGdtLimit, cbEffLimit, GDTR.cbGdt)); + pVM->selm.s.cbEffGuestGdtLimit = cbEffLimit; + } + } + + /* + * Check if the Guest GDT intrudes on our GDT entries. + */ + /** @todo we should try to minimize relocations by making sure our current selectors can be reused. */ + RTSEL aHyperSel[SELM_HYPER_SEL_MAX]; + if (cbEffLimit >= SELM_HYPER_DEFAULT_BASE) + { + PX86DESC pGDTEStart = pVM->selm.s.paGdtR3; + PX86DESC pGDTECur = (PX86DESC)((char *)pGDTEStart + GDTR.cbGdt + 1 - sizeof(X86DESC)); + int iGDT = 0; + + Log(("Internal SELM GDT conflict: use non-present entries\n")); + STAM_REL_COUNTER_INC(&pVM->selm.s.StatScanForHyperSels); + while ((uintptr_t)pGDTECur > (uintptr_t)pGDTEStart) + { + /* We can reuse non-present entries */ + if (!pGDTECur->Gen.u1Present) + { + aHyperSel[iGDT] = ((uintptr_t)pGDTECur - (uintptr_t)pVM->selm.s.paGdtR3) / sizeof(X86DESC); + aHyperSel[iGDT] = aHyperSel[iGDT] << X86_SEL_SHIFT; + Log(("SELM: Found unused GDT %04X\n", aHyperSel[iGDT])); + iGDT++; + if (iGDT >= SELM_HYPER_SEL_MAX) + break; + } + + pGDTECur--; + } + if (iGDT != SELM_HYPER_SEL_MAX) + { + AssertLogRelMsgFailed(("Internal SELM GDT conflict.\n")); + return VERR_SELM_GDT_TOO_FULL; + } + } + else + { + aHyperSel[SELM_HYPER_SEL_CS] = SELM_HYPER_DEFAULT_SEL_CS; + aHyperSel[SELM_HYPER_SEL_DS] = SELM_HYPER_DEFAULT_SEL_DS; + aHyperSel[SELM_HYPER_SEL_CS64] = SELM_HYPER_DEFAULT_SEL_CS64; + aHyperSel[SELM_HYPER_SEL_TSS] = SELM_HYPER_DEFAULT_SEL_TSS; + aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] = SELM_HYPER_DEFAULT_SEL_TSS_TRAP08; + } + +# ifdef VBOX_WITH_SAFE_STR + /* Use the guest's TR selector to plug the str virtualization hole. */ + if (CPUMGetGuestTR(pVCpu, NULL) != 0) + { + Log(("SELM: Use guest TSS selector %x\n", CPUMGetGuestTR(pVCpu, NULL))); + aHyperSel[SELM_HYPER_SEL_TSS] = CPUMGetGuestTR(pVCpu, NULL); + } +# endif + + /* + * Work thru the copied GDT entries adjusting them for correct virtualization. + */ + PX86DESC pGDTEEnd = (PX86DESC)((char *)pGDTE + cbEffLimit + 1 - sizeof(X86DESC)); + while (pGDTE < pGDTEEnd) + { + if (pGDTE->Gen.u1Present) + selmGuestToShadowDesc(pVM, pGDTE); + + /* Next GDT entry. */ + pGDTE++; + } + + /* + * Check if our hypervisor selectors were changed. 
+ */ + if ( aHyperSel[SELM_HYPER_SEL_CS] != pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] + || aHyperSel[SELM_HYPER_SEL_DS] != pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] + || aHyperSel[SELM_HYPER_SEL_CS64] != pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] + || aHyperSel[SELM_HYPER_SEL_TSS] != pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] + || aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] != pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08]) + { + /* Reinitialize our hypervisor GDTs */ + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] = aHyperSel[SELM_HYPER_SEL_CS]; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] = aHyperSel[SELM_HYPER_SEL_DS]; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] = aHyperSel[SELM_HYPER_SEL_CS64]; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] = aHyperSel[SELM_HYPER_SEL_TSS]; + pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] = aHyperSel[SELM_HYPER_SEL_TSS_TRAP08]; + + STAM_REL_COUNTER_INC(&pVM->selm.s.StatHyperSelsChanged); + + /* + * Do the relocation callbacks to let everyone update their hyper selector dependencies. + * (SELMR3Relocate will call selmR3SetupHyperGDTSelectors() for us.) + */ + VMR3Relocate(pVM, 0); + } +# ifdef VBOX_WITH_SAFE_STR + else if ( cbEffLimit >= SELM_HYPER_DEFAULT_BASE + || CPUMGetGuestTR(pVCpu, NULL) != 0) /* Our shadow TR entry was overwritten when we synced the guest's GDT. */ +# else + else if (cbEffLimit >= SELM_HYPER_DEFAULT_BASE) +# endif + /* We overwrote all entries above, so we have to save them again. */ + selmR3SetupHyperGDTSelectors(pVM); + + /* + * Adjust the cached GDT limit. + * Any GDT entries which have been removed must be cleared. + */ + if (pVM->selm.s.GuestGdtr.cbGdt != GDTR.cbGdt) + { + if (pVM->selm.s.GuestGdtr.cbGdt > GDTR.cbGdt) + RT_BZERO(pGDTE, pVM->selm.s.GuestGdtr.cbGdt - GDTR.cbGdt); + } + + /* + * Check if Guest's GDTR is changed. + */ + if ( GDTR.pGdt != pVM->selm.s.GuestGdtr.pGdt + || GDTR.cbGdt != pVM->selm.s.GuestGdtr.cbGdt) + { + Log(("SELMR3UpdateFromCPUM: Guest's GDT is changed to pGdt=%016RX64 cbGdt=%08X\n", GDTR.pGdt, GDTR.cbGdt)); + +# ifdef SELM_TRACK_GUEST_GDT_CHANGES + /* + * [Re]Register write virtual handler for guest's GDT. + */ + if (pVM->selm.s.GuestGdtr.pGdt != RTRCPTR_MAX && pVM->selm.s.fGDTRangeRegistered) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GuestGdtr.pGdt, false /*fHypervisor*/); + AssertRC(rc); + } + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hGuestGdtWriteHandlerType, + GDTR.pGdt, GDTR.pGdt + GDTR.cbGdt /* already inclusive */, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); +# ifdef VBOX_WITH_RAW_RING1 + /** @todo !HACK ALERT! + * Some guest OSes (QNX) share code and the GDT on the same page; + * PGMR3HandlerVirtualRegister doesn't support more than one handler, + * so we kick out the PATM handler as this one is more important. Fix this + * properly in PGMR3HandlerVirtualRegister? + */ + if (rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT) + { + LogRel(("selmR3UpdateShadowGdt: Virtual handler conflict %RGv -> kick out PATM handler for the higher priority GDT page monitor\n", GDTR.pGdt)); + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, GDTR.pGdt & PAGE_BASE_GC_MASK, false /*fHypervisor*/); + AssertRC(rc); + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hGuestGdtWriteHandlerType, + GDTR.pGdt, GDTR.pGdt + GDTR.cbGdt /* already inclusive */, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + } +# endif + if (RT_FAILURE(rc)) + return rc; +# endif /* SELM_TRACK_GUEST_GDT_CHANGES */ + + /* Update saved Guest GDTR. 
*/ + pVM->selm.s.GuestGdtr = GDTR; + pVM->selm.s.fGDTRangeRegistered = true; + } + + return VINF_SUCCESS; +} + + +/** + * Updates (syncs) the shadow LDT. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + */ +static int selmR3UpdateShadowLdt(PVM pVM, PVMCPU pVCpu) +{ + LogFlow(("selmR3UpdateShadowLdt\n")); + int rc = VINF_SUCCESS; + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* + * Always assume the best... + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_SELM_SYNC_LDT); + + /* + * LDT handling is done similarly to the GDT handling with a shadow + * array. However, since the LDT is expected to be swappable (at least + * some ancient OSes makes it swappable) it must be floating and + * synced on a per-page basis. + * + * Eventually we will change this to be fully on demand. Meaning that + * we will only sync pages containing LDT selectors actually used and + * let the #PF handler lazily sync pages as they are used. + * (This applies to GDT too, when we start making OS/2 fast.) + */ + + /* + * First, determine the current LDT selector. + */ + RTSEL SelLdt = CPUMGetGuestLDTR(pVCpu); + if (!(SelLdt & X86_SEL_MASK_OFF_RPL)) + { + /* ldtr = 0 - update hyper LDTR and deregister any active handler. */ + CPUMSetHyperLDTR(pVCpu, 0); + if (pVM->selm.s.GCPtrGuestLdt != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GCPtrGuestLdt, false /*fHypervisor*/); + AssertRC(rc); + pVM->selm.s.GCPtrGuestLdt = RTRCPTR_MAX; + } + pVM->selm.s.cbLdtLimit = 0; + return VINF_SUCCESS; + } + + /* + * Get the LDT selector. + */ +/** @todo this is wrong, use CPUMGetGuestLdtrEx */ + PX86DESC pDesc = &pVM->selm.s.paGdtR3[SelLdt >> X86_SEL_SHIFT]; + RTGCPTR GCPtrLdt = X86DESC_BASE(pDesc); + uint32_t cbLdt = X86DESC_LIMIT_G(pDesc); + + /* + * Validate it. + */ + if ( !cbLdt + || SelLdt >= pVM->selm.s.GuestGdtr.cbGdt + || pDesc->Gen.u1DescType + || pDesc->Gen.u4Type != X86_SEL_TYPE_SYS_LDT) + { + AssertMsg(!cbLdt, ("Invalid LDT %04x!\n", SelLdt)); + + /* cbLdt > 0: + * This is quite impossible, so we do as most people do when faced with + * the impossible, we simply ignore it. + */ + CPUMSetHyperLDTR(pVCpu, 0); + if (pVM->selm.s.GCPtrGuestLdt != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GCPtrGuestLdt, false /*fHypervisor*/); + AssertRC(rc); + pVM->selm.s.GCPtrGuestLdt = RTRCPTR_MAX; + } + return VINF_SUCCESS; + } + /** @todo check what intel does about odd limits. */ + AssertMsg(RT_ALIGN(cbLdt + 1, sizeof(X86DESC)) == cbLdt + 1 && cbLdt <= 0xffff, ("cbLdt=%d\n", cbLdt)); + + /* + * Use the cached guest ldt address if the descriptor has already been modified (see below) + * (this is necessary due to redundant LDT updates; see todo above at GDT sync) + */ + if (MMHyperIsInsideArea(pVM, GCPtrLdt)) + GCPtrLdt = pVM->selm.s.GCPtrGuestLdt; /* use the old one */ + + + /** @todo Handle only present LDT segments. */ +// if (pDesc->Gen.u1Present) + { + /* + * Check if Guest's LDT address/limit is changed. + */ + if ( GCPtrLdt != pVM->selm.s.GCPtrGuestLdt + || cbLdt != pVM->selm.s.cbLdtLimit) + { + Log(("SELMR3UpdateFromCPUM: Guest LDT changed to from %RGv:%04x to %RGv:%04x. (GDTR=%016RX64:%04x)\n", + pVM->selm.s.GCPtrGuestLdt, pVM->selm.s.cbLdtLimit, GCPtrLdt, cbLdt, pVM->selm.s.GuestGdtr.pGdt, pVM->selm.s.GuestGdtr.cbGdt)); + +# ifdef SELM_TRACK_GUEST_LDT_CHANGES + /* + * [Re]Register write virtual handler for guest's GDT. 
+ * In the event of LDT overlapping something, don't install it just assume it's being updated. + */ + if (pVM->selm.s.GCPtrGuestLdt != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GCPtrGuestLdt, false /*fHypervisor*/); + AssertRC(rc); + } +# ifdef LOG_ENABLED + if (pDesc->Gen.u1Present) + Log(("LDT selector marked not present!!\n")); +# endif + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hGuestLdtWriteHandlerType, + GCPtrLdt, GCPtrLdt + cbLdt /* already inclusive */, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + if (rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT) + { + /** @todo investigate the various cases where conflicts happen and try avoid them by enh. the instruction emulation. */ + pVM->selm.s.GCPtrGuestLdt = RTRCPTR_MAX; + Log(("WARNING: Guest LDT (%RGv:%04x) conflicted with existing access range!! Assumes LDT is begin updated. (GDTR=%016RX64:%04x)\n", + GCPtrLdt, cbLdt, pVM->selm.s.GuestGdtr.pGdt, pVM->selm.s.GuestGdtr.cbGdt)); + } + else if (RT_SUCCESS(rc)) + pVM->selm.s.GCPtrGuestLdt = GCPtrLdt; + else + { + CPUMSetHyperLDTR(pVCpu, 0); + return rc; + } +# else + pVM->selm.s.GCPtrGuestLdt = GCPtrLdt; +# endif + pVM->selm.s.cbLdtLimit = cbLdt; + } + } + + /* + * Calc Shadow LDT base. + */ + unsigned off; + pVM->selm.s.offLdtHyper = off = (GCPtrLdt & PAGE_OFFSET_MASK); + RTGCPTR GCPtrShadowLDT = (RTGCPTR)((RTGCUINTPTR)pVM->selm.s.pvLdtRC + off); + PX86DESC pShadowLDT = (PX86DESC)((uintptr_t)pVM->selm.s.pvLdtR3 + off); + + /* + * Enable the LDT selector in the shadow GDT. + */ + pDesc->Gen.u1Present = 1; + pDesc->Gen.u16BaseLow = RT_LOWORD(GCPtrShadowLDT); + pDesc->Gen.u8BaseHigh1 = RT_BYTE3(GCPtrShadowLDT); + pDesc->Gen.u8BaseHigh2 = RT_BYTE4(GCPtrShadowLDT); + pDesc->Gen.u1Available = 0; + pDesc->Gen.u1Long = 0; + if (cbLdt > 0xffff) + { + cbLdt = 0xffff; + pDesc->Gen.u4LimitHigh = 0; + pDesc->Gen.u16LimitLow = pDesc->Gen.u1Granularity ? 0xf : 0xffff; + } + + /* + * Set Hyper LDTR and notify TRPM. + */ + CPUMSetHyperLDTR(pVCpu, SelLdt); + LogFlow(("selmR3UpdateShadowLdt: Hyper LDTR %#x\n", SelLdt)); + + /* + * Loop synchronising the LDT page by page. + */ + /** @todo investigate how intel handle various operations on half present cross page entries. */ + off = GCPtrLdt & (sizeof(X86DESC) - 1); + AssertMsg(!off, ("LDT is not aligned on entry size! GCPtrLdt=%08x\n", GCPtrLdt)); + + /* Note: Do not skip the first selector; unlike the GDT, a zero LDT selector is perfectly valid. */ + unsigned cbLeft = cbLdt + 1; + PX86DESC pLDTE = pShadowLDT; + while (cbLeft) + { + /* + * Read a chunk. + */ + unsigned cbChunk = PAGE_SIZE - ((RTGCUINTPTR)GCPtrLdt & PAGE_OFFSET_MASK); + if (cbChunk > cbLeft) + cbChunk = cbLeft; + rc = PGMPhysSimpleReadGCPtr(pVCpu, pShadowLDT, GCPtrLdt, cbChunk); + if (RT_SUCCESS(rc)) + { + /* + * Mark page + */ + rc = PGMMapSetPage(pVM, GCPtrShadowLDT & PAGE_BASE_GC_MASK, PAGE_SIZE, X86_PTE_P | X86_PTE_A | X86_PTE_D); + AssertRC(rc); + + /* + * Loop thru the available LDT entries. + * Figure out where to start and end and the potential cross pageness of + * things adds a little complexity. pLDTE is updated there and not in the + * 'next' part of the loop. The pLDTEEnd is inclusive. + */ + PX86DESC pLDTEEnd = (PX86DESC)((uintptr_t)pShadowLDT + cbChunk) - 1; + if (pLDTE + 1 < pShadowLDT) + pLDTE = (PX86DESC)((uintptr_t)pShadowLDT + off); + while (pLDTE <= pLDTEEnd) + { + if (pLDTE->Gen.u1Present) + selmGuestToShadowDesc(pVM, pLDTE); + + /* Next LDT entry. 
*/ + pLDTE++; + } + } + else + { + RT_BZERO(pShadowLDT, cbChunk); + AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("rc=%Rrc\n", rc)); + rc = PGMMapSetPage(pVM, GCPtrShadowLDT & PAGE_BASE_GC_MASK, PAGE_SIZE, 0); + AssertRC(rc); + } + + /* + * Advance to the next page. + */ + cbLeft -= cbChunk; + GCPtrShadowLDT += cbChunk; + pShadowLDT = (PX86DESC)((char *)pShadowLDT + cbChunk); + GCPtrLdt += cbChunk; + } + + return VINF_SUCCESS; +} + + +/** + * Checks and updates segment selector registers. + * + * @returns VBox strict status code. + * @retval VINF_EM_RESCHEDULE_REM if a stale register was found. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + */ +static VBOXSTRICTRC selmR3UpdateSegmentRegisters(PVM pVM, PVMCPU pVCpu) +{ + Assert(CPUMIsGuestInProtectedMode(pVCpu)); + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + /* + * No stale selectors in V8086 mode. + */ + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + if (pCtx->eflags.Bits.u1VM) + return VINF_SUCCESS; + + /* + * Check for stale selectors and load hidden register bits where they + * are missing. + */ + uint32_t uCpl = CPUMGetGuestCPL(pVCpu); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + PCPUMSELREG paSReg = CPUMCTX_FIRST_SREG(pCtx); + for (uint32_t iSReg = 0; iSReg < X86_SREG_COUNT; iSReg++) + { + RTSEL const Sel = paSReg[iSReg].Sel; + if (Sel & X86_SEL_MASK_OFF_RPL) + { + /* Get the shadow descriptor entry corresponding to this. */ + static X86DESC const s_NotPresentDesc = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; + PCX86DESC pDesc; + if (!(Sel & X86_SEL_LDT)) + { + if ((Sel | (sizeof(*pDesc) - 1)) <= pCtx->gdtr.cbGdt) + pDesc = &pVM->selm.s.paGdtR3[Sel >> X86_SEL_SHIFT]; + else + pDesc = &s_NotPresentDesc; + } + else + { + if ((Sel | (sizeof(*pDesc) - 1)) <= pVM->selm.s.cbLdtLimit) + pDesc = &((PCX86DESC)((uintptr_t)pVM->selm.s.pvLdtR3 + pVM->selm.s.offLdtHyper))[Sel >> X86_SEL_SHIFT]; + else + pDesc = &s_NotPresentDesc; + } + + /* Check the segment register. */ + if (CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, &paSReg[iSReg])) + { + if (!(paSReg[iSReg].fFlags & CPUMSELREG_FLAGS_STALE)) + { + /* Did it go stale? */ + if (selmIsSRegStale32(&paSReg[iSReg], pDesc, iSReg)) + { + Log2(("SELM: Detected stale %s=%#x (was valid)\n", g_aszSRegNms[iSReg], Sel)); + STAM_REL_COUNTER_INC(&pVM->selm.s.aStatDetectedStaleSReg[iSReg]); + paSReg[iSReg].fFlags |= CPUMSELREG_FLAGS_STALE; + rcStrict = VINF_EM_RESCHEDULE_REM; + } + } + else + { + /* Did it stop being stale? I.e. did the guest change it things + back to the way they were? */ + if (!selmIsSRegStale32(&paSReg[iSReg], pDesc, iSReg)) + { + STAM_REL_COUNTER_INC(&pVM->selm.s.StatStaleToUnstaleSReg); + paSReg[iSReg].fFlags &= CPUMSELREG_FLAGS_STALE; + } + else + { + Log2(("SELM: Already stale %s=%#x\n", g_aszSRegNms[iSReg], Sel)); + STAM_REL_COUNTER_INC(&pVM->selm.s.aStatAlreadyStaleSReg[iSReg]); + rcStrict = VINF_EM_RESCHEDULE_REM; + } + } + } + /* Load the hidden registers if it's a valid descriptor for the + current segment register. */ + else if (selmIsShwDescGoodForSReg(&paSReg[iSReg], pDesc, iSReg, uCpl)) + { + selmLoadHiddenSRegFromShadowDesc(&paSReg[iSReg], pDesc); + STAM_COUNTER_INC(&pVM->selm.s.aStatUpdatedSReg[iSReg]); + } + /* It's stale. 
*/ + else + { + Log2(("SELM: Detected stale %s=%#x (wasn't valid)\n", g_aszSRegNms[iSReg], Sel)); + STAM_REL_COUNTER_INC(&pVM->selm.s.aStatDetectedStaleSReg[iSReg]); + paSReg[iSReg].fFlags = CPUMSELREG_FLAGS_STALE; + rcStrict = VINF_EM_RESCHEDULE_REM; + } + } + /* else: 0 selector, ignore. */ + } + + return rcStrict; +} + + +/** + * Updates the Guest GDT & LDT virtualization based on current CPU state. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3DECL(VBOXSTRICTRC) SELMR3UpdateFromCPUM(PVM pVM, PVMCPU pVCpu) +{ + STAM_PROFILE_START(&pVM->selm.s.StatUpdateFromCPUM, a); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_SELM_HM_IPE); + + /* + * GDT sync + */ + int rc; + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT)) + { + rc = selmR3UpdateShadowGdt(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; /* We're toast, so forget the profiling. */ + AssertRCSuccess(rc); + } + + /* + * TSS sync + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS)) + { + rc = SELMR3SyncTSS(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + AssertRCSuccess(rc); + } + + /* + * LDT sync + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT)) + { + rc = selmR3UpdateShadowLdt(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + AssertRCSuccess(rc); + } + + /* + * Check selector registers. + */ + VBOXSTRICTRC rcStrict = selmR3UpdateSegmentRegisters(pVM, pVCpu); + + STAM_PROFILE_STOP(&pVM->selm.s.StatUpdateFromCPUM, a); + return rcStrict; +} + + +/** + * Synchronize the shadowed fields in the TSS. + * + * At present we're shadowing the ring-0 stack selector & pointer, and the + * interrupt redirection bitmap (if present). We take the lazy approach wrt to + * REM and this function is called both if REM made any changes to the TSS or + * loaded TR. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3DECL(int) SELMR3SyncTSS(PVM pVM, PVMCPU pVCpu) +{ + LogFlow(("SELMR3SyncTSS\n")); + int rc; + AssertReturnStmt(VM_IS_RAW_MODE_ENABLED(pVM), VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_SELM_SYNC_TSS), VINF_SUCCESS); + + STAM_PROFILE_START(&pVM->selm.s.StatTSSSync, a); + Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS)); + + /* + * Get TR and extract and store the basic info. + * + * Note! The TSS limit is not checked by the LTR code, so we + * have to be a bit careful with it. We make sure cbTss + * won't be zero if TR is valid and if it's NULL we'll + * make sure cbTss is 0. + */ +/** @todo use the hidden bits, not shadow GDT. */ + CPUMSELREGHID trHid; + RTSEL SelTss = CPUMGetGuestTR(pVCpu, &trHid); + RTGCPTR GCPtrTss = trHid.u64Base; + uint32_t cbTss = trHid.u32Limit; + Assert( (SelTss & X86_SEL_MASK_OFF_RPL) + || (cbTss == 0 && GCPtrTss == 0 && trHid.Attr.u == 0 /* TR=0 */) + || (cbTss == 0xffff && GCPtrTss == 0 && trHid.Attr.n.u1Present && trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY /* RESET */)); + if (SelTss & X86_SEL_MASK_OFF_RPL) + { + Assert(!(SelTss & X86_SEL_LDT)); + Assert(trHid.Attr.n.u1DescType == 0); + Assert( trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_286_TSS_BUSY + || trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY); + if (!++cbTss) + cbTss = UINT32_MAX; + } + else + { + Assert( (cbTss == 0 && GCPtrTss == 0 && trHid.Attr.u == 0 /* TR=0 */) + || (cbTss == 0xffff && GCPtrTss == 0 && trHid.Attr.n.u1Present && trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY /* RESET */)); + cbTss = 0; /* the reset case. 
*/ + } + pVM->selm.s.cbGuestTss = cbTss; + pVM->selm.s.fGuestTss32Bit = trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_AVAIL + || trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY; + + /* + * Figure out the size of what need to monitor. + */ + /* We're not interested in any 16-bit TSSes. */ + uint32_t cbMonitoredTss = cbTss; + if ( trHid.Attr.n.u4Type != X86_SEL_TYPE_SYS_386_TSS_AVAIL + && trHid.Attr.n.u4Type != X86_SEL_TYPE_SYS_386_TSS_BUSY) + cbMonitoredTss = 0; + + pVM->selm.s.offGuestIoBitmap = 0; + bool fNoRing1Stack = true; + if (cbMonitoredTss) + { + /* + * 32-bit TSS. What we're really keen on is the SS0 and ESP0 fields. + * If VME is enabled we also want to keep an eye on the interrupt + * redirection bitmap. + */ + VBOXTSS Tss; + uint32_t cr4 = CPUMGetGuestCR4(pVCpu); + rc = PGMPhysSimpleReadGCPtr(pVCpu, &Tss, GCPtrTss, RT_UOFFSETOF(VBOXTSS, IntRedirBitmap)); + if ( !(cr4 & X86_CR4_VME) + || ( RT_SUCCESS(rc) + && Tss.offIoBitmap < sizeof(VBOXTSS) /* too small */ + && Tss.offIoBitmap > cbTss) /* beyond the end */ /** @todo not sure how the partial case is handled; probably not allowed. */ + ) + /* No interrupt redirection bitmap, just ESP0 and SS0. */ + cbMonitoredTss = RT_UOFFSETOF(VBOXTSS, padding_ss0); + else if (RT_SUCCESS(rc)) + { + /* + * Everything up to and including the interrupt redirection bitmap. Unfortunately + * this can be quite a large chunk. We use to skip it earlier and just hope it + * was kind of static... + * + * Update the virtual interrupt redirection bitmap while we're here. + * (It is located in the 32 bytes before TR:offIoBitmap.) + */ + cbMonitoredTss = Tss.offIoBitmap; + pVM->selm.s.offGuestIoBitmap = Tss.offIoBitmap; + + uint32_t offRedirBitmap = Tss.offIoBitmap - sizeof(Tss.IntRedirBitmap); + rc = PGMPhysSimpleReadGCPtr(pVCpu, &pVM->selm.s.Tss.IntRedirBitmap, + GCPtrTss + offRedirBitmap, sizeof(Tss.IntRedirBitmap)); + AssertRC(rc); + /** @todo memset the bitmap on failure? */ + Log2(("Redirection bitmap:\n")); + Log2(("%.*Rhxd\n", sizeof(Tss.IntRedirBitmap), &pVM->selm.s.Tss.IntRedirBitmap)); + } + else + { + cbMonitoredTss = RT_UOFFSETOF(VBOXTSS, IntRedirBitmap); + pVM->selm.s.offGuestIoBitmap = 0; + /** @todo memset the bitmap? */ + } + + /* + * Update the ring 0 stack selector and base address. + */ + if (RT_SUCCESS(rc)) + { +# ifdef LOG_ENABLED + if (LogIsEnabled()) + { + uint32_t ssr0, espr0; + SELMGetRing1Stack(pVM, &ssr0, &espr0); + if ((ssr0 & ~1) != Tss.ss0 || espr0 != Tss.esp0) + { + RTGCPHYS GCPhys = NIL_RTGCPHYS; + rc = PGMGstGetPage(pVCpu, GCPtrTss, NULL, &GCPhys); AssertRC(rc); + Log(("SELMR3SyncTSS: Updating TSS ring 0 stack to %04X:%08X from %04X:%08X; TSS Phys=%RGp)\n", + Tss.ss0, Tss.esp0, (ssr0 & ~1), espr0, GCPhys)); + AssertMsg(ssr0 != Tss.ss0, + ("ring-1 leak into TSS.SS0! %04X:%08X from %04X:%08X; TSS Phys=%RGp)\n", + Tss.ss0, Tss.esp0, (ssr0 & ~1), espr0, GCPhys)); + } + Log(("offIoBitmap=%#x\n", Tss.offIoBitmap)); + } +# endif /* LOG_ENABLED */ + AssertMsg(!(Tss.ss0 & 3), ("ring-1 leak into TSS.SS0? 
%04X:%08X\n", Tss.ss0, Tss.esp0)); + + /* Update our TSS structure for the guest's ring 1 stack */ + selmSetRing1Stack(pVM, Tss.ss0 | 1, Tss.esp0); + pVM->selm.s.fSyncTSSRing0Stack = fNoRing1Stack = false; + +# ifdef VBOX_WITH_RAW_RING1 + /* Update our TSS structure for the guest's ring 2 stack */ + if (EMIsRawRing1Enabled(pVM)) + { + if ( (pVM->selm.s.Tss.ss2 != ((Tss.ss1 & ~2) | 1)) + || pVM->selm.s.Tss.esp2 != Tss.esp1) + Log(("SELMR3SyncTSS: Updating TSS ring 1 stack to %04X:%08X from %04X:%08X\n", Tss.ss1, Tss.esp1, (pVM->selm.s.Tss.ss2 & ~2) | 1, pVM->selm.s.Tss.esp2)); + selmSetRing2Stack(pVM, (Tss.ss1 & ~1) | 2, Tss.esp1); + } +# endif + } + } + + /* + * Flush the ring-1 stack and the direct syscall dispatching if we + * cannot obtain SS0:ESP0. + */ + if (fNoRing1Stack) + { + selmSetRing1Stack(pVM, 0 /* invalid SS */, 0); + pVM->selm.s.fSyncTSSRing0Stack = cbMonitoredTss != 0; + + /** @todo handle these dependencies better! */ + TRPMR3SetGuestTrapHandler(pVM, 0x2E, TRPM_INVALID_HANDLER); + TRPMR3SetGuestTrapHandler(pVM, 0x80, TRPM_INVALID_HANDLER); + } + + /* + * Check for monitor changes and apply them. + */ + if ( GCPtrTss != pVM->selm.s.GCPtrGuestTss + || cbMonitoredTss != pVM->selm.s.cbMonitoredGuestTss) + { + Log(("SELMR3SyncTSS: Guest's TSS is changed to pTss=%RGv cbMonitoredTss=%08X cbGuestTss=%#08x\n", + GCPtrTss, cbMonitoredTss, pVM->selm.s.cbGuestTss)); + + /* Release the old range first. */ + if (pVM->selm.s.GCPtrGuestTss != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->selm.s.GCPtrGuestTss, false /*fHypervisor*/); + AssertRC(rc); + } + + /* Register the write handler if TS != 0. */ + if (cbMonitoredTss != 0) + { +# ifdef SELM_TRACK_GUEST_TSS_CHANGES + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hGuestTssWriteHandlerType, + GCPtrTss, GCPtrTss + cbMonitoredTss - 1, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + if (RT_FAILURE(rc)) + { +# ifdef VBOX_WITH_RAW_RING1 + /** @todo !HACK ALERT! + * Some guest OSes (QNX) share code and the TSS on the same page; + * PGMR3HandlerVirtualRegister doesn't support more than one + * handler, so we kick out the PATM handler as this one is more + * important. Fix this properly in PGMR3HandlerVirtualRegister? + */ + if (rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT) + { + LogRel(("SELMR3SyncTSS: Virtual handler conflict %RGv -> kick out PATM handler for the higher priority TSS page monitor\n", GCPtrTss)); + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, GCPtrTss & PAGE_BASE_GC_MASK, false /*fHypervisor*/); + AssertRC(rc); + + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->selm.s.hGuestTssWriteHandlerType, + GCPtrTss, GCPtrTss + cbMonitoredTss - 1, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + if (RT_FAILURE(rc)) + { + STAM_PROFILE_STOP(&pVM->selm.s.StatUpdateFromCPUM, a); + return rc; + } + } +# else + STAM_PROFILE_STOP(&pVM->selm.s.StatUpdateFromCPUM, a); + return rc; +# endif + } +# endif /* SELM_TRACK_GUEST_TSS_CHANGES */ + + /* Update saved Guest TSS info. */ + pVM->selm.s.GCPtrGuestTss = GCPtrTss; + pVM->selm.s.cbMonitoredGuestTss = cbMonitoredTss; + pVM->selm.s.GCSelTss = SelTss; + } + else + { + pVM->selm.s.GCPtrGuestTss = RTRCPTR_MAX; + pVM->selm.s.cbMonitoredGuestTss = 0; + pVM->selm.s.GCSelTss = 0; + } + } + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + + STAM_PROFILE_STOP(&pVM->selm.s.StatTSSSync, a); + return VINF_SUCCESS; +} + + +/** + * Compares the Guest GDT and LDT with the shadow tables. + * This is a VBOX_STRICT only function. 
+ * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) SELMR3DebugCheck(PVM pVM) +{ +# ifdef VBOX_STRICT + PVMCPU pVCpu = VMMGetCpu(pVM); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_SELM_HM_IPE); + + /* + * Get GDTR and check for conflict. + */ + VBOXGDTR GDTR; + CPUMGetGuestGDTR(pVCpu, &GDTR); + if (GDTR.cbGdt == 0) + return VINF_SUCCESS; + + if (GDTR.cbGdt >= (unsigned)(pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] >> X86_SEL_SHIFT)) + Log(("SELMR3DebugCheck: guest GDT size forced us to look for unused selectors.\n")); + + if (GDTR.cbGdt != pVM->selm.s.GuestGdtr.cbGdt) + Log(("SELMR3DebugCheck: limits have changed! new=%d old=%d\n", GDTR.cbGdt, pVM->selm.s.GuestGdtr.cbGdt)); + + /* + * Loop thru the GDT checking each entry. + */ + RTGCPTR GCPtrGDTEGuest = GDTR.pGdt; + PX86DESC pGDTE = pVM->selm.s.paGdtR3; + PX86DESC pGDTEEnd = (PX86DESC)((uintptr_t)pGDTE + GDTR.cbGdt); + while (pGDTE < pGDTEEnd) + { + X86DESC GDTEGuest; + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &GDTEGuest, GCPtrGDTEGuest, sizeof(GDTEGuest)); + if (RT_SUCCESS(rc)) + { + if (pGDTE->Gen.u1DescType || pGDTE->Gen.u4Type != X86_SEL_TYPE_SYS_LDT) + { + if ( pGDTE->Gen.u16LimitLow != GDTEGuest.Gen.u16LimitLow + || pGDTE->Gen.u4LimitHigh != GDTEGuest.Gen.u4LimitHigh + || pGDTE->Gen.u16BaseLow != GDTEGuest.Gen.u16BaseLow + || pGDTE->Gen.u8BaseHigh1 != GDTEGuest.Gen.u8BaseHigh1 + || pGDTE->Gen.u8BaseHigh2 != GDTEGuest.Gen.u8BaseHigh2 + || pGDTE->Gen.u1DefBig != GDTEGuest.Gen.u1DefBig + || pGDTE->Gen.u1DescType != GDTEGuest.Gen.u1DescType) + { + unsigned iGDT = pGDTE - pVM->selm.s.paGdtR3; + SELMR3DumpDescriptor(*pGDTE, iGDT << 3, "SELMR3DebugCheck: GDT mismatch, shadow"); + SELMR3DumpDescriptor(GDTEGuest, iGDT << 3, "SELMR3DebugCheck: GDT mismatch, guest"); + } + } + } + + /* Advance to the next descriptor. */ + GCPtrGDTEGuest += sizeof(X86DESC); + pGDTE++; + } + + + /* + * LDT? + */ + RTSEL SelLdt = CPUMGetGuestLDTR(pVCpu); + if ((SelLdt & X86_SEL_MASK_OFF_RPL) == 0) + return VINF_SUCCESS; + Assert(!(SelLdt & X86_SEL_LDT)); + if (SelLdt > GDTR.cbGdt) + { + Log(("SELMR3DebugCheck: ldt is out of bound SelLdt=%#x\n", SelLdt)); + return VERR_SELM_LDT_OUT_OF_BOUNDS; + } + X86DESC LDTDesc; + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &LDTDesc, GDTR.pGdt + (SelLdt & X86_SEL_MASK), sizeof(LDTDesc)); + if (RT_FAILURE(rc)) + { + Log(("SELMR3DebugCheck: Failed to read LDT descriptor. rc=%d\n", rc)); + return rc; + } + RTGCPTR GCPtrLDTEGuest = X86DESC_BASE(&LDTDesc); + uint32_t cbLdt = X86DESC_LIMIT_G(&LDTDesc); + + /* + * Validate it. + */ + if (!cbLdt) + return VINF_SUCCESS; + /** @todo check what intel does about odd limits. */ + AssertMsg(RT_ALIGN(cbLdt + 1, sizeof(X86DESC)) == cbLdt + 1 && cbLdt <= 0xffff, ("cbLdt=%d\n", cbLdt)); + if ( LDTDesc.Gen.u1DescType + || LDTDesc.Gen.u4Type != X86_SEL_TYPE_SYS_LDT + || SelLdt >= pVM->selm.s.GuestGdtr.cbGdt) + { + Log(("SELmR3DebugCheck: Invalid LDT %04x!\n", SelLdt)); + return VERR_SELM_INVALID_LDT; + } + + /* + * Loop thru the LDT checking each entry. 
+ */ + unsigned off = (GCPtrLDTEGuest & PAGE_OFFSET_MASK); + PX86DESC pLDTE = (PX86DESC)((uintptr_t)pVM->selm.s.pvLdtR3 + off); + PX86DESC pLDTEEnd = (PX86DESC)((uintptr_t)pGDTE + cbLdt); + while (pLDTE < pLDTEEnd) + { + X86DESC LDTEGuest; + rc = PGMPhysSimpleReadGCPtr(pVCpu, &LDTEGuest, GCPtrLDTEGuest, sizeof(LDTEGuest)); + if (RT_SUCCESS(rc)) + { + if ( pLDTE->Gen.u16LimitLow != LDTEGuest.Gen.u16LimitLow + || pLDTE->Gen.u4LimitHigh != LDTEGuest.Gen.u4LimitHigh + || pLDTE->Gen.u16BaseLow != LDTEGuest.Gen.u16BaseLow + || pLDTE->Gen.u8BaseHigh1 != LDTEGuest.Gen.u8BaseHigh1 + || pLDTE->Gen.u8BaseHigh2 != LDTEGuest.Gen.u8BaseHigh2 + || pLDTE->Gen.u1DefBig != LDTEGuest.Gen.u1DefBig + || pLDTE->Gen.u1DescType != LDTEGuest.Gen.u1DescType) + { + unsigned iLDT = pLDTE - (PX86DESC)((uintptr_t)pVM->selm.s.pvLdtR3 + off); + SELMR3DumpDescriptor(*pLDTE, iLDT << 3, "SELMR3DebugCheck: LDT mismatch, shadow"); + SELMR3DumpDescriptor(LDTEGuest, iLDT << 3, "SELMR3DebugCheck: LDT mismatch, guest"); + } + } + + /* Advance to the next descriptor. */ + GCPtrLDTEGuest += sizeof(X86DESC); + pLDTE++; + } + +# else /* !VBOX_STRICT */ + NOREF(pVM); +# endif /* !VBOX_STRICT */ + + return VINF_SUCCESS; +} + + +/** + * Validates the RawR0 TSS values against the one in the Guest TSS. + * + * @returns true if it matches. + * @returns false and assertions on mismatch.. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(bool) SELMR3CheckTSS(PVM pVM) +{ +# if defined(VBOX_STRICT) && defined(SELM_TRACK_GUEST_TSS_CHANGES) + PVMCPU pVCpu = VMMGetCpu(pVM); + + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS)) + return true; + + /* + * Get TR and extract the basic info. + */ + CPUMSELREGHID trHid; + RTSEL SelTss = CPUMGetGuestTR(pVCpu, &trHid); + RTGCPTR GCPtrTss = trHid.u64Base; + uint32_t cbTss = trHid.u32Limit; + Assert( (SelTss & X86_SEL_MASK_OFF_RPL) + || (cbTss == 0 && GCPtrTss == 0 && trHid.Attr.u == 0 /* TR=0 */) + || (cbTss == 0xffff && GCPtrTss == 0 && trHid.Attr.n.u1Present && trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY /* RESET */)); + if (SelTss & X86_SEL_MASK_OFF_RPL) + { + AssertReturn(!(SelTss & X86_SEL_LDT), false); + AssertReturn(trHid.Attr.n.u1DescType == 0, false); + AssertReturn( trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_286_TSS_BUSY + || trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY, + false); + if (!++cbTss) + cbTss = UINT32_MAX; + } + else + { + AssertReturn( (cbTss == 0 && GCPtrTss == 0 && trHid.Attr.u == 0 /* TR=0 */) + || (cbTss == 0xffff && GCPtrTss == 0 && trHid.Attr.n.u1Present && trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY /* RESET */), + false); + cbTss = 0; /* the reset case. */ + } + AssertMsgReturn(pVM->selm.s.cbGuestTss == cbTss, ("%#x %#x\n", pVM->selm.s.cbGuestTss, cbTss), false); + AssertMsgReturn(pVM->selm.s.fGuestTss32Bit == ( trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_AVAIL + || trHid.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_BUSY), + ("%RTbool u4Type=%d\n", pVM->selm.s.fGuestTss32Bit, trHid.Attr.n.u4Type), + false); + AssertMsgReturn( pVM->selm.s.GCSelTss == SelTss + || (!pVM->selm.s.GCSelTss && !(SelTss & X86_SEL_LDT)), + ("%#x %#x\n", pVM->selm.s.GCSelTss, SelTss), + false); + AssertMsgReturn( pVM->selm.s.GCPtrGuestTss == GCPtrTss + || (pVM->selm.s.GCPtrGuestTss == RTRCPTR_MAX && !GCPtrTss), + ("%#RGv %#RGv\n", pVM->selm.s.GCPtrGuestTss, GCPtrTss), + false); + + + /* + * Figure out the size of what need to monitor. + */ + /* We're not interested in any 16-bit TSSes. 
*/ + uint32_t cbMonitoredTss = cbTss; + if ( trHid.Attr.n.u4Type != X86_SEL_TYPE_SYS_386_TSS_AVAIL + && trHid.Attr.n.u4Type != X86_SEL_TYPE_SYS_386_TSS_BUSY) + cbMonitoredTss = 0; + if (cbMonitoredTss) + { + VBOXTSS Tss; + uint32_t cr4 = CPUMGetGuestCR4(pVCpu); + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &Tss, GCPtrTss, RT_UOFFSETOF(VBOXTSS, IntRedirBitmap)); + AssertReturn( rc == VINF_SUCCESS + /* Happens early in XP boot during page table switching. */ + || ( (rc == VERR_PAGE_TABLE_NOT_PRESENT || rc == VERR_PAGE_NOT_PRESENT) + && !(CPUMGetGuestEFlags(pVCpu) & X86_EFL_IF)), + false); + if ( !(cr4 & X86_CR4_VME) + || ( RT_SUCCESS(rc) + && Tss.offIoBitmap < sizeof(VBOXTSS) /* too small */ + && Tss.offIoBitmap > cbTss) + ) + cbMonitoredTss = RT_UOFFSETOF(VBOXTSS, padding_ss0); + else if (RT_SUCCESS(rc)) + { + cbMonitoredTss = Tss.offIoBitmap; + AssertMsgReturn(pVM->selm.s.offGuestIoBitmap == Tss.offIoBitmap, + ("%#x %#x\n", pVM->selm.s.offGuestIoBitmap, Tss.offIoBitmap), + false); + + /* check the bitmap */ + uint32_t offRedirBitmap = Tss.offIoBitmap - sizeof(Tss.IntRedirBitmap); + rc = PGMPhysSimpleReadGCPtr(pVCpu, &Tss.IntRedirBitmap, + GCPtrTss + offRedirBitmap, sizeof(Tss.IntRedirBitmap)); + AssertRCReturn(rc, false); + AssertMsgReturn(!memcmp(&Tss.IntRedirBitmap[0], &pVM->selm.s.Tss.IntRedirBitmap[0], sizeof(Tss.IntRedirBitmap)), + ("offIoBitmap=%#x cbTss=%#x\n" + " Guest: %.32Rhxs\n" + "Shadow: %.32Rhxs\n", + Tss.offIoBitmap, cbTss, + &Tss.IntRedirBitmap[0], + &pVM->selm.s.Tss.IntRedirBitmap[0]), + false); + } + else + cbMonitoredTss = RT_UOFFSETOF(VBOXTSS, IntRedirBitmap); + + /* + * Check SS0 and ESP0. + */ + if ( !pVM->selm.s.fSyncTSSRing0Stack + && RT_SUCCESS(rc)) + { + if ( Tss.esp0 != pVM->selm.s.Tss.esp1 + || Tss.ss0 != (pVM->selm.s.Tss.ss1 & ~1)) + { + RTGCPHYS GCPhys; + rc = PGMGstGetPage(pVCpu, GCPtrTss, NULL, &GCPhys); AssertRC(rc); + AssertMsgFailed(("TSS out of sync!! (%04X:%08X vs %04X:%08X (guest)) Tss=%RGv Phys=%RGp\n", + (pVM->selm.s.Tss.ss1 & ~1), pVM->selm.s.Tss.esp1, + Tss.ss1, Tss.esp1, GCPtrTss, GCPhys)); + return false; + } + } + AssertMsgReturn(pVM->selm.s.cbMonitoredGuestTss == cbMonitoredTss, ("%#x %#x\n", pVM->selm.s.cbMonitoredGuestTss, cbMonitoredTss), false); + } + else + { + AssertMsgReturn(pVM->selm.s.Tss.ss1 == 0 && pVM->selm.s.Tss.esp1 == 0, ("%04x:%08x\n", pVM->selm.s.Tss.ss1, pVM->selm.s.Tss.esp1), false); + AssertReturn(!pVM->selm.s.fSyncTSSRing0Stack, false); + AssertMsgReturn(pVM->selm.s.cbMonitoredGuestTss == cbMonitoredTss, ("%#x %#x\n", pVM->selm.s.cbMonitoredGuestTss, cbMonitoredTss), false); + } + + + + return true; + +# else /* !VBOX_STRICT */ + NOREF(pVM); + return true; +# endif /* !VBOX_STRICT */ +} + + +# ifdef VBOX_WITH_SAFE_STR +/** + * Validates the RawR0 TR shadow GDT entry. + * + * @returns true if it matches. + * @returns false and assertions on mismatch.. + * @param pVM The cross context VM structure. 
+ */ +VMMR3DECL(bool) SELMR3CheckShadowTR(PVM pVM) +{ +# ifdef VBOX_STRICT + PX86DESC paGdt = pVM->selm.s.paGdtR3; + + /* + * TSS descriptor + */ + PX86DESC pDesc = &paGdt[pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] >> 3]; + RTRCPTR RCPtrTSS = VM_RC_ADDR(pVM, &pVM->selm.s.Tss); + + if ( pDesc->Gen.u16BaseLow != RT_LOWORD(RCPtrTSS) + || pDesc->Gen.u8BaseHigh1 != RT_BYTE3(RCPtrTSS) + || pDesc->Gen.u8BaseHigh2 != RT_BYTE4(RCPtrTSS) + || pDesc->Gen.u16LimitLow != sizeof(VBOXTSS) - 1 + || pDesc->Gen.u4LimitHigh != 0 + || (pDesc->Gen.u4Type != X86_SEL_TYPE_SYS_386_TSS_AVAIL && pDesc->Gen.u4Type != X86_SEL_TYPE_SYS_386_TSS_BUSY) + || pDesc->Gen.u1DescType != 0 /* system */ + || pDesc->Gen.u2Dpl != 0 /* supervisor */ + || pDesc->Gen.u1Present != 1 + || pDesc->Gen.u1Available != 0 + || pDesc->Gen.u1Long != 0 + || pDesc->Gen.u1DefBig != 0 + || pDesc->Gen.u1Granularity != 0 /* byte limit */ + ) + { + AssertFailed(); + return false; + } +# else + RT_NOREF_PV(pVM); +# endif + return true; +} +# endif /* VBOX_WITH_SAFE_STR */ + +#endif /* VBOX_WITH_RAW_MODE */ + +/** + * Gets information about a 64-bit selector, SELMR3GetSelectorInfo helper. + * + * See SELMR3GetSelectorInfo for details. + * + * @returns VBox status code, see SELMR3GetSelectorInfo for details. + * + * @param pVCpu The cross context virtual CPU structure. + * @param Sel The selector to get info about. + * @param pSelInfo Where to store the information. + */ +static int selmR3GetSelectorInfo64(PVMCPU pVCpu, RTSEL Sel, PDBGFSELINFO pSelInfo) +{ + /* + * Read it from the guest descriptor table. + */ +/** @todo this is bogus wrt the LDT/GDT limit on long selectors. */ + X86DESC64 Desc; + RTGCPTR GCPtrDesc; + if (!(Sel & X86_SEL_LDT)) + { + /* GDT */ + VBOXGDTR Gdtr; + CPUMGetGuestGDTR(pVCpu, &Gdtr); + if ((Sel | X86_SEL_RPL_LDT) > Gdtr.cbGdt) + return VERR_INVALID_SELECTOR; + GCPtrDesc = Gdtr.pGdt + (Sel & X86_SEL_MASK); + } + else + { + /* LDT */ + uint64_t GCPtrBase; + uint32_t cbLimit; + CPUMGetGuestLdtrEx(pVCpu, &GCPtrBase, &cbLimit); + if ((Sel | X86_SEL_RPL_LDT) > cbLimit) + return VERR_INVALID_SELECTOR; + + /* calc the descriptor location. */ + GCPtrDesc = GCPtrBase + (Sel & X86_SEL_MASK); + } + + /* read the descriptor. */ + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &Desc, GCPtrDesc, sizeof(Desc)); + if (RT_FAILURE(rc)) + { + rc = PGMPhysSimpleReadGCPtr(pVCpu, &Desc, GCPtrDesc, sizeof(X86DESC)); + if (RT_FAILURE(rc)) + return rc; + Desc.au64[1] = 0; + } + + /* + * Extract the base and limit + * (We ignore the present bit here, which is probably a bit silly...) + */ + pSelInfo->Sel = Sel; + pSelInfo->fFlags = DBGFSELINFO_FLAGS_LONG_MODE; + pSelInfo->u.Raw64 = Desc; + if (Desc.Gen.u1DescType) + { + /* + * 64-bit code selectors are wide open, it's not possible to detect + * 64-bit data or stack selectors without also dragging in assumptions + * about current CS (i.e. that's we're executing in 64-bit mode). So, + * the selinfo user needs to deal with this in the context the info is + * used unfortunately. + */ + if ( Desc.Gen.u1Long + && !Desc.Gen.u1DefBig + && (Desc.Gen.u4Type & X86_SEL_TYPE_CODE)) + { + /* Note! We ignore the segment limit hacks that was added by AMD. 
*/ + pSelInfo->GCPtrBase = 0; + pSelInfo->cbLimit = ~(RTGCUINTPTR)0; + } + else + { + pSelInfo->cbLimit = X86DESC_LIMIT_G(&Desc); + pSelInfo->GCPtrBase = X86DESC_BASE(&Desc); + } + pSelInfo->SelGate = 0; + } + else if ( Desc.Gen.u4Type == AMD64_SEL_TYPE_SYS_LDT + || Desc.Gen.u4Type == AMD64_SEL_TYPE_SYS_TSS_AVAIL + || Desc.Gen.u4Type == AMD64_SEL_TYPE_SYS_TSS_BUSY) + { + /* Note. LDT descriptors are weird in long mode, we ignore the footnote + in the AMD manual here as a simplification. */ + pSelInfo->GCPtrBase = X86DESC64_BASE(&Desc); + pSelInfo->cbLimit = X86DESC_LIMIT_G(&Desc); + pSelInfo->SelGate = 0; + } + else if ( Desc.Gen.u4Type == AMD64_SEL_TYPE_SYS_CALL_GATE + || Desc.Gen.u4Type == AMD64_SEL_TYPE_SYS_TRAP_GATE + || Desc.Gen.u4Type == AMD64_SEL_TYPE_SYS_INT_GATE) + { + pSelInfo->cbLimit = X86DESC64_BASE(&Desc); + pSelInfo->GCPtrBase = Desc.Gate.u16OffsetLow + | ((uint32_t)Desc.Gate.u16OffsetHigh << 16) + | ((uint64_t)Desc.Gate.u32OffsetTop << 32); + pSelInfo->SelGate = Desc.Gate.u16Sel; + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_GATE; + } + else + { + pSelInfo->cbLimit = 0; + pSelInfo->GCPtrBase = 0; + pSelInfo->SelGate = 0; + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_INVALID; + } + if (!Desc.Gen.u1Present) + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_NOT_PRESENT; + + return VINF_SUCCESS; +} + + +/** + * Worker for selmR3GetSelectorInfo32 and SELMR3GetShadowSelectorInfo that + * interprets a legacy descriptor table entry and fills in the selector info + * structure from it. + * + * @param pSelInfo Where to store the selector info. Only the fFlags and + * Sel members have been initialized. + * @param pDesc The legacy descriptor to parse. + */ +DECLINLINE(void) selmR3SelInfoFromDesc32(PDBGFSELINFO pSelInfo, PCX86DESC pDesc) +{ + pSelInfo->u.Raw64.au64[1] = 0; + pSelInfo->u.Raw = *pDesc; + if ( pDesc->Gen.u1DescType + || !(pDesc->Gen.u4Type & 4)) + { + pSelInfo->cbLimit = X86DESC_LIMIT_G(pDesc); + pSelInfo->GCPtrBase = X86DESC_BASE(pDesc); + pSelInfo->SelGate = 0; + } + else if (pDesc->Gen.u4Type != X86_SEL_TYPE_SYS_UNDEFINED4) + { + pSelInfo->cbLimit = 0; + if (pDesc->Gen.u4Type == X86_SEL_TYPE_SYS_TASK_GATE) + pSelInfo->GCPtrBase = 0; + else + pSelInfo->GCPtrBase = pDesc->Gate.u16OffsetLow + | (uint32_t)pDesc->Gate.u16OffsetHigh << 16; + pSelInfo->SelGate = pDesc->Gate.u16Sel; + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_GATE; + } + else + { + pSelInfo->cbLimit = 0; + pSelInfo->GCPtrBase = 0; + pSelInfo->SelGate = 0; + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_INVALID; + } + if (!pDesc->Gen.u1Present) + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_NOT_PRESENT; +} + + +/** + * Gets information about a 64-bit selector, SELMR3GetSelectorInfo helper. + * + * See SELMR3GetSelectorInfo for details. + * + * @returns VBox status code, see SELMR3GetSelectorInfo for details. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param Sel The selector to get info about. + * @param pSelInfo Where to store the information. 
+ */ +static int selmR3GetSelectorInfo32(PVM pVM, PVMCPU pVCpu, RTSEL Sel, PDBGFSELINFO pSelInfo) +{ + /* + * Read the descriptor entry + */ + pSelInfo->fFlags = 0; + X86DESC Desc; + if ( !(Sel & X86_SEL_LDT) + && ( pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] == (Sel & X86_SEL_RPL_LDT) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] == (Sel & X86_SEL_RPL_LDT) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] == (Sel & X86_SEL_RPL_LDT) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] == (Sel & X86_SEL_RPL_LDT) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] == (Sel & X86_SEL_RPL_LDT)) + ) + { + /* + * Hypervisor descriptor. + */ + pSelInfo->fFlags = DBGFSELINFO_FLAGS_HYPER; + if (CPUMIsGuestInProtectedMode(pVCpu)) + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_PROT_MODE; + else + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_REAL_MODE; + + Desc = pVM->selm.s.paGdtR3[Sel >> X86_SEL_SHIFT]; + } + else if (CPUMIsGuestInProtectedMode(pVCpu)) + { + /* + * Read it from the guest descriptor table. + */ + pSelInfo->fFlags = DBGFSELINFO_FLAGS_PROT_MODE; + + RTGCPTR GCPtrDesc; + if (!(Sel & X86_SEL_LDT)) + { + /* GDT */ + VBOXGDTR Gdtr; + CPUMGetGuestGDTR(pVCpu, &Gdtr); + if ((Sel | X86_SEL_RPL_LDT) > Gdtr.cbGdt) + return VERR_INVALID_SELECTOR; + GCPtrDesc = Gdtr.pGdt + (Sel & X86_SEL_MASK); + } + else + { + /* LDT */ + uint64_t GCPtrBase; + uint32_t cbLimit; + CPUMGetGuestLdtrEx(pVCpu, &GCPtrBase, &cbLimit); + if ((Sel | X86_SEL_RPL_LDT) > cbLimit) + return VERR_INVALID_SELECTOR; + + /* calc the descriptor location. */ + GCPtrDesc = GCPtrBase + (Sel & X86_SEL_MASK); + } + + /* read the descriptor. */ + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &Desc, GCPtrDesc, sizeof(Desc)); + if (RT_FAILURE(rc)) + return rc; + } + else + { + /* + * We're in real mode. + */ + pSelInfo->Sel = Sel; + pSelInfo->GCPtrBase = Sel << 4; + pSelInfo->cbLimit = 0xffff; + pSelInfo->fFlags = DBGFSELINFO_FLAGS_REAL_MODE; + pSelInfo->u.Raw64.au64[0] = 0; + pSelInfo->u.Raw64.au64[1] = 0; + pSelInfo->SelGate = 0; + return VINF_SUCCESS; + } + + /* + * Extract the base and limit or sel:offset for gates. + */ + pSelInfo->Sel = Sel; + selmR3SelInfoFromDesc32(pSelInfo, &Desc); + + return VINF_SUCCESS; +} + + +/** + * Gets information about a selector. + * + * Intended for the debugger mostly and will prefer the guest descriptor tables + * over the shadow ones. + * + * @retval VINF_SUCCESS on success. + * @retval VERR_INVALID_SELECTOR if the selector isn't fully inside the + * descriptor table. + * @retval VERR_SELECTOR_NOT_PRESENT if the LDT is invalid or not present. This + * is not returned if the selector itself isn't present, you have to + * check that for yourself (see DBGFSELINFO::fFlags). + * @retval VERR_PAGE_TABLE_NOT_PRESENT or VERR_PAGE_NOT_PRESENT if the + * pagetable or page backing the selector table wasn't present. + * @returns Other VBox status code on other errors. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param Sel The selector to get info about. + * @param pSelInfo Where to store the information. + */ +VMMR3DECL(int) SELMR3GetSelectorInfo(PVM pVM, PVMCPU pVCpu, RTSEL Sel, PDBGFSELINFO pSelInfo) +{ + AssertPtr(pSelInfo); + if (CPUMIsGuestInLongMode(pVCpu)) + return selmR3GetSelectorInfo64(pVCpu, Sel, pSelInfo); + return selmR3GetSelectorInfo32(pVM, pVCpu, Sel, pSelInfo); +} + + +/** + * Gets information about a selector from the shadow tables. 
+ * + * This is intended to be faster than the SELMR3GetSelectorInfo() method, but + * requires that the caller ensures that the shadow tables are up to date. + * + * @retval VINF_SUCCESS on success. + * @retval VERR_INVALID_SELECTOR if the selector isn't fully inside the + * descriptor table. + * @retval VERR_SELECTOR_NOT_PRESENT if the LDT is invalid or not present. This + * is not returned if the selector itself isn't present, you have to + * check that for yourself (see DBGFSELINFO::fFlags). + * @retval VERR_PAGE_TABLE_NOT_PRESENT or VERR_PAGE_NOT_PRESENT if the + * pagetable or page backing the selector table wasn't present. + * @returns Other VBox status code on other errors. + * + * @param pVM The cross context VM structure. + * @param Sel The selector to get info about. + * @param pSelInfo Where to store the information. + * + * @remarks Don't use this when in hardware assisted virtualization mode. + */ +VMMR3DECL(int) SELMR3GetShadowSelectorInfo(PVM pVM, RTSEL Sel, PDBGFSELINFO pSelInfo) +{ + Assert(pSelInfo); + + /* + * Read the descriptor entry + */ + X86DESC Desc; + if (!(Sel & X86_SEL_LDT)) + { + /* + * Global descriptor. + */ + Desc = pVM->selm.s.paGdtR3[Sel >> X86_SEL_SHIFT]; + pSelInfo->fFlags = pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] == (Sel & X86_SEL_MASK_OFF_RPL) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] == (Sel & X86_SEL_MASK_OFF_RPL) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] == (Sel & X86_SEL_MASK_OFF_RPL) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] == (Sel & X86_SEL_MASK_OFF_RPL) + || pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] == (Sel & X86_SEL_MASK_OFF_RPL) + ? DBGFSELINFO_FLAGS_HYPER + : 0; + /** @todo check that the GDT offset is valid. */ + } + else + { + /* + * Local Descriptor. + */ + PX86DESC paLDT = (PX86DESC)((char *)pVM->selm.s.pvLdtR3 + pVM->selm.s.offLdtHyper); + Desc = paLDT[Sel >> X86_SEL_SHIFT]; + /** @todo check if the LDT page is actually available. */ + /** @todo check that the LDT offset is valid. */ + pSelInfo->fFlags = 0; + } + if (CPUMIsGuestInProtectedMode(VMMGetCpu0(pVM))) + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_PROT_MODE; + else + pSelInfo->fFlags |= DBGFSELINFO_FLAGS_REAL_MODE; + + /* + * Extract the base and limit or sel:offset for gates. + */ + pSelInfo->Sel = Sel; + selmR3SelInfoFromDesc32(pSelInfo, &Desc); + + return VINF_SUCCESS; +} + + +/** + * Formats a descriptor. + * + * @param Desc Descriptor to format. + * @param Sel Selector number. + * @param pszOutput Output buffer. + * @param cchOutput Size of output buffer. + */ +static void selmR3FormatDescriptor(X86DESC Desc, RTSEL Sel, char *pszOutput, size_t cchOutput) +{ + /* + * Make variable description string. 
+ */ + static struct + { + unsigned cch; + const char *psz; + } const aTypes[32] = + { +#define STRENTRY(str) { sizeof(str) - 1, str } + /* system */ + STRENTRY("Reserved0 "), /* 0x00 */ + STRENTRY("TSS16Avail "), /* 0x01 */ + STRENTRY("LDT "), /* 0x02 */ + STRENTRY("TSS16Busy "), /* 0x03 */ + STRENTRY("Call16 "), /* 0x04 */ + STRENTRY("Task "), /* 0x05 */ + STRENTRY("Int16 "), /* 0x06 */ + STRENTRY("Trap16 "), /* 0x07 */ + STRENTRY("Reserved8 "), /* 0x08 */ + STRENTRY("TSS32Avail "), /* 0x09 */ + STRENTRY("ReservedA "), /* 0x0a */ + STRENTRY("TSS32Busy "), /* 0x0b */ + STRENTRY("Call32 "), /* 0x0c */ + STRENTRY("ReservedD "), /* 0x0d */ + STRENTRY("Int32 "), /* 0x0e */ + STRENTRY("Trap32 "), /* 0x0f */ + /* non system */ + STRENTRY("DataRO "), /* 0x10 */ + STRENTRY("DataRO Accessed "), /* 0x11 */ + STRENTRY("DataRW "), /* 0x12 */ + STRENTRY("DataRW Accessed "), /* 0x13 */ + STRENTRY("DataDownRO "), /* 0x14 */ + STRENTRY("DataDownRO Accessed "), /* 0x15 */ + STRENTRY("DataDownRW "), /* 0x16 */ + STRENTRY("DataDownRW Accessed "), /* 0x17 */ + STRENTRY("CodeEO "), /* 0x18 */ + STRENTRY("CodeEO Accessed "), /* 0x19 */ + STRENTRY("CodeER "), /* 0x1a */ + STRENTRY("CodeER Accessed "), /* 0x1b */ + STRENTRY("CodeConfEO "), /* 0x1c */ + STRENTRY("CodeConfEO Accessed "), /* 0x1d */ + STRENTRY("CodeConfER "), /* 0x1e */ + STRENTRY("CodeConfER Accessed ") /* 0x1f */ +#undef SYSENTRY + }; +#define ADD_STR(psz, pszAdd) do { strcpy(psz, pszAdd); psz += strlen(pszAdd); } while (0) + char szMsg[128]; + char *psz = &szMsg[0]; + unsigned i = Desc.Gen.u1DescType << 4 | Desc.Gen.u4Type; + memcpy(psz, aTypes[i].psz, aTypes[i].cch); + psz += aTypes[i].cch; + + if (Desc.Gen.u1Present) + ADD_STR(psz, "Present "); + else + ADD_STR(psz, "Not-Present "); + if (Desc.Gen.u1Granularity) + ADD_STR(psz, "Page "); + if (Desc.Gen.u1DefBig) + ADD_STR(psz, "32-bit "); + else + ADD_STR(psz, "16-bit "); +#undef ADD_STR + *psz = '\0'; + + /* + * Limit and Base and format the output. + */ + uint32_t u32Limit = X86DESC_LIMIT_G(&Desc); + uint32_t u32Base = X86DESC_BASE(&Desc); + + RTStrPrintf(pszOutput, cchOutput, "%04x - %08x %08x - base=%08x limit=%08x dpl=%d %s", + Sel, Desc.au32[0], Desc.au32[1], u32Base, u32Limit, Desc.Gen.u2Dpl, szMsg); +} + + +/** + * Dumps a descriptor. + * + * @param Desc Descriptor to dump. + * @param Sel Selector number. + * @param pszMsg Message to prepend the log entry with. + */ +VMMR3DECL(void) SELMR3DumpDescriptor(X86DESC Desc, RTSEL Sel, const char *pszMsg) +{ +#ifdef LOG_ENABLED + if (LogIsEnabled()) + { + char szOutput[128]; + selmR3FormatDescriptor(Desc, Sel, &szOutput[0], sizeof(szOutput)); + Log(("%s: %s\n", pszMsg, szOutput)); + } +#else + RT_NOREF3(Desc, Sel, pszMsg); +#endif +} + + +/** + * Display the shadow gdt. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. 
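+ *
+ * Each present entry is rendered by selmR3FormatDescriptor() above. Purely as an
+ * illustrative, hand-constructed example (editorial, not output from an actual
+ * run), a flat ring-0 32-bit code segment at selector 0008 would come out
+ * roughly as:
+ * @verbatim
+ *   0008 - 0000ffff 00cf9b00 - base=00000000 limit=ffffffff dpl=0 CodeER Accessed Present Page 32-bit
+ * @endverbatim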
+ */ +static DECLCALLBACK(void) selmR3InfoGdt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + pHlp->pfnPrintf(pHlp, "Shadow GDT (GCAddr=%RRv):\n", MMHyperR3ToRC(pVM, pVM->selm.s.paGdtR3)); + for (unsigned iGDT = 0; iGDT < SELM_GDT_ELEMENTS; iGDT++) + { + if (pVM->selm.s.paGdtR3[iGDT].Gen.u1Present) + { + char szOutput[128]; + selmR3FormatDescriptor(pVM->selm.s.paGdtR3[iGDT], iGDT << X86_SEL_SHIFT, &szOutput[0], sizeof(szOutput)); + const char *psz = ""; + if (iGDT == ((unsigned)pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS] >> X86_SEL_SHIFT)) + psz = " HyperCS"; + else if (iGDT == ((unsigned)pVM->selm.s.aHyperSel[SELM_HYPER_SEL_DS] >> X86_SEL_SHIFT)) + psz = " HyperDS"; + else if (iGDT == ((unsigned)pVM->selm.s.aHyperSel[SELM_HYPER_SEL_CS64] >> X86_SEL_SHIFT)) + psz = " HyperCS64"; + else if (iGDT == ((unsigned)pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS] >> X86_SEL_SHIFT)) + psz = " HyperTSS"; + else if (iGDT == ((unsigned)pVM->selm.s.aHyperSel[SELM_HYPER_SEL_TSS_TRAP08] >> X86_SEL_SHIFT)) + psz = " HyperTSSTrap08"; + pHlp->pfnPrintf(pHlp, "%s%s\n", szOutput, psz); + } + } +} + + +/** + * Display the guest gdt. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) selmR3InfoGdtGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /** @todo SMP support! */ + PVMCPU pVCpu = &pVM->aCpus[0]; + + VBOXGDTR GDTR; + CPUMGetGuestGDTR(pVCpu, &GDTR); + RTGCPTR GCPtrGDT = GDTR.pGdt; + unsigned cGDTs = ((unsigned)GDTR.cbGdt + 1) / sizeof(X86DESC); + + pHlp->pfnPrintf(pHlp, "Guest GDT (GCAddr=%RGv limit=%x):\n", GCPtrGDT, GDTR.cbGdt); + for (unsigned iGDT = 0; iGDT < cGDTs; iGDT++, GCPtrGDT += sizeof(X86DESC)) + { + X86DESC GDTE; + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &GDTE, GCPtrGDT, sizeof(GDTE)); + if (RT_SUCCESS(rc)) + { + if (GDTE.Gen.u1Present) + { + char szOutput[128]; + selmR3FormatDescriptor(GDTE, iGDT << X86_SEL_SHIFT, &szOutput[0], sizeof(szOutput)); + pHlp->pfnPrintf(pHlp, "%s\n", szOutput); + } + } + else if (rc == VERR_PAGE_NOT_PRESENT) + { + if ((GCPtrGDT & PAGE_OFFSET_MASK) + sizeof(X86DESC) - 1 < sizeof(X86DESC)) + pHlp->pfnPrintf(pHlp, "%04x - page not present (GCAddr=%RGv)\n", iGDT << X86_SEL_SHIFT, GCPtrGDT); + } + else + pHlp->pfnPrintf(pHlp, "%04x - read error rc=%Rrc GCAddr=%RGv\n", iGDT << X86_SEL_SHIFT, rc, GCPtrGDT); + } + NOREF(pszArgs); +} + + +/** + * Display the shadow ldt. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) selmR3InfoLdt(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + unsigned cLDTs = ((unsigned)pVM->selm.s.cbLdtLimit + 1) >> X86_SEL_SHIFT; + PX86DESC paLDT = (PX86DESC)((char *)pVM->selm.s.pvLdtR3 + pVM->selm.s.offLdtHyper); + pHlp->pfnPrintf(pHlp, "Shadow LDT (GCAddr=%RRv limit=%#x):\n", pVM->selm.s.pvLdtRC + pVM->selm.s.offLdtHyper, pVM->selm.s.cbLdtLimit); + for (unsigned iLDT = 0; iLDT < cLDTs; iLDT++) + { + if (paLDT[iLDT].Gen.u1Present) + { + char szOutput[128]; + selmR3FormatDescriptor(paLDT[iLDT], (iLDT << X86_SEL_SHIFT) | X86_SEL_LDT, &szOutput[0], sizeof(szOutput)); + pHlp->pfnPrintf(pHlp, "%s\n", szOutput); + } + } + NOREF(pszArgs); +} + + +/** + * Display the guest ldt. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. 
+ */ +static DECLCALLBACK(void) selmR3InfoLdtGuest(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + /** @todo SMP support! */ + PVMCPU pVCpu = &pVM->aCpus[0]; + + uint64_t GCPtrLdt; + uint32_t cbLdt; + RTSEL SelLdt = CPUMGetGuestLdtrEx(pVCpu, &GCPtrLdt, &cbLdt); + if (!(SelLdt & X86_SEL_MASK_OFF_RPL)) + { + pHlp->pfnPrintf(pHlp, "Guest LDT (Sel=%x): Null-Selector\n", SelLdt); + return; + } + + pHlp->pfnPrintf(pHlp, "Guest LDT (Sel=%x GCAddr=%RX64 limit=%x):\n", SelLdt, GCPtrLdt, cbLdt); + unsigned cLdts = (cbLdt + 1) >> X86_SEL_SHIFT; + for (unsigned iLdt = 0; iLdt < cLdts; iLdt++, GCPtrLdt += sizeof(X86DESC)) + { + X86DESC LdtE; + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &LdtE, GCPtrLdt, sizeof(LdtE)); + if (RT_SUCCESS(rc)) + { + if (LdtE.Gen.u1Present) + { + char szOutput[128]; + selmR3FormatDescriptor(LdtE, (iLdt << X86_SEL_SHIFT) | X86_SEL_LDT, &szOutput[0], sizeof(szOutput)); + pHlp->pfnPrintf(pHlp, "%s\n", szOutput); + } + } + else if (rc == VERR_PAGE_NOT_PRESENT) + { + if ((GCPtrLdt & PAGE_OFFSET_MASK) + sizeof(X86DESC) - 1 < sizeof(X86DESC)) + pHlp->pfnPrintf(pHlp, "%04x - page not present (GCAddr=%RGv)\n", (iLdt << X86_SEL_SHIFT) | X86_SEL_LDT, GCPtrLdt); + } + else + pHlp->pfnPrintf(pHlp, "%04x - read error rc=%Rrc GCAddr=%RGv\n", (iLdt << X86_SEL_SHIFT) | X86_SEL_LDT, rc, GCPtrLdt); + } + NOREF(pszArgs); +} + + +/** + * Dumps the hypervisor GDT + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) SELMR3DumpHyperGDT(PVM pVM) +{ + DBGFR3Info(pVM->pUVM, "gdt", NULL, NULL); +} + + +/** + * Dumps the hypervisor LDT + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) SELMR3DumpHyperLDT(PVM pVM) +{ + DBGFR3Info(pVM->pUVM, "ldt", NULL, NULL); +} + + +/** + * Dumps the guest GDT + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) SELMR3DumpGuestGDT(PVM pVM) +{ + DBGFR3Info(pVM->pUVM, "gdtguest", NULL, NULL); +} + + +/** + * Dumps the guest LDT + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) SELMR3DumpGuestLDT(PVM pVM) +{ + DBGFR3Info(pVM->pUVM, "ldtguest", NULL, NULL); +} + diff --git a/src/VBox/VMM/VMMR3/SSM.cpp b/src/VBox/VMM/VMMR3/SSM.cpp new file mode 100644 index 00000000..aa96b408 --- /dev/null +++ b/src/VBox/VMM/VMMR3/SSM.cpp @@ -0,0 +1,9683 @@ +/* $Id: SSM.cpp $ */ +/** @file + * SSM - Saved State Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_ssm SSM - The Saved State Manager + * + * The Saved State Manager (SSM) implements facilities for saving and loading a + * VM state in a structural manner using callbacks for named data units. + * + * At init time each of the VMM components, Devices, Drivers and one or two + * other things will register data units which they need to save and restore. + * Each unit have a unique name (ascii), instance number, and a set of callbacks + * associated with it. The name will be used to identify the unit during + * restore. The callbacks are for the two operations, save and restore. 
There + * are three callbacks for each of the two - a prepare, a execute and a complete + * - giving each component ample opportunity to perform actions both before and + * afterwards. + * + * The SSM provides a number of APIs for encoding and decoding the data: @see + * grp_ssm + * + * + * + * @section sec_ssm_live_snapshots Live Snapshots + * + * The live snapshots feature (LS) is similar to teleportation (TP) and was a + * natural first step when implementing TP. The main differences between LS and + * TP are that after a live snapshot we will have a saved state file, disk image + * snapshots, and the VM will still be running. + * + * Compared to normal saved stated and snapshots, the difference is in that the + * VM is running while we do most of the saving. Prior to LS, there was only + * one round of callbacks during saving and the VM was paused during it. With + * LS there are 1 or more passes while the VM is still running and a final one + * after it has been paused. The runtime passes are executed on a dedicated + * thread running at at the same priority as the EMTs so that the saving doesn't + * starve or lose in scheduling questions (note: not implemented yet). The final + * pass is done on EMT(0). + * + * There are a couple of common reasons why LS and TP will fail: + * - Memory configuration changed (PCI memory mappings). + * - Takes too long (TP) / Too much output (LS). + * + * + * The live saving sequence is something like this: + * + * -# SSMR3LiveSave is called on EMT0. It returns a saved state + * handle. + * -# SSMR3LiveDoStep1 is called on a non-EMT. This will save the major + * parts of the state while the VM may still be running. + * -# The VM is suspended. + * -# SSMR3LiveDoStep2 is called on EMT0 to save the remainder of the state + * in the normal way. + * -# The client does any necessary reconfiguration of harddisks and + * similar. + * -# SSMR3LiveDone is called on EMT0 to close the handle. + * -# The VM is resumed or powered off and destroyed. + * + * + * @section sec_ssm_teleportation Teleportation + * + * As mentioned in the previous section, the main differences between this and + * live snapshots are in where the saved state is written and what state the + * local VM is in afterwards - at least from the VMM point of view. The + * necessary administrative work - establishing the connection to the remote + * machine, cloning the VM config on it and doing lowlevel saved state data + * transfer - is taken care of by layer above the VMM (i.e. Main). + * + * The SSM data format was made streamable for the purpose of teleportation + * (v1.2 was the last non-streamable version). + * + * + * @section sec_ssm_format Saved State Format + * + * The stream format starts with a header (SSMFILEHDR) that indicates the + * version and such things, it is followed by zero or more saved state units + * (name + instance + pass), and the stream concludes with a footer + * (SSMFILEFTR) that contains unit counts and optionally a checksum for the + * entire file. (In version 1.2 and earlier, the checksum was in the header and + * there was no footer. This meant that the header was updated after the entire + * file was written.) + * + * The saved state units each starts with a variable sized header + * (SSMFILEUNITHDRV2) that contains the name, instance and pass. The data + * follows the header and is encoded as records with a 2-8 byte record header + * indicating the type, flags and size. 
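+ * For example, using the bit layout detailed next, a first header byte of
+ * 0x92 denotes an important raw data record: magic bit 7 set, important
+ * bit 4 set, and type 2 in the low nibble.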
The first byte in the record header + * indicates the type and flags: + * + * - bits 0..3: Record type: + * - type 0: Invalid. + * - type 1: Terminator with CRC-32 and unit size. + * - type 2: Raw data record. + * - type 3: Raw data compressed by LZF. The data is prefixed by a 8-bit + * field containing the length of the uncompressed data given in + * 1KB units. + * - type 4: Zero data. The record header is followed by a 8-bit field + * counting the length of the zero data given in 1KB units. + * - type 5: Named data - length prefixed name followed by the data. This + * type is not implemented yet as we're missing the API part, so + * the type assignment is tentative. + * - types 6 thru 15 are current undefined. + * - bit 4: Important (set), can be skipped (clear). + * - bit 5: Undefined flag, must be zero. + * - bit 6: Undefined flag, must be zero. + * - bit 7: "magic" bit, always set. + * + * Record header byte 2 (optionally thru 7) is the size of the following data + * encoded in UTF-8 style. To make buffering simpler and more efficient during + * the save operation, the strict checks enforcing optimal encoding has been + * relaxed for the 2 and 3 byte encodings. + * + * (In version 1.2 and earlier the unit data was compressed and not record + * based. The unit header contained the compressed size of the data, i.e. it + * needed updating after the data was written.) + * + * + * @section sec_ssm_future Future Changes + * + * There are plans to extend SSM to make it easier to be both backwards and + * (somewhat) forwards compatible. One of the new features will be being able + * to classify units and data items as unimportant (added to the format in + * v2.0). Another suggested feature is naming data items (also added to the + * format in v2.0), perhaps by extending the SSMR3PutStruct API. Both features + * will require API changes, the naming may possibly require both buffering of + * the stream as well as some helper managing them. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_SSM +#include +#include +#include +#include +#include +#include "SSMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The max length of a unit name. */ +#define SSM_MAX_NAME_SIZE 48 + +/** Saved state file magic base string. */ +#define SSMFILEHDR_MAGIC_BASE "\177VirtualBox SavedState " +/** Saved state file magic indicating version 1.x. */ +#define SSMFILEHDR_MAGIC_V1_X "\177VirtualBox SavedState V1." +/** Saved state file v1.1 magic. */ +#define SSMFILEHDR_MAGIC_V1_1 "\177VirtualBox SavedState V1.1\n" +/** Saved state file v1.2 magic. */ +#define SSMFILEHDR_MAGIC_V1_2 "\177VirtualBox SavedState V1.2\n\0\0\0" +/** Saved state file v2.0 magic. */ +#define SSMFILEHDR_MAGIC_V2_0 "\177VirtualBox SavedState V2.0\n\0\0\0" + +/** @name SSMFILEHDR::fFlags + * @{ */ +/** The stream is checksummed up to the footer using CRC-32. 
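+ * (The resulting CRC is stored in the footer, see SSMFILEFTR::u32StreamCRC.)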
*/ +#define SSMFILEHDR_FLAGS_STREAM_CRC32 RT_BIT_32(0) +/** Indicates that the file was produced by a live save. */ +#define SSMFILEHDR_FLAGS_STREAM_LIVE_SAVE RT_BIT_32(1) +/** @} */ + +/** The directory magic. */ +#define SSMFILEDIR_MAGIC "\nDir\n\0\0" + +/** Saved state file v2.0 magic. */ +#define SSMFILEFTR_MAGIC "\nFooter" + +/** Data unit magic. */ +#define SSMFILEUNITHDR_MAGIC "\nUnit\n\0" +/** Data end marker magic. */ +#define SSMFILEUNITHDR_END "\nTheEnd" + + +/** @name Record Types (data unit) + * @{ */ +/** The record type mask. */ +#define SSM_REC_TYPE_MASK UINT8_C(0x0f) +/** Invalid record. */ +#define SSM_REC_TYPE_INVALID 0 +/** Normal termination record, see SSMRECTERM. */ +#define SSM_REC_TYPE_TERM 1 +/** Raw data. The data follows the size field without further ado. */ +#define SSM_REC_TYPE_RAW 2 +/** Raw data compressed by LZF. + * The record header is followed by a 8-bit field containing the size of the + * uncompressed data in 1KB units. The compressed data is after it. */ +#define SSM_REC_TYPE_RAW_LZF 3 +/** Raw zero data. + * The record header is followed by a 8-bit field containing the size of the + * zero data in 1KB units. */ +#define SSM_REC_TYPE_RAW_ZERO 4 +/** Named data items. + * A length prefix zero terminated string (i.e. max 255) followed by the data. */ +#define SSM_REC_TYPE_NAMED 5 +/** Macro for validating the record type. + * This can be used with the flags+type byte, no need to mask out the type first. */ +#define SSM_REC_TYPE_IS_VALID(u8Type) ( ((u8Type) & SSM_REC_TYPE_MASK) > SSM_REC_TYPE_INVALID \ + && ((u8Type) & SSM_REC_TYPE_MASK) <= SSM_REC_TYPE_NAMED ) +/** @} */ + +/** The flag mask. */ +#define SSM_REC_FLAGS_MASK UINT8_C(0xf0) +/** The record is important if this flag is set, if clear it can be omitted. */ +#define SSM_REC_FLAGS_IMPORTANT UINT8_C(0x10) +/** This flag is always set. */ +#define SSM_REC_FLAGS_FIXED UINT8_C(0x80) +/** Macro for validating the flags. + * No need to mask the flags out of the flags+type byte before invoking this macro. */ +#define SSM_REC_FLAGS_ARE_VALID(fFlags) ( ((fFlags) & UINT8_C(0xe0)) == UINT8_C(0x80) ) + +/** Macro for validating the type and flags byte in a data record. */ +#define SSM_REC_ARE_TYPE_AND_FLAGS_VALID(u8) ( SSM_REC_FLAGS_ARE_VALID(u8) && SSM_REC_TYPE_IS_VALID(u8) ) + +/** @name SSMRECTERM::fFlags + * @{ */ +/** There is a CRC-32 value for the stream. */ +#define SSMRECTERM_FLAGS_CRC32 UINT16_C(0x0001) +/** @} */ + +/** Start structure magic. (Isaac Asimov) */ +#define SSMR3STRUCT_BEGIN UINT32_C(0x19200102) +/** End structure magic. (Isaac Asimov) */ +#define SSMR3STRUCT_END UINT32_C(0x19920406) + + +/** Number of bytes to log in Log2 and Log4 statements. */ +#define SSM_LOG_BYTES 16 + +/** SSMHANDLE::fCancelled value indicating that the operation has been + * cancelled. */ +#define SSMHANDLE_CANCELLED UINT32_C(0xdeadbeef) +/** SSMHANDLE::fCancelled value indicating no cancellation. */ +#define SSMHANDLE_OK UINT32_C(0x77777777) + + +/** Macro for checking the u32CRC field of a structure. + * The Msg can assume there are u32ActualCRC and u32CRC in the context. */ +#define SSM_CHECK_CRC32_RET(p, cb, Msg) \ + do \ + { \ + uint32_t u32CRC = (p)->u32CRC; \ + (p)->u32CRC = 0; \ + uint32_t u32ActualCRC = RTCrc32((p), (cb)); \ + (p)->u32CRC = u32CRC; \ + AssertLogRelMsgReturn(u32ActualCRC == u32CRC, Msg, VERR_SSM_INTEGRITY_CRC); \ + } while (0) + +/** The number of bytes to compress is one block. + * Must be a multiple of 1KB. 
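+ * (This matches the RAW_LZF and RAW_ZERO records, whose size fields are
+ * expressed in 1KB units; the AssertCompile below enforces it.)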
*/ +#define SSM_ZIP_BLOCK_SIZE _4K +AssertCompile(SSM_ZIP_BLOCK_SIZE / _1K * _1K == SSM_ZIP_BLOCK_SIZE); + + +/** + * Asserts that the handle is writable and returns with VERR_SSM_INVALID_STATE + * if it isn't. + */ +#define SSM_ASSERT_WRITEABLE_RET(pSSM) \ + AssertMsgReturn( pSSM->enmOp == SSMSTATE_SAVE_EXEC \ + || pSSM->enmOp == SSMSTATE_LIVE_EXEC,\ + ("Invalid state %d\n", pSSM->enmOp), VERR_SSM_INVALID_STATE); + +/** + * Asserts that the handle is readable and returns with VERR_SSM_INVALID_STATE + * if it isn't. + */ +#define SSM_ASSERT_READABLE_RET(pSSM) \ + AssertMsgReturn( pSSM->enmOp == SSMSTATE_LOAD_EXEC \ + || pSSM->enmOp == SSMSTATE_OPEN_READ,\ + ("Invalid state %d\n", pSSM->enmOp), VERR_SSM_INVALID_STATE); + +/** Checks for cancellation and returns if pending. + * Sets SSMHANDLE::rc to VERR_SSM_CANCELLED (if it still indicates success) and + * then returns SSMHANDLE::rc. (Debug logging only.) */ +#define SSM_CHECK_CANCELLED_RET(pSSM) \ + do \ + { \ + if (RT_UNLIKELY(ASMAtomicUoReadU32(&(pSSM)->fCancelled) == SSMHANDLE_CANCELLED)) \ + { \ + LogFlow(("%Rfn: Cancelled -> VERR_SSM_CANCELLED\n", __PRETTY_FUNCTION__)); \ + if (RT_SUCCESS((pSSM)->rc)) \ + (pSSM)->rc = VERR_SSM_CANCELLED; \ + return (pSSM)->rc; \ + } \ + } while (0) + +/** + * Asserts that the handle is somewhat valid. No returns as this is just a + * simple safeguard for catching bad API calls. */ +#define SSM_ASSERT_VALID_HANDLE(pSSM) \ + do \ + { \ + AssertPtr(pSSM); \ + Assert(pSSM->enmOp > SSMSTATE_INVALID && pSSM->enmOp < SSMSTATE_END); \ + } while (0) + + +/** @def SSM_HOST_IS_MSC_32 + * Set to 1 if the host is 32-bit MSC, otherwise set to 0. + * */ +#if defined(_MSC_VER) && HC_ARCH_BITS == 32 +# define SSM_HOST_IS_MSC_32 1 +#else +# define SSM_HOST_IS_MSC_32 0 +#endif + + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** SSM state. */ +typedef enum SSMSTATE +{ + SSMSTATE_INVALID = 0, + SSMSTATE_LIVE_PREP, + SSMSTATE_LIVE_STEP1, + SSMSTATE_LIVE_EXEC, + SSMSTATE_LIVE_VOTE, + SSMSTATE_LIVE_STEP2, + SSMSTATE_SAVE_PREP, + SSMSTATE_SAVE_EXEC, + SSMSTATE_SAVE_DONE, + SSMSTATE_LOAD_PREP, + SSMSTATE_LOAD_EXEC, + SSMSTATE_LOAD_DONE, + SSMSTATE_OPEN_READ, + SSMSTATE_END +} SSMSTATE; + + +/** Pointer to a SSM stream buffer. */ +typedef struct SSMSTRMBUF *PSSMSTRMBUF; +/** + * A SSM stream buffer. + */ +typedef struct SSMSTRMBUF +{ + /** The buffer data. */ + uint8_t abData[_64K]; + + /** The stream position of this buffer. */ + uint64_t offStream; + /** The amount of buffered data. */ + uint32_t cb; + /** End of stream indicator (for read streams only). */ + bool fEndOfStream; + /** The nano timestamp set by ssmR3StrmGetFreeBuf. */ + uint64_t NanoTS; + /** Pointer to the next buffer in the chain. */ + PSSMSTRMBUF volatile pNext; +} SSMSTRMBUF; + +/** + * SSM stream. + * + * This is a typical producer / consumer setup with a dedicated I/O thread and + * fixed number of buffers for read ahead and write back. + */ +typedef struct SSMSTRM +{ + /** The stream method table. */ + PCSSMSTRMOPS pOps; + /** The user argument for the stream methods. + * For file based streams, this is the file handle and not a pointer. */ + void *pvUser; + + /** Write (set) or read (clear) stream. */ + bool fWrite; + /** Termination indicator. 
*/ + bool volatile fTerminating; + /** Indicates whether it is necessary to seek before the next buffer is + * read from the stream. This is used to avoid a seek in ssmR3StrmPeekAt. */ + bool fNeedSeek; + /** Stream error status. */ + int32_t volatile rc; + /** The handle of the I/O thread. This is set to nil when not active. */ + RTTHREAD hIoThread; + /** Where to seek to. */ + uint64_t offNeedSeekTo; + + /** The head of the consumer queue. + * For save the consumer is the I/O thread. For load the I/O thread is the + * producer. */ + PSSMSTRMBUF volatile pHead; + /** Chain of free buffers. + * The consumer/producer roles are the inverse of pHead. */ + PSSMSTRMBUF volatile pFree; + /** Event that's signalled when pHead is updated. */ + RTSEMEVENT hEvtHead; + /** Event that's signalled when pFree is updated. */ + RTSEMEVENT hEvtFree; + + /** List of pending buffers that has been dequeued from pHead and reversed. */ + PSSMSTRMBUF pPending; + /** Pointer to the current buffer. */ + PSSMSTRMBUF pCur; + /** The stream offset of the current buffer. */ + uint64_t offCurStream; + /** The current buffer offset. */ + uint32_t off; + /** Whether we're checksumming reads/writes. */ + bool fChecksummed; + /** The stream CRC if fChecksummed is set. */ + uint32_t u32StreamCRC; + /** How far into the buffer u32StreamCRC is up-to-date. + * This may lag behind off as it's desirable to checksum as large blocks as + * possible. */ + uint32_t offStreamCRC; +} SSMSTRM; +/** Pointer to a SSM stream. */ +typedef SSMSTRM *PSSMSTRM; + + +/** + * Handle structure. + */ +typedef struct SSMHANDLE +{ + /** Stream/buffer manager. */ + SSMSTRM Strm; + + /** Pointer to the VM. */ + PVM pVM; + /** The current operation. */ + SSMSTATE enmOp; + /** What to do after save completes. (move the enum) */ + SSMAFTER enmAfter; + /** Flag indicating that the operation has been cancelled. */ + uint32_t volatile fCancelled; + /** The current rc of the save operation. */ + int32_t rc; + /** Number of compressed bytes left in the current data unit (V1). */ + uint64_t cbUnitLeftV1; + /** The current compressed? offset into the data unit. */ + uint64_t offUnit; + /** The current user data offset into the unit (debug purposes). */ + uint64_t offUnitUser; + /** Indicates that this is a live save or restore operation. */ + bool fLiveSave; + + /** Pointer to the progress callback function. */ + PFNVMPROGRESS pfnProgress; + /** User specified argument to the callback function. */ + void *pvUser; + /** Next completion percentage. (corresponds to offEstProgress) */ + unsigned uPercent; + /** The position of the next progress callback in the estimated file. */ + uint64_t offEstProgress; + /** The estimated total byte count. + * (Only valid after the prep.) */ + uint64_t cbEstTotal; + /** Current position in the estimated file. */ + uint64_t offEst; + /** End of current unit in the estimated file. */ + uint64_t offEstUnitEnd; + /** The amount of % we reserve for the 'live' stage */ + unsigned uPercentLive; + /** The amount of % we reserve for the 'prepare' phase */ + unsigned uPercentPrepare; + /** The amount of % we reserve for the 'done' stage */ + unsigned uPercentDone; + /** The lowest value reported via SSMR3HandleReportLivePercent during one + * vote run. */ + unsigned uReportedLivePercent; + /** The filename, NULL if remote stream. */ + const char *pszFilename; + + union + { + /** Write data. */ + struct + { + /** Offset into the databuffer. */ + uint32_t offDataBuffer; + /** Space for the record header. 
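+ * (One type/flags byte plus up to seven size bytes - the 2-8 byte record
+ * header described in the format notes at the top of the file.)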
*/ + uint8_t abRecHdr[1+7]; + /** Data buffer. */ + uint8_t abDataBuffer[4096]; + /** The maximum downtime given as milliseconds. */ + uint32_t cMsMaxDowntime; + } Write; + + /** Read data. */ + struct + { + /** V1: The decompressor of the current data unit. */ + PRTZIPDECOMP pZipDecompV1; + /** The major format version number. */ + uint32_t uFmtVerMajor; + /** The minor format version number. */ + uint32_t uFmtVerMinor; + + /** V2: Unread bytes in the current record. */ + uint32_t cbRecLeft; + /** V2: Bytes in the data buffer. */ + uint32_t cbDataBuffer; + /** V2: Current buffer position. */ + uint32_t offDataBuffer; + /** V2: End of data indicator. */ + bool fEndOfData; + /** V2: The type and flags byte fo the current record. */ + uint8_t u8TypeAndFlags; + + /** @name Context info for SSMR3SetLoadError. + * @{ */ + /** Pointer to the header for the current unit. */ + PSSMUNIT pCurUnit; + /** The version of the current unit if in the load exec stage. */ + uint32_t uCurUnitVer; + /** The pass number of the current unit if in the load exec stage. */ + uint32_t uCurUnitPass; + /** Whether SSMR3SetLoadError[V] has been called. + * @note Using ASMAtomicXchgBool because I'm very lazy. */ + bool volatile fHaveSetError; + /** @} */ + + /** RTGCPHYS size in bytes. (Only applicable when loading/reading.) */ + unsigned cbGCPhys; + /** RTGCPTR size in bytes. (Only applicable when loading/reading.) */ + unsigned cbGCPtr; + /** Whether cbGCPtr is fixed or settable. */ + bool fFixedGCPtrSize; + + /** 32-bit MSC saved this? */ + bool fIsHostMsc32; + /** "Host OS" dot "architecture", picked up from recent SSM data units. */ + char szHostOSAndArch[32]; + + /** @name Header info (set by ssmR3ValidateFile) + * @{ */ + /** The size of the file header. */ + uint32_t cbFileHdr; + /** The major version number. */ + uint16_t u16VerMajor; + /** The minor version number. */ + uint16_t u16VerMinor; + /** The build number. */ + uint32_t u32VerBuild; + /** The SVN revision. */ + uint32_t u32SvnRev; + /** 32 or 64 depending on the host. */ + uint8_t cHostBits; + /** Whether the stream is checksummed (SSMFILEHDR_FLAGS_STREAM_CRC32). */ + bool fStreamCrc32; + /** The CRC of the loaded file. */ + uint32_t u32LoadCRC; + /** The size of the load file. */ + uint64_t cbLoadFile; + /** @} */ + + /** V2: Data buffer. + * @remarks Be extremely careful when changing the size of this buffer! */ + uint8_t abDataBuffer[4096]; + + /** V2: Decompression buffer for when we cannot use the stream buffer. */ + uint8_t abComprBuffer[4096]; + } Read; + } u; +} SSMHANDLE; + + +/** + * Header of the saved state file. + * + * Added in r5xxxx on 2009-07-2?, VirtualBox v3.0.51. + */ +typedef struct SSMFILEHDR +{ + /** Magic string which identifies this file as a version of VBox saved state + * file format (SSMFILEHDR_MAGIC_V2_0). */ + char szMagic[32]; + /** The major version number. */ + uint16_t u16VerMajor; + /** The minor version number. */ + uint16_t u16VerMinor; + /** The build number. */ + uint32_t u32VerBuild; + /** The SVN revision. */ + uint32_t u32SvnRev; + /** 32 or 64 depending on the host. */ + uint8_t cHostBits; + /** The size of RTGCPHYS. */ + uint8_t cbGCPhys; + /** The size of RTGCPTR. */ + uint8_t cbGCPtr; + /** Reserved header space - must be zero. */ + uint8_t u8Reserved; + /** The number of units that (may) have stored data in the file. */ + uint32_t cUnits; + /** Flags, see SSMFILEHDR_FLAGS_XXX. */ + uint32_t fFlags; + /** The maximum size of decompressed data. */ + uint32_t cbMaxDecompr; + /** The checksum of this header. 
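+ * (A CRC-32 value, cf. SSM_CHECK_CRC32_RET.)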
+ * This field is set to zero when calculating the checksum. */ + uint32_t u32CRC; +} SSMFILEHDR; +AssertCompileSize(SSMFILEHDR, 64); +AssertCompileMemberOffset(SSMFILEHDR, u32CRC, 60); +AssertCompileMemberSize(SSMFILEHDR, szMagic, sizeof(SSMFILEHDR_MAGIC_V2_0)); +/** Pointer to a saved state file header. */ +typedef SSMFILEHDR *PSSMFILEHDR; +/** Pointer to a const saved state file header. */ +typedef SSMFILEHDR const *PCSSMFILEHDR; + + +/** + * Header of the saved state file. + * + * Added in r40980 on 2008-12-15, VirtualBox v2.0.51. + * + * @remarks This is a superset of SSMFILEHDRV11. + */ +typedef struct SSMFILEHDRV12 +{ + /** Magic string which identifies this file as a version of VBox saved state + * file format (SSMFILEHDR_MAGIC_V1_2). */ + char achMagic[32]; + /** The size of this file. Used to check + * whether the save completed and that things are fine otherwise. */ + uint64_t cbFile; + /** File checksum. The actual calculation skips past the u32CRC field. */ + uint32_t u32CRC; + /** Padding. */ + uint32_t u32Reserved; + /** The machine UUID. (Ignored if NIL.) */ + RTUUID MachineUuid; + + /** The major version number. */ + uint16_t u16VerMajor; + /** The minor version number. */ + uint16_t u16VerMinor; + /** The build number. */ + uint32_t u32VerBuild; + /** The SVN revision. */ + uint32_t u32SvnRev; + + /** 32 or 64 depending on the host. */ + uint8_t cHostBits; + /** The size of RTGCPHYS. */ + uint8_t cbGCPhys; + /** The size of RTGCPTR. */ + uint8_t cbGCPtr; + /** Padding. */ + uint8_t au8Reserved; +} SSMFILEHDRV12; +AssertCompileSize(SSMFILEHDRV12, 64+16); +AssertCompileMemberOffset(SSMFILEHDRV12, u32CRC, 40); +AssertCompileMemberSize(SSMFILEHDRV12, achMagic, sizeof(SSMFILEHDR_MAGIC_V1_2)); +/** Pointer to a saved state file header. */ +typedef SSMFILEHDRV12 *PSSMFILEHDRV12; + + +/** + * Header of the saved state file, version 1.1. + * + * Added in r23677 on 2007-08-17, VirtualBox v1.4.1. + */ +typedef struct SSMFILEHDRV11 +{ + /** Magic string which identifies this file as a version of VBox saved state + * file format (SSMFILEHDR_MAGIC_V1_1). */ + char achMagic[32]; + /** The size of this file. Used to check + * whether the save completed and that things are fine otherwise. */ + uint64_t cbFile; + /** File checksum. The actual calculation skips past the u32CRC field. */ + uint32_t u32CRC; + /** Padding. */ + uint32_t u32Reserved; + /** The machine UUID. (Ignored if NIL.) */ + RTUUID MachineUuid; +} SSMFILEHDRV11; +AssertCompileSize(SSMFILEHDRV11, 64); +AssertCompileMemberOffset(SSMFILEHDRV11, u32CRC, 40); +/** Pointer to a saved state file header. */ +typedef SSMFILEHDRV11 *PSSMFILEHDRV11; + + +/** + * Data unit header. + */ +typedef struct SSMFILEUNITHDRV2 +{ + /** Magic (SSMFILEUNITHDR_MAGIC or SSMFILEUNITHDR_END). */ + char szMagic[8]; + /** The offset in the saved state stream of the start of this unit. + * This is mainly intended for sanity checking. */ + uint64_t offStream; + /** The CRC-in-progress value this unit starts at. */ + uint32_t u32CurStreamCRC; + /** The checksum of this structure, including the whole name. + * Calculated with this field set to zero. */ + uint32_t u32CRC; + /** Data version. */ + uint32_t u32Version; + /** Instance number. */ + uint32_t u32Instance; + /** Data pass number. */ + uint32_t u32Pass; + /** Flags reserved for future extensions. Must be zero. */ + uint32_t fFlags; + /** Size of the data unit name including the terminator. (bytes) */ + uint32_t cbName; + /** Data unit name, variable size. 
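+ * (Zero terminated; cbName above is the length including the terminator.)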
*/ + char szName[SSM_MAX_NAME_SIZE]; +} SSMFILEUNITHDRV2; +AssertCompileMemberOffset(SSMFILEUNITHDRV2, szName, 44); +AssertCompileMemberSize(SSMFILEUNITHDRV2, szMagic, sizeof(SSMFILEUNITHDR_MAGIC)); +AssertCompileMemberSize(SSMFILEUNITHDRV2, szMagic, sizeof(SSMFILEUNITHDR_END)); +/** Pointer to SSMFILEUNITHDRV2. */ +typedef SSMFILEUNITHDRV2 *PSSMFILEUNITHDRV2; + + +/** + * Data unit header. + * + * This is used by v1.0, v1.1 and v1.2 of the format. + */ +typedef struct SSMFILEUNITHDRV1 +{ + /** Magic (SSMFILEUNITHDR_MAGIC or SSMFILEUNITHDR_END). */ + char achMagic[8]; + /** Number of bytes in this data unit including the header. */ + uint64_t cbUnit; + /** Data version. */ + uint32_t u32Version; + /** Instance number. */ + uint32_t u32Instance; + /** Size of the data unit name including the terminator. (bytes) */ + uint32_t cchName; + /** Data unit name. */ + char szName[1]; +} SSMFILEUNITHDRV1; +/** Pointer to SSMFILEUNITHDR. */ +typedef SSMFILEUNITHDRV1 *PSSMFILEUNITHDRV1; + + +/** + * Termination data record. + */ +typedef struct SSMRECTERM +{ + uint8_t u8TypeAndFlags; + /** The record size (sizeof(SSMRECTERM) - 2). */ + uint8_t cbRec; + /** Flags, see SSMRECTERM_FLAGS_CRC32. */ + uint16_t fFlags; + /** The checksum of the stream up to fFlags (exclusive). */ + uint32_t u32StreamCRC; + /** The length of this data unit in bytes (including this record). */ + uint64_t cbUnit; +} SSMRECTERM; +AssertCompileSize(SSMRECTERM, 16); +AssertCompileMemberAlignment(SSMRECTERM, cbUnit, 8); +/** Pointer to a termination record. */ +typedef SSMRECTERM *PSSMRECTERM; +/** Pointer to a const termination record. */ +typedef SSMRECTERM const *PCSSMRECTERM; + + +/** + * Directory entry. + */ +typedef struct SSMFILEDIRENTRY +{ + /** The offset of the data unit. */ + uint64_t off; + /** The instance number. */ + uint32_t u32Instance; + /** The CRC-32 of the name excluding the terminator. (lazy bird) */ + uint32_t u32NameCRC; +} SSMFILEDIRENTRY; +AssertCompileSize(SSMFILEDIRENTRY, 16); +/** Pointer to a directory entry. */ +typedef SSMFILEDIRENTRY *PSSMFILEDIRENTRY; +/** Pointer to a const directory entry. */ +typedef SSMFILEDIRENTRY const *PCSSMFILEDIRENTRY; + +/** + * Directory for the data units from the final pass. + * + * This is used to speed up SSMR3Seek (it would have to decompress and parse the + * whole stream otherwise). + */ +typedef struct SSMFILEDIR +{ + /** Magic string (SSMFILEDIR_MAGIC). */ + char szMagic[8]; + /** The CRC-32 for the whole directory. + * Calculated with this field set to zero. */ + uint32_t u32CRC; + /** The number of directory entries. */ + uint32_t cEntries; + /** The directory entries (variable size). */ + SSMFILEDIRENTRY aEntries[1]; +} SSMFILEDIR; +AssertCompileSize(SSMFILEDIR, 32); +/** Pointer to a directory. */ +typedef SSMFILEDIR *PSSMFILEDIR; +/** Pointer to a const directory. */ +typedef SSMFILEDIR *PSSMFILEDIR; + + +/** + * Footer structure + */ +typedef struct SSMFILEFTR +{ + /** Magic string (SSMFILEFTR_MAGIC). */ + char szMagic[8]; + /** The offset of this record in the stream. */ + uint64_t offStream; + /** The CRC for the stream. + * This is set to zero if SSMFILEHDR_FLAGS_STREAM_CRC32 is clear. */ + uint32_t u32StreamCRC; + /** Number directory entries. */ + uint32_t cDirEntries; + /** Reserved footer space - must be zero. */ + uint32_t u32Reserved; + /** The CRC-32 for this structure. + * Calculated with this field set to zero. */ + uint32_t u32CRC; +} SSMFILEFTR; +AssertCompileSize(SSMFILEFTR, 32); +/** Pointer to a footer. 
*/ +typedef SSMFILEFTR *PSSMFILEFTR; +/** Pointer to a const footer. */ +typedef SSMFILEFTR const *PCSSMFILEFTR; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifndef SSM_STANDALONE +/** Zeros used by the struct putter. + * This must be at least 8 bytes or the code breaks. */ +static uint8_t const g_abZero[_1K] = {0}; +#endif + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifndef SSM_STANDALONE +static int ssmR3LazyInit(PVM pVM); +static DECLCALLBACK(int) ssmR3SelfLiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass); +static DECLCALLBACK(int) ssmR3SelfSaveExec(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) ssmR3SelfLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(int) ssmR3LiveControlLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static int ssmR3Register(PVM pVM, const char *pszName, uint32_t uInstance, uint32_t uVersion, size_t cbGuess, const char *pszBefore, PSSMUNIT *ppUnit); +static int ssmR3LiveControlEmit(PSSMHANDLE pSSM, long double lrdPct, uint32_t uPass); +#endif + +static int ssmR3StrmWriteBuffers(PSSMSTRM pStrm); +static int ssmR3StrmReadMore(PSSMSTRM pStrm); + +#ifndef SSM_STANDALONE +static int ssmR3DataFlushBuffer(PSSMHANDLE pSSM); +#endif +static int ssmR3DataReadRecHdrV2(PSSMHANDLE pSSM); + + +#ifndef SSM_STANDALONE + +/** + * Cleans up resources allocated by SSM on VM termination. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) SSMR3Term(PVM pVM) +{ + if (pVM->ssm.s.fInitialized) + { + pVM->ssm.s.fInitialized = false; + RTCritSectDelete(&pVM->ssm.s.CancelCritSect); + } +} + + +/** + * Performs lazy initialization of the SSM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int ssmR3LazyInit(PVM pVM) +{ + /* + * Register a saved state unit which we use to put the VirtualBox version, + * revision and similar stuff in. + */ + pVM->ssm.s.fInitialized = true; + int rc = SSMR3RegisterInternal(pVM, "SSM", 0 /*uInstance*/, 1 /*uVersion*/, 64 /*cbGuess*/, + NULL /*pfnLivePrep*/, ssmR3SelfLiveExec, NULL /*pfnLiveVote*/, + NULL /*pfnSavePrep*/, ssmR3SelfSaveExec, NULL /*pfnSaveDone*/, + NULL /*pfnSavePrep*/, ssmR3SelfLoadExec, NULL /*pfnSaveDone*/); + if (RT_SUCCESS(rc)) + rc = SSMR3RegisterInternal(pVM, "SSMLiveControl", 0 /*uInstance*/, 1 /*uVersion*/, 1 /*cbGuess*/, + NULL /*pfnLivePrep*/, NULL /*pfnLiveExec*/, NULL /*pfnLiveVote*/, + NULL /*pfnSavePrep*/, NULL /*pfnSaveExec*/, NULL /*pfnSaveDone*/, + NULL /*pfnSavePrep*/, ssmR3LiveControlLoadExec, NULL /*pfnSaveDone*/); + + /* + * Initialize the cancellation critsect now. + */ + if (RT_SUCCESS(rc)) + rc = RTCritSectInit(&pVM->ssm.s.CancelCritSect); + if (RT_SUCCESS(rc)) + { + STAM_REL_REG_USED(pVM, &pVM->ssm.s.uPass, STAMTYPE_U32, "/SSM/uPass", STAMUNIT_COUNT, "Current pass"); + } + + pVM->ssm.s.fInitialized = RT_SUCCESS(rc); + return rc; +} + + +/** + * Do ssmR3SelfSaveExec in pass 0. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. 
+ * @param uPass The data pass number. + */ +static DECLCALLBACK(int) ssmR3SelfLiveExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + if (uPass == 0) + { + int rc = ssmR3SelfSaveExec(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = VINF_SSM_DONT_CALL_AGAIN; + return rc; + } + AssertFailed(); + return VERR_SSM_UNEXPECTED_PASS; +} + + +/** + * For saving usful things without having to go thru the tedious process of + * adding it to the header. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + */ +static DECLCALLBACK(int) ssmR3SelfSaveExec(PVM pVM, PSSMHANDLE pSSM) +{ + NOREF(pVM); + + /* + * String table containing pairs of variable and value string. + * Terminated by two empty strings. + */ + SSMR3PutStrZ(pSSM, "Build Type"); + SSMR3PutStrZ(pSSM, KBUILD_TYPE); + SSMR3PutStrZ(pSSM, "Host OS"); + SSMR3PutStrZ(pSSM, KBUILD_TARGET "." KBUILD_TARGET_ARCH); +#ifdef VBOX_OSE + SSMR3PutStrZ(pSSM, "OSE"); + SSMR3PutStrZ(pSSM, "true"); +#endif + + /* terminator */ + SSMR3PutStrZ(pSSM, ""); + return SSMR3PutStrZ(pSSM, ""); +} + + +/** + * For load the version + revision and stuff. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uVersion The version (1). + * @param uPass The pass. + */ +static DECLCALLBACK(int) ssmR3SelfLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + AssertLogRelMsgReturn(uVersion == 1, ("%d\n", uVersion), VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION); + NOREF(pVM); NOREF(uPass); + + /* + * The first and last passes contains a {name, value} string table that is + * terminated by two emptry strings. It contains useful informal build + * info and can be very handy when something goes wrong after restore. + */ + if ( uPass == 0 + || uPass == SSM_PASS_FINAL) + { + for (unsigned i = 0; ; i++) + { + char szVar[128]; + char szValue[1024]; + int rc = SSMR3GetStrZ(pSSM, szVar, sizeof(szVar)); + AssertRCReturn(rc, rc); + rc = SSMR3GetStrZ(pSSM, szValue, sizeof(szValue)); + AssertRCReturn(rc, rc); + if (!szVar[0] && !szValue[0]) + break; + if (i == 0) + LogRel(("SSM: Saved state info:\n")); + LogRel(("SSM: %s: %s\n", szVar, szValue)); + + /* + * Detect 32-bit MSC for handling SSMFIELD_ENTRY_PAD_MSC32_AUTO. + * Save the Host OS for SSMR3HandleHostOSAndArch + */ + if (!strcmp(szVar, "Host OS")) + { + bool fIsHostMsc32 = !strcmp(szValue, "win.x86"); + if (fIsHostMsc32 != pSSM->u.Read.fIsHostMsc32) + { + LogRel(("SSM: (fIsHostMsc32 %RTbool => %RTbool)\n", pSSM->u.Read.fIsHostMsc32, fIsHostMsc32)); + pSSM->u.Read.fIsHostMsc32 = fIsHostMsc32; + } + + size_t cchValue = strlen(szValue); + size_t cchCopy = RT_MIN(cchValue, sizeof(pSSM->u.Read.szHostOSAndArch) - 1); + Assert(cchValue == cchCopy); + memcpy(pSSM->u.Read.szHostOSAndArch, szValue, cchCopy); + pSSM->u.Read.szHostOSAndArch[cchCopy] = '\0'; + } + } + } + return VINF_SUCCESS; +} + + +/** + * Load exec callback for the special live save state unit that tracks the + * progress of a live save. + * + * This is saved by ssmR3LiveControlEmit(). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param uVersion The version (1). + * @param uPass The pass. 
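+ *
+ * @remarks The saved value is a progress figure in parts per ten thousand,
+ *          which the code below scales into the exec-phase percent range.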
+ */ +static DECLCALLBACK(int) ssmR3LiveControlLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + AssertLogRelMsgReturn(uVersion == 1, ("%d\n", uVersion), VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION); + NOREF(uPass); + + uint16_t uPartsPerTenThousand; + int rc = SSMR3GetU16(pSSM, &uPartsPerTenThousand); + if (RT_SUCCESS(rc)) + { + /* Scale it down to fit in our exec range. */ + unsigned uPct = (unsigned)( (long double)uPartsPerTenThousand / 100 + * (100 - pSSM->uPercentPrepare - pSSM->uPercentDone) / 100) + + pSSM->uPercentPrepare; + if (uPct != pSSM->uPercent) + { + AssertMsg(uPct < 100, ("uPct=%d uPartsPerTenThousand=%d uPercentPrepare=%d uPercentDone=%d\n", uPct, uPartsPerTenThousand, pSSM->uPercentPrepare, pSSM->uPercentDone)); + pSSM->uPercent = uPct; + if (pSSM->pfnProgress) + pSSM->pfnProgress(pVM->pUVM, RT_MIN(uPct, 100 - pSSM->uPercentDone), pSSM->pvUser); + } + } + return rc; +} + + +/** + * Internal registration worker. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName Data unit name. + * @param uInstance The instance id. + * @param uVersion The data unit version. + * @param cbGuess The guessed data unit size. + * @param pszBefore Name of data unit to be placed in front of. + * Optional. + * @param ppUnit Where to store the inserted unit node. + * Caller must fill in the missing details. + */ +static int ssmR3Register(PVM pVM, const char *pszName, uint32_t uInstance, + uint32_t uVersion, size_t cbGuess, const char *pszBefore, PSSMUNIT *ppUnit) +{ + /* + * Validate input. + */ + AssertPtr(pszName); + AssertReturn(*pszName, VERR_INVALID_PARAMETER); + size_t cchName = strlen(pszName); + AssertMsgReturn(cchName < SSM_MAX_NAME_SIZE, ("%zu >= %u: %s\n", cchName, SSM_MAX_NAME_SIZE, pszName), VERR_OUT_OF_RANGE); + + AssertReturn(!pszBefore || *pszBefore, VERR_INVALID_PARAMETER); + size_t cchBefore = pszBefore ? strlen(pszBefore) : 0; + AssertMsgReturn(cchBefore < SSM_MAX_NAME_SIZE, ("%zu >= %u: %s\n", cchBefore, SSM_MAX_NAME_SIZE, pszBefore), VERR_OUT_OF_RANGE); + + /* + * Lazy init. + */ + if (!pVM->ssm.s.fInitialized) + { + int rc = ssmR3LazyInit(pVM); + AssertRCReturn(rc, rc); + } + + /* + * Walk to the end of the list checking for duplicates as we go. + */ + PSSMUNIT pUnitBeforePrev = NULL; + PSSMUNIT pUnitBefore = NULL; + PSSMUNIT pUnitPrev = NULL; + PSSMUNIT pUnit = pVM->ssm.s.pHead; + while (pUnit) + { + if ( pUnit->u32Instance == uInstance + && pUnit->cchName == cchName + && !memcmp(pUnit->szName, pszName, cchName)) + { + AssertMsgFailed(("Duplicate registration %s\n", pszName)); + return VERR_SSM_UNIT_EXISTS; + } + if ( pUnit->cchName == cchBefore + && !pUnitBefore + && !memcmp(pUnit->szName, pszBefore, cchBefore)) + { + pUnitBeforePrev = pUnitPrev; + pUnitBefore = pUnit; + } + + /* next */ + pUnitPrev = pUnit; + pUnit = pUnit->pNext; + } + + /* + * Allocate new node. + */ + pUnit = (PSSMUNIT)MMR3HeapAllocZ(pVM, MM_TAG_SSM, RT_UOFFSETOF_DYN(SSMUNIT, szName[cchName + 1])); + if (!pUnit) + return VERR_NO_MEMORY; + + /* + * Fill in (some) data. (Stuff is zero'd.) 
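+     * (MMR3HeapAllocZ returned zero-initialized memory, so only the non-zero
+     * members need to be set.)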
+ */ + pUnit->u32Version = uVersion; + pUnit->u32Instance = uInstance; + pUnit->cbGuess = cbGuess; + pUnit->cchName = cchName; + memcpy(pUnit->szName, pszName, cchName); + + /* + * Insert + */ + if (pUnitBefore) + { + pUnit->pNext = pUnitBefore; + if (pUnitBeforePrev) + pUnitBeforePrev->pNext = pUnit; + else + pVM->ssm.s.pHead = pUnit; + } + else if (pUnitPrev) + pUnitPrev->pNext = pUnit; + else + pVM->ssm.s.pHead = pUnit; + pVM->ssm.s.cUnits++; + + *ppUnit = pUnit; + return VINF_SUCCESS; +} + + +/** + * Register a PDM Devices data unit. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param pszName Data unit name. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. + * @param uVersion Data layout version number. + * @param cbGuess The approximate amount of data in the unit. + * Only for progress indicators. + * @param pszBefore Name of data unit which we should be put in front + * of. Optional (NULL). + * + * @param pfnLivePrep Prepare live save callback, optional. + * @param pfnLiveExec Execute live save callback, optional. + * @param pfnLiveVote Vote live save callback, optional. + * + * @param pfnSavePrep Prepare save callback, optional. + * @param pfnSaveExec Execute save callback, optional. + * @param pfnSaveDone Done save callback, optional. + * + * @param pfnLoadPrep Prepare load callback, optional. + * @param pfnLoadExec Execute load callback, optional. + * @param pfnLoadDone Done load callback, optional. + */ +VMMR3_INT_DECL(int) +SSMR3RegisterDevice(PVM pVM, PPDMDEVINS pDevIns, const char *pszName, + uint32_t uInstance, uint32_t uVersion, size_t cbGuess, const char *pszBefore, + PFNSSMDEVLIVEPREP pfnLivePrep, PFNSSMDEVLIVEEXEC pfnLiveExec, PFNSSMDEVLIVEVOTE pfnLiveVote, + PFNSSMDEVSAVEPREP pfnSavePrep, PFNSSMDEVSAVEEXEC pfnSaveExec, PFNSSMDEVSAVEDONE pfnSaveDone, + PFNSSMDEVLOADPREP pfnLoadPrep, PFNSSMDEVLOADEXEC pfnLoadExec, PFNSSMDEVLOADDONE pfnLoadDone) +{ + PSSMUNIT pUnit; + int rc = ssmR3Register(pVM, pszName, uInstance, uVersion, cbGuess, pszBefore, &pUnit); + if (RT_SUCCESS(rc)) + { + pUnit->enmType = SSMUNITTYPE_DEV; + pUnit->u.Dev.pfnLivePrep = pfnLivePrep; + pUnit->u.Dev.pfnLiveExec = pfnLiveExec; + pUnit->u.Dev.pfnLiveVote = pfnLiveVote; + pUnit->u.Dev.pfnSavePrep = pfnSavePrep; + pUnit->u.Dev.pfnSaveExec = pfnSaveExec; + pUnit->u.Dev.pfnSaveDone = pfnSaveDone; + pUnit->u.Dev.pfnLoadPrep = pfnLoadPrep; + pUnit->u.Dev.pfnLoadExec = pfnLoadExec; + pUnit->u.Dev.pfnLoadDone = pfnLoadDone; + pUnit->u.Dev.pDevIns = pDevIns; + pUnit->pCritSect = PDMR3DevGetCritSect(pVM, pDevIns); + } + return rc; +} + + +/** + * Register a PDM driver data unit. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param pszName Data unit name. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. + * @param uVersion Data layout version number. + * @param cbGuess The approximate amount of data in the unit. + * Only for progress indicators. + * + * @param pfnLivePrep Prepare live save callback, optional. + * @param pfnLiveExec Execute live save callback, optional. + * @param pfnLiveVote Vote live save callback, optional. + * + * @param pfnSavePrep Prepare save callback, optional. + * @param pfnSaveExec Execute save callback, optional. + * @param pfnSaveDone Done save callback, optional. 
+ * + * @param pfnLoadPrep Prepare load callback, optional. + * @param pfnLoadExec Execute load callback, optional. + * @param pfnLoadDone Done load callback, optional. + */ +VMMR3_INT_DECL(int) +SSMR3RegisterDriver(PVM pVM, PPDMDRVINS pDrvIns, const char *pszName, uint32_t uInstance, uint32_t uVersion, size_t cbGuess, + PFNSSMDRVLIVEPREP pfnLivePrep, PFNSSMDRVLIVEEXEC pfnLiveExec, PFNSSMDRVLIVEVOTE pfnLiveVote, + PFNSSMDRVSAVEPREP pfnSavePrep, PFNSSMDRVSAVEEXEC pfnSaveExec, PFNSSMDRVSAVEDONE pfnSaveDone, + PFNSSMDRVLOADPREP pfnLoadPrep, PFNSSMDRVLOADEXEC pfnLoadExec, PFNSSMDRVLOADDONE pfnLoadDone) +{ + PSSMUNIT pUnit; + int rc = ssmR3Register(pVM, pszName, uInstance, uVersion, cbGuess, NULL, &pUnit); + if (RT_SUCCESS(rc)) + { + pUnit->enmType = SSMUNITTYPE_DRV; + pUnit->u.Drv.pfnLivePrep = pfnLivePrep; + pUnit->u.Drv.pfnLiveExec = pfnLiveExec; + pUnit->u.Drv.pfnLiveVote = pfnLiveVote; + pUnit->u.Drv.pfnSavePrep = pfnSavePrep; + pUnit->u.Drv.pfnSaveExec = pfnSaveExec; + pUnit->u.Drv.pfnSaveDone = pfnSaveDone; + pUnit->u.Drv.pfnLoadPrep = pfnLoadPrep; + pUnit->u.Drv.pfnLoadExec = pfnLoadExec; + pUnit->u.Drv.pfnLoadDone = pfnLoadDone; + pUnit->u.Drv.pDrvIns = pDrvIns; + } + return rc; +} + + +/** + * Register a PDM USB device data unit. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pUsbIns USB instance. + * @param pszName Data unit name. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. + * @param uVersion Data layout version number. + * @param cbGuess The approximate amount of data in the unit. + * Only for progress indicators. + * + * @param pfnLivePrep Prepare live save callback, optional. + * @param pfnLiveExec Execute live save callback, optional. + * @param pfnLiveVote Vote live save callback, optional. + * + * @param pfnSavePrep Prepare save callback, optional. + * @param pfnSaveExec Execute save callback, optional. + * @param pfnSaveDone Done save callback, optional. + * + * @param pfnLoadPrep Prepare load callback, optional. + * @param pfnLoadExec Execute load callback, optional. + * @param pfnLoadDone Done load callback, optional. + */ +VMMR3_INT_DECL(int) +SSMR3RegisterUsb(PVM pVM, PPDMUSBINS pUsbIns, const char *pszName, uint32_t uInstance, uint32_t uVersion, size_t cbGuess, + PFNSSMUSBLIVEPREP pfnLivePrep, PFNSSMUSBLIVEEXEC pfnLiveExec, PFNSSMUSBLIVEVOTE pfnLiveVote, + PFNSSMUSBSAVEPREP pfnSavePrep, PFNSSMUSBSAVEEXEC pfnSaveExec, PFNSSMUSBSAVEDONE pfnSaveDone, + PFNSSMUSBLOADPREP pfnLoadPrep, PFNSSMUSBLOADEXEC pfnLoadExec, PFNSSMUSBLOADDONE pfnLoadDone) +{ + PSSMUNIT pUnit; + int rc = ssmR3Register(pVM, pszName, uInstance, uVersion, cbGuess, NULL, &pUnit); + if (RT_SUCCESS(rc)) + { + pUnit->enmType = SSMUNITTYPE_USB; + pUnit->u.Usb.pfnLivePrep = pfnLivePrep; + pUnit->u.Usb.pfnLiveExec = pfnLiveExec; + pUnit->u.Usb.pfnLiveVote = pfnLiveVote; + pUnit->u.Usb.pfnSavePrep = pfnSavePrep; + pUnit->u.Usb.pfnSaveExec = pfnSaveExec; + pUnit->u.Usb.pfnSaveDone = pfnSaveDone; + pUnit->u.Usb.pfnLoadPrep = pfnLoadPrep; + pUnit->u.Usb.pfnLoadExec = pfnLoadExec; + pUnit->u.Usb.pfnLoadDone = pfnLoadDone; + pUnit->u.Usb.pUsbIns = pUsbIns; + } + return rc; +} + + +/** + * Register a internal data unit. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pszName Data unit name. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. + * @param uVersion Data layout version number. 
+ * @param cbGuess The approximate amount of data in the unit. + * Only for progress indicators. + * + * @param pfnLivePrep Prepare live save callback, optional. + * @param pfnLiveExec Execute live save callback, optional. + * @param pfnLiveVote Vote live save callback, optional. + * + * @param pfnSavePrep Prepare save callback, optional. + * @param pfnSaveExec Execute save callback, optional. + * @param pfnSaveDone Done save callback, optional. + * + * @param pfnLoadPrep Prepare load callback, optional. + * @param pfnLoadExec Execute load callback, optional. + * @param pfnLoadDone Done load callback, optional. + */ +VMMR3DECL(int) SSMR3RegisterInternal(PVM pVM, const char *pszName, uint32_t uInstance, uint32_t uVersion, size_t cbGuess, + PFNSSMINTLIVEPREP pfnLivePrep, PFNSSMINTLIVEEXEC pfnLiveExec, PFNSSMINTLIVEVOTE pfnLiveVote, + PFNSSMINTSAVEPREP pfnSavePrep, PFNSSMINTSAVEEXEC pfnSaveExec, PFNSSMINTSAVEDONE pfnSaveDone, + PFNSSMINTLOADPREP pfnLoadPrep, PFNSSMINTLOADEXEC pfnLoadExec, PFNSSMINTLOADDONE pfnLoadDone) +{ + PSSMUNIT pUnit; + int rc = ssmR3Register(pVM, pszName, uInstance, uVersion, cbGuess, NULL /* pszBefore */, &pUnit); + if (RT_SUCCESS(rc)) + { + pUnit->enmType = SSMUNITTYPE_INTERNAL; + pUnit->u.Internal.pfnLivePrep = pfnLivePrep; + pUnit->u.Internal.pfnLiveExec = pfnLiveExec; + pUnit->u.Internal.pfnLiveVote = pfnLiveVote; + pUnit->u.Internal.pfnSavePrep = pfnSavePrep; + pUnit->u.Internal.pfnSaveExec = pfnSaveExec; + pUnit->u.Internal.pfnSaveDone = pfnSaveDone; + pUnit->u.Internal.pfnLoadPrep = pfnLoadPrep; + pUnit->u.Internal.pfnLoadExec = pfnLoadExec; + pUnit->u.Internal.pfnLoadDone = pfnLoadDone; + } + return rc; +} + + +/** + * Register an external data unit. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM handle. + * @param pszName Data unit name. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. + * @param uVersion Data layout version number. + * @param cbGuess The approximate amount of data in the unit. + * Only for progress indicators. + * + * @param pfnLivePrep Prepare live save callback, optional. + * @param pfnLiveExec Execute live save callback, optional. + * @param pfnLiveVote Vote live save callback, optional. + * + * @param pfnSavePrep Prepare save callback, optional. + * @param pfnSaveExec Execute save callback, optional. + * @param pfnSaveDone Done save callback, optional. + * + * @param pfnLoadPrep Prepare load callback, optional. + * @param pfnLoadExec Execute load callback, optional. + * @param pfnLoadDone Done load callback, optional. + * @param pvUser User argument. 
+ */ +VMMR3DECL(int) SSMR3RegisterExternal(PUVM pUVM, const char *pszName, uint32_t uInstance, uint32_t uVersion, size_t cbGuess, + PFNSSMEXTLIVEPREP pfnLivePrep, PFNSSMEXTLIVEEXEC pfnLiveExec, PFNSSMEXTLIVEVOTE pfnLiveVote, + PFNSSMEXTSAVEPREP pfnSavePrep, PFNSSMEXTSAVEEXEC pfnSaveExec, PFNSSMEXTSAVEDONE pfnSaveDone, + PFNSSMEXTLOADPREP pfnLoadPrep, PFNSSMEXTLOADEXEC pfnLoadExec, PFNSSMEXTLOADDONE pfnLoadDone, void *pvUser) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + PSSMUNIT pUnit; + int rc = ssmR3Register(pVM, pszName, uInstance, uVersion, cbGuess, NULL /* pszBefore */, &pUnit); + if (RT_SUCCESS(rc)) + { + pUnit->enmType = SSMUNITTYPE_EXTERNAL; + pUnit->u.External.pfnLivePrep = pfnLivePrep; + pUnit->u.External.pfnLiveExec = pfnLiveExec; + pUnit->u.External.pfnLiveVote = pfnLiveVote; + pUnit->u.External.pfnSavePrep = pfnSavePrep; + pUnit->u.External.pfnSaveExec = pfnSaveExec; + pUnit->u.External.pfnSaveDone = pfnSaveDone; + pUnit->u.External.pfnLoadPrep = pfnLoadPrep; + pUnit->u.External.pfnLoadExec = pfnLoadExec; + pUnit->u.External.pfnLoadDone = pfnLoadDone; + pUnit->u.External.pvUser = pvUser; + } + return rc; +} + + +/** + * @callback_method_impl{FNSSMINTLOADEXEC, + * Stub that skips the whole unit (see SSMR3RegisterStub).} + */ +static DECLCALLBACK(int) ssmR3LoadExecStub(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + NOREF(pVM); NOREF(uVersion); NOREF(uPass); + return SSMR3SkipToEndOfUnit(pSSM); +} + + +/** + * Registers a stub state loader for working around legacy. + * + * This is used to deal with irelevant PATM and CSAM saved state units in HM + * mode and when built without raw-mode. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName Data unit name. + * @param uInstance Instance number. + */ +VMMR3DECL(int) SSMR3RegisterStub(PVM pVM, const char *pszName, uint32_t uInstance) +{ + return SSMR3RegisterInternal(pVM, pszName, uInstance, UINT32_MAX, 0, + NULL, NULL, NULL, + NULL, NULL, NULL, + NULL, ssmR3LoadExecStub, NULL); +} + + +/** + * Deregister one or more PDM Device data units. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param pszName Data unit name. + * Use NULL to deregister all data units for that device instance. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. + * @remark Only for dynamic data units and dynamic unloaded modules. + */ +VMMR3_INT_DECL(int) SSMR3DeregisterDevice(PVM pVM, PPDMDEVINS pDevIns, const char *pszName, uint32_t uInstance) +{ + /* + * Validate input. + */ + if (!pDevIns) + { + AssertMsgFailed(("pDevIns is NULL!\n")); + return VERR_INVALID_PARAMETER; + } + + /* + * Search the list. + */ + size_t cchName = pszName ? strlen(pszName) : 0; + int rc = pszName ? VERR_SSM_UNIT_NOT_FOUND : VINF_SUCCESS; + PSSMUNIT pUnitPrev = NULL; + PSSMUNIT pUnit = pVM->ssm.s.pHead; + while (pUnit) + { + if ( pUnit->enmType == SSMUNITTYPE_DEV + && ( !pszName + || ( pUnit->cchName == cchName + && !memcmp(pUnit->szName, pszName, cchName))) + && pUnit->u32Instance == uInstance + ) + { + if (pUnit->u.Dev.pDevIns == pDevIns) + { + /* + * Unlink it, advance pointer, and free the node. 
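+                 * (pUnit is advanced before the node is freed so the loop can
+                 * continue when all of the device instance's units are being removed.)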
+ */ + PSSMUNIT pFree = pUnit; + pUnit = pUnit->pNext; + if (pUnitPrev) + pUnitPrev->pNext = pUnit; + else + pVM->ssm.s.pHead = pUnit; + pVM->ssm.s.cUnits--; + Log(("SSM: Removed data unit '%s' (pdm dev).\n", pFree->szName)); + MMR3HeapFree(pFree); + + if (pszName) + return VINF_SUCCESS; + rc = VINF_SUCCESS; + continue; + } + else if (pszName) + { + AssertMsgFailed(("Caller is not owner! Owner=%p Caller=%p %s\n", + pUnit->u.Dev.pDevIns, pDevIns, pszName)); + return VERR_SSM_UNIT_NOT_OWNER; + } + } + + /* next */ + pUnitPrev = pUnit; + pUnit = pUnit->pNext; + } + + return rc; +} + + +/** + * Deregister one ore more PDM Driver data units. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param pszName Data unit name. + * Use NULL to deregister all data units for that driver instance. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. Ignored if pszName is NULL. + * @remark Only for dynamic data units and dynamic unloaded modules. + */ +VMMR3_INT_DECL(int) SSMR3DeregisterDriver(PVM pVM, PPDMDRVINS pDrvIns, const char *pszName, uint32_t uInstance) +{ + /* + * Validate input. + */ + if (!pDrvIns) + { + AssertMsgFailed(("pDrvIns is NULL!\n")); + return VERR_INVALID_PARAMETER; + } + + /* + * Search the list. + */ + size_t cchName = pszName ? strlen(pszName) : 0; + int rc = pszName ? VERR_SSM_UNIT_NOT_FOUND : VINF_SUCCESS; + PSSMUNIT pUnitPrev = NULL; + PSSMUNIT pUnit = pVM->ssm.s.pHead; + while (pUnit) + { + if ( pUnit->enmType == SSMUNITTYPE_DRV + && ( !pszName + || ( pUnit->cchName == cchName + && !memcmp(pUnit->szName, pszName, cchName) + && pUnit->u32Instance == uInstance)) + ) + { + if (pUnit->u.Drv.pDrvIns == pDrvIns) + { + /* + * Unlink it, advance pointer, and free the node. + */ + PSSMUNIT pFree = pUnit; + pUnit = pUnit->pNext; + if (pUnitPrev) + pUnitPrev->pNext = pUnit; + else + pVM->ssm.s.pHead = pUnit; + pVM->ssm.s.cUnits--; + Log(("SSM: Removed data unit '%s' (pdm drv).\n", pFree->szName)); + MMR3HeapFree(pFree); + + if (pszName) + return VINF_SUCCESS; + rc = VINF_SUCCESS; + continue; + } + + AssertMsgReturn(!pszName, + ("Caller is not owner! Owner=%p Caller=%p %s\n", pUnit->u.Drv.pDrvIns, pDrvIns, pszName), + VERR_SSM_UNIT_NOT_OWNER); + } + + /* next */ + pUnitPrev = pUnit; + pUnit = pUnit->pNext; + } + + return rc; +} + + +/** + * Deregister one or more PDM USB device data units. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pUsbIns USB device instance. + * @param pszName Data unit name. + * Use NULL to deregister all data units for that driver instance. + * @param uInstance The instance identifier of the data unit. + * This must together with the name be unique. Ignored if pszName is NULL. + * @remark Only for dynamic data units and dynamic unloaded modules. + */ +VMMR3_INT_DECL(int) SSMR3DeregisterUsb(PVM pVM, PPDMUSBINS pUsbIns, const char *pszName, uint32_t uInstance) +{ + /* + * Validate input. + */ + AssertMsgReturn(VALID_PTR(pUsbIns), ("pUsbIns is NULL!\n"), VERR_INVALID_PARAMETER); + + /* + * Search the list. + */ + size_t cchName = pszName ? strlen(pszName) : 0; + int rc = pszName ? 
VERR_SSM_UNIT_NOT_FOUND : VINF_SUCCESS; + PSSMUNIT pUnitPrev = NULL; + PSSMUNIT pUnit = pVM->ssm.s.pHead; + while (pUnit) + { + if ( pUnit->enmType == SSMUNITTYPE_USB + && ( !pszName + || ( pUnit->cchName == cchName + && !memcmp(pUnit->szName, pszName, cchName) + && pUnit->u32Instance == uInstance)) + ) + { + if (pUnit->u.Usb.pUsbIns == pUsbIns) + { + /* + * Unlink it, advance pointer, and free the node. + */ + PSSMUNIT pFree = pUnit; + pUnit = pUnit->pNext; + if (pUnitPrev) + pUnitPrev->pNext = pUnit; + else + pVM->ssm.s.pHead = pUnit; + pVM->ssm.s.cUnits--; + Log(("SSM: Removed data unit '%s' (pdm drv).\n", pFree->szName)); + MMR3HeapFree(pFree); + + if (pszName) + return VINF_SUCCESS; + rc = VINF_SUCCESS; + continue; + } + + AssertMsgReturn(!pszName, + ("Caller is not owner! Owner=%p Caller=%p %s\n", pUnit->u.Usb.pUsbIns, pUsbIns, pszName), + VERR_SSM_UNIT_NOT_OWNER); + } + + /* next */ + pUnitPrev = pUnit; + pUnit = pUnit->pNext; + } + + return rc; +} + + +/** + * Deregister a data unit. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmType Unit type + * @param pszName Data unit name. + * @remark Only for dynamic data units. + */ +static int ssmR3DeregisterByNameAndType(PVM pVM, const char *pszName, SSMUNITTYPE enmType) +{ + /* + * Validate input. + */ + if (!pszName) + { + AssertMsgFailed(("pszName is NULL!\n")); + return VERR_INVALID_PARAMETER; + } + + /* + * Search the list. + */ + size_t cchName = strlen(pszName); + int rc = VERR_SSM_UNIT_NOT_FOUND; + PSSMUNIT pUnitPrev = NULL; + PSSMUNIT pUnit = pVM->ssm.s.pHead; + while (pUnit) + { + if ( pUnit->enmType == enmType + && pUnit->cchName == cchName + && !memcmp(pUnit->szName, pszName, cchName)) + { + /* + * Unlink it, advance pointer, and free the node. + */ + PSSMUNIT pFree = pUnit; + pUnit = pUnit->pNext; + if (pUnitPrev) + pUnitPrev->pNext = pUnit; + else + pVM->ssm.s.pHead = pUnit; + pVM->ssm.s.cUnits--; + Log(("SSM: Removed data unit '%s' (type=%d).\n", pFree->szName, enmType)); + MMR3HeapFree(pFree); + return VINF_SUCCESS; + } + + /* next */ + pUnitPrev = pUnit; + pUnit = pUnit->pNext; + } + + return rc; +} + + +/** + * Deregister an internal data unit. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszName Data unit name. + * @remark Only for dynamic data units. + */ +VMMR3DECL(int) SSMR3DeregisterInternal(PVM pVM, const char *pszName) +{ + return ssmR3DeregisterByNameAndType(pVM, pszName, SSMUNITTYPE_INTERNAL); +} + + +/** + * Deregister an external data unit. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param pszName Data unit name. + * @remark Only for dynamic data units. + */ +VMMR3DECL(int) SSMR3DeregisterExternal(PUVM pUVM, const char *pszName) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + return ssmR3DeregisterByNameAndType(pVM, pszName, SSMUNITTYPE_EXTERNAL); +} + +#endif /* !SSM_STANDALONE */ + + +/** + * Initializes the stream after/before opening the file/whatever. + * + * @returns VINF_SUCCESS or VERR_NO_MEMORY. + * @param pStrm The stream handle. + * @param fChecksummed Whether the stream is to be checksummed while + * written/read. + * @param cBuffers The number of buffers. + */ +static int ssmR3StrmInitInternal(PSSMSTRM pStrm, bool fChecksummed, uint32_t cBuffers) +{ + Assert(cBuffers > 0); + + /* + * Init the common data members. 
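+     *
+     * (Editorial note, not part of the imported sources.)  The buffers
+     * themselves are allocated further down with RTMemPageAllocZ and pushed
+     * onto the free list; getting fewer than three of them is treated as an
+     * out-of-memory failure.  The two event semaphores are created last so
+     * that ssmR3StrmDelete can cope with a partially initialized stream.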
+ */ + pStrm->fTerminating = false; + pStrm->fNeedSeek = false; + pStrm->rc = VINF_SUCCESS; + pStrm->hIoThread = NIL_RTTHREAD; + pStrm->offNeedSeekTo= UINT64_MAX; + + pStrm->pHead = NULL; + pStrm->pFree = NULL; + pStrm->hEvtHead = NIL_RTSEMEVENT; + pStrm->hEvtFree = NIL_RTSEMEVENT; + + pStrm->pPending = NULL; + pStrm->pCur = NULL; + pStrm->offCurStream = 0; + pStrm->off = 0; + pStrm->fChecksummed = fChecksummed; + pStrm->u32StreamCRC = fChecksummed ? RTCrc32Start() : 0; + pStrm->offStreamCRC = 0; + + /* + * Allocate the buffers. Page align them in case that makes the kernel + * and/or cpu happier in some way. + */ + int rc = VINF_SUCCESS; + for (uint32_t i = 0; i < cBuffers; i++) + { + PSSMSTRMBUF pBuf = (PSSMSTRMBUF)RTMemPageAllocZ(sizeof(*pBuf)); + if (!pBuf) + { + if (i > 2) + { + LogRel(("ssmR3StrmAllocBuffer: WARNING: Could only get %d stream buffers.\n", i)); + break; + } + LogRel(("ssmR3StrmAllocBuffer: Failed to allocate stream buffers. (i=%d)\n", i)); + return VERR_NO_MEMORY; + } + + /* link it */ + pBuf->pNext = pStrm->pFree; + pStrm->pFree = pBuf; + } + + /* + * Create the event semaphores. + */ + rc = RTSemEventCreate(&pStrm->hEvtHead); + if (RT_FAILURE(rc)) + return rc; + rc = RTSemEventCreate(&pStrm->hEvtFree); + if (RT_FAILURE(rc)) + return rc; + + return VINF_SUCCESS; +} + + +/** + * Destroys a list of buffers. + * + * @param pHead Pointer to the head. + */ +static void ssmR3StrmDestroyBufList(PSSMSTRMBUF pHead) +{ + while (pHead) + { + PSSMSTRMBUF pCur = pHead; + pHead = pCur->pNext; + pCur->pNext = NULL; + RTMemPageFree(pCur, sizeof(*pCur)); + } +} + + +/** + * Cleans up a stream after ssmR3StrmInitInternal has been called (regardless of + * it succeeded or not). + * + * @param pStrm The stream handle. + */ +static void ssmR3StrmDelete(PSSMSTRM pStrm) +{ + RTMemPageFree(pStrm->pCur, sizeof(*pStrm->pCur)); + pStrm->pCur = NULL; + ssmR3StrmDestroyBufList(pStrm->pHead); + pStrm->pHead = NULL; + ssmR3StrmDestroyBufList(pStrm->pPending); + pStrm->pPending = NULL; + ssmR3StrmDestroyBufList(pStrm->pFree); + pStrm->pFree = NULL; + + RTSemEventDestroy(pStrm->hEvtHead); + pStrm->hEvtHead = NIL_RTSEMEVENT; + + RTSemEventDestroy(pStrm->hEvtFree); + pStrm->hEvtFree = NIL_RTSEMEVENT; +} + + +/** + * Initializes a stream that uses a method table. + * + * @returns VBox status code. + * @param pStrm The stream manager structure. + * @param pStreamOps The stream method table. + * @param pvUser The user argument for the stream methods. + * @param fWrite Whether to open for writing or reading. + * @param fChecksummed Whether the stream is to be checksummed while + * written/read. + * @param cBuffers The number of buffers. 
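+ *
+ * @remarks Editorial illustration, not part of the imported sources: the method
+ *          table lets a caller direct the stream at something other than a
+ *          local file.  A hypothetical table would mirror the file based
+ *          g_ssmR3FileOps defined further down:
+ * @code
+ *      static SSMSTRMOPS const s_ExampleOps =
+ *      {
+ *          SSMSTRMOPS_VERSION,
+ *          exampleWrite,   // pfnWrite
+ *          exampleRead,    // pfnRead
+ *          exampleSeek,    // pfnSeek
+ *          exampleTell,    // pfnTell
+ *          exampleSize,    // pfnSize
+ *          exampleIsOk,    // pfnIsOk
+ *          exampleClose,   // pfnClose
+ *          SSMSTRMOPS_VERSION
+ *      };
+ * @endcode
+ *          The structure carries the version at both ends, just like
+ *          g_ssmR3FileOps does.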
+ */ +static int ssmR3StrmInit(PSSMSTRM pStrm, PCSSMSTRMOPS pStreamOps, void *pvUser, bool fWrite, bool fChecksummed, uint32_t cBuffers) +{ + int rc = ssmR3StrmInitInternal(pStrm, fChecksummed, cBuffers); + if (RT_SUCCESS(rc)) + { + pStrm->pOps = pStreamOps; + pStrm->pvUser = pvUser; + pStrm->fWrite = fWrite; + return VINF_SUCCESS; + } + + ssmR3StrmDelete(pStrm); + pStrm->rc = rc; + return rc; +} + + +/** + * @copydoc SSMSTRMOPS::pfnWrite + */ +static DECLCALLBACK(int) ssmR3FileWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite) +{ + Assert(RTFileTell((RTFILE)(uintptr_t)pvUser) == offStream); NOREF(offStream); + return RTFileWriteAt((RTFILE)(uintptr_t)pvUser, offStream, pvBuf, cbToWrite, NULL); /** @todo use RTFileWrite */ +} + + +/** + * @copydoc SSMSTRMOPS::pfnRead + */ +static DECLCALLBACK(int) ssmR3FileRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead) +{ + Assert(RTFileTell((RTFILE)(uintptr_t)pvUser) == offStream); NOREF(offStream); + return RTFileRead((RTFILE)(uintptr_t)pvUser, pvBuf, cbToRead, pcbRead); +} + + +/** + * @copydoc SSMSTRMOPS::pfnSeek + */ +static DECLCALLBACK(int) ssmR3FileSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual) +{ + return RTFileSeek((RTFILE)(uintptr_t)pvUser, offSeek, uMethod, poffActual); +} + + +/** + * @copydoc SSMSTRMOPS::pfnTell + */ +static DECLCALLBACK(uint64_t) ssmR3FileTell(void *pvUser) +{ + return RTFileTell((RTFILE)(uintptr_t)pvUser); +} + + +/** + * @copydoc SSMSTRMOPS::pfnSize + */ +static DECLCALLBACK(int) ssmR3FileSize(void *pvUser, uint64_t *pcb) +{ + return RTFileGetSize((RTFILE)(uintptr_t)pvUser, pcb); +} + + +/** + * @copydoc SSMSTRMOPS::pfnIsOk + */ +static DECLCALLBACK(int) ssmR3FileIsOk(void *pvUser) +{ + /* + * Check that there is still some space left on the disk. + */ + RTFOFF cbFree; + int rc = RTFileQueryFsSizes((RTFILE)(uintptr_t)pvUser, NULL, &cbFree, NULL, NULL); +#define SSM_MIN_DISK_FREE ((RTFOFF)( 10 * _1M )) + if (RT_SUCCESS(rc)) + { + if (cbFree < SSM_MIN_DISK_FREE) + { + LogRel(("SSM: Giving up: Low on disk space. (cbFree=%RTfoff, SSM_MIN_DISK_FREE=%RTfoff).\n", + cbFree, SSM_MIN_DISK_FREE)); + rc = VERR_SSM_LOW_ON_DISK_SPACE; + } + } + else if (rc == VERR_NOT_SUPPORTED) + rc = VINF_SUCCESS; + else + AssertLogRelRC(rc); + return rc; +} + + +/** + * @copydoc SSMSTRMOPS::pfnClose + */ +static DECLCALLBACK(int) ssmR3FileClose(void *pvUser, bool fCancelled) +{ + NOREF(fCancelled); + return RTFileClose((RTFILE)(uintptr_t)pvUser); +} + + +/** + * Method table for a file based stream. + */ +static SSMSTRMOPS const g_ssmR3FileOps = +{ + SSMSTRMOPS_VERSION, + ssmR3FileWrite, + ssmR3FileRead, + ssmR3FileSeek, + ssmR3FileTell, + ssmR3FileSize, + ssmR3FileIsOk, + ssmR3FileClose, + SSMSTRMOPS_VERSION +}; + + +/** + * Opens a file stream. + * + * @returns VBox status code. + * @param pStrm The stream manager structure. + * @param pszFilename The file to open or create. + * @param fWrite Whether to open for writing or reading. + * @param fChecksummed Whether the stream is to be checksummed while + * written/read. + * @param cBuffers The number of buffers. + */ +static int ssmR3StrmOpenFile(PSSMSTRM pStrm, const char *pszFilename, bool fWrite, bool fChecksummed, uint32_t cBuffers) +{ + int rc = ssmR3StrmInitInternal(pStrm, fChecksummed, cBuffers); + if (RT_SUCCESS(rc)) + { + uint32_t fFlags = fWrite + ? 
RTFILE_O_READWRITE | RTFILE_O_CREATE_REPLACE | RTFILE_O_DENY_WRITE + : RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE; + RTFILE hFile; + rc = RTFileOpen(&hFile, pszFilename, fFlags); + if (RT_SUCCESS(rc)) + { + pStrm->pOps = &g_ssmR3FileOps; + pStrm->pvUser = (void *)(uintptr_t)hFile; + pStrm->fWrite = fWrite; + return VINF_SUCCESS; + } + } + + ssmR3StrmDelete(pStrm); + pStrm->rc = rc; + return rc; +} + + +/** + * Raise an error condition on the stream. + * + * @returns true if we raised the error condition, false if the stream already + * had an error condition set. + * + * @param pStrm The stream handle. + * @param rc The VBox error status code. + * + * @thread Any. + */ +DECLINLINE(bool) ssmR3StrmSetError(PSSMSTRM pStrm, int rc) +{ + Assert(RT_FAILURE_NP(rc)); + return ASMAtomicCmpXchgS32(&pStrm->rc, rc, VINF_SUCCESS); +} + + +/** + * Puts a buffer into the free list. + * + * @param pStrm The stream handle. + * @param pBuf The buffer. + * + * @thread The consumer. + */ +static void ssmR3StrmPutFreeBuf(PSSMSTRM pStrm, PSSMSTRMBUF pBuf) +{ + for (;;) + { + PSSMSTRMBUF pCurFreeHead = ASMAtomicUoReadPtrT(&pStrm->pFree, PSSMSTRMBUF); + ASMAtomicUoWritePtr(&pBuf->pNext, pCurFreeHead); + if (ASMAtomicCmpXchgPtr(&pStrm->pFree, pBuf, pCurFreeHead)) + { + int rc = RTSemEventSignal(pStrm->hEvtFree); + AssertRC(rc); + return; + } + } +} + + +/** + * Gets a free buffer, waits for one if necessary. + * + * @returns Pointer to the buffer on success. NULL if we're terminating. + * @param pStrm The stream handle. + * + * @thread The producer. + */ +static PSSMSTRMBUF ssmR3StrmGetFreeBuf(PSSMSTRM pStrm) +{ + for (;;) + { + PSSMSTRMBUF pMine = ASMAtomicUoReadPtrT(&pStrm->pFree, PSSMSTRMBUF); + if (!pMine) + { + if (pStrm->fTerminating) + return NULL; + if (RT_FAILURE(pStrm->rc)) + return NULL; + if ( pStrm->fWrite + && pStrm->hIoThread == NIL_RTTHREAD) + { + int rc = ssmR3StrmWriteBuffers(pStrm); + if (RT_FAILURE(rc)) + return NULL; + } + int rc = RTSemEventWaitNoResume(pStrm->hEvtFree, 30000); + if ( rc == VERR_SEM_DESTROYED + || pStrm->fTerminating) + return NULL; + continue; + } + + if (ASMAtomicCmpXchgPtr(&pStrm->pFree, pMine->pNext, pMine)) + { + pMine->offStream = UINT64_MAX; + pMine->cb = 0; + pMine->pNext = NULL; + pMine->fEndOfStream = false; + pMine->NanoTS = RTTimeNanoTS(); + return pMine; + } + } +} + + +/** + * Puts a buffer onto the queue. + * + * @param pStrm The stream handle. + * @param pBuf The stream buffer to put. + * + * @thread The producer. + */ +static void ssmR3StrmPutBuf(PSSMSTRM pStrm, PSSMSTRMBUF pBuf) +{ + for (;;) + { + PSSMSTRMBUF pCurHead = ASMAtomicUoReadPtrT(&pStrm->pHead, PSSMSTRMBUF); + ASMAtomicUoWritePtr(&pBuf->pNext, pCurHead); + if (ASMAtomicCmpXchgPtr(&pStrm->pHead, pBuf, pCurHead)) + { + int rc = RTSemEventSignal(pStrm->hEvtHead); + AssertRC(rc); + return; + } + } +} + + +/** + * Reverses the list. + * + * @returns The head of the reversed list. + * @param pHead The head of the list to reverse. + */ +static PSSMSTRMBUF ssmR3StrmReverseList(PSSMSTRMBUF pHead) +{ + PSSMSTRMBUF pRevHead = NULL; + while (pHead) + { + PSSMSTRMBUF pCur = pHead; + pHead = pCur->pNext; + pCur->pNext = pRevHead; + pRevHead = pCur; + } + return pRevHead; +} + + +/** + * Gets one buffer from the queue, will wait for one to become ready if + * necessary. + * + * @returns Pointer to the buffer on success. NULL if we're terminating. + * @param pStrm The stream handle. + * + * @thread The consumer. 
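+ *
+ * @remarks Editorial note, not part of the imported sources: producers push
+ *          buffers onto pHead with a lock-free CAS loop, newest first (see
+ *          ssmR3StrmPutBuf above).  The consumer therefore grabs the whole
+ *          chain atomically, reverses it with ssmR3StrmReverseList to restore
+ *          submission order, and parks the result in pPending for subsequent
+ *          calls.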
+ */ +static PSSMSTRMBUF ssmR3StrmGetBuf(PSSMSTRM pStrm) +{ + for (;;) + { + PSSMSTRMBUF pMine = pStrm->pPending; + if (pMine) + { + pStrm->pPending = pMine->pNext; + pMine->pNext = NULL; + return pMine; + } + + pMine = ASMAtomicXchgPtrT(&pStrm->pHead, NULL, PSSMSTRMBUF); + if (pMine) + pStrm->pPending = ssmR3StrmReverseList(pMine); + else + { + if (pStrm->fTerminating) + return NULL; + if (RT_FAILURE(pStrm->rc)) + return NULL; + if ( !pStrm->fWrite + && pStrm->hIoThread == NIL_RTTHREAD) + { + int rc = ssmR3StrmReadMore(pStrm); + if (RT_FAILURE(rc)) + return NULL; + continue; + } + + int rc = RTSemEventWaitNoResume(pStrm->hEvtHead, 30000); + if ( rc == VERR_SEM_DESTROYED + || pStrm->fTerminating) + return NULL; + } + } +} + + +/** + * Flushes the current buffer (both write and read streams). + * + * @param pStrm The stream handle. + */ +static void ssmR3StrmFlushCurBuf(PSSMSTRM pStrm) +{ + if (pStrm->pCur) + { + PSSMSTRMBUF pBuf = pStrm->pCur; + pStrm->pCur = NULL; + + if (pStrm->fWrite) + { + uint32_t cb = pStrm->off; + pBuf->cb = cb; + pBuf->offStream = pStrm->offCurStream; + if ( pStrm->fChecksummed + && pStrm->offStreamCRC < cb) + pStrm->u32StreamCRC = RTCrc32Process(pStrm->u32StreamCRC, + &pBuf->abData[pStrm->offStreamCRC], + cb - pStrm->offStreamCRC); + pStrm->offCurStream += cb; + pStrm->off = 0; + pStrm->offStreamCRC = 0; + + ssmR3StrmPutBuf(pStrm, pBuf); + } + else + { + uint32_t cb = pBuf->cb; + if ( pStrm->fChecksummed + && pStrm->offStreamCRC < cb) + pStrm->u32StreamCRC = RTCrc32Process(pStrm->u32StreamCRC, + &pBuf->abData[pStrm->offStreamCRC], + cb - pStrm->offStreamCRC); + pStrm->offCurStream += cb; + pStrm->off = 0; + pStrm->offStreamCRC = 0; + + ssmR3StrmPutFreeBuf(pStrm, pBuf); + } + } +} + + +/** + * Flush buffered data. + * + * @returns VBox status code. Returns VINF_EOF if we encounter a buffer with the + * fEndOfStream indicator set. + * @param pStrm The stream handle. + * + * @thread The producer thread. + */ +static int ssmR3StrmWriteBuffers(PSSMSTRM pStrm) +{ + Assert(pStrm->fWrite); + + /* + * Just return if the stream has a pending error condition. + */ + int rc = pStrm->rc; + if (RT_FAILURE(rc)) + return rc; + + /* + * Grab the pending list and write it out. + */ + PSSMSTRMBUF pHead = ASMAtomicXchgPtrT(&pStrm->pHead, NULL, PSSMSTRMBUF); + if (!pHead) + return VINF_SUCCESS; + pHead = ssmR3StrmReverseList(pHead); + + while (pHead) + { + /* pop */ + PSSMSTRMBUF pCur = pHead; + pHead = pCur->pNext; + + /* flush */ + rc = pStrm->pOps->pfnIsOk(pStrm->pvUser); + if (RT_SUCCESS(rc)) + rc = pStrm->pOps->pfnWrite(pStrm->pvUser, pCur->offStream, &pCur->abData[0], pCur->cb); + if ( RT_FAILURE(rc) + && ssmR3StrmSetError(pStrm, rc)) + LogRel(("ssmR3StrmWriteBuffers: Write failed with rc=%Rrc at offStream=%#llx\n", rc, pCur->offStream)); + + /* free */ + bool fEndOfStream = pCur->fEndOfStream; + ssmR3StrmPutFreeBuf(pStrm, pCur); + if (fEndOfStream) + { + Assert(!pHead); + return VINF_EOF; + } + } + + return pStrm->rc; +} + + +/** + * Closes the stream after first flushing any pending write. + * + * @returns VBox status code. + * @param pStrm The stream handle. + * @param fCancelled Indicates whether the operation was cancelled or + * not. + */ +static int ssmR3StrmClose(PSSMSTRM pStrm, bool fCancelled) +{ + /* + * Flush, terminate the I/O thread, and close the stream. 
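+     *
+     * (Editorial note, not part of the imported sources.)  For write streams
+     * the remaining buffers are pushed out first - by this thread when no I/O
+     * thread exists - and the worker is then woken via hEvtHead so it can
+     * drain the queue and exit before the stream is closed.  For read streams
+     * the stream is closed first and the worker is released via hEvtFree.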
+ */ + if (pStrm->fWrite) + { + ssmR3StrmFlushCurBuf(pStrm); + if (pStrm->hIoThread == NIL_RTTHREAD) + ssmR3StrmWriteBuffers(pStrm); + } + + if (pStrm->hIoThread != NIL_RTTHREAD) + ASMAtomicWriteBool(&pStrm->fTerminating, true); + + int rc; + if (pStrm->fWrite) + { + if (pStrm->hIoThread != NIL_RTTHREAD) + { + int rc2 = RTSemEventSignal(pStrm->hEvtHead); + AssertLogRelRC(rc2); + int rc3 = RTThreadWait(pStrm->hIoThread, RT_INDEFINITE_WAIT, NULL); + AssertLogRelRC(rc3); + pStrm->hIoThread = NIL_RTTHREAD; + } + + rc = pStrm->pOps->pfnClose(pStrm->pvUser, fCancelled); + if (RT_FAILURE(rc)) + ssmR3StrmSetError(pStrm, rc); + } + else + { + rc = pStrm->pOps->pfnClose(pStrm->pvUser, fCancelled); + if (RT_FAILURE(rc)) + ssmR3StrmSetError(pStrm, rc); + + if (pStrm->hIoThread != NIL_RTTHREAD) + { + int rc2 = RTSemEventSignal(pStrm->hEvtFree); + AssertLogRelRC(rc2); + int rc3 = RTThreadWait(pStrm->hIoThread, RT_INDEFINITE_WAIT, NULL); + AssertLogRelRC(rc3); + pStrm->hIoThread = NIL_RTTHREAD; + } + } + + pStrm->pOps = NULL; + pStrm->pvUser = NULL; + + rc = pStrm->rc; + ssmR3StrmDelete(pStrm); + + return rc; +} + +#ifndef SSM_STANDALONE + +/** + * Stream output routine. + * + * @returns VBox status code. + * @param pStrm The stream handle. + * @param pvBuf What to write. + * @param cbToWrite How much to write. + * + * @thread The producer in a write stream (never the I/O thread). + */ +static int ssmR3StrmWrite(PSSMSTRM pStrm, const void *pvBuf, size_t cbToWrite) +{ + AssertReturn(cbToWrite > 0, VINF_SUCCESS); + Assert(pStrm->fWrite); + + /* + * Squeeze as much as possible into the current buffer. + */ + PSSMSTRMBUF pBuf = pStrm->pCur; + if (RT_LIKELY(pBuf)) + { + uint32_t cbLeft = RT_SIZEOFMEMB(SSMSTRMBUF, abData) - pStrm->off; + if (RT_LIKELY(cbLeft >= cbToWrite)) + { + memcpy(&pBuf->abData[pStrm->off], pvBuf, cbToWrite); + pStrm->off += (uint32_t)cbToWrite; + return VINF_SUCCESS; + } + + if (cbLeft > 0) + { + memcpy(&pBuf->abData[pStrm->off], pvBuf, cbLeft); + pStrm->off += cbLeft; + cbToWrite -= cbLeft; + pvBuf = (uint8_t const *)pvBuf + cbLeft; + } + Assert(pStrm->off == RT_SIZEOFMEMB(SSMSTRMBUF, abData)); + } + + /* + * Need one or more new buffers. + */ + do + { + /* + * Flush the current buffer and replace it with a new one. + */ + ssmR3StrmFlushCurBuf(pStrm); + pBuf = ssmR3StrmGetFreeBuf(pStrm); + if (!pBuf) + break; + pStrm->pCur = pBuf; + Assert(pStrm->off == 0); + + /* + * Copy data to the buffer. + */ + uint32_t cbCopy = RT_SIZEOFMEMB(SSMSTRMBUF, abData); + if (cbCopy > cbToWrite) + cbCopy = (uint32_t)cbToWrite; + memcpy(&pBuf->abData[0], pvBuf, cbCopy); + pStrm->off = cbCopy; + cbToWrite -= cbCopy; + pvBuf = (uint8_t const *)pvBuf + cbCopy; + } while (cbToWrite > 0); + + return pStrm->rc; +} + + +/** + * Reserves space in the current buffer so the caller can write directly to the + * buffer instead of doing double buffering. + * + * @returns VBox status code + * @param pStrm The stream handle. + * @param cb The amount of buffer space to reserve. + * @param ppb Where to return the pointer. + */ +static int ssmR3StrmReserveWriteBufferSpace(PSSMSTRM pStrm, size_t cb, uint8_t **ppb) +{ + Assert(pStrm->fWrite); + Assert(RT_SIZEOFMEMB(SSMSTRMBUF, abData) / 4 >= cb); + + /* + * Check if there is room in the current buffer, it not flush it. 
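+     *
+     * (Editorial sketch, not part of the imported sources; the cbNeeded and
+     * cbActuallyUsed names are hypothetical.)  The intended usage pattern of
+     * the reserve/commit pair, as exercised by ssmR3DataWriteBig further down:
+     *
+     *      uint8_t *pb;
+     *      rc = ssmR3StrmReserveWriteBufferSpace(&pSSM->Strm, cbNeeded, &pb);
+     *      if (RT_SUCCESS(rc))
+     *      {
+     *          // produce at most cbNeeded bytes directly into pb ...
+     *          rc = ssmR3StrmCommitWriteBufferSpace(&pSSM->Strm, cbActuallyUsed);
+     *      }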
+ */ + PSSMSTRMBUF pBuf = pStrm->pCur; + if (pBuf) + { + uint32_t cbLeft = RT_SIZEOFMEMB(SSMSTRMBUF, abData) - pStrm->off; + if (cbLeft >= cb) + { + *ppb = &pBuf->abData[pStrm->off]; + return VINF_SUCCESS; + } + + ssmR3StrmFlushCurBuf(pStrm); + } + + /* + * Get a fresh buffer and return a pointer into it. + */ + pBuf = ssmR3StrmGetFreeBuf(pStrm); + if (pBuf) + { + pStrm->pCur = pBuf; + Assert(pStrm->off == 0); + *ppb = &pBuf->abData[0]; + } + else + *ppb = NULL; /* make gcc happy. */ + return pStrm->rc; +} + + +/** + * Commits buffer space reserved by ssmR3StrmReserveWriteBufferSpace. + * + * @returns VBox status code. + * @param pStrm The stream handle. + * @param cb The amount of buffer space to commit. This can be less + * that what was reserved initially. + */ +static int ssmR3StrmCommitWriteBufferSpace(PSSMSTRM pStrm, size_t cb) +{ + Assert(pStrm->pCur); + Assert(pStrm->off + cb <= RT_SIZEOFMEMB(SSMSTRMBUF, abData)); + pStrm->off += (uint32_t)cb; + return VINF_SUCCESS; +} + + +/** + * Marks the end of the stream. + * + * This will cause the I/O thread to quit waiting for more buffers. + * + * @returns VBox status code. + * @param pStrm The stream handle. + */ +static int ssmR3StrmSetEnd(PSSMSTRM pStrm) +{ + Assert(pStrm->fWrite); + PSSMSTRMBUF pBuf = pStrm->pCur; + if (RT_UNLIKELY(!pStrm->pCur)) + { + pBuf = ssmR3StrmGetFreeBuf(pStrm); + if (!pBuf) + return pStrm->rc; + pStrm->pCur = pBuf; + Assert(pStrm->off == 0); + } + pBuf->fEndOfStream = true; + ssmR3StrmFlushCurBuf(pStrm); + return VINF_SUCCESS; +} + +#endif /* !SSM_STANDALONE */ + +/** + * Read more from the stream. + * + * @returns VBox status code. VERR_EOF gets translated into VINF_EOF. + * @param pStrm The stream handle. + * + * @thread The I/O thread when we got one, otherwise the stream user. + */ +static int ssmR3StrmReadMore(PSSMSTRM pStrm) +{ + int rc; + Log6(("ssmR3StrmReadMore:\n")); + + /* + * Undo seek done by ssmR3StrmPeekAt. + */ + if (pStrm->fNeedSeek) + { + rc = pStrm->pOps->pfnSeek(pStrm->pvUser, pStrm->offNeedSeekTo, RTFILE_SEEK_BEGIN, NULL); + if (RT_FAILURE(rc)) + { + if (ssmR3StrmSetError(pStrm, rc)) + LogRel(("ssmR3StrmReadMore: RTFileSeek(,%#llx,) failed with rc=%Rrc\n", pStrm->offNeedSeekTo, rc)); + return rc; + } + pStrm->fNeedSeek = false; + pStrm->offNeedSeekTo = UINT64_MAX; + } + + /* + * Get a free buffer and try fill it up. + */ + PSSMSTRMBUF pBuf = ssmR3StrmGetFreeBuf(pStrm); + if (!pBuf) + return pStrm->rc; + + pBuf->offStream = pStrm->pOps->pfnTell(pStrm->pvUser); + size_t cbRead = sizeof(pBuf->abData); + rc = pStrm->pOps->pfnRead(pStrm->pvUser, pBuf->offStream, &pBuf->abData[0], cbRead, &cbRead); + if ( RT_SUCCESS(rc) + && cbRead > 0) + { + pBuf->cb = (uint32_t)cbRead; + pBuf->fEndOfStream = false; + Log6(("ssmR3StrmReadMore: %#010llx %#x\n", pBuf->offStream, pBuf->cb)); + ssmR3StrmPutBuf(pStrm, pBuf); + } + else if ( ( RT_SUCCESS_NP(rc) + && cbRead == 0) + || rc == VERR_EOF) + { + pBuf->cb = 0; + pBuf->fEndOfStream = true; + Log6(("ssmR3StrmReadMore: %#010llx 0 EOF!\n", pBuf->offStream)); + ssmR3StrmPutBuf(pStrm, pBuf); + rc = VINF_EOF; + } + else + { + Log6(("ssmR3StrmReadMore: %#010llx rc=%Rrc!\n", pBuf->offStream, rc)); + if (ssmR3StrmSetError(pStrm, rc)) + LogRel(("ssmR3StrmReadMore: RTFileRead(,,%#x,) -> %Rrc at offset %#llx\n", + sizeof(pBuf->abData), rc, pBuf->offStream)); + ssmR3StrmPutFreeBuf(pStrm, pBuf); + } + return rc; +} + + +/** + * Stream input routine. + * + * @returns VBox status code. + * @param pStrm The stream handle. + * @param pvBuf Where to put what we read. 
+ * @param   cbToRead            How much to read.
+ */
+static int ssmR3StrmRead(PSSMSTRM pStrm, void *pvBuf, size_t cbToRead)
+{
+    AssertReturn(cbToRead > 0, VINF_SUCCESS);
+    Assert(!pStrm->fWrite);
+
+    /*
+     * Read from the current buffer if we got one.
+     */
+    PSSMSTRMBUF pBuf = pStrm->pCur;
+    if (RT_LIKELY(pBuf))
+    {
+        Assert(pStrm->off <= pBuf->cb);
+        uint32_t cbLeft = pBuf->cb - pStrm->off;
+        if (cbLeft >= cbToRead)
+        {
+            memcpy(pvBuf, &pBuf->abData[pStrm->off], cbToRead);
+            pStrm->off += (uint32_t)cbToRead;
+            Assert(pStrm->off <= pBuf->cb);
+            return VINF_SUCCESS;
+        }
+        if (cbLeft)
+        {
+            memcpy(pvBuf, &pBuf->abData[pStrm->off], cbLeft);
+            pStrm->off += cbLeft;
+            cbToRead -= cbLeft;
+            pvBuf = (uint8_t *)pvBuf + cbLeft;
+        }
+        else if (pBuf->fEndOfStream)
+            return VERR_EOF;
+        Assert(pStrm->off == pBuf->cb);
+    }
+
+    /*
+     * Get more buffers from the stream.
+     */
+    int rc = VINF_SUCCESS;
+    do
+    {
+        /*
+         * Check for EOF first - never flush the EOF buffer.
+         */
+        if (   pBuf
+            && pBuf->fEndOfStream)
+            return VERR_EOF;
+
+        /*
+         * Flush the current buffer and get the next one.
+         */
+        ssmR3StrmFlushCurBuf(pStrm);
+        pBuf = ssmR3StrmGetBuf(pStrm);
+        if (!pBuf)
+        {
+            rc = pStrm->rc;
+            break;
+        }
+        pStrm->pCur = pBuf;
+        Assert(pStrm->off == 0);
+        Assert(pStrm->offCurStream == pBuf->offStream);
+        if (!pBuf->cb)
+        {
+            Assert(pBuf->fEndOfStream);
+            return VERR_EOF;
+        }
+
+        /*
+         * Read data from the buffer.
+         */
+        uint32_t cbCopy = pBuf->cb;
+        if (cbCopy > cbToRead)
+            cbCopy = (uint32_t)cbToRead;
+        memcpy(pvBuf, &pBuf->abData[0], cbCopy);
+        pStrm->off = cbCopy;
+        cbToRead -= cbCopy;
+        pvBuf = (uint8_t *)pvBuf + cbCopy;
+        Assert(!pStrm->pCur || pStrm->off <= pStrm->pCur->cb);
+    } while (cbToRead > 0);
+
+    return rc;
+}
+
+
+/**
+ * Reads data from the stream but instead of copying it to some output buffer
+ * the caller gets a pointer into the current stream buffer.
+ *
+ * The returned pointer becomes invalid after the next stream operation!
+ *
+ * @returns Pointer to the read data residing in the stream buffer. NULL is
+ *          returned if the requested amount of data isn't available in the
+ *          buffer. The caller must fall back on ssmR3StrmRead when this
+ *          happens.
+ *
+ * @param   pStrm               The stream handle.
+ * @param   cbToRead            The number of bytes to read.
+ */
+static uint8_t const *ssmR3StrmReadDirect(PSSMSTRM pStrm, size_t cbToRead)
+{
+    AssertReturn(cbToRead > 0, VINF_SUCCESS);
+    Assert(!pStrm->fWrite);
+
+    /*
+     * Too lazy to fetch more data for the odd case that we're
+     * exactly at the boundary between two buffers.
+     */
+    PSSMSTRMBUF pBuf = pStrm->pCur;
+    if (RT_LIKELY(pBuf))
+    {
+        Assert(pStrm->off <= pBuf->cb);
+        uint32_t cbLeft = pBuf->cb - pStrm->off;
+        if (cbLeft >= cbToRead)
+        {
+            uint8_t const *pb = &pBuf->abData[pStrm->off];
+            pStrm->off += (uint32_t)cbToRead;
+            Assert(pStrm->off <= pBuf->cb);
+            return pb;
+        }
+    }
+    return NULL;
+}
+
+
+#ifndef SSM_STANDALONE
+/**
+ * Check that the stream is OK and flush data that is getting old.
+ *
+ * The checking is mainly for testing for cancellation and out of space
+ * conditions.
+ *
+ * @returns VBox status code.
+ * @param   pStrm               The stream handle.
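+ *
+ * @remarks Editorial note, not part of the imported sources: besides asking the
+ *          stream method table whether the medium is still usable, this nudges
+ *          a write stream with an idle I/O thread by flushing the current
+ *          buffer once it is older than half a second, so partially filled
+ *          buffers do not sit around while a unit produces data slowly.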
+ */ +static int ssmR3StrmCheckAndFlush(PSSMSTRM pStrm) +{ + int rc = pStrm->pOps->pfnIsOk(pStrm->pvUser); + if (RT_FAILURE(rc)) + return rc; + + if ( pStrm->fWrite + && pStrm->hIoThread != NIL_RTTHREAD + && !pStrm->pHead /* the worker is probably idle */ + && pStrm->pCur + && RTTimeNanoTS() - pStrm->pCur->NanoTS > 500*1000*1000 /* 0.5s */ + ) + ssmR3StrmFlushCurBuf(pStrm); + return VINF_SUCCESS; +} +#endif /* !SSM_STANDALONE */ + + +#if !defined(SSM_STANDALONE) || defined(LOG_ENABLED) +/** + * Tell current stream position. + * + * @returns stream position. + * @param pStrm The stream handle. + */ +static uint64_t ssmR3StrmTell(PSSMSTRM pStrm) +{ + return pStrm->offCurStream + pStrm->off; +} +#endif + + +/** + * Gets the intermediate stream CRC up to the current position. + * + * @returns CRC. + * @param pStrm The stream handle. + */ +static uint32_t ssmR3StrmCurCRC(PSSMSTRM pStrm) +{ + if (!pStrm->fChecksummed) + return 0; + if (pStrm->offStreamCRC < pStrm->off) + { + PSSMSTRMBUF pBuf = pStrm->pCur; Assert(pBuf); + pStrm->u32StreamCRC = RTCrc32Process(pStrm->u32StreamCRC, &pBuf->abData[pStrm->offStreamCRC], pStrm->off - pStrm->offStreamCRC); + pStrm->offStreamCRC = pStrm->off; + } + else + Assert(pStrm->offStreamCRC == pStrm->off); + return pStrm->u32StreamCRC; +} + + +/** + * Gets the final stream CRC up to the current position. + * + * @returns CRC. + * @param pStrm The stream handle. + */ +static uint32_t ssmR3StrmFinalCRC(PSSMSTRM pStrm) +{ + if (!pStrm->fChecksummed) + return 0; + return RTCrc32Finish(ssmR3StrmCurCRC(pStrm)); +} + + +/** + * Disables checksumming of the stream. + * + * @param pStrm The stream handle. + */ +static void ssmR3StrmDisableChecksumming(PSSMSTRM pStrm) +{ + pStrm->fChecksummed = false; +} + + +/** + * Used by SSMR3Seek to position the stream at the new unit. + * + * @returns VBox status code. + * @param pStrm The strem handle. + * @param off The seek offset. + * @param uMethod The seek method. + * @param u32CurCRC The current CRC at the seek position. + */ +static int ssmR3StrmSeek(PSSMSTRM pStrm, int64_t off, uint32_t uMethod, uint32_t u32CurCRC) +{ + AssertReturn(!pStrm->fWrite, VERR_NOT_SUPPORTED); + AssertReturn(pStrm->hIoThread == NIL_RTTHREAD, VERR_WRONG_ORDER); + + uint64_t offStream; + int rc = pStrm->pOps->pfnSeek(pStrm->pvUser, off, uMethod, &offStream); + if (RT_SUCCESS(rc)) + { + pStrm->fNeedSeek = false; + pStrm->offNeedSeekTo= UINT64_MAX; + pStrm->offCurStream = offStream; + pStrm->off = 0; + pStrm->offStreamCRC = 0; + if (pStrm->fChecksummed) + pStrm->u32StreamCRC = u32CurCRC; + if (pStrm->pCur) + { + ssmR3StrmPutFreeBuf(pStrm, pStrm->pCur); + pStrm->pCur = NULL; + } + if (pStrm->pPending) + { + ssmR3StrmDestroyBufList(pStrm->pPending); + pStrm->pPending = NULL; + } + if (pStrm->pHead) + { + ssmR3StrmDestroyBufList(pStrm->pHead); + pStrm->pHead = NULL; + } + } + return rc; +} + + +#ifndef SSM_STANDALONE +/** + * Skip some bytes in the stream. + * + * This is only used if someone didn't read all of their data in the V1 format, + * so don't bother making this very efficient yet. + * + * @returns VBox status code. + * @param pStrm The stream handle. + * @param offDst The destination offset. + */ +static int ssmR3StrmSkipTo(PSSMSTRM pStrm, uint64_t offDst) +{ + /* dead simple - lazy bird! 
*/ + for (;;) + { + uint64_t offCur = ssmR3StrmTell(pStrm); + AssertReturn(offCur <= offDst, VERR_SSM_SKIP_BACKWARDS); + if (offCur == offDst) + return VINF_SUCCESS; + + uint8_t abBuf[4096]; + size_t cbToRead = RT_MIN(sizeof(abBuf), offDst - offCur); + int rc = ssmR3StrmRead(pStrm, abBuf, cbToRead); + if (RT_FAILURE(rc)) + return rc; + } +} +#endif /* !SSM_STANDALONE */ + + +/** + * Get the size of the file. + * + * This does not work for non-file streams! + * + * @returns The file size, or UINT64_MAX if not a file stream. + * @param pStrm The stream handle. + */ +static uint64_t ssmR3StrmGetSize(PSSMSTRM pStrm) +{ + uint64_t cbFile; + int rc = pStrm->pOps->pfnSize(pStrm->pvUser, &cbFile); + AssertLogRelRCReturn(rc, UINT64_MAX); + return cbFile; +} + + +/*** + * Tests if the stream is a file stream or not. + * + * @returns true / false. + * @param pStrm The stream handle. + */ +static bool ssmR3StrmIsFile(PSSMSTRM pStrm) +{ + return pStrm->pOps == &g_ssmR3FileOps; +} + + +/** + * Peeks at data in a file stream without buffering anything (or upsetting + * the buffering for that matter). + * + * @returns VBox status code. + * @param pStrm The stream handle + * @param off The offset to start peeking at. Use a negative offset to + * peek at something relative to the end of the file. + * @param pvBuf Output buffer. + * @param cbToRead How much to read. + * @param poff Where to optionally store the position. Useful when + * using a negative off. + * + * @remarks Failures occurring while peeking will not be raised on the stream. + */ +static int ssmR3StrmPeekAt(PSSMSTRM pStrm, RTFOFF off, void *pvBuf, size_t cbToRead, uint64_t *poff) +{ + AssertReturn(!pStrm->fWrite, VERR_NOT_SUPPORTED); + AssertReturn(pStrm->hIoThread == NIL_RTTHREAD, VERR_WRONG_ORDER); + + if (!pStrm->fNeedSeek) + { + pStrm->fNeedSeek = true; + pStrm->offNeedSeekTo = pStrm->offCurStream + (pStrm->pCur ? pStrm->pCur->cb : 0); + } + uint64_t offActual; + int rc = pStrm->pOps->pfnSeek(pStrm->pvUser, off, off >= 0 ? RTFILE_SEEK_BEGIN : RTFILE_SEEK_END, &offActual); + if (RT_SUCCESS(rc)) + { + if (poff) + *poff = offActual; + rc = pStrm->pOps->pfnRead(pStrm->pvUser, offActual, pvBuf, cbToRead, NULL); + } + + return rc; +} + +#ifndef SSM_STANDALONE + +/** + * The I/O thread. + * + * @returns VINF_SUCCESS (ignored). + * @param hSelf The thread handle. + * @param pvStrm The stream handle. + */ +static DECLCALLBACK(int) ssmR3StrmIoThread(RTTHREAD hSelf, void *pvStrm) +{ + PSSMSTRM pStrm = (PSSMSTRM)pvStrm; + ASMAtomicWriteHandle(&pStrm->hIoThread, hSelf); /* paranoia */ + + Log(("ssmR3StrmIoThread: starts working\n")); + if (pStrm->fWrite) + { + /* + * Write until error or terminated. 
+ */ + for (;;) + { + int rc = ssmR3StrmWriteBuffers(pStrm); + if ( RT_FAILURE(rc) + || rc == VINF_EOF) + { + Log(("ssmR3StrmIoThread: quitting writing with rc=%Rrc.\n", rc)); + break; + } + if (RT_FAILURE(pStrm->rc)) + { + Log(("ssmR3StrmIoThread: quitting writing with stream rc=%Rrc\n", pStrm->rc)); + break; + } + + if (ASMAtomicReadBool(&pStrm->fTerminating)) + { + if (!ASMAtomicReadPtrT(&pStrm->pHead, PSSMSTRMBUF)) + { + Log(("ssmR3StrmIoThread: quitting writing because of pending termination.\n")); + break; + } + Log(("ssmR3StrmIoThread: postponing termination because of pending buffers.\n")); + } + else if (!ASMAtomicReadPtrT(&pStrm->pHead, PSSMSTRMBUF)) + { + rc = RTSemEventWait(pStrm->hEvtHead, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + } + } + + if (!ASMAtomicReadBool(&pStrm->fTerminating)) + RTSemEventSignal(pStrm->hEvtFree); + } + else + { + /* + * Read until end of file, error or termination. + */ + for (;;) + { + if (ASMAtomicReadBool(&pStrm->fTerminating)) + { + Log(("ssmR3StrmIoThread: quitting reading because of pending termination.\n")); + break; + } + + int rc = ssmR3StrmReadMore(pStrm); + if ( RT_FAILURE(rc) + || rc == VINF_EOF) + { + Log(("ssmR3StrmIoThread: quitting reading with rc=%Rrc\n", rc)); + break; + } + if (RT_FAILURE(pStrm->rc)) + { + Log(("ssmR3StrmIoThread: quitting reading with stream rc=%Rrc\n", pStrm->rc)); + break; + } + } + + if (!ASMAtomicReadBool(&pStrm->fTerminating)) + RTSemEventSignal(pStrm->hEvtHead); + } + + return VINF_SUCCESS; +} + + +/** + * Starts the I/O thread for the specified stream. + * + * @param pStrm The stream handle. + */ +static void ssmR3StrmStartIoThread(PSSMSTRM pStrm) +{ + Assert(pStrm->hIoThread == NIL_RTTHREAD); + + RTTHREAD hThread; + int rc = RTThreadCreate(&hThread, ssmR3StrmIoThread, pStrm, 0, RTTHREADTYPE_IO, RTTHREADFLAGS_WAITABLE, "SSM-IO"); + AssertRCReturnVoid(rc); + ASMAtomicWriteHandle(&pStrm->hIoThread, hThread); /* paranoia */ +} + + +/** + * Stops the I/O thread. + * + * @param pStrm The stream handle. + */ +static void ssmR3StrmStopIoThread(PSSMSTRM pStrm) +{ + LogFlow(("ssmR3StrmStopIoThread: %p\n", pStrm->hIoThread)); + if (pStrm->hIoThread != NIL_RTTHREAD) + { + /* + * Signal the I/O thread and wait for it to complete. + */ + ASMAtomicWriteBool(&pStrm->fTerminating, true); + if (pStrm->fWrite) + { + int rc1 = RTSemEventSignal(pStrm->hEvtHead); + AssertLogRelRC(rc1); + } + else + { + int rc2 = RTSemEventSignal(pStrm->hEvtFree); + AssertLogRelRC(rc2); + } + int rc3 = RTThreadWait(pStrm->hIoThread, RT_INDEFINITE_WAIT, NULL); + AssertLogRelRC(rc3); + pStrm->hIoThread = NIL_RTTHREAD; + pStrm->fTerminating = false; /* Can't read stuff otherwise. */ + } +} + +#endif /* !SSM_STANDALONE */ + +/** + * Works the progress calculation for non-live saves and restores. + * + * @param pSSM The SSM handle. + * @param cbAdvance Number of bytes to advance (with in the current unit). + */ +static void ssmR3ProgressByByte(PSSMHANDLE pSSM, uint64_t cbAdvance) +{ + if (!pSSM->fLiveSave) + { + /* Can't advance it beyond the estimated end of the unit. */ + uint64_t cbLeft = pSSM->offEstUnitEnd - pSSM->offEst; + if (cbAdvance > cbLeft) + cbAdvance = cbLeft; + pSSM->offEst += cbAdvance; + + /* uPercentPrepare% prepare, xx% exec, uPercentDone% done+crc. This is not + quite right for live save, but the non-live stage there is very short. 
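+
+       Editorial illustration with assumed numbers (not part of the imported
+       sources): with uPercentPrepare=2, uPercentLive=0, uPercentDone=2 and
+       cbEstTotal=960 MB, the exec phase spans 96 percent points, so each
+       step of uPercent moves offEstProgress forward by 960 MB / 96 = 10 MB;
+       once uPercent reaches 42, offEstProgress = (42 - 2 - 0) * 960 MB / 96
+       = 400 MB.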
*/ + while ( pSSM->offEst >= pSSM->offEstProgress + && pSSM->uPercent <= 100 - pSSM->uPercentDone) + { + if (pSSM->pfnProgress) + pSSM->pfnProgress(pSSM->pVM->pUVM, pSSM->uPercent, pSSM->pvUser); + pSSM->uPercent++; + pSSM->offEstProgress = (pSSM->uPercent - pSSM->uPercentPrepare - pSSM->uPercentLive) * pSSM->cbEstTotal + / (100 - pSSM->uPercentDone - pSSM->uPercentPrepare - pSSM->uPercentLive); + } + } +} + + +#ifndef SSM_STANDALONE +/** + * Makes the SSM operation cancellable or not (via SSMR3Cancel). + * + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. (SSMHANDLE::rc may be set.) + * @param fCancellable The new state. + */ +static void ssmR3SetCancellable(PVM pVM, PSSMHANDLE pSSM, bool fCancellable) +{ + RTCritSectEnter(&pVM->ssm.s.CancelCritSect); + if (fCancellable) + { + Assert(!pVM->ssm.s.pSSM); + pVM->ssm.s.pSSM = pSSM; + } + else + { + if (pVM->ssm.s.pSSM == pSSM) + pVM->ssm.s.pSSM = NULL; + + uint32_t fCancelled = ASMAtomicUoReadU32(&pSSM->fCancelled); + if ( fCancelled == SSMHANDLE_CANCELLED + && RT_SUCCESS(pSSM->rc)) + pSSM->rc = VERR_SSM_CANCELLED; + } + + RTCritSectLeave(&pVM->ssm.s.CancelCritSect); +} +#endif /* !SSM_STANDALONE */ + + +/** + * Gets the host bit count of the saved state. + * + * Works for on both save and load handles. + * + * @returns 32 or 64. + * @param pSSM The saved state handle. + */ +DECLINLINE(uint32_t) ssmR3GetHostBits(PSSMHANDLE pSSM) +{ + if (pSSM->enmOp >= SSMSTATE_LOAD_PREP) + { + uint32_t cBits = pSSM->u.Read.cHostBits; + if (cBits) + return cBits; + } + return HC_ARCH_BITS; +} + + +/** + * Saved state origins on a host using 32-bit MSC? + * + * Works for on both save and load handles. + * + * @returns true/false. + * @param pSSM The saved state handle. + */ +DECLINLINE(bool) ssmR3IsHostMsc32(PSSMHANDLE pSSM) +{ + if (pSSM->enmOp >= SSMSTATE_LOAD_PREP) + return pSSM->u.Read.fIsHostMsc32; + return SSM_HOST_IS_MSC_32; +} + +#ifndef SSM_STANDALONE + +/** + * Finishes a data unit. + * All buffers and compressor instances are flushed and destroyed. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + */ +static int ssmR3DataWriteFinish(PSSMHANDLE pSSM) +{ + //Log2(("ssmR3DataWriteFinish: %#010llx start\n", ssmR3StrmTell(&pSSM->Strm))); + int rc = ssmR3DataFlushBuffer(pSSM); + if (RT_SUCCESS(rc)) + { + pSSM->offUnit = UINT64_MAX; + pSSM->offUnitUser = UINT64_MAX; + return VINF_SUCCESS; + } + + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc; + Log2(("ssmR3DataWriteFinish: failure rc=%Rrc\n", rc)); + return rc; +} + + +/** + * Begins writing the data of a data unit. + * + * Errors are signalled via pSSM->rc. + * + * @param pSSM The saved state handle. + */ +static void ssmR3DataWriteBegin(PSSMHANDLE pSSM) +{ + pSSM->offUnit = 0; + pSSM->offUnitUser = 0; +} + + +/** + * Writes a record to the current data item in the saved state file. + * + * @returns VBox status code. Sets pSSM->rc on failure. + * @param pSSM The saved state handle. + * @param pvBuf The bits to write. + * @param cbBuf The number of bytes to write. + */ +static int ssmR3DataWriteRaw(PSSMHANDLE pSSM, const void *pvBuf, size_t cbBuf) +{ + Log2(("ssmR3DataWriteRaw: %08llx|%08llx: pvBuf=%p cbBuf=%#x %.*Rhxs%s\n", + ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pvBuf, cbBuf, RT_MIN(cbBuf, SSM_LOG_BYTES), pvBuf, cbBuf > SSM_LOG_BYTES ? "..." : "")); + + /* + * Check that everything is fine. + */ + if (RT_FAILURE(pSSM->rc)) + return pSSM->rc; + + /* + * Write the data item in 1MB chunks for progress indicator reasons. 
+ */ + while (cbBuf > 0) + { + size_t cbChunk = RT_MIN(cbBuf, _1M); + int rc = ssmR3StrmWrite(&pSSM->Strm, pvBuf, cbChunk); + if (RT_FAILURE(rc)) + return rc; + pSSM->offUnit += cbChunk; + cbBuf -= cbChunk; + pvBuf = (char *)pvBuf + cbChunk; + } + + return VINF_SUCCESS; +} + + +/** + * Writes a record header for the specified amount of data. + * + * @returns VBox status code. Sets pSSM->rc on failure. + * @param pSSM The saved state handle + * @param cb The amount of data. + * @param u8TypeAndFlags The record type and flags. + */ +static int ssmR3DataWriteRecHdr(PSSMHANDLE pSSM, size_t cb, uint8_t u8TypeAndFlags) +{ + size_t cbHdr; + uint8_t abHdr[8]; + abHdr[0] = u8TypeAndFlags; + if (cb < 0x80) + { + cbHdr = 2; + abHdr[1] = (uint8_t)cb; + } + else if (cb < 0x00000800) + { + cbHdr = 3; + abHdr[1] = (uint8_t)(0xc0 | (cb >> 6)); + abHdr[2] = (uint8_t)(0x80 | (cb & 0x3f)); + } + else if (cb < 0x00010000) + { + cbHdr = 4; + abHdr[1] = (uint8_t)(0xe0 | (cb >> 12)); + abHdr[2] = (uint8_t)(0x80 | ((cb >> 6) & 0x3f)); + abHdr[3] = (uint8_t)(0x80 | (cb & 0x3f)); + } + else if (cb < 0x00200000) + { + cbHdr = 5; + abHdr[1] = (uint8_t)(0xf0 | (cb >> 18)); + abHdr[2] = (uint8_t)(0x80 | ((cb >> 12) & 0x3f)); + abHdr[3] = (uint8_t)(0x80 | ((cb >> 6) & 0x3f)); + abHdr[4] = (uint8_t)(0x80 | (cb & 0x3f)); + } + else if (cb < 0x04000000) + { + cbHdr = 6; + abHdr[1] = (uint8_t)(0xf8 | (cb >> 24)); + abHdr[2] = (uint8_t)(0x80 | ((cb >> 18) & 0x3f)); + abHdr[3] = (uint8_t)(0x80 | ((cb >> 12) & 0x3f)); + abHdr[4] = (uint8_t)(0x80 | ((cb >> 6) & 0x3f)); + abHdr[5] = (uint8_t)(0x80 | (cb & 0x3f)); + } + else if (cb <= 0x7fffffff) + { + cbHdr = 7; + abHdr[1] = (uint8_t)(0xfc | (cb >> 30)); + abHdr[2] = (uint8_t)(0x80 | ((cb >> 24) & 0x3f)); + abHdr[3] = (uint8_t)(0x80 | ((cb >> 18) & 0x3f)); + abHdr[4] = (uint8_t)(0x80 | ((cb >> 12) & 0x3f)); + abHdr[5] = (uint8_t)(0x80 | ((cb >> 6) & 0x3f)); + abHdr[6] = (uint8_t)(0x80 | (cb & 0x3f)); + } + else + AssertLogRelMsgFailedReturn(("cb=%#x\n", cb), pSSM->rc = VERR_SSM_MEM_TOO_BIG); + + Log3(("ssmR3DataWriteRecHdr: %08llx|%08llx/%08x: Type=%02x fImportant=%RTbool cbHdr=%u\n", + ssmR3StrmTell(&pSSM->Strm) + cbHdr, pSSM->offUnit + cbHdr, cb, u8TypeAndFlags & SSM_REC_TYPE_MASK, !!(u8TypeAndFlags & SSM_REC_FLAGS_IMPORTANT), cbHdr)); + + return ssmR3DataWriteRaw(pSSM, &abHdr[0], cbHdr); +} + + +/** + * Worker that flushes the buffered data. + * + * @returns VBox status code. Will set pSSM->rc on error. + * @param pSSM The saved state handle. + */ +static int ssmR3DataFlushBuffer(PSSMHANDLE pSSM) +{ + /* + * Check how much there current is in the buffer. + */ + uint32_t cb = pSSM->u.Write.offDataBuffer; + if (!cb) + return pSSM->rc; + pSSM->u.Write.offDataBuffer = 0; + + /* + * Write a record header and then the data. + * (No need for fancy optimizations here any longer since the stream is + * fully buffered.) + */ + int rc = ssmR3DataWriteRecHdr(pSSM, cb, SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_RAW); + if (RT_SUCCESS(rc)) + rc = ssmR3DataWriteRaw(pSSM, pSSM->u.Write.abDataBuffer, cb); + ssmR3ProgressByByte(pSSM, cb); + return rc; +} + + +/** + * ssmR3DataWrite worker that writes big stuff. + * + * @returns VBox status code + * @param pSSM The saved state handle. + * @param pvBuf The bits to write. + * @param cbBuf The number of bytes to write. 
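+ *
+ * @remarks Editorial note, not part of the imported sources: each page-sized
+ *          input block (4 KiB with the usual PAGE_SIZE) becomes one record.
+ *          Byte 0 carries the type and flags (RAW_LZF when compression pays
+ *          off, RAW otherwise, RAW_ZERO for all-zero pages), the following
+ *          bytes encode the record body size in the same UTF-8 like scheme
+ *          ssmR3DataWriteRecHdr uses, and LZF/ZERO bodies start with the
+ *          uncompressed size in KiB units.
+ *
+ *          Worked size-encoding example (number chosen for illustration):
+ *          cb = 0x123 falls in the 3 byte range, so the size bytes are
+ *          0xc0 | (0x123 >> 6) = 0xc4 and 0x80 | (0x123 & 0x3f) = 0xa3.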
+ */ +static int ssmR3DataWriteBig(PSSMHANDLE pSSM, const void *pvBuf, size_t cbBuf) +{ + int rc = ssmR3DataFlushBuffer(pSSM); + if (RT_SUCCESS(rc)) + { + pSSM->offUnitUser += cbBuf; + + /* + * Split it up into compression blocks. + */ + for (;;) + { + AssertCompile(SSM_ZIP_BLOCK_SIZE == PAGE_SIZE); + if ( cbBuf >= SSM_ZIP_BLOCK_SIZE + && ( ((uintptr_t)pvBuf & 0xf) + || !ASMMemIsZeroPage(pvBuf)) + ) + { + /* + * Compress it. + */ + AssertCompile(1 + 3 + 1 + SSM_ZIP_BLOCK_SIZE < 0x00010000); + uint8_t *pb; + rc = ssmR3StrmReserveWriteBufferSpace(&pSSM->Strm, 1 + 3 + 1 + SSM_ZIP_BLOCK_SIZE, &pb); + if (RT_FAILURE(rc)) + break; + size_t cbRec = SSM_ZIP_BLOCK_SIZE - (SSM_ZIP_BLOCK_SIZE / 16); + rc = RTZipBlockCompress(RTZIPTYPE_LZF, RTZIPLEVEL_FAST, 0 /*fFlags*/, + pvBuf, SSM_ZIP_BLOCK_SIZE, + pb + 1 + 3 + 1, cbRec, &cbRec); + if (RT_SUCCESS(rc)) + { + pb[0] = SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_RAW_LZF; + pb[4] = SSM_ZIP_BLOCK_SIZE / _1K; + cbRec += 1; + } + else + { + pb[0] = SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_RAW; + memcpy(&pb[4], pvBuf, SSM_ZIP_BLOCK_SIZE); + cbRec = SSM_ZIP_BLOCK_SIZE; + } + pb[1] = (uint8_t)(0xe0 | ( cbRec >> 12)); + pb[2] = (uint8_t)(0x80 | ((cbRec >> 6) & 0x3f)); + pb[3] = (uint8_t)(0x80 | ( cbRec & 0x3f)); + cbRec += 1 + 3; + rc = ssmR3StrmCommitWriteBufferSpace(&pSSM->Strm, cbRec); + if (RT_FAILURE(rc)) + break; + + pSSM->offUnit += cbRec; + ssmR3ProgressByByte(pSSM, SSM_ZIP_BLOCK_SIZE); + + /* advance */ + if (cbBuf == SSM_ZIP_BLOCK_SIZE) + return VINF_SUCCESS; + cbBuf -= SSM_ZIP_BLOCK_SIZE; + pvBuf = (uint8_t const*)pvBuf + SSM_ZIP_BLOCK_SIZE; + } + else if (cbBuf >= SSM_ZIP_BLOCK_SIZE) + { + /* + * Zero block. + */ + uint8_t abRec[3]; + abRec[0] = SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_RAW_ZERO; + abRec[1] = 1; + abRec[2] = SSM_ZIP_BLOCK_SIZE / _1K; + Log3(("ssmR3DataWriteBig: %08llx|%08llx/%08x: ZERO\n", ssmR3StrmTell(&pSSM->Strm) + 2, pSSM->offUnit + 2, 1)); + rc = ssmR3DataWriteRaw(pSSM, &abRec[0], sizeof(abRec)); + if (RT_FAILURE(rc)) + break; + + /* advance */ + ssmR3ProgressByByte(pSSM, SSM_ZIP_BLOCK_SIZE); + if (cbBuf == SSM_ZIP_BLOCK_SIZE) + return VINF_SUCCESS; + cbBuf -= SSM_ZIP_BLOCK_SIZE; + pvBuf = (uint8_t const*)pvBuf + SSM_ZIP_BLOCK_SIZE; + } + else + { + /* + * Less than one block left, store it the simple way. + */ + rc = ssmR3DataWriteRecHdr(pSSM, cbBuf, SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_RAW); + if (RT_SUCCESS(rc)) + rc = ssmR3DataWriteRaw(pSSM, pvBuf, cbBuf); + ssmR3ProgressByByte(pSSM, cbBuf); + break; + } + } + } + return rc; +} + + +/** + * ssmR3DataWrite worker that is called when there isn't enough room in the + * buffer for the current chunk of data. + * + * This will first flush the buffer and then add the new bits to it. + * + * @returns VBox status code + * @param pSSM The saved state handle. + * @param pvBuf The bits to write. + * @param cbBuf The number of bytes to write. + */ +static int ssmR3DataWriteFlushAndBuffer(PSSMHANDLE pSSM, const void *pvBuf, size_t cbBuf) +{ + int rc = ssmR3DataFlushBuffer(pSSM); + if (RT_SUCCESS(rc)) + { + memcpy(&pSSM->u.Write.abDataBuffer[0], pvBuf, cbBuf); + pSSM->u.Write.offDataBuffer = (uint32_t)cbBuf; + pSSM->offUnitUser += cbBuf; + } + return rc; +} + + +/** + * Writes data to the current data unit. + * + * This is an inlined wrapper that optimizes the small writes that so many of + * the APIs make. + * + * @returns VBox status code + * @param pSSM The saved state handle. 
+ * @param pvBuf The bits to write. + * @param cbBuf The number of bytes to write. + */ +DECLINLINE(int) ssmR3DataWrite(PSSMHANDLE pSSM, const void *pvBuf, size_t cbBuf) +{ + if (cbBuf > sizeof(pSSM->u.Write.abDataBuffer) / 8) + return ssmR3DataWriteBig(pSSM, pvBuf, cbBuf); + if (!cbBuf) + return VINF_SUCCESS; + + uint32_t off = pSSM->u.Write.offDataBuffer; + if (RT_UNLIKELY(cbBuf + off > sizeof(pSSM->u.Write.abDataBuffer))) + return ssmR3DataWriteFlushAndBuffer(pSSM, pvBuf, cbBuf); + + memcpy(&pSSM->u.Write.abDataBuffer[off], pvBuf, cbBuf); + pSSM->u.Write.offDataBuffer = off + (uint32_t)cbBuf; + pSSM->offUnitUser += cbBuf; + return VINF_SUCCESS; +} + + +/** + * Puts a structure. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pvStruct The structure address. + * @param paFields The array of structure fields descriptions. + * The array must be terminated by a SSMFIELD_ENTRY_TERM(). + */ +VMMR3DECL(int) SSMR3PutStruct(PSSMHANDLE pSSM, const void *pvStruct, PCSSMFIELD paFields) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + AssertPtr(pvStruct); + AssertPtr(paFields); + + /* begin marker. */ + int rc = SSMR3PutU32(pSSM, SSMR3STRUCT_BEGIN); + if (RT_FAILURE(rc)) + return rc; + + /* put the fields */ + for (PCSSMFIELD pCur = paFields; + pCur->cb != UINT32_MAX && pCur->off != UINT32_MAX; + pCur++) + { + uint8_t const *pbField = (uint8_t const *)pvStruct + pCur->off; + switch ((uintptr_t)pCur->pfnGetPutOrTransformer) + { + case SSMFIELDTRANS_NO_TRANSFORMATION: + rc = ssmR3DataWrite(pSSM, pbField, pCur->cb); + break; + + case SSMFIELDTRANS_GCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPTR), ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3PutGCPtr(pSSM, *(PRTGCPTR)pbField); + break; + + case SSMFIELDTRANS_GCPHYS: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPHYS), ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3PutGCPhys(pSSM, *(PRTGCPHYS)pbField); + break; + + case SSMFIELDTRANS_RCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTRCPTR), ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3PutRCPtr(pSSM, *(PRTRCPTR)pbField); + break; + + case SSMFIELDTRANS_RCPTR_ARRAY: + { + uint32_t const cEntries = pCur->cb / sizeof(RTRCPTR); + AssertMsgBreakStmt(pCur->cb == cEntries * sizeof(RTRCPTR) && cEntries, ("%#x (%s)\n", pCur->cb, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = VINF_SUCCESS; + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + rc = SSMR3PutRCPtr(pSSM, ((PRTRCPTR)pbField)[i]); + break; + } + + default: + AssertMsgFailedBreakStmt(("%#x\n", pCur->pfnGetPutOrTransformer), rc = VERR_SSM_FIELD_COMPLEX); + } + if (RT_FAILURE(rc)) + { + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc; + return rc; + } + } + + /* end marker */ + return SSMR3PutU32(pSSM, SSMR3STRUCT_END); +} + + +/** + * SSMR3PutStructEx helper that puts a HCPTR that is used as a NULL indicator. + * + * @returns VBox status code. + * + * @param pSSM The saved state handle. + * @param pv The value to put. + * @param fFlags SSMSTRUCT_FLAGS_XXX. + */ +DECLINLINE(int) ssmR3PutHCPtrNI(PSSMHANDLE pSSM, void *pv, uint32_t fFlags) +{ + int rc; + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3DataWrite(pSSM, &pv, sizeof(void *)); + else + rc = SSMR3PutBool(pSSM, pv != NULL); + return rc; +} + + +/** + * SSMR3PutStructEx helper that puts an arbitrary number of zeros. + * + * @returns VBox status code. + * @param pSSM The saved state handle. 
+ * @param cbToFill The number of zeros to stuff into the state. + */ +static int ssmR3PutZeros(PSSMHANDLE pSSM, uint32_t cbToFill) +{ + while (cbToFill > 0) + { + uint32_t cb = RT_MIN(sizeof(g_abZero), cbToFill); + int rc = ssmR3DataWrite(pSSM, g_abZero, cb); + if (RT_FAILURE(rc)) + return rc; + cbToFill -= cb; + } + return VINF_SUCCESS; +} + + +/** + * Puts a structure, extended API. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pvStruct The structure address. + * @param cbStruct The size of the struct (use for validation only). + * @param fFlags Combination of SSMSTRUCT_FLAGS_XXX defines. + * @param paFields The array of structure fields descriptions. The + * array must be terminated by a SSMFIELD_ENTRY_TERM(). + * @param pvUser User argument for any callbacks that paFields might + * contain. + */ +VMMR3DECL(int) SSMR3PutStructEx(PSSMHANDLE pSSM, const void *pvStruct, size_t cbStruct, + uint32_t fFlags, PCSSMFIELD paFields, void *pvUser) +{ + int rc; + + /* + * Validation. + */ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + AssertMsgReturn(!(fFlags & ~SSMSTRUCT_FLAGS_VALID_MASK), ("%#x\n", fFlags), pSSM->rc = VERR_INVALID_PARAMETER); + AssertPtr(pvStruct); + AssertPtr(paFields); + + + /* + * Begin marker. + */ + if (!(fFlags & (SSMSTRUCT_FLAGS_NO_MARKERS | SSMSTRUCT_FLAGS_NO_LEAD_MARKER))) + { + rc = SSMR3PutU32(pSSM, SSMR3STRUCT_BEGIN); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Put the fields + */ + rc = VINF_SUCCESS; + uint32_t off = 0; + for (PCSSMFIELD pCur = paFields; + pCur->cb != UINT32_MAX && pCur->off != UINT32_MAX; + pCur++) + { + uint32_t const offField = (!SSMFIELDTRANS_IS_PADDING(pCur->pfnGetPutOrTransformer) || pCur->off != UINT32_MAX / 2) + && !SSMFIELDTRANS_IS_OLD(pCur->pfnGetPutOrTransformer) + ? pCur->off + : off; + uint32_t const cbField = SSMFIELDTRANS_IS_OLD(pCur->pfnGetPutOrTransformer) + ? 0 + : SSMFIELDTRANS_IS_PADDING(pCur->pfnGetPutOrTransformer) + ? 
RT_HIWORD(pCur->cb) + : pCur->cb; + AssertMsgBreakStmt( cbField <= cbStruct + && offField + cbField <= cbStruct + && offField + cbField >= offField, + ("offField=%#x cbField=%#x cbStruct=%#x (%s)\n", offField, cbField, cbStruct, pCur->pszName), + rc = VERR_SSM_FIELD_OUT_OF_BOUNDS); + AssertMsgBreakStmt( !(fFlags & SSMSTRUCT_FLAGS_FULL_STRUCT) + || off == offField, + ("off=%#x offField=%#x (%s)\n", off, offField, pCur->pszName), + rc = VERR_SSM_FIELD_NOT_CONSECUTIVE); + + rc = VINF_SUCCESS; + uint8_t const *pbField = (uint8_t const *)pvStruct + offField; + switch ((uintptr_t)pCur->pfnGetPutOrTransformer) + { + case SSMFIELDTRANS_NO_TRANSFORMATION: + rc = ssmR3DataWrite(pSSM, pbField, cbField); + break; + + case SSMFIELDTRANS_GCPHYS: + AssertMsgBreakStmt(cbField == sizeof(RTGCPHYS), ("%#x (%s)\n", cbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3PutGCPhys(pSSM, *(PRTGCPHYS)pbField); + break; + + case SSMFIELDTRANS_GCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTGCPTR), ("%#x (%s)\n", cbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3PutGCPtr(pSSM, *(PRTGCPTR)pbField); + break; + + case SSMFIELDTRANS_RCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTRCPTR), ("%#x (%s)\n", cbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3PutRCPtr(pSSM, *(PRTRCPTR)pbField); + break; + + case SSMFIELDTRANS_RCPTR_ARRAY: + { + uint32_t const cEntries = cbField / sizeof(RTRCPTR); + AssertMsgBreakStmt(cbField == cEntries * sizeof(RTRCPTR) && cEntries, ("%#x (%s)\n", cbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + rc = SSMR3PutRCPtr(pSSM, ((PRTRCPTR)pbField)[i]); + break; + } + + case SSMFIELDTRANS_HCPTR_NI: + AssertMsgBreakStmt(cbField == sizeof(void *), ("%#x (%s)\n", cbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3PutHCPtrNI(pSSM, *(void * const *)pbField, fFlags); + break; + + case SSMFIELDTRANS_HCPTR_NI_ARRAY: + { + uint32_t const cEntries = cbField / sizeof(void *); + AssertMsgBreakStmt(cbField == cEntries * sizeof(void *) && cEntries, ("%#x (%s)\n", cbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + rc = ssmR3PutHCPtrNI(pSSM, ((void * const *)pbField)[i], fFlags); + break; + } + + case SSMFIELDTRANS_HCPTR_HACK_U32: + AssertMsgBreakStmt(cbField == sizeof(void *), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + AssertMsgBreakStmt(*(uintptr_t *)pbField <= UINT32_MAX, ("%p (%s)\n", *(uintptr_t *)pbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_VALUE); + rc = ssmR3DataWrite(pSSM, pbField, sizeof(uint32_t)); + if ((fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) && sizeof(void *) != sizeof(uint32_t) && RT_SUCCESS(rc)) + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(uint32_t)); + break; + + case SSMFIELDTRANS_U32_ZX_U64: + AssertFailedBreakStmt(rc = VERR_SSM_FIELD_LOAD_ONLY_TRANSFORMATION); + break; + + case SSMFIELDTRANS_IGNORE: + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3PutZeros(pSSM, cbField); + break; + + case SSMFIELDTRANS_IGN_GCPHYS: + AssertMsgBreakStmt(cbField == sizeof(RTGCPHYS), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(RTGCPHYS)); + break; + + case SSMFIELDTRANS_IGN_GCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTGCPTR), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & 
SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(RTGCPTR)); + break; + + case SSMFIELDTRANS_IGN_RCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTRCPTR), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(RTRCPTR)); + break; + + case SSMFIELDTRANS_IGN_HCPTR: + AssertMsgBreakStmt(cbField == sizeof(void *), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(void *)); + break; + + + case SSMFIELDTRANS_OLD: + AssertMsgBreakStmt(pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3PutZeros(pSSM, pCur->cb); + break; + + case SSMFIELDTRANS_OLD_GCPHYS: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPHYS) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(RTGCPHYS)); + break; + + case SSMFIELDTRANS_OLD_GCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPTR) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(RTGCPTR)); + break; + + case SSMFIELDTRANS_OLD_RCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTRCPTR) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(RTRCPTR)); + break; + + case SSMFIELDTRANS_OLD_HCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(void *) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3DataWrite(pSSM, g_abZero, sizeof(void *)); + break; + + case SSMFIELDTRANS_OLD_PAD_HC: + AssertMsgBreakStmt(pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3PutZeros(pSSM, HC_ARCH_BITS == 64 ? RT_HIWORD(pCur->cb) : RT_LOWORD(pCur->cb)); + break; + + case SSMFIELDTRANS_OLD_PAD_MSC32: + AssertMsgBreakStmt(pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_SIZE); + if (SSM_HOST_IS_MSC_32) + rc = ssmR3PutZeros(pSSM, pCur->cb); + break; + + + case SSMFIELDTRANS_PAD_HC: + case SSMFIELDTRANS_PAD_HC32: + case SSMFIELDTRANS_PAD_HC64: + case SSMFIELDTRANS_PAD_HC_AUTO: + case SSMFIELDTRANS_PAD_MSC32_AUTO: + { + uint32_t cb32 = RT_BYTE1(pCur->cb); + uint32_t cb64 = RT_BYTE2(pCur->cb); + uint32_t cbCtx = HC_ARCH_BITS == 64 + || ( (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_MSC32_AUTO + && !SSM_HOST_IS_MSC_32) + ? cb64 : cb32; + uint32_t cbSaved = ssmR3GetHostBits(pSSM) == 64 + || ( (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_MSC32_AUTO + && !ssmR3IsHostMsc32(pSSM)) + ? 
cb64 : cb32; + AssertMsgBreakStmt( cbField == cbCtx + && ( ( pCur->off == UINT32_MAX / 2 + && ( cbField == 0 + || (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_HC_AUTO + || (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_MSC32_AUTO + ) + ) + || (pCur->off != UINT32_MAX / 2 && cbField != 0) + ) + , ("cbField=%#x cb32=%#x cb64=%#x HC_ARCH_BITS=%u cbCtx=%#x cbSaved=%#x off=%#x\n", + cbField, cb32, cb64, HC_ARCH_BITS, cbCtx, cbSaved, pCur->off), + rc = VERR_SSM_FIELD_INVALID_PADDING_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = ssmR3PutZeros(pSSM, cbSaved); + break; + } + + default: + AssertPtrBreakStmt(pCur->pfnGetPutOrTransformer, rc = VERR_SSM_FIELD_INVALID_CALLBACK); + rc = pCur->pfnGetPutOrTransformer(pSSM, pCur, (void *)pvStruct, fFlags, false /*fGetOrPut*/, pvUser); + break; + } + if (RT_FAILURE(rc)) + break; /* Deal with failures in one place (see below). */ + + off = offField + cbField; + } + + if (RT_SUCCESS(rc)) + AssertMsgStmt( !(fFlags & SSMSTRUCT_FLAGS_FULL_STRUCT) + || off == cbStruct, + ("off=%#x cbStruct=%#x\n", off, cbStruct), + rc = VERR_SSM_FIELD_NOT_CONSECUTIVE); + + if (RT_FAILURE(rc)) + { + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc; + return rc; + } + + /* + * End marker + */ + if (!(fFlags & (SSMSTRUCT_FLAGS_NO_MARKERS | SSMSTRUCT_FLAGS_NO_TAIL_MARKER))) + { + rc = SSMR3PutU32(pSSM, SSMR3STRUCT_END); + if (RT_FAILURE(rc)) + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Saves a boolean item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param fBool Item to save. + */ +VMMR3DECL(int) SSMR3PutBool(PSSMHANDLE pSSM, bool fBool) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + uint8_t u8 = fBool; /* enforce 1 byte size */ + return ssmR3DataWrite(pSSM, &u8, sizeof(u8)); +} + + +/** + * Saves a 8-bit unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u8 Item to save. + */ +VMMR3DECL(int) SSMR3PutU8(PSSMHANDLE pSSM, uint8_t u8) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u8, sizeof(u8)); +} + + +/** + * Saves a 8-bit signed integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param i8 Item to save. + */ +VMMR3DECL(int) SSMR3PutS8(PSSMHANDLE pSSM, int8_t i8) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &i8, sizeof(i8)); +} + + +/** + * Saves a 16-bit unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u16 Item to save. + */ +VMMR3DECL(int) SSMR3PutU16(PSSMHANDLE pSSM, uint16_t u16) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u16, sizeof(u16)); +} + + +/** + * Saves a 16-bit signed integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param i16 Item to save. + */ +VMMR3DECL(int) SSMR3PutS16(PSSMHANDLE pSSM, int16_t i16) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &i16, sizeof(i16)); +} + + +/** + * Saves a 32-bit unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u32 Item to save. 
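+ *
+ * @remarks Hypothetical usage sketch added for illustration (not part of the
+ *          original unit); 'pThis' and 'uMyCounter' are assumed caller-side
+ *          names.
+ * @code
+ *     int rc = SSMR3PutU32(pSSM, pThis->uMyCounter);
+ *     AssertRCReturn(rc, rc);
+ * @endcode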
+ */ +VMMR3DECL(int) SSMR3PutU32(PSSMHANDLE pSSM, uint32_t u32) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u32, sizeof(u32)); +} + + +/** + * Saves a 32-bit signed integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param i32 Item to save. + */ +VMMR3DECL(int) SSMR3PutS32(PSSMHANDLE pSSM, int32_t i32) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &i32, sizeof(i32)); +} + + +/** + * Saves a 64-bit unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u64 Item to save. + */ +VMMR3DECL(int) SSMR3PutU64(PSSMHANDLE pSSM, uint64_t u64) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u64, sizeof(u64)); +} + + +/** + * Saves a 64-bit signed integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param i64 Item to save. + */ +VMMR3DECL(int) SSMR3PutS64(PSSMHANDLE pSSM, int64_t i64) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &i64, sizeof(i64)); +} + + +/** + * Saves a 128-bit unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u128 Item to save. + */ +VMMR3DECL(int) SSMR3PutU128(PSSMHANDLE pSSM, uint128_t u128) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u128, sizeof(u128)); +} + + +/** + * Saves a 128-bit signed integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param i128 Item to save. + */ +VMMR3DECL(int) SSMR3PutS128(PSSMHANDLE pSSM, int128_t i128) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &i128, sizeof(i128)); +} + + +/** + * Saves a VBox unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u Item to save. + */ +VMMR3DECL(int) SSMR3PutUInt(PSSMHANDLE pSSM, RTUINT u) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u, sizeof(u)); +} + + +/** + * Saves a VBox signed integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param i Item to save. + */ +VMMR3DECL(int) SSMR3PutSInt(PSSMHANDLE pSSM, RTINT i) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &i, sizeof(i)); +} + + +/** + * Saves a GC natural unsigned integer item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u Item to save. + * + * @deprecated Silly type, don't use it. + */ +VMMR3DECL(int) SSMR3PutGCUInt(PSSMHANDLE pSSM, RTGCUINT u) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u, sizeof(u)); +} + + +/** + * Saves a GC unsigned integer register item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param u Item to save. 
+ */ +VMMR3DECL(int) SSMR3PutGCUIntReg(PSSMHANDLE pSSM, RTGCUINTREG u) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &u, sizeof(u)); +} + + +/** + * Saves a 32 bits GC physical address item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param GCPhys The item to save + */ +VMMR3DECL(int) SSMR3PutGCPhys32(PSSMHANDLE pSSM, RTGCPHYS32 GCPhys) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &GCPhys, sizeof(GCPhys)); +} + + +/** + * Saves a 64 bits GC physical address item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param GCPhys The item to save + */ +VMMR3DECL(int) SSMR3PutGCPhys64(PSSMHANDLE pSSM, RTGCPHYS64 GCPhys) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &GCPhys, sizeof(GCPhys)); +} + + +/** + * Saves a GC physical address item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param GCPhys The item to save + */ +VMMR3DECL(int) SSMR3PutGCPhys(PSSMHANDLE pSSM, RTGCPHYS GCPhys) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &GCPhys, sizeof(GCPhys)); +} + + +/** + * Saves a GC virtual address item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param GCPtr The item to save. + */ +VMMR3DECL(int) SSMR3PutGCPtr(PSSMHANDLE pSSM, RTGCPTR GCPtr) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &GCPtr, sizeof(GCPtr)); +} + + +/** + * Saves an RC virtual address item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param RCPtr The item to save. + */ +VMMR3DECL(int) SSMR3PutRCPtr(PSSMHANDLE pSSM, RTRCPTR RCPtr) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &RCPtr, sizeof(RCPtr)); +} + + +/** + * Saves a GC virtual address (represented as an unsigned integer) item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param GCPtr The item to save. + */ +VMMR3DECL(int) SSMR3PutGCUIntPtr(PSSMHANDLE pSSM, RTGCUINTPTR GCPtr) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &GCPtr, sizeof(GCPtr)); +} + + +/** + * Saves a I/O port address item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param IOPort The item to save. + */ +VMMR3DECL(int) SSMR3PutIOPort(PSSMHANDLE pSSM, RTIOPORT IOPort) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &IOPort, sizeof(IOPort)); +} + + +/** + * Saves a selector item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param Sel The item to save. + */ +VMMR3DECL(int) SSMR3PutSel(PSSMHANDLE pSSM, RTSEL Sel) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, &Sel, sizeof(Sel)); +} + + +/** + * Saves a memory item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pv Item to save. + * @param cb Size of the item. 
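+ *
+ * @remarks Hypothetical usage sketch (illustration only, not from the original
+ *          unit); 'pThis->MyState' is an assumed caller-side member that is
+ *          saved verbatim, so its layout becomes part of the saved state
+ *          format.
+ * @code
+ *     int rc = SSMR3PutMem(pSSM, &pThis->MyState, sizeof(pThis->MyState));
+ *     AssertRCReturn(rc, rc);
+ * @endcode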
+ */ +VMMR3DECL(int) SSMR3PutMem(PSSMHANDLE pSSM, const void *pv, size_t cb) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataWrite(pSSM, pv, cb); +} + + +/** + * Saves a zero terminated string item to the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param psz Item to save. + */ +VMMR3DECL(int) SSMR3PutStrZ(PSSMHANDLE pSSM, const char *psz) +{ + SSM_ASSERT_WRITEABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + + size_t cch = strlen(psz); + if (cch > _1M) + { + AssertMsgFailed(("a %zu byte long string, what's this!?!\n", cch)); + return VERR_TOO_MUCH_DATA; + } + uint32_t u32 = (uint32_t)cch; + int rc = ssmR3DataWrite(pSSM, &u32, sizeof(u32)); + if (rc) + return rc; + return ssmR3DataWrite(pSSM, psz, cch); +} + + +/** + * Emits a SSMLiveControl unit with a new progress report. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param lrdPct The progress of the live save. + * @param uPass The current pass. + */ +static int ssmR3LiveControlEmit(PSSMHANDLE pSSM, long double lrdPct, uint32_t uPass) +{ + AssertMsg(lrdPct <= 100.0, ("%u\n", lrdPct * 100)); + + /* + * Make sure we're in one of the two EXEC states or we may fail. + */ + SSMSTATE enmSavedState = pSSM->enmOp; + if (enmSavedState == SSMSTATE_LIVE_VOTE) + pSSM->enmOp = SSMSTATE_LIVE_EXEC; + else if (enmSavedState == SSMSTATE_SAVE_DONE) + pSSM->enmOp = SSMSTATE_SAVE_EXEC; + + /* + * Write the unit header. + */ + SSMFILEUNITHDRV2 UnitHdr; + memcpy(&UnitHdr.szMagic[0], SSMFILEUNITHDR_MAGIC, sizeof(UnitHdr.szMagic)); + UnitHdr.offStream = ssmR3StrmTell(&pSSM->Strm); + UnitHdr.u32CurStreamCRC = ssmR3StrmCurCRC(&pSSM->Strm); + UnitHdr.u32CRC = 0; + UnitHdr.u32Version = 1; + UnitHdr.u32Instance = 0; + UnitHdr.u32Pass = uPass; + UnitHdr.fFlags = 0; + UnitHdr.cbName = sizeof("SSMLiveControl"); + memcpy(&UnitHdr.szName[0], "SSMLiveControl", UnitHdr.cbName); + UnitHdr.u32CRC = RTCrc32(&UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName])); + Log(("SSM: Unit at %#9llx: '%s', instance %u, pass %#x, version %u\n", + UnitHdr.offStream, UnitHdr.szName, UnitHdr.u32Instance, UnitHdr.u32Pass, UnitHdr.u32Version)); + int rc = ssmR3StrmWrite(&pSSM->Strm, &UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName])); + if (RT_SUCCESS(rc)) + { + /* + * Write the payload. + */ + ssmR3DataWriteBegin(pSSM); + + uint16_t u16PartsPerTenThousand = (uint16_t)(lrdPct * (100 - pSSM->uPercentDone)); + AssertMsg(u16PartsPerTenThousand <= 10000, ("%u\n", u16PartsPerTenThousand)); + ssmR3DataWrite(pSSM, &u16PartsPerTenThousand, sizeof(u16PartsPerTenThousand)); + + rc = ssmR3DataFlushBuffer(pSSM); /* will return SSMHANDLE::rc if it is set */ + if (RT_SUCCESS(rc)) + { + /* + * Write the termination record and flush the compression stream. 
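+ *
+ * The SSMRECTERM record written below carries the record type/flags byte,
+ * the record size less the two leading bytes, an optional CRC-32 of the
+ * stream produced so far (only when checksumming is enabled) and the total
+ * number of bytes in the unit including this record.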
+ */ + SSMRECTERM TermRec; + TermRec.u8TypeAndFlags = SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_TERM; + TermRec.cbRec = sizeof(TermRec) - 2; + if (pSSM->Strm.fChecksummed) + { + TermRec.fFlags = SSMRECTERM_FLAGS_CRC32; + TermRec.u32StreamCRC = RTCrc32Finish(RTCrc32Process(ssmR3StrmCurCRC(&pSSM->Strm), &TermRec, 2)); + } + else + { + TermRec.fFlags = 0; + TermRec.u32StreamCRC = 0; + } + TermRec.cbUnit = pSSM->offUnit + sizeof(TermRec); + rc = ssmR3DataWriteRaw(pSSM, &TermRec, sizeof(TermRec)); + if (RT_SUCCESS(rc)) + rc = ssmR3DataWriteFinish(pSSM); + if (RT_SUCCESS(rc)) + { + pSSM->enmOp = enmSavedState; + return rc; + } + } + } + + LogRel(("SSM: Failed to write live control unit. rc=%Rrc\n", rc)); + if (RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + pSSM->enmOp = enmSavedState; + return rc; +} + + + +/** + * Enters the critical session (optionally) associated with the unit. + * + * @param pUnit The unit. + */ +DECLINLINE(void) ssmR3UnitCritSectEnter(PSSMUNIT pUnit) +{ + PPDMCRITSECT pCritSect = pUnit->pCritSect; + if (pCritSect) + { + int rc = PDMCritSectEnter(pCritSect, VERR_IGNORED); + AssertRC(rc); + } +} + + +/** + * Leaves the critical session (optionally) associated with the unit. + * + * @param pUnit The unit. + */ +DECLINLINE(void) ssmR3UnitCritSectLeave(PSSMUNIT pUnit) +{ + PPDMCRITSECT pCritSect = pUnit->pCritSect; + if (pCritSect) + { + int rc = PDMCritSectLeave(pCritSect); + AssertRC(rc); + } +} + + +/** + * Do the pfnSaveDone run. + * + * @returns VBox status code (pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3SaveDoDoneRun(PVM pVM, PSSMHANDLE pSSM) +{ + VM_ASSERT_EMT0(pVM); + + /* + * Do the done run. + */ + pSSM->enmOp = SSMSTATE_SAVE_DONE; + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + if ( pUnit->u.Common.pfnSaveDone + && ( pUnit->fCalled + || (!pUnit->u.Common.pfnSavePrep && !pUnit->u.Common.pfnSaveExec))) + { + int rcOld = pSSM->rc; + int rc; + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnSaveDone(pUnit->u.Dev.pDevIns, pSSM); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnSaveDone(pUnit->u.Drv.pDrvIns, pSSM); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnSaveDone(pUnit->u.Usb.pUsbIns, pSSM); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnSaveDone(pVM, pSSM); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnSaveDone(pSSM, pUnit->u.External.pvUser); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + if (RT_SUCCESS(rc) && pSSM->rc != rcOld) + rc = pSSM->rc; + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Done save failed with rc=%Rrc for data unit '%s.\n", rc, pUnit->szName)); + if (RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + } + } + } + return pSSM->rc; +} + + +/** + * Worker for SSMR3LiveDone and SSMR3Save that closes the handle and deletes the + * saved state file on failure. + * + * @returns VBox status code (pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3SaveDoClose(PVM pVM, PSSMHANDLE pSSM) +{ + VM_ASSERT_EMT0(pVM); + pVM->ssm.s.uPass = 0; + + /* + * Make it non-cancellable, close the stream and delete the file on failure. 
+ */ + ssmR3SetCancellable(pVM, pSSM, false); + int rc = ssmR3StrmClose(&pSSM->Strm, pSSM->rc == VERR_SSM_CANCELLED); + if (RT_SUCCESS(rc)) + rc = pSSM->rc; + if (RT_SUCCESS(rc)) + { + Assert(pSSM->enmOp == SSMSTATE_SAVE_DONE); + if (pSSM->pfnProgress) + pSSM->pfnProgress(pVM->pUVM, 100, pSSM->pvUser); + LogRel(("SSM: Successfully saved the VM state to '%s'\n", + pSSM->pszFilename ? pSSM->pszFilename : "")); + } + else + { + if (pSSM->pszFilename) + { + int rc2 = RTFileDelete(pSSM->pszFilename); + AssertRC(rc2); + if (RT_SUCCESS(rc2)) + LogRel(("SSM: Failed to save the VM state to '%s' (file deleted): %Rrc\n", + pSSM->pszFilename, rc)); + else + LogRel(("SSM: Failed to save the VM state to '%s' (file deletion failed, rc2=%Rrc): %Rrc\n", + pSSM->pszFilename, rc2, rc)); + } + else + LogRel(("SSM: Failed to save the VM state.\n")); + + Assert(pSSM->enmOp <= SSMSTATE_SAVE_DONE); + if (pSSM->enmOp != SSMSTATE_SAVE_DONE) + ssmR3SaveDoDoneRun(pVM, pSSM); + } + + /* + * Trash the handle before freeing it. + */ + ASMAtomicWriteU32(&pSSM->fCancelled, 0); + pSSM->pVM = NULL; + pSSM->enmAfter = SSMAFTER_INVALID; + pSSM->enmOp = SSMSTATE_INVALID; + RTMemFree(pSSM); + + return rc; +} + + +/** + * Closes the SSM handle. + * + * This must always be called on a handled returned by SSMR3LiveSave. + * + * @returns VBox status code. + * + * @param pSSM The SSM handle returned by SSMR3LiveSave. + * + * @thread EMT(0). + */ +VMMR3_INT_DECL(int) SSMR3LiveDone(PSSMHANDLE pSSM) +{ + LogFlow(("SSMR3LiveDone: pSSM=%p\n", pSSM)); + + /* + * Validate input. + */ + AssertPtrReturn(pSSM, VERR_INVALID_POINTER); + PVM pVM = pSSM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT0(pVM); + AssertMsgReturn( pSSM->enmAfter == SSMAFTER_DESTROY + || pSSM->enmAfter == SSMAFTER_CONTINUE + || pSSM->enmAfter == SSMAFTER_TELEPORT, + ("%d\n", pSSM->enmAfter), + VERR_INVALID_PARAMETER); + AssertMsgReturn( pSSM->enmOp >= SSMSTATE_LIVE_PREP + && pSSM->enmOp <= SSMSTATE_SAVE_DONE, + ("%d\n", pSSM->enmOp), VERR_INVALID_STATE); + + /* + * Join paths with SSMR3Save again. + */ + return ssmR3SaveDoClose(pVM, pSSM); +} + + +/** + * Writes the directory. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + * @param pcEntries Where to return the number of directory entries. + */ +static int ssmR3WriteDirectory(PVM pVM, PSSMHANDLE pSSM, uint32_t *pcEntries) +{ + VM_ASSERT_EMT0(pVM); + + /* + * Grab some temporary memory for the dictionary. + */ + size_t cbDir = RT_UOFFSETOF_DYN(SSMFILEDIR, aEntries[pVM->ssm.s.cUnits]); + PSSMFILEDIR pDir = (PSSMFILEDIR)RTMemTmpAlloc(cbDir); + if (!pDir) + { + LogRel(("ssmR3WriteDirectory: failed to allocate %zu bytes!\n", cbDir)); + return VERR_NO_TMP_MEMORY; + } + + /* + * Initialize it. + */ + memcpy(pDir->szMagic, SSMFILEDIR_MAGIC, sizeof(pDir->szMagic)); + pDir->u32CRC = 0; + pDir->cEntries = 0; + + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + if (pUnit->offStream != RTFOFF_MIN) + { + PSSMFILEDIRENTRY pEntry = &pDir->aEntries[pDir->cEntries++]; + Assert(pDir->cEntries <= pVM->ssm.s.cUnits); + Assert(pUnit->offStream >= (RTFOFF)sizeof(SSMFILEHDR)); + pEntry->off = pUnit->offStream; + pEntry->u32Instance = pUnit->u32Instance; + pEntry->u32NameCRC = RTCrc32(pUnit->szName, pUnit->cchName); + } + + /* + * Calculate the actual size and CRC-32, then write the directory + * out to the stream. 
+ */ + *pcEntries = pDir->cEntries; + cbDir = RT_UOFFSETOF_DYN(SSMFILEDIR, aEntries[pDir->cEntries]); + pDir->u32CRC = RTCrc32(pDir, cbDir); + int rc = ssmR3StrmWrite(&pSSM->Strm, pDir, cbDir); + RTMemTmpFree(pDir); + return rc; +} + + +/** + * Finalize the saved state stream, i.e. add the end unit, directory + * and footer. + * + * @returns VBox status code (pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3SaveDoFinalization(PVM pVM, PSSMHANDLE pSSM) +{ + VM_ASSERT_EMT0(pVM); + Assert(RT_SUCCESS(pSSM->rc)); + + /* + * Write the end unit. + */ + SSMFILEUNITHDRV2 UnitHdr; + memcpy(&UnitHdr.szMagic[0], SSMFILEUNITHDR_END, sizeof(UnitHdr.szMagic)); + UnitHdr.offStream = ssmR3StrmTell(&pSSM->Strm); + UnitHdr.u32CurStreamCRC = ssmR3StrmCurCRC(&pSSM->Strm); + UnitHdr.u32CRC = 0; + UnitHdr.u32Version = 0; + UnitHdr.u32Instance = 0; + UnitHdr.u32Pass = SSM_PASS_FINAL; + UnitHdr.fFlags = 0; + UnitHdr.cbName = 0; + UnitHdr.u32CRC = RTCrc32(&UnitHdr, RT_UOFFSETOF(SSMFILEUNITHDRV2, szName[0])); + Log(("SSM: Unit at %#9llx: END UNIT\n", UnitHdr.offStream)); + int rc = ssmR3StrmWrite(&pSSM->Strm, &UnitHdr, RT_UOFFSETOF(SSMFILEUNITHDRV2, szName[0])); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed writing the end unit: %Rrc\n", rc)); + return pSSM->rc = rc; + } + + /* + * Write the directory for the final units and then the footer. + */ + SSMFILEFTR Footer; + rc = ssmR3WriteDirectory(pVM, pSSM, &Footer.cDirEntries); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed writing the directory: %Rrc\n", rc)); + return pSSM->rc = rc; + } + + memcpy(Footer.szMagic, SSMFILEFTR_MAGIC, sizeof(Footer.szMagic)); + Footer.offStream = ssmR3StrmTell(&pSSM->Strm); + Footer.u32StreamCRC = ssmR3StrmFinalCRC(&pSSM->Strm); + Footer.u32Reserved = 0; + Footer.u32CRC = 0; + Footer.u32CRC = RTCrc32(&Footer, sizeof(Footer)); + Log(("SSM: Footer at %#9llx: \n", Footer.offStream)); + rc = ssmR3StrmWrite(&pSSM->Strm, &Footer, sizeof(Footer)); + if (RT_SUCCESS(rc)) + rc = ssmR3StrmSetEnd(&pSSM->Strm); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed writing the footer: %Rrc\n", rc)); + return pSSM->rc = rc; + } + + LogRel(("SSM: Footer at %#llx (%lld), %u directory entries.\n", + Footer.offStream, Footer.offStream, Footer.cDirEntries)); + return VINF_SUCCESS; +} + + +/** + * Works the progress calculation during the exec part of a live save. + * + * @param pSSM The SSM handle. + * @param iUnit The current unit number. + */ +static void ssmR3ProgressByUnit(PSSMHANDLE pSSM, uint32_t iUnit) +{ + if (pSSM->fLiveSave) + { + unsigned uPctExec = iUnit * 100 / pSSM->pVM->ssm.s.cUnits; + unsigned cPctExec = 100 - pSSM->uPercentDone - pSSM->uPercentPrepare - pSSM->uPercentLive; + long double lrdPct = (long double)uPctExec * cPctExec / 100 + pSSM->uPercentPrepare + pSSM->uPercentLive; + unsigned uPct = (unsigned)lrdPct; + if (uPct != pSSM->uPercent) + { + ssmR3LiveControlEmit(pSSM, lrdPct, SSM_PASS_FINAL); + pSSM->uPercent = uPct; + pSSM->pfnProgress(pSSM->pVM->pUVM, uPct, pSSM->pvUser); + } + } +} + + +/** + * Do the pfnSaveExec run. + * + * @returns VBox status code (pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. 
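+ *
+ * @remarks Summary (added for clarity, derived from the code below): for each
+ *          registered unit with a pfnSaveExec callback this writes a
+ *          SSMFILEUNITHDRV2 header, invokes the callback inside the unit's
+ *          optional critical section, flushes the data buffer, appends a
+ *          SSMRECTERM record and advances the progress indicator.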
+ */ +static int ssmR3SaveDoExecRun(PVM pVM, PSSMHANDLE pSSM) +{ + VM_ASSERT_EMT0(pVM); + AssertRC(pSSM->rc); + pSSM->rc = VINF_SUCCESS; + pSSM->enmOp = SSMSTATE_SAVE_EXEC; + unsigned iUnit = 0; + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext, iUnit++) + { + /* + * Not all unit have a callback. Skip those which don't and + * make sure to keep the progress indicator up to date. + */ + ssmR3ProgressByUnit(pSSM, iUnit); + pSSM->offEstUnitEnd += pUnit->cbGuess; + if (!pUnit->u.Common.pfnSaveExec) + { + pUnit->fCalled = true; + if (pUnit->cbGuess) + ssmR3ProgressByByte(pSSM, pSSM->offEstUnitEnd - pSSM->offEst); + continue; + } + pUnit->offStream = ssmR3StrmTell(&pSSM->Strm); + + /* + * Check for cancellation. + */ + if (RT_UNLIKELY(ASMAtomicUoReadU32(&(pSSM)->fCancelled) == SSMHANDLE_CANCELLED)) + { + LogRel(("SSM: Cancelled!\n")); + AssertRC(pSSM->rc); + return pSSM->rc = VERR_SSM_CANCELLED; + } + + /* + * Write data unit header + */ + SSMFILEUNITHDRV2 UnitHdr; + memcpy(&UnitHdr.szMagic[0], SSMFILEUNITHDR_MAGIC, sizeof(UnitHdr.szMagic)); + UnitHdr.offStream = pUnit->offStream; + UnitHdr.u32CurStreamCRC = ssmR3StrmCurCRC(&pSSM->Strm); + UnitHdr.u32CRC = 0; + UnitHdr.u32Version = pUnit->u32Version; + UnitHdr.u32Instance = pUnit->u32Instance; + UnitHdr.u32Pass = SSM_PASS_FINAL; + UnitHdr.fFlags = 0; + UnitHdr.cbName = (uint32_t)pUnit->cchName + 1; + memcpy(&UnitHdr.szName[0], &pUnit->szName[0], UnitHdr.cbName); + UnitHdr.u32CRC = RTCrc32(&UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName])); + Log(("SSM: Unit at %#9llx: '%s', instance %u, pass %#x, version %u\n", + UnitHdr.offStream, UnitHdr.szName, UnitHdr.u32Instance, UnitHdr.u32Pass, UnitHdr.u32Version)); + int rc = ssmR3StrmWrite(&pSSM->Strm, &UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName])); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed to write unit header. rc=%Rrc\n", rc)); + return pSSM->rc = rc; + } + + /* + * Call the execute handler. + */ + ssmR3DataWriteBegin(pSSM); + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnSaveExec(pUnit->u.Dev.pDevIns, pSSM); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnSaveExec(pUnit->u.Drv.pDrvIns, pSSM); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnSaveExec(pUnit->u.Usb.pUsbIns, pSSM); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnSaveExec(pVM, pSSM); + break; + case SSMUNITTYPE_EXTERNAL: + pUnit->u.External.pfnSaveExec(pSSM, pUnit->u.External.pvUser); + rc = pSSM->rc; + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + else + rc = ssmR3DataFlushBuffer(pSSM); /* will return SSMHANDLE::rc if it is set */ + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Execute save failed with rc=%Rrc for data unit '%s'/#%u.\n", rc, pUnit->szName, pUnit->u32Instance)); + return rc; + } + + /* + * Write the termination record and flush the compression stream. 
+ */ + SSMRECTERM TermRec; + TermRec.u8TypeAndFlags = SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_TERM; + TermRec.cbRec = sizeof(TermRec) - 2; + if (pSSM->Strm.fChecksummed) + { + TermRec.fFlags = SSMRECTERM_FLAGS_CRC32; + TermRec.u32StreamCRC = RTCrc32Finish(RTCrc32Process(ssmR3StrmCurCRC(&pSSM->Strm), &TermRec, 2)); + } + else + { + TermRec.fFlags = 0; + TermRec.u32StreamCRC = 0; + } + TermRec.cbUnit = pSSM->offUnit + sizeof(TermRec); + rc = ssmR3DataWriteRaw(pSSM, &TermRec, sizeof(TermRec)); + if (RT_SUCCESS(rc)) + rc = ssmR3DataWriteFinish(pSSM); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed terminating unit: %Rrc\n", rc)); + return pSSM->rc = rc; + } + + /* + * Advance the progress indicator to the end of the current unit. + */ + ssmR3ProgressByByte(pSSM, pSSM->offEstUnitEnd - pSSM->offEst); + } /* for each unit */ + ssmR3ProgressByUnit(pSSM, pVM->ssm.s.cUnits); + + /* (progress should be pending 99% now) */ + AssertMsg( pSSM->uPercent == 101 - pSSM->uPercentDone + || pSSM->uPercent == 100 - pSSM->uPercentDone, + ("%d\n", pSSM->uPercent)); + return VINF_SUCCESS; +} + + +/** + * Do the pfnSavePrep run. + * + * @returns VBox status code (pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3SaveDoPrepRun(PVM pVM, PSSMHANDLE pSSM) +{ + VM_ASSERT_EMT0(pVM); + Assert(RT_SUCCESS(pSSM->rc)); + pSSM->enmOp = SSMSTATE_SAVE_PREP; + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + if (pUnit->u.Common.pfnSavePrep) + { + int rc; + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnSavePrep(pUnit->u.Dev.pDevIns, pSSM); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnSavePrep(pUnit->u.Drv.pDrvIns, pSSM); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnSavePrep(pUnit->u.Usb.pUsbIns, pSSM); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnSavePrep(pVM, pSSM); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnSavePrep(pSSM, pUnit->u.External.pvUser); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + else + rc = pSSM->rc; + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Prepare save failed with rc=%Rrc for data unit '%s.\n", rc, pUnit->szName)); + return rc; + } + } + + pSSM->cbEstTotal += pUnit->cbGuess; + } + + /* + * Work the progress indicator if we got one. + */ + if (pSSM->pfnProgress) + pSSM->pfnProgress(pVM->pUVM, pSSM->uPercentPrepare + pSSM->uPercentLive - 1, pSSM->pvUser); + pSSM->uPercent = pSSM->uPercentPrepare + pSSM->uPercentLive; + + return VINF_SUCCESS; +} + + +/** + * Common worker for SSMR3Save and SSMR3LiveSave. + * + * @returns VBox status code (no need to check pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The state handle. + * + * @thread EMT(0) + */ +static int ssmR3SaveDoCommon(PVM pVM, PSSMHANDLE pSSM) +{ + VM_ASSERT_EMT0(pVM); + + /* + * Do the work. + */ + int rc = ssmR3SaveDoPrepRun(pVM, pSSM); + if (RT_SUCCESS(rc)) + { + rc = ssmR3SaveDoExecRun(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = ssmR3SaveDoFinalization(pVM, pSSM); + } + Assert(pSSM->rc == rc); + int rc2 = ssmR3SaveDoDoneRun(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = rc2; + + return rc; +} + + +/** + * Saves the rest of the state on EMT0. + * + * @returns VBox status code. + * + * @param pSSM The SSM handle returned by SSMR3LiveSave. 
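+ *
+ * @remarks Hypothetical call sequence sketch (illustration only; error
+ *          handling and the handoffs between EMT(0) and the worker thread are
+ *          omitted, and the file name and downtime are assumptions):
+ * @code
+ *     PSSMHANDLE pSSM;
+ *     int rc = SSMR3LiveSave(pVM, 250 /*cMsMaxDowntime*/, "live.sav",
+ *                            NULL /*pStreamOps*/, NULL, SSMAFTER_CONTINUE,
+ *                            NULL /*pfnProgress*/, NULL, &pSSM);
+ *     rc = SSMR3LiveDoStep1(pSSM);    // exec+vote passes on a worker thread
+ *     rc = SSMR3LiveDoStep2(pSSM);    // saves the remaining state
+ *     rc = SSMR3LiveDone(pSSM);       // always call this, also on failure
+ * @endcode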
+ * + * @thread Non-EMT thread. Will involve the EMT at the end of the operation. + */ +VMMR3_INT_DECL(int) SSMR3LiveDoStep2(PSSMHANDLE pSSM) +{ + LogFlow(("SSMR3LiveDoStep2: pSSM=%p\n", pSSM)); + + /* + * Validate input. + */ + AssertPtrReturn(pSSM, VERR_INVALID_POINTER); + PVM pVM = pSSM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_EMT0(pVM); + AssertMsgReturn( pSSM->enmAfter == SSMAFTER_DESTROY + || pSSM->enmAfter == SSMAFTER_CONTINUE + || pSSM->enmAfter == SSMAFTER_TELEPORT, + ("%d\n", pSSM->enmAfter), + VERR_INVALID_PARAMETER); + AssertMsgReturn(pSSM->enmOp == SSMSTATE_LIVE_STEP2, ("%d\n", pSSM->enmOp), VERR_INVALID_STATE); + AssertRCReturn(pSSM->rc, pSSM->rc); + + /* + * Join paths with VMMR3Save. + */ + return ssmR3SaveDoCommon(pVM, pSSM); +} + + +/** + * Writes the file header and clear the per-unit data. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The SSM handle. + */ +static int ssmR3WriteHeaderAndClearPerUnitData(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Write the header. + */ + SSMFILEHDR FileHdr; + memcpy(&FileHdr.szMagic, SSMFILEHDR_MAGIC_V2_0, sizeof(FileHdr.szMagic)); + FileHdr.u16VerMajor = VBOX_VERSION_MAJOR; + FileHdr.u16VerMinor = VBOX_VERSION_MINOR; + FileHdr.u32VerBuild = VBOX_VERSION_BUILD; + FileHdr.u32SvnRev = VMMGetSvnRev(); + FileHdr.cHostBits = HC_ARCH_BITS; + FileHdr.cbGCPhys = sizeof(RTGCPHYS); + FileHdr.cbGCPtr = sizeof(RTGCPTR); + FileHdr.u8Reserved = 0; + FileHdr.cUnits = pVM->ssm.s.cUnits; + FileHdr.fFlags = SSMFILEHDR_FLAGS_STREAM_CRC32; + if (pSSM->fLiveSave) + FileHdr.fFlags |= SSMFILEHDR_FLAGS_STREAM_LIVE_SAVE; + FileHdr.cbMaxDecompr = RT_SIZEOFMEMB(SSMHANDLE, u.Read.abDataBuffer); + FileHdr.u32CRC = 0; + FileHdr.u32CRC = RTCrc32(&FileHdr, sizeof(FileHdr)); + int rc = ssmR3StrmWrite(&pSSM->Strm, &FileHdr, sizeof(FileHdr)); + if (RT_FAILURE(rc)) + return rc; + + /* + * Clear the per unit flags and offsets. + */ + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + pUnit->fCalled = false; + pUnit->offStream = RTFOFF_MIN; + } + + return VINF_SUCCESS; +} + + +/** + * Creates a new saved state file. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pszFilename The name of the file. NULL if pStreamOps is + * used. + * @param pStreamOps The stream methods. NULL if pszFilename is + * used. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param enmAfter What to do afterwards. + * @param pfnProgress The progress callback. + * @param pvProgressUser The progress callback user argument. + * @param ppSSM Where to return the pointer to the saved state + * handle upon successful return. Free it using + * RTMemFree after closing the stream. 
+ */ +static int ssmR3SaveDoCreateFile(PVM pVM, const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser, PSSMHANDLE *ppSSM) +{ + PSSMHANDLE pSSM = (PSSMHANDLE)RTMemAllocZ(sizeof(*pSSM)); + if (!pSSM) + return VERR_NO_MEMORY; + + pSSM->pVM = pVM; + pSSM->enmOp = SSMSTATE_INVALID; + pSSM->enmAfter = enmAfter; + pSSM->fCancelled = SSMHANDLE_OK; + pSSM->rc = VINF_SUCCESS; + pSSM->cbUnitLeftV1 = 0; + pSSM->offUnit = UINT64_MAX; + pSSM->offUnitUser = UINT64_MAX; + pSSM->fLiveSave = false; + pSSM->pfnProgress = pfnProgress; + pSSM->pvUser = pvProgressUser; + pSSM->uPercent = 0; + pSSM->offEstProgress = 0; + pSSM->cbEstTotal = 0; + pSSM->offEst = 0; + pSSM->offEstUnitEnd = 0; + pSSM->uPercentLive = 0; + pSSM->uPercentPrepare = 0; + pSSM->uPercentDone = 0; + pSSM->uReportedLivePercent = 0; + pSSM->pszFilename = pszFilename; + pSSM->u.Write.offDataBuffer = 0; + pSSM->u.Write.cMsMaxDowntime = UINT32_MAX; + + int rc; + if (pStreamOps) + rc = ssmR3StrmInit(&pSSM->Strm, pStreamOps, pvStreamOpsUser, true /*fWrite*/, true /*fChecksummed*/, 8 /*cBuffers*/); + else + rc = ssmR3StrmOpenFile(&pSSM->Strm, pszFilename, true /*fWrite*/, true /*fChecksummed*/, 8 /*cBuffers*/); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed to create save state file '%s', rc=%Rrc.\n", pszFilename, rc)); + RTMemFree(pSSM); + return rc; + } + + *ppSSM = pSSM; + return VINF_SUCCESS; +} + + +/** + * Start VM save operation. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pszFilename Name of the file to save the state in. NULL if pStreamOps is used. + * @param pStreamOps The stream method table. NULL if pszFilename is + * used. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param enmAfter What is planned after a successful save operation. + * @param pfnProgress Progress callback. Optional. + * @param pvUser User argument for the progress callback. + * + * @thread EMT + */ +VMMR3DECL(int) SSMR3Save(PVM pVM, const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvUser) +{ + LogFlow(("SSMR3Save: pszFilename=%p:{%s} enmAfter=%d pfnProgress=%p pvUser=%p\n", pszFilename, pszFilename, enmAfter, pfnProgress, pvUser)); + VM_ASSERT_EMT0(pVM); + + /* + * Validate input. + */ + AssertMsgReturn( enmAfter == SSMAFTER_DESTROY + || enmAfter == SSMAFTER_CONTINUE, + ("%d\n", enmAfter), + VERR_INVALID_PARAMETER); + + AssertReturn(!pszFilename != !pStreamOps, VERR_INVALID_PARAMETER); + if (pStreamOps) + { + AssertReturn(pStreamOps->u32Version == SSMSTRMOPS_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pStreamOps->u32EndVersion == SSMSTRMOPS_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pStreamOps->pfnWrite, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnRead, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnSeek, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnTell, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnSize, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnClose, VERR_INVALID_PARAMETER); + } + + /* + * Create the saved state file and handle. + * + * Note that there might be quite some work to do after executing the saving, + * so we reserve 20% for the 'Done' period. 
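+ *
+ * (Concretely, the assignments below reserve 20% for the prepare step and
+ * 2% for the done step.)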
+ */ + PSSMHANDLE pSSM; + int rc = ssmR3SaveDoCreateFile(pVM, pszFilename, pStreamOps, pvStreamOpsUser, + enmAfter, pfnProgress, pvUser, &pSSM); + if (RT_FAILURE(rc)) + return rc; + pSSM->uPercentLive = 0; + pSSM->uPercentPrepare = 20; + pSSM->uPercentDone = 2; + pSSM->fLiveSave = false; + + /* + * Write the saved state stream header and join paths with + * the other save methods for the rest of the job. + */ + Log(("SSM: Starting state save to file '%s'...\n", pszFilename)); + ssmR3StrmStartIoThread(&pSSM->Strm); + rc = ssmR3WriteHeaderAndClearPerUnitData(pVM, pSSM); + if (RT_SUCCESS(rc)) + { + ssmR3SetCancellable(pVM, pSSM, true); + ssmR3SaveDoCommon(pVM, pSSM); + } + + return ssmR3SaveDoClose(pVM, pSSM); +} + + +/** + * Used by PGM to report the completion percentage of the live stage during the + * vote run. + * + * @param pSSM The saved state handle. + * @param uPercent The completion percentage. + */ +VMMR3DECL(void) SSMR3HandleReportLivePercent(PSSMHANDLE pSSM, unsigned uPercent) +{ + AssertMsgReturnVoid(pSSM->enmOp == SSMSTATE_LIVE_VOTE, ("%d\n", pSSM->enmOp)); + AssertReturnVoid(uPercent <= 100); + if (uPercent < pSSM->uReportedLivePercent) + pSSM->uReportedLivePercent = uPercent; +} + + +/** + * Calls pfnLiveVote for all units. + * + * @returns VBox status code (no need to check pSSM->rc). + * @retval VINF_SUCCESS if we can pass on to step 2. + * @retval VINF_SSM_VOTE_FOR_ANOTHER_PASS if we need another pass. + * + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uPass The current pass. + */ +static int ssmR3LiveDoVoteRun(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + int rcRet = VINF_SUCCESS; + AssertRC(pSSM->rc); + pSSM->rc = VINF_SUCCESS; + pSSM->enmOp = SSMSTATE_LIVE_VOTE; + + unsigned uPrevPrecent = pSSM->uReportedLivePercent; + pSSM->uReportedLivePercent = 101; + + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + if ( pUnit->u.Common.pfnLiveVote + && !pUnit->fDoneLive) + { + int rc; + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLiveVote(pUnit->u.Dev.pDevIns, pSSM, uPass); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLiveVote(pUnit->u.Drv.pDrvIns, pSSM, uPass); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLiveVote(pUnit->u.Usb.pUsbIns, pSSM, uPass); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLiveVote(pVM, pSSM, uPass); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLiveVote(pSSM, pUnit->u.External.pvUser, uPass); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + Assert(pSSM->rc == VINF_SUCCESS); + if (rc != VINF_SUCCESS) + { + if (rc == VINF_SSM_VOTE_FOR_ANOTHER_PASS) + { + Log(("ssmR3DoLiveVoteRun: '%s'/#%u -> VINF_SSM_VOTE_FOR_ANOTHER_PASS (pass=%u)\n", pUnit->szName, pUnit->u32Instance, uPass)); + rcRet = VINF_SSM_VOTE_FOR_ANOTHER_PASS; + } + else if (rc == VINF_SSM_VOTE_DONE_DONT_CALL_AGAIN) + { + pUnit->fDoneLive = true; + Log(("ssmR3DoLiveVoteRun: '%s'/#%u -> VINF_SSM_VOTE_DONE_DONT_CALL_AGAIN (pass=%u)\n", pUnit->szName, pUnit->u32Instance, uPass)); + } + else + { + /* + * rc is usually VERR_SSM_VOTE_FOR_GIVING_UP here, but we allow + * other status codes for better user feed back. However, no + * other non-error status is allowed. + */ + LogRel(("SSM: Error - '%s'/#%u voted %Rrc! 
(pass=%u)\n", pUnit->szName, pUnit->u32Instance, rc, uPass)); + AssertMsgReturn(RT_FAILURE(rc), ("%Rrc; '%s'\n", rc, pUnit->szName), pSSM->rc = VERR_IPE_UNEXPECTED_INFO_STATUS); + return pSSM->rc = rc; + } + } + } + } + if (rcRet == VINF_SUCCESS) + { + LogRel(("SSM: Step 1 completed after pass %u.\n", uPass)); + pSSM->uReportedLivePercent = 100; + } + else + { + /* + * Work the progress callback. + */ + if (pSSM->uReportedLivePercent > 100) + pSSM->uReportedLivePercent = 0; + if ( pSSM->uReportedLivePercent != uPrevPrecent + && pSSM->pfnProgress + && pSSM->uPercentLive) + { + long double lrdPct = (long double)pSSM->uReportedLivePercent * pSSM->uPercentLive / 100; + unsigned uPct = (unsigned)lrdPct; + if (uPct != pSSM->uPercent) + { + ssmR3LiveControlEmit(pSSM, lrdPct, uPass); + pSSM->uPercent = uPct; + pSSM->pfnProgress(pVM->pUVM, uPct, pSSM->pvUser); + } + } + } + return rcRet; +} + + +/** + * Calls pfnLiveExec for all units. + * + * @returns VBox status code (no need to check pSSM->rc). + * + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + * @param uPass The current pass. + */ +static int ssmR3LiveDoExecRun(PVM pVM, PSSMHANDLE pSSM, uint32_t uPass) +{ + AssertRC(pSSM->rc); + pSSM->rc = VINF_SUCCESS; + pSSM->enmOp = SSMSTATE_LIVE_EXEC; + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + /* + * Skip units without a callback (this is most). + */ + if ( !pUnit->u.Common.pfnLiveExec + || pUnit->fDoneLive) + continue; + pUnit->offStream = ssmR3StrmTell(&pSSM->Strm); + + /* + * Check for cancellation. + */ + if (RT_UNLIKELY(ASMAtomicUoReadU32(&(pSSM)->fCancelled) == SSMHANDLE_CANCELLED)) + { + LogRel(("SSM: Cancelled!\n")); + AssertRC(pSSM->rc); + return pSSM->rc = VERR_SSM_CANCELLED; + } + + /* + * Write data unit header. + */ + SSMFILEUNITHDRV2 UnitHdr; + memcpy(&UnitHdr.szMagic[0], SSMFILEUNITHDR_MAGIC, sizeof(UnitHdr.szMagic)); + UnitHdr.offStream = pUnit->offStream; + UnitHdr.u32CurStreamCRC = ssmR3StrmCurCRC(&pSSM->Strm); + UnitHdr.u32CRC = 0; + UnitHdr.u32Version = pUnit->u32Version; + UnitHdr.u32Instance = pUnit->u32Instance; + UnitHdr.u32Pass = uPass; + UnitHdr.fFlags = 0; + UnitHdr.cbName = (uint32_t)pUnit->cchName + 1; + memcpy(&UnitHdr.szName[0], &pUnit->szName[0], UnitHdr.cbName); + UnitHdr.u32CRC = RTCrc32(&UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName])); + Log(("SSM: Unit at %#9llx: '%s', instance %u, pass %#x, version %u\n", + UnitHdr.offStream, UnitHdr.szName, UnitHdr.u32Instance, UnitHdr.u32Pass, UnitHdr.u32Version)); + int rc = ssmR3StrmWrite(&pSSM->Strm, &UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName])); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed to write unit header. rc=%Rrc\n", rc)); + return pSSM->rc = rc; + } + + /* + * Call the execute handler. 
+ */ + ssmR3DataWriteBegin(pSSM); + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLiveExec(pUnit->u.Dev.pDevIns, pSSM, uPass); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLiveExec(pUnit->u.Drv.pDrvIns, pSSM, uPass); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLiveExec(pUnit->u.Usb.pUsbIns, pSSM, uPass); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLiveExec(pVM, pSSM, uPass); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLiveExec(pSSM, pUnit->u.External.pvUser, uPass); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + else + { + if (rc == VINF_SSM_DONT_CALL_AGAIN) + pUnit->fDoneLive = true; + rc = ssmR3DataFlushBuffer(pSSM); /* will return SSMHANDLE::rc if it is set */ + } + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Execute save failed with rc=%Rrc for data unit '%s'/#%u.\n", rc, pUnit->szName, pUnit->u32Instance)); + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc; + return rc; + } + + /* + * Write the termination record and flush the compression stream. + */ + SSMRECTERM TermRec; + TermRec.u8TypeAndFlags = SSM_REC_FLAGS_FIXED | SSM_REC_FLAGS_IMPORTANT | SSM_REC_TYPE_TERM; + TermRec.cbRec = sizeof(TermRec) - 2; + if (pSSM->Strm.fChecksummed) + { + TermRec.fFlags = SSMRECTERM_FLAGS_CRC32; + TermRec.u32StreamCRC = RTCrc32Finish(RTCrc32Process(ssmR3StrmCurCRC(&pSSM->Strm), &TermRec, 2)); + } + else + { + TermRec.fFlags = 0; + TermRec.u32StreamCRC = 0; + } + TermRec.cbUnit = pSSM->offUnit + sizeof(TermRec); + rc = ssmR3DataWriteRaw(pSSM, &TermRec, sizeof(TermRec)); + if (RT_SUCCESS(rc)) + rc = ssmR3DataWriteFinish(pSSM); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed terminating unit: %Rrc (pass=%u)\n", rc, uPass)); + return pSSM->rc = rc; + } + } /* for each unit */ + + return VINF_SUCCESS; +} + + +/** + * Implements the live exec+vote loop. + * + * @returns VBox status code (no need to check pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3DoLiveExecVoteLoop(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Calc the max saved state size before we should give up because of insane + * amounts of data. + */ +#define SSM_MAX_GROWTH_FILE 10000 +#define SSM_MAX_GROWTH_REMOTE 100000 + uint64_t cbSum = 0; + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + cbSum += pUnit->cbGuess; + uint64_t cbMax = cbSum * (pSSM->pszFilename ? SSM_MAX_GROWTH_FILE : SSM_MAX_GROWTH_REMOTE); + AssertLogRelMsgReturn(cbMax > cbSum, ("cbMax=%#RX64, cbSum=%#RX64\n", cbMax, cbSum), pSSM->rc = VERR_OUT_OF_RANGE); + if (cbMax < _1G) + cbMax = _1G; + + /* + * The pass loop. + * + * The number of iterations is restricted for two reasons, first + * to make sure + */ +#define SSM_MAX_PASSES _1M + for (uint32_t uPass = 0; uPass < SSM_MAX_PASSES; uPass++) + { + pVM->ssm.s.uPass = uPass; + + /* + * Save state and vote on whether we need more passes or not. + */ + int rc = ssmR3LiveDoExecRun(pVM, pSSM, uPass); + if (RT_FAILURE(rc)) + return rc; + rc = ssmR3LiveDoVoteRun(pVM, pSSM, uPass); + if (rc == VINF_SUCCESS) + { + pSSM->enmOp = SSMSTATE_LIVE_STEP2; + return VINF_SUCCESS; + } + if (RT_FAILURE(rc)) + return rc; + + /* + * Check that we're still within sane data amounts. + */ + uint64_t cbSaved = ssmR3StrmTell(&pSSM->Strm); + if (cbSaved > cbMax) + { + LogRel(("SSM: Giving up: Exceeded max state size. 
(cbSaved=%#RX64, cbMax=%#RX64)\n", cbSaved, cbMax)); + return pSSM->rc = VERR_SSM_STATE_GREW_TOO_BIG; + } + + /* + * Check that the stream is still OK. + */ + rc = ssmR3StrmCheckAndFlush(&pSSM->Strm); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + } + + LogRel(("SSM: Giving up: Too many passes! (%u)\n", SSM_MAX_PASSES)); + return pSSM->rc = VERR_SSM_TOO_MANY_PASSES; +} + + +/** + * Calls pfnLivePrep for all units. + * + * @returns VBox status code (no need to check pSSM->rc). + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3DoLivePrepRun(PVM pVM, PSSMHANDLE pSSM) +{ + /* + * Do the prepare run. + */ + pSSM->rc = VINF_SUCCESS; + pSSM->enmOp = SSMSTATE_SAVE_PREP; + for (PSSMUNIT pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + if (pUnit->u.Common.pfnLivePrep) + { + int rc; + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLivePrep(pUnit->u.Dev.pDevIns, pSSM); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLivePrep(pUnit->u.Drv.pDrvIns, pSSM); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLivePrep(pUnit->u.Usb.pUsbIns, pSSM); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLivePrep(pVM, pSSM); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLivePrep(pSSM, pUnit->u.External.pvUser); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + else + rc = pSSM->rc; + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Prepare save failed with rc=%Rrc for data unit '%s.\n", rc, pUnit->szName)); + return rc; + } + } + + pSSM->cbEstTotal += pUnit->cbGuess; + } + + /* + * Work the progress indicator if we got one. + */ + if (pSSM->pfnProgress) + pSSM->pfnProgress(pVM->pUVM, 2, pSSM->pvUser); + pSSM->uPercent = 2; + + return VINF_SUCCESS; +} + + +/** + * Continue a live state saving operation on the worker thread. + * + * @returns VBox status code. + * + * @param pSSM The SSM handle returned by SSMR3LiveSave. + * + * @thread Non-EMT thread. Will involve the EMT at the end of the operation. + */ +VMMR3_INT_DECL(int) SSMR3LiveDoStep1(PSSMHANDLE pSSM) +{ + LogFlow(("SSMR3LiveDoStep1: pSSM=%p\n", pSSM)); + + /* + * Validate input. + */ + AssertPtrReturn(pSSM, VERR_INVALID_POINTER); + PVM pVM = pSSM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_OTHER_THREAD(pVM); + AssertMsgReturn( pSSM->enmAfter == SSMAFTER_DESTROY + || pSSM->enmAfter == SSMAFTER_CONTINUE + || pSSM->enmAfter == SSMAFTER_TELEPORT, + ("%d\n", pSSM->enmAfter), + VERR_INVALID_PARAMETER); + AssertMsgReturn(pSSM->enmOp == SSMSTATE_LIVE_STEP1, ("%d\n", pSSM->enmOp), VERR_INVALID_STATE); + AssertRCReturn(pSSM->rc, pSSM->rc); + + /* + * Do the prep run, then the exec+vote cycle. + */ + int rc = ssmR3DoLivePrepRun(pVM, pSSM); + if (RT_SUCCESS(rc)) + rc = ssmR3DoLiveExecVoteLoop(pVM, pSSM); + return rc; +} + + +/** + * Start saving the live state. + * + * Call SSMR3LiveDoStep1, SSMR3LiveDoStep2 and finally SSMR3LiveDone on success. + * SSMR3LiveDone should be called even if SSMR3LiveDoStep1 or SSMR3LiveDoStep2 + * fails. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param cMsMaxDowntime The maximum downtime given as milliseconds. + * @param pszFilename Name of the file to save the state in. This string + * must remain valid until SSMR3LiveDone is called. 
+ * Must be NULL if pStreamOps is used. + * @param pStreamOps The stream method table. NULL if pszFilename is + * used. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param enmAfter What is planned after a successful save operation. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * @param ppSSM Where to return the saved state handle on success. + * + * @thread EMT0 + */ +VMMR3_INT_DECL(int) SSMR3LiveSave(PVM pVM, uint32_t cMsMaxDowntime, + const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser, + PSSMHANDLE *ppSSM) +{ + LogFlow(("SSMR3LiveSave: cMsMaxDowntime=%u pszFilename=%p:{%s} pStreamOps=%p pvStreamOpsUser=%p enmAfter=%d pfnProgress=%p pvProgressUser=%p\n", + cMsMaxDowntime, pszFilename, pszFilename, pStreamOps, pvStreamOpsUser, enmAfter, pfnProgress, pvProgressUser)); + VM_ASSERT_EMT0(pVM); + + /* + * Validate input. + */ + AssertMsgReturn( enmAfter == SSMAFTER_DESTROY + || enmAfter == SSMAFTER_CONTINUE + || enmAfter == SSMAFTER_TELEPORT, + ("%d\n", enmAfter), + VERR_INVALID_PARAMETER); + AssertReturn(!pszFilename != !pStreamOps, VERR_INVALID_PARAMETER); + if (pStreamOps) + { + AssertReturn(pStreamOps->u32Version == SSMSTRMOPS_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pStreamOps->u32EndVersion == SSMSTRMOPS_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pStreamOps->pfnWrite, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnRead, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnSeek, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnTell, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnSize, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnClose, VERR_INVALID_PARAMETER); + } + + /* + * Create the saved state file and handle. + * + * Note that there might be quite some work to do after executing the saving, + * so we reserve 20% for the 'Done' period. + */ + PSSMHANDLE pSSM; + int rc = ssmR3SaveDoCreateFile(pVM, pszFilename, pStreamOps, pvStreamOpsUser, + enmAfter, pfnProgress, pvProgressUser, &pSSM); + if (RT_FAILURE(rc)) + return rc; + pSSM->uPercentLive = 93; + pSSM->uPercentPrepare = 2; + pSSM->uPercentDone = 2; + pSSM->fLiveSave = true; + pSSM->u.Write.cMsMaxDowntime = cMsMaxDowntime; + + /* + * Write the saved state stream header and do the prep run for live saving. + */ + Log(("SSM: Starting state save to file '%s'...\n", pszFilename)); + ssmR3StrmStartIoThread(&pSSM->Strm); + rc = ssmR3WriteHeaderAndClearPerUnitData(pVM, pSSM); + if (RT_SUCCESS(rc)) + { + /* + * Return and let the requestor thread do the pfnLiveExec/Vote part + * via SSMR3SaveFinishLive + */ + pSSM->enmOp = SSMSTATE_LIVE_STEP1; + ssmR3SetCancellable(pVM, pSSM, true); + *ppSSM = pSSM; + return VINF_SUCCESS; + } + /* bail out. */ + int rc2 = ssmR3StrmClose(&pSSM->Strm, pSSM->rc == VERR_SSM_CANCELLED); + RTMemFree(pSSM); + rc2 = RTFileDelete(pszFilename); + AssertRC(rc2); + return rc; +} + +#endif /* !SSM_STANDALONE */ + + +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... 
Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ +/* ... Loading and reading starts here ... */ + + +#ifndef SSM_STANDALONE +/** + * Closes the decompressor of a data unit. + * + * @returns pSSM->rc. + * @param pSSM The saved state handle. + */ +static int ssmR3DataReadFinishV1(PSSMHANDLE pSSM) +{ + if (pSSM->u.Read.pZipDecompV1) + { + int rc = RTZipDecompDestroy(pSSM->u.Read.pZipDecompV1); + AssertRC(rc); + pSSM->u.Read.pZipDecompV1 = NULL; + } + return pSSM->rc; +} +#endif /* !SSM_STANDALONE */ + + +/** + * Callback for reading compressed data into the input buffer of the + * decompressor, for saved file format version 1. + * + * @returns VBox status code. Set pSSM->rc on error. + * @param pvSSM The SSM handle. + * @param pvBuf Where to store the compressed data. + * @param cbBuf Size of the buffer. + * @param pcbRead Number of bytes actually stored in the buffer. + */ +static DECLCALLBACK(int) ssmR3ReadInV1(void *pvSSM, void *pvBuf, size_t cbBuf, size_t *pcbRead) +{ + PSSMHANDLE pSSM = (PSSMHANDLE)pvSSM; + size_t cbRead = cbBuf; + if (pSSM->cbUnitLeftV1 < cbBuf) + cbRead = (size_t)pSSM->cbUnitLeftV1; + if (cbRead) + { + //Log2(("ssmR3ReadInV1: %#010llx cbBug=%#x cbRead=%#x\n", ssmR3StrmTell(&pSSM->Strm), cbBuf, cbRead)); + int rc = ssmR3StrmRead(&pSSM->Strm, pvBuf, cbRead); + if (RT_SUCCESS(rc)) + { + pSSM->cbUnitLeftV1 -= cbRead; + if (pcbRead) + *pcbRead = cbRead; + ssmR3ProgressByByte(pSSM, cbRead); + return VINF_SUCCESS; + } + return pSSM->rc = rc; + } + + if (pSSM->enmAfter != SSMAFTER_DEBUG_IT) + AssertMsgFailed(("SSM: attempted reading more than the unit!\n")); + return pSSM->rc = VERR_SSM_LOADED_TOO_MUCH; +} + + +/** + * Internal read worker for reading data from a version 1 unit. + * + * @returns VBox status code, pSSM->rc is set on error. + * + * @param pSSM The saved state handle. + * @param pvBuf Where to store the read data. + * @param cbBuf Number of bytes to read. + */ +static int ssmR3DataReadV1(PSSMHANDLE pSSM, void *pvBuf, size_t cbBuf) +{ + /* + * Open the decompressor on the first read. + */ + if (!pSSM->u.Read.pZipDecompV1) + { + pSSM->rc = RTZipDecompCreate(&pSSM->u.Read.pZipDecompV1, pSSM, ssmR3ReadInV1); + if (RT_FAILURE(pSSM->rc)) + return pSSM->rc; + } + + /* + * Do the requested read. + */ + int rc = pSSM->rc = RTZipDecompress(pSSM->u.Read.pZipDecompV1, pvBuf, cbBuf, NULL); + if (RT_SUCCESS(rc)) + { + Log2(("ssmR3DataRead: pvBuf=%p cbBuf=%#x offUnit=%#llx %.*Rhxs%s\n", pvBuf, cbBuf, pSSM->offUnit, RT_MIN(cbBuf, SSM_LOG_BYTES), pvBuf, cbBuf > SSM_LOG_BYTES ? "..." : "")); + pSSM->offUnit += cbBuf; + pSSM->offUnitUser += cbBuf; + return VINF_SUCCESS; + } + AssertMsgFailed(("rc=%Rrc cbBuf=%#x\n", rc, cbBuf)); + return rc; +} + + +/** + * Creates the decompressor for the data unit. + * + * pSSM->rc will be set on error. + * + * @param pSSM The saved state handle. 
+ */ +static void ssmR3DataReadBeginV2(PSSMHANDLE pSSM) +{ + Assert(!pSSM->u.Read.cbDataBuffer || pSSM->u.Read.cbDataBuffer == pSSM->u.Read.offDataBuffer); + Assert(!pSSM->u.Read.cbRecLeft); + + pSSM->offUnit = 0; + pSSM->offUnitUser = 0; + pSSM->u.Read.cbRecLeft = 0; + pSSM->u.Read.cbDataBuffer = 0; + pSSM->u.Read.offDataBuffer = 0; + pSSM->u.Read.fEndOfData = false; + pSSM->u.Read.u8TypeAndFlags = 0; +} + + +#ifndef SSM_STANDALONE +/** + * Checks for the termination record and closes the decompressor. + * + * pSSM->rc will be set on error. + * + * @returns pSSM->rc. + * @param pSSM The saved state handle. + */ +static int ssmR3DataReadFinishV2(PSSMHANDLE pSSM) +{ + /* + * If we haven't encountered the end of the record, it must be the next one. + */ + int rc = pSSM->rc; + if ( !pSSM->u.Read.fEndOfData + && RT_SUCCESS(rc)) + { + if ( pSSM->u.Read.cbDataBuffer != pSSM->u.Read.offDataBuffer + && pSSM->u.Read.cbDataBuffer > 0) + { + LogRel(("SSM: At least %#x bytes left to read\n", pSSM->u.Read.cbDataBuffer - pSSM->u.Read.offDataBuffer)); + rc = VERR_SSM_LOADED_TOO_LITTLE; + } + else + { + rc = ssmR3DataReadRecHdrV2(pSSM); + if ( RT_SUCCESS(rc) + && !pSSM->u.Read.fEndOfData) + { + LogRel(("SSM: At least %#x bytes left to read\n", pSSM->u.Read.cbDataBuffer)); + rc = VERR_SSM_LOADED_TOO_LITTLE; + AssertFailed(); + } + } + pSSM->rc = rc; + } + return rc; +} +#endif /* !SSM_STANDALONE */ + + +/** + * Read raw record bytes, work the progress indicator and unit offset. + * + * @returns VBox status code. Does NOT set pSSM->rc. + * @param pSSM The saved state handle. + * @param pvBuf Where to put the bits + * @param cbToRead How many bytes to read. + */ +DECLINLINE(int) ssmR3DataReadV2Raw(PSSMHANDLE pSSM, void *pvBuf, size_t cbToRead) +{ + int rc = ssmR3StrmRead(&pSSM->Strm, pvBuf, cbToRead); + if (RT_SUCCESS(rc)) + { + pSSM->offUnit += cbToRead; + ssmR3ProgressByByte(pSSM, cbToRead); + return VINF_SUCCESS; + } + + if (rc == VERR_SSM_CANCELLED) + return rc; + + if (pSSM->enmAfter != SSMAFTER_DEBUG_IT && rc == VERR_EOF) + AssertMsgFailedReturn(("SSM: attempted reading more than the unit! rc=%Rrc\n", rc), VERR_SSM_LOADED_TOO_MUCH); + return VERR_SSM_STREAM_ERROR; +} + + +/** + * Reads and checks the LZF "header". + * + * @returns VBox status code. Sets pSSM->rc on error. + * @param pSSM The saved state handle.. + * @param pcbDecompr Where to store the size of the decompressed data. + */ +DECLINLINE(int) ssmR3DataReadV2RawLzfHdr(PSSMHANDLE pSSM, uint32_t *pcbDecompr) +{ + *pcbDecompr = 0; /* shuts up gcc. */ + AssertLogRelMsgReturn( pSSM->u.Read.cbRecLeft > 1 + && pSSM->u.Read.cbRecLeft <= RT_SIZEOFMEMB(SSMHANDLE, u.Read.abComprBuffer) + 2, + ("%#x\n", pSSM->u.Read.cbRecLeft), + pSSM->rc = VERR_SSM_INTEGRITY_DECOMPRESSION); + + uint8_t cKB; + int rc = ssmR3DataReadV2Raw(pSSM, &cKB, 1); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + pSSM->u.Read.cbRecLeft -= sizeof(cKB); + + uint32_t cbDecompr = (uint32_t)cKB * _1K; + AssertLogRelMsgReturn( cbDecompr >= pSSM->u.Read.cbRecLeft + && cbDecompr <= RT_SIZEOFMEMB(SSMHANDLE, u.Read.abDataBuffer), + ("%#x\n", cbDecompr), + pSSM->rc = VERR_SSM_INTEGRITY_DECOMPRESSION); + + *pcbDecompr = cbDecompr; + return VINF_SUCCESS; +} + + +/** + * Reads an LZF block from the stream and decompresses into the specified + * buffer. + * + * @returns VBox status code. Sets pSSM->rc on error. + * @param pSSM The saved state handle. + * @param pvDst Pointer to the output buffer. + * @param cbDecompr The size of the decompressed data. 
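
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): an
 * LZF record body starts with a single byte giving the decompressed size in
 * whole KBs, followed by the compressed bytes.  The checks mirror the ones in
 * ssmR3DataReadV2RawLzfHdr() above: the expansion must not be smaller than
 * the remaining compressed payload and must fit the decompression buffer.
 * cbMaxBuffer stands in for sizeof(abDataBuffer); the helper name is invented.
 */
static int exampleCheckLzfHdr(uint8_t cKB, uint32_t cbComprLeft, uint32_t cbMaxBuffer, uint32_t *pcbDecompr)
{
    uint32_t const cbDecompr = (uint32_t)cKB * _1K;   /* the size byte counts KBs */
    if (cbDecompr < cbComprLeft)                      /* compressed data never grows here */
        return VERR_SSM_INTEGRITY_DECOMPRESSION;
    if (cbDecompr > cbMaxBuffer)                      /* must fit the staging buffer */
        return VERR_SSM_INTEGRITY_DECOMPRESSION;
    *pcbDecompr = cbDecompr;
    return VINF_SUCCESS;
}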
+ */ +static int ssmR3DataReadV2RawLzf(PSSMHANDLE pSSM, void *pvDst, size_t cbDecompr) +{ + int rc; + uint32_t cbCompr = pSSM->u.Read.cbRecLeft; + pSSM->u.Read.cbRecLeft = 0; + + /* + * Try use the stream buffer directly to avoid copying things around. + */ + uint8_t const *pb = ssmR3StrmReadDirect(&pSSM->Strm, cbCompr); + if (pb) + { + pSSM->offUnit += cbCompr; + ssmR3ProgressByByte(pSSM, cbCompr); + } + else + { + rc = ssmR3DataReadV2Raw(pSSM, &pSSM->u.Read.abComprBuffer[0], cbCompr); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + pb = &pSSM->u.Read.abComprBuffer[0]; + } + + /* + * Decompress it. + */ + size_t cbDstActual; + rc = RTZipBlockDecompress(RTZIPTYPE_LZF, 0 /*fFlags*/, + pb, cbCompr, NULL /*pcbSrcActual*/, + pvDst, cbDecompr, &cbDstActual); + if (RT_SUCCESS(rc)) + { + AssertLogRelMsgReturn(cbDstActual == cbDecompr, ("%#x %#x\n", cbDstActual, cbDecompr), pSSM->rc = VERR_SSM_INTEGRITY_DECOMPRESSION); + return VINF_SUCCESS; + } + + AssertLogRelMsgFailed(("cbCompr=%#x cbDecompr=%#x rc=%Rrc\n", cbCompr, cbDecompr, rc)); + return pSSM->rc = VERR_SSM_INTEGRITY_DECOMPRESSION; +} + + +/** + * Reads and checks the raw zero "header". + * + * @returns VBox status code. Sets pSSM->rc on error. + * @param pSSM The saved state handle.. + * @param pcbZero Where to store the size of the zero data. + */ +DECLINLINE(int) ssmR3DataReadV2RawZeroHdr(PSSMHANDLE pSSM, uint32_t *pcbZero) +{ + *pcbZero = 0; /* shuts up gcc. */ + AssertLogRelMsgReturn(pSSM->u.Read.cbRecLeft == 1, ("%#x\n", pSSM->u.Read.cbRecLeft), pSSM->rc = VERR_SSM_INTEGRITY_DECOMPRESSION); + + uint8_t cKB; + int rc = ssmR3DataReadV2Raw(pSSM, &cKB, 1); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + pSSM->u.Read.cbRecLeft = 0; + + uint32_t cbZero = (uint32_t)cKB * _1K; + AssertLogRelMsgReturn(cbZero <= RT_SIZEOFMEMB(SSMHANDLE, u.Read.abDataBuffer), + ("%#x\n", cbZero), pSSM->rc = VERR_SSM_INTEGRITY_DECOMPRESSION); + + *pcbZero = cbZero; + return VINF_SUCCESS; +} + + +/** + * Worker for reading the record header. + * + * It sets pSSM->u.Read.cbRecLeft, pSSM->u.Read.u8TypeAndFlags and + * pSSM->u.Read.fEndOfData. When a termination record is encounter, it will be + * read in full and validated, the fEndOfData indicator is set, and VINF_SUCCESS + * is returned. + * + * @returns VBox status code. Does not set pSSM->rc. + * @param pSSM The saved state handle. + */ +static int ssmR3DataReadRecHdrV2(PSSMHANDLE pSSM) +{ + AssertLogRelReturn(!pSSM->u.Read.fEndOfData, VERR_SSM_LOADED_TOO_MUCH); + + /* + * Read the two mandatory bytes. + */ + uint8_t abHdr[8]; + int rc = ssmR3DataReadV2Raw(pSSM, abHdr, 2); + if (RT_FAILURE(rc)) + return rc; + + /* + * Validate the first byte and check for the termination records. 
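
/*
 * Illustrative sketch (editorial note, not part of the upstream patch):
 * one-shot block decompression as used by ssmR3DataReadV2RawLzf() above.  The
 * entire compressed payload must already be in memory, and anything other
 * than exactly the advertised number of output bytes is treated as
 * corruption.  A RAW_ZERO record, by contrast, carries no payload at all: its
 * single size byte simply expands to that many KBs of zeros.  Function names
 * are invented.
 */
static int exampleExpandLzf(const uint8_t *pbCompr, size_t cbCompr, void *pvDst, size_t cbExpected)
{
    size_t cbActual = 0;
    int rc = RTZipBlockDecompress(RTZIPTYPE_LZF, 0 /*fFlags*/,
                                  pbCompr, cbCompr, NULL /*pcbSrcActual*/,
                                  pvDst, cbExpected, &cbActual);
    if (RT_SUCCESS(rc) && cbActual != cbExpected)
        rc = VERR_SSM_INTEGRITY_DECOMPRESSION;        /* short or long expansion == corrupt */
    return rc;
}

static void exampleExpandZero(uint8_t cKB, void *pvDst)
{
    memset(pvDst, 0, (size_t)cKB * _1K);              /* the record stores no data bytes */
}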
+ */ + pSSM->u.Read.u8TypeAndFlags = abHdr[0]; + AssertLogRelMsgReturn(SSM_REC_ARE_TYPE_AND_FLAGS_VALID(abHdr[0]), ("%#x %#x\n", abHdr[0], abHdr[1]), VERR_SSM_INTEGRITY_REC_HDR); + if ((abHdr[0] & SSM_REC_TYPE_MASK) == SSM_REC_TYPE_TERM) + { + pSSM->u.Read.cbRecLeft = 0; + pSSM->u.Read.fEndOfData = true; + AssertLogRelMsgReturn(abHdr[1] == sizeof(SSMRECTERM) - 2, ("%#x\n", abHdr[1]), VERR_SSM_INTEGRITY_REC_TERM); + AssertLogRelMsgReturn(abHdr[0] & SSM_REC_FLAGS_IMPORTANT, ("%#x\n", abHdr[0]), VERR_SSM_INTEGRITY_REC_TERM); + + /* get the rest */ + uint32_t u32StreamCRC = ssmR3StrmFinalCRC(&pSSM->Strm); + SSMRECTERM TermRec; + rc = ssmR3DataReadV2Raw(pSSM, (uint8_t *)&TermRec + 2, sizeof(SSMRECTERM) - 2); + if (RT_FAILURE(rc)) + return rc; + + /* validate integrity */ + AssertLogRelMsgReturn(TermRec.cbUnit == pSSM->offUnit, + ("cbUnit=%#llx offUnit=%#llx\n", TermRec.cbUnit, pSSM->offUnit), + VERR_SSM_INTEGRITY_REC_TERM); + AssertLogRelMsgReturn(!(TermRec.fFlags & ~SSMRECTERM_FLAGS_CRC32), ("%#x\n", TermRec.fFlags), VERR_SSM_INTEGRITY_REC_TERM); + if (!(TermRec.fFlags & SSMRECTERM_FLAGS_CRC32)) + AssertLogRelMsgReturn(TermRec.u32StreamCRC == 0, ("%#x\n", TermRec.u32StreamCRC), VERR_SSM_INTEGRITY_REC_TERM); + else if (pSSM->Strm.fChecksummed) + AssertLogRelMsgReturn(TermRec.u32StreamCRC == u32StreamCRC, ("%#x, %#x\n", TermRec.u32StreamCRC, u32StreamCRC), + VERR_SSM_INTEGRITY_REC_TERM_CRC); + + Log3(("ssmR3DataReadRecHdrV2: %08llx|%08llx: TERM\n", ssmR3StrmTell(&pSSM->Strm) - sizeof(SSMRECTERM), pSSM->offUnit)); + return VINF_SUCCESS; + } + + /* + * Figure the size. The 2nd byte is encoded in UTF-8 fashion, so this + * is can be highly enjoyable. + */ + uint32_t cbHdr = 2; + uint32_t cb = abHdr[1]; + if (!(cb & 0x80)) + pSSM->u.Read.cbRecLeft = cb; + else + { + /* + * Need more data. Figure how much and read it. + */ + if (!(cb & RT_BIT(5))) + cb = 2; + else if (!(cb & RT_BIT(4))) + cb = 3; + else if (!(cb & RT_BIT(3))) + cb = 4; + else if (!(cb & RT_BIT(2))) + cb = 5; + else if (!(cb & RT_BIT(1))) + cb = 6; + else + AssertLogRelMsgFailedReturn(("Invalid record size byte: %#x\n", cb), VERR_SSM_INTEGRITY_REC_HDR); + cbHdr = cb + 1; + + rc = ssmR3DataReadV2Raw(pSSM, &abHdr[2], cb - 1); + if (RT_FAILURE(rc)) + return rc; + + /* + * Validate what we've read. + */ + switch (cb) + { + case 6: + AssertLogRelMsgReturn((abHdr[6] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR); + RT_FALL_THRU(); + case 5: + AssertLogRelMsgReturn((abHdr[5] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR); + RT_FALL_THRU(); + case 4: + AssertLogRelMsgReturn((abHdr[4] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR); + RT_FALL_THRU(); + case 3: + AssertLogRelMsgReturn((abHdr[3] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR); + RT_FALL_THRU(); + case 2: + AssertLogRelMsgReturn((abHdr[2] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, cb + 1, &abHdr[0]), VERR_SSM_INTEGRITY_REC_HDR); + break; + default: + return VERR_IPE_NOT_REACHED_DEFAULT_CASE; + } + + /* + * Decode it and validate the range. 
+ */ + switch (cb) + { + case 6: + cb = (abHdr[6] & 0x3f) + | ((uint32_t)(abHdr[5] & 0x3f) << 6) + | ((uint32_t)(abHdr[4] & 0x3f) << 12) + | ((uint32_t)(abHdr[3] & 0x3f) << 18) + | ((uint32_t)(abHdr[2] & 0x3f) << 24) + | ((uint32_t)(abHdr[1] & 0x01) << 30); + AssertLogRelMsgReturn(cb >= 0x04000000 && cb <= 0x7fffffff, ("cb=%#x\n", cb), VERR_SSM_INTEGRITY_REC_HDR); + break; + case 5: + cb = (abHdr[5] & 0x3f) + | ((uint32_t)(abHdr[4] & 0x3f) << 6) + | ((uint32_t)(abHdr[3] & 0x3f) << 12) + | ((uint32_t)(abHdr[2] & 0x3f) << 18) + | ((uint32_t)(abHdr[1] & 0x03) << 24); + AssertLogRelMsgReturn(cb >= 0x00200000 && cb <= 0x03ffffff, ("cb=%#x\n", cb), VERR_SSM_INTEGRITY_REC_HDR); + break; + case 4: + cb = (abHdr[4] & 0x3f) + | ((uint32_t)(abHdr[3] & 0x3f) << 6) + | ((uint32_t)(abHdr[2] & 0x3f) << 12) + | ((uint32_t)(abHdr[1] & 0x07) << 18); + AssertLogRelMsgReturn(cb >= 0x00010000 && cb <= 0x001fffff, ("cb=%#x\n", cb), VERR_SSM_INTEGRITY_REC_HDR); + break; + case 3: + cb = (abHdr[3] & 0x3f) + | ((uint32_t)(abHdr[2] & 0x3f) << 6) + | ((uint32_t)(abHdr[1] & 0x0f) << 12); +#if 0 /* disabled to optimize buffering */ + AssertLogRelMsgReturn(cb >= 0x00000800 && cb <= 0x0000ffff, ("cb=%#x\n", cb), VERR_SSM_INTEGRITY_REC_HDR); +#endif + break; + case 2: + cb = (abHdr[2] & 0x3f) + | ((uint32_t)(abHdr[1] & 0x1f) << 6); +#if 0 /* disabled to optimize buffering */ + AssertLogRelMsgReturn(cb >= 0x00000080 && cb <= 0x000007ff, ("cb=%#x\n", cb), VERR_SSM_INTEGRITY_REC_HDR); +#endif + break; + default: + return VERR_IPE_NOT_REACHED_DEFAULT_CASE; + } + + pSSM->u.Read.cbRecLeft = cb; + } + + Log3(("ssmR3DataReadRecHdrV2: %08llx|%08llx/%08x: Type=%02x fImportant=%RTbool cbHdr=%u\n", + ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pSSM->u.Read.cbRecLeft, + pSSM->u.Read.u8TypeAndFlags & SSM_REC_TYPE_MASK, + !!(pSSM->u.Read.u8TypeAndFlags & SSM_REC_FLAGS_IMPORTANT), + cbHdr + )); NOREF(cbHdr); + return VINF_SUCCESS; +} + + +/** + * Buffer miss, do an unbuffered read. + * + * @returns VBox status code. Sets pSSM->rc on error. + * @param pSSM The saved state handle. + * @param pvBuf Where to store the read data. + * @param cbBuf Number of bytes to read. + */ +static int ssmR3DataReadUnbufferedV2(PSSMHANDLE pSSM, void *pvBuf, size_t cbBuf) +{ + void const *pvBufOrg = pvBuf; NOREF(pvBufOrg); + size_t const cbBufOrg = cbBuf; NOREF(cbBufOrg); + + /* + * Copy out what we've got in the buffer. + */ + uint32_t off = pSSM->u.Read.offDataBuffer; + int32_t cbInBuffer = pSSM->u.Read.cbDataBuffer - off; + Log4(("ssmR3DataReadUnbufferedV2: %08llx|%08llx/%08x/%08x: cbBuf=%#x\n", ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pSSM->u.Read.cbRecLeft, cbInBuffer, cbBufOrg)); + if (cbInBuffer > 0) + { + uint32_t const cbToCopy = (uint32_t)cbInBuffer; + Assert(cbBuf > cbToCopy); + memcpy(pvBuf, &pSSM->u.Read.abDataBuffer[off], cbToCopy); + pvBuf = (uint8_t *)pvBuf + cbToCopy; + cbBuf -= cbToCopy; + pSSM->u.Read.cbDataBuffer = 0; + pSSM->u.Read.offDataBuffer = 0; + } + + /* + * Read data. + */ + do + { + /* + * Read the next record header if no more data. + */ + if (!pSSM->u.Read.cbRecLeft) + { + int rc = ssmR3DataReadRecHdrV2(pSSM); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + } + AssertLogRelMsgReturn(!pSSM->u.Read.fEndOfData, ("cbBuf=%zu\n", cbBuf), pSSM->rc = VERR_SSM_LOADED_TOO_MUCH); + + /* + * Read data from the current record. 
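
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): the
 * record size field decoded above is stored UTF-8 style.  Values below 0x80
 * fit in the single size byte; larger values use a prefix byte whose leading
 * 1-bits give the total length, followed by 10xxxxxx continuation bytes that
 * carry six payload bits each.  Worked example for cb = 0x1234 (three bytes):
 *
 *      0xE0 | (0x1234 >> 12)         = 0xE1    (1110xxxx prefix, top 4 bits)
 *      0x80 | ((0x1234 >> 6) & 0x3F) = 0x88    (middle 6 bits)
 *      0x80 | (0x1234 & 0x3F)        = 0xB4    (low 6 bits)
 *
 * The helper below recovers 0x1234 from { 0xE1, 0x88, 0xB4 }, matching the
 * "case 3" decode above (where the size bytes start at abHdr[1]).  The
 * function name is invented.
 */
static uint32_t exampleDecodeSize3(uint8_t const ab[3])
{
    return ((uint32_t)(ab[0] & 0x0f) << 12)   /* 4 bits from the 1110xxxx prefix byte */
         | ((uint32_t)(ab[1] & 0x3f) <<  6)   /* 6 bits from the first continuation byte */
         |  (uint32_t)(ab[2] & 0x3f);         /* 6 bits from the second continuation byte */
}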
+ */ + uint32_t cbToRead; + switch (pSSM->u.Read.u8TypeAndFlags & SSM_REC_TYPE_MASK) + { + case SSM_REC_TYPE_RAW: + { + cbToRead = (uint32_t)RT_MIN(cbBuf, pSSM->u.Read.cbRecLeft); + int rc = ssmR3DataReadV2Raw(pSSM, pvBuf, cbToRead); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + pSSM->u.Read.cbRecLeft -= cbToRead; + break; + } + + case SSM_REC_TYPE_RAW_LZF: + { + int rc = ssmR3DataReadV2RawLzfHdr(pSSM, &cbToRead); + if (RT_FAILURE(rc)) + return rc; + if (cbToRead <= cbBuf) + { + rc = ssmR3DataReadV2RawLzf(pSSM, pvBuf, cbToRead); + if (RT_FAILURE(rc)) + return rc; + } + else + { + /* The output buffer is too small, use the data buffer. */ + rc = ssmR3DataReadV2RawLzf(pSSM, &pSSM->u.Read.abDataBuffer[0], cbToRead); + if (RT_FAILURE(rc)) + return rc; + pSSM->u.Read.cbDataBuffer = cbToRead; + cbToRead = (uint32_t)cbBuf; + pSSM->u.Read.offDataBuffer = cbToRead; + memcpy(pvBuf, &pSSM->u.Read.abDataBuffer[0], cbToRead); + } + break; + } + + case SSM_REC_TYPE_RAW_ZERO: + { + int rc = ssmR3DataReadV2RawZeroHdr(pSSM, &cbToRead); + if (RT_FAILURE(rc)) + return rc; + if (cbToRead > cbBuf) + { + /* Spill the remainder into the data buffer. */ + memset(&pSSM->u.Read.abDataBuffer[0], 0, cbToRead - cbBuf); + pSSM->u.Read.cbDataBuffer = cbToRead - (uint32_t)cbBuf; + pSSM->u.Read.offDataBuffer = 0; + cbToRead = (uint32_t)cbBuf; + } + memset(pvBuf, 0, cbToRead); + break; + } + + default: + AssertMsgFailedReturn(("%x\n", pSSM->u.Read.u8TypeAndFlags), pSSM->rc = VERR_SSM_BAD_REC_TYPE); + } + + pSSM->offUnitUser += cbToRead; + cbBuf -= cbToRead; + pvBuf = (uint8_t *)pvBuf + cbToRead; + } while (cbBuf > 0); + + Log4(("ssmR3DataReadUnBufferedV2: %08llx|%08llx/%08x/%08x: cbBuf=%#x %.*Rhxs%s\n", + ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pSSM->u.Read.cbRecLeft, 0, cbBufOrg, RT_MIN(SSM_LOG_BYTES, cbBufOrg), pvBufOrg, cbBufOrg > SSM_LOG_BYTES ? "..." : "")); + return VINF_SUCCESS; +} + + +/** + * Buffer miss, do a buffered read. + * + * @returns VBox status code. Sets pSSM->rc on error. + * + * @param pSSM The saved state handle. + * @param pvBuf Where to store the read data. + * @param cbBuf Number of bytes to read. + */ +static int ssmR3DataReadBufferedV2(PSSMHANDLE pSSM, void *pvBuf, size_t cbBuf) +{ + void const *pvBufOrg = pvBuf; NOREF(pvBufOrg); + size_t const cbBufOrg = cbBuf; NOREF(cbBufOrg); + + /* + * Copy out what we've got in the buffer. + */ + uint32_t off = pSSM->u.Read.offDataBuffer; + int32_t cbInBuffer = pSSM->u.Read.cbDataBuffer - off; + Log4(("ssmR3DataReadBufferedV2: %08llx|%08llx/%08x/%08x: cbBuf=%#x\n", ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pSSM->u.Read.cbRecLeft, cbInBuffer, cbBufOrg)); + if (cbInBuffer > 0) + { + uint32_t const cbToCopy = (uint32_t)cbInBuffer; + Assert(cbBuf > cbToCopy); + memcpy(pvBuf, &pSSM->u.Read.abDataBuffer[off], cbToCopy); + pvBuf = (uint8_t *)pvBuf + cbToCopy; + cbBuf -= cbToCopy; + pSSM->offUnitUser += cbToCopy; + pSSM->u.Read.cbDataBuffer = 0; + pSSM->u.Read.offDataBuffer = 0; + } + + /* + * Buffer more data. + */ + do + { + /* + * Read the next record header if no more data. + */ + if (!pSSM->u.Read.cbRecLeft) + { + int rc = ssmR3DataReadRecHdrV2(pSSM); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + } + AssertLogRelMsgReturn(!pSSM->u.Read.fEndOfData, ("cbBuf=%zu\n", cbBuf), pSSM->rc = VERR_SSM_LOADED_TOO_MUCH); + + /* + * Read data from the current record. + * LATER: optimize by reading directly into the output buffer for some cases. 
+ */ + uint32_t cbToRead; + switch (pSSM->u.Read.u8TypeAndFlags & SSM_REC_TYPE_MASK) + { + case SSM_REC_TYPE_RAW: + { + cbToRead = RT_MIN(sizeof(pSSM->u.Read.abDataBuffer), pSSM->u.Read.cbRecLeft); + int rc = ssmR3DataReadV2Raw(pSSM, &pSSM->u.Read.abDataBuffer[0], cbToRead); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + pSSM->u.Read.cbRecLeft -= cbToRead; + pSSM->u.Read.cbDataBuffer = cbToRead; + break; + } + + case SSM_REC_TYPE_RAW_LZF: + { + int rc = ssmR3DataReadV2RawLzfHdr(pSSM, &cbToRead); + if (RT_FAILURE(rc)) + return rc; + rc = ssmR3DataReadV2RawLzf(pSSM, &pSSM->u.Read.abDataBuffer[0], cbToRead); + if (RT_FAILURE(rc)) + return rc; + pSSM->u.Read.cbDataBuffer = cbToRead; + break; + } + + case SSM_REC_TYPE_RAW_ZERO: + { + int rc = ssmR3DataReadV2RawZeroHdr(pSSM, &cbToRead); + if (RT_FAILURE(rc)) + return rc; + memset(&pSSM->u.Read.abDataBuffer[0], 0, cbToRead); + pSSM->u.Read.cbDataBuffer = cbToRead; + break; + } + + default: + AssertMsgFailedReturn(("%x\n", pSSM->u.Read.u8TypeAndFlags), pSSM->rc = VERR_SSM_BAD_REC_TYPE); + } + /*pSSM->u.Read.offDataBuffer = 0;*/ + + /* + * Copy data from the buffer. + */ + uint32_t cbToCopy = (uint32_t)RT_MIN(cbBuf, cbToRead); + memcpy(pvBuf, &pSSM->u.Read.abDataBuffer[0], cbToCopy); + cbBuf -= cbToCopy; + pvBuf = (uint8_t *)pvBuf + cbToCopy; + pSSM->offUnitUser += cbToCopy; + pSSM->u.Read.offDataBuffer = cbToCopy; + } while (cbBuf > 0); + + Log4(("ssmR3DataReadBufferedV2: %08llx|%08llx/%08x/%08x: cbBuf=%#x %.*Rhxs%s\n", + ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pSSM->u.Read.cbRecLeft, pSSM->u.Read.cbDataBuffer - pSSM->u.Read.offDataBuffer, + cbBufOrg, RT_MIN(SSM_LOG_BYTES, cbBufOrg), pvBufOrg, cbBufOrg > SSM_LOG_BYTES ? "..." : "")); + return VINF_SUCCESS; +} + + +/** + * Inlined worker that handles format checks and buffered reads. + * + * @param pSSM The saved state handle. + * @param pvBuf Where to store the read data. + * @param cbBuf Number of bytes to read. + */ +DECLINLINE(int) ssmR3DataRead(PSSMHANDLE pSSM, void *pvBuf, size_t cbBuf) +{ + /* + * Fend off previous errors and V1 data units. + */ + if (RT_SUCCESS(pSSM->rc)) + { + if (RT_LIKELY(pSSM->u.Read.uFmtVerMajor != 1)) + { + /* + * Check if the requested data is buffered. + */ + uint32_t off = pSSM->u.Read.offDataBuffer; + if ( off + cbBuf > pSSM->u.Read.cbDataBuffer + || cbBuf > sizeof(pSSM->u.Read.abDataBuffer)) + { + if (cbBuf <= sizeof(pSSM->u.Read.abDataBuffer) / 8) + return ssmR3DataReadBufferedV2(pSSM, pvBuf, cbBuf); + return ssmR3DataReadUnbufferedV2(pSSM, pvBuf, cbBuf); + } + + memcpy(pvBuf, &pSSM->u.Read.abDataBuffer[off], cbBuf); + pSSM->u.Read.offDataBuffer = off + (uint32_t)cbBuf; + pSSM->offUnitUser += cbBuf; + Log4((cbBuf + ? "ssmR3DataRead: %08llx|%08llx/%08x/%08x: cbBuf=%#x %.*Rhxs%s\n" + : "ssmR3DataRead: %08llx|%08llx/%08x/%08x: cbBuf=%#x\n", + ssmR3StrmTell(&pSSM->Strm), pSSM->offUnit, pSSM->u.Read.cbRecLeft, pSSM->u.Read.cbDataBuffer - pSSM->u.Read.offDataBuffer, + cbBuf, RT_MIN(SSM_LOG_BYTES, cbBuf), pvBuf, cbBuf > SSM_LOG_BYTES ? "..." : "")); + + return VINF_SUCCESS; + } + return ssmR3DataReadV1(pSSM, pvBuf, cbBuf); + } + return pSSM->rc; +} + + +/** + * Gets a structure. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pvStruct The structure address. + * @param paFields The array of structure fields descriptions. + * The array must be terminated by a SSMFIELD_ENTRY_TERM(). 
+ */ +VMMR3DECL(int) SSMR3GetStruct(PSSMHANDLE pSSM, void *pvStruct, PCSSMFIELD paFields) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + AssertPtr(pvStruct); + AssertPtr(paFields); + + /* begin marker. */ + uint32_t u32Magic; + int rc = SSMR3GetU32(pSSM, &u32Magic); + if (RT_FAILURE(rc)) + return rc; + AssertMsgReturn(u32Magic == SSMR3STRUCT_BEGIN, ("u32Magic=%#RX32\n", u32Magic), pSSM->rc = VERR_SSM_STRUCTURE_MAGIC); + + /* get the fields */ + for (PCSSMFIELD pCur = paFields; + pCur->cb != UINT32_MAX && pCur->off != UINT32_MAX; + pCur++) + { + if (pCur->uFirstVer <= pSSM->u.Read.uCurUnitVer) + { + uint8_t *pbField = (uint8_t *)pvStruct + pCur->off; + switch ((uintptr_t)pCur->pfnGetPutOrTransformer) + { + case SSMFIELDTRANS_NO_TRANSFORMATION: + rc = ssmR3DataRead(pSSM, pbField, pCur->cb); + break; + + case SSMFIELDTRANS_GCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPTR), ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3GetGCPtr(pSSM, (PRTGCPTR)pbField); + break; + + case SSMFIELDTRANS_GCPHYS: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPHYS), ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3GetGCPhys(pSSM, (PRTGCPHYS)pbField); + break; + + case SSMFIELDTRANS_RCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTRCPTR), ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3GetRCPtr(pSSM, (PRTRCPTR)pbField); + break; + + case SSMFIELDTRANS_RCPTR_ARRAY: + { + uint32_t const cEntries = pCur->cb / sizeof(RTRCPTR); + AssertMsgBreakStmt(pCur->cb == cEntries * sizeof(RTRCPTR) && cEntries, ("%#x (%s)\n", pCur->cb, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = VINF_SUCCESS; + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + rc = SSMR3GetRCPtr(pSSM, &((PRTRCPTR)pbField)[i]); + break; + } + + default: + AssertMsgFailedBreakStmt(("%#x\n", pCur->pfnGetPutOrTransformer), rc = VERR_SSM_FIELD_COMPLEX); + } + if (RT_FAILURE(rc)) + { + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc; + return rc; + } + } + } + + /* end marker */ + rc = SSMR3GetU32(pSSM, &u32Magic); + if (RT_FAILURE(rc)) + return rc; + AssertMsgReturn(u32Magic == SSMR3STRUCT_END, ("u32Magic=%#RX32\n", u32Magic), pSSM->rc = VERR_SSM_STRUCTURE_MAGIC); + return rc; +} + + +/** + * SSMR3GetStructEx helper that gets a HCPTR that is used as a NULL indicator. + * + * @returns VBox status code. + * + * @param pSSM The saved state handle. + * @param ppv Where to return the value (0/1). + * @param fFlags SSMSTRUCT_FLAGS_XXX. + */ +DECLINLINE(int) ssmR3GetHCPtrNI(PSSMHANDLE pSSM, void **ppv, uint32_t fFlags) +{ + uintptr_t uPtrNI; + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + { + if (ssmR3GetHostBits(pSSM) == 64) + { + uint64_t u; + int rc = ssmR3DataRead(pSSM, &u, sizeof(u)); + if (RT_FAILURE(rc)) + return rc; + uPtrNI = u ? 1 : 0; + } + else + { + uint32_t u; + int rc = ssmR3DataRead(pSSM, &u, sizeof(u)); + if (RT_FAILURE(rc)) + return rc; + uPtrNI = u ? 1 : 0; + } + } + else + { + bool f; + int rc = SSMR3GetBool(pSSM, &f); + if (RT_FAILURE(rc)) + return rc; + uPtrNI = f ? 1 : 0; + } + *ppv = (void *)uPtrNI; + return VINF_SUCCESS; +} + + +/** + * Gets a structure, extended API. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pvStruct The structure address. + * @param cbStruct The size of the struct (use for validation only). + * @param fFlags Combination of SSMSTRUCT_FLAGS_XXX defines. + * @param paFields The array of structure fields descriptions. 
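
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): a
 * typical consumer of SSMR3GetStruct() describes its saved layout with an
 * SSMFIELD table and lets the function handle the begin marker, each field
 * and the end marker.  The EXAMPLEDEVSTATE type and its field names are
 * invented; the SSMFIELD_ENTRY* initializers are the ones real callers use
 * (see VBox/vmm/ssm.h for the exact set of macros).
 */
typedef struct EXAMPLEDEVSTATE
{
    uint32_t    uConfig;
    bool        fEnabled;
    RTGCPHYS    GCPhysBase;
} EXAMPLEDEVSTATE;

static SSMFIELD const g_aExampleFields[] =
{
    SSMFIELD_ENTRY(       EXAMPLEDEVSTATE, uConfig),
    SSMFIELD_ENTRY(       EXAMPLEDEVSTATE, fEnabled),
    SSMFIELD_ENTRY_GCPHYS(EXAMPLEDEVSTATE, GCPhysBase),
    SSMFIELD_ENTRY_TERM()
};

/* In a load callback:
 *      EXAMPLEDEVSTATE State;
 *      int rc = SSMR3GetStruct(pSSM, &State, &g_aExampleFields[0]);
 */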
The + * array must be terminated by a SSMFIELD_ENTRY_TERM(). + * @param pvUser User argument for any callbacks that paFields might + * contain. + */ +VMMR3DECL(int) SSMR3GetStructEx(PSSMHANDLE pSSM, void *pvStruct, size_t cbStruct, + uint32_t fFlags, PCSSMFIELD paFields, void *pvUser) +{ + int rc; + uint32_t u32Magic; + + /* + * Validation. + */ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + AssertMsgReturn(!(fFlags & ~SSMSTRUCT_FLAGS_VALID_MASK), ("%#x\n", fFlags), pSSM->rc = VERR_INVALID_PARAMETER); + AssertPtr(pvStruct); + AssertPtr(paFields); + + /* + * Begin marker. + */ + if (!(fFlags & (SSMSTRUCT_FLAGS_NO_MARKERS | SSMSTRUCT_FLAGS_NO_LEAD_MARKER))) + { + rc = SSMR3GetU32(pSSM, &u32Magic); + if (RT_FAILURE(rc)) + return rc; + AssertMsgReturn(u32Magic == SSMR3STRUCT_BEGIN, ("u32Magic=%#RX32\n", u32Magic), pSSM->rc = VERR_SSM_STRUCTURE_MAGIC); + } + + /* + * Put the fields + */ + rc = VINF_SUCCESS; + uint32_t off = 0; + for (PCSSMFIELD pCur = paFields; + pCur->cb != UINT32_MAX && pCur->off != UINT32_MAX; + pCur++) + { + uint32_t const offField = (!SSMFIELDTRANS_IS_PADDING(pCur->pfnGetPutOrTransformer) || pCur->off != UINT32_MAX / 2) + && !SSMFIELDTRANS_IS_OLD(pCur->pfnGetPutOrTransformer) + ? pCur->off + : off; + uint32_t const cbField = SSMFIELDTRANS_IS_OLD(pCur->pfnGetPutOrTransformer) + ? 0 + : SSMFIELDTRANS_IS_PADDING(pCur->pfnGetPutOrTransformer) + ? RT_HIWORD(pCur->cb) + : pCur->cb; + AssertMsgReturn( cbField <= cbStruct + && offField + cbField <= cbStruct + && offField + cbField >= offField, + ("off=%#x cb=%#x cbStruct=%#x (%s)\n", cbField, offField, cbStruct, pCur->pszName), + pSSM->rc = VERR_SSM_FIELD_OUT_OF_BOUNDS); + AssertMsgReturn( !(fFlags & SSMSTRUCT_FLAGS_FULL_STRUCT) + || off == offField, + ("off=%#x offField=%#x (%s)\n", off, offField, pCur->pszName), + pSSM->rc = VERR_SSM_FIELD_NOT_CONSECUTIVE); + + if (pCur->uFirstVer <= pSSM->u.Read.uCurUnitVer) + { + rc = VINF_SUCCESS; + uint8_t *pbField = (uint8_t *)pvStruct + offField; + switch ((uintptr_t)pCur->pfnGetPutOrTransformer) + { + case SSMFIELDTRANS_NO_TRANSFORMATION: + rc = ssmR3DataRead(pSSM, pbField, cbField); + break; + + case SSMFIELDTRANS_GCPHYS: + AssertMsgBreakStmt(cbField == sizeof(RTGCPHYS), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3GetGCPhys(pSSM, (PRTGCPHYS)pbField); + break; + + case SSMFIELDTRANS_GCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTGCPTR), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3GetGCPtr(pSSM, (PRTGCPTR)pbField); + break; + + case SSMFIELDTRANS_RCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTRCPTR), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3GetRCPtr(pSSM, (PRTRCPTR)pbField); + break; + + case SSMFIELDTRANS_RCPTR_ARRAY: + { + uint32_t const cEntries = cbField / sizeof(RTRCPTR); + AssertMsgBreakStmt(cbField == cEntries * sizeof(RTRCPTR) && cEntries, ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = VINF_SUCCESS; + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + rc = SSMR3GetRCPtr(pSSM, &((PRTRCPTR)pbField)[i]); + break; + } + + case SSMFIELDTRANS_HCPTR_NI: + AssertMsgBreakStmt(cbField == sizeof(void *), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = ssmR3GetHCPtrNI(pSSM, (void **)pbField, fFlags); + break; + + case SSMFIELDTRANS_HCPTR_NI_ARRAY: + { + uint32_t const cEntries = cbField / sizeof(void *); + AssertMsgBreakStmt(cbField == cEntries * sizeof(void *) && 
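
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): the
 * field bounds test above guards against 32-bit wrap-around as well as plain
 * overruns -- "off + cb >= off" can only be false when the unsigned addition
 * wrapped.  The same idiom in isolation (invented helper name):
 */
static bool exampleFieldFitsStruct(uint32_t off, uint32_t cb, uint32_t cbStruct)
{
    return cb <= cbStruct
        && off + cb <= cbStruct
        && off + cb >= off;       /* rejects wrap-around of the 32-bit sum */
}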
cEntries, ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = VINF_SUCCESS; + for (uint32_t i = 0; i < cEntries && RT_SUCCESS(rc); i++) + rc = ssmR3GetHCPtrNI(pSSM, &((void **)pbField)[i], fFlags); + break; + } + + case SSMFIELDTRANS_HCPTR_HACK_U32: + AssertMsgBreakStmt(cbField == sizeof(void *), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + *(uintptr_t *)pbField = 0; + rc = ssmR3DataRead(pSSM, pbField, sizeof(uint32_t)); + if ((fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) && ssmR3GetHostBits(pSSM) == 64) + { + uint32_t u32; + rc = ssmR3DataRead(pSSM, &u32, sizeof(uint32_t)); + AssertMsgBreakStmt(RT_FAILURE(rc) || u32 == 0 || (fFlags & SSMSTRUCT_FLAGS_SAVED_AS_MEM), + ("high=%#x low=%#x (%s)\n", u32, *(uint32_t *)pbField, pCur->pszName), + rc = VERR_SSM_FIELD_INVALID_VALUE); + } + break; + + case SSMFIELDTRANS_U32_ZX_U64: + AssertMsgBreakStmt(cbField == sizeof(uint64_t), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + ((uint32_t *)pbField)[1] = 0; + rc = SSMR3GetU32(pSSM, (uint32_t *)pbField); + break; + + + case SSMFIELDTRANS_IGNORE: + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = SSMR3Skip(pSSM, cbField); + break; + + case SSMFIELDTRANS_IGN_GCPHYS: + AssertMsgBreakStmt(cbField == sizeof(RTGCPHYS), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = SSMR3Skip(pSSM, pSSM->u.Read.cbGCPhys); + break; + + case SSMFIELDTRANS_IGN_GCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTGCPTR), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = SSMR3Skip(pSSM, pSSM->u.Read.cbGCPtr); + break; + + case SSMFIELDTRANS_IGN_RCPTR: + AssertMsgBreakStmt(cbField == sizeof(RTRCPTR), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = SSMR3Skip(pSSM, sizeof(RTRCPTR)); + break; + + case SSMFIELDTRANS_IGN_HCPTR: + AssertMsgBreakStmt(cbField == sizeof(void *), ("%#x (%s)\n", cbField, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = SSMR3Skip(pSSM, ssmR3GetHostBits(pSSM) / 8); + break; + + + case SSMFIELDTRANS_OLD: + AssertMsgBreakStmt(pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3Skip(pSSM, pCur->cb); + break; + + case SSMFIELDTRANS_OLD_GCPHYS: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPHYS) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3Skip(pSSM, pSSM->u.Read.cbGCPhys); + break; + + case SSMFIELDTRANS_OLD_GCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTGCPTR) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3Skip(pSSM, pSSM->u.Read.cbGCPtr); + break; + + case SSMFIELDTRANS_OLD_RCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(RTRCPTR) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3Skip(pSSM, sizeof(RTRCPTR)); + break; + + case SSMFIELDTRANS_OLD_HCPTR: + AssertMsgBreakStmt(pCur->cb == sizeof(void *) && pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3Skip(pSSM, ssmR3GetHostBits(pSSM) / 8); + break; + + case SSMFIELDTRANS_OLD_PAD_HC: + 
AssertMsgBreakStmt(pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + rc = SSMR3Skip(pSSM, ssmR3GetHostBits(pSSM) == 64 ? RT_HIWORD(pCur->cb) : RT_LOWORD(pCur->cb)); + break; + + case SSMFIELDTRANS_OLD_PAD_MSC32: + AssertMsgBreakStmt(pCur->off == UINT32_MAX / 2, ("%#x %#x (%s)\n", pCur->cb, pCur->off, pCur->pszName), rc = VERR_SSM_FIELD_INVALID_SIZE); + if (ssmR3IsHostMsc32(pSSM)) + rc = SSMR3Skip(pSSM, pCur->cb); + break; + + + case SSMFIELDTRANS_PAD_HC: + case SSMFIELDTRANS_PAD_HC32: + case SSMFIELDTRANS_PAD_HC64: + case SSMFIELDTRANS_PAD_HC_AUTO: + case SSMFIELDTRANS_PAD_MSC32_AUTO: + { + uint32_t cb32 = RT_BYTE1(pCur->cb); + uint32_t cb64 = RT_BYTE2(pCur->cb); + uint32_t cbCtx = HC_ARCH_BITS == 64 + || ( (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_MSC32_AUTO + && !SSM_HOST_IS_MSC_32) + ? cb64 : cb32; + uint32_t cbSaved = ssmR3GetHostBits(pSSM) == 64 + || ( (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_MSC32_AUTO + && !ssmR3IsHostMsc32(pSSM)) + ? cb64 : cb32; + AssertMsgBreakStmt( cbField == cbCtx + && ( ( pCur->off == UINT32_MAX / 2 + && ( cbField == 0 + || (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_HC_AUTO + || (uintptr_t)pCur->pfnGetPutOrTransformer == SSMFIELDTRANS_PAD_MSC32_AUTO + ) + ) + || (pCur->off != UINT32_MAX / 2 && cbField != 0) + ) + , ("cbField=%#x cb32=%#x cb64=%#x HC_ARCH_BITS=%u cbCtx=%#x cbSaved=%#x off=%#x\n", + cbField, cb32, cb64, HC_ARCH_BITS, cbCtx, cbSaved, pCur->off), + rc = VERR_SSM_FIELD_INVALID_PADDING_SIZE); + if (fFlags & SSMSTRUCT_FLAGS_DONT_IGNORE) + rc = SSMR3Skip(pSSM, cbSaved); + break; + } + + default: + AssertBreakStmt(pCur->pfnGetPutOrTransformer, rc = VERR_SSM_FIELD_INVALID_CALLBACK); + rc = pCur->pfnGetPutOrTransformer(pSSM, pCur, pvStruct, fFlags, true /*fGetOrPut*/, pvUser); + break; + } + if (RT_FAILURE(rc)) + break; + } + + off = offField + cbField; + } + + if (RT_SUCCESS(rc)) + AssertMsgStmt( !(fFlags & SSMSTRUCT_FLAGS_FULL_STRUCT) + || off == cbStruct, + ("off=%#x cbStruct=%#x\n", off, cbStruct), + rc = VERR_SSM_FIELD_NOT_CONSECUTIVE); + + if (RT_FAILURE(rc)) + { + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc; + return rc; + } + + /* + * End marker + */ + if (!(fFlags & (SSMSTRUCT_FLAGS_NO_MARKERS | SSMSTRUCT_FLAGS_NO_TAIL_MARKER))) + { + rc = SSMR3GetU32(pSSM, &u32Magic); + if (RT_FAILURE(rc)) + return rc; + AssertMsgReturn(u32Magic == SSMR3STRUCT_END, ("u32Magic=%#RX32\n", u32Magic), pSSM->rc = VERR_SSM_STRUCTURE_MAGIC); + } + + return VINF_SUCCESS; +} + + +/** + * Loads a boolean item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pfBool Where to store the item. + */ +VMMR3DECL(int) SSMR3GetBool(PSSMHANDLE pSSM, bool *pfBool) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + uint8_t u8; /* see SSMR3PutBool */ + int rc = ssmR3DataRead(pSSM, &u8, sizeof(u8)); + if (RT_SUCCESS(rc)) + { + Assert(u8 <= 1); + *pfBool = !!u8; + } + return rc; +} + + +/** + * Loads a 8-bit unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu8 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetU8(PSSMHANDLE pSSM, uint8_t *pu8) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pu8, sizeof(*pu8)); +} + + +/** + * Loads a 8-bit signed integer item from the current data unit. + * + * @returns VBox status code. 
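
/*
 * Illustrative sketch (editorial note, not part of the upstream patch):
 * SSMR3GetStructEx() adds size validation, marker control (the
 * SSMSTRUCT_FLAGS_* values checked above) and per-field transforms on top of
 * SSMR3GetStruct().  Reusing the invented EXAMPLEDEVSTATE/g_aExampleFields
 * from the earlier sketch, a structure that was saved without begin/end
 * markers could be loaded like this.  Fields whose uFirstVer is newer than
 * the unit version being loaded are simply skipped, which is how structures
 * grow without breaking old saved states.
 */
static int exampleLoadStateEx(PSSMHANDLE pSSM, EXAMPLEDEVSTATE *pState)
{
    return SSMR3GetStructEx(pSSM, pState, sizeof(*pState),
                            SSMSTRUCT_FLAGS_NO_MARKERS, &g_aExampleFields[0], NULL /*pvUser*/);
}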
+ * @param pSSM The saved state handle. + * @param pi8 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetS8(PSSMHANDLE pSSM, int8_t *pi8) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pi8, sizeof(*pi8)); +} + + +/** + * Loads a 16-bit unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu16 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetU16(PSSMHANDLE pSSM, uint16_t *pu16) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pu16, sizeof(*pu16)); +} + + +/** + * Loads a 16-bit signed integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pi16 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetS16(PSSMHANDLE pSSM, int16_t *pi16) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pi16, sizeof(*pi16)); +} + + +/** + * Loads a 32-bit unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu32 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetU32(PSSMHANDLE pSSM, uint32_t *pu32) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pu32, sizeof(*pu32)); +} + + +/** + * Loads a 32-bit signed integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pi32 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetS32(PSSMHANDLE pSSM, int32_t *pi32) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pi32, sizeof(*pi32)); +} + + +/** + * Loads a 64-bit unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu64 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetU64(PSSMHANDLE pSSM, uint64_t *pu64) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pu64, sizeof(*pu64)); +} + + +/** + * Loads a 64-bit signed integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pi64 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetS64(PSSMHANDLE pSSM, int64_t *pi64) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pi64, sizeof(*pi64)); +} + + +/** + * Loads a 128-bit unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu128 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetU128(PSSMHANDLE pSSM, uint128_t *pu128) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pu128, sizeof(*pu128)); +} + + +/** + * Loads a 128-bit signed integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pi128 Where to store the item. + */ +VMMR3DECL(int) SSMR3GetS128(PSSMHANDLE pSSM, int128_t *pi128) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pi128, sizeof(*pi128)); +} + + +/** + * Loads a VBox unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu Where to store the integer. 
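
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): most
 * load callbacks simply mirror their save callbacks with a sequence of typed
 * getters, gating fields that were added later on the unit version.  The
 * structure, field names and version number are invented; the SSMR3Get*
 * calls are the ones defined in this file.
 */
static int exampleLoadExec(PSSMHANDLE pSSM, uint32_t uVersion, EXAMPLEDEVSTATE *pState)
{
    int rc = SSMR3GetU32(pSSM, &pState->uConfig);
    if (RT_SUCCESS(rc))
        rc = SSMR3GetBool(pSSM, &pState->fEnabled);
    if (RT_SUCCESS(rc) && uVersion >= 2)               /* field added in unit version 2 */
        rc = SSMR3GetGCPhys(pSSM, &pState->GCPhysBase);
    return rc;
}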
+ */ +VMMR3DECL(int) SSMR3GetUInt(PSSMHANDLE pSSM, PRTUINT pu) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pu, sizeof(*pu)); +} + + +/** + * Loads a VBox signed integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pi Where to store the integer. + */ +VMMR3DECL(int) SSMR3GetSInt(PSSMHANDLE pSSM, PRTINT pi) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pi, sizeof(*pi)); +} + + +/** + * Loads a GC natural unsigned integer item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu Where to store the integer. + * + * @deprecated Silly type with an incorrect size, don't use it. + */ +VMMR3DECL(int) SSMR3GetGCUInt(PSSMHANDLE pSSM, PRTGCUINT pu) +{ + AssertCompile(sizeof(RTGCPTR) == sizeof(*pu)); + return SSMR3GetGCPtr(pSSM, (PRTGCPTR)pu); +} + + +/** + * Loads a GC unsigned integer register item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pu Where to store the integer. + */ +VMMR3DECL(int) SSMR3GetGCUIntReg(PSSMHANDLE pSSM, PRTGCUINTREG pu) +{ + AssertCompile(sizeof(RTGCPTR) == sizeof(*pu)); + return SSMR3GetGCPtr(pSSM, (PRTGCPTR)pu); +} + + +/** + * Loads a 32 bits GC physical address item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pGCPhys Where to store the GC physical address. + */ +VMMR3DECL(int) SSMR3GetGCPhys32(PSSMHANDLE pSSM, PRTGCPHYS32 pGCPhys) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pGCPhys, sizeof(*pGCPhys)); +} + + +/** + * Loads a 64 bits GC physical address item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pGCPhys Where to store the GC physical address. + */ +VMMR3DECL(int) SSMR3GetGCPhys64(PSSMHANDLE pSSM, PRTGCPHYS64 pGCPhys) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pGCPhys, sizeof(*pGCPhys)); +} + + +/** + * Loads a GC physical address item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pGCPhys Where to store the GC physical address. + */ +VMMR3DECL(int) SSMR3GetGCPhys(PSSMHANDLE pSSM, PRTGCPHYS pGCPhys) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + + /* + * Default size? + */ + if (RT_LIKELY(sizeof(*pGCPhys) == pSSM->u.Read.cbGCPhys)) + return ssmR3DataRead(pSSM, pGCPhys, sizeof(*pGCPhys)); + + /* + * Fiddly. + */ + Assert(sizeof(*pGCPhys) == sizeof(uint64_t) || sizeof(*pGCPhys) == sizeof(uint32_t)); + Assert(pSSM->u.Read.cbGCPhys == sizeof(uint64_t) || pSSM->u.Read.cbGCPhys == sizeof(uint32_t)); + if (pSSM->u.Read.cbGCPhys == sizeof(uint64_t)) + { + /* 64-bit saved, 32-bit load: try truncate it. */ + uint64_t u64; + int rc = ssmR3DataRead(pSSM, &u64, sizeof(uint64_t)); + if (RT_FAILURE(rc)) + return rc; + if (u64 >= _4G) + return VERR_SSM_GCPHYS_OVERFLOW; + *pGCPhys = (RTGCPHYS)u64; + return rc; + } + + /* 32-bit saved, 64-bit load: clear the high part. */ + *pGCPhys = 0; + return ssmR3DataRead(pSSM, pGCPhys, sizeof(uint32_t)); +} + + +/** + * Loads a GC virtual address item from the current data unit. 
+ * + * Only applies to in the 1.1 format: + * - SSMR3GetGCPtr + * - SSMR3GetGCUIntPtr + * - SSMR3GetGCUInt + * - SSMR3GetGCUIntReg + * + * Put functions are not affected. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param cbGCPtr Size of RTGCPTR + * + * @remarks This interface only works with saved state version 1.1, if the + * format isn't 1.1 the call will be ignored. + */ +VMMR3_INT_DECL(int) SSMR3HandleSetGCPtrSize(PSSMHANDLE pSSM, unsigned cbGCPtr) +{ + Assert(cbGCPtr == sizeof(RTGCPTR32) || cbGCPtr == sizeof(RTGCPTR64)); + if (!pSSM->u.Read.fFixedGCPtrSize) + { + Log(("SSMR3SetGCPtrSize: %u -> %u bytes\n", pSSM->u.Read.cbGCPtr, cbGCPtr)); + pSSM->u.Read.cbGCPtr = cbGCPtr; + pSSM->u.Read.fFixedGCPtrSize = true; + } + else if ( pSSM->u.Read.cbGCPtr != cbGCPtr + && pSSM->u.Read.uFmtVerMajor == 1 + && pSSM->u.Read.uFmtVerMinor == 1) + AssertMsgFailed(("SSMR3SetGCPtrSize: already fixed at %u bytes; requested %u bytes\n", pSSM->u.Read.cbGCPtr, cbGCPtr)); + + return VINF_SUCCESS; +} + + +/** + * Loads a GC virtual address item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pGCPtr Where to store the GC virtual address. + */ +VMMR3DECL(int) SSMR3GetGCPtr(PSSMHANDLE pSSM, PRTGCPTR pGCPtr) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + + /* + * Default size? + */ + if (RT_LIKELY(sizeof(*pGCPtr) == pSSM->u.Read.cbGCPtr)) + return ssmR3DataRead(pSSM, pGCPtr, sizeof(*pGCPtr)); + + /* + * Fiddly. + */ + Assert(sizeof(*pGCPtr) == sizeof(uint64_t) || sizeof(*pGCPtr) == sizeof(uint32_t)); + Assert(pSSM->u.Read.cbGCPtr == sizeof(uint64_t) || pSSM->u.Read.cbGCPtr == sizeof(uint32_t)); + if (pSSM->u.Read.cbGCPtr == sizeof(uint64_t)) + { + /* 64-bit saved, 32-bit load: try truncate it. */ + uint64_t u64; + int rc = ssmR3DataRead(pSSM, &u64, sizeof(uint64_t)); + if (RT_FAILURE(rc)) + return rc; + if (u64 >= _4G) + return VERR_SSM_GCPTR_OVERFLOW; + *pGCPtr = (RTGCPTR)u64; + return rc; + } + + /* 32-bit saved, 64-bit load: clear the high part. */ + *pGCPtr = 0; + return ssmR3DataRead(pSSM, pGCPtr, sizeof(uint32_t)); +} + + +/** + * Loads a GC virtual address (represented as unsigned integer) item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pGCPtr Where to store the GC virtual address. + */ +VMMR3DECL(int) SSMR3GetGCUIntPtr(PSSMHANDLE pSSM, PRTGCUINTPTR pGCPtr) +{ + AssertCompile(sizeof(RTGCPTR) == sizeof(*pGCPtr)); + return SSMR3GetGCPtr(pSSM, (PRTGCPTR)pGCPtr); +} + + +/** + * Loads an RC virtual address item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pRCPtr Where to store the RC virtual address. + */ +VMMR3DECL(int) SSMR3GetRCPtr(PSSMHANDLE pSSM, PRTRCPTR pRCPtr) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pRCPtr, sizeof(*pRCPtr)); +} + + +/** + * Loads a I/O port address item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pIOPort Where to store the I/O port address. + */ +VMMR3DECL(int) SSMR3GetIOPort(PSSMHANDLE pSSM, PRTIOPORT pIOPort) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pIOPort, sizeof(*pIOPort)); +} + + +/** + * Loads a selector item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. 
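
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): the
 * guest-context pointer size is only ambiguous in the old 1.1 format, whose
 * header does not record it.  A loader that knows a particular old unit
 * version was produced with 32-bit guest pointers can pin the size before its
 * first SSMR3GetGCPtr() call; on newer formats the call is a no-op.  The
 * helper name and version check are invented.
 */
static void exampleFixOldGCPtrSize(PSSMHANDLE pSSM, uint32_t uVersion)
{
    if (uVersion <= 1)  /* example: saved before the pointer size was recorded */
        SSMR3HandleSetGCPtrSize(pSSM, sizeof(RTGCPTR32));
}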
+ * @param pSel Where to store the selector. + */ +VMMR3DECL(int) SSMR3GetSel(PSSMHANDLE pSSM, PRTSEL pSel) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pSel, sizeof(*pSel)); +} + + +/** + * Loads a memory item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param pv Where to store the item. + * @param cb Size of the item. + */ +VMMR3DECL(int) SSMR3GetMem(PSSMHANDLE pSSM, void *pv, size_t cb) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + return ssmR3DataRead(pSSM, pv, cb); +} + + +/** + * Loads a string item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param psz Where to store the item. + * @param cbMax Max size of the item (including '\\0'). + */ +VMMR3DECL(int) SSMR3GetStrZ(PSSMHANDLE pSSM, char *psz, size_t cbMax) +{ + return SSMR3GetStrZEx(pSSM, psz, cbMax, NULL); +} + + +/** + * Loads a string item from the current data unit. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + * @param psz Where to store the item. + * @param cbMax Max size of the item (including '\\0'). + * @param pcbStr The length of the loaded string excluding the '\\0'. (optional) + */ +VMMR3DECL(int) SSMR3GetStrZEx(PSSMHANDLE pSSM, char *psz, size_t cbMax, size_t *pcbStr) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + + /* read size prefix. */ + uint32_t u32; + int rc = SSMR3GetU32(pSSM, &u32); + if (RT_SUCCESS(rc)) + { + if (pcbStr) + *pcbStr = u32; + if (u32 < cbMax) + { + /* terminate and read string content. */ + psz[u32] = '\0'; + return ssmR3DataRead(pSSM, psz, u32); + } + return VERR_TOO_MUCH_DATA; + } + return rc; +} + + +/** + * Skips a number of bytes in the current data unit. + * + * @returns VBox status code. + * @param pSSM The SSM handle. + * @param cb The number of bytes to skip. + */ +VMMR3DECL(int) SSMR3Skip(PSSMHANDLE pSSM, size_t cb) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + while (cb > 0) + { + uint8_t abBuf[8192]; + size_t cbCur = RT_MIN(sizeof(abBuf), cb); + cb -= cbCur; + int rc = ssmR3DataRead(pSSM, abBuf, cbCur); + if (RT_FAILURE(rc)) + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * Skips to the end of the current data unit. + * + * Since version 2 of the format, the load exec callback have to explicitly call + * this API if it wish to be lazy for some reason. This is because there seldom + * is a good reason to not read your entire data unit and it was hiding bugs. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + */ +VMMR3DECL(int) SSMR3SkipToEndOfUnit(PSSMHANDLE pSSM) +{ + SSM_ASSERT_READABLE_RET(pSSM); + SSM_CHECK_CANCELLED_RET(pSSM); + if (pSSM->u.Read.uFmtVerMajor >= 2) + { + /* + * Read until we the end of data condition is raised. + */ + pSSM->u.Read.cbDataBuffer = 0; + pSSM->u.Read.offDataBuffer = 0; + if (!pSSM->u.Read.fEndOfData) + { + do + { + /* read the rest of the current record */ + while (pSSM->u.Read.cbRecLeft) + { + uint8_t abBuf[8192]; + uint32_t cbToRead = RT_MIN(pSSM->u.Read.cbRecLeft, sizeof(abBuf)); + int rc = ssmR3DataReadV2Raw(pSSM, abBuf, cbToRead); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + pSSM->u.Read.cbRecLeft -= cbToRead; + } + + /* read the next header. */ + int rc = ssmR3DataReadRecHdrV2(pSSM); + if (RT_FAILURE(rc)) + return pSSM->rc = rc; + } while (!pSSM->u.Read.fEndOfData); + } + } + /* else: Doesn't matter for the version 1 loading. 
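
/*
 * Illustrative sketch (editorial note, not part of the upstream patch):
 * strings are stored as a 32-bit length prefix followed by that many
 * characters and no terminator, so the buffer handed to SSMR3GetStrZ /
 * SSMR3GetStrZEx must have room for the string plus the '\0' the getter
 * appends; otherwise VERR_TOO_MUCH_DATA is returned.  The wrapper below is
 * invented for the example.
 */
static int exampleLoadName(PSSMHANDLE pSSM, char *pszDst, size_t cbDst)
{
    size_t cchName = 0;
    int rc = SSMR3GetStrZEx(pSSM, pszDst, cbDst, &cchName);
    /* On VERR_TOO_MUCH_DATA, cchName still holds the saved length, i.e. a
       buffer of cchName + 1 bytes would have been needed. */
    return rc;
}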
*/ + + return VINF_SUCCESS; +} + + +/** + * Calculate the checksum of a file portion. + * + * @returns VBox status code. + * @param pStrm The stream handle + * @param off Where to start checksumming. + * @param cb How much to checksum. + * @param pu32CRC Where to store the calculated checksum. + */ +static int ssmR3CalcChecksum(PSSMSTRM pStrm, uint64_t off, uint64_t cb, uint32_t *pu32CRC) +{ + /* + * Allocate a buffer. + */ + const size_t cbBuf = _32K; + void *pvBuf = RTMemTmpAlloc(cbBuf); + if (!pvBuf) + return VERR_NO_TMP_MEMORY; + + /* + * Loop reading and calculating CRC32. + */ + int rc = VINF_SUCCESS; + uint32_t u32CRC = RTCrc32Start(); + while (cb > 0) + { + /* read chunk */ + size_t cbToRead = cbBuf; + if (cb < cbBuf) + cbToRead = cb; + rc = ssmR3StrmPeekAt(pStrm, off, pvBuf, cbToRead, NULL); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Failed with rc=%Rrc while calculating crc.\n", rc)); + RTMemTmpFree(pvBuf); + return rc; + } + + /* advance */ + cb -= cbToRead; + off += cbToRead; + + /* calc crc32. */ + u32CRC = RTCrc32Process(u32CRC, pvBuf, cbToRead); + } + RTMemTmpFree(pvBuf); + + /* store the calculated crc */ + u32CRC = RTCrc32Finish(u32CRC); + Log(("SSM: u32CRC=0x%08x\n", u32CRC)); + *pu32CRC = u32CRC; + + return VINF_SUCCESS; +} + + +/** + * Validates a version 2 footer. + * + * @returns VBox status code. + * + * @param pFooter The footer. + * @param offFooter The stream offset of the footer. + * @param cDirEntries The number of directory entries. UINT32_MAX if + * unknown. + * @param fStreamCrc32 Whether the stream is checksummed using CRC-32. + * @param u32StreamCRC The stream checksum. + */ +static int ssmR3ValidateFooter(PSSMFILEFTR pFooter, uint64_t offFooter, uint32_t cDirEntries, bool fStreamCrc32, uint32_t u32StreamCRC) +{ + if (memcmp(pFooter->szMagic, SSMFILEFTR_MAGIC, sizeof(pFooter->szMagic))) + { + LogRel(("SSM: Bad footer magic: %.*Rhxs\n", sizeof(pFooter->szMagic), &pFooter->szMagic[0])); + return VERR_SSM_INTEGRITY_FOOTER; + } + SSM_CHECK_CRC32_RET(pFooter, sizeof(*pFooter), ("Footer CRC mismatch: %08x, correct is %08x\n", u32CRC, u32ActualCRC)); + if (pFooter->offStream != offFooter) + { + LogRel(("SSM: SSMFILEFTR::offStream is wrong: %llx, expected %llx\n", pFooter->offStream, offFooter)); + return VERR_SSM_INTEGRITY_FOOTER; + } + if (pFooter->u32Reserved) + { + LogRel(("SSM: Reserved footer field isn't zero: %08x\n", pFooter->u32Reserved)); + return VERR_SSM_INTEGRITY_FOOTER; + } + if (cDirEntries != UINT32_MAX) + AssertLogRelMsgReturn(pFooter->cDirEntries == cDirEntries, + ("Footer: cDirEntries=%#x, expected %#x\n", pFooter->cDirEntries, cDirEntries), + VERR_SSM_INTEGRITY_FOOTER); + else + AssertLogRelMsgReturn(pFooter->cDirEntries < _64K, + ("Footer: cDirEntries=%#x\n", pFooter->cDirEntries), + VERR_SSM_INTEGRITY_FOOTER); + if ( !fStreamCrc32 + && pFooter->u32StreamCRC) + { + LogRel(("SSM: u32StreamCRC field isn't zero, but header says stream checksumming is disabled.\n")); + return VERR_SSM_INTEGRITY_FOOTER; + } + if ( fStreamCrc32 + && pFooter->u32StreamCRC != u32StreamCRC) + { + LogRel(("SSM: Bad stream CRC: %#x, expected %#x.\n", pFooter->u32StreamCRC, u32StreamCRC)); + return VERR_SSM_INTEGRITY_CRC; + } + return VINF_SUCCESS; +} + + +/** + * Validates the header information stored in the handle. + * + * @returns VBox status code. + * + * @param pSSM The handle. + * @param fHaveHostBits Set if the host bits field is valid. + * @param fHaveVersion Set if we have a version. 
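
/*
 * Illustrative sketch (editorial note, not part of the upstream patch): the
 * checksum loop above is the standard IPRT streaming CRC-32 idiom -- start,
 * feed chunks, finish.  Feeding the data piecewise gives the same result as a
 * single pass over the whole range, which is what lets a large file be
 * verified with a small temporary buffer.  The helper name is invented.
 */
static uint32_t exampleCrc32InChunks(const uint8_t *pb, size_t cb, size_t cbChunk)
{
    uint32_t uCrc = RTCrc32Start();
    while (cb > 0)
    {
        size_t const cbNow = RT_MIN(cb, cbChunk);
        uCrc = RTCrc32Process(uCrc, pb, cbNow);   /* accumulate one chunk */
        pb  += cbNow;
        cb  -= cbNow;
    }
    return RTCrc32Finish(uCrc);
}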
+ */ +static int ssmR3ValidateHeaderInfo(PSSMHANDLE pSSM, bool fHaveHostBits, bool fHaveVersion) +{ + Assert(pSSM->u.Read.cbFileHdr < 256 && pSSM->u.Read.cbFileHdr > 32); + Assert(pSSM->u.Read.uFmtVerMajor == 1 || pSSM->u.Read.uFmtVerMajor == 2); + Assert(pSSM->u.Read.uFmtVerMinor <= 2); + + if (fHaveVersion) + { + if ( pSSM->u.Read.u16VerMajor == 0 + || pSSM->u.Read.u16VerMajor > 1000 + || pSSM->u.Read.u16VerMinor > 1000 + || pSSM->u.Read.u32VerBuild > _1M + || pSSM->u.Read.u32SvnRev == 0 + || pSSM->u.Read.u32SvnRev > 10000000 /*100M*/) + { + LogRel(("SSM: Incorrect version values: %u.%u.%u.r%u\n", + pSSM->u.Read.u16VerMajor, pSSM->u.Read.u16VerMinor, pSSM->u.Read.u32VerBuild, pSSM->u.Read.u32SvnRev)); + return VERR_SSM_INTEGRITY_VBOX_VERSION; + } + } + else + AssertLogRelReturn( pSSM->u.Read.u16VerMajor == 0 + && pSSM->u.Read.u16VerMinor == 0 + && pSSM->u.Read.u32VerBuild == 0 + && pSSM->u.Read.u32SvnRev == 0, + VERR_SSM_INTEGRITY_VBOX_VERSION); + + if (fHaveHostBits) + { + if ( pSSM->u.Read.cHostBits != 32 + && pSSM->u.Read.cHostBits != 64) + { + LogRel(("SSM: Incorrect cHostBits value: %u\n", pSSM->u.Read.cHostBits)); + return VERR_SSM_INTEGRITY_HEADER; + } + } + else + AssertLogRelReturn(pSSM->u.Read.cHostBits == 0, VERR_SSM_INTEGRITY_HEADER); + + if ( pSSM->u.Read.cbGCPhys != sizeof(uint32_t) + && pSSM->u.Read.cbGCPhys != sizeof(uint64_t)) + { + LogRel(("SSM: Incorrect cbGCPhys value: %d\n", pSSM->u.Read.cbGCPhys)); + return VERR_SSM_INTEGRITY_HEADER; + } + if ( pSSM->u.Read.cbGCPtr != sizeof(uint32_t) + && pSSM->u.Read.cbGCPtr != sizeof(uint64_t)) + { + LogRel(("SSM: Incorrect cbGCPtr value: %d\n", pSSM->u.Read.cbGCPtr)); + return VERR_SSM_INTEGRITY_HEADER; + } + + return VINF_SUCCESS; +} + + +/** + * Reads the header, detects the format version and performs integrity + * validations. + * + * @returns VBox status code. + * @param pSSM The saved state handle. A number of field will + * be updated, mostly header related information. + * fLiveSave is also set if appropriate. + * @param fChecksumIt Whether to checksum the file or not. This will + * be ignored if it the stream isn't a file. + * @param fChecksumOnRead Whether to validate the checksum while reading + * the stream instead of up front. If not possible, + * verify the checksum up front. + */ +static int ssmR3HeaderAndValidate(PSSMHANDLE pSSM, bool fChecksumIt, bool fChecksumOnRead) +{ + /* + * Read and check the header magic. + */ + union + { + SSMFILEHDR v2_0; + SSMFILEHDRV12 v1_2; + SSMFILEHDRV11 v1_1; + } uHdr; + int rc = ssmR3StrmRead(&pSSM->Strm, &uHdr, sizeof(uHdr.v2_0.szMagic)); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed to read file magic header. rc=%Rrc\n", rc)); + return rc; + } + if (memcmp(uHdr.v2_0.szMagic, SSMFILEHDR_MAGIC_BASE, sizeof(SSMFILEHDR_MAGIC_BASE) - 1)) + { + Log(("SSM: Not a saved state file. magic=%.*s\n", sizeof(uHdr.v2_0.szMagic) - 1, uHdr.v2_0.szMagic)); + return VERR_SSM_INTEGRITY_MAGIC; + } + + /* + * Find the header size and read the rest. + */ + static const struct + { + char szMagic[sizeof(SSMFILEHDR_MAGIC_V2_0)]; + uint32_t cbHdr; + unsigned uFmtVerMajor; + unsigned uFmtVerMinor; + } s_aVers[] = + { + { SSMFILEHDR_MAGIC_V2_0, sizeof(SSMFILEHDR), 2, 0 }, + { SSMFILEHDR_MAGIC_V1_2, sizeof(SSMFILEHDRV12), 1, 2 }, + { SSMFILEHDR_MAGIC_V1_1, sizeof(SSMFILEHDRV11), 1, 1 }, + }; + int iVer = RT_ELEMENTS(s_aVers); + while (iVer-- > 0) + if (!memcmp(uHdr.v2_0.szMagic, s_aVers[iVer].szMagic, sizeof(uHdr.v2_0.szMagic))) + break; + if (iVer < 0) + { + Log(("SSM: Unknown file format version. 
magic=%.*s\n", sizeof(uHdr.v2_0.szMagic) - 1, uHdr.v2_0.szMagic)); + return VERR_SSM_INTEGRITY_VERSION; + } + pSSM->u.Read.uFmtVerMajor = s_aVers[iVer].uFmtVerMajor; + pSSM->u.Read.uFmtVerMinor = s_aVers[iVer].uFmtVerMinor; + pSSM->u.Read.cbFileHdr = s_aVers[iVer].cbHdr; + + rc = ssmR3StrmRead(&pSSM->Strm, (uint8_t *)&uHdr + sizeof(uHdr.v2_0.szMagic), pSSM->u.Read.cbFileHdr - sizeof(uHdr.v2_0.szMagic)); + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Failed to read the file header. rc=%Rrc\n", rc)); + return rc; + } + + /* + * Make version specific adjustments. + */ + if (pSSM->u.Read.uFmtVerMajor >= 2) + { + /* + * Version 2.0 and later. + */ + if (pSSM->u.Read.uFmtVerMinor == 0) + { + /* validate the header. */ + SSM_CHECK_CRC32_RET(&uHdr.v2_0, sizeof(uHdr.v2_0), ("Header CRC mismatch: %08x, correct is %08x\n", u32CRC, u32ActualCRC)); + if (uHdr.v2_0.u8Reserved) + { + LogRel(("SSM: Reserved header field isn't zero: %02x\n", uHdr.v2_0.u8Reserved)); + return VERR_SSM_INTEGRITY; + } + if (uHdr.v2_0.fFlags & ~(SSMFILEHDR_FLAGS_STREAM_CRC32 | SSMFILEHDR_FLAGS_STREAM_LIVE_SAVE)) + { + LogRel(("SSM: Unknown header flags: %08x\n", uHdr.v2_0.fFlags)); + return VERR_SSM_INTEGRITY; + } + if ( uHdr.v2_0.cbMaxDecompr > sizeof(pSSM->u.Read.abDataBuffer) + || uHdr.v2_0.cbMaxDecompr < _1K + || (uHdr.v2_0.cbMaxDecompr & 0xff) != 0) + { + LogRel(("SSM: The cbMaxDecompr header field is out of range: %#x\n", uHdr.v2_0.cbMaxDecompr)); + return VERR_SSM_INTEGRITY; + } + + /* set the header info. */ + pSSM->u.Read.cHostBits = uHdr.v2_0.cHostBits; + pSSM->u.Read.u16VerMajor = uHdr.v2_0.u16VerMajor; + pSSM->u.Read.u16VerMinor = uHdr.v2_0.u16VerMinor; + pSSM->u.Read.u32VerBuild = uHdr.v2_0.u32VerBuild; + pSSM->u.Read.u32SvnRev = uHdr.v2_0.u32SvnRev; + pSSM->u.Read.cbGCPhys = uHdr.v2_0.cbGCPhys; + pSSM->u.Read.cbGCPtr = uHdr.v2_0.cbGCPtr; + pSSM->u.Read.fFixedGCPtrSize= true; + pSSM->u.Read.fStreamCrc32 = !!(uHdr.v2_0.fFlags & SSMFILEHDR_FLAGS_STREAM_CRC32); + pSSM->fLiveSave = !!(uHdr.v2_0.fFlags & SSMFILEHDR_FLAGS_STREAM_LIVE_SAVE); + } + else + AssertFailedReturn(VERR_SSM_IPE_2); + if (!pSSM->u.Read.fStreamCrc32) + ssmR3StrmDisableChecksumming(&pSSM->Strm); + + /* + * Read and validate the footer if it's a file. + */ + if (ssmR3StrmIsFile(&pSSM->Strm)) + { + SSMFILEFTR Footer; + uint64_t offFooter; + rc = ssmR3StrmPeekAt(&pSSM->Strm, -(RTFOFF)sizeof(SSMFILEFTR), &Footer, sizeof(Footer), &offFooter); + AssertLogRelRCReturn(rc, rc); + + rc = ssmR3ValidateFooter(&Footer, offFooter, UINT32_MAX, pSSM->u.Read.fStreamCrc32, Footer.u32StreamCRC); + if (RT_FAILURE(rc)) + return rc; + + pSSM->u.Read.cbLoadFile = offFooter + sizeof(Footer); + pSSM->u.Read.u32LoadCRC = Footer.u32StreamCRC; + } + else + { + pSSM->u.Read.cbLoadFile = UINT64_MAX; + pSSM->u.Read.u32LoadCRC = 0; + } + + /* + * Validate the header info we've set in the handle. + */ + rc = ssmR3ValidateHeaderInfo(pSSM, true /*fHaveHostBits*/, true /*fHaveVersion*/); + if (RT_FAILURE(rc)) + return rc; + + /* + * Check the checksum if that's called for and possible. + */ + if ( pSSM->u.Read.fStreamCrc32 + && fChecksumIt + && !fChecksumOnRead + && ssmR3StrmIsFile(&pSSM->Strm)) + { + uint32_t u32CRC; + rc = ssmR3CalcChecksum(&pSSM->Strm, 0, pSSM->u.Read.cbLoadFile - sizeof(SSMFILEFTR), &u32CRC); + if (RT_FAILURE(rc)) + return rc; + if (u32CRC != pSSM->u.Read.u32LoadCRC) + { + LogRel(("SSM: Invalid CRC! Calculated %#010x, in footer %#010x\n", u32CRC, pSSM->u.Read.u32LoadCRC)); + return VERR_SSM_INTEGRITY_CRC; + } + } + } + else + { + /* + * Version 1.x of the format. 
+ */ + bool fHaveHostBits = true; + bool fHaveVersion = false; + RTUUID MachineUuidFromHdr; + + ssmR3StrmDisableChecksumming(&pSSM->Strm); + if (pSSM->u.Read.uFmtVerMinor == 1) + { + pSSM->u.Read.cHostBits = 0; /* unknown */ + pSSM->u.Read.u16VerMajor = 0; + pSSM->u.Read.u16VerMinor = 0; + pSSM->u.Read.u32VerBuild = 0; + pSSM->u.Read.u32SvnRev = 0; + pSSM->u.Read.cbLoadFile = uHdr.v1_1.cbFile; + pSSM->u.Read.u32LoadCRC = uHdr.v1_1.u32CRC; + pSSM->u.Read.cbGCPhys = sizeof(RTGCPHYS); + pSSM->u.Read.cbGCPtr = sizeof(RTGCPTR); + pSSM->u.Read.fFixedGCPtrSize = false; /* settable */ + pSSM->u.Read.fStreamCrc32 = false; + + MachineUuidFromHdr = uHdr.v1_1.MachineUuid; + fHaveHostBits = false; + } + else if (pSSM->u.Read.uFmtVerMinor == 2) + { + pSSM->u.Read.cHostBits = uHdr.v1_2.cHostBits; + pSSM->u.Read.u16VerMajor = uHdr.v1_2.u16VerMajor; + pSSM->u.Read.u16VerMinor = uHdr.v1_2.u16VerMinor; + pSSM->u.Read.u32VerBuild = uHdr.v1_2.u32VerBuild; + pSSM->u.Read.u32SvnRev = uHdr.v1_2.u32SvnRev; + pSSM->u.Read.cbLoadFile = uHdr.v1_2.cbFile; + pSSM->u.Read.u32LoadCRC = uHdr.v1_2.u32CRC; + pSSM->u.Read.cbGCPhys = uHdr.v1_2.cbGCPhys; + pSSM->u.Read.cbGCPtr = uHdr.v1_2.cbGCPtr; + pSSM->u.Read.fFixedGCPtrSize = true; + pSSM->u.Read.fStreamCrc32 = false; + + MachineUuidFromHdr = uHdr.v1_2.MachineUuid; + fHaveVersion = true; + } + else + AssertFailedReturn(VERR_SSM_IPE_1); + + /* + * The MachineUuid must be NULL (was never used). + */ + if (!RTUuidIsNull(&MachineUuidFromHdr)) + { + LogRel(("SSM: The UUID of the saved state doesn't match the running VM.\n")); + return VERR_SMM_INTEGRITY_MACHINE; + } + + /* + * Verify the file size. + */ + uint64_t cbFile = ssmR3StrmGetSize(&pSSM->Strm); + if (cbFile != pSSM->u.Read.cbLoadFile) + { + LogRel(("SSM: File size mismatch. hdr.cbFile=%lld actual %lld\n", pSSM->u.Read.cbLoadFile, cbFile)); + return VERR_SSM_INTEGRITY_SIZE; + } + + /* + * Validate the header info we've set in the handle. + */ + rc = ssmR3ValidateHeaderInfo(pSSM, fHaveHostBits, fHaveVersion); + if (RT_FAILURE(rc)) + return rc; + + /* + * Verify the checksum if requested. + * + * Note! The checksum is not actually generated for the whole file, + * this is of course a bug in the v1.x code that we cannot do + * anything about. + */ + if ( fChecksumIt + || fChecksumOnRead) + { + uint32_t u32CRC; + rc = ssmR3CalcChecksum(&pSSM->Strm, + RT_UOFFSETOF(SSMFILEHDRV11, u32CRC) + sizeof(uHdr.v1_1.u32CRC), + cbFile - pSSM->u.Read.cbFileHdr, + &u32CRC); + if (RT_FAILURE(rc)) + return rc; + if (u32CRC != pSSM->u.Read.u32LoadCRC) + { + LogRel(("SSM: Invalid CRC! Calculated %#010x, in header %#010x\n", u32CRC, pSSM->u.Read.u32LoadCRC)); + return VERR_SSM_INTEGRITY_CRC; + } + } + } + + return VINF_SUCCESS; +} + + +/** + * Open a saved state for reading. + * + * The file will be positioned at the first data unit upon successful return. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pszFilename The filename. NULL if pStreamOps is used. + * @param pStreamOps The stream method table. NULL if pszFilename is + * used. + * @param pvUser The user argument to the stream methods. + * @param fChecksumIt Check the checksum for the entire file. + * @param fChecksumOnRead Whether to validate the checksum while reading + * the stream instead of up front. If not possible, + * verify the checksum up front. + * @param pSSM Pointer to the handle structure. This will be + * completely initialized on success. + * @param cBuffers The number of stream buffers. 
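+ *
+ * @remarks For illustration, this is roughly how SSMR3Open() further down
+ *          invokes this worker for a plain file (no VM, no stream ops,
+ *          checksum verified while reading, a single stream buffer):
+ * @code
+ *      // pVM=NULL, pStreamOps=NULL, pvUser=NULL, fChecksumIt=false,
+ *      // fChecksumOnRead=true, cBuffers=1
+ *      rc = ssmR3OpenFile(NULL, pszFilename, NULL, NULL, false, true, 1, pSSM);
+ * @endcode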
+ */ +static int ssmR3OpenFile(PVM pVM, const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvUser, + bool fChecksumIt, bool fChecksumOnRead, uint32_t cBuffers, PSSMHANDLE pSSM) +{ + /* + * Initialize the handle. + */ + pSSM->pVM = pVM; + pSSM->enmOp = SSMSTATE_INVALID; + pSSM->enmAfter = SSMAFTER_INVALID; + pSSM->fCancelled = SSMHANDLE_OK; + pSSM->rc = VINF_SUCCESS; + pSSM->cbUnitLeftV1 = 0; + pSSM->offUnit = UINT64_MAX; + pSSM->offUnitUser = UINT64_MAX; + pSSM->fLiveSave = false; + pSSM->pfnProgress = NULL; + pSSM->pvUser = NULL; + pSSM->uPercent = 0; + pSSM->offEstProgress = 0; + pSSM->cbEstTotal = 0; + pSSM->offEst = 0; + pSSM->offEstUnitEnd = 0; + pSSM->uPercentLive = 0; + pSSM->uPercentPrepare = 5; + pSSM->uPercentDone = 2; + pSSM->uReportedLivePercent = 0; + pSSM->pszFilename = pszFilename; + + pSSM->u.Read.pZipDecompV1 = NULL; + pSSM->u.Read.uFmtVerMajor = UINT32_MAX; + pSSM->u.Read.uFmtVerMinor = UINT32_MAX; + pSSM->u.Read.cbFileHdr = UINT32_MAX; + pSSM->u.Read.cbGCPhys = UINT8_MAX; + pSSM->u.Read.cbGCPtr = UINT8_MAX; + pSSM->u.Read.fFixedGCPtrSize= false; + pSSM->u.Read.fIsHostMsc32 = SSM_HOST_IS_MSC_32; + RT_ZERO(pSSM->u.Read.szHostOSAndArch); + pSSM->u.Read.u16VerMajor = UINT16_MAX; + pSSM->u.Read.u16VerMinor = UINT16_MAX; + pSSM->u.Read.u32VerBuild = UINT32_MAX; + pSSM->u.Read.u32SvnRev = UINT32_MAX; + pSSM->u.Read.cHostBits = UINT8_MAX; + pSSM->u.Read.cbLoadFile = UINT64_MAX; + + pSSM->u.Read.cbRecLeft = 0; + pSSM->u.Read.cbDataBuffer = 0; + pSSM->u.Read.offDataBuffer = 0; + pSSM->u.Read.fEndOfData = 0; + pSSM->u.Read.u8TypeAndFlags = 0; + + pSSM->u.Read.pCurUnit = NULL; + pSSM->u.Read.uCurUnitVer = UINT32_MAX; + pSSM->u.Read.uCurUnitPass = 0; + pSSM->u.Read.fHaveSetError = false; + + /* + * Try open and validate the file. + */ + int rc; + if (pStreamOps) + rc = ssmR3StrmInit(&pSSM->Strm, pStreamOps, pvUser, false /*fWrite*/, fChecksumOnRead, cBuffers); + else + rc = ssmR3StrmOpenFile(&pSSM->Strm, pszFilename, false /*fWrite*/, fChecksumOnRead, cBuffers); + if (RT_SUCCESS(rc)) + { + rc = ssmR3HeaderAndValidate(pSSM, fChecksumIt, fChecksumOnRead); + if (RT_SUCCESS(rc)) + return rc; + + /* failure path */ + ssmR3StrmClose(&pSSM->Strm, pSSM->rc == VERR_SSM_CANCELLED); + } + else + Log(("SSM: Failed to open save state file '%s', rc=%Rrc.\n", pszFilename, rc)); + return rc; +} + + +/** + * Verifies the directory. + * + * @returns VBox status code. + * + * @param pDir The full directory. + * @param cbDir The size of the directory. + * @param offDir The directory stream offset. + * @param cDirEntries The directory entry count from the footer. + * @param cbHdr The header size. + * @param uSvnRev The SVN revision that saved the state. Bug detection. 
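+ *
+ * @note The directory is only accepted when the magic and the CRC32 over the
+ *       whole block check out, the entry count matches the footer, cbDir
+ *       equals RT_UOFFSETOF_DYN(SSMFILEDIR, aEntries[cEntries]), and every
+ *       entry offset lies between the end of the file header and the start of
+ *       the directory (a zero offset is tolerated only for states written by
+ *       unreleased builds prior to r53365).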
+ */ +static int ssmR3ValidateDirectory(PSSMFILEDIR pDir, size_t cbDir, uint64_t offDir, uint32_t cDirEntries, + uint32_t cbHdr, uint32_t uSvnRev) +{ + AssertLogRelReturn(!memcmp(pDir->szMagic, SSMFILEDIR_MAGIC, sizeof(pDir->szMagic)), VERR_SSM_INTEGRITY_DIR_MAGIC); + SSM_CHECK_CRC32_RET(pDir, cbDir, ("Bad directory CRC: %08x, actual %08x\n", u32CRC, u32ActualCRC)); + AssertLogRelMsgReturn(pDir->cEntries == cDirEntries, + ("Bad directory entry count: %#x, expected %#x (from the footer)\n", pDir->cEntries, cDirEntries), + VERR_SSM_INTEGRITY_DIR); + AssertLogRelReturn(RT_UOFFSETOF_DYN(SSMFILEDIR, aEntries[pDir->cEntries]) == cbDir, VERR_SSM_INTEGRITY_DIR); + + for (uint32_t i = 0; i < pDir->cEntries; i++) + { + AssertLogRelMsgReturn( ( pDir->aEntries[i].off >= cbHdr + && pDir->aEntries[i].off < offDir) + || ( pDir->aEntries[i].off == 0 /* bug in unreleased code */ + && uSvnRev < 53365), + ("off=%#llx cbHdr=%#x offDir=%#llx\n", pDir->aEntries[i].off, cbHdr, offDir), + VERR_SSM_INTEGRITY_DIR); + } + return VINF_SUCCESS; +} + +#ifndef SSM_STANDALONE + +/** + * LogRel the unit content. + * + * @param pSSM The save state handle. + * @param pUnitHdr The unit head (for cbName). + * @param offUnit The offset of the unit header. + * @param offStart Where to start. + * @param offEnd Where to end. + */ +static void ssmR3StrmLogUnitContent(PSSMHANDLE pSSM, SSMFILEUNITHDRV2 const *pUnitHdr, uint64_t offUnit, + uint64_t offStart, uint64_t offEnd) +{ + /* + * Stop the I/O thread (if present). + */ + ssmR3StrmStopIoThread(&pSSM->Strm); + + /* + * Save the current status, resetting it so we can read + log the unit bytes. + */ + int rcSaved = pSSM->rc; + pSSM->rc = VINF_SUCCESS; + + /* + * Reverse back to the start of the unit if we can. + */ + uint32_t cbUnitHdr = RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[pUnitHdr->cbName]); + int rc = ssmR3StrmSeek(&pSSM->Strm, offUnit/* + cbUnitHdr*/, RTFILE_SEEK_BEGIN, pUnitHdr->u32CurStreamCRC); + if (RT_SUCCESS(rc)) + { + SSMFILEUNITHDRV2 UnitHdr2; + rc = ssmR3StrmRead(&pSSM->Strm, &UnitHdr2, cbUnitHdr); + if ( RT_SUCCESS(rc) + && memcmp(&UnitHdr2, pUnitHdr, cbUnitHdr) == 0) + { + pSSM->u.Read.cbDataBuffer = 0; /* avoid assertions */ + pSSM->u.Read.cbRecLeft = 0; + ssmR3DataReadBeginV2(pSSM); + + /* + * Read the unit, dumping the requested bits. + */ + uint8_t cbLine = 0; + uint8_t abLine[16]; + uint64_t offCur = 0; + offStart &= ~(uint64_t)(sizeof(abLine) - 1); + Assert(offStart < offEnd); + LogRel(("SSM: Unit '%s' contents:\n", pUnitHdr->szName)); + + do + { + /* + * Read the next 16 bytes into abLine. We have to take some care to + * get all the bytes in the unit, since we don't really know its size. + */ + while ( cbLine < sizeof(abLine) + && !pSSM->u.Read.fEndOfData + && RT_SUCCESS(pSSM->rc)) + { + uint32_t cbToRead = sizeof(abLine) - cbLine; + if (cbToRead > 1) + { + int32_t cbInBuffer = pSSM->u.Read.cbDataBuffer - pSSM->u.Read.offDataBuffer; + if ((int32_t)cbToRead > cbInBuffer) + { + if (cbInBuffer > 0) + cbToRead = cbInBuffer; + else if (pSSM->u.Read.cbRecLeft) + cbToRead = 1; + else + { + rc = ssmR3DataReadRecHdrV2(pSSM); + if (RT_FAILURE(rc)) + { + pSSM->rc = rc; + break; + } + if (pSSM->u.Read.fEndOfData) + break; + } + } + } + rc = ssmR3DataRead(pSSM, &abLine[cbLine], cbToRead); + if (RT_SUCCESS(rc)) + cbLine += cbToRead; + else + break; + } + + /* + * Display the bytes if in the requested range. 
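+ *
+ * Each output line covers 16 bytes: the hex digits first (with a '-'
+ * separating the two 8 byte halves), then the same bytes as ASCII with
+ * non-printable characters rendered as '.'.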
+ */ + if ( offCur >= offStart + && offCur <= offEnd) + { + char szLine[132]; + char *pchDst = szLine; + uint8_t offSrc = 0; + while (offSrc < cbLine) + { + static char const s_szHex[17] = "0123456789abcdef"; + uint8_t const b = abLine[offSrc++]; + *pchDst++ = s_szHex[b >> 4]; + *pchDst++ = s_szHex[b & 0xf]; + *pchDst++ = offSrc != 8 ? ' ' : '-'; + } + while (offSrc < sizeof(abLine)) + { + *pchDst++ = ' '; + *pchDst++ = ' '; + *pchDst++ = offSrc != 7 ? ' ' : '-'; + offSrc++; + } + *pchDst++ = ' '; + + offSrc = 0; + while (offSrc < cbLine) + { + char const ch = (int8_t)abLine[offSrc++]; + if (ch < 0x20 || ch >= 0x7f) + *pchDst++ = '.'; + else + *pchDst++ = ch; + } + *pchDst = '\0'; + Assert((uintptr_t)(pchDst - &szLine[0]) < sizeof(szLine)); + Assert(strchr(szLine, '\0') == pchDst); + + LogRel(("%#010llx: %s\n", offCur, szLine)); + } + offCur += cbLine; + cbLine = 0; + } while ( !pSSM->u.Read.fEndOfData + && RT_SUCCESS(pSSM->rc)); + LogRel(("SSM: offCur=%#llx fEndOfData=%d (rc=%Rrc)\n", offCur, pSSM->u.Read.fEndOfData, rc)); + } + else if (RT_SUCCESS(rc)) + LogRel(("SSM: Cannot dump unit - mismatching unit head\n")); + else + LogRel(("SSM: Cannot dump unit - unit header read error: %Rrc\n", rc)); + } + else + LogRel(("SSM: Cannot dump unit - ssmR3StrmSeek error: %Rrc\n", rc)); + + pSSM->rc = rcSaved; +} + + +/** + * Find a data unit by name. + * + * @returns Pointer to the unit. + * @returns NULL if not found. + * + * @param pVM The cross context VM structure. + * @param pszName Data unit name. + * @param uInstance The data unit instance id. + */ +static PSSMUNIT ssmR3Find(PVM pVM, const char *pszName, uint32_t uInstance) +{ + size_t cchName = strlen(pszName); + PSSMUNIT pUnit = pVM->ssm.s.pHead; + while ( pUnit + && ( pUnit->u32Instance != uInstance + || pUnit->cchName != cchName + || memcmp(pUnit->szName, pszName, cchName))) + pUnit = pUnit->pNext; + return pUnit; +} + + +/** + * Executes the loading of a V1.X file. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. + */ +static int ssmR3LoadExecV1(PVM pVM, PSSMHANDLE pSSM) +{ + int rc; + char *pszName = NULL; + size_t cchName = 0; + pSSM->enmOp = SSMSTATE_LOAD_EXEC; + for (;;) + { + /* + * Save the current file position and read the data unit header. + */ + uint64_t offUnit = ssmR3StrmTell(&pSSM->Strm); + SSMFILEUNITHDRV1 UnitHdr; + rc = ssmR3StrmRead(&pSSM->Strm, &UnitHdr, RT_UOFFSETOF(SSMFILEUNITHDRV1, szName)); + if (RT_SUCCESS(rc)) + { + /* + * Check the magic and see if it's valid and whether it is a end header or not. + */ + if (memcmp(&UnitHdr.achMagic[0], SSMFILEUNITHDR_MAGIC, sizeof(SSMFILEUNITHDR_MAGIC))) + { + if (!memcmp(&UnitHdr.achMagic[0], SSMFILEUNITHDR_END, sizeof(SSMFILEUNITHDR_END))) + { + Log(("SSM: EndOfFile: offset %#9llx size %9d\n", offUnit, UnitHdr.cbUnit)); + /* Complete the progress bar (pending 99% afterwards). */ + ssmR3ProgressByByte(pSSM, pSSM->cbEstTotal - pSSM->offEst); + break; + } + LogRel(("SSM: Invalid unit magic at offset %#llx (%lld), '%.*s'!\n", + offUnit, offUnit, sizeof(UnitHdr.achMagic) - 1, &UnitHdr.achMagic[0])); + rc = VERR_SSM_INTEGRITY_UNIT_MAGIC; + break; + } + + /* + * Read the name. + * Adjust the name buffer first. 
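+ *
+ * The temporary name buffer is grown in 64 byte steps and reused across
+ * units; a name that is not zero terminated fails the unit with
+ * VERR_SSM_INTEGRITY_UNIT.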
+ */ + if (cchName < UnitHdr.cchName) + { + if (pszName) + RTMemTmpFree(pszName); + cchName = RT_ALIGN_Z(UnitHdr.cchName, 64); + pszName = (char *)RTMemTmpAlloc(cchName); + } + if (pszName) + { + rc = ssmR3StrmRead(&pSSM->Strm, pszName, UnitHdr.cchName); + if (RT_SUCCESS(rc)) + { + if (pszName[UnitHdr.cchName - 1]) + { + LogRel(("SSM: Unit name '%.*s' was not properly terminated.\n", UnitHdr.cchName, pszName)); + rc = VERR_SSM_INTEGRITY_UNIT; + break; + } + Log(("SSM: Data unit: offset %#9llx size %9lld '%s'\n", offUnit, UnitHdr.cbUnit, pszName)); + + /* + * Find the data unit in our internal table. + */ + PSSMUNIT pUnit = ssmR3Find(pVM, pszName, UnitHdr.u32Instance); + if (pUnit) + { + /* + * Call the execute handler. + */ + pSSM->cbUnitLeftV1 = UnitHdr.cbUnit - RT_UOFFSETOF_DYN(SSMFILEUNITHDRV1, szName[UnitHdr.cchName]); + pSSM->offUnit = 0; + pSSM->offUnitUser = 0; + pSSM->u.Read.uCurUnitVer = UnitHdr.u32Version; + pSSM->u.Read.uCurUnitPass = SSM_PASS_FINAL; + pSSM->u.Read.pCurUnit = pUnit; + if (!pUnit->u.Common.pfnLoadExec) + { + LogRel(("SSM: No load exec callback for unit '%s'!\n", pszName)); + pSSM->rc = rc = VERR_SSM_NO_LOAD_EXEC; + break; + } + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLoadExec(pUnit->u.Dev.pDevIns, pSSM, UnitHdr.u32Version, SSM_PASS_FINAL); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLoadExec(pUnit->u.Drv.pDrvIns, pSSM, UnitHdr.u32Version, SSM_PASS_FINAL); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLoadExec(pUnit->u.Usb.pUsbIns, pSSM, UnitHdr.u32Version, SSM_PASS_FINAL); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLoadExec(pVM, pSSM, UnitHdr.u32Version, SSM_PASS_FINAL); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLoadExec(pSSM, pUnit->u.External.pvUser, UnitHdr.u32Version, SSM_PASS_FINAL); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + + /* + * Close the reader stream. + */ + rc = ssmR3DataReadFinishV1(pSSM); + if (RT_SUCCESS(rc)) + { + /* + * Now, we'll check the current position to see if all, or + * more than all, the data was read. + * + * Note! Because of buffering / compression we'll only see the + * really bad ones here. + */ + uint64_t off = ssmR3StrmTell(&pSSM->Strm); + int64_t i64Diff = off - (offUnit + UnitHdr.cbUnit); + if (i64Diff < 0) + { + Log(("SSM: Unit '%s' left %lld bytes unread!\n", pszName, -i64Diff)); + rc = ssmR3StrmSkipTo(&pSSM->Strm, offUnit + UnitHdr.cbUnit); + ssmR3ProgressByByte(pSSM, offUnit + UnitHdr.cbUnit - pSSM->offEst); + } + else if (i64Diff > 0) + { + LogRel(("SSM: Unit '%s' read %lld bytes too much!\n", pszName, i64Diff)); + if (!ASMAtomicXchgBool(&pSSM->u.Read.fHaveSetError, true)) + rc = VMSetError(pVM, VERR_SSM_LOADED_TOO_MUCH, RT_SRC_POS, + N_("Unit '%s' read %lld bytes too much"), pszName, i64Diff); + break; + } + + pSSM->offUnit = UINT64_MAX; + pSSM->offUnitUser = UINT64_MAX; + } + else + { + LogRel(("SSM: Load exec failed for '%s' instance #%u ! 
(version %u)\n", + pszName, UnitHdr.u32Instance, UnitHdr.u32Version)); + if (!ASMAtomicXchgBool(&pSSM->u.Read.fHaveSetError, true)) + { + if (rc == VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION) + VMSetError(pVM, rc, RT_SRC_POS, N_("Unsupported version %u of data unit '%s' (instance #%u)"), + UnitHdr.u32Version, UnitHdr.szName, UnitHdr.u32Instance); + else + VMSetError(pVM, rc, RT_SRC_POS, N_("Load exec failed for '%s' instance #%u (version %u)"), + pszName, UnitHdr.u32Instance, UnitHdr.u32Version); + } + break; + } + + pSSM->u.Read.pCurUnit = NULL; + pSSM->u.Read.uCurUnitVer = UINT32_MAX; + pSSM->u.Read.uCurUnitPass = 0; + } + else + { + /* + * SSM unit wasn't found - ignore this when loading for the debugger. + */ + LogRel(("SSM: Found no handler for unit '%s'!\n", pszName)); + rc = VERR_SSM_INTEGRITY_UNIT_NOT_FOUND; + if (pSSM->enmAfter != SSMAFTER_DEBUG_IT) + break; + rc = ssmR3StrmSkipTo(&pSSM->Strm, offUnit + UnitHdr.cbUnit); + } + } + } + else + rc = VERR_NO_TMP_MEMORY; + } + + /* + * I/O errors ends up here (yea, I know, very nice programming). + */ + if (RT_FAILURE(rc)) + { + LogRel(("SSM: I/O error. rc=%Rrc\n", rc)); + break; + } + + /* + * Check for cancellation. + */ + if (RT_UNLIKELY(ASMAtomicUoReadU32(&(pSSM)->fCancelled) == SSMHANDLE_CANCELLED)) + { + LogRel(("SSM: Cancelled!n")); + rc = pSSM->rc; + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = rc = VERR_SSM_CANCELLED; + break; + } + } + + RTMemTmpFree(pszName); + return rc; +} + + +/** + * Reads and verifies the directory and footer. + * + * @returns VBox status code. + * @param pSSM The saved state handle. + */ +static int ssmR3LoadDirectoryAndFooter(PSSMHANDLE pSSM) +{ + /* + * The directory. + * + * Get the header containing the number of entries first. Then read the + * entries and pass the combined block to the validation function. + */ + uint64_t off = ssmR3StrmTell(&pSSM->Strm); + size_t const cbDirHdr = RT_UOFFSETOF(SSMFILEDIR, aEntries); + SSMFILEDIR DirHdr; + int rc = ssmR3StrmRead(&pSSM->Strm, &DirHdr, cbDirHdr); + if (RT_FAILURE(rc)) + return rc; + AssertLogRelMsgReturn(!memcmp(DirHdr.szMagic, SSMFILEDIR_MAGIC, sizeof(DirHdr.szMagic)), + ("Invalid directory magic at %#llx (%lld): %.*Rhxs\n", off, off, sizeof(DirHdr.szMagic), DirHdr.szMagic), + VERR_SSM_INTEGRITY_DIR_MAGIC); + AssertLogRelMsgReturn(DirHdr.cEntries < _64K, + ("Too many directory entries at %#llx (%lld): %#x\n", off, off, DirHdr.cEntries), + VERR_SSM_INTEGRITY_DIR); + + size_t cbDir = RT_UOFFSETOF_DYN(SSMFILEDIR, aEntries[DirHdr.cEntries]); + PSSMFILEDIR pDir = (PSSMFILEDIR)RTMemTmpAlloc(cbDir); + if (!pDir) + return VERR_NO_TMP_MEMORY; + memcpy(pDir, &DirHdr, cbDirHdr); + rc = ssmR3StrmRead(&pSSM->Strm, (uint8_t *)pDir + cbDirHdr, cbDir - cbDirHdr); + if (RT_SUCCESS(rc)) + rc = ssmR3ValidateDirectory(pDir, cbDir, off, DirHdr.cEntries, pSSM->u.Read.cbFileHdr, pSSM->u.Read.u32SvnRev); + RTMemTmpFree(pDir); + if (RT_FAILURE(rc)) + return rc; + + /* + * Read and validate the footer. + */ + off = ssmR3StrmTell(&pSSM->Strm); + uint32_t u32StreamCRC = ssmR3StrmFinalCRC(&pSSM->Strm); + SSMFILEFTR Footer; + rc = ssmR3StrmRead(&pSSM->Strm, &Footer, sizeof(Footer)); + if (RT_FAILURE(rc)) + return rc; + return ssmR3ValidateFooter(&Footer, off, DirHdr.cEntries, pSSM->u.Read.fStreamCrc32, u32StreamCRC); +} + + +/** + * Executes the loading of a V2.X file. + * + * @returns VBox status code. May or may not set pSSM->rc, the returned + * status code is ALWAYS the more accurate of the two. + * @param pVM The cross context VM structure. + * @param pSSM The saved state handle. 
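+ *
+ * @note Each iteration reads one unit header and verifies it (magic, name
+ *       termination, header CRC32, stream offset and running stream CRC),
+ *       then dispatches the unit's pfnLoadExec callback according to the unit
+ *       type.  The loop terminates when the END unit is encountered, after
+ *       which the directory and footer are checked by
+ *       ssmR3LoadDirectoryAndFooter().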
+ */ +static int ssmR3LoadExecV2(PVM pVM, PSSMHANDLE pSSM) +{ + pSSM->enmOp = SSMSTATE_LOAD_EXEC; + for (;;) + { + /* + * Read the unit header and check its integrity. + */ + uint64_t offUnit = ssmR3StrmTell(&pSSM->Strm); + uint32_t u32CurStreamCRC = ssmR3StrmCurCRC(&pSSM->Strm); + SSMFILEUNITHDRV2 UnitHdr; + int rc = ssmR3StrmRead(&pSSM->Strm, &UnitHdr, RT_UOFFSETOF(SSMFILEUNITHDRV2, szName)); + if (RT_FAILURE(rc)) + return rc; + if (RT_UNLIKELY( memcmp(&UnitHdr.szMagic[0], SSMFILEUNITHDR_MAGIC, sizeof(UnitHdr.szMagic)) + && memcmp(&UnitHdr.szMagic[0], SSMFILEUNITHDR_END, sizeof(UnitHdr.szMagic)))) + { + LogRel(("SSM: Unit at %#llx (%lld): Invalid unit magic: %.*Rhxs!\n", + offUnit, offUnit, sizeof(UnitHdr.szMagic) - 1, &UnitHdr.szMagic[0])); + pSSM->u.Read.fHaveSetError = true; + return VMSetError(pVM, VERR_SSM_INTEGRITY_UNIT_MAGIC, RT_SRC_POS, + N_("Unit at %#llx (%lld): Invalid unit magic"), offUnit, offUnit); + } + if (UnitHdr.cbName) + { + AssertLogRelMsgReturn(UnitHdr.cbName <= sizeof(UnitHdr.szName), + ("Unit at %#llx (%lld): UnitHdr.cbName=%u > %u\n", + offUnit, offUnit, UnitHdr.cbName, sizeof(UnitHdr.szName)), + VERR_SSM_INTEGRITY_UNIT); + rc = ssmR3StrmRead(&pSSM->Strm, &UnitHdr.szName[0], UnitHdr.cbName); + if (RT_FAILURE(rc)) + return rc; + AssertLogRelMsgReturn(!UnitHdr.szName[UnitHdr.cbName - 1], + ("Unit at %#llx (%lld): Name %.*Rhxs was not properly terminated.\n", + offUnit, offUnit, UnitHdr.cbName, UnitHdr.szName), + VERR_SSM_INTEGRITY_UNIT); + } + SSM_CHECK_CRC32_RET(&UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName]), + ("Unit at %#llx (%lld): CRC mismatch: %08x, correct is %08x\n", offUnit, offUnit, u32CRC, u32ActualCRC)); + AssertLogRelMsgReturn(UnitHdr.offStream == offUnit, + ("Unit at %#llx (%lld): offStream=%#llx, expected %#llx\n", offUnit, offUnit, UnitHdr.offStream, offUnit), + VERR_SSM_INTEGRITY_UNIT); + AssertLogRelMsgReturn(UnitHdr.u32CurStreamCRC == u32CurStreamCRC || !pSSM->Strm.fChecksummed, + ("Unit at %#llx (%lld): Stream CRC mismatch: %08x, correct is %08x\n", offUnit, offUnit, UnitHdr.u32CurStreamCRC, u32CurStreamCRC), + VERR_SSM_INTEGRITY_UNIT); + AssertLogRelMsgReturn(!UnitHdr.fFlags, ("Unit at %#llx (%lld): fFlags=%08x\n", offUnit, offUnit, UnitHdr.fFlags), + VERR_SSM_INTEGRITY_UNIT); + if (!memcmp(&UnitHdr.szMagic[0], SSMFILEUNITHDR_END, sizeof(UnitHdr.szMagic))) + { + AssertLogRelMsgReturn( UnitHdr.cbName == 0 + && UnitHdr.u32Instance == 0 + && UnitHdr.u32Version == 0 + && UnitHdr.u32Pass == SSM_PASS_FINAL, + ("Unit at %#llx (%lld): Malformed END unit\n", offUnit, offUnit), + VERR_SSM_INTEGRITY_UNIT); + + /* + * Complete the progress bar (pending 99% afterwards) and RETURN. + */ + Log(("SSM: Unit at %#9llx: END UNIT\n", offUnit)); + ssmR3ProgressByByte(pSSM, pSSM->cbEstTotal - pSSM->offEst); + return ssmR3LoadDirectoryAndFooter(pSSM); + } + AssertLogRelMsgReturn(UnitHdr.cbName > 1, ("Unit at %#llx (%lld): No name\n", offUnit, offUnit), VERR_SSM_INTEGRITY); + + Log(("SSM: Unit at %#9llx: '%s', instance %u, pass %#x, version %u\n", + offUnit, UnitHdr.szName, UnitHdr.u32Instance, UnitHdr.u32Pass, UnitHdr.u32Version)); + + /* + * Find the data unit in our internal table. + */ + PSSMUNIT pUnit = ssmR3Find(pVM, UnitHdr.szName, UnitHdr.u32Instance); + if (pUnit) + { + /* + * Call the execute handler. 
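+ *
+ * The callback variant used depends on the unit type (device, driver, USB,
+ * internal or external) and it is invoked while holding the unit's critical
+ * section; if it fails, the unit content is dumped to the release log via
+ * ssmR3StrmLogUnitContent().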
+ */ + AssertLogRelMsgReturn(pUnit->u.Common.pfnLoadExec, + ("SSM: No load exec callback for unit '%s'!\n", UnitHdr.szName), + VERR_SSM_NO_LOAD_EXEC); + pSSM->u.Read.uCurUnitVer = UnitHdr.u32Version; + pSSM->u.Read.uCurUnitPass = UnitHdr.u32Pass; + pSSM->u.Read.pCurUnit = pUnit; + ssmR3DataReadBeginV2(pSSM); + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLoadExec(pUnit->u.Dev.pDevIns, pSSM, UnitHdr.u32Version, UnitHdr.u32Pass); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLoadExec(pUnit->u.Drv.pDrvIns, pSSM, UnitHdr.u32Version, UnitHdr.u32Pass); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLoadExec(pUnit->u.Usb.pUsbIns, pSSM, UnitHdr.u32Version, UnitHdr.u32Pass); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLoadExec(pVM, pSSM, UnitHdr.u32Version, UnitHdr.u32Pass); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLoadExec(pSSM, pUnit->u.External.pvUser, UnitHdr.u32Version, UnitHdr.u32Pass); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + pUnit->fCalled = true; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(pSSM->rc)) + pSSM->rc = rc; + rc = ssmR3DataReadFinishV2(pSSM); + if (RT_SUCCESS(rc)) + { + pSSM->offUnit = UINT64_MAX; + pSSM->offUnitUser = UINT64_MAX; + } + else + { + LogRel(("SSM: LoadExec failed for '%s' instance #%u (version %u, pass %#x): %Rrc\n", + UnitHdr.szName, UnitHdr.u32Instance, UnitHdr.u32Version, UnitHdr.u32Pass, rc)); + LogRel(("SSM: Unit at %#llx, current position: offUnit=%#llx offUnitUser=%#llx\n", + offUnit, pSSM->offUnit, pSSM->offUnitUser)); + + if (!ASMAtomicXchgBool(&pSSM->u.Read.fHaveSetError, true)) + { + if (rc == VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION) + rc = VMSetError(pVM, rc, RT_SRC_POS, N_("Unsupported version %u of data unit '%s' (instance #%u, pass %#x)"), + UnitHdr.u32Version, UnitHdr.szName, UnitHdr.u32Instance, UnitHdr.u32Pass); + else + rc = VMSetError(pVM, rc, RT_SRC_POS, N_("Failed to load unit '%s'"), UnitHdr.szName); + } + + /* Try log the unit content, unless it's too big. */ + if (pSSM->offUnitUser < _512K) + ssmR3StrmLogUnitContent(pSSM, &UnitHdr, offUnit, 0, pSSM->offUnitUser + _16K); + else + ssmR3StrmLogUnitContent(pSSM, &UnitHdr, offUnit, pSSM->offUnitUser - _256K, pSSM->offUnitUser + _16K); + return rc; + } + } + else + { + /* + * SSM unit wasn't found - ignore this when loading for the debugger. + */ + LogRel(("SSM: Found no handler for unit '%s' instance #%u!\n", UnitHdr.szName, UnitHdr.u32Instance)); + if (pSSM->enmAfter != SSMAFTER_DEBUG_IT) + { + pSSM->u.Read.fHaveSetError = true; + return VMSetError(pVM, VERR_SSM_INTEGRITY_UNIT_NOT_FOUND, RT_SRC_POS, + N_("Found no handler for unit '%s' instance #%u"), UnitHdr.szName, UnitHdr.u32Instance); + } + SSMR3SkipToEndOfUnit(pSSM); + ssmR3DataReadFinishV2(pSSM); + } + + /* + * Check for cancellation. + */ + if (RT_UNLIKELY(ASMAtomicUoReadU32(&(pSSM)->fCancelled) == SSMHANDLE_CANCELLED)) + { + LogRel(("SSM: Cancelled!\n")); + if (RT_SUCCESS(pSSM->rc)) + pSSM->rc = VERR_SSM_CANCELLED; + return pSSM->rc; + } + } + /* won't get here */ +} + + + + +/** + * Load VM save operation. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param pszFilename The name of the saved state file. NULL if pStreamOps + * is used. + * @param pStreamOps The stream method table. NULL if pszFilename is + * used. + * @param pvStreamOpsUser The user argument for the stream methods. 
+ * @param enmAfter What is planned after a successful load operation. + * Only acceptable values are SSMAFTER_RESUME and SSMAFTER_DEBUG_IT. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * + * @thread EMT + */ +VMMR3DECL(int) SSMR3Load(PVM pVM, const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser) +{ + LogFlow(("SSMR3Load: pszFilename=%p:{%s} pStreamOps=%p pvStreamOpsUser=%p enmAfter=%d pfnProgress=%p pvProgressUser=%p\n", + pszFilename, pszFilename, pStreamOps, pvStreamOpsUser, enmAfter, pfnProgress, pvProgressUser)); + VM_ASSERT_EMT0(pVM); + + /* + * Validate input. + */ + AssertMsgReturn( enmAfter == SSMAFTER_RESUME + || enmAfter == SSMAFTER_TELEPORT + || enmAfter == SSMAFTER_DEBUG_IT, + ("%d\n", enmAfter), + VERR_INVALID_PARAMETER); + AssertReturn(!pszFilename != !pStreamOps, VERR_INVALID_PARAMETER); + if (pStreamOps) + { + AssertReturn(pStreamOps->u32Version == SSMSTRMOPS_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pStreamOps->u32EndVersion == SSMSTRMOPS_VERSION, VERR_INVALID_MAGIC); + AssertReturn(pStreamOps->pfnWrite, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnRead, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnSeek, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnTell, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnSize, VERR_INVALID_PARAMETER); + AssertReturn(pStreamOps->pfnClose, VERR_INVALID_PARAMETER); + } + + /* + * Create the handle and open the file. + */ + SSMHANDLE Handle; + int rc = ssmR3OpenFile(pVM, pszFilename, pStreamOps, pvStreamOpsUser, false /* fChecksumIt */, + true /* fChecksumOnRead */, 8 /*cBuffers*/, &Handle); + if (RT_SUCCESS(rc)) + { + ssmR3StrmStartIoThread(&Handle.Strm); + ssmR3SetCancellable(pVM, &Handle, true); + + Handle.enmAfter = enmAfter; + Handle.pfnProgress = pfnProgress; + Handle.pvUser = pvProgressUser; + Handle.uPercentLive = 0; + Handle.uPercentPrepare = 2; + Handle.uPercentDone = 2; + + if (Handle.u.Read.u16VerMajor) + LogRel(("SSM: File header: Format %u.%u, VirtualBox Version %u.%u.%u r%u, %u-bit host, cbGCPhys=%u, cbGCPtr=%u\n", + Handle.u.Read.uFmtVerMajor, Handle.u.Read.uFmtVerMinor, + Handle.u.Read.u16VerMajor, Handle.u.Read.u16VerMinor, Handle.u.Read.u32VerBuild, Handle.u.Read.u32SvnRev, + Handle.u.Read.cHostBits, Handle.u.Read.cbGCPhys, Handle.u.Read.cbGCPtr)); + else + LogRel(("SSM: File header: Format %u.%u, %u-bit host, cbGCPhys=%u, cbGCPtr=%u\n" , + Handle.u.Read.uFmtVerMajor, Handle.u.Read.uFmtVerMinor, + Handle.u.Read.cHostBits, Handle.u.Read.cbGCPhys, Handle.u.Read.cbGCPtr)); + + if (pfnProgress) + pfnProgress(pVM->pUVM, Handle.uPercent, pvProgressUser); + + /* + * Clear the per unit flags. + */ + PSSMUNIT pUnit; + for (pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + pUnit->fCalled = false; + + /* + * Do the prepare run. 
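+ *
+ * Loading is performed in three passes over the registered units: prepare
+ * (SSMSTATE_LOAD_PREP, below), execute (SSMSTATE_LOAD_EXEC, done by
+ * ssmR3LoadExecV1 or ssmR3LoadExecV2) and done (SSMSTATE_LOAD_DONE).  A
+ * prepare failure skips the execute run, but the done run is still made.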
+ */ + Handle.rc = VINF_SUCCESS; + Handle.enmOp = SSMSTATE_LOAD_PREP; + for (pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + if (pUnit->u.Common.pfnLoadPrep) + { + Handle.u.Read.pCurUnit = pUnit; + pUnit->fCalled = true; + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLoadPrep(pUnit->u.Dev.pDevIns, &Handle); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLoadPrep(pUnit->u.Drv.pDrvIns, &Handle); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLoadPrep(pUnit->u.Usb.pUsbIns, &Handle); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLoadPrep(pVM, &Handle); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLoadPrep(&Handle, pUnit->u.External.pvUser); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + Handle.u.Read.pCurUnit = NULL; + if (RT_FAILURE(rc) && RT_SUCCESS_NP(Handle.rc)) + Handle.rc = rc; + else + rc = Handle.rc; + if (RT_FAILURE(rc)) + { + LogRel(("SSM: Prepare load failed with rc=%Rrc for data unit '%s.\n", rc, pUnit->szName)); + break; + } + } + } + + /* end of prepare % */ + if (pfnProgress) + pfnProgress(pVM->pUVM, Handle.uPercentPrepare - 1, pvProgressUser); + Handle.uPercent = Handle.uPercentPrepare; + Handle.cbEstTotal = Handle.u.Read.cbLoadFile; + Handle.offEstUnitEnd = Handle.u.Read.cbLoadFile; + + /* + * Do the execute run. + */ + if (RT_SUCCESS(rc)) + { + if (Handle.u.Read.uFmtVerMajor >= 2) + rc = ssmR3LoadExecV2(pVM, &Handle); + else + rc = ssmR3LoadExecV1(pVM, &Handle); + Handle.u.Read.pCurUnit = NULL; + Handle.u.Read.uCurUnitVer = UINT32_MAX; + Handle.u.Read.uCurUnitPass = 0; + + /* (progress should be pending 99% now) */ + AssertMsg( Handle.fLiveSave + || RT_FAILURE(rc) + || Handle.uPercent == 101 - Handle.uPercentDone, ("%d\n", Handle.uPercent)); + } + + /* + * Do the done run. 
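+ *
+ * pfnLoadDone is invoked for every unit whose prepare or execute callback was
+ * called, and also for units that registered neither, regardless of the
+ * overall status at this point.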
+ */ + Handle.rc = rc; + Handle.enmOp = SSMSTATE_LOAD_DONE; + for (pUnit = pVM->ssm.s.pHead; pUnit; pUnit = pUnit->pNext) + { + if ( pUnit->u.Common.pfnLoadDone + && ( pUnit->fCalled + || (!pUnit->u.Common.pfnLoadPrep && !pUnit->u.Common.pfnLoadExec))) + { + Handle.u.Read.pCurUnit = pUnit; + int const rcOld = Handle.rc; + rc = VINF_SUCCESS; + ssmR3UnitCritSectEnter(pUnit); + switch (pUnit->enmType) + { + case SSMUNITTYPE_DEV: + rc = pUnit->u.Dev.pfnLoadDone(pUnit->u.Dev.pDevIns, &Handle); + break; + case SSMUNITTYPE_DRV: + rc = pUnit->u.Drv.pfnLoadDone(pUnit->u.Drv.pDrvIns, &Handle); + break; + case SSMUNITTYPE_USB: + rc = pUnit->u.Usb.pfnLoadDone(pUnit->u.Usb.pUsbIns, &Handle); + break; + case SSMUNITTYPE_INTERNAL: + rc = pUnit->u.Internal.pfnLoadDone(pVM, &Handle); + break; + case SSMUNITTYPE_EXTERNAL: + rc = pUnit->u.External.pfnLoadDone(&Handle, pUnit->u.External.pvUser); + break; + default: + rc = VERR_SSM_IPE_1; + break; + } + ssmR3UnitCritSectLeave(pUnit); + Handle.u.Read.pCurUnit = NULL; + if (RT_SUCCESS(rc) && Handle.rc != rcOld) + rc = Handle.rc; + if (RT_FAILURE(rc)) + { + LogRel(("SSM: LoadDone failed with rc=%Rrc for data unit '%s' instance #%u.\n", + rc, pUnit->szName, pUnit->u32Instance)); + if (!ASMAtomicXchgBool(&Handle.u.Read.fHaveSetError, true)) + VMSetError(pVM, rc, RT_SRC_POS, N_("LoadDone failed with rc=%Rrc for data unit '%s' instance #%u."), + rc, pUnit->szName, pUnit->u32Instance); + if (RT_SUCCESS_NP(Handle.rc)) + Handle.rc = rc; + } + } + } + + /* progress */ + if (pfnProgress) + pfnProgress(pVM->pUVM, 99, pvProgressUser); + + ssmR3SetCancellable(pVM, &Handle, false); + ssmR3StrmClose(&Handle.Strm, Handle.rc == VERR_SSM_CANCELLED); + rc = Handle.rc; + } + + /* + * Done + */ + if (RT_SUCCESS(rc)) + { + /* progress */ + if (pfnProgress) + pfnProgress(pVM->pUVM, 100, pvProgressUser); + Log(("SSM: Load of '%s' completed!\n", pszFilename)); + } + return rc; +} + + +/** + * VMSetError wrapper for load errors that inserts the saved state details. + * + * @returns rc. + * @param pSSM The saved state handle. + * @param rc The status code of the error. Use RT_SRC_POS. + * @param SRC_POS The source location. + * @param pszFormat The message format string. + * @param ... Variable argument list. + */ +VMMR3DECL(int) SSMR3SetLoadError(PSSMHANDLE pSSM, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + rc = SSMR3SetLoadErrorV(pSSM, rc, RT_SRC_POS_ARGS, pszFormat, va); + va_end(va); + return rc; +} + + +/** + * VMSetError wrapper for load errors that inserts the saved state details. + * + * @returns rc. + * @param pSSM The saved state handle. + * @param rc The status code of the error. + * @param SRC_POS The error location, use RT_SRC_POS. + * @param pszFormat The message format string. + * @param va Variable argument list. + */ +VMMR3DECL(int) SSMR3SetLoadErrorV(PSSMHANDLE pSSM, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va) +{ + /* + * Input validations. + */ + SSM_ASSERT_READABLE_RET(pSSM); + AssertPtr(pszFormat); + Assert(RT_FAILURE_NP(rc)); + + /* + * Format the incoming error. + */ + char *pszMsg; + RTStrAPrintfV(&pszMsg, pszFormat, va); + if (!pszMsg) + { + VMSetError(pSSM->pVM, VERR_NO_MEMORY, RT_SRC_POS, + N_("SSMR3SetLoadErrorV ran out of memory formatting: %s\n"), pszFormat); + return rc; + } + + /* + * Forward to VMSetError with the additional info. + */ + PSSMUNIT pUnit = pSSM->u.Read.pCurUnit; + const char *pszName = pUnit ? pUnit->szName : "unknown"; + uint32_t uInstance = pUnit ? 
pUnit->u32Instance : 0; + if ( pSSM->enmOp == SSMSTATE_LOAD_EXEC + && pSSM->u.Read.uCurUnitPass == SSM_PASS_FINAL) + rc = VMSetError(pSSM->pVM, rc, RT_SRC_POS_ARGS, N_("%s#%u: %s [ver=%u pass=final]"), + pszName, uInstance, pszMsg, pSSM->u.Read.uCurUnitVer); + else if (pSSM->enmOp == SSMSTATE_LOAD_EXEC) + rc = VMSetError(pSSM->pVM, rc, RT_SRC_POS_ARGS, N_("%s#%u: %s [ver=%u pass=#%u]"), + pszName, uInstance, pszMsg, pSSM->u.Read.uCurUnitVer, pSSM->u.Read.uCurUnitPass); + else if (pSSM->enmOp == SSMSTATE_LOAD_PREP) + rc = VMSetError(pSSM->pVM, rc, RT_SRC_POS_ARGS, N_("%s#%u: %s [prep]"), + pszName, uInstance, pszMsg); + else if (pSSM->enmOp == SSMSTATE_LOAD_DONE) + rc = VMSetError(pSSM->pVM, rc, RT_SRC_POS_ARGS, N_("%s#%u: %s [done]"), + pszName, uInstance, pszMsg); + else if (pSSM->enmOp == SSMSTATE_OPEN_READ) + rc = VMSetError(pSSM->pVM, rc, RT_SRC_POS_ARGS, N_("%s#%u: %s [read]"), + pszName, uInstance, pszMsg); + else + AssertFailed(); + pSSM->u.Read.fHaveSetError = true; + RTStrFree(pszMsg); + return rc; +} + + +/** + * SSMR3SetLoadError wrapper that returns VERR_SSM_LOAD_CONFIG_MISMATCH. + * + * @returns VERR_SSM_LOAD_CONFIG_MISMATCH. + * @param pSSM The saved state handle. + * @param SRC_POS The error location, use RT_SRC_POS. + * @param pszFormat The message format string. + * @param ... Variable argument list. + */ +VMMR3DECL(int) SSMR3SetCfgError(PSSMHANDLE pSSM, RT_SRC_POS_DECL, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + int rc = SSMR3SetLoadErrorV(pSSM, VERR_SSM_LOAD_CONFIG_MISMATCH, RT_SRC_POS_ARGS, pszFormat, va); + va_end(va); + return rc; +} + +#endif /* !SSM_STANDALONE */ + +/** + * Validates a file as a validate SSM saved state. + * + * This will only verify the file format, the format and content of individual + * data units are not inspected. + * + * @returns VINF_SUCCESS if valid. + * @returns VBox status code on other failures. + * + * @param pszFilename The path to the file to validate. + * @param fChecksumIt Whether to checksum the file or not. + * + * @thread Any. + */ +VMMR3DECL(int) SSMR3ValidateFile(const char *pszFilename, bool fChecksumIt) +{ + LogFlow(("SSMR3ValidateFile: pszFilename=%p:{%s} fChecksumIt=%RTbool\n", pszFilename, pszFilename, fChecksumIt)); + + /* + * Try open the file and validate it. + */ + SSMHANDLE Handle; + int rc = ssmR3OpenFile(NULL, pszFilename, NULL /*pStreamOps*/, NULL /*pvUser*/, fChecksumIt, + false /*fChecksumOnRead*/, 1 /*cBuffers*/, &Handle); + if (RT_SUCCESS(rc)) + ssmR3StrmClose(&Handle.Strm, false /*fCancelled*/); + else + Log(("SSM: Failed to open saved state file '%s', rc=%Rrc.\n", pszFilename, rc)); + return rc; +} + + +/** + * Opens a saved state file for reading. + * + * @returns VBox status code. + * + * @param pszFilename The path to the saved state file. + * @param fFlags Open flags. Reserved, must be 0. + * @param ppSSM Where to store the SSM handle. + * + * @thread Any. + */ +VMMR3DECL(int) SSMR3Open(const char *pszFilename, unsigned fFlags, PSSMHANDLE *ppSSM) +{ + LogFlow(("SSMR3Open: pszFilename=%p:{%s} fFlags=%#x ppSSM=%p\n", pszFilename, pszFilename, fFlags, ppSSM)); + + /* + * Validate input. + */ + AssertMsgReturn(VALID_PTR(pszFilename), ("%p\n", pszFilename), VERR_INVALID_PARAMETER); + AssertMsgReturn(!fFlags, ("%#x\n", fFlags), VERR_INVALID_PARAMETER); + AssertMsgReturn(VALID_PTR(ppSSM), ("%p\n", ppSSM), VERR_INVALID_PARAMETER); + + /* + * Allocate a handle. 
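+ *
+ * The handle is zero initialized and freed again below if opening or header
+ * validation fails; on success it is returned to the caller and released by
+ * SSMR3Close().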
+ */ + PSSMHANDLE pSSM = (PSSMHANDLE)RTMemAllocZ(sizeof(*pSSM)); + AssertReturn(pSSM, VERR_NO_MEMORY); + + /* + * Try open the file and validate it. + */ + int rc = ssmR3OpenFile(NULL, pszFilename, NULL /*pStreamOps*/, NULL /*pvUser*/, false /*fChecksumIt*/, + true /*fChecksumOnRead*/, 1 /*cBuffers*/, pSSM); + if (RT_SUCCESS(rc)) + { + pSSM->enmAfter = SSMAFTER_OPENED; + pSSM->enmOp = SSMSTATE_OPEN_READ; + *ppSSM = pSSM; + LogFlow(("SSMR3Open: returns VINF_SUCCESS *ppSSM=%p\n", *ppSSM)); + return VINF_SUCCESS; + } + + Log(("SSMR3Open: Failed to open saved state file '%s', rc=%Rrc.\n", pszFilename, rc)); + RTMemFree(pSSM); + return rc; + +} + + +/** + * Closes a saved state file opened by SSMR3Open(). + * + * @returns VBox status code. + * + * @param pSSM The SSM handle returned by SSMR3Open(). + * + * @thread Any, but the caller is responsible for serializing calls per handle. + */ +VMMR3DECL(int) SSMR3Close(PSSMHANDLE pSSM) +{ + LogFlow(("SSMR3Close: pSSM=%p\n", pSSM)); + + /* + * Validate input. + */ + AssertMsgReturn(VALID_PTR(pSSM), ("%p\n", pSSM), VERR_INVALID_PARAMETER); + AssertMsgReturn(pSSM->enmAfter == SSMAFTER_OPENED, ("%d\n", pSSM->enmAfter),VERR_INVALID_PARAMETER); + AssertMsgReturn(pSSM->enmOp == SSMSTATE_OPEN_READ, ("%d\n", pSSM->enmOp), VERR_INVALID_PARAMETER); + Assert(pSSM->fCancelled == SSMHANDLE_OK); + + /* + * Close the stream and free the handle. + */ + int rc = ssmR3StrmClose(&pSSM->Strm, pSSM->rc == VERR_SSM_CANCELLED); + if (pSSM->u.Read.pZipDecompV1) + { + RTZipDecompDestroy(pSSM->u.Read.pZipDecompV1); + pSSM->u.Read.pZipDecompV1 = NULL; + } + RTMemFree(pSSM); + return rc; +} + + +/** + * Worker for SSMR3Seek that seeks version 1 saved state files. + * + * @returns VBox status code. + * @param pSSM The SSM handle. + * @param pszUnit The unit to seek to. + * @param iInstance The particular instance we seek. + * @param piVersion Where to store the unit version number. + */ +static int ssmR3FileSeekV1(PSSMHANDLE pSSM, const char *pszUnit, uint32_t iInstance, uint32_t *piVersion) +{ + /* + * Walk the data units until we find EOF or a match. + */ + size_t cbUnitNm = strlen(pszUnit) + 1; + AssertLogRelReturn(cbUnitNm <= SSM_MAX_NAME_SIZE, VERR_SSM_UNIT_NOT_FOUND); + char szName[SSM_MAX_NAME_SIZE]; + SSMFILEUNITHDRV1 UnitHdr; + for (RTFOFF off = pSSM->u.Read.cbFileHdr; ; off += UnitHdr.cbUnit) + { + /* + * Read the unit header and verify it. + */ + int rc = ssmR3StrmPeekAt(&pSSM->Strm, off, &UnitHdr, RT_UOFFSETOF(SSMFILEUNITHDRV1, szName), NULL); + AssertRCReturn(rc, rc); + if (!memcmp(&UnitHdr.achMagic[0], SSMFILEUNITHDR_MAGIC, sizeof(SSMFILEUNITHDR_MAGIC))) + { + /* + * Does what we've got match, if so read the name. + */ + if ( UnitHdr.u32Instance == iInstance + && UnitHdr.cchName == cbUnitNm) + { + rc = ssmR3StrmPeekAt(&pSSM->Strm, off + RT_UOFFSETOF(SSMFILEUNITHDRV1, szName), szName, cbUnitNm, NULL); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(!szName[UnitHdr.cchName - 1], + (" Unit name '%.*s' was not properly terminated.\n", cbUnitNm, szName), + VERR_SSM_INTEGRITY_UNIT); + + /* + * Does the name match? 
+ */ + if (!memcmp(szName, pszUnit, cbUnitNm)) + { + rc = ssmR3StrmSeek(&pSSM->Strm, off + RT_UOFFSETOF(SSMFILEUNITHDRV1, szName) + cbUnitNm, RTFILE_SEEK_BEGIN, 0); + pSSM->cbUnitLeftV1 = UnitHdr.cbUnit - RT_UOFFSETOF_DYN(SSMFILEUNITHDRV1, szName[cbUnitNm]); + pSSM->offUnit = 0; + pSSM->offUnitUser = 0; + if (piVersion) + *piVersion = UnitHdr.u32Version; + return VINF_SUCCESS; + } + } + } + else if (!memcmp(&UnitHdr.achMagic[0], SSMFILEUNITHDR_END, sizeof(SSMFILEUNITHDR_END))) + return VERR_SSM_UNIT_NOT_FOUND; + else + AssertLogRelMsgFailedReturn(("Invalid unit magic at offset %RTfoff, '%.*s'!\n", + off, sizeof(UnitHdr.achMagic) - 1, &UnitHdr.achMagic[0]), + VERR_SSM_INTEGRITY_UNIT_MAGIC); + } + /* won't get here. */ +} + + +/** + * Worker for ssmR3FileSeekV2 for simplifying memory cleanup. + * + * @returns VBox status code. + * @param pSSM The SSM handle. + * @param pDir The directory buffer. + * @param cbDir The size of the directory. + * @param cDirEntries The number of directory entries. + * @param offDir The directory offset in the file. + * @param pszUnit The unit to seek to. + * @param iInstance The particular instance we seek. + * @param piVersion Where to store the unit version number. + */ +static int ssmR3FileSeekSubV2(PSSMHANDLE pSSM, PSSMFILEDIR pDir, size_t cbDir, uint32_t cDirEntries, uint64_t offDir, + const char *pszUnit, uint32_t iInstance, uint32_t *piVersion) +{ + /* + * Read it. + */ + int rc = ssmR3StrmPeekAt(&pSSM->Strm, offDir, pDir, cbDir, NULL); + AssertLogRelRCReturn(rc, rc); + rc = ssmR3ValidateDirectory(pDir, (uint32_t)cbDir, offDir, cDirEntries, pSSM->u.Read.cbFileHdr, pSSM->u.Read.u32SvnRev); + if (RT_FAILURE(rc)) + return rc; + + /* + * Search the directory. + */ + size_t cbUnitNm = strlen(pszUnit) + 1; + uint32_t const u32NameCRC = RTCrc32(pszUnit, cbUnitNm - 1); + for (uint32_t i = 0; i < cDirEntries; i++) + { + if ( pDir->aEntries[i].u32NameCRC == u32NameCRC + && pDir->aEntries[i].u32Instance == iInstance + && pDir->aEntries[i].off != 0 /* bug in unreleased code */ + ) + { + /* + * Read and validate the unit header. 
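+ *
+ * The peek below is clamped so it never reads beyond the start of the
+ * directory; UnitHdr is zeroed first in that case so the unread tail stays
+ * well defined.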
+ */ + SSMFILEUNITHDRV2 UnitHdr; + size_t cbToRead = sizeof(UnitHdr); + if (pDir->aEntries[i].off + cbToRead > offDir) + { + cbToRead = offDir - pDir->aEntries[i].off; + RT_ZERO(UnitHdr); + } + rc = ssmR3StrmPeekAt(&pSSM->Strm, pDir->aEntries[i].off, &UnitHdr, cbToRead, NULL); + AssertLogRelRCReturn(rc, rc); + + AssertLogRelMsgReturn(!memcmp(UnitHdr.szMagic, SSMFILEUNITHDR_MAGIC, sizeof(UnitHdr.szMagic)), + ("Bad unit header or dictionary offset: i=%u off=%lld\n", i, pDir->aEntries[i].off), + VERR_SSM_INTEGRITY_UNIT); + AssertLogRelMsgReturn(UnitHdr.offStream == pDir->aEntries[i].off, + ("Bad unit header: i=%d off=%lld offStream=%lld\n", i, pDir->aEntries[i].off, UnitHdr.offStream), + VERR_SSM_INTEGRITY_UNIT); + AssertLogRelMsgReturn(UnitHdr.u32Instance == pDir->aEntries[i].u32Instance, + ("Bad unit header: i=%d off=%lld u32Instance=%u Dir.u32Instance=%u\n", + i, pDir->aEntries[i].off, UnitHdr.u32Instance, pDir->aEntries[i].u32Instance), + VERR_SSM_INTEGRITY_UNIT); + uint32_t cbUnitHdr = RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName]); + AssertLogRelMsgReturn( UnitHdr.cbName > 0 + && UnitHdr.cbName < sizeof(UnitHdr) + && cbUnitHdr <= cbToRead, + ("Bad unit header: i=%u off=%lld cbName=%#x cbToRead=%#x\n", i, pDir->aEntries[i].off, UnitHdr.cbName, cbToRead), + VERR_SSM_INTEGRITY_UNIT); + SSM_CHECK_CRC32_RET(&UnitHdr, RT_UOFFSETOF_DYN(SSMFILEUNITHDRV2, szName[UnitHdr.cbName]), + ("Bad unit header CRC: i=%u off=%lld u32CRC=%#x u32ActualCRC=%#x\n", + i, pDir->aEntries[i].off, u32CRC, u32ActualCRC)); + + /* + * Ok, it is valid, get on with the comparing now. + */ + if ( UnitHdr.cbName == cbUnitNm + && !memcmp(UnitHdr.szName, pszUnit, cbUnitNm)) + { + if (piVersion) + *piVersion = UnitHdr.u32Version; + rc = ssmR3StrmSeek(&pSSM->Strm, pDir->aEntries[i].off + cbUnitHdr, RTFILE_SEEK_BEGIN, + RTCrc32Process(UnitHdr.u32CurStreamCRC, &UnitHdr, cbUnitHdr)); + AssertLogRelRCReturn(rc, rc); + ssmR3DataReadBeginV2(pSSM); + return VINF_SUCCESS; + } + } + } + + return VERR_SSM_UNIT_NOT_FOUND; +} + + +/** + * Worker for SSMR3Seek that seeks version 2 saved state files. + * + * @returns VBox status code. + * @param pSSM The SSM handle. + * @param pszUnit The unit to seek to. + * @param iInstance The particular instance we seek. + * @param piVersion Where to store the unit version number. + */ +static int ssmR3FileSeekV2(PSSMHANDLE pSSM, const char *pszUnit, uint32_t iInstance, uint32_t *piVersion) +{ + /* + * Read the footer, allocate a temporary buffer for the dictionary and + * pass it down to a worker to simplify cleanup. + */ + uint64_t offFooter; + SSMFILEFTR Footer; + int rc = ssmR3StrmPeekAt(&pSSM->Strm, -(RTFOFF)sizeof(Footer), &Footer, sizeof(Footer), &offFooter); + AssertLogRelRCReturn(rc, rc); + AssertLogRelReturn(!memcmp(Footer.szMagic, SSMFILEFTR_MAGIC, sizeof(Footer.szMagic)), VERR_SSM_INTEGRITY); + SSM_CHECK_CRC32_RET(&Footer, sizeof(Footer), ("Bad footer CRC: %08x, actual %08x\n", u32CRC, u32ActualCRC)); + + size_t const cbDir = RT_UOFFSETOF_DYN(SSMFILEDIR, aEntries[Footer.cDirEntries]); + PSSMFILEDIR pDir = (PSSMFILEDIR)RTMemTmpAlloc(cbDir); + if (RT_UNLIKELY(!pDir)) + return VERR_NO_TMP_MEMORY; + rc = ssmR3FileSeekSubV2(pSSM, pDir, cbDir, Footer.cDirEntries, offFooter - cbDir, + pszUnit, iInstance, piVersion); + RTMemTmpFree(pDir); + + return rc; +} + + +/** + * Seeks to a specific data unit. + * + * After seeking it's possible to use the getters to on + * that data unit. + * + * @returns VBox status code. + * @returns VERR_SSM_UNIT_NOT_FOUND if the unit+instance wasn't found. 
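+ *
+ * @remarks A rough usage sketch combining SSMR3Open(), SSMR3Seek() and
+ *          SSMR3Close(); error handling is trimmed and the unit name "pgm"
+ *          with instance 0 is only a placeholder:
+ * @code
+ *      PSSMHANDLE pSSM;
+ *      int rc = SSMR3Open(pszFilename, 0, &pSSM);
+ *      if (RT_SUCCESS(rc))
+ *      {
+ *          uint32_t uVersion = 0;
+ *          rc = SSMR3Seek(pSSM, "pgm", 0, &uVersion);
+ *          // ... use the getters on the unit data here ...
+ *          SSMR3Close(pSSM);
+ *      }
+ * @endcode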
+ * + * @param pSSM The SSM handle returned by SSMR3Open(). + * @param pszUnit The name of the data unit. + * @param iInstance The instance number. + * @param piVersion Where to store the version number. (Optional) + * + * @thread Any, but the caller is responsible for serializing calls per handle. + */ +VMMR3DECL(int) SSMR3Seek(PSSMHANDLE pSSM, const char *pszUnit, uint32_t iInstance, uint32_t *piVersion) +{ + LogFlow(("SSMR3Seek: pSSM=%p pszUnit=%p:{%s} iInstance=%RU32 piVersion=%p\n", + pSSM, pszUnit, pszUnit, iInstance, piVersion)); + + /* + * Validate input. + */ + AssertPtrReturn(pSSM, VERR_INVALID_PARAMETER); + AssertMsgReturn(pSSM->enmAfter == SSMAFTER_OPENED, ("%d\n", pSSM->enmAfter),VERR_INVALID_PARAMETER); + AssertMsgReturn(pSSM->enmOp == SSMSTATE_OPEN_READ, ("%d\n", pSSM->enmOp), VERR_INVALID_PARAMETER); + AssertPtrReturn(pszUnit, VERR_INVALID_POINTER); + AssertMsgReturn(!piVersion || VALID_PTR(piVersion), ("%p\n", piVersion), VERR_INVALID_POINTER); + + /* + * Reset the state. + */ + if (pSSM->u.Read.pZipDecompV1) + { + RTZipDecompDestroy(pSSM->u.Read.pZipDecompV1); + pSSM->u.Read.pZipDecompV1 = NULL; + } + pSSM->cbUnitLeftV1 = 0; + pSSM->offUnit = UINT64_MAX; + pSSM->offUnitUser = UINT64_MAX; + + /* + * Call the version specific workers. + */ + if (pSSM->u.Read.uFmtVerMajor >= 2) + pSSM->rc = ssmR3FileSeekV2(pSSM, pszUnit, iInstance, piVersion); + else + pSSM->rc = ssmR3FileSeekV1(pSSM, pszUnit, iInstance, piVersion); + return pSSM->rc; +} + + + +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ +/* ... Misc APIs ... */ + + + +/** + * Query what the VBox status code of the operation is. + * + * This can be used for putting and getting a batch of values + * without bother checking the result till all the calls have + * been made. + * + * @returns SSMAFTER enum value. + * @param pSSM The saved state handle. + */ +VMMR3DECL(int) SSMR3HandleGetStatus(PSSMHANDLE pSSM) +{ + SSM_ASSERT_VALID_HANDLE(pSSM); + return pSSM->rc; +} + + +/** + * Fail the load operation. + * + * This is mainly intended for sub item loaders (like timers) which + * return code isn't necessarily heeded by the caller but is important + * to SSM. + * + * @returns VBox status code of the handle, or VERR_INVALID_PARAMETER. + * @param pSSM The saved state handle. + * @param iStatus Failure status code. This MUST be a VERR_*. + */ +VMMR3DECL(int) SSMR3HandleSetStatus(PSSMHANDLE pSSM, int iStatus) +{ + SSM_ASSERT_VALID_HANDLE(pSSM); + Assert(pSSM->enmOp != SSMSTATE_LIVE_VOTE); + if (RT_FAILURE(iStatus)) + { + int rc = pSSM->rc; + if (RT_SUCCESS(rc)) + pSSM->rc = rc = iStatus; + return rc; + } + AssertMsgFailed(("iStatus=%d %Rrc\n", iStatus, iStatus)); + return VERR_INVALID_PARAMETER; +} + + +/** + * Get what to do after this operation. + * + * @returns SSMAFTER enum value. + * @param pSSM The saved state handle. + */ +VMMR3DECL(SSMAFTER) SSMR3HandleGetAfter(PSSMHANDLE pSSM) +{ + SSM_ASSERT_VALID_HANDLE(pSSM); + return pSSM->enmAfter; +} + + +/** + * Checks if it is a live save operation or not. + * + * @returns True if it is, false if it isn't. + * @param pSSM The saved state handle. + */ +VMMR3DECL(bool) SSMR3HandleIsLiveSave(PSSMHANDLE pSSM) +{ + SSM_ASSERT_VALID_HANDLE(pSSM); + return pSSM->fLiveSave; +} + + +/** + * Gets the maximum downtime for a live operation. + * + * @returns The max downtime in milliseconds. 
Can be anything from 0 thru + * UINT32_MAX. + * + * @param pSSM The saved state handle. + */ +VMMR3DECL(uint32_t) SSMR3HandleMaxDowntime(PSSMHANDLE pSSM) +{ + SSM_ASSERT_VALID_HANDLE(pSSM); + if (pSSM->enmOp <= SSMSTATE_SAVE_DONE) + return pSSM->u.Write.cMsMaxDowntime; + return UINT32_MAX; +} + + +/** + * Gets the host bit count of a saved state. + * + * @returns 32 or 64. If pSSM is invalid, 0 is returned. + * @param pSSM The saved state handle. + * + * @remarks This method should ONLY be used for hacks when loading OLDER saved + * state that have data layout or semantic changes without the + * compulsory version number change. + */ +VMMR3DECL(uint32_t) SSMR3HandleHostBits(PSSMHANDLE pSSM) +{ + SSM_ASSERT_VALID_HANDLE(pSSM); + return ssmR3GetHostBits(pSSM); +} + + +/** + * Get the VirtualBox SVN revision that created the saved state. + * + * @returns The revision number on success. + * form. If we don't know, it's 0. + * @param pSSM The saved state handle. + * + * @remarks This method should ONLY be used for hacks when loading OLDER saved + * state that have data layout or semantic changes without the + * compulsory version number change. Be VERY careful with this + * function since it will return different values for OSE builds! + */ +VMMR3DECL(uint32_t) SSMR3HandleRevision(PSSMHANDLE pSSM) +{ + if (pSSM->enmOp >= SSMSTATE_LOAD_PREP) + return pSSM->u.Read.u32SvnRev; +#ifdef SSM_STANDALONE + return 0; +#else + return VMMGetSvnRev(); +#endif +} + + +/** + * Gets the VirtualBox version that created the saved state. + * + * @returns VBOX_FULL_VERSION style version number. + * Returns UINT32_MAX if unknown or somehow out of range. + * + * @param pSSM The saved state handle. + * + * @remarks This method should ONLY be used for hacks when loading OLDER saved + * state that have data layout or semantic changes without the + * compulsory version number change. + */ +VMMR3DECL(uint32_t) SSMR3HandleVersion(PSSMHANDLE pSSM) +{ + if (pSSM->enmOp >= SSMSTATE_LOAD_PREP) + { + if ( !pSSM->u.Read.u16VerMajor + && !pSSM->u.Read.u16VerMinor + && !pSSM->u.Read.u32VerBuild) + return UINT32_MAX; + AssertReturn(pSSM->u.Read.u16VerMajor <= 0xff, UINT32_MAX); + AssertReturn(pSSM->u.Read.u16VerMinor <= 0xff, UINT32_MAX); + AssertReturn(pSSM->u.Read.u32VerBuild <= 0xffff, UINT32_MAX); + return VBOX_FULL_VERSION_MAKE(pSSM->u.Read.u16VerMajor, pSSM->u.Read.u16VerMinor, pSSM->u.Read.u32VerBuild); + } + return VBOX_FULL_VERSION; +} + + +/** + * Get the host OS and architecture where the saved state was created. + * + * @returns Pointer to a read only string. When known, this is on the os.arch + * form. If we don't know, it's an empty string. + * @param pSSM The saved state handle. + * + * @remarks This method should ONLY be used for hacks when loading OLDER saved + * state that have data layout or semantic changes without the + * compulsory version number change. + */ +VMMR3DECL(const char *) SSMR3HandleHostOSAndArch(PSSMHANDLE pSSM) +{ + if (pSSM->enmOp >= SSMSTATE_LOAD_PREP) + return pSSM->u.Read.szHostOSAndArch; + return KBUILD_TARGET "." KBUILD_TARGET_ARCH; +} + + +#ifndef SSM_STANDALONE +/** + * Asynchronously cancels the current SSM operation ASAP. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success. + * @retval VERR_SSM_NO_PENDING_OPERATION if nothing around that can be + * cancelled. + * @retval VERR_SSM_ALREADY_CANCELLED if the operation as already been + * cancelled. + * + * @param pUVM The VM handle. + * + * @thread Any. 
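+ *
+ * @remarks Illustrative use from another thread while a save or load is in
+ *          flight (pUVM being the user mode VM handle):
+ * @code
+ *      int rc = SSMR3Cancel(pUVM);
+ *      if (rc == VERR_SSM_NO_PENDING_OPERATION)
+ *      {
+ *          // nothing was in progress
+ *      }
+ * @endcode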
+ */ +VMMR3DECL(int) SSMR3Cancel(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + int rc = RTCritSectEnter(&pVM->ssm.s.CancelCritSect); + AssertRCReturn(rc, rc); + + PSSMHANDLE pSSM = pVM->ssm.s.pSSM; + if (pSSM) + { + uint32_t u32Old; + if (ASMAtomicCmpXchgExU32(&pSSM->fCancelled, SSMHANDLE_CANCELLED, SSMHANDLE_OK, &u32Old)) + { + LogRel(("SSM: Cancelled pending operation\n")); + rc = VINF_SUCCESS; + } + else if (u32Old == SSMHANDLE_CANCELLED) + rc = VERR_SSM_ALREADY_CANCELLED; + else + { + AssertLogRelMsgFailed(("fCancelled=%RX32 enmOp=%d\n", u32Old, pSSM->enmOp)); + rc = VERR_SSM_IPE_3; + } + } + else + rc = VERR_SSM_NO_PENDING_OPERATION; + + RTCritSectLeave(&pVM->ssm.s.CancelCritSect); + return rc; +} +#endif /* !SSM_STANDALONE */ + diff --git a/src/VBox/VMM/VMMR3/STAM.cpp b/src/VBox/VMM/VMMR3/STAM.cpp new file mode 100644 index 00000000..91a7ae7b --- /dev/null +++ b/src/VBox/VMM/VMMR3/STAM.cpp @@ -0,0 +1,2916 @@ +/* $Id: STAM.cpp $ */ +/** @file + * STAM - The Statistics Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_stam STAM - The Statistics Manager + * + * The purpose for the statistics manager is to present the rest of the system + * with a somewhat uniform way of accessing VMM statistics. STAM sports a + * couple of different APIs for accessing them: STAMR3EnumU, STAMR3SnapshotU, + * STAMR3DumpU, STAMR3DumpToReleaseLogU and the debugger. Main is exposing the + * XML based one, STAMR3SnapshotU. + * + * The rest of the VMM together with the devices and drivers registers their + * statistics with STAM giving them a name. The name is hierarchical, the + * components separated by slashes ('/') and must start with a slash. + * + * Each item registered with STAM - also, half incorrectly, called a sample - + * has a type, unit, visibility, data pointer and description associated with it + * in addition to the name (described above). The type tells STAM what kind of + * structure the pointer is pointing to. The visibility allows unused + * statistics from cluttering the output or showing up in the GUI. All the bits + * together makes STAM able to present the items in a sensible way to the user. + * Some types also allows STAM to reset the data, which is very convenient when + * digging into specific operations and such. + * + * PS. The VirtualBox Debugger GUI has a viewer for inspecting the statistics + * STAM provides. You will also find statistics in the release and debug logs. + * And as mentioned in the introduction, the debugger console features a couple + * of command: .stats and .statsreset. 
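+ *
+ * As an example of the naming scheme, the GVMM ring-0 counters mapped by this
+ * file are registered under names like "/GVMM/VM/HaltCalls" and
+ * "/GVMM/VM/HaltBlocking" (see g_aGVMMStats below).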
+ * + * @see grp_stam + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_STAM +#include +#include "STAMInternal.h" +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The maximum name length excluding the terminator. */ +#define STAM_MAX_NAME_LEN 239 + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Argument structure for stamR3PrintOne(). + */ +typedef struct STAMR3PRINTONEARGS +{ + PUVM pUVM; + void *pvArg; + DECLCALLBACKMEMBER(void, pfnPrintf)(struct STAMR3PRINTONEARGS *pvArg, const char *pszFormat, ...); +} STAMR3PRINTONEARGS, *PSTAMR3PRINTONEARGS; + + +/** + * Argument structure to stamR3EnumOne(). + */ +typedef struct STAMR3ENUMONEARGS +{ + PVM pVM; + PFNSTAMR3ENUM pfnEnum; + void *pvUser; +} STAMR3ENUMONEARGS, *PSTAMR3ENUMONEARGS; + + +/** + * The snapshot status structure. + * Argument package passed to stamR3SnapshotOne, stamR3SnapshotPrintf and stamR3SnapshotOutput. + */ +typedef struct STAMR3SNAPSHOTONE +{ + /** Pointer to the buffer start. */ + char *pszStart; + /** Pointer to the buffer end. */ + char *pszEnd; + /** Pointer to the current buffer position. */ + char *psz; + /** Pointer to the VM. */ + PVM pVM; + /** The number of bytes allocated. */ + size_t cbAllocated; + /** The status code. */ + int rc; + /** Whether to include the description strings. */ + bool fWithDesc; +} STAMR3SNAPSHOTONE, *PSTAMR3SNAPSHOTONE; + + +/** + * Init record for a ring-0 statistic sample. + */ +typedef struct STAMR0SAMPLE +{ + /** The GVMMSTATS structure offset of the variable. */ + unsigned offVar; + /** The type. */ + STAMTYPE enmType; + /** The unit. */ + STAMUNIT enmUnit; + /** The name. */ + const char *pszName; + /** The description. 
*/ + const char *pszDesc; +} STAMR0SAMPLE; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +#ifdef STAM_WITH_LOOKUP_TREE +static void stamR3LookupDestroyTree(PSTAMLOOKUP pRoot); +#endif +static int stamR3RegisterU(PUVM pUVM, void *pvSample, PFNSTAMR3CALLBACKRESET pfnReset, + PFNSTAMR3CALLBACKPRINT pfnPrint, STAMTYPE enmType, STAMVISIBILITY enmVisibility, + const char *pszName, STAMUNIT enmUnit, const char *pszDesc, uint8_t iRefreshGrp); +static int stamR3ResetOne(PSTAMDESC pDesc, void *pvArg); +static DECLCALLBACK(void) stamR3EnumLogPrintf(PSTAMR3PRINTONEARGS pvArg, const char *pszFormat, ...); +static DECLCALLBACK(void) stamR3EnumRelLogPrintf(PSTAMR3PRINTONEARGS pvArg, const char *pszFormat, ...); +static DECLCALLBACK(void) stamR3EnumPrintf(PSTAMR3PRINTONEARGS pvArg, const char *pszFormat, ...); +static int stamR3SnapshotOne(PSTAMDESC pDesc, void *pvArg); +static int stamR3SnapshotPrintf(PSTAMR3SNAPSHOTONE pThis, const char *pszFormat, ...); +static int stamR3PrintOne(PSTAMDESC pDesc, void *pvArg); +static int stamR3EnumOne(PSTAMDESC pDesc, void *pvArg); +static bool stamR3MultiMatch(const char * const *papszExpressions, unsigned cExpressions, unsigned *piExpression, const char *pszName); +static char ** stamR3SplitPattern(const char *pszPat, unsigned *pcExpressions, char **ppszCopy); +static int stamR3EnumU(PUVM pUVM, const char *pszPat, bool fUpdateRing0, int (pfnCallback)(PSTAMDESC pDesc, void *pvArg), void *pvArg); +static void stamR3Ring0StatsRegisterU(PUVM pUVM); + +#ifdef VBOX_WITH_DEBUGGER +static FNDBGCCMD stamR3CmdStats; +static DECLCALLBACK(void) stamR3EnumDbgfPrintf(PSTAMR3PRINTONEARGS pArgs, const char *pszFormat, ...); +static FNDBGCCMD stamR3CmdStatsReset; +#endif + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifdef VBOX_WITH_DEBUGGER +/** Pattern argument. */ +static const DBGCVARDESC g_aArgPat[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 0, 1, DBGCVAR_CAT_STRING, 0, "pattern", "Which samples the command shall be applied to. Use '*' as wildcard. Use ';' to separate expression." } +}; + +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler pszSyntax, ....pszDescription */ + { "stats", 0, 1, &g_aArgPat[0], RT_ELEMENTS(g_aArgPat), 0, stamR3CmdStats, "[pattern]", "Display statistics." }, + { "statsreset", 0, 1, &g_aArgPat[0], RT_ELEMENTS(g_aArgPat), 0, stamR3CmdStatsReset,"[pattern]", "Resets statistics." } +}; +#endif + + +/** + * The GVMM mapping records - sans the host cpus. + */ +static const STAMR0SAMPLE g_aGVMMStats[] = +{ + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cHaltCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/HaltCalls", "The number of calls to GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cHaltBlocking), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/HaltBlocking", "The number of times we did go to sleep in GVMMR0SchedHalt." 
}, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cHaltTimeouts), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/HaltTimeouts", "The number of times we timed out in GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cHaltNotBlocking), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/HaltNotBlocking", "The number of times we didn't go to sleep in GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cHaltWakeUps), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/HaltWakeUps", "The number of wake ups done during GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cWakeUpCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/WakeUpCalls", "The number of calls to GVMMR0WakeUp." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cWakeUpNotHalted), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/WakeUpNotHalted", "The number of times the EMT thread wasn't actually halted when GVMMR0WakeUp was called." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cWakeUpWakeUps), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/WakeUpWakeUps", "The number of wake ups done during GVMMR0WakeUp (not counting the explicit one)." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cPokeCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/PokeCalls", "The number of calls to GVMMR0Poke." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cPokeNotBusy), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/PokeNotBusy", "The number of times the EMT thread wasn't actually busy when GVMMR0Poke was called." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cPollCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/PollCalls", "The number of calls to GVMMR0SchedPoll." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cPollHalts), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/PollHalts", "The number of times the EMT has halted in a GVMMR0SchedPoll call." }, + { RT_UOFFSETOF(GVMMSTATS, SchedVM.cPollWakeUps), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/VM/PollWakeUps", "The number of wake ups done during GVMMR0SchedPoll." }, + + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cHaltCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/HaltCalls", "The number of calls to GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cHaltBlocking), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/HaltBlocking", "The number of times we did go to sleep in GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cHaltTimeouts), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/HaltTimeouts", "The number of times we timed out in GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cHaltNotBlocking), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/HaltNotBlocking", "The number of times we didn't go to sleep in GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cHaltWakeUps), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/HaltWakeUps", "The number of wake ups done during GVMMR0SchedHalt." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cWakeUpCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/WakeUpCalls", "The number of calls to GVMMR0WakeUp." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cWakeUpNotHalted), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/WakeUpNotHalted", "The number of times the EMT thread wasn't actually halted when GVMMR0WakeUp was called." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cWakeUpWakeUps), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/WakeUpWakeUps", "The number of wake ups done during GVMMR0WakeUp (not counting the explicit one)." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cPokeCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/PokeCalls", "The number of calls to GVMMR0Poke." 
}, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cPokeNotBusy), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/PokeNotBusy", "The number of times the EMT thread wasn't actually busy when GVMMR0Poke was called." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cPollCalls), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/PollCalls", "The number of calls to GVMMR0SchedPoll." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cPollHalts), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/PollHalts", "The number of times the EMT has halted in a GVMMR0SchedPoll call." }, + { RT_UOFFSETOF(GVMMSTATS, SchedSum.cPollWakeUps), STAMTYPE_U64_RESET, STAMUNIT_CALLS, "/GVMM/Sum/PollWakeUps", "The number of wake ups done during GVMMR0SchedPoll." }, + + { RT_UOFFSETOF(GVMMSTATS, cVMs), STAMTYPE_U32, STAMUNIT_CALLS, "/GVMM/VMs", "The number of VMs accessible to the caller." }, + { RT_UOFFSETOF(GVMMSTATS, cEMTs), STAMTYPE_U32, STAMUNIT_CALLS, "/GVMM/EMTs", "The number of emulation threads." }, + { RT_UOFFSETOF(GVMMSTATS, cHostCpus), STAMTYPE_U32, STAMUNIT_CALLS, "/GVMM/HostCPUs", "The number of host CPUs." }, +}; + + +/** + * The GMM mapping records. + */ +static const STAMR0SAMPLE g_aGMMStats[] = +{ + { RT_UOFFSETOF(GMMSTATS, cMaxPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cMaxPages", "The maximum number of pages GMM is allowed to allocate." }, + { RT_UOFFSETOF(GMMSTATS, cReservedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cReservedPages", "The number of pages that has been reserved." }, + { RT_UOFFSETOF(GMMSTATS, cOverCommittedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cOverCommittedPages", "The number of pages that we have over-committed in reservations." }, + { RT_UOFFSETOF(GMMSTATS, cAllocatedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cAllocatedPages", "The number of actually allocated (committed if you like) pages." }, + { RT_UOFFSETOF(GMMSTATS, cSharedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cSharedPages", "The number of pages that are shared. A subset of cAllocatedPages." }, + { RT_UOFFSETOF(GMMSTATS, cDuplicatePages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cDuplicatePages", "The number of pages that are actually shared between VMs." }, + { RT_UOFFSETOF(GMMSTATS, cLeftBehindSharedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cLeftBehindSharedPages", "The number of pages that are shared that has been left behind by VMs not doing proper cleanups." }, + { RT_UOFFSETOF(GMMSTATS, cBalloonedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/cBalloonedPages", "The number of current ballooned pages." }, + { RT_UOFFSETOF(GMMSTATS, cChunks), STAMTYPE_U32, STAMUNIT_COUNT, "/GMM/cChunks", "The number of allocation chunks." }, + { RT_UOFFSETOF(GMMSTATS, cFreedChunks), STAMTYPE_U32, STAMUNIT_COUNT, "/GMM/cFreedChunks", "The number of freed chunks ever." }, + { RT_UOFFSETOF(GMMSTATS, cShareableModules), STAMTYPE_U32, STAMUNIT_COUNT, "/GMM/cShareableModules", "The number of shareable modules." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.Reserved.cBasePages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/Reserved/cBasePages", "The amount of base memory (RAM, ROM, ++) reserved by the VM." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.Reserved.cShadowPages), STAMTYPE_U32, STAMUNIT_PAGES, "/GMM/VM/Reserved/cShadowPages", "The amount of memory reserved for shadow/nested page tables." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.Reserved.cFixedPages), STAMTYPE_U32, STAMUNIT_PAGES, "/GMM/VM/Reserved/cFixedPages", "The amount of memory reserved for fixed allocations like MMIO2 and the hyper heap." 
}, + { RT_UOFFSETOF(GMMSTATS, VMStats.Allocated.cBasePages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/Allocated/cBasePages", "The amount of base memory (RAM, ROM, ++) allocated by the VM." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.Allocated.cShadowPages), STAMTYPE_U32, STAMUNIT_PAGES, "/GMM/VM/Allocated/cShadowPages", "The amount of memory allocated for shadow/nested page tables." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.Allocated.cFixedPages), STAMTYPE_U32, STAMUNIT_PAGES, "/GMM/VM/Allocated/cFixedPages", "The amount of memory allocated for fixed allocations like MMIO2 and the hyper heap." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cPrivatePages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cPrivatePages", "The current number of private pages." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cSharedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cSharedPages", "The current number of shared pages." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cBalloonedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cBalloonedPages", "The current number of ballooned pages." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cMaxBalloonedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cMaxBalloonedPages", "The max number of pages that can be ballooned." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cReqBalloonedPages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cReqBalloonedPages", "The number of pages we've currently requested the guest to give us." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cReqActuallyBalloonedPages),STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cReqActuallyBalloonedPages","The number of pages the guest has given us in response to the request." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cReqDeflatePages), STAMTYPE_U64, STAMUNIT_PAGES, "/GMM/VM/cReqDeflatePages", "The number of pages we've currently requested the guest to take back." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.cShareableModules), STAMTYPE_U32, STAMUNIT_COUNT, "/GMM/VM/cShareableModules", "The number of shareable modules traced by the VM." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.enmPolicy), STAMTYPE_U32, STAMUNIT_NONE, "/GMM/VM/enmPolicy", "The current over-commit policy." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.enmPriority), STAMTYPE_U32, STAMUNIT_NONE, "/GMM/VM/enmPriority", "The VM priority for arbitrating VMs in low and out of memory situation." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.fBallooningEnabled), STAMTYPE_BOOL, STAMUNIT_NONE, "/GMM/VM/fBallooningEnabled", "Whether ballooning is enabled or not." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.fSharedPagingEnabled), STAMTYPE_BOOL, STAMUNIT_NONE, "/GMM/VM/fSharedPagingEnabled", "Whether shared paging is enabled or not." }, + { RT_UOFFSETOF(GMMSTATS, VMStats.fMayAllocate), STAMTYPE_BOOL, STAMUNIT_NONE, "/GMM/VM/fMayAllocate", "Whether the VM is allowed to allocate memory or not." }, +}; + + +/** + * Initializes the STAM. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + */ +VMMR3DECL(int) STAMR3InitUVM(PUVM pUVM) +{ + LogFlow(("STAMR3Init\n")); + + /* + * Assert alignment and sizes. + */ + AssertCompile(sizeof(pUVM->stam.s) <= sizeof(pUVM->stam.padding)); + AssertRelease(sizeof(pUVM->stam.s) <= sizeof(pUVM->stam.padding)); + + /* + * Initialize the read/write lock and list. + */ + int rc = RTSemRWCreate(&pUVM->stam.s.RWSem); + AssertRCReturn(rc, rc); + + RTListInit(&pUVM->stam.s.List); + +#ifdef STAM_WITH_LOOKUP_TREE + /* + * Initialize the root node. 
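As a rough sketch of how the g_aGVMMStats / g_aGMMStats mapping tables above are consumed: offVar indexes into a ring-0 statistics copy kept per UVM, so registration boils down to a loop like the one below. This is an illustration, not the literal stamR3Ring0StatsRegisterU() found later in this file; in particular the pUVM->stam.s.GVMMStats member name and the STAM_REFRESH_GRP_GVMM refresh group are assumptions.

    for (unsigned i = 0; i < RT_ELEMENTS(g_aGVMMStats); i++)
        stamR3RegisterU(pUVM,
                        (uint8_t *)&pUVM->stam.s.GVMMStats + g_aGVMMStats[i].offVar, /* assumed member */
                        NULL /*pfnReset*/, NULL /*pfnPrint*/,
                        g_aGVMMStats[i].enmType, STAMVISIBILITY_ALWAYS,
                        g_aGVMMStats[i].pszName, g_aGVMMStats[i].enmUnit,
                        g_aGVMMStats[i].pszDesc, STAM_REFRESH_GRP_GVMM /* assumed group */);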
+ */ + PSTAMLOOKUP pRoot = (PSTAMLOOKUP)RTMemAlloc(sizeof(STAMLOOKUP)); + if (!pRoot) + { + RTSemRWDestroy(pUVM->stam.s.RWSem); + pUVM->stam.s.RWSem = NIL_RTSEMRW; + return VERR_NO_MEMORY; + } + pRoot->pParent = NULL; + pRoot->papChildren = NULL; + pRoot->pDesc = NULL; + pRoot->cDescsInTree = 0; + pRoot->cChildren = 0; + pRoot->iParent = UINT16_MAX; + pRoot->off = 0; + pRoot->cch = 0; + pRoot->szName[0] = '\0'; + + pUVM->stam.s.pRoot = pRoot; +#endif + + + /* + * Register the ring-0 statistics (GVMM/GMM). + */ + stamR3Ring0StatsRegisterU(pUVM); + +#ifdef VBOX_WITH_DEBUGGER + /* + * Register debugger commands. + */ + static bool fRegisteredCmds = false; + if (!fRegisteredCmds) + { + rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + if (RT_SUCCESS(rc)) + fRegisteredCmds = true; + } +#endif + + return VINF_SUCCESS; +} + + +/** + * Terminates the STAM. + * + * @param pUVM Pointer to the user mode VM structure. + */ +VMMR3DECL(void) STAMR3TermUVM(PUVM pUVM) +{ + /* + * Free used memory and the RWLock. + */ + PSTAMDESC pCur, pNext; + RTListForEachSafe(&pUVM->stam.s.List, pCur, pNext, STAMDESC, ListEntry) + { +#ifdef STAM_WITH_LOOKUP_TREE + pCur->pLookup->pDesc = NULL; +#endif + RTMemFree(pCur); + } + +#ifdef STAM_WITH_LOOKUP_TREE + stamR3LookupDestroyTree(pUVM->stam.s.pRoot); + pUVM->stam.s.pRoot = NULL; +#endif + + Assert(pUVM->stam.s.RWSem != NIL_RTSEMRW); + RTSemRWDestroy(pUVM->stam.s.RWSem); + pUVM->stam.s.RWSem = NIL_RTSEMRW; +} + + +/** + * Registers a sample with the statistics manager. + * + * Statistics are maintained on a per VM basis and is normally registered + * during the VM init stage, but there is nothing preventing you from + * register them at runtime. + * + * Use STAMR3Deregister() to deregister statistics at runtime, however do + * not bother calling at termination time. + * + * It is not possible to register the same sample twice. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param pszName Sample name. The name is on this form "//". + * Further nesting is possible. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + */ +VMMR3DECL(int) STAMR3RegisterU(PUVM pUVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, const char *pszName, + STAMUNIT enmUnit, const char *pszDesc) +{ + AssertReturn(enmType != STAMTYPE_CALLBACK, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + return stamR3RegisterU(pUVM, pvSample, NULL, NULL, enmType, enmVisibility, pszName, enmUnit, pszDesc, STAM_REFRESH_GRP_NONE); +} + + +/** + * Registers a sample with the statistics manager. + * + * Statistics are maintained on a per VM basis and is normally registered + * during the VM init stage, but there is nothing preventing you from + * register them at runtime. + * + * Use STAMR3Deregister() to deregister statistics at runtime, however do + * not bother calling at termination time. + * + * It is not possible to register the same sample twice. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. 
+ * @param pszName Sample name. The name is on this form "//". + * Further nesting is possible. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + */ +VMMR3DECL(int) STAMR3Register(PVM pVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, const char *pszName, + STAMUNIT enmUnit, const char *pszDesc) +{ + AssertReturn(enmType != STAMTYPE_CALLBACK, VERR_INVALID_PARAMETER); + return stamR3RegisterU(pVM->pUVM, pvSample, NULL, NULL, enmType, enmVisibility, pszName, enmUnit, pszDesc, + STAM_REFRESH_GRP_NONE); +} + + +/** + * Same as STAMR3RegisterU except that the name is specified in a + * RTStrPrintf like fashion. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param ... Arguments to the format string. + */ +VMMR3DECL(int) STAMR3RegisterFU(PUVM pUVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + const char *pszDesc, const char *pszName, ...) +{ + va_list args; + va_start(args, pszName); + int rc = STAMR3RegisterVU(pUVM, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); + va_end(args); + return rc; +} + + +/** + * Same as STAMR3Register except that the name is specified in a + * RTStrPrintf like fashion. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param ... Arguments to the format string. + */ +VMMR3DECL(int) STAMR3RegisterF(PVM pVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + const char *pszDesc, const char *pszName, ...) +{ + va_list args; + va_start(args, pszName); + int rc = STAMR3RegisterVU(pVM->pUVM, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); + va_end(args); + return rc; +} + + +/** + * Same as STAMR3Register except that the name is specified in a + * RTStrPrintfV like fashion. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param args Arguments to the format string. 
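A hedged usage sketch of the registration entry points documented above, as a device model might call them. The StatIrqs / StatIoReadTime fields, the "/Devices/Foo" branch and iInstance belong to a hypothetical device state; STAMR3Register, STAMR3RegisterF and the STAMTYPE_/STAMUNIT_ constants are the API declared here.

    int rc = STAMR3Register(pVM, &pThis->StatIrqs, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                            "/Devices/Foo/Irqs", STAMUNIT_OCCURENCES, "Number of interrupts raised.");
    AssertRC(rc);

    rc = STAMR3RegisterF(pVM, &pThis->StatIoReadTime, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
                         STAMUNIT_TICKS_PER_CALL, "Profiling of I/O port reads.",
                         "/Devices/Foo/%u/IoReadTime", pThis->iInstance);
    AssertRC(rc);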
+ */ +VMMR3DECL(int) STAMR3RegisterVU(PUVM pUVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + const char *pszDesc, const char *pszName, va_list args) +{ + AssertReturn(enmType != STAMTYPE_CALLBACK, VERR_INVALID_PARAMETER); + + char szFormattedName[STAM_MAX_NAME_LEN + 8]; + size_t cch = RTStrPrintfV(szFormattedName, sizeof(szFormattedName), pszName, args); + AssertReturn(cch <= STAM_MAX_NAME_LEN, VERR_OUT_OF_RANGE); + + return STAMR3RegisterU(pUVM, pvSample, enmType, enmVisibility, szFormattedName, enmUnit, pszDesc); +} + + +/** + * Same as STAMR3Register except that the name is specified in a + * RTStrPrintfV like fashion. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param args Arguments to the format string. + */ +VMMR3DECL(int) STAMR3RegisterV(PVM pVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + const char *pszDesc, const char *pszName, va_list args) +{ + return STAMR3RegisterVU(pVM->pUVM, pvSample, enmType, enmVisibility, enmUnit, pszDesc, pszName, args); +} + + +/** + * Similar to STAMR3Register except for the two callbacks, the implied type (STAMTYPE_CALLBACK), + * and name given in an RTStrPrintf like fashion. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvSample Pointer to the sample. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param pfnReset Callback for resetting the sample. NULL should be used if the sample can't be reset. + * @param pfnPrint Print the sample. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param ... Arguments to the format string. + * @remark There is currently no device or driver variant of this API. Add one if it should become necessary! + */ +VMMR3DECL(int) STAMR3RegisterCallback(PVM pVM, void *pvSample, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + PFNSTAMR3CALLBACKRESET pfnReset, PFNSTAMR3CALLBACKPRINT pfnPrint, + const char *pszDesc, const char *pszName, ...) +{ + va_list args; + va_start(args, pszName); + int rc = STAMR3RegisterCallbackV(pVM, pvSample, enmVisibility, enmUnit, pfnReset, pfnPrint, pszDesc, pszName, args); + va_end(args); + return rc; +} + + +/** + * Same as STAMR3RegisterCallback() except for the ellipsis which is a va_list here. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pvSample Pointer to the sample. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param pfnReset Callback for resetting the sample. NULL should be used if the sample can't be reset. + * @param pfnPrint Print the sample. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param args Arguments to the format string. + * @remark There is currently no device or driver variant of this API. Add one if it should become necessary! 
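Below is a sketch of how the callback variant documented above might be used for a value that cannot be exposed as a plain integer. The FOOSTATE type and fooStatPrint helper are invented; the print callback shape follows how pfnPrint is invoked later in this file (pVM, pvSample, buffer, buffer size).

    typedef struct FOOSTATE { uint32_t cUsed, cTotal; uint32_t iInstance; } FOOSTATE; /* hypothetical */

    static DECLCALLBACK(void) fooStatPrint(PVM pVM, void *pvSample, char *pszBuf, size_t cbBuf)
    {
        RT_NOREF(pVM);
        FOOSTATE *pThis = (FOOSTATE *)pvSample;
        RTStrPrintf(pszBuf, cbBuf, "%u/%u slots used", pThis->cUsed, pThis->cTotal);
    }

    /* ... during init ... */
    int rc = STAMR3RegisterCallback(pVM, pThis, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
                                    NULL /*pfnReset*/, fooStatPrint,
                                    "Slot usage.", "/Devices/Foo/%u/Slots", pThis->iInstance);
    AssertRC(rc);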
+ */ +VMMR3DECL(int) STAMR3RegisterCallbackV(PVM pVM, void *pvSample, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + PFNSTAMR3CALLBACKRESET pfnReset, PFNSTAMR3CALLBACKPRINT pfnPrint, + const char *pszDesc, const char *pszName, va_list args) +{ + char *pszFormattedName; + RTStrAPrintfV(&pszFormattedName, pszName, args); + if (!pszFormattedName) + return VERR_NO_MEMORY; + + int rc = stamR3RegisterU(pVM->pUVM, pvSample, pfnReset, pfnPrint, STAMTYPE_CALLBACK, enmVisibility, pszFormattedName, + enmUnit, pszDesc, STAM_REFRESH_GRP_NONE); + RTStrFree(pszFormattedName); + return rc; +} + + +/** + * Same as STAMR3RegisterFU, except there is an extra refresh group parameter. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param iRefreshGrp The refresh group, STAM_REFRESH_GRP_XXX. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param ... Arguments to the format string. + */ +VMMR3DECL(int) STAMR3RegisterRefresh(PUVM pUVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + uint8_t iRefreshGrp, const char *pszDesc, const char *pszName, ...) +{ + va_list args; + va_start(args, pszName); + int rc = STAMR3RegisterRefreshV(pUVM, pvSample, enmType, enmVisibility, enmUnit, iRefreshGrp, pszDesc, pszName, args); + va_end(args); + return rc; +} + + +/** + * Same as STAMR3RegisterVU, except there is an extra refresh group parameter. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param pvSample Pointer to the sample. + * @param enmType Sample type. This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param enmUnit Sample unit. + * @param iRefreshGrp The refresh group, STAM_REFRESH_GRP_XXX. + * @param pszDesc Sample description. + * @param pszName The sample name format string. + * @param va Arguments to the format string. + */ +VMMR3DECL(int) STAMR3RegisterRefreshV(PUVM pUVM, void *pvSample, STAMTYPE enmType, STAMVISIBILITY enmVisibility, STAMUNIT enmUnit, + uint8_t iRefreshGrp, const char *pszDesc, const char *pszName, va_list va) +{ + AssertReturn(enmType != STAMTYPE_CALLBACK, VERR_INVALID_PARAMETER); + + char szFormattedName[STAM_MAX_NAME_LEN + 8]; + size_t cch = RTStrPrintfV(szFormattedName, sizeof(szFormattedName), pszName, va); + AssertReturn(cch <= STAM_MAX_NAME_LEN, VERR_OUT_OF_RANGE); + + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + return stamR3RegisterU(pUVM, pvSample, NULL, NULL, enmType, enmVisibility, pszName, enmUnit, pszDesc, iRefreshGrp); +} + + +#ifdef VBOX_STRICT +/** + * Divide the strings into sub-strings using '/' as delimiter + * and then compare them in strcmp fashion. + * + * @returns Difference. + * @retval 0 if equal. + * @retval < 0 if psz1 is less than psz2. + * @retval > 0 if psz1 greater than psz2. + * + * @param psz1 The first string. + * @param psz2 The second string. + */ +static int stamR3SlashCompare(const char *psz1, const char *psz2) +{ + for (;;) + { + unsigned int ch1 = *psz1++; + unsigned int ch2 = *psz2++; + if (ch1 != ch2) + { + /* slash is end-of-sub-string, so it trumps everything but '\0'. */ + if (ch1 == '/') + return ch2 ? 
-1 : 1; + if (ch2 == '/') + return ch1 ? 1 : -1; + return ch1 - ch2; + } + + /* done? */ + if (ch1 == '\0') + return 0; + } +} +#endif /* VBOX_STRICT */ + + +#ifdef STAM_WITH_LOOKUP_TREE + +/** + * Compares a lookup node with a name. + * + * @returns like strcmp and memcmp. + * @param pNode The lookup node. + * @param pchName The name, not necessarily terminated. + * @param cchName The length of the name. + */ +DECL_FORCE_INLINE(int) stamR3LookupCmp(PSTAMLOOKUP pNode, const char *pchName, uint32_t cchName) +{ + uint32_t cchComp = RT_MIN(pNode->cch, cchName); + int iDiff = memcmp(pNode->szName, pchName, cchComp); + if (!iDiff && pNode->cch != cchName) + iDiff = pNode->cch > cchName ? 2 : -2; + return iDiff; +} + + +/** + * Creates a new lookup child node. + * + * @returns Pointer to the newly created lookup node. + * @param pParent The parent node. + * @param pchName The name (not necessarily terminated). + * @param cchName The length of the name. + * @param offName The offset of the node in a path. + * @param iChild Child index of a node that's before the one + * we're inserting (returned by + * stamR3LookupFindChild). + */ +static PSTAMLOOKUP stamR3LookupNewChild(PSTAMLOOKUP pParent, const char *pchName, uint32_t cchName, uint32_t offName, + uint32_t iChild) +{ + Assert(cchName <= UINT8_MAX); + Assert(offName <= UINT8_MAX); + Assert(iChild < UINT16_MAX); + + /* + * Allocate a new entry. + */ + PSTAMLOOKUP pNew = (PSTAMLOOKUP)RTMemAlloc(RT_UOFFSETOF_DYN(STAMLOOKUP, szName[cchName + 1])); + if (!pNew) + return NULL; + pNew->pParent = pParent; + pNew->papChildren = NULL; + pNew->pDesc = NULL; + pNew->cDescsInTree = 0; + pNew->cChildren = 0; + pNew->cch = (uint16_t)cchName; + pNew->off = (uint16_t)offName; + memcpy(pNew->szName, pchName, cchName); + pNew->szName[cchName] = '\0'; + + /* + * Reallocate the array? + */ + if (RT_IS_POWER_OF_TWO(pParent->cChildren)) + { + uint32_t cNew = pParent->cChildren ? (uint32_t)pParent->cChildren * 2 : 8; + AssertReturnStmt(cNew <= 0x8000, RTMemFree(pNew), NULL); + void *pvNew = RTMemRealloc(pParent->papChildren, cNew * sizeof(pParent->papChildren[0])); + if (!pvNew) + { + RTMemFree(pNew); + return NULL; + } + pParent->papChildren = (PSTAMLOOKUP *)pvNew; + } + + /* + * Find the exact insertion point using iChild as a very good clue from + * the find function. + */ + if (!pParent->cChildren) + iChild = 0; + else + { + if (iChild >= pParent->cChildren) + iChild = pParent->cChildren - 1; + while ( iChild < pParent->cChildren + && stamR3LookupCmp(pParent->papChildren[iChild], pchName, cchName) < 0) + iChild++; + } + + /* + * Insert it. + */ + if (iChild < pParent->cChildren) + { + /* Do shift. */ + uint32_t i = pParent->cChildren; + while (i > iChild) + { + PSTAMLOOKUP pNode = pParent->papChildren[i - 1]; + pParent->papChildren[i] = pNode; + pNode->iParent = i; + i--; + } + } + + pNew->iParent = iChild; + pParent->papChildren[iChild] = pNew; + pParent->cChildren++; + + return pNew; +} + + +/** + * Looks up a child. + * + * @returns Pointer to child node if found, NULL if not. + * @param pParent The parent node. + * @param pchName The name (not necessarily terminated). + * @param cchName The length of the name. + * @param piChild Where to store a child index suitable for + * passing to stamR3LookupNewChild when NULL is + * returned. 
+ */ +static PSTAMLOOKUP stamR3LookupFindChild(PSTAMLOOKUP pParent, const char *pchName, uint32_t cchName, uint32_t *piChild) +{ + uint32_t iChild = pParent->cChildren; + if (iChild > 4) + { + uint32_t iFirst = 0; + uint32_t iEnd = iChild; + iChild /= 2; + for (;;) + { + int iDiff = stamR3LookupCmp(pParent->papChildren[iChild], pchName, cchName); + if (!iDiff) + { + if (piChild) + *piChild = iChild; + return pParent->papChildren[iChild]; + } + + /* Split. */ + if (iDiff < 0) + { + iFirst = iChild + 1; + if (iFirst >= iEnd) + { + if (piChild) + *piChild = iChild; + break; + } + } + else + { + if (iChild == iFirst) + { + if (piChild) + *piChild = iChild ? iChild - 1 : 0; + break; + } + iEnd = iChild; + } + + /* Calc next child. */ + iChild = (iEnd - iFirst) / 2 + iFirst; + } + return NULL; + } + + /* + * Linear search. + */ + while (iChild-- > 0) + { + int iDiff = stamR3LookupCmp(pParent->papChildren[iChild], pchName, cchName); + if (iDiff <= 0) + { + if (piChild) + *piChild = iChild; + return !iDiff ? pParent->papChildren[iChild] : NULL; + } + } + if (piChild) + *piChild = 0; + return NULL; +} + + +/** + * Find the next sample descriptor node. + * + * This is for use with insertion in the big list and pattern range lookups. + * + * @returns Pointer to the next sample descriptor. NULL if not found (i.e. + * we're at the end of the list). + * @param pLookup The current node. + */ +static PSTAMDESC stamR3LookupFindNextWithDesc(PSTAMLOOKUP pLookup) +{ + Assert(!pLookup->pDesc); + PSTAMLOOKUP pCur = pLookup; + uint32_t iCur = 0; + for (;;) + { + /* + * Check all children. + */ + uint32_t cChildren = pCur->cChildren; + if (iCur < cChildren) + { + PSTAMLOOKUP *papChildren = pCur->papChildren; + do + { + PSTAMLOOKUP pChild = papChildren[iCur]; + if (pChild->pDesc) + return pChild->pDesc; + + if (pChild->cChildren > 0) + { + /* One level down. */ + iCur = 0; + pCur = pChild; + break; + } + } while (++iCur < cChildren); + } + else + { + /* + * One level up, resuming after the current. + */ + iCur = pCur->iParent + 1; + pCur = pCur->pParent; + if (!pCur) + return NULL; + } + } +} + + +/** + * Look up a sample descriptor by name. + * + * @returns Pointer to a sample descriptor. + * @param pRoot The root node. + * @param pszName The name to lookup. + */ +static PSTAMDESC stamR3LookupFindDesc(PSTAMLOOKUP pRoot, const char *pszName) +{ + Assert(!pRoot->pParent); + while (*pszName++ == '/') + { + const char *pszEnd = strchr(pszName, '/'); + uint32_t cch = pszEnd ? pszEnd - pszName : (uint32_t)strlen(pszName); + PSTAMLOOKUP pChild = stamR3LookupFindChild(pRoot, pszName, cch, NULL); + if (!pChild) + break; + if (!pszEnd) + return pChild->pDesc; + pszName = pszEnd; + pRoot = pChild; + } + + return NULL; +} + + +/** + * Finds the first sample descriptor for a given lookup range. + * + * This is for pattern range lookups. + * + * @returns Pointer to the first descriptor. + * @param pFirst The first node in the range. + * @param pLast The last node in the range. + */ +static PSTAMDESC stamR3LookupFindFirstDescForRange(PSTAMLOOKUP pFirst, PSTAMLOOKUP pLast) +{ + if (pFirst->pDesc) + return pFirst->pDesc; + + PSTAMLOOKUP pCur = pFirst; + uint32_t iCur = 0; + for (;;) + { + uint32_t cChildren = pCur->cChildren; + if (iCur < pCur->cChildren) + { + /* + * Check all children. + */ + PSTAMLOOKUP * const papChildren = pCur->papChildren; + do + { + PSTAMLOOKUP pChild = papChildren[iCur]; + if (pChild->pDesc) + return pChild->pDesc; + if (pChild->cChildren > 0) + { + /* One level down. 
*/ + iCur = 0; + pCur = pChild; + break; + } + if (pChild == pLast) + return NULL; + } while (++iCur < cChildren); + } + else + { + /* + * One level up, checking current and its 'older' sibilings. + */ + if (pCur == pLast) + return NULL; + iCur = pCur->iParent + 1; + pCur = pCur->pParent; + if (!pCur) + break; + } + } + + return NULL; +} + + +/** + * Finds the first sample descriptor for a given lookup range. + * + * This is for pattern range lookups. + * + * @returns Pointer to the first descriptor. + * @param pFirst The first node in the range. + * @param pLast The last node in the range. + */ +static PSTAMDESC stamR3LookupFindLastDescForRange(PSTAMLOOKUP pFirst, PSTAMLOOKUP pLast) +{ + PSTAMLOOKUP pCur = pLast; + uint32_t iCur = pCur->cChildren - 1; + for (;;) + { + if (iCur < pCur->cChildren) + { + /* + * Check children backwards, depth first. + */ + PSTAMLOOKUP * const papChildren = pCur->papChildren; + do + { + PSTAMLOOKUP pChild = papChildren[iCur]; + if (pChild->cChildren > 0) + { + /* One level down. */ + iCur = pChild->cChildren - 1; + pCur = pChild; + break; + } + + if (pChild->pDesc) + return pChild->pDesc; + if (pChild == pFirst) + return NULL; + } while (iCur-- > 0); /* (underflow handled above) */ + } + else + { + /* + * One level up, checking current and its 'older' sibilings. + */ + if (pCur->pDesc) + return pCur->pDesc; + if (pCur == pFirst) + return NULL; + iCur = pCur->iParent - 1; /* (underflow handled above) */ + pCur = pCur->pParent; + if (!pCur) + break; + } + } + + return NULL; +} + + +/** + * Look up the first and last descriptors for a (single) pattern expression. + * + * This is used to optimize pattern enumerations and doesn't have to return 100% + * accurate results if that costs too much. + * + * @returns Pointer to the first descriptor in the range. + * @param pRoot The root node. + * @param pList The descriptor list anchor. + * @param pszPat The name patter to lookup. + * @param ppLastDesc Where to store the address of the last + * descriptor (approximate). + */ +static PSTAMDESC stamR3LookupFindPatternDescRange(PSTAMLOOKUP pRoot, PRTLISTANCHOR pList, const char *pszPat, + PSTAMDESC *ppLastDesc) +{ + Assert(!pRoot->pParent); + + /* + * If there is an early enough wildcard, the whole list needs to be searched. + */ + if ( pszPat[0] == '*' || pszPat[0] == '?' + || pszPat[1] == '*' || pszPat[1] == '?') + { + *ppLastDesc = RTListGetLast(pList, STAMDESC, ListEntry); + return RTListGetFirst(pList, STAMDESC, ListEntry); + } + + /* + * All statistics starts with a slash. + */ + while ( *pszPat++ == '/' + && pRoot->cDescsInTree > 0 + && pRoot->cChildren > 0) + { + const char *pszEnd = strchr(pszPat, '/'); + uint32_t cch = pszEnd ? pszEnd - pszPat : (uint32_t)strlen(pszPat); + if (!cch) + break; + + const char *pszPat1 = (const char *)memchr(pszPat, '*', cch); + const char *pszPat2 = (const char *)memchr(pszPat, '?', cch); + if (pszPat1 || pszPat2) + { + /* We've narrowed it down to a sub-tree now. */ + PSTAMLOOKUP pFirst = pRoot->papChildren[0]; + PSTAMLOOKUP pLast = pRoot->papChildren[pRoot->cChildren - 1]; + /** @todo narrow the range further if both pszPat1/2 != pszPat. */ + + *ppLastDesc = stamR3LookupFindLastDescForRange(pFirst, pLast); + return stamR3LookupFindFirstDescForRange(pFirst, pLast); + } + + PSTAMLOOKUP pChild = stamR3LookupFindChild(pRoot, pszPat, cch, NULL); + if (!pChild) + break; + + /* Advance */ + if (!pszEnd) + return *ppLastDesc = pChild->pDesc; + pszPat = pszEnd; + pRoot = pChild; + } + + /* No match. 
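To make the tree walk concrete, a short sketch of what stamR3LookupFindDesc() above does for one of the ring-0 names registered earlier (node contents are illustrative):

    /* Looking up "/GVMM/VM/HaltCalls":
     *     root -> "GVMM" -> "VM" -> "HaltCalls"
     * i.e. three stamR3LookupFindChild() calls, each a binary search over the
     * parent's papChildren array; the final node's pDesc is returned, and any
     * miss along the way yields NULL. */
    PSTAMDESC pDesc = stamR3LookupFindDesc(pUVM->stam.s.pRoot, "/GVMM/VM/HaltCalls");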
*/ + *ppLastDesc = NULL; + return NULL; +} + + +/** + * Increments the cDescInTree member of the given node an all ancestors. + * + * @param pLookup The lookup node. + */ +static void stamR3LookupIncUsage(PSTAMLOOKUP pLookup) +{ + Assert(pLookup->pDesc); + + PSTAMLOOKUP pCur = pLookup; + while (pCur != NULL) + { + pCur->cDescsInTree++; + pCur = pCur->pParent; + } +} + + +/** + * Descrements the cDescInTree member of the given node an all ancestors. + * + * @param pLookup The lookup node. + */ +static void stamR3LookupDecUsage(PSTAMLOOKUP pLookup) +{ + Assert(!pLookup->pDesc); + + PSTAMLOOKUP pCur = pLookup; + while (pCur != NULL) + { + Assert(pCur->cDescsInTree > 0); + pCur->cDescsInTree--; + pCur = pCur->pParent; + } +} + + +/** + * Frees empty lookup nodes if it's worth it. + * + * @param pLookup The lookup node. + */ +static void stamR3LookupMaybeFree(PSTAMLOOKUP pLookup) +{ + Assert(!pLookup->pDesc); + + /* + * Free between two and three levels of nodes. Freeing too much most + * likely wasted effort since we're either going to repopluate the tree + * or quit the whole thing. + */ + if (pLookup->cDescsInTree > 0) + return; + + PSTAMLOOKUP pCur = pLookup->pParent; + if (!pCur) + return; + if (pCur->cDescsInTree > 0) + return; + PSTAMLOOKUP pParent = pCur->pParent; + if (!pParent) + return; + + if (pParent->cDescsInTree == 0 && pParent->pParent) + { + pCur = pParent; + pParent = pCur->pParent; + } + + /* + * Remove pCur from pParent. + */ + PSTAMLOOKUP *papChildren = pParent->papChildren; + uint32_t cChildren = --pParent->cChildren; + for (uint32_t i = pCur->iParent; i < cChildren; i++) + { + PSTAMLOOKUP pChild = papChildren[i + 1]; + pChild->iParent = i; + papChildren[i] = pChild; + } + pCur->pParent = NULL; + pCur->iParent = UINT16_MAX; + + /* + * Destroy pCur. + */ + stamR3LookupDestroyTree(pCur); +} + + +/** + * Destroys a lookup tree. + * + * This is used by STAMR3Term as well as stamR3LookupMaybeFree. + * + * @param pRoot The root of the tree (must have no parent). + */ +static void stamR3LookupDestroyTree(PSTAMLOOKUP pRoot) +{ + Assert(pRoot); Assert(!pRoot->pParent); + PSTAMLOOKUP pCur = pRoot; + for (;;) + { + uint32_t i = pCur->cChildren; + if (i > 0) + { + /* + * Push child (with leaf optimization). + */ + PSTAMLOOKUP pChild = pCur->papChildren[--i]; + if (pChild->cChildren != 0) + pCur = pChild; + else + { + /* free leaves. */ + for (;;) + { + if (pChild->papChildren) + { + RTMemFree(pChild->papChildren); + pChild->papChildren = NULL; + } + RTMemFree(pChild); + pCur->papChildren[i] = NULL; + + /* next */ + if (i == 0) + { + pCur->cChildren = 0; + break; + } + pChild = pCur->papChildren[--i]; + if (pChild->cChildren != 0) + { + pCur->cChildren = i + 1; + pCur = pChild; + break; + } + } + } + } + else + { + /* + * Pop and free current. + */ + Assert(!pCur->pDesc); + + PSTAMLOOKUP pParent = pCur->pParent; + Assert(pCur->iParent == (pParent ? pParent->cChildren - 1 : UINT16_MAX)); + + RTMemFree(pCur->papChildren); + pCur->papChildren = NULL; + RTMemFree(pCur); + + pCur = pParent; + if (!pCur) + break; + pCur->papChildren[--pCur->cChildren] = NULL; + } + } +} + +#endif /* STAM_WITH_LOOKUP_TREE */ + + + +/** + * Internal worker for the different register calls. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pvSample Pointer to the sample. + * @param pfnReset Callback for resetting the sample. NULL should be used if the sample can't be reset. + * @param pfnPrint Print the sample. + * @param enmType Sample type. 
This indicates what pvSample is pointing at. + * @param enmVisibility Visibility type specifying whether unused statistics should be visible or not. + * @param pszName The sample name format string. + * @param enmUnit Sample unit. + * @param pszDesc Sample description. + * @param iRefreshGrp The refresh group, STAM_REFRESH_GRP_XXX. + * @remark There is currently no device or driver variant of this API. Add one if it should become necessary! + */ +static int stamR3RegisterU(PUVM pUVM, void *pvSample, PFNSTAMR3CALLBACKRESET pfnReset, PFNSTAMR3CALLBACKPRINT pfnPrint, + STAMTYPE enmType, STAMVISIBILITY enmVisibility, + const char *pszName, STAMUNIT enmUnit, const char *pszDesc, uint8_t iRefreshGrp) +{ + AssertReturn(pszName[0] == '/', VERR_INVALID_NAME); + AssertReturn(pszName[1] != '/' && pszName[1], VERR_INVALID_NAME); + uint32_t const cchName = (uint32_t)strlen(pszName); + AssertReturn(cchName <= STAM_MAX_NAME_LEN, VERR_OUT_OF_RANGE); + AssertReturn(pszName[cchName - 1] != '/', VERR_INVALID_NAME); + AssertReturn(memchr(pszName, '\\', cchName) == NULL, VERR_INVALID_NAME); + AssertReturn(iRefreshGrp == STAM_REFRESH_GRP_NONE || iRefreshGrp < 64, VERR_INVALID_PARAMETER); + + STAM_LOCK_WR(pUVM); + + /* + * Look up the tree location, populating the lookup tree as we walk it. + */ +#ifdef STAM_WITH_LOOKUP_TREE + PSTAMLOOKUP pLookup = pUVM->stam.s.pRoot; Assert(pLookup); + uint32_t offName = 1; + for (;;) + { + /* Get the next part of the path. */ + const char *pszStart = &pszName[offName]; + const char *pszEnd = strchr(pszStart, '/'); + uint32_t cch = pszEnd ? (uint32_t)(pszEnd - pszStart) : cchName - offName; + if (cch == 0) + { + STAM_UNLOCK_WR(pUVM); + AssertMsgFailed(("No double or trailing slashes are allowed: '%s'\n", pszName)); + return VERR_INVALID_NAME; + } + + /* Do the looking up. */ + uint32_t iChild = 0; + PSTAMLOOKUP pChild = stamR3LookupFindChild(pLookup, pszStart, cch, &iChild); + if (!pChild) + { + pChild = stamR3LookupNewChild(pLookup, pszStart, cch, offName, iChild); + if (!pChild) + { + STAM_UNLOCK_WR(pUVM); + return VERR_NO_MEMORY; + } + } + + /* Advance. */ + pLookup = pChild; + if (!pszEnd) + break; + offName += cch + 1; + } + if (pLookup->pDesc) + { + STAM_UNLOCK_WR(pUVM); + AssertMsgFailed(("Duplicate sample name: %s\n", pszName)); + return VERR_ALREADY_EXISTS; + } + + PSTAMDESC pCur = stamR3LookupFindNextWithDesc(pLookup); + +#else + PSTAMDESC pCur; + RTListForEach(&pUVM->stam.s.List, pCur, STAMDESC, ListEntry) + { + int iDiff = strcmp(pCur->pszName, pszName); + /* passed it */ + if (iDiff > 0) + break; + /* found it. */ + if (!iDiff) + { + STAM_UNLOCK_WR(pUVM); + AssertMsgFailed(("Duplicate sample name: %s\n", pszName)); + return VERR_ALREADY_EXISTS; + } + } +#endif + + /* + * Check that the name doesn't screw up sorting order when taking + * slashes into account. The QT GUI makes some assumptions. + * Problematic chars are: !"#$%&'()*+,-. + */ +#ifdef VBOX_STRICT + Assert(pszName[0] == '/'); + PSTAMDESC pPrev = pCur + ? RTListGetPrev(&pUVM->stam.s.List, pCur, STAMDESC, ListEntry) + : RTListGetLast(&pUVM->stam.s.List, STAMDESC, ListEntry); + Assert(!pPrev || strcmp(pszName, pPrev->pszName) > 0); + Assert(!pCur || strcmp(pszName, pCur->pszName) < 0); + Assert(!pPrev || stamR3SlashCompare(pPrev->pszName, pszName) < 0); + Assert(!pCur || stamR3SlashCompare(pCur->pszName, pszName) > 0); + + /* + * Check alignment requirements. 
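A few concrete inputs and how the name validation at the top of stamR3RegisterU() treats them (derived from the asserts and the lookup-tree walk above):

    /*  "/PGM/Pool/Grow"   -> accepted, creating the nodes PGM -> Pool -> Grow
     *  "PGM/Pool/Grow"    -> VERR_INVALID_NAME   (must start with a slash)
     *  "/PGM//Grow"       -> VERR_INVALID_NAME   (empty path component)
     *  "/PGM/Pool/Grow/"  -> VERR_INVALID_NAME   (trailing slash)
     *  same name twice    -> VERR_ALREADY_EXISTS                          */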
+ */ + switch (enmType) + { + /* 8 byte / 64-bit */ + case STAMTYPE_U64: + case STAMTYPE_U64_RESET: + case STAMTYPE_X64: + case STAMTYPE_X64_RESET: + case STAMTYPE_COUNTER: + case STAMTYPE_PROFILE: + case STAMTYPE_PROFILE_ADV: + AssertMsg(!((uintptr_t)pvSample & 7), ("%p - %s\n", pvSample, pszName)); + break; + + /* 4 byte / 32-bit */ + case STAMTYPE_RATIO_U32: + case STAMTYPE_RATIO_U32_RESET: + case STAMTYPE_U32: + case STAMTYPE_U32_RESET: + case STAMTYPE_X32: + case STAMTYPE_X32_RESET: + AssertMsg(!((uintptr_t)pvSample & 3), ("%p - %s\n", pvSample, pszName)); + break; + + /* 2 byte / 32-bit */ + case STAMTYPE_U16: + case STAMTYPE_U16_RESET: + case STAMTYPE_X16: + case STAMTYPE_X16_RESET: + AssertMsg(!((uintptr_t)pvSample & 1), ("%p - %s\n", pvSample, pszName)); + break; + + /* 1 byte / 8-bit / unaligned */ + case STAMTYPE_U8: + case STAMTYPE_U8_RESET: + case STAMTYPE_X8: + case STAMTYPE_X8_RESET: + case STAMTYPE_BOOL: + case STAMTYPE_BOOL_RESET: + case STAMTYPE_CALLBACK: + break; + + default: + AssertMsgFailed(("%d\n", enmType)); + break; + } +#endif /* VBOX_STRICT */ + + /* + * Create a new node and insert it at the current location. + */ + int rc; + size_t cbDesc = pszDesc ? strlen(pszDesc) + 1 : 0; + PSTAMDESC pNew = (PSTAMDESC)RTMemAlloc(sizeof(*pNew) + cchName + 1 + cbDesc); + if (pNew) + { + pNew->pszName = (char *)memcpy((char *)(pNew + 1), pszName, cchName + 1); + pNew->enmType = enmType; + pNew->enmVisibility = enmVisibility; + if (enmType != STAMTYPE_CALLBACK) + pNew->u.pv = pvSample; + else + { + pNew->u.Callback.pvSample = pvSample; + pNew->u.Callback.pfnReset = pfnReset; + pNew->u.Callback.pfnPrint = pfnPrint; + } + pNew->enmUnit = enmUnit; + pNew->iRefreshGroup = iRefreshGrp; + pNew->pszDesc = NULL; + if (pszDesc) + pNew->pszDesc = (char *)memcpy((char *)(pNew + 1) + cchName + 1, pszDesc, cbDesc); + + if (pCur) + RTListNodeInsertBefore(&pCur->ListEntry, &pNew->ListEntry); + else + RTListAppend(&pUVM->stam.s.List, &pNew->ListEntry); + +#ifdef STAM_WITH_LOOKUP_TREE + pNew->pLookup = pLookup; + pLookup->pDesc = pNew; + stamR3LookupIncUsage(pLookup); +#endif + + stamR3ResetOne(pNew, pUVM->pVM); + rc = VINF_SUCCESS; + } + else + rc = VERR_NO_MEMORY; + + STAM_UNLOCK_WR(pUVM); + return rc; +} + + +/** + * Destroys the statistics descriptor, unlinking it and freeing all resources. + * + * @returns VINF_SUCCESS + * @param pCur The descriptor to destroy. + */ +static int stamR3DestroyDesc(PSTAMDESC pCur) +{ + RTListNodeRemove(&pCur->ListEntry); +#ifdef STAM_WITH_LOOKUP_TREE + pCur->pLookup->pDesc = NULL; /** @todo free lookup nodes once it's working. */ + stamR3LookupDecUsage(pCur->pLookup); + stamR3LookupMaybeFree(pCur->pLookup); +#endif + RTMemFree(pCur); + + return VINF_SUCCESS; +} + + +/** + * Deregisters a sample previously registered by STAR3Register() given its + * address. + * + * This is intended used for devices which can be unplugged and for + * temporary samples. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pvSample Pointer to the sample registered with STAMR3Register(). + */ +VMMR3DECL(int) STAMR3DeregisterByAddr(PUVM pUVM, void *pvSample) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* This is a complete waste of time when shutting down. */ + VMSTATE enmState = VMR3GetStateU(pUVM); + if (enmState >= VMSTATE_DESTROYING) + return VINF_SUCCESS; + + STAM_LOCK_WR(pUVM); + + /* + * Search for it. 
+ */ + int rc = VERR_INVALID_HANDLE; + PSTAMDESC pCur, pNext; + RTListForEachSafe(&pUVM->stam.s.List, pCur, pNext, STAMDESC, ListEntry) + { + if (pCur->u.pv == pvSample) + rc = stamR3DestroyDesc(pCur); + } + + STAM_UNLOCK_WR(pUVM); + return rc; +} + + +/** + * Worker for STAMR3Deregister, STAMR3DeregisterV and STAMR3DeregisterF. + * + * @returns VBox status code. + * @retval VWRN_NOT_FOUND if no matching names found. + * + * @param pUVM Pointer to the user mode VM structure. + * @param pszPat The name pattern. + */ +static int stamR3DeregisterByPattern(PUVM pUVM, const char *pszPat) +{ + Assert(!strchr(pszPat, '|')); /* single pattern! */ + + int rc = VWRN_NOT_FOUND; + STAM_LOCK_WR(pUVM); + + PSTAMDESC pLast; + PSTAMDESC pCur = stamR3LookupFindPatternDescRange(pUVM->stam.s.pRoot, &pUVM->stam.s.List, pszPat, &pLast); + if (pCur) + { + for (;;) + { + PSTAMDESC pNext = RTListNodeGetNext(&pCur->ListEntry, STAMDESC, ListEntry); + + if (RTStrSimplePatternMatch(pszPat, pCur->pszName)) + rc = stamR3DestroyDesc(pCur); + + /* advance. */ + if (pCur == pLast) + break; + pCur = pNext; + } + Assert(pLast); + } + else + Assert(!pLast); + + STAM_UNLOCK_WR(pUVM); + return rc; +} + + +/** + * Deregister zero or more samples given a (single) pattern matching their + * names. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pszPat The name pattern. + * @sa STAMR3DeregisterF, STAMR3DeregisterV + */ +VMMR3DECL(int) STAMR3Deregister(PUVM pUVM, const char *pszPat) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* This is a complete waste of time when shutting down. */ + VMSTATE enmState = VMR3GetStateU(pUVM); + if (enmState >= VMSTATE_DESTROYING) + return VINF_SUCCESS; + + return stamR3DeregisterByPattern(pUVM, pszPat); +} + + +/** + * Deregister zero or more samples given a (single) pattern matching their + * names. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pszPatFmt The name pattern format string. + * @param ... Format string arguments. + * @sa STAMR3Deregister, STAMR3DeregisterV + */ +VMMR3DECL(int) STAMR3DeregisterF(PUVM pUVM, const char *pszPatFmt, ...) +{ + va_list va; + va_start(va, pszPatFmt); + int rc = STAMR3DeregisterV(pUVM, pszPatFmt, va); + va_end(va); + return rc; +} + + +/** + * Deregister zero or more samples given a (single) pattern matching their + * names. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pszPatFmt The name pattern format string. + * @param va Format string arguments. + * @sa STAMR3Deregister, STAMR3DeregisterF + */ +VMMR3DECL(int) STAMR3DeregisterV(PUVM pUVM, const char *pszPatFmt, va_list va) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* This is a complete waste of time when shutting down. */ + VMSTATE enmState = VMR3GetStateU(pUVM); + if (enmState >= VMSTATE_DESTROYING) + return VINF_SUCCESS; + + char szPat[STAM_MAX_NAME_LEN + 8]; + size_t cchPat = RTStrPrintfV(szPat, sizeof(szPat), pszPatFmt, va); + AssertReturn(cchPat <= STAM_MAX_NAME_LEN, VERR_OUT_OF_RANGE); + + return stamR3DeregisterByPattern(pUVM, szPat); +} + + +/** + * Resets statistics for the specified VM. + * It's possible to select a subset of the samples. + * + * @returns VBox status code. (Basically, it cannot fail.) + * @param pUVM The user mode VM handle. + * @param pszPat The name matching pattern. See somewhere_where_this_is_described_in_detail. + * If NULL all samples are reset. 
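A hedged sketch of the typical use of the pattern-based deregistration documented above, e.g. when a hot-pluggable device instance goes away. The "/Devices/Foo/%u/*" branch and iInstance are invented to match the registration sketch earlier; STAMR3DeregisterF and the VWRN_NOT_FOUND return are as documented here.

    int rc = STAMR3DeregisterF(pUVM, "/Devices/Foo/%u/*", pThis->iInstance);
    AssertLogRelMsg(RT_SUCCESS(rc) || rc == VWRN_NOT_FOUND, ("rc=%Rrc\n", rc));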
+ * @remarks Don't confuse this with the other 'XYZR3Reset' methods, it's not called at VM reset. + */ +VMMR3DECL(int) STAMR3Reset(PUVM pUVM, const char *pszPat) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + int rc = VINF_SUCCESS; + + /* ring-0 */ + GVMMRESETSTATISTICSSREQ GVMMReq; + GMMRESETSTATISTICSSREQ GMMReq; + bool fGVMMMatched = !pszPat || !*pszPat; + bool fGMMMatched = fGVMMMatched; + if (fGVMMMatched) + { + memset(&GVMMReq.Stats, 0xff, sizeof(GVMMReq.Stats)); + memset(&GMMReq.Stats, 0xff, sizeof(GMMReq.Stats)); + } + else + { + char *pszCopy; + unsigned cExpressions; + char **papszExpressions = stamR3SplitPattern(pszPat, &cExpressions, &pszCopy); + if (!papszExpressions) + return VERR_NO_MEMORY; + + /* GVMM */ + RT_ZERO(GVMMReq.Stats); + for (unsigned i = 0; i < RT_ELEMENTS(g_aGVMMStats); i++) + if (stamR3MultiMatch(papszExpressions, cExpressions, NULL, g_aGVMMStats[i].pszName)) + { + *((uint8_t *)&GVMMReq.Stats + g_aGVMMStats[i].offVar) = 0xff; + fGVMMMatched = true; + } + if (!fGVMMMatched) + { + /** @todo match cpu leaves some rainy day. */ + } + + /* GMM */ + RT_ZERO(GMMReq.Stats); + for (unsigned i = 0; i < RT_ELEMENTS(g_aGMMStats); i++) + if (stamR3MultiMatch(papszExpressions, cExpressions, NULL, g_aGMMStats[i].pszName)) + { + *((uint8_t *)&GMMReq.Stats + g_aGMMStats[i].offVar) = 0xff; + fGMMMatched = true; + } + + RTMemTmpFree(papszExpressions); + RTStrFree(pszCopy); + } + + STAM_LOCK_WR(pUVM); + + if (fGVMMMatched) + { + PVM pVM = pUVM->pVM; + GVMMReq.Hdr.cbReq = sizeof(GVMMReq); + GVMMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + GVMMReq.pSession = pVM->pSession; + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GVMM_RESET_STATISTICS, 0, &GVMMReq.Hdr); + } + + if (fGMMMatched) + { + PVM pVM = pUVM->pVM; + GMMReq.Hdr.cbReq = sizeof(GMMReq); + GMMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + GMMReq.pSession = pVM->pSession; + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_RESET_STATISTICS, 0, &GMMReq.Hdr); + } + + /* and the reset */ + stamR3EnumU(pUVM, pszPat, false /* fUpdateRing0 */, stamR3ResetOne, pUVM->pVM); + + STAM_UNLOCK_WR(pUVM); + return rc; +} + + +/** + * Resets one statistics sample. + * Callback for stamR3EnumU(). + * + * @returns VINF_SUCCESS + * @param pDesc Pointer to the current descriptor. + * @param pvArg User argument - Pointer to the VM. 
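Usage-wise, the reset API above is normally driven with a pattern, for instance to zero just the GVMM per-VM scheduling counters before a measurement run (a sketch; the pattern is illustrative). Passing NULL resets everything, including the ring-0 GVMM/GMM copies via the VMMR0 requests shown above.

    int rc = STAMR3Reset(pUVM, "/GVMM/VM/*");
    AssertRC(rc);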
+ */ +static int stamR3ResetOne(PSTAMDESC pDesc, void *pvArg) +{ + switch (pDesc->enmType) + { + case STAMTYPE_COUNTER: + ASMAtomicXchgU64(&pDesc->u.pCounter->c, 0); + break; + + case STAMTYPE_PROFILE: + case STAMTYPE_PROFILE_ADV: + ASMAtomicXchgU64(&pDesc->u.pProfile->cPeriods, 0); + ASMAtomicXchgU64(&pDesc->u.pProfile->cTicks, 0); + ASMAtomicXchgU64(&pDesc->u.pProfile->cTicksMax, 0); + ASMAtomicXchgU64(&pDesc->u.pProfile->cTicksMin, UINT64_MAX); + break; + + case STAMTYPE_RATIO_U32_RESET: + ASMAtomicXchgU32(&pDesc->u.pRatioU32->u32A, 0); + ASMAtomicXchgU32(&pDesc->u.pRatioU32->u32B, 0); + break; + + case STAMTYPE_CALLBACK: + if (pDesc->u.Callback.pfnReset) + pDesc->u.Callback.pfnReset((PVM)pvArg, pDesc->u.Callback.pvSample); + break; + + case STAMTYPE_U8_RESET: + case STAMTYPE_X8_RESET: + ASMAtomicXchgU8(pDesc->u.pu8, 0); + break; + + case STAMTYPE_U16_RESET: + case STAMTYPE_X16_RESET: + ASMAtomicXchgU16(pDesc->u.pu16, 0); + break; + + case STAMTYPE_U32_RESET: + case STAMTYPE_X32_RESET: + ASMAtomicXchgU32(pDesc->u.pu32, 0); + break; + + case STAMTYPE_U64_RESET: + case STAMTYPE_X64_RESET: + ASMAtomicXchgU64(pDesc->u.pu64, 0); + break; + + case STAMTYPE_BOOL_RESET: + ASMAtomicXchgBool(pDesc->u.pf, false); + break; + + /* These are custom and will not be touched. */ + case STAMTYPE_U8: + case STAMTYPE_X8: + case STAMTYPE_U16: + case STAMTYPE_X16: + case STAMTYPE_U32: + case STAMTYPE_X32: + case STAMTYPE_U64: + case STAMTYPE_X64: + case STAMTYPE_RATIO_U32: + case STAMTYPE_BOOL: + break; + + default: + AssertMsgFailed(("enmType=%d\n", pDesc->enmType)); + break; + } + NOREF(pvArg); + return VINF_SUCCESS; +} + + +/** + * Get a snapshot of the statistics. + * It's possible to select a subset of the samples. + * + * @returns VBox status code. (Basically, it cannot fail.) + * @param pUVM The user mode VM handle. + * @param pszPat The name matching pattern. See somewhere_where_this_is_described_in_detail. + * If NULL all samples are reset. + * @param fWithDesc Whether to include the descriptions. + * @param ppszSnapshot Where to store the pointer to the snapshot data. + * The format of the snapshot should be XML, but that will have to be discussed + * when this function is implemented. + * The returned pointer must be freed by calling STAMR3SnapshotFree(). + * @param pcchSnapshot Where to store the size of the snapshot data. (Excluding the trailing '\0') + */ +VMMR3DECL(int) STAMR3Snapshot(PUVM pUVM, const char *pszPat, char **ppszSnapshot, size_t *pcchSnapshot, bool fWithDesc) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + STAMR3SNAPSHOTONE State = { NULL, NULL, NULL, pUVM->pVM, 0, VINF_SUCCESS, fWithDesc }; + + /* + * Write the XML header. + */ + /** @todo Make this proper & valid XML. */ + stamR3SnapshotPrintf(&State, "\n"); + + /* + * Write the content. + */ + stamR3SnapshotPrintf(&State, "\n"); + int rc = stamR3EnumU(pUVM, pszPat, true /* fUpdateRing0 */, stamR3SnapshotOne, &State); + stamR3SnapshotPrintf(&State, "\n"); + + if (RT_SUCCESS(rc)) + rc = State.rc; + else + { + RTMemFree(State.pszStart); + State.pszStart = State.pszEnd = State.psz = NULL; + State.cbAllocated = 0; + } + + /* + * Done. + */ + *ppszSnapshot = State.pszStart; + if (pcchSnapshot) + *pcchSnapshot = State.psz - State.pszStart; + return rc; +} + + +/** + * stamR3EnumU callback employed by STAMR3Snapshot. + * + * @returns VBox status code, but it's interpreted as 0 == success / !0 == failure by enmR3Enum. + * @param pDesc The sample. 
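A minimal sketch of driving STAMR3Snapshot() above from ring-3 client code; the "/TM/*" pattern and the LogRel line are illustrative.

    char  *pszXml = NULL;
    size_t cchXml = 0;
    int rc = STAMR3Snapshot(pUVM, "/TM/*", &pszXml, &cchXml, true /*fWithDesc*/);
    if (RT_SUCCESS(rc))
    {
        LogRel(("STAM: snapshot is %u bytes\n", (unsigned)cchXml));
        STAMR3SnapshotFree(pUVM, pszXml);
    }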
+ * @param   pvArg       The snapshot status structure.
+ */
+static int stamR3SnapshotOne(PSTAMDESC pDesc, void *pvArg)
+{
+    PSTAMR3SNAPSHOTONE pThis = (PSTAMR3SNAPSHOTONE)pvArg;
+
+    switch (pDesc->enmType)
+    {
+        case STAMTYPE_COUNTER:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && pDesc->u.pCounter->c == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<Counter c=\"%lld\"", pDesc->u.pCounter->c);
+            break;
+
+        case STAMTYPE_PROFILE:
+        case STAMTYPE_PROFILE_ADV:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && pDesc->u.pProfile->cPeriods == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<Profile cPeriods=\"%lld\" cTicks=\"%lld\" cTicksMin=\"%lld\" cTicksMax=\"%lld\"",
+                                 pDesc->u.pProfile->cPeriods, pDesc->u.pProfile->cTicks, pDesc->u.pProfile->cTicksMin,
+                                 pDesc->u.pProfile->cTicksMax);
+            break;
+
+        case STAMTYPE_RATIO_U32:
+        case STAMTYPE_RATIO_U32_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && !pDesc->u.pRatioU32->u32A && !pDesc->u.pRatioU32->u32B)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<Ratio32 u32A=\"%lld\" u32B=\"%lld\"",
+                                 pDesc->u.pRatioU32->u32A, pDesc->u.pRatioU32->u32B);
+            break;
+
+        case STAMTYPE_CALLBACK:
+        {
+            char szBuf[512];
+            pDesc->u.Callback.pfnPrint(pThis->pVM, pDesc->u.Callback.pvSample, szBuf, sizeof(szBuf));
+            stamR3SnapshotPrintf(pThis, "<Callback val=\"%s\"", szBuf);
+            break;
+        }
+
+        case STAMTYPE_U8:
+        case STAMTYPE_U8_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu8 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<U8 val=\"%u\"", *pDesc->u.pu8);
+            break;
+
+        case STAMTYPE_X8:
+        case STAMTYPE_X8_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu8 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<X8 val=\"%#x\"", *pDesc->u.pu8);
+            break;
+
+        case STAMTYPE_U16:
+        case STAMTYPE_U16_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu16 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<U16 val=\"%u\"", *pDesc->u.pu16);
+            break;
+
+        case STAMTYPE_X16:
+        case STAMTYPE_X16_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu16 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<X16 val=\"%#x\"", *pDesc->u.pu16);
+            break;
+
+        case STAMTYPE_U32:
+        case STAMTYPE_U32_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu32 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<U32 val=\"%u\"", *pDesc->u.pu32);
+            break;
+
+        case STAMTYPE_X32:
+        case STAMTYPE_X32_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu32 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<X32 val=\"%#x\"", *pDesc->u.pu32);
+            break;
+
+        case STAMTYPE_U64:
+        case STAMTYPE_U64_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu64 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<U64 val=\"%llu\"", *pDesc->u.pu64);
+            break;
+
+        case STAMTYPE_X64:
+        case STAMTYPE_X64_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu64 == 0)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<X64 val=\"%#llx\"", *pDesc->u.pu64);
+            break;
+
+        case STAMTYPE_BOOL:
+        case STAMTYPE_BOOL_RESET:
+            if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pf == false)
+                return VINF_SUCCESS;
+            stamR3SnapshotPrintf(pThis, "<Bool val=\"%RTbool\"", *pDesc->u.pf);
+            break;
+
+        default:
+            AssertMsgFailed(("%d\n", pDesc->enmType));
+            return 0;
+    }
+
+    stamR3SnapshotPrintf(pThis, " unit=\"%s\"", STAMR3GetUnit(pDesc->enmUnit));
+
+    switch (pDesc->enmVisibility)
+    {
+        default:
+        case STAMVISIBILITY_ALWAYS:
+            break;
+        case STAMVISIBILITY_USED:
+            stamR3SnapshotPrintf(pThis, " vis=\"used\"");
+            break;
+        case STAMVISIBILITY_NOT_GUI:
+            stamR3SnapshotPrintf(pThis, " vis=\"not-gui\"");
+            break;
+    }
+
+    stamR3SnapshotPrintf(pThis, " name=\"%s\"", pDesc->pszName);
+
+    if (pThis->fWithDesc && pDesc->pszDesc)
+    {
+        /*
+         * The description is a bit tricky as it may include chars that
+         * xml requires to be escaped.
+         */
+        const char *pszBadChar = strpbrk(pDesc->pszDesc, "&<>\"'");
+        if (!pszBadChar)
+            return stamR3SnapshotPrintf(pThis, " desc=\"%s\"/>\n", pDesc->pszDesc);
+
+        stamR3SnapshotPrintf(pThis, " desc=\"");
+        const char *pszCur = pDesc->pszDesc;
+        do
+        {
+            stamR3SnapshotPrintf(pThis, "%.*s", pszBadChar - pszCur, pszCur);
+            switch (*pszBadChar)
+            {
+                case '&':   stamR3SnapshotPrintf(pThis, "&amp;");  break;
+                case '<':   stamR3SnapshotPrintf(pThis, "&lt;");   break;
+                case '>':   stamR3SnapshotPrintf(pThis, "&gt;");   break;
+                case '"':   stamR3SnapshotPrintf(pThis, "&quot;"); break;
+                case '\'':  stamR3SnapshotPrintf(pThis, "&apos;"); break;
+                default:    AssertMsgFailed(("%c", *pszBadChar));  break;
+            }
+            pszCur = pszBadChar + 1;
+            pszBadChar = strpbrk(pszCur, "&<>\"'");
+        } while (pszBadChar);
+        return stamR3SnapshotPrintf(pThis, "%s\"/>\n", pszCur);
+    }
+    return stamR3SnapshotPrintf(pThis, "/>\n");
+}
+
+
+/**
+ * Output callback for stamR3SnapshotPrintf.
+ *
+ * @returns number of bytes written.
+ * @param   pvArg       The snapshot status structure.
+ * @param   pach        Pointer to an array of characters (bytes).
+ * @param   cch         The number of chars (bytes) to write from the array.
+ */
+static DECLCALLBACK(size_t) stamR3SnapshotOutput(void *pvArg, const char *pach, size_t cch)
+{
+    PSTAMR3SNAPSHOTONE pThis = (PSTAMR3SNAPSHOTONE)pvArg;
+
+    /*
+     * Make sure we've got space for it.
+     */
+    if (RT_UNLIKELY((uintptr_t)pThis->pszEnd - (uintptr_t)pThis->psz < cch + 1))
+    {
+        if (RT_FAILURE(pThis->rc))
+            return 0;
+
+        size_t cbNewSize = pThis->cbAllocated;
+        if (cbNewSize > cch)
+            cbNewSize *= 2;
+        else
+            cbNewSize += RT_ALIGN(cch + 1, 0x1000);
+        char *pszNew = (char *)RTMemRealloc(pThis->pszStart, cbNewSize);
+        if (!pszNew)
+        {
+            /*
+             * Free up immediately, out-of-memory is bad news and this
+             * isn't an important allocation / API.
+             */
+            pThis->rc = VERR_NO_MEMORY;
+            RTMemFree(pThis->pszStart);
+            pThis->pszStart = pThis->pszEnd = pThis->psz = NULL;
+            pThis->cbAllocated = 0;
+            return 0;
+        }
+
+        pThis->psz = pszNew + (pThis->psz - pThis->pszStart);
+        pThis->pszStart = pszNew;
+        pThis->pszEnd = pszNew + cbNewSize;
+        pThis->cbAllocated = cbNewSize;
+    }
+
+    /*
+     * Copy the chars to the buffer and terminate it.
+     */
+    if (cch)
+    {
+        memcpy(pThis->psz, pach, cch);
+        pThis->psz += cch;
+    }
+    *pThis->psz = '\0';
+    return cch;
+}
+
+
+/**
+ * Wrapper around RTStrFormatV for use by the snapshot API.
+ *
+ * @returns VBox status code.
+ * @param   pThis       The snapshot status structure.
+ * @param   pszFormat   The format string.
+ * @param   ...         Optional arguments.
+ */
+static int stamR3SnapshotPrintf(PSTAMR3SNAPSHOTONE pThis, const char *pszFormat, ...)
+{
+    va_list va;
+    va_start(va, pszFormat);
+    RTStrFormatV(stamR3SnapshotOutput, pThis, NULL, NULL, pszFormat, va);
+    va_end(va);
+    return pThis->rc;
+}
+
+
+/**
+ * Releases a statistics snapshot returned by STAMR3Snapshot().
+ *
+ * @returns VBox status code.
+ * @param   pUVM        The user mode VM handle.
+ * @param   pszSnapshot The snapshot data pointer returned by STAMR3Snapshot().
+ *                      NULL is allowed.
+ */
+VMMR3DECL(int) STAMR3SnapshotFree(PUVM pUVM, char *pszSnapshot)
+{
+    if (pszSnapshot)
+        RTMemFree(pszSnapshot);
+    NOREF(pUVM);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Dumps the selected statistics to the log.
+ *
+ * @returns VBox status code.
+ * @param   pUVM    Pointer to the user mode VM structure.
+ * @param   pszPat  The name matching pattern. See somewhere_where_this_is_described_in_detail.
+ *                  If NULL all samples are written to the log.
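+ *
+ *                  Illustrative usage sketch (the patterns below are hypothetical;
+ *                  the syntax is the one handled by stamR3SplitPattern and
+ *                  RTStrSimplePatternMatch: '*' and '?' wildcards, '|' between
+ *                  alternative expressions):
+ * @code
+ *      STAMR3Dump(pUVM, "/TM/*");                  // all samples below /TM
+ *      STAMR3Dump(pUVM, "/TM/Poll|/TM/Poll/*");    // the poll counter and its children
+ * @endcode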
+ */ +VMMR3DECL(int) STAMR3Dump(PUVM pUVM, const char *pszPat) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + STAMR3PRINTONEARGS Args; + Args.pUVM = pUVM; + Args.pvArg = NULL; + Args.pfnPrintf = stamR3EnumLogPrintf; + + stamR3EnumU(pUVM, pszPat, true /* fUpdateRing0 */, stamR3PrintOne, &Args); + return VINF_SUCCESS; +} + + +/** + * Prints to the log. + * + * @param pArgs Pointer to the print one argument structure. + * @param pszFormat Format string. + * @param ... Format arguments. + */ +static DECLCALLBACK(void) stamR3EnumLogPrintf(PSTAMR3PRINTONEARGS pArgs, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + RTLogPrintfV(pszFormat, va); + va_end(va); + NOREF(pArgs); +} + + +/** + * Dumps the selected statistics to the release log. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param pszPat The name matching pattern. See somewhere_where_this_is_described_in_detail. + * If NULL all samples are written to the log. + */ +VMMR3DECL(int) STAMR3DumpToReleaseLog(PUVM pUVM, const char *pszPat) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + STAMR3PRINTONEARGS Args; + Args.pUVM = pUVM; + Args.pvArg = NULL; + Args.pfnPrintf = stamR3EnumRelLogPrintf; + + stamR3EnumU(pUVM, pszPat, true /* fUpdateRing0 */, stamR3PrintOne, &Args); + return VINF_SUCCESS; +} + +/** + * Prints to the release log. + * + * @param pArgs Pointer to the print one argument structure. + * @param pszFormat Format string. + * @param ... Format arguments. + */ +static DECLCALLBACK(void) stamR3EnumRelLogPrintf(PSTAMR3PRINTONEARGS pArgs, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + RTLogRelPrintfV(pszFormat, va); + va_end(va); + NOREF(pArgs); +} + + +/** + * Prints the selected statistics to standard out. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pszPat The name matching pattern. See somewhere_where_this_is_described_in_detail. + * If NULL all samples are reset. + */ +VMMR3DECL(int) STAMR3Print(PUVM pUVM, const char *pszPat) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + STAMR3PRINTONEARGS Args; + Args.pUVM = pUVM; + Args.pvArg = NULL; + Args.pfnPrintf = stamR3EnumPrintf; + + stamR3EnumU(pUVM, pszPat, true /* fUpdateRing0 */, stamR3PrintOne, &Args); + return VINF_SUCCESS; +} + + +/** + * Prints to stdout. + * + * @param pArgs Pointer to the print one argument structure. + * @param pszFormat Format string. + * @param ... Format arguments. + */ +static DECLCALLBACK(void) stamR3EnumPrintf(PSTAMR3PRINTONEARGS pArgs, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + RTPrintfV(pszFormat, va); + va_end(va); + NOREF(pArgs); +} + + +/** + * Prints one sample. + * Callback for stamR3EnumU(). + * + * @returns VINF_SUCCESS + * @param pDesc Pointer to the current descriptor. + * @param pvArg User argument - STAMR3PRINTONEARGS. 
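+ *
+ * @note    Each sample is emitted as one "name value unit" line, e.g. the
+ *          (purely illustrative) output "/TM/MaxHzHint  100 Hz".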
+ */ +static int stamR3PrintOne(PSTAMDESC pDesc, void *pvArg) +{ + PSTAMR3PRINTONEARGS pArgs = (PSTAMR3PRINTONEARGS)pvArg; + + switch (pDesc->enmType) + { + case STAMTYPE_COUNTER: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && pDesc->u.pCounter->c == 0) + return VINF_SUCCESS; + + pArgs->pfnPrintf(pArgs, "%-32s %8llu %s\n", pDesc->pszName, pDesc->u.pCounter->c, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_PROFILE: + case STAMTYPE_PROFILE_ADV: + { + if (pDesc->enmVisibility == STAMVISIBILITY_USED && pDesc->u.pProfile->cPeriods == 0) + return VINF_SUCCESS; + + uint64_t u64 = pDesc->u.pProfile->cPeriods ? pDesc->u.pProfile->cPeriods : 1; + pArgs->pfnPrintf(pArgs, "%-32s %8llu %s (%12llu ticks, %7llu times, max %9llu, min %7lld)\n", pDesc->pszName, + pDesc->u.pProfile->cTicks / u64, STAMR3GetUnit(pDesc->enmUnit), + pDesc->u.pProfile->cTicks, pDesc->u.pProfile->cPeriods, pDesc->u.pProfile->cTicksMax, pDesc->u.pProfile->cTicksMin); + break; + } + + case STAMTYPE_RATIO_U32: + case STAMTYPE_RATIO_U32_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && !pDesc->u.pRatioU32->u32A && !pDesc->u.pRatioU32->u32B) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8u:%-8u %s\n", pDesc->pszName, + pDesc->u.pRatioU32->u32A, pDesc->u.pRatioU32->u32B, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_CALLBACK: + { + char szBuf[512]; + pDesc->u.Callback.pfnPrint(pArgs->pUVM->pVM, pDesc->u.Callback.pvSample, szBuf, sizeof(szBuf)); + pArgs->pfnPrintf(pArgs, "%-32s %s %s\n", pDesc->pszName, szBuf, STAMR3GetUnit(pDesc->enmUnit)); + break; + } + + case STAMTYPE_U8: + case STAMTYPE_U8_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu8 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8u %s\n", pDesc->pszName, *pDesc->u.pu8, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_X8: + case STAMTYPE_X8_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu8 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8x %s\n", pDesc->pszName, *pDesc->u.pu8, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_U16: + case STAMTYPE_U16_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu16 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8u %s\n", pDesc->pszName, *pDesc->u.pu16, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_X16: + case STAMTYPE_X16_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu16 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8x %s\n", pDesc->pszName, *pDesc->u.pu16, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_U32: + case STAMTYPE_U32_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu32 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8u %s\n", pDesc->pszName, *pDesc->u.pu32, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_X32: + case STAMTYPE_X32_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu32 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8x %s\n", pDesc->pszName, *pDesc->u.pu32, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_U64: + case STAMTYPE_U64_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu64 == 0) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8llu %s\n", pDesc->pszName, *pDesc->u.pu64, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_X64: + case STAMTYPE_X64_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pu64 == 0) + 
return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %8llx %s\n", pDesc->pszName, *pDesc->u.pu64, STAMR3GetUnit(pDesc->enmUnit)); + break; + + case STAMTYPE_BOOL: + case STAMTYPE_BOOL_RESET: + if (pDesc->enmVisibility == STAMVISIBILITY_USED && *pDesc->u.pf == false) + return VINF_SUCCESS; + pArgs->pfnPrintf(pArgs, "%-32s %s %s\n", pDesc->pszName, *pDesc->u.pf ? "true " : "false ", STAMR3GetUnit(pDesc->enmUnit)); + break; + + default: + AssertMsgFailed(("enmType=%d\n", pDesc->enmType)); + break; + } + NOREF(pvArg); + return VINF_SUCCESS; +} + + +/** + * Enumerate the statistics by the means of a callback function. + * + * @returns Whatever the callback returns. + * + * @param pUVM The user mode VM handle. + * @param pszPat The pattern to match samples. + * @param pfnEnum The callback function. + * @param pvUser The pvUser argument of the callback function. + */ +VMMR3DECL(int) STAMR3Enum(PUVM pUVM, const char *pszPat, PFNSTAMR3ENUM pfnEnum, void *pvUser) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + + STAMR3ENUMONEARGS Args; + Args.pVM = pUVM->pVM; + Args.pfnEnum = pfnEnum; + Args.pvUser = pvUser; + + return stamR3EnumU(pUVM, pszPat, true /* fUpdateRing0 */, stamR3EnumOne, &Args); +} + + +/** + * Callback function for STARTR3Enum(). + * + * @returns whatever the callback returns. + * @param pDesc Pointer to the current descriptor. + * @param pvArg Points to a STAMR3ENUMONEARGS structure. + */ +static int stamR3EnumOne(PSTAMDESC pDesc, void *pvArg) +{ + PSTAMR3ENUMONEARGS pArgs = (PSTAMR3ENUMONEARGS)pvArg; + int rc; + if (pDesc->enmType == STAMTYPE_CALLBACK) + { + /* Give the enumerator something useful. */ + char szBuf[512]; + pDesc->u.Callback.pfnPrint(pArgs->pVM, pDesc->u.Callback.pvSample, szBuf, sizeof(szBuf)); + rc = pArgs->pfnEnum(pDesc->pszName, pDesc->enmType, szBuf, pDesc->enmUnit, + pDesc->enmVisibility, pDesc->pszDesc, pArgs->pvUser); + } + else + rc = pArgs->pfnEnum(pDesc->pszName, pDesc->enmType, pDesc->u.pv, pDesc->enmUnit, + pDesc->enmVisibility, pDesc->pszDesc, pArgs->pvUser); + return rc; +} + +static void stamR3RefreshGroup(PUVM pUVM, uint8_t iRefreshGroup, uint64_t *pbmRefreshedGroups) +{ + *pbmRefreshedGroups |= RT_BIT_64(iRefreshGroup); + + PVM pVM = pUVM->pVM; + if (pVM && pVM->pSession) + { + switch (iRefreshGroup) + { + /* + * GVMM + */ + case STAM_REFRESH_GRP_GVMM: + { + GVMMQUERYSTATISTICSSREQ Req; + Req.Hdr.cbReq = sizeof(Req); + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.pSession = pVM->pSession; + int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GVMM_QUERY_STATISTICS, 0, &Req.Hdr); + if (RT_SUCCESS(rc)) + { + pUVM->stam.s.GVMMStats = Req.Stats; + + /* + * Check if the number of host CPUs has changed (it will the first + * time around and normally never again). 
+ */ + if (RT_UNLIKELY(pUVM->stam.s.GVMMStats.cHostCpus > pUVM->stam.s.cRegisteredHostCpus)) + { + if (RT_UNLIKELY(pUVM->stam.s.GVMMStats.cHostCpus > pUVM->stam.s.cRegisteredHostCpus)) + { + STAM_UNLOCK_RD(pUVM); + STAM_LOCK_WR(pUVM); + uint32_t cCpus = pUVM->stam.s.GVMMStats.cHostCpus; + for (uint32_t iCpu = pUVM->stam.s.cRegisteredHostCpus; iCpu < cCpus; iCpu++) + { + char szName[120]; + size_t cchBase = RTStrPrintf(szName, sizeof(szName), "/GVMM/HostCpus/%u", iCpu); + stamR3RegisterU(pUVM, &pUVM->stam.s.GVMMStats.aHostCpus[iCpu].idCpu, NULL, NULL, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_NONE, + "Host CPU ID", STAM_REFRESH_GRP_GVMM); + strcpy(&szName[cchBase], "/idxCpuSet"); + stamR3RegisterU(pUVM, &pUVM->stam.s.GVMMStats.aHostCpus[iCpu].idxCpuSet, NULL, NULL, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_NONE, + "CPU Set index", STAM_REFRESH_GRP_GVMM); + strcpy(&szName[cchBase], "/DesiredHz"); + stamR3RegisterU(pUVM, &pUVM->stam.s.GVMMStats.aHostCpus[iCpu].uDesiredHz, NULL, NULL, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_HZ, + "The desired frequency", STAM_REFRESH_GRP_GVMM); + strcpy(&szName[cchBase], "/CurTimerHz"); + stamR3RegisterU(pUVM, &pUVM->stam.s.GVMMStats.aHostCpus[iCpu].uTimerHz, NULL, NULL, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_HZ, + "The current timer frequency", STAM_REFRESH_GRP_GVMM); + strcpy(&szName[cchBase], "/PPTChanges"); + stamR3RegisterU(pUVM, &pUVM->stam.s.GVMMStats.aHostCpus[iCpu].cChanges, NULL, NULL, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_OCCURENCES, + "RTTimerChangeInterval calls", STAM_REFRESH_GRP_GVMM); + strcpy(&szName[cchBase], "/PPTStarts"); + stamR3RegisterU(pUVM, &pUVM->stam.s.GVMMStats.aHostCpus[iCpu].cStarts, NULL, NULL, + STAMTYPE_U32, STAMVISIBILITY_ALWAYS, szName, STAMUNIT_OCCURENCES, + "RTTimerStart calls", STAM_REFRESH_GRP_GVMM); + } + pUVM->stam.s.cRegisteredHostCpus = cCpus; + STAM_UNLOCK_WR(pUVM); + STAM_LOCK_RD(pUVM); + } + } + } + break; + } + + /* + * GMM + */ + case STAM_REFRESH_GRP_GMM: + { + GMMQUERYSTATISTICSSREQ Req; + Req.Hdr.cbReq = sizeof(Req); + Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + Req.pSession = pVM->pSession; + int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_QUERY_STATISTICS, 0, &Req.Hdr); + if (RT_SUCCESS(rc)) + pUVM->stam.s.GMMStats = Req.Stats; + break; + } + + /* + * NEM. + */ + case STAM_REFRESH_GRP_NEM: + SUPR3CallVMMR0(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_NEM_UPDATE_STATISTICS, NULL); + break; + + default: + AssertMsgFailed(("iRefreshGroup=%d\n", iRefreshGroup)); + } + } +} + + +/** + * Refreshes the statistics behind the given entry, if necessary. + * + * This helps implement fetching global ring-0 stats into ring-3 accessible + * storage. GVMM, GMM and NEM makes use of this. + * + * @param pUVM The user mode VM handle. + * @param pCur The statistics descriptor which group to check + * and maybe update. + * @param pbmRefreshedGroups Bitmap tracking what has already been updated. + */ +DECLINLINE(void) stamR3Refresh(PUVM pUVM, PSTAMDESC pCur, uint64_t *pbmRefreshedGroups) +{ + uint8_t const iRefreshGroup = pCur->iRefreshGroup; + if (RT_LIKELY(iRefreshGroup == STAM_REFRESH_GRP_NONE)) + { /* likely */ } + else if (!(*pbmRefreshedGroups & RT_BIT_64(iRefreshGroup))) + stamR3RefreshGroup(pUVM, iRefreshGroup, pbmRefreshedGroups); +} + + +/** + * Checks if the string contains a pattern expression or not. + * + * @returns true / false. + * @param pszPat The potential pattern. 
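+ *
+ * @note    Only the '*' and '?' wildcards make a string a pattern here, so e.g.
+ *          "/TM/Poll/*" is a pattern while the plain name "/TM/MaxHzHint" is not.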
+ */ +static bool stamR3IsPattern(const char *pszPat) +{ + return strchr(pszPat, '*') != NULL + || strchr(pszPat, '?') != NULL; +} + + +/** + * Match a name against an array of patterns. + * + * @returns true if it matches, false if it doesn't match. + * @param papszExpressions The array of pattern expressions. + * @param cExpressions The number of array entries. + * @param piExpression Where to read/store the current skip index. Optional. + * @param pszName The name to match. + */ +static bool stamR3MultiMatch(const char * const *papszExpressions, unsigned cExpressions, + unsigned *piExpression, const char *pszName) +{ + for (unsigned i = piExpression ? *piExpression : 0; i < cExpressions; i++) + { + const char *pszPat = papszExpressions[i]; + if (RTStrSimplePatternMatch(pszPat, pszName)) + { + /* later: + if (piExpression && i > *piExpression) + { + Check if we can skip some expressions. + Requires the expressions to be sorted. + }*/ + return true; + } + } + return false; +} + + +/** + * Splits a multi pattern into single ones. + * + * @returns Pointer to an array of single patterns. Free it with RTMemTmpFree. + * @param pszPat The pattern to split. + * @param pcExpressions The number of array elements. + * @param ppszCopy The pattern copy to free using RTStrFree. + */ +static char **stamR3SplitPattern(const char *pszPat, unsigned *pcExpressions, char **ppszCopy) +{ + Assert(pszPat && *pszPat); + + char *pszCopy = RTStrDup(pszPat); + if (!pszCopy) + return NULL; + + /* count them & allocate array. */ + char *psz = pszCopy; + unsigned cExpressions = 1; + while ((psz = strchr(psz, '|')) != NULL) + cExpressions++, psz++; + + char **papszExpressions = (char **)RTMemTmpAllocZ((cExpressions + 1) * sizeof(char *)); + if (!papszExpressions) + { + RTStrFree(pszCopy); + return NULL; + } + + /* split */ + psz = pszCopy; + for (unsigned i = 0;;) + { + papszExpressions[i] = psz; + if (++i >= cExpressions) + break; + psz = strchr(psz, '|'); + *psz++ = '\0'; + } + + /* sort the array, putting '*' last. */ + /** @todo sort it... */ + + *pcExpressions = cExpressions; + *ppszCopy = pszCopy; + return papszExpressions; +} + + +/** + * Enumerates the nodes selected by a pattern or all nodes if no pattern + * is specified. + * + * The call may lock STAM for writing before calling this function, however do + * not lock it for reading as this function may need to write lock STAM. + * + * @returns The rc from the callback. + * @param pUVM Pointer to the user mode VM structure. + * @param pszPat Pattern. + * @param fUpdateRing0 Update the ring-0 . + * @param pfnCallback Callback function which shall be called for matching nodes. + * If it returns anything but VINF_SUCCESS the enumeration is + * terminated and the status code returned to the caller. + * @param pvArg User parameter for the callback. + */ +static int stamR3EnumU(PUVM pUVM, const char *pszPat, bool fUpdateRing0, + int (*pfnCallback)(PSTAMDESC pDesc, void *pvArg), void *pvArg) +{ + int rc = VINF_SUCCESS; + uint64_t bmRefreshedGroups = 0; + PSTAMDESC pCur; + + /* + * All. + */ + if (!pszPat || !*pszPat || !strcmp(pszPat, "*")) + { + STAM_LOCK_RD(pUVM); + RTListForEach(&pUVM->stam.s.List, pCur, STAMDESC, ListEntry) + { + if (fUpdateRing0) + stamR3Refresh(pUVM, pCur, &bmRefreshedGroups); + rc = pfnCallback(pCur, pvArg); + if (rc) + break; + } + STAM_UNLOCK_RD(pUVM); + } + + /* + * Single expression pattern. 
+ */ + else if (!strchr(pszPat, '|')) + { + STAM_LOCK_RD(pUVM); +#ifdef STAM_WITH_LOOKUP_TREE + if (!stamR3IsPattern(pszPat)) + { + pCur = stamR3LookupFindDesc(pUVM->stam.s.pRoot, pszPat); + if (pCur) + { + if (fUpdateRing0) + stamR3Refresh(pUVM, pCur, &bmRefreshedGroups); + rc = pfnCallback(pCur, pvArg); + } + } + else + { + PSTAMDESC pLast; + pCur = stamR3LookupFindPatternDescRange(pUVM->stam.s.pRoot, &pUVM->stam.s.List, pszPat, &pLast); + if (pCur) + { + for (;;) + { + if (RTStrSimplePatternMatch(pszPat, pCur->pszName)) + { + if (fUpdateRing0) + stamR3Refresh(pUVM, pCur, &bmRefreshedGroups); + rc = pfnCallback(pCur, pvArg); + if (rc) + break; + } + if (pCur == pLast) + break; + pCur = RTListNodeGetNext(&pCur->ListEntry, STAMDESC, ListEntry); + } + Assert(pLast); + } + else + Assert(!pLast); + + } +#else + RTListForEach(&pUVM->stam.s.List, pCur, STAMDESC, ListEntry) + { + if (RTStrSimplePatternMatch(pszPat, pCur->pszName)) + { + if (fUpdateRing0) + stamR3Refresh(pUVM, pCur, &bmRefreshedGroups); + rc = pfnCallback(pCur, pvArg); + if (rc) + break; + } + } +#endif + STAM_UNLOCK_RD(pUVM); + } + + /* + * Multi expression pattern. + */ + else + { + /* + * Split up the pattern first. + */ + char *pszCopy; + unsigned cExpressions; + char **papszExpressions = stamR3SplitPattern(pszPat, &cExpressions, &pszCopy); + if (!papszExpressions) + return VERR_NO_MEMORY; + + /* + * Perform the enumeration. + */ + STAM_LOCK_RD(pUVM); + unsigned iExpression = 0; + RTListForEach(&pUVM->stam.s.List, pCur, STAMDESC, ListEntry) + { + if (stamR3MultiMatch(papszExpressions, cExpressions, &iExpression, pCur->pszName)) + { + if (fUpdateRing0) + stamR3Refresh(pUVM, pCur, &bmRefreshedGroups); + rc = pfnCallback(pCur, pvArg); + if (rc) + break; + } + } + STAM_UNLOCK_RD(pUVM); + + RTMemTmpFree(papszExpressions); + RTStrFree(pszCopy); + } + + return rc; +} + + +/** + * Registers the ring-0 statistics. + * + * @param pUVM Pointer to the user mode VM structure. + */ +static void stamR3Ring0StatsRegisterU(PUVM pUVM) +{ + /* GVMM */ + for (unsigned i = 0; i < RT_ELEMENTS(g_aGVMMStats); i++) + stamR3RegisterU(pUVM, (uint8_t *)&pUVM->stam.s.GVMMStats + g_aGVMMStats[i].offVar, NULL, NULL, + g_aGVMMStats[i].enmType, STAMVISIBILITY_ALWAYS, g_aGVMMStats[i].pszName, + g_aGVMMStats[i].enmUnit, g_aGVMMStats[i].pszDesc, STAM_REFRESH_GRP_GVMM); + pUVM->stam.s.cRegisteredHostCpus = 0; + + /* GMM */ + for (unsigned i = 0; i < RT_ELEMENTS(g_aGMMStats); i++) + stamR3RegisterU(pUVM, (uint8_t *)&pUVM->stam.s.GMMStats + g_aGMMStats[i].offVar, NULL, NULL, + g_aGMMStats[i].enmType, STAMVISIBILITY_ALWAYS, g_aGMMStats[i].pszName, + g_aGMMStats[i].enmUnit, g_aGMMStats[i].pszDesc, STAM_REFRESH_GRP_GMM); +} + + +/** + * Get the unit string. + * + * @returns Pointer to read only unit string. + * @param enmUnit The unit. 
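+ *
+ * @note    For example, STAMR3GetUnit(STAMUNIT_OCCURENCES) returns "times" and
+ *          STAMR3GetUnit(STAMUNIT_TICKS_PER_CALL) returns "ticks/call".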
+ */ +VMMR3DECL(const char *) STAMR3GetUnit(STAMUNIT enmUnit) +{ + switch (enmUnit) + { + case STAMUNIT_NONE: return ""; + case STAMUNIT_CALLS: return "calls"; + case STAMUNIT_COUNT: return "count"; + case STAMUNIT_BYTES: return "bytes"; + case STAMUNIT_PAGES: return "pages"; + case STAMUNIT_ERRORS: return "errors"; + case STAMUNIT_OCCURENCES: return "times"; + case STAMUNIT_TICKS: return "ticks"; + case STAMUNIT_TICKS_PER_CALL: return "ticks/call"; + case STAMUNIT_TICKS_PER_OCCURENCE: return "ticks/time"; + case STAMUNIT_GOOD_BAD: return "good:bad"; + case STAMUNIT_MEGABYTES: return "megabytes"; + case STAMUNIT_KILOBYTES: return "kilobytes"; + case STAMUNIT_NS: return "ns"; + case STAMUNIT_NS_PER_CALL: return "ns/call"; + case STAMUNIT_NS_PER_OCCURENCE: return "ns/time"; + case STAMUNIT_PCT: return "%"; + case STAMUNIT_HZ: return "Hz"; + + default: + AssertMsgFailed(("Unknown unit %d\n", enmUnit)); + return "(?unit?)"; + } +} + +#ifdef VBOX_WITH_DEBUGGER + +/** + * @callback_method_impl{FNDBGCCMD, The '.stats' command.} + */ +static DECLCALLBACK(int) stamR3CmdStats(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + if (RTListIsEmpty(&pUVM->stam.s.List)) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "No statistics present"); + + /* + * Do the printing. + */ + STAMR3PRINTONEARGS Args; + Args.pUVM = pUVM; + Args.pvArg = pCmdHlp; + Args.pfnPrintf = stamR3EnumDbgfPrintf; + + return stamR3EnumU(pUVM, cArgs ? paArgs[0].u.pszString : NULL, true /* fUpdateRing0 */, stamR3PrintOne, &Args); +} + + +/** + * Display one sample in the debugger. + * + * @param pArgs Pointer to the print one argument structure. + * @param pszFormat Format string. + * @param ... Format arguments. + */ +static DECLCALLBACK(void) stamR3EnumDbgfPrintf(PSTAMR3PRINTONEARGS pArgs, const char *pszFormat, ...) +{ + PDBGCCMDHLP pCmdHlp = (PDBGCCMDHLP)pArgs->pvArg; + + va_list va; + va_start(va, pszFormat); + pCmdHlp->pfnPrintfV(pCmdHlp, NULL, pszFormat, va); + va_end(va); + NOREF(pArgs); +} + + +/** + * @callback_method_impl{FNDBGCCMD, The '.statsreset' command.} + */ +static DECLCALLBACK(int) stamR3CmdStatsReset(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, PCDBGCVAR paArgs, unsigned cArgs) +{ + /* + * Validate input. + */ + DBGC_CMDHLP_REQ_UVM_RET(pCmdHlp, pCmd, pUVM); + if (RTListIsEmpty(&pUVM->stam.s.List)) + return DBGCCmdHlpFail(pCmdHlp, pCmd, "No statistics present"); + + /* + * Execute reset. + */ + int rc = STAMR3Reset(pUVM, cArgs ? paArgs[0].u.pszString : NULL); + if (RT_SUCCESS(rc)) + return DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "STAMR3ResetU"); + return DBGCCmdHlpPrintf(pCmdHlp, "Statistics have been reset.\n"); +} + +#endif /* VBOX_WITH_DEBUGGER */ + diff --git a/src/VBox/VMM/VMMR3/TM.cpp b/src/VBox/VMM/VMMR3/TM.cpp new file mode 100644 index 00000000..87aee791 --- /dev/null +++ b/src/VBox/VMM/VMMR3/TM.cpp @@ -0,0 +1,3713 @@ +/* $Id: TM.cpp $ */ +/** @file + * TM - Time Manager. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_tm TM - The Time Manager + * + * The Time Manager abstracts the CPU clocks and manages timers used by the VMM, + * device and drivers. + * + * @see grp_tm + * + * + * @section sec_tm_clocks Clocks + * + * There are currently 4 clocks: + * - Virtual (guest). + * - Synchronous virtual (guest). + * - CPU Tick (TSC) (guest). Only current use is rdtsc emulation. Usually a + * function of the virtual clock. + * - Real (host). This is only used for display updates atm. + * + * The most important clocks are the three first ones and of these the second is + * the most interesting. + * + * + * The synchronous virtual clock is tied to the virtual clock except that it + * will take into account timer delivery lag caused by host scheduling. It will + * normally never advance beyond the head timer, and when lagging too far behind + * it will gradually speed up to catch up with the virtual clock. All devices + * implementing time sources accessible to and used by the guest is using this + * clock (for timers and other things). This ensures consistency between the + * time sources. + * + * The virtual clock is implemented as an offset to a monotonic, high + * resolution, wall clock. The current time source is using the RTTimeNanoTS() + * machinery based upon the Global Info Pages (GIP), that is, we're using TSC + * deltas (usually 10 ms) to fill the gaps between GIP updates. The result is + * a fairly high res clock that works in all contexts and on all hosts. The + * virtual clock is paused when the VM isn't in the running state. + * + * The CPU tick (TSC) is normally virtualized as a function of the synchronous + * virtual clock, where the frequency defaults to the host cpu frequency (as we + * measure it). In this mode it is possible to configure the frequency. Another + * (non-default) option is to use the raw unmodified host TSC values. And yet + * another, to tie it to time spent executing guest code. All these things are + * configurable should non-default behavior be desirable. + * + * The real clock is a monotonic clock (when available) with relatively low + * resolution, though this a bit host specific. Note that we're currently not + * servicing timers using the real clock when the VM is not running, this is + * simply because it has not been needed yet therefore not implemented. + * + * + * @subsection subsec_tm_timesync Guest Time Sync / UTC time + * + * Guest time syncing is primarily taken care of by the VMM device. The + * principle is very simple, the guest additions periodically asks the VMM + * device what the current UTC time is and makes adjustments accordingly. + * + * A complicating factor is that the synchronous virtual clock might be doing + * catchups and the guest perception is currently a little bit behind the world + * but it will (hopefully) be catching up soon as we're feeding timer interrupts + * at a slightly higher rate. Adjusting the guest clock to the current wall + * time in the real world would be a bad idea then because the guest will be + * advancing too fast and run ahead of world time (if the catchup works out). + * To solve this problem TM provides the VMM device with an UTC time source that + * gets adjusted with the current lag, so that when the guest eventually catches + * up the lag it will be showing correct real world time. 
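+ *
+ * As a purely illustrative example: if the virtual sync clock is lagging 500 ms
+ * behind, TM hands the VMM device a UTC value that is also 500 ms in the past,
+ * so the guest does not run ahead of real world time and then have to drift
+ * back again while the catch-up is in progress.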
+ * + * + * @section sec_tm_timers Timers + * + * The timers can use any of the TM clocks described in the previous section. + * Each clock has its own scheduling facility, or timer queue if you like. + * There are a few factors which makes it a bit complex. First, there is the + * usual R0 vs R3 vs. RC thing. Then there are multiple threads, and then there + * is the timer thread that periodically checks whether any timers has expired + * without EMT noticing. On the API level, all but the create and save APIs + * must be multithreaded. EMT will always run the timers. + * + * The design is using a doubly linked list of active timers which is ordered + * by expire date. This list is only modified by the EMT thread. Updates to + * the list are batched in a singly linked list, which is then processed by the + * EMT thread at the first opportunity (immediately, next time EMT modifies a + * timer on that clock, or next timer timeout). Both lists are offset based and + * all the elements are therefore allocated from the hyper heap. + * + * For figuring out when there is need to schedule and run timers TM will: + * - Poll whenever somebody queries the virtual clock. + * - Poll the virtual clocks from the EM and REM loops. + * - Poll the virtual clocks from trap exit path. + * - Poll the virtual clocks and calculate first timeout from the halt loop. + * - Employ a thread which periodically (100Hz) polls all the timer queues. + * + * + * @image html TMTIMER-Statechart-Diagram.gif + * + * @section sec_tm_timer Logging + * + * Level 2: Logs a most of the timer state transitions and queue servicing. + * Level 3: Logs a few oddments. + * Level 4: Logs TMCLOCK_VIRTUAL_SYNC catch-up events. + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_TM +#ifdef DEBUG_bird +# define DBGFTRACE_DISABLED /* annoying */ +#endif +#include +#include /* for SUPGetCpuHzFromGip from sup.h */ +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include "TMInternal.h" +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "TMInline.h" + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The current saved state version.*/ +#define TM_SAVED_STATE_VERSION 3 + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static bool tmR3HasFixedTSC(PVM pVM); +static uint64_t tmR3CalibrateTSC(void); +static DECLCALLBACK(int) tmR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) tmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(void) tmR3TimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick); +static void tmR3TimerQueueRun(PVM 
pVM, PTMTIMERQUEUE pQueue); +static void tmR3TimerQueueRunVirtualSync(PVM pVM); +static DECLCALLBACK(int) tmR3SetWarpDrive(PUVM pUVM, uint32_t u32Percent); +#ifndef VBOX_WITHOUT_NS_ACCOUNTING +static DECLCALLBACK(void) tmR3CpuLoadTimer(PVM pVM, PTMTIMER pTimer, void *pvUser); +#endif +static DECLCALLBACK(void) tmR3TimerInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) tmR3TimerInfoActive(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(void) tmR3InfoClocks(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +static DECLCALLBACK(VBOXSTRICTRC) tmR3CpuTickParavirtDisable(PVM pVM, PVMCPU pVCpu, void *pvData); +static const char * tmR3GetTSCModeName(PVM pVM); +static const char * tmR3GetTSCModeNameEx(TMTSCMODE enmMode); + + +/** + * Initializes the TM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMM_INT_DECL(int) TMR3Init(PVM pVM) +{ + LogFlow(("TMR3Init:\n")); + + /* + * Assert alignment and sizes. + */ + AssertCompileMemberAlignment(VM, tm.s, 32); + AssertCompile(sizeof(pVM->tm.s) <= sizeof(pVM->tm.padding)); + AssertCompileMemberAlignment(TM, TimerCritSect, 8); + AssertCompileMemberAlignment(TM, VirtualSyncLock, 8); + + /* + * Init the structure. + */ + void *pv; + int rc = MMHyperAlloc(pVM, sizeof(pVM->tm.s.paTimerQueuesR3[0]) * TMCLOCK_MAX, 0, MM_TAG_TM, &pv); + AssertRCReturn(rc, rc); + pVM->tm.s.paTimerQueuesR3 = (PTMTIMERQUEUE)pv; + pVM->tm.s.paTimerQueuesR0 = MMHyperR3ToR0(pVM, pv); + pVM->tm.s.paTimerQueuesRC = MMHyperR3ToRC(pVM, pv); + + pVM->tm.s.offVM = RT_UOFFSETOF(VM, tm.s); + pVM->tm.s.idTimerCpu = pVM->cCpus - 1; /* The last CPU. */ + pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL].enmClock = TMCLOCK_VIRTUAL; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL].u64Expire = INT64_MAX; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL_SYNC].enmClock = TMCLOCK_VIRTUAL_SYNC; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL_SYNC].u64Expire = INT64_MAX; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_REAL].enmClock = TMCLOCK_REAL; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_REAL].u64Expire = INT64_MAX; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_TSC].enmClock = TMCLOCK_TSC; + pVM->tm.s.paTimerQueuesR3[TMCLOCK_TSC].u64Expire = INT64_MAX; + + + /* + * We directly use the GIP to calculate the virtual time. We map the + * the GIP into the guest context so we can do this calculation there + * as well and save costly world switches. + */ + PSUPGLOBALINFOPAGE pGip = g_pSUPGlobalInfoPage; + pVM->tm.s.pvGIPR3 = (void *)pGip; + AssertMsgReturn(pVM->tm.s.pvGIPR3, ("GIP support is now required!\n"), VERR_TM_GIP_REQUIRED); + AssertMsgReturn((pGip->u32Version >> 16) == (SUPGLOBALINFOPAGE_VERSION >> 16), + ("Unsupported GIP version %#x! 
(expected=%#x)\n", pGip->u32Version, SUPGLOBALINFOPAGE_VERSION), + VERR_TM_GIP_VERSION); + + RTHCPHYS HCPhysGIP; + rc = SUPR3GipGetPhys(&HCPhysGIP); + AssertMsgRCReturn(rc, ("Failed to get GIP physical address!\n"), rc); + + RTGCPTR GCPtr; +#ifdef SUP_WITH_LOTS_OF_CPUS + rc = MMR3HyperMapHCPhys(pVM, pVM->tm.s.pvGIPR3, NIL_RTR0PTR, HCPhysGIP, (size_t)pGip->cPages * PAGE_SIZE, + "GIP", &GCPtr); +#else + rc = MMR3HyperMapHCPhys(pVM, pVM->tm.s.pvGIPR3, NIL_RTR0PTR, HCPhysGIP, PAGE_SIZE, "GIP", &GCPtr); +#endif + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Failed to map GIP into GC, rc=%Rrc!\n", rc)); + return rc; + } + pVM->tm.s.pvGIPRC = GCPtr; + LogFlow(("TMR3Init: HCPhysGIP=%RHp at %RRv\n", HCPhysGIP, pVM->tm.s.pvGIPRC)); + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + + /* Check assumptions made in TMAllVirtual.cpp about the GIP update interval. */ + if ( pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC + && pGip->u32UpdateIntervalNS >= 250000000 /* 0.25s */) + return VMSetError(pVM, VERR_TM_GIP_UPDATE_INTERVAL_TOO_BIG, RT_SRC_POS, + N_("The GIP update interval is too big. u32UpdateIntervalNS=%RU32 (u32UpdateHz=%RU32)"), + pGip->u32UpdateIntervalNS, pGip->u32UpdateHz); + + /* Log GIP info that may come in handy. */ + LogRel(("TM: GIP - u32Mode=%d (%s) u32UpdateHz=%u u32UpdateIntervalNS=%u enmUseTscDelta=%d (%s) fGetGipCpu=%#x cCpus=%d\n", + pGip->u32Mode, SUPGetGIPModeName(pGip), pGip->u32UpdateHz, pGip->u32UpdateIntervalNS, + pGip->enmUseTscDelta, SUPGetGIPTscDeltaModeName(pGip), pGip->fGetGipCpu, pGip->cCpus)); + LogRel(("TM: GIP - u64CpuHz=%'RU64 (%#RX64) SUPGetCpuHzFromGip => %'RU64\n", + pGip->u64CpuHz, pGip->u64CpuHz, SUPGetCpuHzFromGip(pGip))); + for (uint32_t iCpuSet = 0; iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); iCpuSet++) + { + uint16_t iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]; + if (iGipCpu != UINT16_MAX) + LogRel(("TM: GIP - CPU: iCpuSet=%#x idCpu=%#x idApic=%#x iGipCpu=%#x i64TSCDelta=%RI64 enmState=%d u64CpuHz=%RU64(*) cErrors=%u\n", + iCpuSet, pGip->aCPUs[iGipCpu].idCpu, pGip->aCPUs[iGipCpu].idApic, iGipCpu, pGip->aCPUs[iGipCpu].i64TSCDelta, + pGip->aCPUs[iGipCpu].enmState, pGip->aCPUs[iGipCpu].u64CpuHz, pGip->aCPUs[iGipCpu].cErrors)); + } + + /* + * Setup the VirtualGetRaw backend. + */ + pVM->tm.s.pfnVirtualGetRawR3 = tmVirtualNanoTSRediscover; + pVM->tm.s.VirtualGetRawDataR3.pfnRediscover = tmVirtualNanoTSRediscover; + pVM->tm.s.VirtualGetRawDataR3.pfnBad = tmVirtualNanoTSBad; + pVM->tm.s.VirtualGetRawDataR3.pfnBadCpuIndex = tmVirtualNanoTSBadCpuIndex; + pVM->tm.s.VirtualGetRawDataR3.pu64Prev = &pVM->tm.s.u64VirtualRawPrev; + pVM->tm.s.VirtualGetRawDataRC.pu64Prev = MMHyperR3ToRC(pVM, (void *)&pVM->tm.s.u64VirtualRawPrev); + pVM->tm.s.VirtualGetRawDataR0.pu64Prev = MMHyperR3ToR0(pVM, (void *)&pVM->tm.s.u64VirtualRawPrev); + AssertRelease(pVM->tm.s.VirtualGetRawDataR0.pu64Prev); + /* The rest is done in TMR3InitFinalize() since it's too early to call PDM. */ + + /* + * Init the locks. + */ + rc = PDMR3CritSectInit(pVM, &pVM->tm.s.TimerCritSect, RT_SRC_POS, "TM Timer Lock"); + if (RT_FAILURE(rc)) + return rc; + rc = PDMR3CritSectInit(pVM, &pVM->tm.s.VirtualSyncLock, RT_SRC_POS, "TM VirtualSync Lock"); + if (RT_FAILURE(rc)) + return rc; + + /* + * Get our CFGM node, create it if necessary. + */ + PCFGMNODE pCfgHandle = CFGMR3GetChild(CFGMR3GetRoot(pVM), "TM"); + if (!pCfgHandle) + { + rc = CFGMR3InsertNode(CFGMR3GetRoot(pVM), "TM", &pCfgHandle); + AssertRCReturn(rc, rc); + } + + /* + * Specific errors about some obsolete TM settings (remove after 2015-12-03). 
+ */ + if (CFGMR3Exists(pCfgHandle, "TSCVirtualized")) + return VMSetError(pVM, VERR_CFGM_CONFIG_UNKNOWN_VALUE, RT_SRC_POS, + N_("Configuration error: TM setting \"TSCVirtualized\" is no longer supported. Use the \"TSCMode\" setting instead.")); + if (CFGMR3Exists(pCfgHandle, "UseRealTSC")) + return VMSetError(pVM, VERR_CFGM_CONFIG_UNKNOWN_VALUE, RT_SRC_POS, + N_("Configuration error: TM setting \"UseRealTSC\" is no longer supported. Use the \"TSCMode\" setting instead.")); + + if (CFGMR3Exists(pCfgHandle, "MaybeUseOffsettedHostTSC")) + return VMSetError(pVM, VERR_CFGM_CONFIG_UNKNOWN_VALUE, RT_SRC_POS, + N_("Configuration error: TM setting \"MaybeUseOffsettedHostTSC\" is no longer supported. Use the \"TSCMode\" setting instead.")); + + /* + * Validate the rest of the TM settings. + */ + rc = CFGMR3ValidateConfig(pCfgHandle, "/TM/", + "TSCMode|" + "TSCModeSwitchAllowed|" + "TSCTicksPerSecond|" + "TSCTiedToExecution|" + "TSCNotTiedToHalt|" + "ScheduleSlack|" + "CatchUpStopThreshold|" + "CatchUpGiveUpThreshold|" + "CatchUpStartThreshold0|CatchUpStartThreshold1|CatchUpStartThreshold2|CatchUpStartThreshold3|" + "CatchUpStartThreshold4|CatchUpStartThreshold5|CatchUpStartThreshold6|CatchUpStartThreshold7|" + "CatchUpStartThreshold8|CatchUpStartThreshold9|" + "CatchUpPrecentage0|CatchUpPrecentage1|CatchUpPrecentage2|CatchUpPrecentage3|" + "CatchUpPrecentage4|CatchUpPrecentage5|CatchUpPrecentage6|CatchUpPrecentage7|" + "CatchUpPrecentage8|CatchUpPrecentage9|" + "UTCOffset|" + "UTCTouchFileOnJump|" + "WarpDrivePercentage|" + "HostHzMax|" + "HostHzFudgeFactorTimerCpu|" + "HostHzFudgeFactorOtherCpu|" + "HostHzFudgeFactorCatchUp100|" + "HostHzFudgeFactorCatchUp200|" + "HostHzFudgeFactorCatchUp400|" + "TimerMillies" + , + "", + "TM", 0); + if (RT_FAILURE(rc)) + return rc; + + /* + * Determine the TSC configuration and frequency. + */ + /** @cfgm{/TM/TSCMode, string, Depends on the CPU and VM config} + * The name of the TSC mode to use: VirtTSCEmulated, RealTSCOffset or Dynamic. + * The default depends on the VM configuration and the capabilities of the + * host CPU. Other config options or runtime changes may override the TSC + * mode specified here. + */ + char szTSCMode[32]; + rc = CFGMR3QueryString(pCfgHandle, "TSCMode", szTSCMode, sizeof(szTSCMode)); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + { + /** @todo Rainy-day/never: Dynamic mode isn't currently suitable for SMP VMs, so + * fall back on the more expensive emulated mode. With the current TSC handling + * (frequent switching between offsetted mode and taking VM exits, on all VCPUs + * without any kind of coordination) will lead to inconsistent TSC behavior with + * guest SMP, including TSC going backwards. */ + pVM->tm.s.enmTSCMode = NEMR3NeedSpecialTscMode(pVM) ? TMTSCMODE_NATIVE_API + : pVM->cCpus == 1 && tmR3HasFixedTSC(pVM) ? 
TMTSCMODE_DYNAMIC : TMTSCMODE_VIRT_TSC_EMULATED; + } + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, N_("Configuration error: Failed to querying string value \"TSCMode\"")); + else + { + if (!RTStrCmp(szTSCMode, "VirtTSCEmulated")) + pVM->tm.s.enmTSCMode = TMTSCMODE_VIRT_TSC_EMULATED; + else if (!RTStrCmp(szTSCMode, "RealTSCOffset")) + pVM->tm.s.enmTSCMode = TMTSCMODE_REAL_TSC_OFFSET; + else if (!RTStrCmp(szTSCMode, "Dynamic")) + pVM->tm.s.enmTSCMode = TMTSCMODE_DYNAMIC; + else + return VMSetError(pVM, rc, RT_SRC_POS, N_("Configuration error: Unrecognized TM TSC mode value \"%s\""), szTSCMode); + if (NEMR3NeedSpecialTscMode(pVM)) + { + LogRel(("TM: NEM overrides the /TM/TSCMode=%s settings.\n", szTSCMode)); + pVM->tm.s.enmTSCMode = TMTSCMODE_NATIVE_API; + } + } + + /** + * @cfgm{/TM/TSCModeSwitchAllowed, bool, Whether TM TSC mode switch is allowed + * at runtime} + * When using paravirtualized guests, we dynamically switch TSC modes to a more + * optimal one for performance. This setting allows overriding this behaviour. + */ + rc = CFGMR3QueryBool(pCfgHandle, "TSCModeSwitchAllowed", &pVM->tm.s.fTSCModeSwitchAllowed); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + { + /* This is finally determined in TMR3InitFinalize() as GIM isn't initialized yet. */ + pVM->tm.s.fTSCModeSwitchAllowed = true; + } + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, N_("Configuration error: Failed to querying bool value \"TSCModeSwitchAllowed\"")); + if (pVM->tm.s.fTSCModeSwitchAllowed && pVM->tm.s.enmTSCMode == TMTSCMODE_NATIVE_API) + { + LogRel(("TM: NEM overrides the /TM/TSCModeSwitchAllowed setting.\n")); + pVM->tm.s.fTSCModeSwitchAllowed = false; + } + + /** @cfgm{/TM/TSCTicksPerSecond, uint32_t, Current TSC frequency from GIP} + * The number of TSC ticks per second (i.e. the TSC frequency). This will + * override enmTSCMode. + */ + rc = CFGMR3QueryU64(pCfgHandle, "TSCTicksPerSecond", &pVM->tm.s.cTSCTicksPerSecond); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + { + pVM->tm.s.cTSCTicksPerSecond = tmR3CalibrateTSC(); + if ( ( pVM->tm.s.enmTSCMode == TMTSCMODE_DYNAMIC + || pVM->tm.s.enmTSCMode == TMTSCMODE_VIRT_TSC_EMULATED) + && pVM->tm.s.cTSCTicksPerSecond >= _4G) + { + pVM->tm.s.cTSCTicksPerSecond = _4G - 1; /* (A limitation of our math code) */ + pVM->tm.s.enmTSCMode = TMTSCMODE_VIRT_TSC_EMULATED; + } + } + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint64_t value \"TSCTicksPerSecond\"")); + else if ( pVM->tm.s.cTSCTicksPerSecond < _1M + || pVM->tm.s.cTSCTicksPerSecond >= _4G) + return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, + N_("Configuration error: \"TSCTicksPerSecond\" = %RI64 is not in the range 1MHz..4GHz-1"), + pVM->tm.s.cTSCTicksPerSecond); + else if (pVM->tm.s.enmTSCMode != TMTSCMODE_NATIVE_API) + pVM->tm.s.enmTSCMode = TMTSCMODE_VIRT_TSC_EMULATED; + else + { + LogRel(("TM: NEM overrides the /TM/TSCTicksPerSecond=%RU64 setting.\n", pVM->tm.s.cTSCTicksPerSecond)); + pVM->tm.s.cTSCTicksPerSecond = tmR3CalibrateTSC(); + } + + /** @cfgm{/TM/TSCTiedToExecution, bool, false} + * Whether the TSC should be tied to execution. This will exclude most of the + * virtualization overhead, but will by default include the time spent in the + * halt state (see TM/TSCNotTiedToHalt). This setting will override all other + * TSC settings except for TSCTicksPerSecond and TSCNotTiedToHalt, which should + * be used avoided or used with great care. 
Note that this will only work right + * together with VT-x or AMD-V, and with a single virtual CPU. */ + rc = CFGMR3QueryBoolDef(pCfgHandle, "TSCTiedToExecution", &pVM->tm.s.fTSCTiedToExecution, false); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying bool value \"TSCTiedToExecution\"")); + if (pVM->tm.s.fTSCTiedToExecution && pVM->tm.s.enmTSCMode == TMTSCMODE_NATIVE_API) + return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, N_("/TM/TSCTiedToExecution is not supported in NEM mode!")); + if (pVM->tm.s.fTSCTiedToExecution) + pVM->tm.s.enmTSCMode = TMTSCMODE_VIRT_TSC_EMULATED; + + + /** @cfgm{/TM/TSCNotTiedToHalt, bool, false} + * This is used with /TM/TSCTiedToExecution to control how TSC operates + * accross HLT instructions. When true HLT is considered execution time and + * TSC continues to run, while when false (default) TSC stops during halt. */ + rc = CFGMR3QueryBoolDef(pCfgHandle, "TSCNotTiedToHalt", &pVM->tm.s.fTSCNotTiedToHalt, false); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying bool value \"TSCNotTiedToHalt\"")); + + /* + * Configure the timer synchronous virtual time. + */ + /** @cfgm{/TM/ScheduleSlack, uint32_t, ns, 0, UINT32_MAX, 100000} + * Scheduling slack when processing timers. */ + rc = CFGMR3QueryU32(pCfgHandle, "ScheduleSlack", &pVM->tm.s.u32VirtualSyncScheduleSlack); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + pVM->tm.s.u32VirtualSyncScheduleSlack = 100000; /* 0.100ms (ASSUMES virtual time is nanoseconds) */ + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying 32-bit integer value \"ScheduleSlack\"")); + + /** @cfgm{/TM/CatchUpStopThreshold, uint64_t, ns, 0, UINT64_MAX, 500000} + * When to stop a catch-up, considering it successful. */ + rc = CFGMR3QueryU64(pCfgHandle, "CatchUpStopThreshold", &pVM->tm.s.u64VirtualSyncCatchUpStopThreshold); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + pVM->tm.s.u64VirtualSyncCatchUpStopThreshold = 500000; /* 0.5ms */ + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying 64-bit integer value \"CatchUpStopThreshold\"")); + + /** @cfgm{/TM/CatchUpGiveUpThreshold, uint64_t, ns, 0, UINT64_MAX, 60000000000} + * When to give up a catch-up attempt. */ + rc = CFGMR3QueryU64(pCfgHandle, "CatchUpGiveUpThreshold", &pVM->tm.s.u64VirtualSyncCatchUpGiveUpThreshold); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + pVM->tm.s.u64VirtualSyncCatchUpGiveUpThreshold = UINT64_C(60000000000); /* 60 sec */ + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying 64-bit integer value \"CatchUpGiveUpThreshold\"")); + + + /** @cfgm{/TM/CatchUpPrecentage[0..9], uint32_t, %, 1, 2000, various} + * The catch-up percent for a given period. */ + /** @cfgm{/TM/CatchUpStartThreshold[0..9], uint64_t, ns, 0, UINT64_MAX} + * The catch-up period threshold, or if you like, when a period starts. 
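+ *
+ *          With the default table below this means, for instance, that a lag of
+ *          2 ms falls into period 1 (which starts at 1.5 ms), so the virtual
+ *          sync clock is run at 110% until the lag either drops below
+ *          CatchUpStopThreshold or grows into the next period.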
*/ +#define TM_CFG_PERIOD(iPeriod, DefStart, DefPct) \ + do \ + { \ + uint64_t u64; \ + rc = CFGMR3QueryU64(pCfgHandle, "CatchUpStartThreshold" #iPeriod, &u64); \ + if (rc == VERR_CFGM_VALUE_NOT_FOUND) \ + u64 = UINT64_C(DefStart); \ + else if (RT_FAILURE(rc)) \ + return VMSetError(pVM, rc, RT_SRC_POS, N_("Configuration error: Failed to querying 64-bit integer value \"CatchUpThreshold" #iPeriod "\"")); \ + if ( (iPeriod > 0 && u64 <= pVM->tm.s.aVirtualSyncCatchUpPeriods[iPeriod - 1].u64Start) \ + || u64 >= pVM->tm.s.u64VirtualSyncCatchUpGiveUpThreshold) \ + return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, N_("Configuration error: Invalid start of period #" #iPeriod ": %'RU64"), u64); \ + pVM->tm.s.aVirtualSyncCatchUpPeriods[iPeriod].u64Start = u64; \ + rc = CFGMR3QueryU32(pCfgHandle, "CatchUpPrecentage" #iPeriod, &pVM->tm.s.aVirtualSyncCatchUpPeriods[iPeriod].u32Percentage); \ + if (rc == VERR_CFGM_VALUE_NOT_FOUND) \ + pVM->tm.s.aVirtualSyncCatchUpPeriods[iPeriod].u32Percentage = (DefPct); \ + else if (RT_FAILURE(rc)) \ + return VMSetError(pVM, rc, RT_SRC_POS, N_("Configuration error: Failed to querying 32-bit integer value \"CatchUpPrecentage" #iPeriod "\"")); \ + } while (0) + /* This needs more tuning. Not sure if we really need so many period and be so gentle. */ + TM_CFG_PERIOD(0, 750000, 5); /* 0.75ms at 1.05x */ + TM_CFG_PERIOD(1, 1500000, 10); /* 1.50ms at 1.10x */ + TM_CFG_PERIOD(2, 8000000, 25); /* 8ms at 1.25x */ + TM_CFG_PERIOD(3, 30000000, 50); /* 30ms at 1.50x */ + TM_CFG_PERIOD(4, 75000000, 75); /* 75ms at 1.75x */ + TM_CFG_PERIOD(5, 175000000, 100); /* 175ms at 2x */ + TM_CFG_PERIOD(6, 500000000, 200); /* 500ms at 3x */ + TM_CFG_PERIOD(7, 3000000000, 300); /* 3s at 4x */ + TM_CFG_PERIOD(8,30000000000, 400); /* 30s at 5x */ + TM_CFG_PERIOD(9,55000000000, 500); /* 55s at 6x */ + AssertCompile(RT_ELEMENTS(pVM->tm.s.aVirtualSyncCatchUpPeriods) == 10); +#undef TM_CFG_PERIOD + + /* + * Configure real world time (UTC). + */ + /** @cfgm{/TM/UTCOffset, int64_t, ns, INT64_MIN, INT64_MAX, 0} + * The UTC offset. This is used to put the guest back or forwards in time. */ + rc = CFGMR3QueryS64(pCfgHandle, "UTCOffset", &pVM->tm.s.offUTC); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + pVM->tm.s.offUTC = 0; /* ns */ + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying 64-bit integer value \"UTCOffset\"")); + + /** @cfgm{/TM/UTCTouchFileOnJump, string, none} + * File to be written to everytime the host time jumps. */ + rc = CFGMR3QueryStringAlloc(pCfgHandle, "UTCTouchFileOnJump", &pVM->tm.s.pszUtcTouchFileOnJump); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + pVM->tm.s.pszUtcTouchFileOnJump = NULL; + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying string value \"UTCTouchFileOnJump\"")); + + /* + * Setup the warp drive. + */ + /** @cfgm{/TM/WarpDrivePercentage, uint32_t, %, 0, 20000, 100} + * The warp drive percentage, 100% is normal speed. This is used to speed up + * or slow down the virtual clock, which can be useful for fast forwarding + * borring periods during tests. 
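+ *          E.g. a value of 200 runs the virtual clock at twice real time and 50
+ *          at half speed (both merely illustrative values within the 2..20000
+ *          range enforced below).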
*/ + rc = CFGMR3QueryU32(pCfgHandle, "WarpDrivePercentage", &pVM->tm.s.u32VirtualWarpDrivePercentage); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + rc = CFGMR3QueryU32(CFGMR3GetRoot(pVM), "WarpDrivePercentage", &pVM->tm.s.u32VirtualWarpDrivePercentage); /* legacy */ + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + pVM->tm.s.u32VirtualWarpDrivePercentage = 100; + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"WarpDrivePercent\"")); + else if ( pVM->tm.s.u32VirtualWarpDrivePercentage < 2 + || pVM->tm.s.u32VirtualWarpDrivePercentage > 20000) + return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, + N_("Configuration error: \"WarpDrivePercent\" = %RI32 is not in the range 2..20000"), + pVM->tm.s.u32VirtualWarpDrivePercentage); + pVM->tm.s.fVirtualWarpDrive = pVM->tm.s.u32VirtualWarpDrivePercentage != 100; + if (pVM->tm.s.fVirtualWarpDrive) + { + if (pVM->tm.s.enmTSCMode == TMTSCMODE_NATIVE_API) + LogRel(("TM: Warp-drive active, escept for TSC which is in NEM mode. u32VirtualWarpDrivePercentage=%RI32\n", + pVM->tm.s.u32VirtualWarpDrivePercentage)); + else + { + pVM->tm.s.enmTSCMode = TMTSCMODE_VIRT_TSC_EMULATED; + LogRel(("TM: Warp-drive active. u32VirtualWarpDrivePercentage=%RI32\n", pVM->tm.s.u32VirtualWarpDrivePercentage)); + } + } + + /* + * Gather the Host Hz configuration values. + */ + rc = CFGMR3QueryU32Def(pCfgHandle, "HostHzMax", &pVM->tm.s.cHostHzMax, 20000); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"HostHzMax\"")); + + rc = CFGMR3QueryU32Def(pCfgHandle, "HostHzFudgeFactorTimerCpu", &pVM->tm.s.cPctHostHzFudgeFactorTimerCpu, 111); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"HostHzFudgeFactorTimerCpu\"")); + + rc = CFGMR3QueryU32Def(pCfgHandle, "HostHzFudgeFactorOtherCpu", &pVM->tm.s.cPctHostHzFudgeFactorOtherCpu, 110); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"HostHzFudgeFactorOtherCpu\"")); + + rc = CFGMR3QueryU32Def(pCfgHandle, "HostHzFudgeFactorCatchUp100", &pVM->tm.s.cPctHostHzFudgeFactorCatchUp100, 300); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"HostHzFudgeFactorCatchUp100\"")); + + rc = CFGMR3QueryU32Def(pCfgHandle, "HostHzFudgeFactorCatchUp200", &pVM->tm.s.cPctHostHzFudgeFactorCatchUp200, 250); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"HostHzFudgeFactorCatchUp200\"")); + + rc = CFGMR3QueryU32Def(pCfgHandle, "HostHzFudgeFactorCatchUp400", &pVM->tm.s.cPctHostHzFudgeFactorCatchUp400, 200); + if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to querying uint32_t value \"HostHzFudgeFactorCatchUp400\"")); + + /* + * Finally, setup and report. + */ + pVM->tm.s.enmOriginalTSCMode = pVM->tm.s.enmTSCMode; + CPUMR3SetCR4Feature(pVM, X86_CR4_TSD, ~X86_CR4_TSD); + LogRel(("TM: cTSCTicksPerSecond=%'RU64 (%#RX64) enmTSCMode=%d (%s)\n" + "TM: TSCTiedToExecution=%RTbool TSCNotTiedToHalt=%RTbool\n", + pVM->tm.s.cTSCTicksPerSecond, pVM->tm.s.cTSCTicksPerSecond, pVM->tm.s.enmTSCMode, tmR3GetTSCModeName(pVM), + pVM->tm.s.fTSCTiedToExecution, pVM->tm.s.fTSCNotTiedToHalt)); + + /* + * Start the timer (guard against REM not yielding). 
+ */ + /** @cfgm{/TM/TimerMillies, uint32_t, ms, 1, 1000, 10} + * The watchdog timer interval. */ + uint32_t u32Millies; + rc = CFGMR3QueryU32(pCfgHandle, "TimerMillies", &u32Millies); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + u32Millies = 10; + else if (RT_FAILURE(rc)) + return VMSetError(pVM, rc, RT_SRC_POS, + N_("Configuration error: Failed to query uint32_t value \"TimerMillies\"")); + rc = RTTimerCreate(&pVM->tm.s.pTimer, u32Millies, tmR3TimerCallback, pVM); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("Failed to create timer, u32Millies=%d rc=%Rrc.\n", u32Millies, rc)); + return rc; + } + Log(("TM: Created timer %p firing every %d milliseconds\n", pVM->tm.s.pTimer, u32Millies)); + pVM->tm.s.u32TimerMillies = u32Millies; + + /* + * Register saved state. + */ + rc = SSMR3RegisterInternal(pVM, "tm", 1, TM_SAVED_STATE_VERSION, sizeof(uint64_t) * 8, + NULL, NULL, NULL, + NULL, tmR3Save, NULL, + NULL, tmR3Load, NULL); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register statistics. + */ + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.VirtualGetRawDataR3.c1nsSteps,STAMTYPE_U32, "/TM/R3/1nsSteps", STAMUNIT_OCCURENCES, "Virtual time 1ns steps (due to TSC / GIP variations)."); + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.VirtualGetRawDataR3.cBadPrev, STAMTYPE_U32, "/TM/R3/cBadPrev", STAMUNIT_OCCURENCES, "Times the previous virtual time was considered erratic (shouldn't ever happen)."); + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.VirtualGetRawDataR0.c1nsSteps,STAMTYPE_U32, "/TM/R0/1nsSteps", STAMUNIT_OCCURENCES, "Virtual time 1ns steps (due to TSC / GIP variations)."); + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.VirtualGetRawDataR0.cBadPrev, STAMTYPE_U32, "/TM/R0/cBadPrev", STAMUNIT_OCCURENCES, "Times the previous virtual time was considered erratic (shouldn't ever happen)."); + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.VirtualGetRawDataRC.c1nsSteps,STAMTYPE_U32, "/TM/RC/1nsSteps", STAMUNIT_OCCURENCES, "Virtual time 1ns steps (due to TSC / GIP variations)."); + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.VirtualGetRawDataRC.cBadPrev, STAMTYPE_U32, "/TM/RC/cBadPrev", STAMUNIT_OCCURENCES, "Times the previous virtual time was considered erratic (shouldn't ever happen)."); + STAM_REL_REG( pVM,(void*)&pVM->tm.s.offVirtualSync, STAMTYPE_U64, "/TM/VirtualSync/CurrentOffset", STAMUNIT_NS, "The current offset. 
(subtract GivenUp to get the lag)"); + STAM_REL_REG_USED(pVM,(void*)&pVM->tm.s.offVirtualSyncGivenUp, STAMTYPE_U64, "/TM/VirtualSync/GivenUp", STAMUNIT_NS, "Nanoseconds of the 'CurrentOffset' that's been given up and won't ever be attempted caught up with."); + STAM_REL_REG( pVM,(void*)&pVM->tm.s.uMaxHzHint, STAMTYPE_U32, "/TM/MaxHzHint", STAMUNIT_HZ, "Max guest timer frequency hint."); + +#ifdef VBOX_WITH_STATISTICS + STAM_REG_USED(pVM,(void *)&pVM->tm.s.VirtualGetRawDataR3.cExpired, STAMTYPE_U32, "/TM/R3/cExpired", STAMUNIT_OCCURENCES, "Times the TSC interval expired (overlaps 1ns steps)."); + STAM_REG_USED(pVM,(void *)&pVM->tm.s.VirtualGetRawDataR3.cUpdateRaces,STAMTYPE_U32, "/TM/R3/cUpdateRaces", STAMUNIT_OCCURENCES, "Thread races when updating the previous timestamp."); + STAM_REG_USED(pVM,(void *)&pVM->tm.s.VirtualGetRawDataR0.cExpired, STAMTYPE_U32, "/TM/R0/cExpired", STAMUNIT_OCCURENCES, "Times the TSC interval expired (overlaps 1ns steps)."); + STAM_REG_USED(pVM,(void *)&pVM->tm.s.VirtualGetRawDataR0.cUpdateRaces,STAMTYPE_U32, "/TM/R0/cUpdateRaces", STAMUNIT_OCCURENCES, "Thread races when updating the previous timestamp."); + STAM_REG_USED(pVM,(void *)&pVM->tm.s.VirtualGetRawDataRC.cExpired, STAMTYPE_U32, "/TM/RC/cExpired", STAMUNIT_OCCURENCES, "Times the TSC interval expired (overlaps 1ns steps)."); + STAM_REG_USED(pVM,(void *)&pVM->tm.s.VirtualGetRawDataRC.cUpdateRaces,STAMTYPE_U32, "/TM/RC/cUpdateRaces", STAMUNIT_OCCURENCES, "Thread races when updating the previous timestamp."); + STAM_REG(pVM, &pVM->tm.s.StatDoQueues, STAMTYPE_PROFILE, "/TM/DoQueues", STAMUNIT_TICKS_PER_CALL, "Profiling timer TMR3TimerQueuesDo."); + STAM_REG(pVM, &pVM->tm.s.aStatDoQueues[TMCLOCK_VIRTUAL], STAMTYPE_PROFILE_ADV, "/TM/DoQueues/Virtual", STAMUNIT_TICKS_PER_CALL, "Time spent on the virtual clock queue."); + STAM_REG(pVM, &pVM->tm.s.aStatDoQueues[TMCLOCK_VIRTUAL_SYNC], STAMTYPE_PROFILE_ADV, "/TM/DoQueues/VirtualSync", STAMUNIT_TICKS_PER_CALL, "Time spent on the virtual sync clock queue."); + STAM_REG(pVM, &pVM->tm.s.aStatDoQueues[TMCLOCK_REAL], STAMTYPE_PROFILE_ADV, "/TM/DoQueues/Real", STAMUNIT_TICKS_PER_CALL, "Time spent on the real clock queue."); + + STAM_REG(pVM, &pVM->tm.s.StatPoll, STAMTYPE_COUNTER, "/TM/Poll", STAMUNIT_OCCURENCES, "TMTimerPoll calls."); + STAM_REG(pVM, &pVM->tm.s.StatPollAlreadySet, STAMTYPE_COUNTER, "/TM/Poll/AlreadySet", STAMUNIT_OCCURENCES, "TMTimerPoll calls where the FF was already set."); + STAM_REG(pVM, &pVM->tm.s.StatPollELoop, STAMTYPE_COUNTER, "/TM/Poll/ELoop", STAMUNIT_OCCURENCES, "Times TMTimerPoll has given up getting a consistent virtual sync data set."); + STAM_REG(pVM, &pVM->tm.s.StatPollMiss, STAMTYPE_COUNTER, "/TM/Poll/Miss", STAMUNIT_OCCURENCES, "TMTimerPoll calls where nothing had expired."); + STAM_REG(pVM, &pVM->tm.s.StatPollRunning, STAMTYPE_COUNTER, "/TM/Poll/Running", STAMUNIT_OCCURENCES, "TMTimerPoll calls where the queues were being run."); + STAM_REG(pVM, &pVM->tm.s.StatPollSimple, STAMTYPE_COUNTER, "/TM/Poll/Simple", STAMUNIT_OCCURENCES, "TMTimerPoll calls where we could take the simple path."); + STAM_REG(pVM, &pVM->tm.s.StatPollVirtual, STAMTYPE_COUNTER, "/TM/Poll/HitsVirtual", STAMUNIT_OCCURENCES, "The number of times TMTimerPoll found an expired TMCLOCK_VIRTUAL queue."); + STAM_REG(pVM, &pVM->tm.s.StatPollVirtualSync, STAMTYPE_COUNTER, "/TM/Poll/HitsVirtualSync", STAMUNIT_OCCURENCES, "The number of times TMTimerPoll found an expired TMCLOCK_VIRTUAL_SYNC queue."); + + STAM_REG(pVM, &pVM->tm.s.StatPostponedR3, STAMTYPE_COUNTER, 
"/TM/PostponedR3", STAMUNIT_OCCURENCES, "Postponed due to unschedulable state, in ring-3."); + STAM_REG(pVM, &pVM->tm.s.StatPostponedRZ, STAMTYPE_COUNTER, "/TM/PostponedRZ", STAMUNIT_OCCURENCES, "Postponed due to unschedulable state, in ring-0 / RC."); + + STAM_REG(pVM, &pVM->tm.s.StatScheduleOneR3, STAMTYPE_PROFILE, "/TM/ScheduleOneR3", STAMUNIT_TICKS_PER_CALL, "Profiling the scheduling of one queue during a TMTimer* call in EMT."); + STAM_REG(pVM, &pVM->tm.s.StatScheduleOneRZ, STAMTYPE_PROFILE, "/TM/ScheduleOneRZ", STAMUNIT_TICKS_PER_CALL, "Profiling the scheduling of one queue during a TMTimer* call in EMT."); + STAM_REG(pVM, &pVM->tm.s.StatScheduleSetFF, STAMTYPE_COUNTER, "/TM/ScheduleSetFF", STAMUNIT_OCCURENCES, "The number of times the timer FF was set instead of doing scheduling."); + + STAM_REG(pVM, &pVM->tm.s.StatTimerSet, STAMTYPE_COUNTER, "/TM/TimerSet", STAMUNIT_OCCURENCES, "Calls, except virtual sync timers"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetOpt, STAMTYPE_COUNTER, "/TM/TimerSet/Opt", STAMUNIT_OCCURENCES, "Optimized path taken."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetR3, STAMTYPE_PROFILE, "/TM/TimerSet/R3", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSet calls made in ring-3."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRZ, STAMTYPE_PROFILE, "/TM/TimerSet/RZ", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSet calls made in ring-0 / RC."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStActive, STAMTYPE_COUNTER, "/TM/TimerSet/StActive", STAMUNIT_OCCURENCES, "ACTIVE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStExpDeliver, STAMTYPE_COUNTER, "/TM/TimerSet/StExpDeliver", STAMUNIT_OCCURENCES, "EXPIRED_DELIVER"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStOther, STAMTYPE_COUNTER, "/TM/TimerSet/StOther", STAMUNIT_OCCURENCES, "Other states"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStPendStop, STAMTYPE_COUNTER, "/TM/TimerSet/StPendStop", STAMUNIT_OCCURENCES, "PENDING_STOP"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStPendStopSched, STAMTYPE_COUNTER, "/TM/TimerSet/StPendStopSched", STAMUNIT_OCCURENCES, "PENDING_STOP_SCHEDULE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStPendSched, STAMTYPE_COUNTER, "/TM/TimerSet/StPendSched", STAMUNIT_OCCURENCES, "PENDING_SCHEDULE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStPendResched, STAMTYPE_COUNTER, "/TM/TimerSet/StPendResched", STAMUNIT_OCCURENCES, "PENDING_RESCHEDULE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetStStopped, STAMTYPE_COUNTER, "/TM/TimerSet/StStopped", STAMUNIT_OCCURENCES, "STOPPED"); + + STAM_REG(pVM, &pVM->tm.s.StatTimerSetVs, STAMTYPE_COUNTER, "/TM/TimerSetVs", STAMUNIT_OCCURENCES, "TMTimerSet calls on virtual sync timers"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetVsR3, STAMTYPE_PROFILE, "/TM/TimerSetVs/R3", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSet calls made in ring-3 on virtual sync timers."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetVsRZ, STAMTYPE_PROFILE, "/TM/TimerSetVs/RZ", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSet calls made in ring-0 / RC on virtual sync timers."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetVsStActive, STAMTYPE_COUNTER, "/TM/TimerSetVs/StActive", STAMUNIT_OCCURENCES, "ACTIVE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetVsStExpDeliver, STAMTYPE_COUNTER, "/TM/TimerSetVs/StExpDeliver", STAMUNIT_OCCURENCES, "EXPIRED_DELIVER"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetVsStStopped, STAMTYPE_COUNTER, "/TM/TimerSetVs/StStopped", STAMUNIT_OCCURENCES, "STOPPED"); + + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelative, STAMTYPE_COUNTER, "/TM/TimerSetRelative", STAMUNIT_OCCURENCES, "Calls, except virtual sync timers"); + STAM_REG(pVM, 
&pVM->tm.s.StatTimerSetRelativeOpt, STAMTYPE_COUNTER, "/TM/TimerSetRelative/Opt", STAMUNIT_OCCURENCES, "Optimized path taken."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeR3, STAMTYPE_PROFILE, "/TM/TimerSetRelative/R3", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSetRelative calls made in ring-3 (sans virtual sync)."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeRZ, STAMTYPE_PROFILE, "/TM/TimerSetRelative/RZ", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSetReltaive calls made in ring-0 / RC (sans virtual sync)."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStActive, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StActive", STAMUNIT_OCCURENCES, "ACTIVE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStExpDeliver, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StExpDeliver", STAMUNIT_OCCURENCES, "EXPIRED_DELIVER"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStOther, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StOther", STAMUNIT_OCCURENCES, "Other states"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStPendStop, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StPendStop", STAMUNIT_OCCURENCES, "PENDING_STOP"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStPendStopSched, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StPendStopSched",STAMUNIT_OCCURENCES, "PENDING_STOP_SCHEDULE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStPendSched, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StPendSched", STAMUNIT_OCCURENCES, "PENDING_SCHEDULE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStPendResched, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StPendResched", STAMUNIT_OCCURENCES, "PENDING_RESCHEDULE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeStStopped, STAMTYPE_COUNTER, "/TM/TimerSetRelative/StStopped", STAMUNIT_OCCURENCES, "STOPPED"); + + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeVs, STAMTYPE_COUNTER, "/TM/TimerSetRelativeVs", STAMUNIT_OCCURENCES, "TMTimerSetRelative calls on virtual sync timers"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeVsR3, STAMTYPE_PROFILE, "/TM/TimerSetRelativeVs/R3", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSetRelative calls made in ring-3 on virtual sync timers."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeVsRZ, STAMTYPE_PROFILE, "/TM/TimerSetRelativeVs/RZ", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerSetReltaive calls made in ring-0 / RC on virtual sync timers."); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeVsStActive, STAMTYPE_COUNTER, "/TM/TimerSetRelativeVs/StActive", STAMUNIT_OCCURENCES, "ACTIVE"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeVsStExpDeliver, STAMTYPE_COUNTER, "/TM/TimerSetRelativeVs/StExpDeliver", STAMUNIT_OCCURENCES, "EXPIRED_DELIVER"); + STAM_REG(pVM, &pVM->tm.s.StatTimerSetRelativeVsStStopped, STAMTYPE_COUNTER, "/TM/TimerSetRelativeVs/StStopped", STAMUNIT_OCCURENCES, "STOPPED"); + + STAM_REG(pVM, &pVM->tm.s.StatTimerStopR3, STAMTYPE_PROFILE, "/TM/TimerStopR3", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerStop calls made in ring-3."); + STAM_REG(pVM, &pVM->tm.s.StatTimerStopRZ, STAMTYPE_PROFILE, "/TM/TimerStopRZ", STAMUNIT_TICKS_PER_CALL, "Profiling TMTimerStop calls made in ring-0 / RC."); + + STAM_REG(pVM, &pVM->tm.s.StatVirtualGet, STAMTYPE_COUNTER, "/TM/VirtualGet", STAMUNIT_OCCURENCES, "The number of times TMTimerGet was called when the clock was running."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualGetSetFF, STAMTYPE_COUNTER, "/TM/VirtualGetSetFF", STAMUNIT_OCCURENCES, "Times we set the FF when calling TMTimerGet."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGet, STAMTYPE_COUNTER, "/TM/VirtualSyncGet", STAMUNIT_OCCURENCES, "The number of 
times tmVirtualSyncGetEx was called."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGetAdjLast, STAMTYPE_COUNTER, "/TM/VirtualSyncGet/AdjLast", STAMUNIT_OCCURENCES, "Times we've adjusted against the last returned time stamp ."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGetELoop, STAMTYPE_COUNTER, "/TM/VirtualSyncGet/ELoop", STAMUNIT_OCCURENCES, "Times tmVirtualSyncGetEx has given up getting a consistent virtual sync data set."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGetExpired, STAMTYPE_COUNTER, "/TM/VirtualSyncGet/Expired", STAMUNIT_OCCURENCES, "Times tmVirtualSyncGetEx encountered an expired timer stopping the clock."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGetLocked, STAMTYPE_COUNTER, "/TM/VirtualSyncGet/Locked", STAMUNIT_OCCURENCES, "Times we successfully acquired the lock in tmVirtualSyncGetEx."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGetLockless, STAMTYPE_COUNTER, "/TM/VirtualSyncGet/Lockless", STAMUNIT_OCCURENCES, "Times tmVirtualSyncGetEx returned without needing to take the lock."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGetSetFF, STAMTYPE_COUNTER, "/TM/VirtualSyncGet/SetFF", STAMUNIT_OCCURENCES, "Times we set the FF when calling tmVirtualSyncGetEx."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualPause, STAMTYPE_COUNTER, "/TM/VirtualPause", STAMUNIT_OCCURENCES, "The number of times TMR3TimerPause was called."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualResume, STAMTYPE_COUNTER, "/TM/VirtualResume", STAMUNIT_OCCURENCES, "The number of times TMR3TimerResume was called."); + + STAM_REG(pVM, &pVM->tm.s.StatTimerCallbackSetFF, STAMTYPE_COUNTER, "/TM/CallbackSetFF", STAMUNIT_OCCURENCES, "The number of times the timer callback set FF."); + STAM_REG(pVM, &pVM->tm.s.StatTimerCallback, STAMTYPE_COUNTER, "/TM/Callback", STAMUNIT_OCCURENCES, "The number of times the timer callback is invoked."); + + STAM_REG(pVM, &pVM->tm.s.StatTSCCatchupLE010, STAMTYPE_COUNTER, "/TM/TSC/Intercept/CatchupLE010", STAMUNIT_OCCURENCES, "In catch-up mode, 10% or lower."); + STAM_REG(pVM, &pVM->tm.s.StatTSCCatchupLE025, STAMTYPE_COUNTER, "/TM/TSC/Intercept/CatchupLE025", STAMUNIT_OCCURENCES, "In catch-up mode, 25%-11%."); + STAM_REG(pVM, &pVM->tm.s.StatTSCCatchupLE100, STAMTYPE_COUNTER, "/TM/TSC/Intercept/CatchupLE100", STAMUNIT_OCCURENCES, "In catch-up mode, 100%-26%."); + STAM_REG(pVM, &pVM->tm.s.StatTSCCatchupOther, STAMTYPE_COUNTER, "/TM/TSC/Intercept/CatchupOther", STAMUNIT_OCCURENCES, "In catch-up mode, > 100%."); + STAM_REG(pVM, &pVM->tm.s.StatTSCNotFixed, STAMTYPE_COUNTER, "/TM/TSC/Intercept/NotFixed", STAMUNIT_OCCURENCES, "TSC is not fixed, it may run at variable speed."); + STAM_REG(pVM, &pVM->tm.s.StatTSCNotTicking, STAMTYPE_COUNTER, "/TM/TSC/Intercept/NotTicking", STAMUNIT_OCCURENCES, "TSC is not ticking."); + STAM_REG(pVM, &pVM->tm.s.StatTSCSyncNotTicking, STAMTYPE_COUNTER, "/TM/TSC/Intercept/SyncNotTicking", STAMUNIT_OCCURENCES, "VirtualSync isn't ticking."); + STAM_REG(pVM, &pVM->tm.s.StatTSCWarp, STAMTYPE_COUNTER, "/TM/TSC/Intercept/Warp", STAMUNIT_OCCURENCES, "Warpdrive is active."); + STAM_REG(pVM, &pVM->tm.s.StatTSCSet, STAMTYPE_COUNTER, "/TM/TSC/Sets", STAMUNIT_OCCURENCES, "Calls to TMCpuTickSet."); + STAM_REG(pVM, &pVM->tm.s.StatTSCUnderflow, STAMTYPE_COUNTER, "/TM/TSC/Underflow", STAMUNIT_OCCURENCES, "TSC underflow; corrected with last seen value ."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualPause, STAMTYPE_COUNTER, "/TM/TSC/Pause", STAMUNIT_OCCURENCES, "The number of times the TSC was paused."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualResume, STAMTYPE_COUNTER, "/TM/TSC/Resume", 
STAMUNIT_OCCURENCES, "The number of times the TSC was resumed."); +#endif /* VBOX_WITH_STATISTICS */ + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.offTSCRawSrc, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS, "TSC offset relative the raw source", "/TM/TSC/offCPU%u", i); +#ifndef VBOX_WITHOUT_NS_ACCOUNTING +# if defined(VBOX_WITH_STATISTICS) || defined(VBOX_WITH_NS_ACCOUNTING_STATS) + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsTotal, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_NS, "Resettable: Total CPU run time.", "/TM/CPU/%02u", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsExecuting, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_OCCURENCE, "Resettable: Time spent executing guest code.", "/TM/CPU/%02u/PrfExecuting", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsExecLong, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_OCCURENCE, "Resettable: Time spent executing guest code - long hauls.", "/TM/CPU/%02u/PrfExecLong", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsExecShort, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_OCCURENCE, "Resettable: Time spent executing guest code - short stretches.", "/TM/CPU/%02u/PrfExecShort", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsExecTiny, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_OCCURENCE, "Resettable: Time spent executing guest code - tiny bits.", "/TM/CPU/%02u/PrfExecTiny", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsHalted, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_OCCURENCE, "Resettable: Time spent halted.", "/TM/CPU/%02u/PrfHalted", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.StatNsOther, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_OCCURENCE, "Resettable: Time spent in the VMM or preempted.", "/TM/CPU/%02u/PrfOther", i); +# endif + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.cNsTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_NS, "Total CPU run time.", "/TM/CPU/%02u/cNsTotal", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.cNsExecuting, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_NS, "Time spent executing guest code.", "/TM/CPU/%02u/cNsExecuting", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.cNsHalted, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_NS, "Time spent halted.", "/TM/CPU/%02u/cNsHalted", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.cNsOther, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_NS, "Time spent in the VMM or preempted.", "/TM/CPU/%02u/cNsOther", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.cPeriodsExecuting, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, "Times executed guest code.", "/TM/CPU/%02u/cPeriodsExecuting", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.cPeriodsHalted, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, "Times halted.", "/TM/CPU/%02u/cPeriodsHalted", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.CpuLoad.cPctExecuting, STAMTYPE_U8, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "Time spent executing guest code recently.", "/TM/CPU/%02u/pctExecuting", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.CpuLoad.cPctHalted, STAMTYPE_U8, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "Time spent halted recently.", "/TM/CPU/%02u/pctHalted", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].tm.s.CpuLoad.cPctOther, STAMTYPE_U8, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "Time spent in the VMM or preempted recently.", "/TM/CPU/%02u/pctOther", i); +#endif + } +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + STAMR3RegisterF(pVM, &pVM->tm.s.CpuLoad.cPctExecuting, 
STAMTYPE_U8, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "Time spent executing guest code recently.", "/TM/CPU/pctExecuting"); + STAMR3RegisterF(pVM, &pVM->tm.s.CpuLoad.cPctHalted, STAMTYPE_U8, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "Time spent halted recently.", "/TM/CPU/pctHalted"); + STAMR3RegisterF(pVM, &pVM->tm.s.CpuLoad.cPctOther, STAMTYPE_U8, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "Time spent in the VMM or preempted recently.", "/TM/CPU/pctOther"); +#endif + +#ifdef VBOX_WITH_STATISTICS + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncCatchup, STAMTYPE_PROFILE_ADV, "/TM/VirtualSync/CatchUp", STAMUNIT_TICKS_PER_OCCURENCE, "Counting and measuring the times spent catching up."); + STAM_REG(pVM, (void *)&pVM->tm.s.fVirtualSyncCatchUp, STAMTYPE_U8, "/TM/VirtualSync/CatchUpActive", STAMUNIT_NONE, "Catch-Up active indicator."); + STAM_REG(pVM, (void *)&pVM->tm.s.u32VirtualSyncCatchUpPercentage, STAMTYPE_U32, "/TM/VirtualSync/CatchUpPercentage", STAMUNIT_PCT, "The catch-up percentage. (+100/100 to get clock multiplier)"); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncFF, STAMTYPE_PROFILE, "/TM/VirtualSync/FF", STAMUNIT_TICKS_PER_OCCURENCE, "Time spent in TMR3VirtualSyncFF by all but the dedicate timer EMT."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGiveUp, STAMTYPE_COUNTER, "/TM/VirtualSync/GiveUp", STAMUNIT_OCCURENCES, "Times the catch-up was abandoned."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncGiveUpBeforeStarting, STAMTYPE_COUNTER, "/TM/VirtualSync/GiveUpBeforeStarting",STAMUNIT_OCCURENCES, "Times the catch-up was abandoned before even starting. (Typically debugging++.)"); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncRun, STAMTYPE_COUNTER, "/TM/VirtualSync/Run", STAMUNIT_OCCURENCES, "Times the virtual sync timer queue was considered."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncRunRestart, STAMTYPE_COUNTER, "/TM/VirtualSync/Run/Restarts", STAMUNIT_OCCURENCES, "Times the clock was restarted after a run."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncRunStop, STAMTYPE_COUNTER, "/TM/VirtualSync/Run/Stop", STAMUNIT_OCCURENCES, "Times the clock was stopped when calculating the current time before examining the timers."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncRunStoppedAlready, STAMTYPE_COUNTER, "/TM/VirtualSync/Run/StoppedAlready", STAMUNIT_OCCURENCES, "Times the clock was already stopped elsewhere (TMVirtualSyncGet)."); + STAM_REG(pVM, &pVM->tm.s.StatVirtualSyncRunSlack, STAMTYPE_PROFILE, "/TM/VirtualSync/Run/Slack", STAMUNIT_NS_PER_OCCURENCE, "The scheduling slack. (Catch-up handed out when running timers.)"); + for (unsigned i = 0; i < RT_ELEMENTS(pVM->tm.s.aVirtualSyncCatchUpPeriods); i++) + { + STAMR3RegisterF(pVM, &pVM->tm.s.aVirtualSyncCatchUpPeriods[i].u32Percentage, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_PCT, "The catch-up percentage.", "/TM/VirtualSync/Periods/%u", i); + STAMR3RegisterF(pVM, &pVM->tm.s.aStatVirtualSyncCatchupAdjust[i], STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Times adjusted to this period.", "/TM/VirtualSync/Periods/%u/Adjust", i); + STAMR3RegisterF(pVM, &pVM->tm.s.aStatVirtualSyncCatchupInitial[i], STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Times started in this period.", "/TM/VirtualSync/Periods/%u/Initial", i); + STAMR3RegisterF(pVM, &pVM->tm.s.aVirtualSyncCatchUpPeriods[i].u64Start, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_NS, "Start of this period (lag).", "/TM/VirtualSync/Periods/%u/Start", i); + } +#endif /* VBOX_WITH_STATISTICS */ + + /* + * Register info handlers. 
+ */ + DBGFR3InfoRegisterInternalEx(pVM, "timers", "Dumps all timers. No arguments.", tmR3TimerInfo, DBGFINFO_FLAGS_RUN_ON_EMT); + DBGFR3InfoRegisterInternalEx(pVM, "activetimers", "Dumps active all timers. No arguments.", tmR3TimerInfoActive, DBGFINFO_FLAGS_RUN_ON_EMT); + DBGFR3InfoRegisterInternalEx(pVM, "clocks", "Display the time of the various clocks.", tmR3InfoClocks, DBGFINFO_FLAGS_RUN_ON_EMT); + + return VINF_SUCCESS; +} + + +/** + * Checks if the host CPU has a fixed TSC frequency. + * + * @returns true if it has, false if it hasn't. + * + * @remarks This test doesn't bother with very old CPUs that don't do power + * management or any other stuff that might influence the TSC rate. + * This isn't currently relevant. + */ +static bool tmR3HasFixedTSC(PVM pVM) +{ + /* + * ASSUME that if the GIP is in invariant TSC mode, it's because the CPU + * actually has invariant TSC. + */ + PSUPGLOBALINFOPAGE pGip = g_pSUPGlobalInfoPage; + if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC) + return true; + + /* + * Go by features and model info from the CPUID instruction. + */ + if (ASMHasCpuId()) + { + uint32_t uEAX, uEBX, uECX, uEDX; + + /* + * By feature. (Used to be AMD specific, intel seems to have picked it up.) + */ + ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX); + if (uEAX >= 0x80000007 && ASMIsValidExtRange(uEAX)) + { + ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX); + if ( (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */ + && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC) /* No fixed tsc if the gip timer is in async mode. */ + return true; + } + + /* + * By model. + */ + if (CPUMGetHostCpuVendor(pVM) == CPUMCPUVENDOR_AMD) + { + /* + * AuthenticAMD - Check for APM support and that TscInvariant is set. + * + * This test isn't correct with respect to fixed/non-fixed TSC and + * older models, but this isn't relevant since the result is currently + * only used for making a decision on AMD-V models. + */ +#if 0 /* Promoted to generic */ + ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX); + if (uEAX >= 0x80000007) + { + ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX); + if ( (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */ + && ( pGip->u32Mode == SUPGIPMODE_SYNC_TSC /* No fixed tsc if the gip timer is in async mode. */ + || pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)) + return true; + } +#endif + } + else if (CPUMGetHostCpuVendor(pVM) == CPUMCPUVENDOR_INTEL) + { + /* + * GenuineIntel - Check the model number. + * + * This test is lacking in the same way and for the same reasons + * as the AMD test above. + */ + /** @todo use ASMGetCpuFamily() and ASMGetCpuModel() here. */ + ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX); + unsigned uModel = (uEAX >> 4) & 0x0f; + unsigned uFamily = (uEAX >> 8) & 0x0f; + if (uFamily == 0x0f) + uFamily += (uEAX >> 20) & 0xff; + if (uFamily >= 0x06) + uModel += ((uEAX >> 16) & 0x0f) << 4; + if ( (uFamily == 0x0f /*P4*/ && uModel >= 0x03) + || (uFamily == 0x06 /*P2/P3*/ && uModel >= 0x0e)) + return true; + } + else if (CPUMGetHostCpuVendor(pVM) == CPUMCPUVENDOR_VIA) + { + /* + * CentaurHauls - Check the model, family and stepping. + * + * This only checks for VIA CPU models Nano X2, Nano X3, + * Eden X2 and QuadCore. + */ + /** @todo use ASMGetCpuFamily() and ASMGetCpuModel() here. 
*/ + ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX); + unsigned uStepping = (uEAX & 0x0f); + unsigned uModel = (uEAX >> 4) & 0x0f; + unsigned uFamily = (uEAX >> 8) & 0x0f; + if ( uFamily == 0x06 + && uModel == 0x0f + && uStepping >= 0x0c + && uStepping <= 0x0f) + return true; + } + else if (CPUMGetHostCpuVendor(pVM) == CPUMCPUVENDOR_SHANGHAI) + { + /* + * Shanghai - Check the model, family and stepping. + */ + /** @todo use ASMGetCpuFamily() and ASMGetCpuModel() here. */ + ASMCpuId(1, &uEAX, &uEBX, &uECX, &uEDX); + unsigned uFamily = (uEAX >> 8) & 0x0f; + if ( uFamily == 0x06 + || uFamily == 0x07) + { + return true; + } + } + } + return false; +} + + +/** + * Calibrate the CPU tick. + * + * @returns Number of ticks per second. + */ +static uint64_t tmR3CalibrateTSC(void) +{ + uint64_t u64Hz; + + /* + * Use GIP when available. Prefere the nominal one, no need to wait for it. + */ + PSUPGLOBALINFOPAGE pGip = g_pSUPGlobalInfoPage; + if (pGip) + { + u64Hz = pGip->u64CpuHz; + if (u64Hz < _1T && u64Hz > _1M) + return u64Hz; + AssertFailed(); /* This shouldn't happen. */ + + u64Hz = SUPGetCpuHzFromGip(pGip); + if (u64Hz < _1T && u64Hz > _1M) + return u64Hz; + + AssertFailed(); /* This shouldn't happen. */ + } + /* else: This should only happen in fake SUPLib mode, which we don't really support any more... */ + + /* Call this once first to make sure it's initialized. */ + RTTimeNanoTS(); + + /* + * Yield the CPU to increase our chances of getting + * a correct value. + */ + RTThreadYield(); /* Try avoid interruptions between TSC and NanoTS samplings. */ + static const unsigned s_auSleep[5] = { 50, 30, 30, 40, 40 }; + uint64_t au64Samples[5]; + unsigned i; + for (i = 0; i < RT_ELEMENTS(au64Samples); i++) + { + RTMSINTERVAL cMillies; + int cTries = 5; + uint64_t u64Start = ASMReadTSC(); + uint64_t u64End; + uint64_t StartTS = RTTimeNanoTS(); + uint64_t EndTS; + do + { + RTThreadSleep(s_auSleep[i]); + u64End = ASMReadTSC(); + EndTS = RTTimeNanoTS(); + cMillies = (RTMSINTERVAL)((EndTS - StartTS + 500000) / 1000000); + } while ( cMillies == 0 /* the sleep may be interrupted... */ + || (cMillies < 20 && --cTries > 0)); + uint64_t u64Diff = u64End - u64Start; + + au64Samples[i] = (u64Diff * 1000) / cMillies; + AssertMsg(cTries > 0, ("cMillies=%d i=%d\n", cMillies, i)); + } + + /* + * Discard the highest and lowest results and calculate the average. + */ + unsigned iHigh = 0; + unsigned iLow = 0; + for (i = 1; i < RT_ELEMENTS(au64Samples); i++) + { + if (au64Samples[i] < au64Samples[iLow]) + iLow = i; + if (au64Samples[i] > au64Samples[iHigh]) + iHigh = i; + } + au64Samples[iLow] = 0; + au64Samples[iHigh] = 0; + + u64Hz = au64Samples[0]; + for (i = 1; i < RT_ELEMENTS(au64Samples); i++) + u64Hz += au64Samples[i]; + u64Hz /= RT_ELEMENTS(au64Samples) - 2; + + return u64Hz; +} + + +/** + * Finalizes the TM initialization. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMM_INT_DECL(int) TMR3InitFinalize(PVM pVM) +{ + int rc; + + /* + * Resolve symbols. 
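+ * (This looks up the ring-0, and for raw mode also the raw-mode context, addresses
+ * of the lockless nanosecond timestamp workers -- tmVirtualNanoTSBad,
+ * tmVirtualNanoTSBadCpuIndex and tmVirtualNanoTSRediscover -- so the other contexts
+ * can call them directly; see the PDMR3LdrGetSymbol* calls below.)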
+ */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "tmVirtualNanoTSBad", &pVM->tm.s.VirtualGetRawDataRC.pfnBad); + AssertRCReturn(rc, rc); + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "tmVirtualNanoTSBadCpuIndex", &pVM->tm.s.VirtualGetRawDataRC.pfnBadCpuIndex); + AssertRCReturn(rc, rc); + rc = PDMR3LdrGetSymbolRC(pVM, NULL, "tmVirtualNanoTSRediscover", &pVM->tm.s.VirtualGetRawDataRC.pfnRediscover); + AssertRCReturn(rc, rc); + pVM->tm.s.pfnVirtualGetRawRC = pVM->tm.s.VirtualGetRawDataRC.pfnRediscover; + } + + rc = PDMR3LdrGetSymbolR0(pVM, NULL, "tmVirtualNanoTSBad", &pVM->tm.s.VirtualGetRawDataR0.pfnBad); + AssertRCReturn(rc, rc); + rc = PDMR3LdrGetSymbolR0(pVM, NULL, "tmVirtualNanoTSBadCpuIndex", &pVM->tm.s.VirtualGetRawDataR0.pfnBadCpuIndex); + AssertRCReturn(rc, rc); + rc = PDMR3LdrGetSymbolR0(pVM, NULL, "tmVirtualNanoTSRediscover", &pVM->tm.s.VirtualGetRawDataR0.pfnRediscover); + AssertRCReturn(rc, rc); + pVM->tm.s.pfnVirtualGetRawR0 = pVM->tm.s.VirtualGetRawDataR0.pfnRediscover; + +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + /* + * Create a timer for refreshing the CPU load stats. + */ + PTMTIMER pTimer; + rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL, tmR3CpuLoadTimer, NULL, "CPU Load Timer", &pTimer); + if (RT_SUCCESS(rc)) + rc = TMTimerSetMillies(pTimer, 1000); +#endif + + /* + * GIM is now initialized. Determine if TSC mode switching is allowed (respecting CFGM override). + */ + pVM->tm.s.fTSCModeSwitchAllowed &= tmR3HasFixedTSC(pVM) && GIMIsEnabled(pVM) && !VM_IS_RAW_MODE_ENABLED(pVM); + LogRel(("TM: TMR3InitFinalize: fTSCModeSwitchAllowed=%RTbool\n", pVM->tm.s.fTSCModeSwitchAllowed)); + return rc; +} + + +/** + * Applies relocations to data and code managed by this + * component. This function will be called at init and + * whenever the VMM needs to relocate itself inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + */ +VMM_INT_DECL(void) TMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + LogFlow(("TMR3Relocate\n")); + + pVM->tm.s.paTimerQueuesR0 = MMHyperR3ToR0(pVM, pVM->tm.s.paTimerQueuesR3); + + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + pVM->tm.s.pvGIPRC = MMHyperR3ToRC(pVM, pVM->tm.s.pvGIPR3); + pVM->tm.s.paTimerQueuesRC = MMHyperR3ToRC(pVM, pVM->tm.s.paTimerQueuesR3); + pVM->tm.s.VirtualGetRawDataRC.pu64Prev += offDelta; + pVM->tm.s.VirtualGetRawDataRC.pfnBad += offDelta; + pVM->tm.s.VirtualGetRawDataRC.pfnBadCpuIndex += offDelta; + pVM->tm.s.VirtualGetRawDataRC.pfnRediscover += offDelta; + pVM->tm.s.pfnVirtualGetRawRC += offDelta; + } + + /* + * Iterate the timers updating the pVMRC pointers. + */ + for (PTMTIMER pTimer = pVM->tm.s.pCreated; pTimer; pTimer = pTimer->pBigNext) + { + pTimer->pVMRC = pVM->pVMRC; + pTimer->pVMR0 = pVM->pVMR0; + } +} + + +/** + * Terminates the TM. + * + * Termination means cleaning up and freeing all resources; + * the VM itself is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMM_INT_DECL(int) TMR3Term(PVM pVM) +{ + AssertMsg(pVM->tm.s.offVM, ("bad init order!\n")); + if (pVM->tm.s.pTimer) + { + int rc = RTTimerDestroy(pVM->tm.s.pTimer); + AssertRC(rc); + pVM->tm.s.pTimer = NULL; + } + + return VINF_SUCCESS; +} + + +/** + * The VM is being reset. + * + * For the TM component this means that a rescheduling is performed and + * the FF is cleared, but without running the queues. We'll have to + * check if this makes sense or not, but it seems like a good idea now....
+ * + * @param pVM The cross context VM structure. + */ +VMM_INT_DECL(void) TMR3Reset(PVM pVM) +{ + LogFlow(("TMR3Reset:\n")); + VM_ASSERT_EMT(pVM); + TM_LOCK_TIMERS(pVM); + + /* + * Abort any pending catch up. + * This isn't perfect... + */ + if (pVM->tm.s.fVirtualSyncCatchUp) + { + const uint64_t offVirtualNow = TMVirtualGetNoCheck(pVM); + const uint64_t offVirtualSyncNow = TMVirtualSyncGetNoCheck(pVM); + if (pVM->tm.s.fVirtualSyncCatchUp) + { + STAM_PROFILE_ADV_STOP(&pVM->tm.s.StatVirtualSyncCatchup, c); + + const uint64_t offOld = pVM->tm.s.offVirtualSyncGivenUp; + const uint64_t offNew = offVirtualNow - offVirtualSyncNow; + Assert(offOld <= offNew); + ASMAtomicWriteU64((uint64_t volatile *)&pVM->tm.s.offVirtualSyncGivenUp, offNew); + ASMAtomicWriteU64((uint64_t volatile *)&pVM->tm.s.offVirtualSync, offNew); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncCatchUp, false); + LogRel(("TM: Aborting catch-up attempt on reset with a %'RU64 ns lag on reset; new total: %'RU64 ns\n", offNew - offOld, offNew)); + } + } + + /* + * Process the queues. + */ + for (int i = 0; i < TMCLOCK_MAX; i++) + tmTimerQueueSchedule(pVM, &pVM->tm.s.paTimerQueuesR3[i]); +#ifdef VBOX_STRICT + tmTimerQueuesSanityChecks(pVM, "TMR3Reset"); +#endif + + PVMCPU pVCpuDst = &pVM->aCpus[pVM->tm.s.idTimerCpu]; + VMCPU_FF_CLEAR(pVCpuDst, VMCPU_FF_TIMER); /** @todo FIXME: this isn't right. */ + + /* + * Switch TM TSC mode back to the original mode after a reset for + * paravirtualized guests that alter the TM TSC mode during operation. + */ + if ( pVM->tm.s.fTSCModeSwitchAllowed + && pVM->tm.s.enmTSCMode != pVM->tm.s.enmOriginalTSCMode) + { + VM_ASSERT_EMT0(pVM); + tmR3CpuTickParavirtDisable(pVM, &pVM->aCpus[0], NULL /* pvData */); + } + Assert(!GIMIsParavirtTscEnabled(pVM)); + pVM->tm.s.fParavirtTscEnabled = false; + + /* + * Reset TSC to avoid a Windows 8+ bug (see @bugref{8926}). If Windows + * sees TSC value beyond 0x40000000000 at startup, it will reset the + * TSC on boot-up CPU only, causing confusion and mayhem with SMP. + */ + VM_ASSERT_EMT0(pVM); + uint64_t offTscRawSrc; + switch (pVM->tm.s.enmTSCMode) + { + case TMTSCMODE_REAL_TSC_OFFSET: + offTscRawSrc = SUPReadTsc(); + break; + case TMTSCMODE_DYNAMIC: + case TMTSCMODE_VIRT_TSC_EMULATED: + offTscRawSrc = TMVirtualSyncGetNoCheck(pVM); + offTscRawSrc = ASMMultU64ByU32DivByU32(offTscRawSrc, pVM->tm.s.cTSCTicksPerSecond, TMCLOCK_FREQ_VIRTUAL); + break; + case TMTSCMODE_NATIVE_API: + /** @todo NEM TSC reset on reset for Windows8+ bug workaround. */ + offTscRawSrc = 0; + break; + default: + AssertFailedBreakStmt(offTscRawSrc = 0); + } + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + pVM->aCpus[iCpu].tm.s.offTSCRawSrc = offTscRawSrc; + pVM->aCpus[iCpu].tm.s.u64TSC = 0; + pVM->aCpus[iCpu].tm.s.u64TSCLastSeen = 0; + } + + TM_UNLOCK_TIMERS(pVM); +} + + +/** + * Resolve a builtin RC symbol. + * Called by PDM when loading or relocating GC modules. + * + * @returns VBox status + * @param pVM The cross context VM structure. + * @param pszSymbol Symbol to resolve. + * @param pRCPtrValue Where to store the symbol value. + * @remark This has to work before TMR3Relocate() is called. + */ +VMM_INT_DECL(int) TMR3GetImportRC(PVM pVM, const char *pszSymbol, PRTRCPTR pRCPtrValue) +{ + if (!strcmp(pszSymbol, "g_pSUPGlobalInfoPage")) + *pRCPtrValue = MMHyperR3ToRC(pVM, &pVM->tm.s.pvGIPRC); + //else if (..) + else + return VERR_SYMBOL_NOT_FOUND; + return VINF_SUCCESS; +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) tmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("tmR3Save:\n")); +#ifdef VBOX_STRICT + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + Assert(!pVCpu->tm.s.fTSCTicking); + } + Assert(!pVM->tm.s.cVirtualTicking); + Assert(!pVM->tm.s.fVirtualSyncTicking); + Assert(!pVM->tm.s.cTSCsTicking); +#endif + + /* + * Save the virtual clocks. + */ + /* the virtual clock. */ + SSMR3PutU64(pSSM, TMCLOCK_FREQ_VIRTUAL); + SSMR3PutU64(pSSM, pVM->tm.s.u64Virtual); + + /* the virtual timer synchronous clock. */ + SSMR3PutU64(pSSM, pVM->tm.s.u64VirtualSync); + SSMR3PutU64(pSSM, pVM->tm.s.offVirtualSync); + SSMR3PutU64(pSSM, pVM->tm.s.offVirtualSyncGivenUp); + SSMR3PutU64(pSSM, pVM->tm.s.u64VirtualSyncCatchUpPrev); + SSMR3PutBool(pSSM, pVM->tm.s.fVirtualSyncCatchUp); + + /* real time clock */ + SSMR3PutU64(pSSM, TMCLOCK_FREQ_REAL); + + /* the cpu tick clock. */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + SSMR3PutU64(pSSM, TMCpuTickGet(pVCpu)); + } + return SSMR3PutU64(pSSM, pVM->tm.s.cTSCTicksPerSecond); +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) tmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + LogFlow(("tmR3Load:\n")); + + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); +#ifdef VBOX_STRICT + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + Assert(!pVCpu->tm.s.fTSCTicking); + } + Assert(!pVM->tm.s.cVirtualTicking); + Assert(!pVM->tm.s.fVirtualSyncTicking); + Assert(!pVM->tm.s.cTSCsTicking); +#endif + + /* + * Validate version. + */ + if (uVersion != TM_SAVED_STATE_VERSION) + { + AssertMsgFailed(("tmR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Load the virtual clock. + */ + pVM->tm.s.cVirtualTicking = 0; + /* the virtual clock. */ + uint64_t u64Hz; + int rc = SSMR3GetU64(pSSM, &u64Hz); + if (RT_FAILURE(rc)) + return rc; + if (u64Hz != TMCLOCK_FREQ_VIRTUAL) + { + AssertMsgFailed(("The virtual clock frequency differs! Saved: %'RU64 Binary: %'RU64\n", + u64Hz, TMCLOCK_FREQ_VIRTUAL)); + return VERR_SSM_VIRTUAL_CLOCK_HZ; + } + SSMR3GetU64(pSSM, &pVM->tm.s.u64Virtual); + pVM->tm.s.u64VirtualOffset = 0; + + /* the virtual timer synchronous clock. */ + pVM->tm.s.fVirtualSyncTicking = false; + uint64_t u64; + SSMR3GetU64(pSSM, &u64); + pVM->tm.s.u64VirtualSync = u64; + SSMR3GetU64(pSSM, &u64); + pVM->tm.s.offVirtualSync = u64; + SSMR3GetU64(pSSM, &u64); + pVM->tm.s.offVirtualSyncGivenUp = u64; + SSMR3GetU64(pSSM, &u64); + pVM->tm.s.u64VirtualSyncCatchUpPrev = u64; + bool f; + SSMR3GetBool(pSSM, &f); + pVM->tm.s.fVirtualSyncCatchUp = f; + + /* the real clock */ + rc = SSMR3GetU64(pSSM, &u64Hz); + if (RT_FAILURE(rc)) + return rc; + if (u64Hz != TMCLOCK_FREQ_REAL) + { + AssertMsgFailed(("The real clock frequency differs! Saved: %'RU64 Binary: %'RU64\n", + u64Hz, TMCLOCK_FREQ_REAL)); + return VERR_SSM_VIRTUAL_CLOCK_HZ; /* misleading... */ + } + + /* the cpu tick clock. 
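+ * (For reference, the record layout produced by tmR3Save above and consumed here is:
+ * virtual clock frequency and value, the five virtual sync fields, the real clock
+ * frequency, one TSC value per VCPU, and finally the TSC frequency.)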
*/ + pVM->tm.s.cTSCsTicking = 0; + pVM->tm.s.offTSCPause = 0; + pVM->tm.s.u64LastPausedTSC = 0; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->tm.s.fTSCTicking = false; + SSMR3GetU64(pSSM, &pVCpu->tm.s.u64TSC); + if (pVM->tm.s.u64LastPausedTSC < pVCpu->tm.s.u64TSC) + pVM->tm.s.u64LastPausedTSC = pVCpu->tm.s.u64TSC; + + if (pVM->tm.s.enmTSCMode == TMTSCMODE_REAL_TSC_OFFSET) + pVCpu->tm.s.offTSCRawSrc = 0; /** @todo TSC restore stuff and HWACC. */ + } + + rc = SSMR3GetU64(pSSM, &u64Hz); + if (RT_FAILURE(rc)) + return rc; + if (pVM->tm.s.enmTSCMode != TMTSCMODE_REAL_TSC_OFFSET) + pVM->tm.s.cTSCTicksPerSecond = u64Hz; + + LogRel(("TM: cTSCTicksPerSecond=%#RX64 (%'RU64) enmTSCMode=%d (%s) (state load)\n", + pVM->tm.s.cTSCTicksPerSecond, pVM->tm.s.cTSCTicksPerSecond, pVM->tm.s.enmTSCMode, tmR3GetTSCModeName(pVM))); + + /* Disabled as this isn't tested, also should this apply only if GIM is enabled etc. */ +#if 0 + /* + * If the current host TSC frequency is incompatible with what is in the + * saved state of the VM, fall back to emulating TSC and disallow TSC mode + * switches during VM runtime (e.g. by GIM). + */ + if ( GIMIsEnabled(pVM) + || pVM->tm.s.enmTSCMode == TMTSCMODE_REAL_TSC_OFFSET) + { + uint64_t uGipCpuHz; + bool fRelax = RTSystemIsInsideVM(); + bool fCompat = SUPIsTscFreqCompatible(pVM->tm.s.cTSCTicksPerSecond, &uGipCpuHz, fRelax); + if (!fCompat) + { + pVM->tm.s.enmTSCMode = TMTSCMODE_VIRT_TSC_EMULATED; + pVM->tm.s.fTSCModeSwitchAllowed = false; + if (g_pSUPGlobalInfoPage->u32Mode != SUPGIPMODE_ASYNC_TSC) + { + LogRel(("TM: TSC frequency incompatible! uGipCpuHz=%#RX64 (%'RU64) enmTSCMode=%d (%s) fTSCModeSwitchAllowed=%RTbool (state load)\n", + uGipCpuHz, uGipCpuHz, pVM->tm.s.enmTSCMode, tmR3GetTSCModeName(pVM), pVM->tm.s.fTSCModeSwitchAllowed)); + } + else + { + LogRel(("TM: GIP is async, enmTSCMode=%d (%s) fTSCModeSwitchAllowed=%RTbool (state load)\n", + uGipCpuHz, uGipCpuHz, pVM->tm.s.enmTSCMode, tmR3GetTSCModeName(pVM), pVM->tm.s.fTSCModeSwitchAllowed)); + } + } + } +#endif + + /* + * Make sure timers get rescheduled immediately. + */ + PVMCPU pVCpuDst = &pVM->aCpus[pVM->tm.s.idTimerCpu]; + VMCPU_FF_SET(pVCpuDst, VMCPU_FF_TIMER); + + return VINF_SUCCESS; +} + + +/** + * Internal TMR3TimerCreate worker. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmClock The timer clock. + * @param pszDesc The timer description. + * @param ppTimer Where to store the timer pointer on success. + */ +static int tmr3TimerCreate(PVM pVM, TMCLOCK enmClock, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + VM_ASSERT_EMT(pVM); + + /* + * Allocate the timer. + */ + PTMTIMERR3 pTimer = NULL; + if (pVM->tm.s.pFree && VM_IS_EMT(pVM)) + { + pTimer = pVM->tm.s.pFree; + pVM->tm.s.pFree = pTimer->pBigNext; + Log3(("TM: Recycling timer %p, new free head %p.\n", pTimer, pTimer->pBigNext)); + } + + if (!pTimer) + { + int rc = MMHyperAlloc(pVM, sizeof(*pTimer), 0, MM_TAG_TM, (void **)&pTimer); + if (RT_FAILURE(rc)) + return rc; + Log3(("TM: Allocated new timer %p\n", pTimer)); + } + + /* + * Initialize it. + */ + pTimer->u64Expire = 0; + pTimer->enmClock = enmClock; + pTimer->pVMR3 = pVM; + pTimer->pVMR0 = pVM->pVMR0; + pTimer->pVMRC = pVM->pVMRC; + pTimer->enmState = TMTIMERSTATE_STOPPED; + pTimer->offScheduleNext = 0; + pTimer->offNext = 0; + pTimer->offPrev = 0; + pTimer->pvUser = NULL; + pTimer->pCritSect = NULL; + pTimer->pszDesc = pszDesc; + + /* insert into the list of created timers. 
*/ + TM_LOCK_TIMERS(pVM); + pTimer->pBigPrev = NULL; + pTimer->pBigNext = pVM->tm.s.pCreated; + pVM->tm.s.pCreated = pTimer; + if (pTimer->pBigNext) + pTimer->pBigNext->pBigPrev = pTimer; +#ifdef VBOX_STRICT + tmTimerQueuesSanityChecks(pVM, "tmR3TimerCreate"); +#endif + TM_UNLOCK_TIMERS(pVM); + + *ppTimer = pTimer; + return VINF_SUCCESS; +} + + +/** + * Creates a device timer. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device instance. + * @param enmClock The clock to use on this timer. + * @param pfnCallback Callback function. + * @param pvUser The user argument to the callback. + * @param fFlags Timer creation flags, see grp_tm_timer_flags. + * @param pszDesc Pointer to description string which must stay around + * until the timer is fully destroyed (i.e. a bit after TMTimerDestroy()). + * @param ppTimer Where to store the timer on success. + */ +VMM_INT_DECL(int) TMR3TimerCreateDevice(PVM pVM, PPDMDEVINS pDevIns, TMCLOCK enmClock, + PFNTMTIMERDEV pfnCallback, void *pvUser, + uint32_t fFlags, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + AssertReturn(!(fFlags & ~(TMTIMER_FLAGS_NO_CRIT_SECT)), VERR_INVALID_PARAMETER); + + /* + * Allocate and init stuff. + */ + int rc = tmr3TimerCreate(pVM, enmClock, pszDesc, ppTimer); + if (RT_SUCCESS(rc)) + { + (*ppTimer)->enmType = TMTIMERTYPE_DEV; + (*ppTimer)->u.Dev.pfnTimer = pfnCallback; + (*ppTimer)->u.Dev.pDevIns = pDevIns; + (*ppTimer)->pvUser = pvUser; + if (!(fFlags & TMTIMER_FLAGS_NO_CRIT_SECT)) + (*ppTimer)->pCritSect = PDMR3DevGetCritSect(pVM, pDevIns); + Log(("TM: Created device timer %p clock %d callback %p '%s'\n", (*ppTimer), enmClock, pfnCallback, pszDesc)); + } + + return rc; +} + + + + +/** + * Creates a USB device timer. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pUsbIns The USB device instance. + * @param enmClock The clock to use on this timer. + * @param pfnCallback Callback function. + * @param pvUser The user argument to the callback. + * @param fFlags Timer creation flags, see grp_tm_timer_flags. + * @param pszDesc Pointer to description string which must stay around + * until the timer is fully destroyed (i.e. a bit after TMTimerDestroy()). + * @param ppTimer Where to store the timer on success. + */ +VMM_INT_DECL(int) TMR3TimerCreateUsb(PVM pVM, PPDMUSBINS pUsbIns, TMCLOCK enmClock, + PFNTMTIMERUSB pfnCallback, void *pvUser, + uint32_t fFlags, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + AssertReturn(!(fFlags & ~(TMTIMER_FLAGS_NO_CRIT_SECT)), VERR_INVALID_PARAMETER); + + /* + * Allocate and init stuff. + */ + int rc = tmr3TimerCreate(pVM, enmClock, pszDesc, ppTimer); + if (RT_SUCCESS(rc)) + { + (*ppTimer)->enmType = TMTIMERTYPE_USB; + (*ppTimer)->u.Usb.pfnTimer = pfnCallback; + (*ppTimer)->u.Usb.pUsbIns = pUsbIns; + (*ppTimer)->pvUser = pvUser; + //if (!(fFlags & TMTIMER_FLAGS_NO_CRIT_SECT)) + //{ + // if (pDevIns->pCritSectR3) + // (*ppTimer)->pCritSect = pUsbIns->pCritSectR3; + // else + // (*ppTimer)->pCritSect = IOMR3GetCritSect(pVM); + //} + Log(("TM: Created USB device timer %p clock %d callback %p '%s'\n", (*ppTimer), enmClock, pfnCallback, pszDesc)); + } + + return rc; +} + + +/** + * Creates a driver timer. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver instance. + * @param enmClock The clock to use on this timer. + * @param pfnCallback Callback function. + * @param pvUser The user argument to the callback. 
+ * @param fFlags Timer creation flags, see grp_tm_timer_flags. + * @param pszDesc Pointer to description string which must stay around + * until the timer is fully destroyed (i.e. a bit after TMTimerDestroy()). + * @param ppTimer Where to store the timer on success. + */ +VMM_INT_DECL(int) TMR3TimerCreateDriver(PVM pVM, PPDMDRVINS pDrvIns, TMCLOCK enmClock, PFNTMTIMERDRV pfnCallback, void *pvUser, + uint32_t fFlags, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + AssertReturn(!(fFlags & ~(TMTIMER_FLAGS_NO_CRIT_SECT)), VERR_INVALID_PARAMETER); + + /* + * Allocate and init stuff. + */ + int rc = tmr3TimerCreate(pVM, enmClock, pszDesc, ppTimer); + if (RT_SUCCESS(rc)) + { + (*ppTimer)->enmType = TMTIMERTYPE_DRV; + (*ppTimer)->u.Drv.pfnTimer = pfnCallback; + (*ppTimer)->u.Drv.pDrvIns = pDrvIns; + (*ppTimer)->pvUser = pvUser; + Log(("TM: Created driver timer %p clock %d callback %p '%s'\n", (*ppTimer), enmClock, pfnCallback, pszDesc)); + } + + return rc; +} + + +/** + * Creates an internal timer. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmClock The clock to use on this timer. + * @param pfnCallback Callback function. + * @param pvUser User argument to be passed to the callback. + * @param pszDesc Pointer to description string which must stay around + * until the timer is fully destroyed (i.e. a bit after TMTimerDestroy()). + * @param ppTimer Where to store the timer on success. + */ +VMMR3DECL(int) TMR3TimerCreateInternal(PVM pVM, TMCLOCK enmClock, PFNTMTIMERINT pfnCallback, void *pvUser, const char *pszDesc, PPTMTIMERR3 ppTimer) +{ + /* + * Allocate and init stuff. + */ + PTMTIMER pTimer; + int rc = tmr3TimerCreate(pVM, enmClock, pszDesc, &pTimer); + if (RT_SUCCESS(rc)) + { + pTimer->enmType = TMTIMERTYPE_INTERNAL; + pTimer->u.Internal.pfnTimer = pfnCallback; + pTimer->pvUser = pvUser; + *ppTimer = pTimer; + Log(("TM: Created internal timer %p clock %d callback %p '%s'\n", pTimer, enmClock, pfnCallback, pszDesc)); + } + + return rc; +} + +/** + * Creates an external timer. + * + * @returns Timer handle on success. + * @returns NULL on failure. + * @param pVM The cross context VM structure. + * @param enmClock The clock to use on this timer. + * @param pfnCallback Callback function. + * @param pvUser User argument. + * @param pszDesc Pointer to description string which must stay around + * until the timer is fully destroyed (i.e. a bit after TMTimerDestroy()). + */ +VMMR3DECL(PTMTIMERR3) TMR3TimerCreateExternal(PVM pVM, TMCLOCK enmClock, PFNTMTIMEREXT pfnCallback, void *pvUser, const char *pszDesc) +{ + /* + * Allocate and init stuff. + */ + PTMTIMERR3 pTimer; + int rc = tmr3TimerCreate(pVM, enmClock, pszDesc, &pTimer); + if (RT_SUCCESS(rc)) + { + pTimer->enmType = TMTIMERTYPE_EXTERNAL; + pTimer->u.External.pfnTimer = pfnCallback; + pTimer->pvUser = pvUser; + Log(("TM: Created external timer %p clock %d callback %p '%s'\n", pTimer, enmClock, pfnCallback, pszDesc)); + return pTimer; + } + + return NULL; +} + + +/** + * Destroys a timer. + * + * @returns VBox status code. + * @param pTimer Timer handle as returned by one of the create functions. + */ +VMMR3DECL(int) TMR3TimerDestroy(PTMTIMER pTimer) +{ + /* + * Be extra careful here.
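+ * (The timer may be caught in a transient state by another thread, so the loop
+ * below retries the switch to TMTIMERSTATE_DESTROY -- yielding or sleeping between
+ * attempts, up to 1000 times -- instead of assuming a single state change succeeds.)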
+ */ + if (!pTimer) + return VINF_SUCCESS; + AssertPtr(pTimer); + Assert((unsigned)pTimer->enmClock < (unsigned)TMCLOCK_MAX); + + PVM pVM = pTimer->CTX_SUFF(pVM); + PTMTIMERQUEUE pQueue = &pVM->tm.s.CTX_SUFF(paTimerQueues)[pTimer->enmClock]; + bool fActive = false; + bool fPending = false; + + AssertMsg( !pTimer->pCritSect + || VMR3GetState(pVM) != VMSTATE_RUNNING + || PDMCritSectIsOwner(pTimer->pCritSect), ("%s\n", pTimer->pszDesc)); + + /* + * The rest of the game happens behind the lock, just + * like create does. All the work is done here. + */ + TM_LOCK_TIMERS(pVM); + for (int cRetries = 1000;; cRetries--) + { + /* + * Change to the DESTROY state. + */ + TMTIMERSTATE const enmState = pTimer->enmState; + Log2(("TMTimerDestroy: %p:{.enmState=%s, .pszDesc='%s'} cRetries=%d\n", + pTimer, tmTimerState(enmState), R3STRING(pTimer->pszDesc), cRetries)); + switch (enmState) + { + case TMTIMERSTATE_STOPPED: + case TMTIMERSTATE_EXPIRED_DELIVER: + break; + + case TMTIMERSTATE_ACTIVE: + fActive = true; + break; + + case TMTIMERSTATE_PENDING_STOP: + case TMTIMERSTATE_PENDING_STOP_SCHEDULE: + case TMTIMERSTATE_PENDING_RESCHEDULE: + fActive = true; + fPending = true; + break; + + case TMTIMERSTATE_PENDING_SCHEDULE: + fPending = true; + break; + + /* + * This shouldn't happen as the caller should make sure there are no races. + */ + case TMTIMERSTATE_EXPIRED_GET_UNLINK: + case TMTIMERSTATE_PENDING_SCHEDULE_SET_EXPIRE: + case TMTIMERSTATE_PENDING_RESCHEDULE_SET_EXPIRE: + AssertMsgFailed(("%p:.enmState=%s %s\n", pTimer, tmTimerState(enmState), pTimer->pszDesc)); + TM_UNLOCK_TIMERS(pVM); + if (!RTThreadYield()) + RTThreadSleep(1); + AssertMsgReturn(cRetries > 0, ("Failed waiting for stable state. state=%d (%s)\n", pTimer->enmState, pTimer->pszDesc), + VERR_TM_UNSTABLE_STATE); + TM_LOCK_TIMERS(pVM); + continue; + + /* + * Invalid states. + */ + case TMTIMERSTATE_FREE: + case TMTIMERSTATE_DESTROY: + TM_UNLOCK_TIMERS(pVM); + AssertLogRelMsgFailedReturn(("pTimer=%p %s\n", pTimer, tmTimerState(enmState)), VERR_TM_INVALID_STATE); + + default: + AssertMsgFailed(("Unknown timer state %d (%s)\n", enmState, R3STRING(pTimer->pszDesc))); + TM_UNLOCK_TIMERS(pVM); + return VERR_TM_UNKNOWN_STATE; + } + + /* + * Try switch to the destroy state. + * This should always succeed as the caller should make sure there are no race. + */ + bool fRc; + TM_TRY_SET_STATE(pTimer, TMTIMERSTATE_DESTROY, enmState, fRc); + if (fRc) + break; + AssertMsgFailed(("%p:.enmState=%s %s\n", pTimer, tmTimerState(enmState), pTimer->pszDesc)); + TM_UNLOCK_TIMERS(pVM); + AssertMsgReturn(cRetries > 0, ("Failed waiting for stable state. state=%d (%s)\n", pTimer->enmState, pTimer->pszDesc), + VERR_TM_UNSTABLE_STATE); + TM_LOCK_TIMERS(pVM); + } + + /* + * Unlink from the active list. + */ + if (fActive) + { + const PTMTIMER pPrev = TMTIMER_GET_PREV(pTimer); + const PTMTIMER pNext = TMTIMER_GET_NEXT(pTimer); + if (pPrev) + TMTIMER_SET_NEXT(pPrev, pNext); + else + { + TMTIMER_SET_HEAD(pQueue, pNext); + pQueue->u64Expire = pNext ? pNext->u64Expire : INT64_MAX; + } + if (pNext) + TMTIMER_SET_PREV(pNext, pPrev); + pTimer->offNext = 0; + pTimer->offPrev = 0; + } + + /* + * Unlink from the schedule list by running it. 
+ */ + if (fPending) + { + Log3(("TMR3TimerDestroy: tmTimerQueueSchedule\n")); + STAM_PROFILE_START(&pVM->tm.s.CTX_SUFF_Z(StatScheduleOne), a); + Assert(pQueue->offSchedule); + tmTimerQueueSchedule(pVM, pQueue); + STAM_PROFILE_STOP(&pVM->tm.s.CTX_SUFF_Z(StatScheduleOne), a); + } + + /* + * Read to move the timer from the created list and onto the free list. + */ + Assert(!pTimer->offNext); Assert(!pTimer->offPrev); Assert(!pTimer->offScheduleNext); + + /* unlink from created list */ + if (pTimer->pBigPrev) + pTimer->pBigPrev->pBigNext = pTimer->pBigNext; + else + pVM->tm.s.pCreated = pTimer->pBigNext; + if (pTimer->pBigNext) + pTimer->pBigNext->pBigPrev = pTimer->pBigPrev; + pTimer->pBigNext = 0; + pTimer->pBigPrev = 0; + + /* free */ + Log2(("TM: Inserting %p into the free list ahead of %p!\n", pTimer, pVM->tm.s.pFree)); + TM_SET_STATE(pTimer, TMTIMERSTATE_FREE); + pTimer->pBigNext = pVM->tm.s.pFree; + pVM->tm.s.pFree = pTimer; + +#ifdef VBOX_STRICT + tmTimerQueuesSanityChecks(pVM, "TMR3TimerDestroy"); +#endif + TM_UNLOCK_TIMERS(pVM); + return VINF_SUCCESS; +} + + +/** + * Destroy all timers owned by a device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDevIns Device which timers should be destroyed. + */ +VMM_INT_DECL(int) TMR3TimerDestroyDevice(PVM pVM, PPDMDEVINS pDevIns) +{ + LogFlow(("TMR3TimerDestroyDevice: pDevIns=%p\n", pDevIns)); + if (!pDevIns) + return VERR_INVALID_PARAMETER; + + TM_LOCK_TIMERS(pVM); + PTMTIMER pCur = pVM->tm.s.pCreated; + while (pCur) + { + PTMTIMER pDestroy = pCur; + pCur = pDestroy->pBigNext; + if ( pDestroy->enmType == TMTIMERTYPE_DEV + && pDestroy->u.Dev.pDevIns == pDevIns) + { + int rc = TMR3TimerDestroy(pDestroy); + AssertRC(rc); + } + } + TM_UNLOCK_TIMERS(pVM); + + LogFlow(("TMR3TimerDestroyDevice: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Destroy all timers owned by a USB device. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pUsbIns USB device which timers should be destroyed. + */ +VMM_INT_DECL(int) TMR3TimerDestroyUsb(PVM pVM, PPDMUSBINS pUsbIns) +{ + LogFlow(("TMR3TimerDestroyUsb: pUsbIns=%p\n", pUsbIns)); + if (!pUsbIns) + return VERR_INVALID_PARAMETER; + + TM_LOCK_TIMERS(pVM); + PTMTIMER pCur = pVM->tm.s.pCreated; + while (pCur) + { + PTMTIMER pDestroy = pCur; + pCur = pDestroy->pBigNext; + if ( pDestroy->enmType == TMTIMERTYPE_USB + && pDestroy->u.Usb.pUsbIns == pUsbIns) + { + int rc = TMR3TimerDestroy(pDestroy); + AssertRC(rc); + } + } + TM_UNLOCK_TIMERS(pVM); + + LogFlow(("TMR3TimerDestroyUsb: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Destroy all timers owned by a driver. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pDrvIns Driver which timers should be destroyed. + */ +VMM_INT_DECL(int) TMR3TimerDestroyDriver(PVM pVM, PPDMDRVINS pDrvIns) +{ + LogFlow(("TMR3TimerDestroyDriver: pDrvIns=%p\n", pDrvIns)); + if (!pDrvIns) + return VERR_INVALID_PARAMETER; + + TM_LOCK_TIMERS(pVM); + PTMTIMER pCur = pVM->tm.s.pCreated; + while (pCur) + { + PTMTIMER pDestroy = pCur; + pCur = pDestroy->pBigNext; + if ( pDestroy->enmType == TMTIMERTYPE_DRV + && pDestroy->u.Drv.pDrvIns == pDrvIns) + { + int rc = TMR3TimerDestroy(pDestroy); + AssertRC(rc); + } + } + TM_UNLOCK_TIMERS(pVM); + + LogFlow(("TMR3TimerDestroyDriver: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Internal function for getting the clock time. + * + * @returns clock time. 
+ * @param pVM The cross context VM structure. + * @param enmClock The clock. + */ +DECLINLINE(uint64_t) tmClock(PVM pVM, TMCLOCK enmClock) +{ + switch (enmClock) + { + case TMCLOCK_VIRTUAL: return TMVirtualGet(pVM); + case TMCLOCK_VIRTUAL_SYNC: return TMVirtualSyncGet(pVM); + case TMCLOCK_REAL: return TMRealGet(pVM); + case TMCLOCK_TSC: return TMCpuTickGet(&pVM->aCpus[0] /* just take VCPU 0 */); + default: + AssertMsgFailed(("enmClock=%d\n", enmClock)); + return ~(uint64_t)0; + } +} + + +/** + * Checks if the sync queue has one or more expired timers. + * + * @returns true / false. + * + * @param pVM The cross context VM structure. + * @param enmClock The queue. + */ +DECLINLINE(bool) tmR3HasExpiredTimer(PVM pVM, TMCLOCK enmClock) +{ + const uint64_t u64Expire = pVM->tm.s.CTX_SUFF(paTimerQueues)[enmClock].u64Expire; + return u64Expire != INT64_MAX && u64Expire <= tmClock(pVM, enmClock); +} + + +/** + * Checks for expired timers in all the queues. + * + * @returns true / false. + * @param pVM The cross context VM structure. + */ +DECLINLINE(bool) tmR3AnyExpiredTimers(PVM pVM) +{ + /* + * Combine the time calculation for the first two since we're not on EMT + * TMVirtualSyncGet only permits EMT. + */ + uint64_t u64Now = TMVirtualGetNoCheck(pVM); + if (pVM->tm.s.CTX_SUFF(paTimerQueues)[TMCLOCK_VIRTUAL].u64Expire <= u64Now) + return true; + u64Now = pVM->tm.s.fVirtualSyncTicking + ? u64Now - pVM->tm.s.offVirtualSync + : pVM->tm.s.u64VirtualSync; + if (pVM->tm.s.CTX_SUFF(paTimerQueues)[TMCLOCK_VIRTUAL_SYNC].u64Expire <= u64Now) + return true; + + /* + * The remaining timers. + */ + if (tmR3HasExpiredTimer(pVM, TMCLOCK_REAL)) + return true; + if (tmR3HasExpiredTimer(pVM, TMCLOCK_TSC)) + return true; + return false; +} + + +/** + * Schedule timer callback. + * + * @param pTimer Timer handle. + * @param pvUser Pointer to the VM. + * @thread Timer thread. + * + * @remark We cannot do the scheduling and queues running from a timer handler + * since it's not executing in EMT, and even if it was it would be async + * and we wouldn't know the state of the affairs. + * So, we'll just raise the timer FF and force any REM execution to exit. + */ +static DECLCALLBACK(void) tmR3TimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t /*iTick*/) +{ + PVM pVM = (PVM)pvUser; + PVMCPU pVCpuDst = &pVM->aCpus[pVM->tm.s.idTimerCpu]; + NOREF(pTimer); + + AssertCompile(TMCLOCK_MAX == 4); + STAM_COUNTER_INC(&pVM->tm.s.StatTimerCallback); + +#ifdef DEBUG_Sander /* very annoying, keep it private. */ + if (VMCPU_FF_IS_SET(pVCpuDst, VMCPU_FF_TIMER)) + Log(("tmR3TimerCallback: timer event still pending!!\n")); +#endif + if ( !VMCPU_FF_IS_SET(pVCpuDst, VMCPU_FF_TIMER) + && ( pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL_SYNC].offSchedule /** @todo FIXME - reconsider offSchedule as a reason for running the timer queues. */ + || pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL].offSchedule + || pVM->tm.s.paTimerQueuesR3[TMCLOCK_REAL].offSchedule + || pVM->tm.s.paTimerQueuesR3[TMCLOCK_TSC].offSchedule + || tmR3AnyExpiredTimers(pVM) + ) + && !VMCPU_FF_IS_SET(pVCpuDst, VMCPU_FF_TIMER) + && !pVM->tm.s.fRunningQueues + ) + { + Log5(("TM(%u): FF: 0 -> 1\n", __LINE__)); + VMCPU_FF_SET(pVCpuDst, VMCPU_FF_TIMER); +#ifdef VBOX_WITH_REM + REMR3NotifyTimerPending(pVM, pVCpuDst); +#endif + VMR3NotifyCpuFFU(pVCpuDst->pUVCpu, VMNOTIFYFF_FLAGS_DONE_REM | VMNOTIFYFF_FLAGS_POKE); + STAM_COUNTER_INC(&pVM->tm.s.StatTimerCallbackSetFF); + } +} + + +/** + * Schedules and runs any pending timers. 
+ * + * This is normally called from a forced action handler in EMT. + * + * @param pVM The cross context VM structure. + * + * @thread EMT (actually EMT0, but we fend off the others) + */ +VMMR3DECL(void) TMR3TimerQueuesDo(PVM pVM) +{ + /* + * Only the dedicated timer EMT should do stuff here. + * (fRunningQueues is only used as an indicator.) + */ + Assert(pVM->tm.s.idTimerCpu < pVM->cCpus); + PVMCPU pVCpuDst = &pVM->aCpus[pVM->tm.s.idTimerCpu]; + if (VMMGetCpu(pVM) != pVCpuDst) + { + Assert(pVM->cCpus > 1); + return; + } + STAM_PROFILE_START(&pVM->tm.s.StatDoQueues, a); + Log2(("TMR3TimerQueuesDo:\n")); + Assert(!pVM->tm.s.fRunningQueues); + ASMAtomicWriteBool(&pVM->tm.s.fRunningQueues, true); + TM_LOCK_TIMERS(pVM); + + /* + * Process the queues. + */ + AssertCompile(TMCLOCK_MAX == 4); + + /* TMCLOCK_VIRTUAL_SYNC (see also TMR3VirtualSyncFF) */ + STAM_PROFILE_ADV_START(&pVM->tm.s.aStatDoQueues[TMCLOCK_VIRTUAL_SYNC], s1); + PDMCritSectEnter(&pVM->tm.s.VirtualSyncLock, VERR_IGNORED); + ASMAtomicWriteBool(&pVM->tm.s.fRunningVirtualSyncQueue, true); + VMCPU_FF_CLEAR(pVCpuDst, VMCPU_FF_TIMER); /* Clear the FF once we started working for real. */ + + Assert(!pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL_SYNC].offSchedule); + tmR3TimerQueueRunVirtualSync(pVM); + if (pVM->tm.s.fVirtualSyncTicking) /** @todo move into tmR3TimerQueueRunVirtualSync - FIXME */ + VM_FF_CLEAR(pVM, VM_FF_TM_VIRTUAL_SYNC); + + ASMAtomicWriteBool(&pVM->tm.s.fRunningVirtualSyncQueue, false); + PDMCritSectLeave(&pVM->tm.s.VirtualSyncLock); + STAM_PROFILE_ADV_STOP(&pVM->tm.s.aStatDoQueues[TMCLOCK_VIRTUAL_SYNC], s1); + + /* TMCLOCK_VIRTUAL */ + STAM_PROFILE_ADV_START(&pVM->tm.s.aStatDoQueues[TMCLOCK_VIRTUAL], s2); + if (pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL].offSchedule) + tmTimerQueueSchedule(pVM, &pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL]); + tmR3TimerQueueRun(pVM, &pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL]); + STAM_PROFILE_ADV_STOP(&pVM->tm.s.aStatDoQueues[TMCLOCK_VIRTUAL], s2); + + /* TMCLOCK_TSC */ + Assert(!pVM->tm.s.paTimerQueuesR3[TMCLOCK_TSC].offActive); /* not used */ + + /* TMCLOCK_REAL */ + STAM_PROFILE_ADV_START(&pVM->tm.s.aStatDoQueues[TMCLOCK_REAL], s3); + if (pVM->tm.s.paTimerQueuesR3[TMCLOCK_REAL].offSchedule) + tmTimerQueueSchedule(pVM, &pVM->tm.s.paTimerQueuesR3[TMCLOCK_REAL]); + tmR3TimerQueueRun(pVM, &pVM->tm.s.paTimerQueuesR3[TMCLOCK_REAL]); + STAM_PROFILE_ADV_STOP(&pVM->tm.s.aStatDoQueues[TMCLOCK_REAL], s3); + +#ifdef VBOX_STRICT + /* check that we didn't screw up. */ + tmTimerQueuesSanityChecks(pVM, "TMR3TimerQueuesDo"); +#endif + + /* done */ + Log2(("TMR3TimerQueuesDo: returns void\n")); + ASMAtomicWriteBool(&pVM->tm.s.fRunningQueues, false); + TM_UNLOCK_TIMERS(pVM); + STAM_PROFILE_STOP(&pVM->tm.s.StatDoQueues, a); +} + +//RT_C_DECLS_BEGIN +//int iomLock(PVM pVM); +//void iomUnlock(PVM pVM); +//RT_C_DECLS_END + + +/** + * Schedules and runs any pending times in the specified queue. + * + * This is normally called from a forced action handler in EMT. + * + * @param pVM The cross context VM structure. + * @param pQueue The queue to run. + */ +static void tmR3TimerQueueRun(PVM pVM, PTMTIMERQUEUE pQueue) +{ + VM_ASSERT_EMT(pVM); + + /* + * Run timers. + * + * We check the clock once and run all timers which are ACTIVE + * and have an expire time less or equal to the time we read. + * + * N.B. A generic unlink must be applied since other threads + * are allowed to mess with any active timer at any time. 
+ * However, we only allow EMT to handle EXPIRED_PENDING + * timers, thus enabling the timer handler function to + * arm the timer again. + */ + PTMTIMER pNext = TMTIMER_GET_HEAD(pQueue); + if (!pNext) + return; + const uint64_t u64Now = tmClock(pVM, pQueue->enmClock); + while (pNext && pNext->u64Expire <= u64Now) + { + PTMTIMER pTimer = pNext; + pNext = TMTIMER_GET_NEXT(pTimer); + PPDMCRITSECT pCritSect = pTimer->pCritSect; + if (pCritSect) + PDMCritSectEnter(pCritSect, VERR_IGNORED); + Log2(("tmR3TimerQueueRun: %p:{.enmState=%s, .enmClock=%d, .enmType=%d, u64Expire=%llx (now=%llx) .pszDesc=%s}\n", + pTimer, tmTimerState(pTimer->enmState), pTimer->enmClock, pTimer->enmType, pTimer->u64Expire, u64Now, pTimer->pszDesc)); + bool fRc; + TM_TRY_SET_STATE(pTimer, TMTIMERSTATE_EXPIRED_GET_UNLINK, TMTIMERSTATE_ACTIVE, fRc); + if (fRc) + { + Assert(!pTimer->offScheduleNext); /* this can trigger falsely */ + + /* unlink */ + const PTMTIMER pPrev = TMTIMER_GET_PREV(pTimer); + if (pPrev) + TMTIMER_SET_NEXT(pPrev, pNext); + else + { + TMTIMER_SET_HEAD(pQueue, pNext); + pQueue->u64Expire = pNext ? pNext->u64Expire : INT64_MAX; + } + if (pNext) + TMTIMER_SET_PREV(pNext, pPrev); + pTimer->offNext = 0; + pTimer->offPrev = 0; + + /* fire */ + TM_SET_STATE(pTimer, TMTIMERSTATE_EXPIRED_DELIVER); + switch (pTimer->enmType) + { + case TMTIMERTYPE_DEV: pTimer->u.Dev.pfnTimer(pTimer->u.Dev.pDevIns, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_USB: pTimer->u.Usb.pfnTimer(pTimer->u.Usb.pUsbIns, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_DRV: pTimer->u.Drv.pfnTimer(pTimer->u.Drv.pDrvIns, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_INTERNAL: pTimer->u.Internal.pfnTimer(pVM, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_EXTERNAL: pTimer->u.External.pfnTimer(pTimer->pvUser); break; + default: + AssertMsgFailed(("Invalid timer type %d (%s)\n", pTimer->enmType, pTimer->pszDesc)); + break; + } + + /* change the state if it wasn't changed already in the handler. */ + TM_TRY_SET_STATE(pTimer, TMTIMERSTATE_STOPPED, TMTIMERSTATE_EXPIRED_DELIVER, fRc); + Log2(("tmR3TimerQueueRun: new state %s\n", tmTimerState(pTimer->enmState))); + } + if (pCritSect) + PDMCritSectLeave(pCritSect); + } /* run loop */ +} + + +/** + * Schedules and runs any pending times in the timer queue for the + * synchronous virtual clock. + * + * This scheduling is a bit different from the other queues as it need + * to implement the special requirements of the timer synchronous virtual + * clock, thus this 2nd queue run function. + * + * @param pVM The cross context VM structure. + * + * @remarks The caller must the Virtual Sync lock. Owning the TM lock is no + * longer important. + */ +static void tmR3TimerQueueRunVirtualSync(PVM pVM) +{ + PTMTIMERQUEUE const pQueue = &pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL_SYNC]; + VM_ASSERT_EMT(pVM); + Assert(PDMCritSectIsOwner(&pVM->tm.s.VirtualSyncLock)); + + /* + * Any timers? + */ + PTMTIMER pNext = TMTIMER_GET_HEAD(pQueue); + if (RT_UNLIKELY(!pNext)) + { + Assert(pVM->tm.s.fVirtualSyncTicking || !pVM->tm.s.cVirtualTicking); + return; + } + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncRun); + + /* + * Calculate the time frame for which we will dispatch timers. + * + * We use a time frame ranging from the current sync time (which is most likely the + * same as the head timer) and some configurable period (100000ns) up towards the + * current virtual time. 
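+ * (The configurable slack lives in pVM->tm.s.u32VirtualSyncScheduleSlack and is what caps u64Max below.)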
This period might also need to be restricted by the catch-up + * rate so frequent calls to this function won't accelerate the time too much, however + * this will be implemented at a later point if necessary. + * + * Without this frame we would 1) having to run timers much more frequently + * and 2) lag behind at a steady rate. + */ + const uint64_t u64VirtualNow = TMVirtualGetNoCheck(pVM); + uint64_t const offSyncGivenUp = pVM->tm.s.offVirtualSyncGivenUp; + uint64_t u64Now; + if (!pVM->tm.s.fVirtualSyncTicking) + { + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncRunStoppedAlready); + u64Now = pVM->tm.s.u64VirtualSync; + Assert(u64Now <= pNext->u64Expire); + } + else + { + /* Calc 'now'. */ + bool fStopCatchup = false; + bool fUpdateStuff = false; + uint64_t off = pVM->tm.s.offVirtualSync; + if (pVM->tm.s.fVirtualSyncCatchUp) + { + uint64_t u64Delta = u64VirtualNow - pVM->tm.s.u64VirtualSyncCatchUpPrev; + if (RT_LIKELY(!(u64Delta >> 32))) + { + uint64_t u64Sub = ASMMultU64ByU32DivByU32(u64Delta, pVM->tm.s.u32VirtualSyncCatchUpPercentage, 100); + if (off > u64Sub + offSyncGivenUp) + { + off -= u64Sub; + Log4(("TM: %'RU64/-%'8RU64: sub %'RU64 [tmR3TimerQueueRunVirtualSync]\n", u64VirtualNow - off, off - offSyncGivenUp, u64Sub)); + } + else + { + STAM_PROFILE_ADV_STOP(&pVM->tm.s.StatVirtualSyncCatchup, c); + fStopCatchup = true; + off = offSyncGivenUp; + } + fUpdateStuff = true; + } + } + u64Now = u64VirtualNow - off; + + /* Adjust against last returned time. */ + uint64_t u64Last = ASMAtomicUoReadU64(&pVM->tm.s.u64VirtualSync); + if (u64Last > u64Now) + { + u64Now = u64Last + 1; + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncGetAdjLast); + } + + /* Check if stopped by expired timer. */ + uint64_t const u64Expire = pNext->u64Expire; + if (u64Now >= u64Expire) + { + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncRunStop); + u64Now = u64Expire; + ASMAtomicWriteU64(&pVM->tm.s.u64VirtualSync, u64Now); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncTicking, false); + Log4(("TM: %'RU64/-%'8RU64: exp tmr [tmR3TimerQueueRunVirtualSync]\n", u64Now, u64VirtualNow - u64Now - offSyncGivenUp)); + } + else + { + ASMAtomicWriteU64(&pVM->tm.s.u64VirtualSync, u64Now); + if (fUpdateStuff) + { + ASMAtomicWriteU64(&pVM->tm.s.offVirtualSync, off); + ASMAtomicWriteU64(&pVM->tm.s.u64VirtualSyncCatchUpPrev, u64VirtualNow); + ASMAtomicWriteU64(&pVM->tm.s.u64VirtualSync, u64Now); + if (fStopCatchup) + { + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncCatchUp, false); + Log4(("TM: %'RU64/0: caught up [tmR3TimerQueueRunVirtualSync]\n", u64VirtualNow)); + } + } + } + } + + /* calc end of frame. */ + uint64_t u64Max = u64Now + pVM->tm.s.u32VirtualSyncScheduleSlack; + if (u64Max > u64VirtualNow - offSyncGivenUp) + u64Max = u64VirtualNow - offSyncGivenUp; + + /* assert sanity */ + Assert(u64Now <= u64VirtualNow - offSyncGivenUp); + Assert(u64Max <= u64VirtualNow - offSyncGivenUp); + Assert(u64Now <= u64Max); + Assert(offSyncGivenUp == pVM->tm.s.offVirtualSyncGivenUp); + + /* + * Process the expired timers moving the clock along as we progress. + */ +#ifdef VBOX_STRICT + uint64_t u64Prev = u64Now; NOREF(u64Prev); +#endif + while (pNext && pNext->u64Expire <= u64Max) + { + /* Advance */ + PTMTIMER pTimer = pNext; + pNext = TMTIMER_GET_NEXT(pTimer); + + /* Take the associated lock. 
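+ Entering the timer's critical section (if any) keeps this callout serialized with TMTimerSet/TMTimerStop calls made by threads owning that section.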
*/ + PPDMCRITSECT pCritSect = pTimer->pCritSect; + if (pCritSect) + PDMCritSectEnter(pCritSect, VERR_IGNORED); + + Log2(("tmR3TimerQueueRun: %p:{.enmState=%s, .enmClock=%d, .enmType=%d, u64Expire=%llx (now=%llx) .pszDesc=%s}\n", + pTimer, tmTimerState(pTimer->enmState), pTimer->enmClock, pTimer->enmType, pTimer->u64Expire, u64Now, pTimer->pszDesc)); + + /* Advance the clock - don't permit timers to be out of order or armed + in the 'past'. */ +#ifdef VBOX_STRICT + AssertMsg(pTimer->u64Expire >= u64Prev, ("%'RU64 < %'RU64 %s\n", pTimer->u64Expire, u64Prev, pTimer->pszDesc)); + u64Prev = pTimer->u64Expire; +#endif + ASMAtomicWriteU64(&pVM->tm.s.u64VirtualSync, pTimer->u64Expire); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncTicking, false); + + /* Unlink it, change the state and do the callout. */ + tmTimerQueueUnlinkActive(pQueue, pTimer); + TM_SET_STATE(pTimer, TMTIMERSTATE_EXPIRED_DELIVER); + switch (pTimer->enmType) + { + case TMTIMERTYPE_DEV: pTimer->u.Dev.pfnTimer(pTimer->u.Dev.pDevIns, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_USB: pTimer->u.Usb.pfnTimer(pTimer->u.Usb.pUsbIns, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_DRV: pTimer->u.Drv.pfnTimer(pTimer->u.Drv.pDrvIns, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_INTERNAL: pTimer->u.Internal.pfnTimer(pVM, pTimer, pTimer->pvUser); break; + case TMTIMERTYPE_EXTERNAL: pTimer->u.External.pfnTimer(pTimer->pvUser); break; + default: + AssertMsgFailed(("Invalid timer type %d (%s)\n", pTimer->enmType, pTimer->pszDesc)); + break; + } + + /* Change the state if it wasn't changed already in the handler. + Reset the Hz hint too since this is the same as TMTimerStop. */ + bool fRc; + TM_TRY_SET_STATE(pTimer, TMTIMERSTATE_STOPPED, TMTIMERSTATE_EXPIRED_DELIVER, fRc); + if (fRc && pTimer->uHzHint) + { + if (pTimer->uHzHint >= pVM->tm.s.uMaxHzHint) + ASMAtomicWriteBool(&pVM->tm.s.fHzHintNeedsUpdating, true); + pTimer->uHzHint = 0; + } + Log2(("tmR3TimerQueueRun: new state %s\n", tmTimerState(pTimer->enmState))); + + /* Leave the associated lock. */ + if (pCritSect) + PDMCritSectLeave(pCritSect); + } /* run loop */ + + + /* + * Restart the clock if it was stopped to serve any timers, + * and start/adjust catch-up if necessary. + */ + if ( !pVM->tm.s.fVirtualSyncTicking + && pVM->tm.s.cVirtualTicking) + { + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncRunRestart); + + /* calc the slack we've handed out. */ + const uint64_t u64VirtualNow2 = TMVirtualGetNoCheck(pVM); + Assert(u64VirtualNow2 >= u64VirtualNow); + AssertMsg(pVM->tm.s.u64VirtualSync >= u64Now, ("%'RU64 < %'RU64\n", pVM->tm.s.u64VirtualSync, u64Now)); + const uint64_t offSlack = pVM->tm.s.u64VirtualSync - u64Now; + STAM_STATS({ + if (offSlack) + { + PSTAMPROFILE p = &pVM->tm.s.StatVirtualSyncRunSlack; + p->cPeriods++; + p->cTicks += offSlack; + if (p->cTicksMax < offSlack) p->cTicksMax = offSlack; + if (p->cTicksMin > offSlack) p->cTicksMin = offSlack; + } + }); + + /* Let the time run a little bit while we were busy running timers(?). */ + uint64_t u64Elapsed; +#define MAX_ELAPSED 30000U /* ns */ + if (offSlack > MAX_ELAPSED) + u64Elapsed = 0; + else + { + u64Elapsed = u64VirtualNow2 - u64VirtualNow; + if (u64Elapsed > MAX_ELAPSED) + u64Elapsed = MAX_ELAPSED; + u64Elapsed = u64Elapsed > offSlack ? u64Elapsed - offSlack : 0; + } +#undef MAX_ELAPSED + + /* Calc the current offset. 
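+ offNew is how far the virtual-sync clock now trails the virtual clock after granting the elapsed slack; offLag below excludes the part we have already given up catching up.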
*/ + uint64_t offNew = u64VirtualNow2 - pVM->tm.s.u64VirtualSync - u64Elapsed; + Assert(!(offNew & RT_BIT_64(63))); + uint64_t offLag = offNew - pVM->tm.s.offVirtualSyncGivenUp; + Assert(!(offLag & RT_BIT_64(63))); + + /* + * Deal with starting, adjusting and stopping catchup. + */ + if (pVM->tm.s.fVirtualSyncCatchUp) + { + if (offLag <= pVM->tm.s.u64VirtualSyncCatchUpStopThreshold) + { + /* stop */ + STAM_PROFILE_ADV_STOP(&pVM->tm.s.StatVirtualSyncCatchup, c); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncCatchUp, false); + Log4(("TM: %'RU64/-%'8RU64: caught up [pt]\n", u64VirtualNow2 - offNew, offLag)); + } + else if (offLag <= pVM->tm.s.u64VirtualSyncCatchUpGiveUpThreshold) + { + /* adjust */ + unsigned i = 0; + while ( i + 1 < RT_ELEMENTS(pVM->tm.s.aVirtualSyncCatchUpPeriods) + && offLag >= pVM->tm.s.aVirtualSyncCatchUpPeriods[i + 1].u64Start) + i++; + if (pVM->tm.s.u32VirtualSyncCatchUpPercentage < pVM->tm.s.aVirtualSyncCatchUpPeriods[i].u32Percentage) + { + STAM_COUNTER_INC(&pVM->tm.s.aStatVirtualSyncCatchupAdjust[i]); + ASMAtomicWriteU32(&pVM->tm.s.u32VirtualSyncCatchUpPercentage, pVM->tm.s.aVirtualSyncCatchUpPeriods[i].u32Percentage); + Log4(("TM: %'RU64/%'8RU64: adj %u%%\n", u64VirtualNow2 - offNew, offLag, pVM->tm.s.u32VirtualSyncCatchUpPercentage)); + } + pVM->tm.s.u64VirtualSyncCatchUpPrev = u64VirtualNow2; + } + else + { + /* give up */ + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncGiveUp); + STAM_PROFILE_ADV_STOP(&pVM->tm.s.StatVirtualSyncCatchup, c); + ASMAtomicWriteU64((uint64_t volatile *)&pVM->tm.s.offVirtualSyncGivenUp, offNew); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncCatchUp, false); + Log4(("TM: %'RU64/%'8RU64: give up %u%%\n", u64VirtualNow2 - offNew, offLag, pVM->tm.s.u32VirtualSyncCatchUpPercentage)); + LogRel(("TM: Giving up catch-up attempt at a %'RU64 ns lag; new total: %'RU64 ns\n", offLag, offNew)); + } + } + else if (offLag >= pVM->tm.s.aVirtualSyncCatchUpPeriods[0].u64Start) + { + if (offLag <= pVM->tm.s.u64VirtualSyncCatchUpGiveUpThreshold) + { + /* start */ + STAM_PROFILE_ADV_START(&pVM->tm.s.StatVirtualSyncCatchup, c); + unsigned i = 0; + while ( i + 1 < RT_ELEMENTS(pVM->tm.s.aVirtualSyncCatchUpPeriods) + && offLag >= pVM->tm.s.aVirtualSyncCatchUpPeriods[i + 1].u64Start) + i++; + STAM_COUNTER_INC(&pVM->tm.s.aStatVirtualSyncCatchupInitial[i]); + ASMAtomicWriteU32(&pVM->tm.s.u32VirtualSyncCatchUpPercentage, pVM->tm.s.aVirtualSyncCatchUpPeriods[i].u32Percentage); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncCatchUp, true); + Log4(("TM: %'RU64/%'8RU64: catch-up %u%%\n", u64VirtualNow2 - offNew, offLag, pVM->tm.s.u32VirtualSyncCatchUpPercentage)); + } + else + { + /* don't bother */ + STAM_COUNTER_INC(&pVM->tm.s.StatVirtualSyncGiveUpBeforeStarting); + ASMAtomicWriteU64((uint64_t volatile *)&pVM->tm.s.offVirtualSyncGivenUp, offNew); + Log4(("TM: %'RU64/%'8RU64: give up\n", u64VirtualNow2 - offNew, offLag)); + LogRel(("TM: Not bothering to attempt catching up a %'RU64 ns lag; new total: %'RU64\n", offLag, offNew)); + } + } + + /* + * Update the offset and restart the clock. + */ + Assert(!(offNew & RT_BIT_64(63))); + ASMAtomicWriteU64(&pVM->tm.s.offVirtualSync, offNew); + ASMAtomicWriteBool(&pVM->tm.s.fVirtualSyncTicking, true); + } +} + + +/** + * Deals with stopped Virtual Sync clock. + * + * This is called by the forced action flag handling code in EM when it + * encounters the VM_FF_TM_VIRTUAL_SYNC flag. It is called by all VCPUs and they + * will block on the VirtualSyncLock until the pending timers has been executed + * and the clock restarted. 
+ * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * + * @thread EMTs + */ +VMMR3_INT_DECL(void) TMR3VirtualSyncFF(PVM pVM, PVMCPU pVCpu) +{ + Log2(("TMR3VirtualSyncFF:\n")); + + /* + * The EMT doing the timers is diverted to them. + */ + if (pVCpu->idCpu == pVM->tm.s.idTimerCpu) + TMR3TimerQueuesDo(pVM); + /* + * The other EMTs will block on the virtual sync lock and the first owner + * will run the queue and thus restarting the clock. + * + * Note! This is very suboptimal code wrt to resuming execution when there + * are more than two Virtual CPUs, since they will all have to enter + * the critical section one by one. But it's a very simple solution + * which will have to do the job for now. + */ + else + { + STAM_PROFILE_START(&pVM->tm.s.StatVirtualSyncFF, a); + PDMCritSectEnter(&pVM->tm.s.VirtualSyncLock, VERR_IGNORED); + if (pVM->tm.s.fVirtualSyncTicking) + { + STAM_PROFILE_STOP(&pVM->tm.s.StatVirtualSyncFF, a); /* before the unlock! */ + PDMCritSectLeave(&pVM->tm.s.VirtualSyncLock); + Log2(("TMR3VirtualSyncFF: ticking\n")); + } + else + { + PDMCritSectLeave(&pVM->tm.s.VirtualSyncLock); + + /* try run it. */ + TM_LOCK_TIMERS(pVM); + PDMCritSectEnter(&pVM->tm.s.VirtualSyncLock, VERR_IGNORED); + if (pVM->tm.s.fVirtualSyncTicking) + Log2(("TMR3VirtualSyncFF: ticking (2)\n")); + else + { + ASMAtomicWriteBool(&pVM->tm.s.fRunningVirtualSyncQueue, true); + Log2(("TMR3VirtualSyncFF: running queue\n")); + + Assert(!pVM->tm.s.paTimerQueuesR3[TMCLOCK_VIRTUAL_SYNC].offSchedule); + tmR3TimerQueueRunVirtualSync(pVM); + if (pVM->tm.s.fVirtualSyncTicking) /** @todo move into tmR3TimerQueueRunVirtualSync - FIXME */ + VM_FF_CLEAR(pVM, VM_FF_TM_VIRTUAL_SYNC); + + ASMAtomicWriteBool(&pVM->tm.s.fRunningVirtualSyncQueue, false); + } + STAM_PROFILE_STOP(&pVM->tm.s.StatVirtualSyncFF, a); /* before the unlock! */ + PDMCritSectLeave(&pVM->tm.s.VirtualSyncLock); + TM_UNLOCK_TIMERS(pVM); + } + } +} + + +/** @name Saved state values + * @{ */ +#define TMTIMERSTATE_SAVED_PENDING_STOP 4 +#define TMTIMERSTATE_SAVED_PENDING_SCHEDULE 7 +/** @} */ + + +/** + * Saves the state of a timer to a saved state. + * + * @returns VBox status code. + * @param pTimer Timer to save. + * @param pSSM Save State Manager handle. + */ +VMMR3DECL(int) TMR3TimerSave(PTMTIMERR3 pTimer, PSSMHANDLE pSSM) +{ + LogFlow(("TMR3TimerSave: %p:{enmState=%s, .pszDesc={%s}} pSSM=%p\n", pTimer, tmTimerState(pTimer->enmState), pTimer->pszDesc, pSSM)); + switch (pTimer->enmState) + { + case TMTIMERSTATE_STOPPED: + case TMTIMERSTATE_PENDING_STOP: + case TMTIMERSTATE_PENDING_STOP_SCHEDULE: + return SSMR3PutU8(pSSM, TMTIMERSTATE_SAVED_PENDING_STOP); + + case TMTIMERSTATE_PENDING_SCHEDULE_SET_EXPIRE: + case TMTIMERSTATE_PENDING_RESCHEDULE_SET_EXPIRE: + AssertMsgFailed(("u64Expire is being updated! 
(%s)\n", pTimer->pszDesc)); + if (!RTThreadYield()) + RTThreadSleep(1); + RT_FALL_THRU(); + case TMTIMERSTATE_ACTIVE: + case TMTIMERSTATE_PENDING_SCHEDULE: + case TMTIMERSTATE_PENDING_RESCHEDULE: + SSMR3PutU8(pSSM, TMTIMERSTATE_SAVED_PENDING_SCHEDULE); + return SSMR3PutU64(pSSM, pTimer->u64Expire); + + case TMTIMERSTATE_EXPIRED_GET_UNLINK: + case TMTIMERSTATE_EXPIRED_DELIVER: + case TMTIMERSTATE_DESTROY: + case TMTIMERSTATE_FREE: + AssertMsgFailed(("Invalid timer state %d %s (%s)\n", pTimer->enmState, tmTimerState(pTimer->enmState), pTimer->pszDesc)); + return SSMR3HandleSetStatus(pSSM, VERR_TM_INVALID_STATE); + } + + AssertMsgFailed(("Unknown timer state %d (%s)\n", pTimer->enmState, pTimer->pszDesc)); + return SSMR3HandleSetStatus(pSSM, VERR_TM_UNKNOWN_STATE); +} + + +/** + * Loads the state of a timer from a saved state. + * + * @returns VBox status code. + * @param pTimer Timer to restore. + * @param pSSM Save State Manager handle. + */ +VMMR3DECL(int) TMR3TimerLoad(PTMTIMERR3 pTimer, PSSMHANDLE pSSM) +{ + Assert(pTimer); Assert(pSSM); VM_ASSERT_EMT(pTimer->pVMR3); + LogFlow(("TMR3TimerLoad: %p:{enmState=%s, .pszDesc={%s}} pSSM=%p\n", pTimer, tmTimerState(pTimer->enmState), pTimer->pszDesc, pSSM)); + + /* + * Load the state and validate it. + */ + uint8_t u8State; + int rc = SSMR3GetU8(pSSM, &u8State); + if (RT_FAILURE(rc)) + return rc; + + /* TMTIMERSTATE_SAVED_XXX: Workaround for accidental state shift in r47786 (2009-05-26 19:12:12). */ + if ( u8State == TMTIMERSTATE_SAVED_PENDING_STOP + 1 + || u8State == TMTIMERSTATE_SAVED_PENDING_SCHEDULE + 1) + u8State--; + + if ( u8State != TMTIMERSTATE_SAVED_PENDING_STOP + && u8State != TMTIMERSTATE_SAVED_PENDING_SCHEDULE) + { + AssertLogRelMsgFailed(("u8State=%d\n", u8State)); + return SSMR3HandleSetStatus(pSSM, VERR_TM_LOAD_STATE); + } + + /* Enter the critical sections to make TMTimerSet/Stop happy. */ + if (pTimer->enmClock == TMCLOCK_VIRTUAL_SYNC) + PDMCritSectEnter(&pTimer->pVMR3->tm.s.VirtualSyncLock, VERR_IGNORED); + PPDMCRITSECT pCritSect = pTimer->pCritSect; + if (pCritSect) + PDMCritSectEnter(pCritSect, VERR_IGNORED); + + if (u8State == TMTIMERSTATE_SAVED_PENDING_SCHEDULE) + { + /* + * Load the expire time. + */ + uint64_t u64Expire; + rc = SSMR3GetU64(pSSM, &u64Expire); + if (RT_FAILURE(rc)) + return rc; + + /* + * Set it. + */ + Log(("u8State=%d u64Expire=%llu\n", u8State, u64Expire)); + rc = TMTimerSet(pTimer, u64Expire); + } + else + { + /* + * Stop it. + */ + Log(("u8State=%d\n", u8State)); + rc = TMTimerStop(pTimer); + } + + if (pCritSect) + PDMCritSectLeave(pCritSect); + if (pTimer->enmClock == TMCLOCK_VIRTUAL_SYNC) + PDMCritSectLeave(&pTimer->pVMR3->tm.s.VirtualSyncLock); + + /* + * On failure set SSM status. + */ + if (RT_FAILURE(rc)) + rc = SSMR3HandleSetStatus(pSSM, rc); + return rc; +} + + +/** + * Skips the state of a timer in a given saved state. + * + * @returns VBox status. + * @param pSSM Save State Manager handle. + * @param pfActive Where to store whether the timer was active + * when the state was saved. + */ +VMMR3DECL(int) TMR3TimerSkip(PSSMHANDLE pSSM, bool *pfActive) +{ + Assert(pSSM); AssertPtr(pfActive); + LogFlow(("TMR3TimerSkip: pSSM=%p pfActive=%p\n", pSSM, pfActive)); + + /* + * Load the state and validate it. + */ + uint8_t u8State; + int rc = SSMR3GetU8(pSSM, &u8State); + if (RT_FAILURE(rc)) + return rc; + + /* TMTIMERSTATE_SAVED_XXX: Workaround for accidental state shift in r47786 (2009-05-26 19:12:12). 
*/ + if ( u8State == TMTIMERSTATE_SAVED_PENDING_STOP + 1 + || u8State == TMTIMERSTATE_SAVED_PENDING_SCHEDULE + 1) + u8State--; + + if ( u8State != TMTIMERSTATE_SAVED_PENDING_STOP + && u8State != TMTIMERSTATE_SAVED_PENDING_SCHEDULE) + { + AssertLogRelMsgFailed(("u8State=%d\n", u8State)); + return SSMR3HandleSetStatus(pSSM, VERR_TM_LOAD_STATE); + } + + *pfActive = (u8State == TMTIMERSTATE_SAVED_PENDING_SCHEDULE); + if (*pfActive) + { + /* + * Load the expire time. + */ + uint64_t u64Expire; + rc = SSMR3GetU64(pSSM, &u64Expire); + } + + return rc; +} + + +/** + * Associates a critical section with a timer. + * + * The critical section will be entered prior to doing the timer call back, thus + * avoiding potential races between the timer thread and other threads trying to + * stop or adjust the timer expiration while it's being delivered. The timer + * thread will leave the critical section when the timer callback returns. + * + * In strict builds, ownership of the critical section will be asserted by + * TMTimerSet, TMTimerStop, TMTimerGetExpire and TMTimerDestroy (when called at + * runtime). + * + * @retval VINF_SUCCESS on success. + * @retval VERR_INVALID_HANDLE if the timer handle is NULL or invalid + * (asserted). + * @retval VERR_INVALID_PARAMETER if pCritSect is NULL or has an invalid magic + * (asserted). + * @retval VERR_ALREADY_EXISTS if a critical section was already associated + * with the timer (asserted). + * @retval VERR_INVALID_STATE if the timer isn't stopped. + * + * @param pTimer The timer handle. + * @param pCritSect The critical section. The caller must make sure this + * is around for the life time of the timer. + * + * @thread Any, but the caller is responsible for making sure the timer is not + * active. + */ +VMMR3DECL(int) TMR3TimerSetCritSect(PTMTIMERR3 pTimer, PPDMCRITSECT pCritSect) +{ + AssertPtrReturn(pTimer, VERR_INVALID_HANDLE); + AssertPtrReturn(pCritSect, VERR_INVALID_PARAMETER); + const char *pszName = PDMR3CritSectName(pCritSect); /* exploited for validation */ + AssertReturn(pszName, VERR_INVALID_PARAMETER); + AssertReturn(!pTimer->pCritSect, VERR_ALREADY_EXISTS); + AssertReturn(pTimer->enmState == TMTIMERSTATE_STOPPED, VERR_INVALID_STATE); + LogFlow(("pTimer=%p (%s) pCritSect=%p (%s)\n", pTimer, pTimer->pszDesc, pCritSect, pszName)); + + pTimer->pCritSect = pCritSect; + return VINF_SUCCESS; +} + + +/** + * Get the real world UTC time adjusted for VM lag. + * + * @returns pTime. + * @param pVM The cross context VM structure. + * @param pTime Where to store the time. + */ +VMMR3_INT_DECL(PRTTIMESPEC) TMR3UtcNow(PVM pVM, PRTTIMESPEC pTime) +{ + /* + * Get a stable set of VirtualSync parameters and calc the lag. + */ + uint64_t offVirtualSync; + uint64_t offVirtualSyncGivenUp; + do + { + offVirtualSync = ASMAtomicReadU64(&pVM->tm.s.offVirtualSync); + offVirtualSyncGivenUp = ASMAtomicReadU64((uint64_t volatile *)&pVM->tm.s.offVirtualSyncGivenUp); + } while (ASMAtomicReadU64(&pVM->tm.s.offVirtualSync) != offVirtualSync); + + Assert(offVirtualSync >= offVirtualSyncGivenUp); + uint64_t const offLag = offVirtualSync - offVirtualSyncGivenUp; + + /* + * Get current time and adjust for virtual sync lag and do time displacement. + */ + RTTimeNow(pTime); + RTTimeSpecSubNano(pTime, offLag); + RTTimeSpecAddNano(pTime, pVM->tm.s.offUTC); + + /* + * Log details if the time changed radically (also triggers on first call). 
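+ * A jump of more than half an hour against the previous call is logged and, if a touch file is configured, also appended to that file so external tooling can notice it.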
+ */ + int64_t nsPrev = ASMAtomicXchgS64(&pVM->tm.s.nsLastUtcNow, RTTimeSpecGetNano(pTime)); + int64_t cNsDelta = RTTimeSpecGetNano(pTime) - nsPrev; + if ((uint64_t)RT_ABS(cNsDelta) > RT_NS_1HOUR / 2) + { + RTTIMESPEC NowAgain; + RTTimeNow(&NowAgain); + LogRel(("TMR3UtcNow: nsNow=%'RI64 nsPrev=%'RI64 -> cNsDelta=%'RI64 (offLag=%'RI64 offVirtualSync=%'RU64 offVirtualSyncGivenUp=%'RU64, NowAgain=%'RI64)\n", + RTTimeSpecGetNano(pTime), nsPrev, cNsDelta, offLag, offVirtualSync, offVirtualSyncGivenUp, RTTimeSpecGetNano(&NowAgain))); + if (pVM->tm.s.pszUtcTouchFileOnJump && nsPrev != 0) + { + RTFILE hFile; + int rc = RTFileOpen(&hFile, pVM->tm.s.pszUtcTouchFileOnJump, + RTFILE_O_WRITE | RTFILE_O_APPEND | RTFILE_O_OPEN_CREATE | RTFILE_O_DENY_NONE); + if (RT_SUCCESS(rc)) + { + char szMsg[256]; + size_t cch; + cch = RTStrPrintf(szMsg, sizeof(szMsg), + "TMR3UtcNow: nsNow=%'RI64 nsPrev=%'RI64 -> cNsDelta=%'RI64 (offLag=%'RI64 offVirtualSync=%'RU64 offVirtualSyncGivenUp=%'RU64, NowAgain=%'RI64)\n", + RTTimeSpecGetNano(pTime), nsPrev, cNsDelta, offLag, offVirtualSync, offVirtualSyncGivenUp, RTTimeSpecGetNano(&NowAgain)); + RTFileWrite(hFile, szMsg, cch, NULL); + RTFileClose(hFile); + } + } + } + + return pTime; +} + + +/** + * Pauses all clocks except TMCLOCK_REAL. + * + * @returns VBox status code, all errors are asserted. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @thread EMT corresponding to Pointer to the VMCPU. + */ +VMMR3DECL(int) TMR3NotifySuspend(PVM pVM, PVMCPU pVCpu) +{ + VMCPU_ASSERT_EMT(pVCpu); + + /* + * The shared virtual clock (includes virtual sync which is tied to it). + */ + TM_LOCK_TIMERS(pVM); /* Paranoia: Exploiting the timer lock here. */ + int rc = tmVirtualPauseLocked(pVM); + TM_UNLOCK_TIMERS(pVM); + if (RT_FAILURE(rc)) + return rc; + + /* + * Pause the TSC last since it is normally linked to the virtual + * sync clock, so the above code may actually stop both clocks. + */ + if (!pVM->tm.s.fTSCTiedToExecution) + { + TM_LOCK_TIMERS(pVM); /* Exploit the timer lock for synchronization. */ + rc = tmCpuTickPauseLocked(pVM, pVCpu); + TM_UNLOCK_TIMERS(pVM); + if (RT_FAILURE(rc)) + return rc; + } + +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + /* + * Update cNsTotal. + */ + uint32_t uGen = ASMAtomicIncU32(&pVCpu->tm.s.uTimesGen); Assert(uGen & 1); + pVCpu->tm.s.cNsTotal = RTTimeNanoTS() - pVCpu->tm.s.u64NsTsStartTotal; + pVCpu->tm.s.cNsOther = pVCpu->tm.s.cNsTotal - pVCpu->tm.s.cNsExecuting - pVCpu->tm.s.cNsHalted; + ASMAtomicWriteU32(&pVCpu->tm.s.uTimesGen, (uGen | 1) + 1); +#endif + + return VINF_SUCCESS; +} + + +/** + * Resumes all clocks except TMCLOCK_REAL. + * + * @returns VBox status code, all errors are asserted. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @thread EMT corresponding to Pointer to the VMCPU. + */ +VMMR3DECL(int) TMR3NotifyResume(PVM pVM, PVMCPU pVCpu) +{ + VMCPU_ASSERT_EMT(pVCpu); + int rc; + +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + /* + * Set u64NsTsStartTotal. There is no need to back this out if either of + * the two calls below fail. + */ + pVCpu->tm.s.u64NsTsStartTotal = RTTimeNanoTS() - pVCpu->tm.s.cNsTotal; +#endif + + /* + * Resume the TSC first since it is normally linked to the virtual sync + * clock, so it may actually not be resumed until we've executed the code + * below. + */ + if (!pVM->tm.s.fTSCTiedToExecution) + { + TM_LOCK_TIMERS(pVM); /* Exploit the timer lock for synchronization. 
*/ + rc = tmCpuTickResumeLocked(pVM, pVCpu); + TM_UNLOCK_TIMERS(pVM); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * The shared virtual clock (includes virtual sync which is tied to it). + */ + TM_LOCK_TIMERS(pVM); /* Paranoia: Exploiting the timer lock here. */ + rc = tmVirtualResumeLocked(pVM); + TM_UNLOCK_TIMERS(pVM); + + return rc; +} + + +/** + * Sets the warp drive percent of the virtual time. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param u32Percent The new percentage. 100 means normal operation. + */ +VMMDECL(int) TMR3SetWarpDrive(PUVM pUVM, uint32_t u32Percent) +{ + return VMR3ReqPriorityCallWaitU(pUVM, VMCPUID_ANY, (PFNRT)tmR3SetWarpDrive, 2, pUVM, u32Percent); +} + + +/** + * EMT worker for TMR3SetWarpDrive. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param u32Percent See TMR3SetWarpDrive(). + * @internal + */ +static DECLCALLBACK(int) tmR3SetWarpDrive(PUVM pUVM, uint32_t u32Percent) +{ + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + PVMCPU pVCpu = VMMGetCpu(pVM); + + /* + * Validate it. + */ + AssertMsgReturn(u32Percent >= 2 && u32Percent <= 20000, + ("%RX32 is not between 2 and 20000 (inclusive).\n", u32Percent), + VERR_INVALID_PARAMETER); + +/** @todo This isn't a feature specific to virtual time, move the variables to + * TM level and make it affect TMR3UTCNow as well! */ + + /* + * If the time is running we'll have to pause it before we can change + * the warp drive settings. + */ + TM_LOCK_TIMERS(pVM); /* Paranoia: Exploiting the timer lock here. */ + bool fPaused = !!pVM->tm.s.cVirtualTicking; + if (fPaused) /** @todo this isn't really working, but wtf. */ + TMR3NotifySuspend(pVM, pVCpu); + + /** @todo Should switch TM mode to virt-tsc-emulated if it isn't already! */ + pVM->tm.s.u32VirtualWarpDrivePercentage = u32Percent; + pVM->tm.s.fVirtualWarpDrive = u32Percent != 100; + LogRel(("TM: u32VirtualWarpDrivePercentage=%RI32 fVirtualWarpDrive=%RTbool\n", + pVM->tm.s.u32VirtualWarpDrivePercentage, pVM->tm.s.fVirtualWarpDrive)); + + if (fPaused) + TMR3NotifyResume(pVM, pVCpu); + TM_UNLOCK_TIMERS(pVM); + return VINF_SUCCESS; +} + + +/** + * Gets the current TMCLOCK_VIRTUAL time without checking + * timers or anything. + * + * @returns The timestamp. + * @param pUVM The user mode VM structure. + * + * @remarks See TMVirtualGetNoCheck. + */ +VMMR3DECL(uint64_t) TMR3TimeVirtGet(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT64_MAX); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT64_MAX); + return TMVirtualGetNoCheck(pVM); +} + + +/** + * Gets the current TMCLOCK_VIRTUAL time in milliseconds without checking + * timers or anything. + * + * @returns The timestamp in milliseconds. + * @param pUVM The user mode VM structure. + * + * @remarks See TMVirtualGetNoCheck. + */ +VMMR3DECL(uint64_t) TMR3TimeVirtGetMilli(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT64_MAX); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT64_MAX); + return TMVirtualToMilli(pVM, TMVirtualGetNoCheck(pVM)); +} + + +/** + * Gets the current TMCLOCK_VIRTUAL time in microseconds without checking + * timers or anything. + * + * @returns The timestamp in microseconds. + * @param pUVM The user mode VM structure. + * + * @remarks See TMVirtualGetNoCheck. 
+ */ +VMMR3DECL(uint64_t) TMR3TimeVirtGetMicro(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT64_MAX); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT64_MAX); + return TMVirtualToMicro(pVM, TMVirtualGetNoCheck(pVM)); +} + + +/** + * Gets the current TMCLOCK_VIRTUAL time in nanoseconds without checking + * timers or anything. + * + * @returns The timestamp in nanoseconds. + * @param pUVM The user mode VM structure. + * + * @remarks See TMVirtualGetNoCheck. + */ +VMMR3DECL(uint64_t) TMR3TimeVirtGetNano(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT64_MAX); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT64_MAX); + return TMVirtualToNano(pVM, TMVirtualGetNoCheck(pVM)); +} + + +/** + * Gets the current warp drive percent. + * + * @returns The warp drive percent. + * @param pUVM The user mode VM structure. + */ +VMMR3DECL(uint32_t) TMR3GetWarpDrive(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT32_MAX); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX); + return pVM->tm.s.u32VirtualWarpDrivePercentage; +} + + +/** + * Gets the performance information for one virtual CPU as seen by the VMM. + * + * The returned times covers the period where the VM is running and will be + * reset when restoring a previous VM state (at least for the time being). + * + * @retval VINF_SUCCESS on success. + * @retval VERR_NOT_IMPLEMENTED if not compiled in. + * @retval VERR_INVALID_STATE if the VM handle is bad. + * @retval VERR_INVALID_PARAMETER if idCpu is out of range. + * + * @param pVM The cross context VM structure. + * @param idCpu The ID of the virtual CPU which times to get. + * @param pcNsTotal Where to store the total run time (nano seconds) of + * the CPU, i.e. the sum of the three other returns. + * Optional. + * @param pcNsExecuting Where to store the time (nano seconds) spent + * executing guest code. Optional. + * @param pcNsHalted Where to store the time (nano seconds) spent + * halted. Optional + * @param pcNsOther Where to store the time (nano seconds) spent + * preempted by the host scheduler, on virtualization + * overhead and on other tasks. + */ +VMMR3DECL(int) TMR3GetCpuLoadTimes(PVM pVM, VMCPUID idCpu, uint64_t *pcNsTotal, uint64_t *pcNsExecuting, + uint64_t *pcNsHalted, uint64_t *pcNsOther) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_STATE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_PARAMETER); + +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + /* + * Get a stable result set. + * This should be way quicker than an EMT request. + */ + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + uint32_t uTimesGen = ASMAtomicReadU32(&pVCpu->tm.s.uTimesGen); + uint64_t cNsTotal = pVCpu->tm.s.cNsTotal; + uint64_t cNsExecuting = pVCpu->tm.s.cNsExecuting; + uint64_t cNsHalted = pVCpu->tm.s.cNsHalted; + uint64_t cNsOther = pVCpu->tm.s.cNsOther; + while ( (uTimesGen & 1) /* update in progress */ + || uTimesGen != ASMAtomicReadU32(&pVCpu->tm.s.uTimesGen)) + { + RTThreadYield(); + uTimesGen = ASMAtomicReadU32(&pVCpu->tm.s.uTimesGen); + cNsTotal = pVCpu->tm.s.cNsTotal; + cNsExecuting = pVCpu->tm.s.cNsExecuting; + cNsHalted = pVCpu->tm.s.cNsHalted; + cNsOther = pVCpu->tm.s.cNsOther; + } + + /* + * Fill in the return values. 
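+ * All four output pointers are optional; NULL ones are simply skipped.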
+ */ + if (pcNsTotal) + *pcNsTotal = cNsTotal; + if (pcNsExecuting) + *pcNsExecuting = cNsExecuting; + if (pcNsHalted) + *pcNsHalted = cNsHalted; + if (pcNsOther) + *pcNsOther = cNsOther; + + return VINF_SUCCESS; + +#else + return VERR_NOT_IMPLEMENTED; +#endif +} + +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + +/** + * Helper for tmR3CpuLoadTimer. + * @returns + * @param pState The state to update. + * @param cNsTotal Total time. + * @param cNsExecuting Time executing. + * @param cNsHalted Time halted. + */ +DECLINLINE(void) tmR3CpuLoadTimerMakeUpdate(PTMCPULOADSTATE pState, uint64_t cNsTotal, uint64_t cNsExecuting, uint64_t cNsHalted) +{ + /* Calc deltas */ + uint64_t cNsTotalDelta = cNsTotal - pState->cNsPrevTotal; + pState->cNsPrevTotal = cNsTotal; + + uint64_t cNsExecutingDelta = cNsExecuting - pState->cNsPrevExecuting; + pState->cNsPrevExecuting = cNsExecuting; + + uint64_t cNsHaltedDelta = cNsHalted - pState->cNsPrevHalted; + pState->cNsPrevHalted = cNsHalted; + + /* Calc pcts. */ + if (!cNsTotalDelta) + { + pState->cPctExecuting = 0; + pState->cPctHalted = 100; + pState->cPctOther = 0; + } + else if (cNsTotalDelta < UINT64_MAX / 4) + { + pState->cPctExecuting = (uint8_t)(cNsExecutingDelta * 100 / cNsTotalDelta); + pState->cPctHalted = (uint8_t)(cNsHaltedDelta * 100 / cNsTotalDelta); + pState->cPctOther = (uint8_t)((cNsTotalDelta - cNsExecutingDelta - cNsHaltedDelta) * 100 / cNsTotalDelta); + } + else + { + pState->cPctExecuting = 0; + pState->cPctHalted = 100; + pState->cPctOther = 0; + } +} + + +/** + * Timer callback that calculates the CPU load since the last time it was + * called. + * + * @param pVM The cross context VM structure. + * @param pTimer The timer. + * @param pvUser NULL, unused. + */ +static DECLCALLBACK(void) tmR3CpuLoadTimer(PVM pVM, PTMTIMER pTimer, void *pvUser) +{ + /* + * Re-arm the timer first. + */ + int rc = TMTimerSetMillies(pTimer, 1000); + AssertLogRelRC(rc); + NOREF(pvUser); + + /* + * Update the values for each CPU. + */ + uint64_t cNsTotalAll = 0; + uint64_t cNsExecutingAll = 0; + uint64_t cNsHaltedAll = 0; + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[iCpu]; + + /* Try get a stable data set. */ + uint32_t cTries = 3; + uint32_t uTimesGen = ASMAtomicReadU32(&pVCpu->tm.s.uTimesGen); + uint64_t cNsTotal = pVCpu->tm.s.cNsTotal; + uint64_t cNsExecuting = pVCpu->tm.s.cNsExecuting; + uint64_t cNsHalted = pVCpu->tm.s.cNsHalted; + while (RT_UNLIKELY( (uTimesGen & 1) /* update in progress */ + || uTimesGen != ASMAtomicReadU32(&pVCpu->tm.s.uTimesGen))) + { + if (!--cTries) + break; + ASMNopPause(); + uTimesGen = ASMAtomicReadU32(&pVCpu->tm.s.uTimesGen); + cNsTotal = pVCpu->tm.s.cNsTotal; + cNsExecuting = pVCpu->tm.s.cNsExecuting; + cNsHalted = pVCpu->tm.s.cNsHalted; + } + + /* Totals */ + cNsTotalAll += cNsTotal; + cNsExecutingAll += cNsExecuting; + cNsHaltedAll += cNsHalted; + + /* Calc the PCTs and update the state. */ + tmR3CpuLoadTimerMakeUpdate(&pVCpu->tm.s.CpuLoad, cNsTotal, cNsExecuting, cNsHalted); + } + + /* + * Update the value for all the CPUs. + */ + tmR3CpuLoadTimerMakeUpdate(&pVM->tm.s.CpuLoad, cNsTotalAll, cNsExecutingAll, cNsHaltedAll); + + /** @todo Try add 1, 5 and 15 min load stats. 
*/ + +} + +#endif /* !VBOX_WITHOUT_NS_ACCOUNTING */ + + +/** + * @callback_method_impl{PFNVMMEMTRENDEZVOUS, + * Worker for TMR3CpuTickParavirtEnable} + */ +static DECLCALLBACK(VBOXSTRICTRC) tmR3CpuTickParavirtEnable(PVM pVM, PVMCPU pVCpuEmt, void *pvData) +{ + AssertPtr(pVM); Assert(pVM->tm.s.fTSCModeSwitchAllowed); NOREF(pVCpuEmt); NOREF(pvData); + Assert(pVM->tm.s.enmTSCMode != TMTSCMODE_REAL_TSC_OFFSET); + Assert(pVM->tm.s.enmTSCMode != TMTSCMODE_NATIVE_API); /** @todo figure out NEM/win and paravirt */ + Assert(tmR3HasFixedTSC(pVM)); + + /* + * The return value of TMCpuTickGet() and the guest's TSC value for each + * CPU must remain constant across the TM TSC mode-switch. Thus we have + * the following equation (new/old signifies the new/old tsc modes): + * uNewTsc = uOldTsc + * + * Where (see tmCpuTickGetInternal): + * uOldTsc = uRawOldTsc - offTscRawSrcOld + * uNewTsc = uRawNewTsc - offTscRawSrcNew + * + * Solve it for offTscRawSrcNew without replacing uOldTsc: + * uRawNewTsc - offTscRawSrcNew = uOldTsc + * => -offTscRawSrcNew = uOldTsc - uRawNewTsc + * => offTscRawSrcNew = uRawNewTsc - uOldTsc + */ + uint64_t uRawOldTsc = tmR3CpuTickGetRawVirtualNoCheck(pVM); + uint64_t uRawNewTsc = SUPReadTsc(); + uint32_t cCpus = pVM->cCpus; + for (uint32_t i = 0; i < cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + uint64_t uOldTsc = uRawOldTsc - pVCpu->tm.s.offTSCRawSrc; + pVCpu->tm.s.offTSCRawSrc = uRawNewTsc - uOldTsc; + Assert(uRawNewTsc - pVCpu->tm.s.offTSCRawSrc >= uOldTsc); /* paranoia^256 */ + } + + LogRel(("TM: Switching TSC mode from '%s' to '%s'\n", tmR3GetTSCModeNameEx(pVM->tm.s.enmTSCMode), + tmR3GetTSCModeNameEx(TMTSCMODE_REAL_TSC_OFFSET))); + pVM->tm.s.enmTSCMode = TMTSCMODE_REAL_TSC_OFFSET; + return VINF_SUCCESS; +} + + +/** + * Notify TM that the guest has enabled usage of a paravirtualized TSC. + * + * This may perform a EMT rendezvous and change the TSC virtualization mode. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) TMR3CpuTickParavirtEnable(PVM pVM) +{ + int rc = VINF_SUCCESS; + if (pVM->tm.s.fTSCModeSwitchAllowed) + { + if (pVM->tm.s.enmTSCMode != TMTSCMODE_REAL_TSC_OFFSET) + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, tmR3CpuTickParavirtEnable, NULL); + } + else + LogRel(("TM: Host/VM is not suitable for using TSC mode '%s', request to change TSC mode ignored\n", + tmR3GetTSCModeNameEx(TMTSCMODE_REAL_TSC_OFFSET))); + pVM->tm.s.fParavirtTscEnabled = true; + return rc; +} + + +/** + * @callback_method_impl{PFNVMMEMTRENDEZVOUS, + * Worker for TMR3CpuTickParavirtDisable} + */ +static DECLCALLBACK(VBOXSTRICTRC) tmR3CpuTickParavirtDisable(PVM pVM, PVMCPU pVCpuEmt, void *pvData) +{ + AssertPtr(pVM); Assert(pVM->tm.s.fTSCModeSwitchAllowed); NOREF(pVCpuEmt); + Assert( pVM->tm.s.enmTSCMode == TMTSCMODE_REAL_TSC_OFFSET + && pVM->tm.s.enmTSCMode != pVM->tm.s.enmOriginalTSCMode); + RT_NOREF1(pvData); + + /* + * See tmR3CpuTickParavirtEnable for an explanation of the conversion math. + */ + uint64_t uRawOldTsc = SUPReadTsc(); + uint64_t uRawNewTsc = tmR3CpuTickGetRawVirtualNoCheck(pVM); + uint32_t cCpus = pVM->cCpus; + for (uint32_t i = 0; i < cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + uint64_t uOldTsc = uRawOldTsc - pVCpu->tm.s.offTSCRawSrc; + pVCpu->tm.s.offTSCRawSrc = uRawNewTsc - uOldTsc; + Assert(uRawNewTsc - pVCpu->tm.s.offTSCRawSrc >= uOldTsc); /* paranoia^256 */ + + /* Update the last-seen tick here as we havent't been updating it (as we don't + need it) while in pure TSC-offsetting mode. 
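+ Seeding it with the current per-CPU value avoids the guest TSC appearing to jump backwards once the emulated mode starts consulting it again.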
*/ + pVCpu->tm.s.u64TSCLastSeen = uOldTsc; + } + + LogRel(("TM: Switching TSC mode from '%s' to '%s'\n", tmR3GetTSCModeNameEx(pVM->tm.s.enmTSCMode), + tmR3GetTSCModeNameEx(pVM->tm.s.enmOriginalTSCMode))); + pVM->tm.s.enmTSCMode = pVM->tm.s.enmOriginalTSCMode; + return VINF_SUCCESS; +} + + +/** + * Notify TM that the guest has disabled usage of a paravirtualized TSC. + * + * If TMR3CpuTickParavirtEnable() changed the TSC virtualization mode, this will + * perform an EMT rendezvous to revert those changes. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) TMR3CpuTickParavirtDisable(PVM pVM) +{ + int rc = VINF_SUCCESS; + if ( pVM->tm.s.fTSCModeSwitchAllowed + && pVM->tm.s.enmTSCMode == TMTSCMODE_REAL_TSC_OFFSET + && pVM->tm.s.enmTSCMode != pVM->tm.s.enmOriginalTSCMode) + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, tmR3CpuTickParavirtDisable, NULL); + pVM->tm.s.fParavirtTscEnabled = false; + return rc; +} + + +/** + * Check whether the guest can be presented a fixed rate & monotonic TSC. + * + * @returns true if TSC is stable, false otherwise. + * @param pVM The cross context VM structure. + * @param fWithParavirtEnabled Whether it's fixed & monotonic when + * paravirt. TSC is enabled or not. + * + * @remarks Must be called only after TMR3InitFinalize(). + */ +VMMR3_INT_DECL(bool) TMR3CpuTickIsFixedRateMonotonic(PVM pVM, bool fWithParavirtEnabled) +{ + /** @todo figure out what exactly we want here later. */ + NOREF(fWithParavirtEnabled); + return ( tmR3HasFixedTSC(pVM) /* Host has fixed-rate TSC. */ + && g_pSUPGlobalInfoPage->u32Mode != SUPGIPMODE_ASYNC_TSC); /* GIP thinks it's monotonic. */ +} + + +/** + * Gets the 5 char clock name for the info tables. + * + * @returns The name. + * @param enmClock The clock. + */ +DECLINLINE(const char *) tmR3Get5CharClockName(TMCLOCK enmClock) +{ + switch (enmClock) + { + case TMCLOCK_REAL: return "Real "; + case TMCLOCK_VIRTUAL: return "Virt "; + case TMCLOCK_VIRTUAL_SYNC: return "VrSy "; + case TMCLOCK_TSC: return "TSC "; + default: return "Bad "; + } +} + + +/** + * Display all timers. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) tmR3TimerInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + pHlp->pfnPrintf(pHlp, + "Timers (pVM=%p)\n" + "%.*s %.*s %.*s %.*s Clock %18s %18s %6s %-25s Description\n", + pVM, + sizeof(RTR3PTR) * 2, "pTimerR3 ", + sizeof(int32_t) * 2, "offNext ", + sizeof(int32_t) * 2, "offPrev ", + sizeof(int32_t) * 2, "offSched ", + "Time", + "Expire", + "HzHint", + "State"); + TM_LOCK_TIMERS(pVM); + for (PTMTIMERR3 pTimer = pVM->tm.s.pCreated; pTimer; pTimer = pTimer->pBigNext) + { + pHlp->pfnPrintf(pHlp, + "%p %08RX32 %08RX32 %08RX32 %s %18RU64 %18RU64 %6RU32 %-25s %s\n", + pTimer, + pTimer->offNext, + pTimer->offPrev, + pTimer->offScheduleNext, + tmR3Get5CharClockName(pTimer->enmClock), + TMTimerGet(pTimer), + pTimer->u64Expire, + pTimer->uHzHint, + tmTimerState(pTimer->enmState), + pTimer->pszDesc); + } + TM_UNLOCK_TIMERS(pVM); +} + + +/** + * Display all active timers. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. 
+ */ +static DECLCALLBACK(void) tmR3TimerInfoActive(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + pHlp->pfnPrintf(pHlp, + "Active Timers (pVM=%p)\n" + "%.*s %.*s %.*s %.*s Clock %18s %18s %6s %-25s Description\n", + pVM, + sizeof(RTR3PTR) * 2, "pTimerR3 ", + sizeof(int32_t) * 2, "offNext ", + sizeof(int32_t) * 2, "offPrev ", + sizeof(int32_t) * 2, "offSched ", + "Time", + "Expire", + "HzHint", + "State"); + for (unsigned iQueue = 0; iQueue < TMCLOCK_MAX; iQueue++) + { + TM_LOCK_TIMERS(pVM); + for (PTMTIMERR3 pTimer = TMTIMER_GET_HEAD(&pVM->tm.s.paTimerQueuesR3[iQueue]); + pTimer; + pTimer = TMTIMER_GET_NEXT(pTimer)) + { + pHlp->pfnPrintf(pHlp, + "%p %08RX32 %08RX32 %08RX32 %s %18RU64 %18RU64 %6RU32 %-25s %s\n", + pTimer, + pTimer->offNext, + pTimer->offPrev, + pTimer->offScheduleNext, + tmR3Get5CharClockName(pTimer->enmClock), + TMTimerGet(pTimer), + pTimer->u64Expire, + pTimer->uHzHint, + tmTimerState(pTimer->enmState), + pTimer->pszDesc); + } + TM_UNLOCK_TIMERS(pVM); + } +} + + +/** + * Display all clocks. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helpers. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) tmR3InfoClocks(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + + /* + * Read the times first to avoid more than necessary time variation. + */ + const uint64_t u64Virtual = TMVirtualGet(pVM); + const uint64_t u64VirtualSync = TMVirtualSyncGet(pVM); + const uint64_t u64Real = TMRealGet(pVM); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + uint64_t u64TSC = TMCpuTickGet(pVCpu); + + /* + * TSC + */ + pHlp->pfnPrintf(pHlp, + "Cpu Tick: %18RU64 (%#016RX64) %RU64Hz %s - virtualized", + u64TSC, u64TSC, TMCpuTicksPerSecond(pVM), + pVCpu->tm.s.fTSCTicking ? "ticking" : "paused"); + if (pVM->tm.s.enmTSCMode == TMTSCMODE_REAL_TSC_OFFSET) + { + pHlp->pfnPrintf(pHlp, " - real tsc offset"); + if (pVCpu->tm.s.offTSCRawSrc) + pHlp->pfnPrintf(pHlp, "\n offset %RU64", pVCpu->tm.s.offTSCRawSrc); + } + else if (pVM->tm.s.enmTSCMode == TMTSCMODE_NATIVE_API) + pHlp->pfnPrintf(pHlp, " - native api"); + else + pHlp->pfnPrintf(pHlp, " - virtual clock"); + pHlp->pfnPrintf(pHlp, "\n"); + } + + /* + * virtual + */ + pHlp->pfnPrintf(pHlp, + " Virtual: %18RU64 (%#016RX64) %RU64Hz %s", + u64Virtual, u64Virtual, TMVirtualGetFreq(pVM), + pVM->tm.s.cVirtualTicking ? "ticking" : "paused"); + if (pVM->tm.s.fVirtualWarpDrive) + pHlp->pfnPrintf(pHlp, " WarpDrive %RU32 %%", pVM->tm.s.u32VirtualWarpDrivePercentage); + pHlp->pfnPrintf(pHlp, "\n"); + + /* + * virtual sync + */ + pHlp->pfnPrintf(pHlp, + "VirtSync: %18RU64 (%#016RX64) %s%s", + u64VirtualSync, u64VirtualSync, + pVM->tm.s.fVirtualSyncTicking ? "ticking" : "paused", + pVM->tm.s.fVirtualSyncCatchUp ? " - catchup" : ""); + if (pVM->tm.s.offVirtualSync) + { + pHlp->pfnPrintf(pHlp, "\n offset %RU64", pVM->tm.s.offVirtualSync); + if (pVM->tm.s.u32VirtualSyncCatchUpPercentage) + pHlp->pfnPrintf(pHlp, " catch-up rate %u %%", pVM->tm.s.u32VirtualSyncCatchUpPercentage); + } + pHlp->pfnPrintf(pHlp, "\n"); + + /* + * real + */ + pHlp->pfnPrintf(pHlp, + " Real: %18RU64 (%#016RX64) %RU64Hz\n", + u64Real, u64Real, TMRealGetFreq(pVM)); +} + + +/** + * Gets the descriptive TM TSC mode name given the enum value. + * + * @returns The name. + * @param enmMode The mode to name. 
+ */ +static const char *tmR3GetTSCModeNameEx(TMTSCMODE enmMode) +{ + switch (enmMode) + { + case TMTSCMODE_REAL_TSC_OFFSET: return "RealTscOffset"; + case TMTSCMODE_VIRT_TSC_EMULATED: return "VirtTscEmulated"; + case TMTSCMODE_DYNAMIC: return "Dynamic"; + case TMTSCMODE_NATIVE_API: return "NativeApi"; + default: return "???"; + } +} + + +/** + * Gets the descriptive TM TSC mode name. + * + * @returns The name. + * @param pVM The cross context VM structure. + */ +static const char *tmR3GetTSCModeName(PVM pVM) +{ + Assert(pVM); + return tmR3GetTSCModeNameEx(pVM->tm.s.enmTSCMode); +} + diff --git a/src/VBox/VMM/VMMR3/TRPM.cpp b/src/VBox/VMM/VMMR3/TRPM.cpp new file mode 100644 index 00000000..b49c0a8e --- /dev/null +++ b/src/VBox/VMM/VMMR3/TRPM.cpp @@ -0,0 +1,1664 @@ +/* $Id: TRPM.cpp $ */ +/** @file + * TRPM - The Trap Monitor. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_trpm TRPM - The Trap Monitor + * + * The Trap Monitor (TRPM) is responsible for all trap and interrupt handling in + * the VMM. It plays a major role in raw-mode execution and a lesser one in the + * hardware assisted mode. + * + * Note first, the following will use trap as a collective term for faults, + * aborts and traps. + * + * @see grp_trpm + * + * + * @section sec_trpm_rc Raw-Mode Context + * + * When executing in the raw-mode context, TRPM will be managing the IDT and + * processing all traps and interrupts. It will also monitor the guest IDT + * because CSAM wishes to know about changes to it (trap/interrupt/syscall + * handler patching) and TRPM needs to keep the \#BP gate in sync (ring-3 + * considerations). See TRPMR3SyncIDT and CSAMR3CheckGates. + * + * External interrupts will be forwarded to the host context by the quickest + * possible route where they will be reasserted. The other events will be + * categorized into virtualization traps, genuine guest traps and hypervisor + * traps. The latter group may be recoverable depending on when they happen and + * whether there is a handler for it, otherwise it will cause a guru meditation. + * + * TRPM distinguishes the between the first two (virt and guest traps) and the + * latter (hyper) by checking the CPL of the trapping code, if CPL == 0 then + * it's a hyper trap otherwise it's a virt/guest trap. There are three trap + * dispatcher tables, one ad-hoc for one time traps registered via + * TRPMGCSetTempHandler(), one for hyper traps and one for virt/guest traps. + * The latter two live in TRPMGCHandlersA.asm, the former in the VM structure. + * + * The raw-mode context trap handlers found in TRPMGCHandlers.cpp (for the most + * part), will call up the other VMM sub-systems depending on what it things + * happens. The two most busy traps are page faults (\#PF) and general + * protection fault/trap (\#GP). + * + * Before resuming guest code after having taken a virtualization trap or + * injected a guest trap, TRPM will check for pending forced action and + * every now and again let TM check for timed out timers. 
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_TRPM
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "TRPMInternal.h"
+#include
+#include
+#ifdef VBOX_WITH_REM
+# include
+#endif
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * Trap handler function.
+ * @todo need to specialize this as we go along.
+ */
+typedef enum TRPMHANDLER
+{
+    /** Generic Interrupt handler. */
+    TRPM_HANDLER_INT = 0,
+    /** Generic Trap handler. */
+    TRPM_HANDLER_TRAP,
+    /** Trap 8 (\#DF) handler. */
+    TRPM_HANDLER_TRAP_08,
+    /** Trap 12 (\#MC) handler. */
+    TRPM_HANDLER_TRAP_12,
+    /** Max. */
+    TRPM_HANDLER_MAX
+} TRPMHANDLER, *PTRPMHANDLER;
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/** Preinitialized IDT.
+ * The u16OffsetLow is a value of the TRPMHANDLER enum which TRPMR3Relocate()
+ * will use to pick the right address. The u16SegSel is always VMM CS.
+ */
+static VBOXIDTE_GENERIC g_aIdt[256] =
+{
+/* special trap handler - still, this is an interrupt gate not a trap gate... */
+#define IDTE_TRAP(enm)      { (unsigned)enm, 0, 0, VBOX_IDTE_TYPE1, VBOX_IDTE_TYPE2_INT_32, 0, 1, 0 }
+/* generic trap handler. */
+#define IDTE_TRAP_GEN()     IDTE_TRAP(TRPM_HANDLER_TRAP)
+/* special interrupt handler. */
+#define IDTE_INT(enm)       { (unsigned)enm, 0, 0, VBOX_IDTE_TYPE1, VBOX_IDTE_TYPE2_INT_32, 0, 1, 0 }
+/* generic interrupt handler. */
+#define IDTE_INT_GEN()      IDTE_INT(TRPM_HANDLER_INT)
+/* special task gate IDT entry (for critical exceptions like #DF). */
+#define IDTE_TASK(enm)      { (unsigned)enm, 0, 0, VBOX_IDTE_TYPE1, VBOX_IDTE_TYPE2_TASK, 0, 1, 0 }
+/* draft, fixme later when the handler is written. */
+#define IDTE_RESERVED()     { 0, 0, 0, 0, 0, 0, 0, 0 }
+
+    /* N - M M - T - C - D i */
+    /* o - n o - y - o - e p */
+    /* - e n - p - d - s t */
+    /* - i - e - e - c .
*/ + /* - c - - - r */ + /* ============================================================= */ + IDTE_TRAP_GEN(), /* 0 - #DE - F - N - Divide error */ + IDTE_TRAP_GEN(), /* 1 - #DB - F/T - N - Single step, INT 1 instruction */ +#ifdef VBOX_WITH_NMI + IDTE_TRAP_GEN(), /* 2 - - I - N - Non-Maskable Interrupt (NMI) */ +#else + IDTE_INT_GEN(), /* 2 - - I - N - Non-Maskable Interrupt (NMI) */ +#endif + IDTE_TRAP_GEN(), /* 3 - #BP - T - N - Breakpoint, INT 3 instruction. */ + IDTE_TRAP_GEN(), /* 4 - #OF - T - N - Overflow, INTO instruction. */ + IDTE_TRAP_GEN(), /* 5 - #BR - F - N - BOUND Range Exceeded, BOUND instruction. */ + IDTE_TRAP_GEN(), /* 6 - #UD - F - N - Undefined(/Invalid) Opcode. */ + IDTE_TRAP_GEN(), /* 7 - #NM - F - N - Device not available, FP or (F)WAIT instruction. */ + IDTE_TASK(TRPM_HANDLER_TRAP_08), /* 8 - #DF - A - 0 - Double fault. */ + IDTE_TRAP_GEN(), /* 9 - - F - N - Coprocessor Segment Overrun (obsolete). */ + IDTE_TRAP_GEN(), /* a - #TS - F - Y - Invalid TSS, Taskswitch or TSS access. */ + IDTE_TRAP_GEN(), /* b - #NP - F - Y - Segment not present. */ + IDTE_TRAP_GEN(), /* c - #SS - F - Y - Stack-Segment fault. */ + IDTE_TRAP_GEN(), /* d - #GP - F - Y - General protection fault. */ + IDTE_TRAP_GEN(), /* e - #PF - F - Y - Page fault. - interrupt gate!!! */ + IDTE_RESERVED(), /* f - - - - Intel Reserved. Do not use. */ + IDTE_TRAP_GEN(), /* 10 - #MF - F - N - x86 FPU Floating-Point Error (Math fault), FP or (F)WAIT instruction. */ + IDTE_TRAP_GEN(), /* 11 - #AC - F - 0 - Alignment Check. */ + IDTE_TRAP(TRPM_HANDLER_TRAP_12), /* 12 - #MC - A - N - Machine Check. */ + IDTE_TRAP_GEN(), /* 13 - #XF - F - N - SIMD Floating-Point Exception. */ + IDTE_RESERVED(), /* 14 - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 15 - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 16 - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 17 - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 18 - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 19 - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 1a - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 1b - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 1c - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 1d - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 1e - - - - Intel Reserved. Do not use. */ + IDTE_RESERVED(), /* 1f - - - - Intel Reserved. Do not use. */ + IDTE_INT_GEN(), /* 20 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 21 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 22 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 23 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 24 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 25 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 26 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 27 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 28 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 29 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 2a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 2b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 2c - - I - - User defined Interrupts, external of INT n. 
*/ + IDTE_INT_GEN(), /* 2d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 2e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 2f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 30 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 31 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 32 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 33 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 34 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 35 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 36 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 37 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 38 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 39 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 3a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 3b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 3c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 3d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 3e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 3f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 40 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 41 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 42 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 43 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 44 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 45 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 46 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 47 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 48 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 49 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 4a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 4b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 4c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 4d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 4e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 4f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 50 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 51 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 52 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 53 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 54 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 55 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 56 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 57 - - I - - User defined Interrupts, external of INT n. 
*/ + IDTE_INT_GEN(), /* 58 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 59 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 5a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 5b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 5c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 5d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 5e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 5f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 60 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 61 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 62 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 63 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 64 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 65 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 66 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 67 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 68 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 69 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 6a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 6b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 6c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 6d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 6e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 6f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 70 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 71 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 72 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 73 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 74 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 75 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 76 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 77 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 78 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 79 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 7a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 7b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 7c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 7d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 7e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 7f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 80 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 81 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 82 - - I - - User defined Interrupts, external of INT n. 
*/ + IDTE_INT_GEN(), /* 83 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 84 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 85 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 86 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 87 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 88 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 89 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 8a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 8b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 8c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 8d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 8e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 8f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 90 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 91 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 92 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 93 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 94 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 95 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 96 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 97 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 98 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 99 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 9a - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 9b - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 9c - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 9d - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 9e - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* 9f - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a0 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a1 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a2 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a3 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a4 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a5 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a6 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a7 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a8 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* a9 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* aa - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ab - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ac - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ad - - I - - User defined Interrupts, external of INT n. 
*/ + IDTE_INT_GEN(), /* ae - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* af - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b0 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b1 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b2 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b3 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b4 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b5 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b6 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b7 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b8 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* b9 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ba - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* bb - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* bc - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* bd - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* be - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* bf - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c0 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c1 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c2 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c3 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c4 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c5 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c6 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c7 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c8 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* c9 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ca - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* cb - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* cc - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* cd - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ce - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* cf - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d0 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d1 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d2 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d3 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d4 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d5 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d6 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d7 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* d8 - - I - - User defined Interrupts, external of INT n. 
*/ + IDTE_INT_GEN(), /* d9 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* da - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* db - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* dc - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* dd - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* de - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* df - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e0 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e1 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e2 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e3 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e4 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e5 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e6 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e7 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e8 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* e9 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ea - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* eb - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ec - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ed - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ee - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ef - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f0 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f1 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f2 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f3 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f4 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f5 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f6 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f7 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f8 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* f9 - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* fa - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* fb - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* fc - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* fd - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* fe - - I - - User defined Interrupts, external of INT n. */ + IDTE_INT_GEN(), /* ff - - I - - User defined Interrupts, external of INT n. */ +#undef IDTE_TRAP +#undef IDTE_TRAP_GEN +#undef IDTE_INT +#undef IDTE_INT_GEN +#undef IDTE_TASK +#undef IDTE_UNUSED +#undef IDTE_RESERVED +}; + + +/** TRPM saved state version. 
*/ +#define TRPM_SAVED_STATE_VERSION 9 +#define TRPM_SAVED_STATE_VERSION_UNI 8 /* SMP support bumped the version */ + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) trpmR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) trpmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(void) trpmR3InfoEvent(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/** + * Initializes the Trap Manager + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) TRPMR3Init(PVM pVM) +{ + LogFlow(("TRPMR3Init\n")); + int rc; + + /* + * Assert sizes and alignments. + */ + AssertRelease(!(RT_UOFFSETOF(VM, trpm.s) & 31)); + AssertRelease(!(RT_UOFFSETOF(VM, trpm.s.aIdt) & 15)); + AssertRelease(sizeof(pVM->trpm.s) <= sizeof(pVM->trpm.padding)); + AssertRelease(RT_ELEMENTS(pVM->trpm.s.aGuestTrapHandler) == sizeof(pVM->trpm.s.au32IdtPatched)*8); + + /* + * Initialize members. + */ + pVM->trpm.s.offVM = RT_UOFFSETOF(VM, trpm); + pVM->trpm.s.offTRPMCPU = RT_UOFFSETOF(VM, aCpus[0].trpm) - RT_UOFFSETOF(VM, trpm); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + pVCpu->trpm.s.offVM = RT_UOFFSETOF_DYN(VM, aCpus[i].trpm); + pVCpu->trpm.s.offVMCpu = RT_UOFFSETOF(VMCPU, trpm); + pVCpu->trpm.s.uActiveVector = ~0U; + } + + pVM->trpm.s.GuestIdtr.pIdt = RTRCPTR_MAX; + pVM->trpm.s.pvMonShwIdtRC = RTRCPTR_MAX; + pVM->trpm.s.fSafeToDropGuestIDTMonitoring = false; + + /* + * Read the configuration (if any). + */ + PCFGMNODE pTRPMNode = CFGMR3GetChild(CFGMR3GetRoot(pVM), "TRPM"); + if (pTRPMNode) + { + bool f; + rc = CFGMR3QueryBool(pTRPMNode, "SafeToDropGuestIDTMonitoring", &f); + if (RT_SUCCESS(rc)) + pVM->trpm.s.fSafeToDropGuestIDTMonitoring = f; + } + + /* write config summary to log */ + if (pVM->trpm.s.fSafeToDropGuestIDTMonitoring) + LogRel(("TRPM: Dropping Guest IDT Monitoring\n")); + + /* + * Initialize the IDT. + * The handler addresses will be set in the TRPMR3Relocate() function. + */ + Assert(sizeof(pVM->trpm.s.aIdt) == sizeof(g_aIdt)); + memcpy(&pVM->trpm.s.aIdt[0], &g_aIdt[0], sizeof(pVM->trpm.s.aIdt)); + + /* + * Register virtual access handlers. + */ + pVM->trpm.s.hShadowIdtWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; + pVM->trpm.s.hGuestIdtWriteHandlerType = NIL_PGMVIRTHANDLERTYPE; +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { +# ifdef TRPM_TRACK_SHADOW_IDT_CHANGES + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_HYPERVISOR, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, NULL /*pfnHandlerR3*/, + NULL /*pszHandlerRC*/, "trpmRCShadowIDTWritePfHandler", + "Shadow IDT write access handler", &pVM->trpm.s.hShadowIdtWriteHandlerType); + AssertRCReturn(rc, rc); +# endif + rc = PGMR3HandlerVirtualTypeRegister(pVM, PGMVIRTHANDLERKIND_WRITE, false /*fRelocUserRC*/, + NULL /*pfnInvalidateR3*/, trpmGuestIDTWriteHandler, + "trpmGuestIDTWriteHandler", "trpmRCGuestIDTWritePfHandler", + "Guest IDT write access handler", &pVM->trpm.s.hGuestIdtWriteHandlerType); + AssertRCReturn(rc, rc); + } +#endif /* VBOX_WITH_RAW_MODE */ + + /* + * Register the saved state data unit. 
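The saved state unit registered just below pairs trpmR3Save with trpmR3Load and brackets variable-length data with UINT32_MAX markers. As a reading aid, here is a stripped-down, hypothetical unit written in the same style; it is a sketch only, uses no SSM calls beyond those already appearing in this file, and the "myexample" member is invented.

/* Hypothetical save callback: writes a small fixed array and the same
   UINT32_MAX terminator convention used by trpmR3Save further below. */
static DECLCALLBACK(int) exampleR3Save(PVM pVM, PSSMHANDLE pSSM)
{
    for (uint32_t i = 0; i < 4; i++)
        SSMR3PutU32(pSSM, pVM->myexample.s.au32Values[i]);  /* 'myexample' is invented */
    return SSMR3PutU32(pSSM, UINT32_MAX);                   /* terminator */
}

/* Hypothetical load callback: same version and terminator checks as trpmR3Load. */
static DECLCALLBACK(int) exampleR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
{
    NOREF(uPass);
    if (uVersion != 1)
        return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
    for (uint32_t i = 0; i < 4; i++)
        SSMR3GetU32(pSSM, &pVM->myexample.s.au32Values[i]);
    uint32_t u32End;
    int rc = SSMR3GetU32(pSSM, &u32End);
    if (RT_FAILURE(rc))
        return rc;
    return u32End == UINT32_MAX ? VINF_SUCCESS : VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
}

Such a pair would be handed to SSMR3RegisterInternal in the same callback slots that trpmR3Save and trpmR3Load occupy in the call that follows.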
+ */ + rc = SSMR3RegisterInternal(pVM, "trpm", 1, TRPM_SAVED_STATE_VERSION, sizeof(TRPM), + NULL, NULL, NULL, + NULL, trpmR3Save, NULL, + NULL, trpmR3Load, NULL); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register info handlers. + */ + rc = DBGFR3InfoRegisterInternalEx(pVM, "trpmevent", "Dumps TRPM pending event.", trpmR3InfoEvent, + DBGFINFO_FLAGS_ALL_EMTS); + AssertRCReturn(rc, rc); + + /* + * Statistics. + */ +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + STAM_REG(pVM, &pVM->trpm.s.StatRCWriteGuestIDTFault, STAMTYPE_COUNTER, "/TRPM/RC/IDTWritesFault", STAMUNIT_OCCURENCES, "Guest IDT writes the we returned to R3 to handle."); + STAM_REG(pVM, &pVM->trpm.s.StatRCWriteGuestIDTHandled, STAMTYPE_COUNTER, "/TRPM/RC/IDTWritesHandled", STAMUNIT_OCCURENCES, "Guest IDT writes that we handled successfully."); + STAM_REG(pVM, &pVM->trpm.s.StatSyncIDT, STAMTYPE_PROFILE, "/PROF/TRPM/SyncIDT", STAMUNIT_TICKS_PER_CALL, "Profiling of TRPMR3SyncIDT()."); + + /* traps */ + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x00], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/00", STAMUNIT_TICKS_PER_CALL, "#DE - Divide error."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x01], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/01", STAMUNIT_TICKS_PER_CALL, "#DB - Debug (single step and more)."); + //STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x02], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/02", STAMUNIT_TICKS_PER_CALL, "NMI"); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x03], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/03", STAMUNIT_TICKS_PER_CALL, "#BP - Breakpoint."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x04], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/04", STAMUNIT_TICKS_PER_CALL, "#OF - Overflow."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x05], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/05", STAMUNIT_TICKS_PER_CALL, "#BR - Bound range exceeded."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x06], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/06", STAMUNIT_TICKS_PER_CALL, "#UD - Undefined opcode."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x07], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/07", STAMUNIT_TICKS_PER_CALL, "#NM - Device not available (FPU)."); + //STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x08], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/08", STAMUNIT_TICKS_PER_CALL, "#DF - Double fault."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x09], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/09", STAMUNIT_TICKS_PER_CALL, "#?? 
- Coprocessor segment overrun (obsolete)."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x0a], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/0a", STAMUNIT_TICKS_PER_CALL, "#TS - Task switch fault."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x0b], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/0b", STAMUNIT_TICKS_PER_CALL, "#NP - Segment not present."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x0c], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/0c", STAMUNIT_TICKS_PER_CALL, "#SS - Stack segment fault."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x0d], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/0d", STAMUNIT_TICKS_PER_CALL, "#GP - General protection fault."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x0e], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/0e", STAMUNIT_TICKS_PER_CALL, "#PF - Page fault."); + //STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x0f], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/0f", STAMUNIT_TICKS_PER_CALL, "Reserved."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x10], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/10", STAMUNIT_TICKS_PER_CALL, "#MF - Math fault.."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x11], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/11", STAMUNIT_TICKS_PER_CALL, "#AC - Alignment check."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x12], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/12", STAMUNIT_TICKS_PER_CALL, "#MC - Machine check."); + STAM_REG(pVM, &pVM->trpm.s.aStatGCTraps[0x13], STAMTYPE_PROFILE_ADV, "/TRPM/GC/Traps/13", STAMUNIT_TICKS_PER_CALL, "#XF - SIMD Floating-Point Exception."); + } +#endif + +# ifdef VBOX_WITH_STATISTICS + rc = MMHyperAlloc(pVM, sizeof(STAMCOUNTER) * 256, sizeof(STAMCOUNTER), MM_TAG_TRPM, (void **)&pVM->trpm.s.paStatForwardedIRQR3); + AssertRCReturn(rc, rc); + pVM->trpm.s.paStatForwardedIRQRC = MMHyperR3ToRC(pVM, pVM->trpm.s.paStatForwardedIRQR3); + for (unsigned i = 0; i < 256; i++) + STAMR3RegisterF(pVM, &pVM->trpm.s.paStatForwardedIRQR3[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, "Forwarded interrupts.", + i < 0x20 ? 
"/TRPM/ForwardRaw/TRAP/%02X" : "/TRPM/ForwardRaw/IRQ/%02X", i); + +# ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + rc = MMHyperAlloc(pVM, sizeof(STAMCOUNTER) * 256, sizeof(STAMCOUNTER), MM_TAG_TRPM, (void **)&pVM->trpm.s.paStatHostIrqR3); + AssertRCReturn(rc, rc); + pVM->trpm.s.paStatHostIrqRC = MMHyperR3ToRC(pVM, pVM->trpm.s.paStatHostIrqR3); + for (unsigned i = 0; i < 256; i++) + STAMR3RegisterF(pVM, &pVM->trpm.s.paStatHostIrqR3[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES, + "Host interrupts.", "/TRPM/HostIRQs/%02x", i); + } +# endif +# endif + +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + STAM_REG(pVM, &pVM->trpm.s.StatForwardProfR3, STAMTYPE_PROFILE_ADV, "/TRPM/ForwardRaw/ProfR3", STAMUNIT_TICKS_PER_CALL, "Profiling TRPMForwardTrap."); + STAM_REG(pVM, &pVM->trpm.s.StatForwardProfRZ, STAMTYPE_PROFILE_ADV, "/TRPM/ForwardRaw/ProfRZ", STAMUNIT_TICKS_PER_CALL, "Profiling TRPMForwardTrap."); + STAM_REG(pVM, &pVM->trpm.s.StatForwardFailNoHandler, STAMTYPE_COUNTER, "/TRPM/ForwardRaw/FailNoHandler", STAMUNIT_OCCURENCES,"Failure to forward interrupt in raw mode."); + STAM_REG(pVM, &pVM->trpm.s.StatForwardFailPatchAddr, STAMTYPE_COUNTER, "/TRPM/ForwardRaw/FailPatchAddr", STAMUNIT_OCCURENCES,"Failure to forward interrupt in raw mode."); + STAM_REG(pVM, &pVM->trpm.s.StatForwardFailR3, STAMTYPE_COUNTER, "/TRPM/ForwardRaw/FailR3", STAMUNIT_OCCURENCES, "Failure to forward interrupt in raw mode."); + STAM_REG(pVM, &pVM->trpm.s.StatForwardFailRZ, STAMTYPE_COUNTER, "/TRPM/ForwardRaw/FailRZ", STAMUNIT_OCCURENCES, "Failure to forward interrupt in raw mode."); + + STAM_REG(pVM, &pVM->trpm.s.StatTrap0dDisasm, STAMTYPE_PROFILE, "/TRPM/RC/Traps/0d/Disasm", STAMUNIT_TICKS_PER_CALL, "Profiling disassembly part of trpmGCTrap0dHandler."); + STAM_REG(pVM, &pVM->trpm.s.StatTrap0dRdTsc, STAMTYPE_COUNTER, "/TRPM/RC/Traps/0d/RdTsc", STAMUNIT_OCCURENCES, "Number of RDTSC #GPs."); + } +#endif + +#ifdef VBOX_WITH_RAW_MODE + /* + * Default action when entering raw mode for the first time + */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + PVMCPU pVCpu = &pVM->aCpus[0]; /* raw mode implies on VCPU */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); + } +#endif + return 0; +} + + +/** + * Applies relocations to data and code managed by this component. + * + * This function will be called at init and whenever the VMM need + * to relocate itself inside the GC. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + */ +VMMR3DECL(void) TRPMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ +#ifdef VBOX_WITH_RAW_MODE + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return; + + /* Only applies to raw mode which supports only 1 VCPU. */ + PVMCPU pVCpu = &pVM->aCpus[0]; + LogFlow(("TRPMR3Relocate\n")); + + /* + * Get the trap handler addresses. + * + * If VMMRC.rc is screwed, so are we. We'll assert here since it elsewise + * would make init order impossible if we should assert the presence of these + * exports in TRPMR3Init(). 
+ */ + RTRCPTR aRCPtrs[TRPM_HANDLER_MAX]; + RT_ZERO(aRCPtrs); + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "TRPMGCHandlerInterupt", &aRCPtrs[TRPM_HANDLER_INT]); + AssertReleaseMsgRC(rc, ("Couldn't find TRPMGCHandlerInterupt in VMMRC.rc!\n")); + + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "TRPMGCHandlerGeneric", &aRCPtrs[TRPM_HANDLER_TRAP]); + AssertReleaseMsgRC(rc, ("Couldn't find TRPMGCHandlerGeneric in VMMRC.rc!\n")); + + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "TRPMGCHandlerTrap08", &aRCPtrs[TRPM_HANDLER_TRAP_08]); + AssertReleaseMsgRC(rc, ("Couldn't find TRPMGCHandlerTrap08 in VMMRC.rc!\n")); + + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "TRPMGCHandlerTrap12", &aRCPtrs[TRPM_HANDLER_TRAP_12]); + AssertReleaseMsgRC(rc, ("Couldn't find TRPMGCHandlerTrap12 in VMMRC.rc!\n")); + + RTSEL SelCS = CPUMGetHyperCS(pVCpu); + + /* + * Iterate the idt and set the addresses. + */ + PVBOXIDTE pIdte = &pVM->trpm.s.aIdt[0]; + PVBOXIDTE_GENERIC pIdteTemplate = &g_aIdt[0]; + for (unsigned i = 0; i < RT_ELEMENTS(pVM->trpm.s.aIdt); i++, pIdte++, pIdteTemplate++) + { + if ( pIdte->Gen.u1Present + && !ASMBitTest(&pVM->trpm.s.au32IdtPatched[0], i) + ) + { + Assert(pIdteTemplate->u16OffsetLow < TRPM_HANDLER_MAX); + RTGCPTR Offset = aRCPtrs[pIdteTemplate->u16OffsetLow]; + switch (pIdteTemplate->u16OffsetLow) + { + /* + * Generic handlers have different entrypoints for each possible + * vector number. These entrypoints makes a sort of an array with + * 8 byte entries where the vector number is the index. + * See TRPMGCHandlersA.asm for details. + */ + case TRPM_HANDLER_INT: + case TRPM_HANDLER_TRAP: + Offset += i * 8; + break; + case TRPM_HANDLER_TRAP_12: + break; + case TRPM_HANDLER_TRAP_08: + /* Handle #DF Task Gate in special way. */ + pIdte->Gen.u16SegSel = SELMGetTrap8Selector(pVM); + pIdte->Gen.u16OffsetLow = 0; + pIdte->Gen.u16OffsetHigh = 0; + SELMSetTrap8EIP(pVM, Offset); + continue; + } + /* (non-task gates only ) */ + pIdte->Gen.u16OffsetLow = Offset & 0xffff; + pIdte->Gen.u16OffsetHigh = Offset >> 16; + pIdte->Gen.u16SegSel = SelCS; + } + } + + /* + * Update IDTR (limit is including!). + */ + CPUMSetHyperIDTR(pVCpu, VM_RC_ADDR(pVM, &pVM->trpm.s.aIdt[0]), sizeof(pVM->trpm.s.aIdt)-1); + +# ifdef TRPM_TRACK_SHADOW_IDT_CHANGES + if (pVM->trpm.s.pvMonShwIdtRC != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->trpm.s.pvMonShwIdtRC, true /*fHypervisor*/); + AssertRC(rc); + } + pVM->trpm.s.pvMonShwIdtRC = VM_RC_ADDR(pVM, &pVM->trpm.s.aIdt[0]); + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->trpm.s.hShadowIdtWriteHandlerType, + pVM->trpm.s.pvMonShwIdtRC, pVM->trpm.s.pvMonShwIdtRC + sizeof(pVM->trpm.s.aIdt) - 1, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + AssertRC(rc); +# endif + + /* Relocate IDT handlers for forwarding guest traps/interrupts. 
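Two details in the relocation loop above are easy to gloss over: the generic RC handlers form an array of 8-byte entry stubs indexed by vector, and the 32-bit handler address is stored split across the gate's two 16-bit offset fields. The following self-contained sketch (plain C++, invented names, not part of the patch) shows that arithmetic in isolation.

#include <cassert>
#include <cstdint>

/* Invented stand-in for the 16-bit offset halves of an IDT gate descriptor. */
struct ExampleGate
{
    uint16_t u16OffsetLow;
    uint16_t u16OffsetHigh;
};

/* The generic handlers are laid out as 8-byte stubs, so the entry point for a
   vector is the common base address plus vector * 8. */
static uint32_t exampleEntryPoint(uint32_t uBase, unsigned iVector)
{
    return uBase + iVector * 8;
}

/* Split and re-join the 32-bit offset, mirroring the "Offset & 0xffff" and
   "Offset >> 16" assignments in the loop above. */
static void examplePackOffset(ExampleGate *pGate, uint32_t uOffset)
{
    pGate->u16OffsetLow  = (uint16_t)(uOffset & 0xffff);
    pGate->u16OffsetHigh = (uint16_t)(uOffset >> 16);
}

static uint32_t exampleUnpackOffset(const ExampleGate *pGate)
{
    return ((uint32_t)pGate->u16OffsetHigh << 16) | pGate->u16OffsetLow;
}

int main()
{
    ExampleGate Gate;
    examplePackOffset(&Gate, exampleEntryPoint(0xc0201000u, 0x0e /* #PF */));
    assert(exampleUnpackOffset(&Gate) == 0xc0201000u + 0x0e * 8);
    return 0;
}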
*/ + for (uint32_t iTrap = 0; iTrap < RT_ELEMENTS(pVM->trpm.s.aGuestTrapHandler); iTrap++) + { + if (pVM->trpm.s.aGuestTrapHandler[iTrap] != TRPM_INVALID_HANDLER) + { + Log(("TRPMR3Relocate: iGate=%2X Handler %RRv -> %RRv\n", iTrap, pVM->trpm.s.aGuestTrapHandler[iTrap], pVM->trpm.s.aGuestTrapHandler[iTrap] + offDelta)); + pVM->trpm.s.aGuestTrapHandler[iTrap] += offDelta; + } + + if (ASMBitTest(&pVM->trpm.s.au32IdtPatched[0], iTrap)) + { + PVBOXIDTE pIdteCur = &pVM->trpm.s.aIdt[iTrap]; + RTGCPTR pHandler = VBOXIDTE_OFFSET(*pIdteCur); + + Log(("TRPMR3Relocate: *iGate=%2X Handler %RGv -> %RGv\n", iTrap, pHandler, pHandler + offDelta)); + pHandler += offDelta; + + pIdteCur->Gen.u16OffsetHigh = pHandler >> 16; + pIdteCur->Gen.u16OffsetLow = pHandler & 0xFFFF; + } + } + +# ifdef VBOX_WITH_STATISTICS + pVM->trpm.s.paStatForwardedIRQRC += offDelta; + pVM->trpm.s.paStatHostIrqRC += offDelta; +# endif + +#else /* !VBOX_WITH_RAW_MODE */ + RT_NOREF(pVM, offDelta); +#endif /* !VBOX_WITH_RAW_MODE */ +} + + +/** + * Terminates the Trap Manager + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) TRPMR3Term(PVM pVM) +{ + NOREF(pVM); + return VINF_SUCCESS; +} + + +/** + * Resets a virtual CPU. + * + * Used by TRPMR3Reset and CPU hot plugging. + * + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3DECL(void) TRPMR3ResetCpu(PVMCPU pVCpu) +{ + pVCpu->trpm.s.uActiveVector = ~0U; +} + + +/** + * The VM is being reset. + * + * For the TRPM component this means that any IDT write monitors + * needs to be removed, any pending trap cleared, and the IDT reset. + * + * @param pVM The cross context VM structure. + */ +VMMR3DECL(void) TRPMR3Reset(PVM pVM) +{ + /* + * Deregister any virtual handlers. + */ +#ifdef TRPM_TRACK_GUEST_IDT_CHANGES + if (pVM->trpm.s.GuestIdtr.pIdt != RTRCPTR_MAX) + { + if (!pVM->trpm.s.fSafeToDropGuestIDTMonitoring) + { + int rc = PGMHandlerVirtualDeregister(pVM, VMMGetCpu(pVM), pVM->trpm.s.GuestIdtr.pIdt, false /*fHypervisor*/); + AssertRC(rc); + } + pVM->trpm.s.GuestIdtr.pIdt = RTRCPTR_MAX; + } + pVM->trpm.s.GuestIdtr.cbIdt = 0; +#endif + + /* + * Reinitialize other members calling the relocator to get things right. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + TRPMR3ResetCpu(&pVM->aCpus[i]); + memcpy(&pVM->trpm.s.aIdt[0], &g_aIdt[0], sizeof(pVM->trpm.s.aIdt)); + memset(pVM->trpm.s.aGuestTrapHandler, 0, sizeof(pVM->trpm.s.aGuestTrapHandler)); + TRPMR3Relocate(pVM, 0); + +#ifdef VBOX_WITH_RAW_MODE + /* + * Default action when entering raw mode for the first time + */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + PVMCPU pVCpu = &pVM->aCpus[0]; /* raw mode implies on VCPU */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); + } +#endif +} + + +# ifdef VBOX_WITH_RAW_MODE +/** + * Resolve a builtin RC symbol. + * + * Called by PDM when loading or relocating RC modules. + * + * @returns VBox status + * @param pVM The cross context VM structure. + * @param pszSymbol Symbol to resolv + * @param pRCPtrValue Where to store the symbol value. + * + * @remark This has to work before VMMR3Relocate() is called. 
+ */ +VMMR3_INT_DECL(int) TRPMR3GetImportRC(PVM pVM, const char *pszSymbol, PRTRCPTR pRCPtrValue) +{ + if (!strcmp(pszSymbol, "g_TRPM")) + *pRCPtrValue = VM_RC_ADDR(pVM, &pVM->trpm); + else if (!strcmp(pszSymbol, "g_TRPMCPU")) + *pRCPtrValue = VM_RC_ADDR(pVM, &pVM->aCpus[0].trpm); + else if (!strcmp(pszSymbol, "g_trpmGuestCtx")) + { + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(VMMGetCpuById(pVM, 0)); + *pRCPtrValue = VM_RC_ADDR(pVM, pCtx); + } + else if (!strcmp(pszSymbol, "g_trpmHyperCtx")) + { + PCPUMCTX pCtx = CPUMGetHyperCtxPtr(VMMGetCpuById(pVM, 0)); + *pRCPtrValue = VM_RC_ADDR(pVM, pCtx); + } + else if (!strcmp(pszSymbol, "g_trpmGuestCtxCore")) + { + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(VMMGetCpuById(pVM, 0)); + *pRCPtrValue = VM_RC_ADDR(pVM, CPUMCTX2CORE(pCtx)); + } + else if (!strcmp(pszSymbol, "g_trpmHyperCtxCore")) + { + PCPUMCTX pCtx = CPUMGetHyperCtxPtr(VMMGetCpuById(pVM, 0)); + *pRCPtrValue = VM_RC_ADDR(pVM, CPUMCTX2CORE(pCtx)); + } + else + return VERR_SYMBOL_NOT_FOUND; + return VINF_SUCCESS; +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) trpmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + PTRPM pTrpm = &pVM->trpm.s; + LogFlow(("trpmR3Save:\n")); + + /* + * Active and saved traps. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PTRPMCPU pTrpmCpu = &pVM->aCpus[i].trpm.s; + SSMR3PutUInt(pSSM, pTrpmCpu->uActiveVector); + SSMR3PutUInt(pSSM, pTrpmCpu->enmActiveType); + SSMR3PutGCUInt(pSSM, pTrpmCpu->uActiveErrorCode); + SSMR3PutGCUIntPtr(pSSM, pTrpmCpu->uActiveCR2); + SSMR3PutGCUInt(pSSM, pTrpmCpu->uSavedVector); + SSMR3PutUInt(pSSM, pTrpmCpu->enmSavedType); + SSMR3PutGCUInt(pSSM, pTrpmCpu->uSavedErrorCode); + SSMR3PutGCUIntPtr(pSSM, pTrpmCpu->uSavedCR2); + SSMR3PutGCUInt(pSSM, pTrpmCpu->uPrevVector); + } + SSMR3PutBool(pSSM, !VM_IS_RAW_MODE_ENABLED(pVM)); + PVMCPU pVCpu0 = &pVM->aCpus[0]; NOREF(pVCpu0); /* raw mode implies 1 VCPU */ + SSMR3PutUInt(pSSM, VM_WHEN_RAW_MODE(VMCPU_FF_IS_SET(pVCpu0, VMCPU_FF_TRPM_SYNC_IDT), 0)); + SSMR3PutMem(pSSM, &pTrpm->au32IdtPatched[0], sizeof(pTrpm->au32IdtPatched)); + SSMR3PutU32(pSSM, UINT32_MAX); /* separator. */ + + /* + * Save any trampoline gates. + */ + for (uint32_t iTrap = 0; iTrap < RT_ELEMENTS(pTrpm->aGuestTrapHandler); iTrap++) + { + if (pTrpm->aGuestTrapHandler[iTrap]) + { + SSMR3PutU32(pSSM, iTrap); + SSMR3PutGCPtr(pSSM, pTrpm->aGuestTrapHandler[iTrap]); + SSMR3PutMem(pSSM, &pTrpm->aIdt[iTrap], sizeof(pTrpm->aIdt[iTrap])); + } + } + + return SSMR3PutU32(pSSM, UINT32_MAX); /* terminator */ +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) trpmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + LogFlow(("trpmR3Load:\n")); + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Validate version. + */ + if ( uVersion != TRPM_SAVED_STATE_VERSION + && uVersion != TRPM_SAVED_STATE_VERSION_UNI) + { + AssertMsgFailed(("trpmR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Call the reset function to kick out any handled gates and other potential trouble. + */ + TRPMR3Reset(pVM); + + /* + * Active and saved traps. 
+ */ + PTRPM pTrpm = &pVM->trpm.s; + + if (uVersion == TRPM_SAVED_STATE_VERSION) + { + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PTRPMCPU pTrpmCpu = &pVM->aCpus[i].trpm.s; + SSMR3GetUInt(pSSM, &pTrpmCpu->uActiveVector); + SSMR3GetUInt(pSSM, (uint32_t *)&pTrpmCpu->enmActiveType); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uActiveErrorCode); + SSMR3GetGCUIntPtr(pSSM, &pTrpmCpu->uActiveCR2); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uSavedVector); + SSMR3GetUInt(pSSM, (uint32_t *)&pTrpmCpu->enmSavedType); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uSavedErrorCode); + SSMR3GetGCUIntPtr(pSSM, &pTrpmCpu->uSavedCR2); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uPrevVector); + } + + bool fIgnored; + SSMR3GetBool(pSSM, &fIgnored); + } + else + { + PTRPMCPU pTrpmCpu = &pVM->aCpus[0].trpm.s; + SSMR3GetUInt(pSSM, &pTrpmCpu->uActiveVector); + SSMR3GetUInt(pSSM, (uint32_t *)&pTrpmCpu->enmActiveType); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uActiveErrorCode); + SSMR3GetGCUIntPtr(pSSM, &pTrpmCpu->uActiveCR2); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uSavedVector); + SSMR3GetUInt(pSSM, (uint32_t *)&pTrpmCpu->enmSavedType); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uSavedErrorCode); + SSMR3GetGCUIntPtr(pSSM, &pTrpmCpu->uSavedCR2); + SSMR3GetGCUInt(pSSM, &pTrpmCpu->uPrevVector); + + RTGCUINT fIgnored; + SSMR3GetGCUInt(pSSM, &fIgnored); + } + + RTUINT fSyncIDT; + int rc = SSMR3GetUInt(pSSM, &fSyncIDT); + if (RT_FAILURE(rc)) + return rc; + if (fSyncIDT & ~1) + { + AssertMsgFailed(("fSyncIDT=%#x\n", fSyncIDT)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } +#ifdef VBOX_WITH_RAW_MODE + if (fSyncIDT) + { + PVMCPU pVCpu = &pVM->aCpus[0]; /* raw mode implies 1 VCPU */ + VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); + } + /* else: cleared by reset call above. */ +#endif + + SSMR3GetMem(pSSM, &pTrpm->au32IdtPatched[0], sizeof(pTrpm->au32IdtPatched)); + + /* check the separator */ + uint32_t u32Sep; + rc = SSMR3GetU32(pSSM, &u32Sep); + if (RT_FAILURE(rc)) + return rc; + if (u32Sep != (uint32_t)~0) + { + AssertMsgFailed(("u32Sep=%#x (first)\n", u32Sep)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + + /* + * Restore any trampoline gates. + */ + for (;;) + { + /* gate number / terminator */ + uint32_t iTrap; + rc = SSMR3GetU32(pSSM, &iTrap); + if (RT_FAILURE(rc)) + return rc; + if (iTrap == (uint32_t)~0) + break; + if ( iTrap >= RT_ELEMENTS(pTrpm->aIdt) + || pTrpm->aGuestTrapHandler[iTrap]) + { + AssertMsgFailed(("iTrap=%#x\n", iTrap)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + + /* restore the IDT entry. */ + RTGCPTR GCPtrHandler; + SSMR3GetGCPtr(pSSM, &GCPtrHandler); + VBOXIDTE Idte; + rc = SSMR3GetMem(pSSM, &Idte, sizeof(Idte)); + if (RT_FAILURE(rc)) + return rc; + Assert(GCPtrHandler); + pTrpm->aIdt[iTrap] = Idte; + } + + return VINF_SUCCESS; +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Check if gate handlers were updated + * (callback for the VMCPU_FF_TRPM_SYNC_IDT forced action). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. 
+ */ +VMMR3DECL(int) TRPMR3SyncIDT(PVM pVM, PVMCPU pVCpu) +{ + STAM_PROFILE_START(&pVM->trpm.s.StatSyncIDT, a); + const bool fRawRing0 = EMIsRawRing0Enabled(pVM); + int rc; + + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_TRPM_HM_IPE); + + if (fRawRing0 && CSAMIsEnabled(pVM)) + { + /* Clear all handlers */ + Log(("TRPMR3SyncIDT: Clear all trap handlers.\n")); + /** @todo inefficient, but simple */ + for (unsigned iGate = 0; iGate < 256; iGate++) + trpmClearGuestTrapHandler(pVM, iGate); + + /* Scan them all (only the first time) */ + CSAMR3CheckGates(pVM, 0, 256); + } + + /* + * Get the IDTR. + */ + VBOXIDTR IDTR; + IDTR.pIdt = CPUMGetGuestIDTR(pVCpu, &IDTR.cbIdt); + if (!IDTR.cbIdt) + { + Log(("No IDT entries...\n")); + return DBGFSTOP(pVM); + } + +# ifdef TRPM_TRACK_GUEST_IDT_CHANGES + /* + * Check if Guest's IDTR has changed. + */ + if ( IDTR.pIdt != pVM->trpm.s.GuestIdtr.pIdt + || IDTR.cbIdt != pVM->trpm.s.GuestIdtr.cbIdt) + { + Log(("TRPMR3UpdateFromCPUM: Guest's IDT is changed to pIdt=%08X cbIdt=%08X\n", IDTR.pIdt, IDTR.cbIdt)); + if (!pVM->trpm.s.fSafeToDropGuestIDTMonitoring) + { + /* + * [Re]Register write virtual handler for guest's IDT. + */ + if (pVM->trpm.s.GuestIdtr.pIdt != RTRCPTR_MAX) + { + rc = PGMHandlerVirtualDeregister(pVM, pVCpu, pVM->trpm.s.GuestIdtr.pIdt, false /*fHypervisor*/); + AssertRCReturn(rc, rc); + } + /* limit is including */ + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->trpm.s.hGuestIdtWriteHandlerType, + IDTR.pIdt, IDTR.pIdt + IDTR.cbIdt /* already inclusive */, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + + if (rc == VERR_PGM_HANDLER_VIRTUAL_CONFLICT) + { + /* Could be a conflict with CSAM */ + CSAMR3RemovePage(pVM, IDTR.pIdt); + if (PAGE_ADDRESS(IDTR.pIdt) != PAGE_ADDRESS(IDTR.pIdt + IDTR.cbIdt)) + CSAMR3RemovePage(pVM, IDTR.pIdt + IDTR.cbIdt); + + rc = PGMR3HandlerVirtualRegister(pVM, pVCpu, pVM->trpm.s.hGuestIdtWriteHandlerType, + IDTR.pIdt, IDTR.pIdt + IDTR.cbIdt /* already inclusive */, + NULL /*pvUserR3*/, NIL_RTR0PTR /*pvUserRC*/, NULL /*pszDesc*/); + } + + AssertRCReturn(rc, rc); + } + + /* Update saved Guest IDTR. */ + pVM->trpm.s.GuestIdtr = IDTR; + } +# endif + + /* + * Sync the interrupt gate. + * Should probably check/sync the others too, but for now we'll handle that in #GP. + */ + X86DESC Idte3; + rc = PGMPhysSimpleReadGCPtr(pVCpu, &Idte3, IDTR.pIdt + sizeof(Idte3) * 3, sizeof(Idte3)); + if (RT_FAILURE(rc)) + { + AssertMsgRC(rc, ("Failed to read IDT[3]! rc=%Rrc\n", rc)); + return DBGFSTOP(pVM); + } + AssertRCReturn(rc, rc); + if (fRawRing0) + pVM->trpm.s.aIdt[3].Gen.u2DPL = RT_MAX(Idte3.Gen.u2Dpl, 1); + else + pVM->trpm.s.aIdt[3].Gen.u2DPL = Idte3.Gen.u2Dpl; + + /* + * Clear the FF and we're done. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); + STAM_PROFILE_STOP(&pVM->trpm.s.StatSyncIDT, a); + return VINF_SUCCESS; +} + + +/** + * Clear passthrough interrupt gate handler (reset to default handler) + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param iTrap Trap/interrupt gate number. + */ +int trpmR3ClearPassThroughHandler(PVM pVM, unsigned iTrap) +{ + /* Only applies to raw mode which supports only 1 VCPU. */ + PVMCPU pVCpu = &pVM->aCpus[0]; + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + + /** @todo cleanup trpmR3ClearPassThroughHandler()! 
*/ + RTRCPTR aGCPtrs[TRPM_HANDLER_MAX]; + int rc; + + memset(aGCPtrs, 0, sizeof(aGCPtrs)); + + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "TRPMGCHandlerInterupt", &aGCPtrs[TRPM_HANDLER_INT]); + AssertReleaseMsgRC(rc, ("Couldn't find TRPMGCHandlerInterupt in VMMRC.rc!\n")); + + if ( iTrap < TRPM_HANDLER_INT_BASE + || iTrap >= RT_ELEMENTS(pVM->trpm.s.aIdt)) + { + AssertMsg(iTrap < TRPM_HANDLER_INT_BASE, ("Illegal gate number %#x!\n", iTrap)); + return VERR_INVALID_PARAMETER; + } + memcpy(&pVM->trpm.s.aIdt[iTrap], &g_aIdt[iTrap], sizeof(pVM->trpm.s.aIdt[0])); + + /* Unmark it for relocation purposes. */ + ASMBitClear(&pVM->trpm.s.au32IdtPatched[0], iTrap); + + RTSEL SelCS = CPUMGetHyperCS(pVCpu); + PVBOXIDTE pIdte = &pVM->trpm.s.aIdt[iTrap]; + PVBOXIDTE_GENERIC pIdteTemplate = &g_aIdt[iTrap]; + if (pIdte->Gen.u1Present) + { + Assert(pIdteTemplate->u16OffsetLow == TRPM_HANDLER_INT); + Assert(sizeof(RTRCPTR) == sizeof(aGCPtrs[0])); + RTRCPTR Offset = (RTRCPTR)aGCPtrs[pIdteTemplate->u16OffsetLow]; + + /* + * Generic handlers have different entrypoints for each possible + * vector number. These entrypoints make a sort of an array with + * 8 byte entries where the vector number is the index. + * See TRPMGCHandlersA.asm for details. + */ + Offset += iTrap * 8; + + if (pIdte->Gen.u5Type2 != VBOX_IDTE_TYPE2_TASK) + { + pIdte->Gen.u16OffsetLow = Offset & 0xffff; + pIdte->Gen.u16OffsetHigh = Offset >> 16; + pIdte->Gen.u16SegSel = SelCS; + } + } + + return VINF_SUCCESS; +} + + +/** + * Check if address is a gate handler (interrupt or trap). + * + * @returns gate nr or UINT32_MAX is not found + * + * @param pVM The cross context VM structure. + * @param GCPtr GC address to check. + */ +VMMR3DECL(uint32_t) TRPMR3QueryGateByHandler(PVM pVM, RTRCPTR GCPtr) +{ + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), ~0U); + + for (uint32_t iTrap = 0; iTrap < RT_ELEMENTS(pVM->trpm.s.aGuestTrapHandler); iTrap++) + { + if (pVM->trpm.s.aGuestTrapHandler[iTrap] == GCPtr) + return iTrap; + + /* redundant */ + if (ASMBitTest(&pVM->trpm.s.au32IdtPatched[0], iTrap)) + { + PVBOXIDTE pIdte = &pVM->trpm.s.aIdt[iTrap]; + RTGCPTR pHandler = VBOXIDTE_OFFSET(*pIdte); + + if (pHandler == GCPtr) + return iTrap; + } + } + return UINT32_MAX; +} + + +/** + * Get guest trap/interrupt gate handler + * + * @returns Guest trap handler address or TRPM_INVALID_HANDLER if none installed + * @param pVM The cross context VM structure. + * @param iTrap Interrupt/trap number. + */ +VMMR3DECL(RTRCPTR) TRPMR3GetGuestTrapHandler(PVM pVM, unsigned iTrap) +{ + AssertReturn(iTrap < RT_ELEMENTS(pVM->trpm.s.aIdt), TRPM_INVALID_HANDLER); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), TRPM_INVALID_HANDLER); + + return pVM->trpm.s.aGuestTrapHandler[iTrap]; +} + + +/** + * Set guest trap/interrupt gate handler + * Used for setting up trap gates used for kernel calls. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param iTrap Interrupt/trap number. + * @param pHandler GC handler pointer + */ +VMMR3DECL(int) TRPMR3SetGuestTrapHandler(PVM pVM, unsigned iTrap, RTRCPTR pHandler) +{ + /* Only valid in raw mode which implies 1 VCPU */ + Assert(PATMIsEnabled(pVM) && pVM->cCpus == 1); + AssertReturn(VM_IS_RAW_MODE_ENABLED(pVM), VERR_TRPM_HM_IPE); + PVMCPU pVCpu = &pVM->aCpus[0]; + + /* + * Validate. 
+ */ + if (iTrap >= RT_ELEMENTS(pVM->trpm.s.aIdt)) + { + AssertMsg(iTrap < TRPM_HANDLER_INT_BASE, ("Illegal gate number %d!\n", iTrap)); + return VERR_INVALID_PARAMETER; + } + + AssertReturn(pHandler == TRPM_INVALID_HANDLER || PATMIsPatchGCAddr(pVM, pHandler), VERR_INVALID_PARAMETER); + + uint16_t cbIDT; + RTGCPTR GCPtrIDT = CPUMGetGuestIDTR(pVCpu, &cbIDT); + if (iTrap * sizeof(VBOXIDTE) >= cbIDT) + return VERR_INVALID_PARAMETER; /* Silently ignore out of range requests. */ + + if (pHandler == TRPM_INVALID_HANDLER) + { + /* clear trap handler */ + Log(("TRPMR3SetGuestTrapHandler: clear handler %x\n", iTrap)); + return trpmClearGuestTrapHandler(pVM, iTrap); + } + + /* + * Read the guest IDT entry. + */ + VBOXIDTE GuestIdte; + int rc = PGMPhysSimpleReadGCPtr(pVCpu, &GuestIdte, GCPtrIDT + iTrap * sizeof(GuestIdte), sizeof(GuestIdte)); + if (RT_FAILURE(rc)) + { + AssertMsgRC(rc, ("Failed to read IDTE! rc=%Rrc\n", rc)); + return rc; + } + + if ( EMIsRawRing0Enabled(pVM) + && !EMIsRawRing1Enabled(pVM)) /* can't deal with the ambiguity of ring 1 & 2 in the patch code. */ + { + /* + * Only replace handlers for which we are 100% certain there won't be + * any host interrupts. + * + * 0x2E is safe on Windows because it's the system service interrupt gate. Not + * quite certain if this is safe or not on 64-bit Vista, it probably is. + * + * 0x80 is safe on Linux because it's the syscall vector and is part of the + * 32-bit usermode ABI. 64-bit Linux (usually) supports 32-bit processes + * and will therefor never assign hardware interrupts to 0x80. + * + * Exactly why 0x80 is safe on 32-bit Windows is a bit hazy, but it seems + * to work ok... However on 64-bit Vista (SMP?) is doesn't work reliably. + * Booting Linux/BSD guest will cause system lockups on most of the computers. + * -> Update: It seems gate 0x80 is not safe on 32-bits Windows either. See + * @bugref{3604}. + * + * PORTME - Check if your host keeps any of these gates free from hw ints. + * + * Note! SELMR3SyncTSS also has code related to this interrupt handler replacing. + */ + /** @todo handle those dependencies better! */ + /** @todo Solve this in a proper manner. see @bugref{1186} */ +#if defined(RT_OS_WINDOWS) && defined(RT_ARCH_X86) + if (iTrap == 0x2E) +#elif defined(RT_OS_LINUX) + if (iTrap == 0x80) +#else + if (0) +#endif + { + if ( GuestIdte.Gen.u1Present + && ( GuestIdte.Gen.u5Type2 == VBOX_IDTE_TYPE2_TRAP_32 + || GuestIdte.Gen.u5Type2 == VBOX_IDTE_TYPE2_INT_32) + && GuestIdte.Gen.u2DPL == 3) + { + PVBOXIDTE pIdte = &pVM->trpm.s.aIdt[iTrap]; + + GuestIdte.Gen.u5Type2 = VBOX_IDTE_TYPE2_TRAP_32; + GuestIdte.Gen.u16OffsetHigh = pHandler >> 16; + GuestIdte.Gen.u16OffsetLow = pHandler & 0xFFFF; + GuestIdte.Gen.u16SegSel |= 1; //ring 1 + *pIdte = GuestIdte; + + /* Mark it for relocation purposes. */ + ASMBitSet(&pVM->trpm.s.au32IdtPatched[0], iTrap); + + /* Also store it in our guest trap array. */ + pVM->trpm.s.aGuestTrapHandler[iTrap] = pHandler; + + Log(("Setting trap handler %x to %08X (direct)\n", iTrap, pHandler)); + return VINF_SUCCESS; + } + /* ok, let's try to install a trampoline handler then. 
*/ + } + } + + if ( GuestIdte.Gen.u1Present + && ( GuestIdte.Gen.u5Type2 == VBOX_IDTE_TYPE2_TRAP_32 + || GuestIdte.Gen.u5Type2 == VBOX_IDTE_TYPE2_INT_32) + && (GuestIdte.Gen.u2DPL == 3 || GuestIdte.Gen.u2DPL == 0)) + { + /* + * Save handler which can be used for a trampoline call inside the GC + */ + Log(("Setting trap handler %x to %08X\n", iTrap, pHandler)); + pVM->trpm.s.aGuestTrapHandler[iTrap] = pHandler; + return VINF_SUCCESS; + } + return VERR_INVALID_PARAMETER; +} + + +/** + * Check if address is a gate handler (interrupt/trap/task/anything). + * + * @returns True is gate handler, false if not. + * + * @param pVM The cross context VM structure. + * @param GCPtr GC address to check. + */ +VMMR3DECL(bool) TRPMR3IsGateHandler(PVM pVM, RTRCPTR GCPtr) +{ + /* Only valid in raw mode which implies 1 VCPU */ + Assert(PATMIsEnabled(pVM) && pVM->cCpus == 1); + PVMCPU pVCpu = &pVM->aCpus[0]; + + /* + * Read IDTR and calc last entry. + */ + uint16_t cbIDT; + RTGCPTR GCPtrIDTE = CPUMGetGuestIDTR(pVCpu, &cbIDT); + unsigned cEntries = (cbIDT + 1) / sizeof(VBOXIDTE); + if (!cEntries) + return false; + RTGCPTR GCPtrIDTELast = GCPtrIDTE + (cEntries - 1) * sizeof(VBOXIDTE); + + /* + * Outer loop: iterate pages. + */ + while (GCPtrIDTE <= GCPtrIDTELast) + { + /* + * Convert this page to a HC address. + * (This function checks for not-present pages.) + */ + PCVBOXIDTE pIDTE; + PGMPAGEMAPLOCK Lock; + int rc = PGMPhysGCPtr2CCPtrReadOnly(pVCpu, GCPtrIDTE, (const void **)&pIDTE, &Lock); + if (RT_SUCCESS(rc)) + { + /* + * Inner Loop: Iterate the data on this page looking for an entry equal to GCPtr. + * N.B. Member of the Flat Earth Society... + */ + while (GCPtrIDTE <= GCPtrIDTELast) + { + if (pIDTE->Gen.u1Present) + { + RTRCPTR GCPtrHandler = VBOXIDTE_OFFSET(*pIDTE); + if (GCPtr == GCPtrHandler) + { + PGMPhysReleasePageMappingLock(pVM, &Lock); + return true; + } + } + + /* next entry */ + if ((GCPtrIDTE & PAGE_OFFSET_MASK) + sizeof(VBOXIDTE) >= PAGE_SIZE) + { + AssertMsg(!(GCPtrIDTE & (sizeof(VBOXIDTE) - 1)), + ("IDT is crossing pages and it's not aligned! GCPtrIDTE=%#x cbIDT=%#x\n", GCPtrIDTE, cbIDT)); + GCPtrIDTE += sizeof(VBOXIDTE); + break; + } + GCPtrIDTE += sizeof(VBOXIDTE); + pIDTE++; + } + PGMPhysReleasePageMappingLock(pVM, &Lock); + } + else + { + /* Skip to the next page (if any). Take care not to wrap around the address space. */ + if ((GCPtrIDTELast >> PAGE_SHIFT) == (GCPtrIDTE >> PAGE_SHIFT)) + return false; + GCPtrIDTE = RT_ALIGN_T(GCPtrIDTE, PAGE_SIZE, RTGCPTR) + PAGE_SIZE + (GCPtrIDTE & (sizeof(VBOXIDTE) - 1)); + } + } + return false; +} + +#endif /* VBOX_WITH_RAW_MODE */ + +/** + * Inject event (such as external irq or trap). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param enmEvent Trpm event type + * @param pfInjected Where to store whether the event was injected or not. + */ +VMMR3DECL(int) TRPMR3InjectEvent(PVM pVM, PVMCPU pVCpu, TRPMEVENT enmEvent, bool *pfInjected) +{ +#ifdef VBOX_WITH_RAW_MODE + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + Assert(!PATMIsPatchGCAddr(pVM, pCtx->eip)); +#endif + Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); + Assert(pfInjected); + *pfInjected = false; + + /* Currently only useful for external hardware interrupts. 
*/ + Assert(enmEvent == TRPM_HARDWARE_INT); + +#if defined(TRPM_FORWARD_TRAPS_IN_GC) + +# ifdef LOG_ENABLED + DBGFR3_INFO_LOG(pVM, pVCpu, "cpumguest", "TRPMInject"); + DBGFR3_DISAS_INSTR_CUR_LOG(pVCpu, "TRPMInject"); +# endif + + uint8_t u8Interrupt = 0; + int rc = PDMGetInterrupt(pVCpu, &u8Interrupt); + Log(("TRPMR3InjectEvent: CPU%d u8Interrupt=%d (%#x) rc=%Rrc\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc)); + if (RT_SUCCESS(rc)) + { + if (EMIsSupervisorCodeRecompiled(pVM) || !VM_IS_RAW_MODE_ENABLED(pVM)) + { + STAM_COUNTER_INC(&pVM->trpm.s.paStatForwardedIRQR3[u8Interrupt]); + if (!VM_IS_NEM_ENABLED(pVM)) + { + rc = TRPMAssertTrap(pVCpu, u8Interrupt, enmEvent); + AssertRC(rc); + return HMR3IsActive(pVCpu) ? VINF_EM_RESCHEDULE_HM : VINF_EM_RESCHEDULE_REM; + } + VBOXSTRICTRC rcStrict = IEMInjectTrap(pVCpu, u8Interrupt, enmEvent, 0, 0, 0); + if (rcStrict == VINF_SUCCESS) + return VINF_EM_RESCHEDULE; + return VBOXSTRICTRC_TODO(rcStrict); + } + + /* If the guest gate is not patched, then we will check (again) if we can patch it. */ + if (pVM->trpm.s.aGuestTrapHandler[u8Interrupt] == TRPM_INVALID_HANDLER) + { + CSAMR3CheckGates(pVM, u8Interrupt, 1); + Log(("TRPMR3InjectEvent: recheck gate %x -> valid=%d\n", u8Interrupt, TRPMR3GetGuestTrapHandler(pVM, u8Interrupt) != TRPM_INVALID_HANDLER)); + } + + if (pVM->trpm.s.aGuestTrapHandler[u8Interrupt] != TRPM_INVALID_HANDLER) + { + /* Must check pending forced actions as our IDT or GDT might be out of sync */ + rc = EMR3CheckRawForcedActions(pVM, pVCpu); + if (rc == VINF_SUCCESS) + { + /* There's a handler -> let's execute it in raw mode */ + rc = TRPMForwardTrap(pVCpu, CPUMCTX2CORE(pCtx), u8Interrupt, 0, TRPM_TRAP_NO_ERRORCODE, enmEvent, -1); + if (rc == VINF_SUCCESS /* Don't use RT_SUCCESS */) + { + Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT | VMCPU_FF_SELM_SYNC_LDT | VMCPU_FF_TRPM_SYNC_IDT | VMCPU_FF_SELM_SYNC_TSS)); + + STAM_COUNTER_INC(&pVM->trpm.s.paStatForwardedIRQR3[u8Interrupt]); + return VINF_EM_RESCHEDULE_RAW; + } + } + } + else + STAM_COUNTER_INC(&pVM->trpm.s.StatForwardFailNoHandler); + + rc = TRPMAssertTrap(pVCpu, u8Interrupt, enmEvent); + AssertRCReturn(rc, rc); + } + else + { + /* Can happen if the interrupt is masked by TPR or APIC is disabled. */ + AssertMsg(rc == VERR_APIC_INTR_MASKED_BY_TPR || rc == VERR_NO_DATA, ("PDMGetInterrupt failed. rc=%Rrc\n", rc)); + return HMR3IsActive(pVCpu) ? VINF_EM_RESCHEDULE_HM + : VM_IS_NEM_ENABLED(pVM) ? VINF_EM_RESCHEDULE + : VINF_EM_RESCHEDULE_REM; /* (Heed the halted state if this is changed!) */ + } + + /** @todo check if it's safe to translate the patch address to the original guest address. + * this implies a safe state in translated instructions and should take sti successors into account (instruction fusing) + */ + /* Note: if it's a PATM address, then we'll go back to raw mode regardless of the return codes below. */ + + /* Fall back to the recompiler */ + return VINF_EM_RESCHEDULE_REM; /* (Heed the halted state if this is changed!) 
*/ + +#else /* !TRPM_FORWARD_TRAPS_IN_GC */ + RT_NOREF(pVM, enmEvent); + uint8_t u8Interrupt = 0; + int rc = PDMGetInterrupt(pVCpu, &u8Interrupt); + Log(("TRPMR3InjectEvent: u8Interrupt=%d (%#x) rc=%Rrc\n", u8Interrupt, u8Interrupt, rc)); + if (RT_SUCCESS(rc)) + { + *pfInjected = true; + if (!VM_IS_NEM_ENABLED(pVM)) + { + rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT); + AssertRC(rc); + } + else + { + VBOXSTRICTRC rcStrict = IEMInjectTrap(pVCpu, u8Interrupt, enmEvent, 0, 0, 0); + if (rcStrict != VINF_SUCCESS) + return VBOXSTRICTRC_TODO(rcStrict); + } + STAM_COUNTER_INC(&pVM->trpm.s.paStatForwardedIRQR3[u8Interrupt]); + } + else + { + /* Can happen if the interrupt is masked by TPR or APIC is disabled. */ + AssertMsg(rc == VERR_APIC_INTR_MASKED_BY_TPR || rc == VERR_NO_DATA, ("PDMGetInterrupt failed. rc=%Rrc\n", rc)); + } + return HMR3IsActive(pVCpu) ? VINF_EM_RESCHEDULE_HM + : VM_IS_NEM_ENABLED(pVM) ? VINF_EM_RESCHEDULE + : VINF_EM_RESCHEDULE_REM; /* (Heed the halted state if this is changed!) */ +#endif /* !TRPM_FORWARD_TRAPS_IN_GC */ +} + + +/** + * Displays the pending TRPM event. + * + * @param pVM The cross context VM structure. + * @param pHlp The info helper functions. + * @param pszArgs Arguments, ignored. + */ +static DECLCALLBACK(void) trpmR3InfoEvent(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + NOREF(pszArgs); + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + pVCpu = &pVM->aCpus[0]; + + uint8_t uVector; + uint8_t cbInstr; + TRPMEVENT enmTrapEvent; + RTGCUINT uErrorCode; + RTGCUINTPTR uCR2; + int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrapEvent, &uErrorCode, &uCR2, &cbInstr); + if (RT_SUCCESS(rc)) + { + pHlp->pfnPrintf(pHlp, "CPU[%u]: TRPM event\n", pVCpu->idCpu); + static const char * const s_apszTrpmEventType[] = + { + "Trap", + "Hardware Int", + "Software Int" + }; + if (RT_LIKELY((size_t)enmTrapEvent < RT_ELEMENTS(s_apszTrpmEventType))) + { + pHlp->pfnPrintf(pHlp, " Type = %s\n", s_apszTrpmEventType[enmTrapEvent]); + pHlp->pfnPrintf(pHlp, " uVector = %#x\n", uVector); + pHlp->pfnPrintf(pHlp, " uErrorCode = %#RGu\n", uErrorCode); + pHlp->pfnPrintf(pHlp, " uCR2 = %#RGp\n", uCR2); + pHlp->pfnPrintf(pHlp, " cbInstr = %u bytes\n", cbInstr); + } + else + pHlp->pfnPrintf(pHlp, " Type = %#x (Invalid!)\n", enmTrapEvent); + } + else if (rc == VERR_TRPM_NO_ACTIVE_TRAP) + pHlp->pfnPrintf(pHlp, "CPU[%u]: TRPM event (None)\n", pVCpu->idCpu); + else + pHlp->pfnPrintf(pHlp, "CPU[%u]: TRPM event - Query failed! rc=%Rrc\n", pVCpu->idCpu, rc); +} + diff --git a/src/VBox/VMM/VMMR3/VM.cpp b/src/VBox/VMM/VMMR3/VM.cpp new file mode 100644 index 00000000..05c5b9dd --- /dev/null +++ b/src/VBox/VMM/VMMR3/VM.cpp @@ -0,0 +1,4705 @@ +/* $Id: VM.cpp $ */ +/** @file + * VM - Virtual Machine + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_vm VM API + * + * This is the encapsulating bit. It provides the APIs that Main and VBoxBFE + * use to create a VMM instance for running a guest in. 
It also provides + * facilities for queuing requests for execution in EMT (mostly for + * serialization purposes) and for reporting errors back to the VMM user (Main/VBoxBFE). + * + * + * @section sec_vm_design Design Critique / Things To Do + * + * In hindsight this component is a big design mistake; all this stuff really + * belongs in the VMM component. It just seemed like an okay idea at a + * time when the VMM bit was still rather vague. 'VM' also happened to be the name + * of the per-VM instance structure (see vm.h), so it kind of made sense. + * However, as it turned out, VMM(.cpp) is almost empty; all it provides in ring-3 + * is some minor functionality and some "routing" services. + * + * Fixing this is just a matter of some more or less straightforward + * refactoring; the question is just when someone will get to it. Moving the EMT + * would be a good start. + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "VMInternal.h" +#include +#include + +#include +#if defined(VBOX_WITH_DTRACE_R3) && !defined(VBOX_WITH_NATIVE_DTRACE) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int vmR3CreateUVM(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods, PUVM *ppUVM); +static int vmR3CreateU(PUVM pUVM, uint32_t cCpus, PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM); +static int vmR3ReadBaseConfig(PVM pVM, PUVM pUVM, uint32_t cCpus); +static int vmR3InitRing3(PVM pVM, PUVM pUVM); +static int vmR3InitRing0(PVM pVM); +#ifdef VBOX_WITH_RAW_MODE +static int vmR3InitRC(PVM pVM); +#endif +static int vmR3InitDoCompleted(PVM pVM, VMINITCOMPLETED enmWhat); +static void vmR3DestroyUVM(PUVM pUVM, uint32_t cMilliesEMTWait); +static bool vmR3ValidateStateTransition(VMSTATE enmStateOld, VMSTATE enmStateNew); +static void vmR3DoAtState(PVM pVM, PUVM pUVM, VMSTATE enmStateNew, VMSTATE enmStateOld); +static int vmR3TrySetState(PVM pVM, const char *pszWho, unsigned cTransitions, ...); +static void vmR3SetStateLocked(PVM pVM, PUVM pUVM, VMSTATE enmStateNew, VMSTATE enmStateOld, bool fSetRatherThanClearFF); +static void vmR3SetState(PVM pVM, VMSTATE enmStateNew, VMSTATE enmStateOld); +static int vmR3SetErrorU(PUVM pUVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) RT_IPRT_FORMAT_ATTR(6, 7); + + +/** + * Creates a virtual machine by calling the supplied configuration constructor. + * + * On successful return the VM is ready to be powered on, i.e. VMR3PowerOn() should + * be called to start the execution. + * + * @returns 0 on success. + * @returns VBox error code on failure. + * @param cCpus Number of virtual CPUs for the new VM.
+ * @param pVmm2UserMethods An optional method table that the VMM can use + * to make the user perform various action, like + * for instance state saving. + * @param pfnVMAtError Pointer to callback function for setting VM + * errors. This was added as an implicit call to + * VMR3AtErrorRegister() since there is no way the + * caller can get to the VM handle early enough to + * do this on its own. + * This is called in the context of an EMT. + * @param pvUserVM The user argument passed to pfnVMAtError. + * @param pfnCFGMConstructor Pointer to callback function for constructing the VM configuration tree. + * This is called in the context of an EMT0. + * @param pvUserCFGM The user argument passed to pfnCFGMConstructor. + * @param ppVM Where to optionally store the 'handle' of the + * created VM. + * @param ppUVM Where to optionally store the user 'handle' of + * the created VM, this includes one reference as + * if VMR3RetainUVM() was called. The caller + * *MUST* remember to pass the returned value to + * VMR3ReleaseUVM() once done with the handle. + */ +VMMR3DECL(int) VMR3Create(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods, + PFNVMATERROR pfnVMAtError, void *pvUserVM, + PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM, + PVM *ppVM, PUVM *ppUVM) +{ + LogFlow(("VMR3Create: cCpus=%RU32 pVmm2UserMethods=%p pfnVMAtError=%p pvUserVM=%p pfnCFGMConstructor=%p pvUserCFGM=%p ppVM=%p ppUVM=%p\n", + cCpus, pVmm2UserMethods, pfnVMAtError, pvUserVM, pfnCFGMConstructor, pvUserCFGM, ppVM, ppUVM)); + + if (pVmm2UserMethods) + { + AssertPtrReturn(pVmm2UserMethods, VERR_INVALID_POINTER); + AssertReturn(pVmm2UserMethods->u32Magic == VMM2USERMETHODS_MAGIC, VERR_INVALID_PARAMETER); + AssertReturn(pVmm2UserMethods->u32Version == VMM2USERMETHODS_VERSION, VERR_INVALID_PARAMETER); + AssertPtrNullReturn(pVmm2UserMethods->pfnSaveState, VERR_INVALID_POINTER); + AssertPtrNullReturn(pVmm2UserMethods->pfnNotifyEmtInit, VERR_INVALID_POINTER); + AssertPtrNullReturn(pVmm2UserMethods->pfnNotifyEmtTerm, VERR_INVALID_POINTER); + AssertPtrNullReturn(pVmm2UserMethods->pfnNotifyPdmtInit, VERR_INVALID_POINTER); + AssertPtrNullReturn(pVmm2UserMethods->pfnNotifyPdmtTerm, VERR_INVALID_POINTER); + AssertPtrNullReturn(pVmm2UserMethods->pfnNotifyResetTurnedIntoPowerOff, VERR_INVALID_POINTER); + AssertReturn(pVmm2UserMethods->u32EndMagic == VMM2USERMETHODS_MAGIC, VERR_INVALID_PARAMETER); + } + AssertPtrNullReturn(pfnVMAtError, VERR_INVALID_POINTER); + AssertPtrNullReturn(pfnCFGMConstructor, VERR_INVALID_POINTER); + AssertPtrNullReturn(ppVM, VERR_INVALID_POINTER); + AssertPtrNullReturn(ppUVM, VERR_INVALID_POINTER); + AssertReturn(ppVM || ppUVM, VERR_INVALID_PARAMETER); + + /* + * Validate input. + */ + AssertLogRelMsgReturn(cCpus > 0 && cCpus <= VMM_MAX_CPU_COUNT, ("%RU32\n", cCpus), VERR_TOO_MANY_CPUS); + + /* + * Create the UVM so we can register the at-error callback + * and consolidate a bit of cleanup code. + */ + PUVM pUVM = NULL; /* shuts up gcc */ + int rc = vmR3CreateUVM(cCpus, pVmm2UserMethods, &pUVM); + if (RT_FAILURE(rc)) + return rc; + if (pfnVMAtError) + rc = VMR3AtErrorRegister(pUVM, pfnVMAtError, pvUserVM); + if (RT_SUCCESS(rc)) + { + /* + * Initialize the support library creating the session for this VM. + */ + rc = SUPR3Init(&pUVM->vm.s.pSession); + if (RT_SUCCESS(rc)) + { +#if defined(VBOX_WITH_DTRACE_R3) && !defined(VBOX_WITH_NATIVE_DTRACE) + /* Now that we've opened the device, we can register trace probes. 
*/ + static bool s_fRegisteredProbes = false; + if (ASMAtomicCmpXchgBool(&s_fRegisteredProbes, true, false)) + SUPR3TracerRegisterModule(~(uintptr_t)0, "VBoxVMM", &g_VTGObjHeader, (uintptr_t)&g_VTGObjHeader, + SUP_TRACER_UMOD_FLAGS_SHARED); +#endif + + /* + * Call vmR3CreateU in the EMT thread and wait for it to finish. + * + * Note! VMCPUID_ANY is used here because VMR3ReqQueueU would have trouble + * submitting a request to a specific VCPU without a pVM. So, to make + * sure init is running on EMT(0), vmR3EmulationThreadWithId makes sure + * that only EMT(0) is servicing VMCPUID_ANY requests when pVM is NULL. + */ + PVMREQ pReq; + rc = VMR3ReqCallU(pUVM, VMCPUID_ANY, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS, + (PFNRT)vmR3CreateU, 4, pUVM, cCpus, pfnCFGMConstructor, pvUserCFGM); + if (RT_SUCCESS(rc)) + { + rc = pReq->iStatus; + VMR3ReqFree(pReq); + if (RT_SUCCESS(rc)) + { + /* + * Success! + */ + if (ppVM) + *ppVM = pUVM->pVM; + if (ppUVM) + { + VMR3RetainUVM(pUVM); + *ppUVM = pUVM; + } + LogFlow(("VMR3Create: returns VINF_SUCCESS (pVM=%p, pUVM=%p\n", pUVM->pVM, pUVM)); + return VINF_SUCCESS; + } + } + else + AssertMsgFailed(("VMR3ReqCallU failed rc=%Rrc\n", rc)); + + /* + * An error occurred during VM creation. Set the error message directly + * using the initial callback, as the callback list might not exist yet. + */ + const char *pszError; + switch (rc) + { + case VERR_VMX_IN_VMX_ROOT_MODE: +#ifdef RT_OS_LINUX + pszError = N_("VirtualBox can't operate in VMX root mode. " + "Please disable the KVM kernel extension, recompile your kernel and reboot"); +#else + pszError = N_("VirtualBox can't operate in VMX root mode. Please close all other virtualization programs."); +#endif + break; + +#ifndef RT_OS_DARWIN + case VERR_HM_CONFIG_MISMATCH: + pszError = N_("VT-x/AMD-V is either not available on your host or disabled. " + "This hardware extension is required by the VM configuration"); + break; +#endif + + case VERR_SVM_IN_USE: +#ifdef RT_OS_LINUX + pszError = N_("VirtualBox can't enable the AMD-V extension. " + "Please disable the KVM kernel extension, recompile your kernel and reboot"); +#else + pszError = N_("VirtualBox can't enable the AMD-V extension. Please close all other virtualization programs."); +#endif + break; + +#ifdef RT_OS_LINUX + case VERR_SUPDRV_COMPONENT_NOT_FOUND: + pszError = N_("One of the kernel modules was not successfully loaded. Make sure " + "that no kernel modules from an older version of VirtualBox exist. " + "Then try to recompile and reload the kernel modules by executing " + "'/sbin/vboxconfig' as root"); + break; +#endif + + case VERR_RAW_MODE_INVALID_SMP: + pszError = N_("VT-x/AMD-V is either not available on your host or disabled. " + "VirtualBox requires this hardware extension to emulate more than one " + "guest CPU"); + break; + + case VERR_SUPDRV_KERNEL_TOO_OLD_FOR_VTX: +#ifdef RT_OS_LINUX + pszError = N_("Because the host kernel is too old, VirtualBox cannot enable the VT-x " + "extension. Either upgrade your kernel to Linux 2.6.13 or later or disable " + "the VT-x extension in the VM settings. Note that without VT-x you have " + "to reduce the number of guest CPUs to one"); +#else + pszError = N_("Because the host kernel is too old, VirtualBox cannot enable the VT-x " + "extension. Either upgrade your kernel or disable the VT-x extension in the " + "VM settings. 
Note that without VT-x you have to reduce the number of guest " + "CPUs to one"); +#endif + break; + + case VERR_PDM_DEVICE_NOT_FOUND: + pszError = N_("A virtual device is configured in the VM settings but the device " + "implementation is missing.\n" + "A possible reason for this error is a missing extension pack. Note " + "that as of VirtualBox 4.0, certain features (for example USB 2.0 " + "support and remote desktop) are only available from an 'extension " + "pack' which must be downloaded and installed separately"); + break; + + case VERR_PCI_PASSTHROUGH_NO_HM: + pszError = N_("PCI passthrough requires VT-x/AMD-V"); + break; + + case VERR_PCI_PASSTHROUGH_NO_NESTED_PAGING: + pszError = N_("PCI passthrough requires nested paging"); + break; + + default: + if (VMR3GetErrorCount(pUVM) == 0) + pszError = RTErrGetFull(rc); + else + pszError = NULL; /* already set. */ + break; + } + if (pszError) + vmR3SetErrorU(pUVM, rc, RT_SRC_POS, pszError, rc); + } + else + { + /* + * An error occurred at support library initialization time (before the + * VM could be created). Set the error message directly using the + * initial callback, as the callback list doesn't exist yet. + */ + const char *pszError; + switch (rc) + { + case VERR_VM_DRIVER_LOAD_ERROR: +#ifdef RT_OS_LINUX + pszError = N_("VirtualBox kernel driver not loaded. The vboxdrv kernel module " + "was either not loaded or /dev/vboxdrv is not set up properly. " + "Re-setup the kernel module by executing " + "'/sbin/vboxconfig' as root"); +#else + pszError = N_("VirtualBox kernel driver not loaded"); +#endif + break; + case VERR_VM_DRIVER_OPEN_ERROR: + pszError = N_("VirtualBox kernel driver cannot be opened"); + break; + case VERR_VM_DRIVER_NOT_ACCESSIBLE: +#ifdef VBOX_WITH_HARDENING + /* This should only happen if the executable wasn't hardened - bad code/build. */ + pszError = N_("VirtualBox kernel driver not accessible, permission problem. " + "Re-install VirtualBox. If you are building it yourself, you " + "should make sure it installed correctly and that the setuid " + "bit is set on the executables calling VMR3Create."); +#else + /* This should only happen when mixing builds or with the usual /dev/vboxdrv access issues. */ +# if defined(RT_OS_DARWIN) + pszError = N_("VirtualBox KEXT is not accessible, permission problem. " + "If you have built VirtualBox yourself, make sure that you do not " + "have the vboxdrv KEXT from a different build or installation loaded."); +# elif defined(RT_OS_LINUX) + pszError = N_("VirtualBox kernel driver is not accessible, permission problem. " + "If you have built VirtualBox yourself, make sure that you do " + "not have the vboxdrv kernel module from a different build or " + "installation loaded. Also, make sure the vboxdrv udev rule gives " + "you the permission you need to access the device."); +# elif defined(RT_OS_WINDOWS) + pszError = N_("VirtualBox kernel driver is not accessible, permission problem."); +# else /* solaris, freebsd, ++. */ + pszError = N_("VirtualBox kernel module is not accessible, permission problem. " + "If you have built VirtualBox yourself, make sure that you do " + "not have the vboxdrv kernel module from a different install loaded."); +# endif +#endif + break; + case VERR_INVALID_HANDLE: /** @todo track down and fix this error. */ + case VERR_VM_DRIVER_NOT_INSTALLED: +#ifdef RT_OS_LINUX + pszError = N_("VirtualBox kernel driver not installed. The vboxdrv kernel module " + "was either not loaded or /dev/vboxdrv was not created for some " + "reason. 
Re-setup the kernel module by executing " + "'/sbin/vboxconfig' as root"); +#else + pszError = N_("VirtualBox kernel driver not installed"); +#endif + break; + case VERR_NO_MEMORY: + pszError = N_("VirtualBox support library out of memory"); + break; + case VERR_VERSION_MISMATCH: + case VERR_VM_DRIVER_VERSION_MISMATCH: + pszError = N_("The VirtualBox support driver which is running is from a different " + "version of VirtualBox. You can correct this by stopping all " + "running instances of VirtualBox and reinstalling the software."); + break; + default: + pszError = N_("Unknown error initializing kernel driver"); + AssertMsgFailed(("Add error message for rc=%d (%Rrc)\n", rc, rc)); + } + vmR3SetErrorU(pUVM, rc, RT_SRC_POS, pszError, rc); + } + } + + /* cleanup */ + vmR3DestroyUVM(pUVM, 2000); + LogFlow(("VMR3Create: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Creates the UVM. + * + * This will not initialize the support library even if vmR3DestroyUVM + * will terminate that. + * + * @returns VBox status code. + * @param cCpus Number of virtual CPUs + * @param pVmm2UserMethods Pointer to the optional VMM -> User method + * table. + * @param ppUVM Where to store the UVM pointer. + */ +static int vmR3CreateUVM(uint32_t cCpus, PCVMM2USERMETHODS pVmm2UserMethods, PUVM *ppUVM) +{ + uint32_t i; + + /* + * Create and initialize the UVM. + */ + PUVM pUVM = (PUVM)RTMemPageAllocZ(RT_UOFFSETOF_DYN(UVM, aCpus[cCpus])); + AssertReturn(pUVM, VERR_NO_MEMORY); + pUVM->u32Magic = UVM_MAGIC; + pUVM->cCpus = cCpus; + pUVM->pVmm2UserMethods = pVmm2UserMethods; + + AssertCompile(sizeof(pUVM->vm.s) <= sizeof(pUVM->vm.padding)); + + pUVM->vm.s.cUvmRefs = 1; + pUVM->vm.s.ppAtStateNext = &pUVM->vm.s.pAtState; + pUVM->vm.s.ppAtErrorNext = &pUVM->vm.s.pAtError; + pUVM->vm.s.ppAtRuntimeErrorNext = &pUVM->vm.s.pAtRuntimeError; + + pUVM->vm.s.enmHaltMethod = VMHALTMETHOD_BOOTSTRAP; + RTUuidClear(&pUVM->vm.s.Uuid); + + /* Initialize the VMCPU array in the UVM. */ + for (i = 0; i < cCpus; i++) + { + pUVM->aCpus[i].pUVM = pUVM; + pUVM->aCpus[i].idCpu = i; + } + + /* Allocate a TLS entry to store the VMINTUSERPERVMCPU pointer. */ + int rc = RTTlsAllocEx(&pUVM->vm.s.idxTLS, NULL); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + /* Allocate a halt method event semaphore for each VCPU. */ + for (i = 0; i < cCpus; i++) + pUVM->aCpus[i].vm.s.EventSemWait = NIL_RTSEMEVENT; + for (i = 0; i < cCpus; i++) + { + rc = RTSemEventCreate(&pUVM->aCpus[i].vm.s.EventSemWait); + if (RT_FAILURE(rc)) + break; + } + if (RT_SUCCESS(rc)) + { + rc = RTCritSectInit(&pUVM->vm.s.AtStateCritSect); + if (RT_SUCCESS(rc)) + { + rc = RTCritSectInit(&pUVM->vm.s.AtErrorCritSect); + if (RT_SUCCESS(rc)) + { + /* + * Init fundamental (sub-)components - STAM, MMR3Heap and PDMLdr. + */ + rc = PDMR3InitUVM(pUVM); + if (RT_SUCCESS(rc)) + { + rc = STAMR3InitUVM(pUVM); + if (RT_SUCCESS(rc)) + { + rc = MMR3InitUVM(pUVM); + if (RT_SUCCESS(rc)) + { + /* + * Start the emulation threads for all VMCPUs. + */ + for (i = 0; i < cCpus; i++) + { + rc = RTThreadCreateF(&pUVM->aCpus[i].vm.s.ThreadEMT, vmR3EmulationThread, &pUVM->aCpus[i], + _1M, RTTHREADTYPE_EMULATION, RTTHREADFLAGS_WAITABLE, + cCpus > 1 ? "EMT-%u" : "EMT", i); + if (RT_FAILURE(rc)) + break; + + pUVM->aCpus[i].vm.s.NativeThreadEMT = RTThreadGetNative(pUVM->aCpus[i].vm.s.ThreadEMT); + } + + if (RT_SUCCESS(rc)) + { + *ppUVM = pUVM; + return VINF_SUCCESS; + } + + /* bail out. */ + while (i-- > 0) + { + /** @todo rainy day: terminate the EMTs. 
*/ + } + MMR3TermUVM(pUVM); + } + STAMR3TermUVM(pUVM); + } + PDMR3TermUVM(pUVM); + } + RTCritSectDelete(&pUVM->vm.s.AtErrorCritSect); + } + RTCritSectDelete(&pUVM->vm.s.AtStateCritSect); + } + } + for (i = 0; i < cCpus; i++) + { + RTSemEventDestroy(pUVM->aCpus[i].vm.s.EventSemWait); + pUVM->aCpus[i].vm.s.EventSemWait = NIL_RTSEMEVENT; + } + RTTlsFree(pUVM->vm.s.idxTLS); + } + RTMemPageFree(pUVM, RT_UOFFSETOF_DYN(UVM, aCpus[pUVM->cCpus])); + return rc; +} + + +/** + * Creates and initializes the VM. + * + * @thread EMT + */ +static int vmR3CreateU(PUVM pUVM, uint32_t cCpus, PFNCFGMCONSTRUCTOR pfnCFGMConstructor, void *pvUserCFGM) +{ +#if (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)) && !defined(VBOX_WITH_OLD_CPU_SUPPORT) + /* + * Require SSE2 to be present (already checked for in supdrv, so we + * shouldn't ever really get here). + */ + if (!(ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_SSE2)) + { + LogRel(("vboxdrv: Requires SSE2 (cpuid(0).EDX=%#x)\n", ASMCpuId_EDX(1))); + return VERR_UNSUPPORTED_CPU; + } +#endif + + /* + * Load the VMMR0.r0 module so that we can call GVMMR0CreateVM. + */ + int rc = PDMR3LdrLoadVMMR0U(pUVM); + if (RT_FAILURE(rc)) + { + /** @todo we need a cleaner solution for this (VERR_VMX_IN_VMX_ROOT_MODE). + * bird: what about moving the message down here? Main picks the first message, right? */ + if (rc == VERR_VMX_IN_VMX_ROOT_MODE) + return rc; /* proper error message set later on */ + return vmR3SetErrorU(pUVM, rc, RT_SRC_POS, N_("Failed to load VMMR0.r0")); + } + + /* + * Request GVMM to create a new VM for us. + */ + GVMMCREATEVMREQ CreateVMReq; + CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + CreateVMReq.Hdr.cbReq = sizeof(CreateVMReq); + CreateVMReq.pSession = pUVM->vm.s.pSession; + CreateVMReq.pVMR0 = NIL_RTR0PTR; + CreateVMReq.pVMR3 = NULL; + CreateVMReq.cCpus = cCpus; + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &CreateVMReq.Hdr); + if (RT_SUCCESS(rc)) + { + PVM pVM = pUVM->pVM = CreateVMReq.pVMR3; + AssertRelease(VALID_PTR(pVM)); + AssertRelease(pVM->pVMR0 == CreateVMReq.pVMR0); + AssertRelease(pVM->pSession == pUVM->vm.s.pSession); + AssertRelease(pVM->cCpus == cCpus); + AssertRelease(pVM->uCpuExecutionCap == 100); + AssertRelease(pVM->offVMCPU == RT_UOFFSETOF(VM, aCpus)); + AssertCompileMemberAlignment(VM, cpum, 64); + AssertCompileMemberAlignment(VM, tm, 64); + AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE); + + Log(("VMR3Create: Created pUVM=%p pVM=%p pVMR0=%p hSelf=%#x cCpus=%RU32\n", + pUVM, pVM, pVM->pVMR0, pVM->hSelf, pVM->cCpus)); + + /* + * Initialize the VM structure and our internal data (VMINT). + */ + pVM->pUVM = pUVM; + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + pVM->aCpus[i].pUVCpu = &pUVM->aCpus[i]; + pVM->aCpus[i].idCpu = i; + pVM->aCpus[i].hNativeThread = pUVM->aCpus[i].vm.s.NativeThreadEMT; + Assert(pVM->aCpus[i].hNativeThread != NIL_RTNATIVETHREAD); + /* hNativeThreadR0 is initialized on EMT registration. */ + pUVM->aCpus[i].pVCpu = &pVM->aCpus[i]; + pUVM->aCpus[i].pVM = pVM; + } + + + /* + * Init the configuration. + */ + rc = CFGMR3Init(pVM, pfnCFGMConstructor, pvUserCFGM); + if (RT_SUCCESS(rc)) + { + rc = vmR3ReadBaseConfig(pVM, pUVM, cCpus); + if (RT_SUCCESS(rc)) + { + /* + * Init the ring-3 components and ring-3 per cpu data, finishing it off + * by a relocation round (intermediate context finalization will do this). 
+ */ + rc = vmR3InitRing3(pVM, pUVM); + if (RT_SUCCESS(rc)) + { + rc = PGMR3FinalizeMappings(pVM); + if (RT_SUCCESS(rc)) + { + + LogFlow(("Ring-3 init succeeded\n")); + + /* + * Init the Ring-0 components. + */ + rc = vmR3InitRing0(pVM); + if (RT_SUCCESS(rc)) + { + /* Relocate again, because some switcher fixups depends on R0 init results. */ + VMR3Relocate(pVM, 0 /* offDelta */); + +#ifdef VBOX_WITH_DEBUGGER + /* + * Init the tcp debugger console if we're building + * with debugger support. + */ + void *pvUser = NULL; + rc = DBGCTcpCreate(pUVM, &pvUser); + if ( RT_SUCCESS(rc) + || rc == VERR_NET_ADDRESS_IN_USE) + { + pUVM->vm.s.pvDBGC = pvUser; +#endif + /* + * Init the Raw-Mode Context components. + */ +#ifdef VBOX_WITH_RAW_MODE + rc = vmR3InitRC(pVM); + if (RT_SUCCESS(rc)) +#endif + { + /* + * Now we can safely set the VM halt method to default. + */ + rc = vmR3SetHaltMethodU(pUVM, VMHALTMETHOD_DEFAULT); + if (RT_SUCCESS(rc)) + { + /* + * Set the state and we're done. + */ + vmR3SetState(pVM, VMSTATE_CREATED, VMSTATE_CREATING); + return VINF_SUCCESS; + } + } +#ifdef VBOX_WITH_DEBUGGER + DBGCTcpTerminate(pUVM, pUVM->vm.s.pvDBGC); + pUVM->vm.s.pvDBGC = NULL; + } +#endif + //.. + } + } + vmR3Destroy(pVM); + } + } + //.. + + /* Clean CFGM. */ + int rc2 = CFGMR3Term(pVM); + AssertRC(rc2); + } + + /* + * Do automatic cleanups while the VM structure is still alive and all + * references to it are still working. + */ + PDMR3CritSectBothTerm(pVM); + + /* + * Drop all references to VM and the VMCPU structures, then + * tell GVMM to destroy the VM. + */ + pUVM->pVM = NULL; + for (VMCPUID i = 0; i < pUVM->cCpus; i++) + { + pUVM->aCpus[i].pVM = NULL; + pUVM->aCpus[i].pVCpu = NULL; + } + Assert(pUVM->vm.s.enmHaltMethod == VMHALTMETHOD_BOOTSTRAP); + + if (pUVM->cCpus > 1) + { + /* Poke the other EMTs since they may have stale pVM and pVCpu references + on the stack (see VMR3WaitU for instance) if they've been awakened after + VM creation. */ + for (VMCPUID i = 1; i < pUVM->cCpus; i++) + VMR3NotifyCpuFFU(&pUVM->aCpus[i], 0); + RTThreadSleep(RT_MIN(100 + 25 *(pUVM->cCpus - 1), 500)); /* very sophisticated */ + } + + int rc2 = SUPR3CallVMMR0Ex(CreateVMReq.pVMR0, 0 /*idCpu*/, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL); + AssertRC(rc2); + } + else + vmR3SetErrorU(pUVM, rc, RT_SRC_POS, N_("VM creation failed (GVMM)")); + + LogFlow(("vmR3CreateU: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Reads the base configuation from CFGM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pUVM The user mode VM structure. + * @param cCpus The CPU count given to VMR3Create. + */ +static int vmR3ReadBaseConfig(PVM pVM, PUVM pUVM, uint32_t cCpus) +{ + int rc; + PCFGMNODE pRoot = CFGMR3GetRoot(pVM); + + /* + * If executing in fake suplib mode disable RR3 and RR0 in the config. + */ + const char *psz = RTEnvGet("VBOX_SUPLIB_FAKE"); + if (psz && !strcmp(psz, "fake")) + { + CFGMR3RemoveValue(pRoot, "RawR3Enabled"); + CFGMR3InsertInteger(pRoot, "RawR3Enabled", 0); + CFGMR3RemoveValue(pRoot, "RawR0Enabled"); + CFGMR3InsertInteger(pRoot, "RawR0Enabled", 0); + } + + /* + * Base EM and HM config properties. + */ + /** @todo We don't need to read any of this here. The relevant modules reads + * them again and will be in a better position to set them correctly. 
*/ + Assert(pVM->fRecompileUser == false); /* ASSUMES all zeros at this point */ + bool fEnabled; + rc = CFGMR3QueryBoolDef(pRoot, "RawR3Enabled", &fEnabled, false); AssertRCReturn(rc, rc); + pVM->fRecompileUser = !fEnabled; + rc = CFGMR3QueryBoolDef(pRoot, "RawR0Enabled", &fEnabled, false); AssertRCReturn(rc, rc); + pVM->fRecompileSupervisor = !fEnabled; +#ifdef VBOX_WITH_RAW_MODE +# ifdef VBOX_WITH_RAW_RING1 + rc = CFGMR3QueryBoolDef(pRoot, "RawR1Enabled", &pVM->fRawRing1Enabled, false); +# endif + rc = CFGMR3QueryBoolDef(pRoot, "PATMEnabled", &pVM->fPATMEnabled, true); AssertRCReturn(rc, rc); + rc = CFGMR3QueryBoolDef(pRoot, "CSAMEnabled", &pVM->fCSAMEnabled, true); AssertRCReturn(rc, rc); + rc = CFGMR3QueryBoolDef(pRoot, "HMEnabled", &pVM->fHMEnabled, true); AssertRCReturn(rc, rc); +#else + pVM->fHMEnabled = true; +#endif + LogRel(("VM: fHMEnabled=%RTbool (configured) fRecompileUser=%RTbool fRecompileSupervisor=%RTbool\n" + "VM: fRawRing1Enabled=%RTbool CSAM=%RTbool PATM=%RTbool\n", + pVM->fHMEnabled, pVM->fRecompileUser, pVM->fRecompileSupervisor, + pVM->fRawRing1Enabled, pVM->fCSAMEnabled, pVM->fPATMEnabled)); + + /* + * Make sure the CPU count in the config data matches. + */ + uint32_t cCPUsCfg; + rc = CFGMR3QueryU32Def(pRoot, "NumCPUs", &cCPUsCfg, 1); + AssertLogRelMsgRCReturn(rc, ("Configuration error: Querying \"NumCPUs\" as integer failed, rc=%Rrc\n", rc), rc); + AssertLogRelMsgReturn(cCPUsCfg == cCpus, + ("Configuration error: \"NumCPUs\"=%RU32 and VMR3Create::cCpus=%RU32 does not match!\n", + cCPUsCfg, cCpus), + VERR_INVALID_PARAMETER); + + /* + * Get the CPU execution cap. + */ + rc = CFGMR3QueryU32Def(pRoot, "CpuExecutionCap", &pVM->uCpuExecutionCap, 100); + AssertLogRelMsgRCReturn(rc, ("Configuration error: Querying \"CpuExecutionCap\" as integer failed, rc=%Rrc\n", rc), rc); + + /* + * Get the VM name and UUID. + */ + rc = CFGMR3QueryStringAllocDef(pRoot, "Name", &pUVM->vm.s.pszName, ""); + AssertLogRelMsgRCReturn(rc, ("Configuration error: Querying \"Name\" failed, rc=%Rrc\n", rc), rc); + + rc = CFGMR3QueryBytes(pRoot, "UUID", &pUVM->vm.s.Uuid, sizeof(pUVM->vm.s.Uuid)); + if (rc == VERR_CFGM_VALUE_NOT_FOUND) + rc = VINF_SUCCESS; + AssertLogRelMsgRCReturn(rc, ("Configuration error: Querying \"UUID\" failed, rc=%Rrc\n", rc), rc); + + rc = CFGMR3QueryBoolDef(pRoot, "PowerOffInsteadOfReset", &pVM->vm.s.fPowerOffInsteadOfReset, false); + AssertLogRelMsgRCReturn(rc, ("Configuration error: Querying \"PowerOffInsteadOfReset\" failed, rc=%Rrc\n", rc), rc); + + return VINF_SUCCESS; +} + + +/** + * Register the calling EMT with GVM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID. + */ +static DECLCALLBACK(int) vmR3RegisterEMT(PVM pVM, VMCPUID idCpu) +{ + Assert(VMMGetCpuId(pVM) == idCpu); + int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, idCpu, VMMR0_DO_GVMM_REGISTER_VMCPU, 0, NULL); + if (RT_FAILURE(rc)) + LogRel(("idCpu=%u rc=%Rrc\n", idCpu, rc)); + return rc; +} + + +/** + * Initializes all R3 components of the VM + */ +static int vmR3InitRing3(PVM pVM, PUVM pUVM) +{ + int rc; + + /* + * Register the other EMTs with GVM. + */ + for (VMCPUID idCpu = 1; idCpu < pVM->cCpus; idCpu++) + { + rc = VMR3ReqCallWait(pVM, idCpu, (PFNRT)vmR3RegisterEMT, 2, pVM, idCpu); + if (RT_FAILURE(rc)) + return rc; + } + + /* + * Register statistics. 
+ */ + STAM_REG(pVM, &pVM->StatTotalInGC, STAMTYPE_PROFILE_ADV, "/PROF/VM/InGC", STAMUNIT_TICKS_PER_CALL, "Profiling the total time spent in GC."); + STAM_REG(pVM, &pVM->StatSwitcherToGC, STAMTYPE_PROFILE_ADV, "/PROF/VM/SwitchToGC", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherToHC, STAMTYPE_PROFILE_ADV, "/PROF/VM/SwitchToHC", STAMUNIT_TICKS_PER_CALL, "Profiling switching to HC."); + STAM_REG(pVM, &pVM->StatSwitcherSaveRegs, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/SaveRegs", STAMUNIT_TICKS_PER_CALL,"Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherSysEnter, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/SysEnter", STAMUNIT_TICKS_PER_CALL,"Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherDebug, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/Debug", STAMUNIT_TICKS_PER_CALL,"Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherCR0, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/CR0", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherCR4, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/CR4", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherLgdt, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/Lgdt", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherLidt, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/Lidt", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherLldt, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/Lldt", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherTSS, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/TSS", STAMUNIT_TICKS_PER_CALL, "Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherJmpCR3, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/JmpCR3", STAMUNIT_TICKS_PER_CALL,"Profiling switching to GC."); + STAM_REG(pVM, &pVM->StatSwitcherRstrRegs, STAMTYPE_PROFILE_ADV, "/VM/Switcher/ToGC/RstrRegs", STAMUNIT_TICKS_PER_CALL,"Profiling switching to GC."); + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + rc = STAMR3RegisterF(pVM, &pUVM->aCpus[idCpu].vm.s.StatHaltYield, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "Profiling halted state yielding.", "/PROF/CPU%d/VM/Halt/Yield", idCpu); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pUVM->aCpus[idCpu].vm.s.StatHaltBlock, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "Profiling halted state blocking.", "/PROF/CPU%d/VM/Halt/Block", idCpu); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pUVM->aCpus[idCpu].vm.s.StatHaltBlockOverslept, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "Time wasted by blocking too long.", "/PROF/CPU%d/VM/Halt/BlockOverslept", idCpu); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pUVM->aCpus[idCpu].vm.s.StatHaltBlockInsomnia, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "Time slept when returning to early.","/PROF/CPU%d/VM/Halt/BlockInsomnia", idCpu); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pUVM->aCpus[idCpu].vm.s.StatHaltBlockOnTime, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "Time slept on time.", "/PROF/CPU%d/VM/Halt/BlockOnTime", idCpu); + AssertRC(rc); + rc = STAMR3RegisterF(pVM, &pUVM->aCpus[idCpu].vm.s.StatHaltTimers, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "Profiling halted state timer tasks.", "/PROF/CPU%d/VM/Halt/Timers", idCpu); + AssertRC(rc); + } + + STAM_REG(pVM, &pUVM->vm.s.StatReqAllocNew, STAMTYPE_COUNTER, "/VM/Req/AllocNew", 
STAMUNIT_OCCURENCES, "Number of VMR3ReqAlloc returning a new packet."); + STAM_REG(pVM, &pUVM->vm.s.StatReqAllocRaces, STAMTYPE_COUNTER, "/VM/Req/AllocRaces", STAMUNIT_OCCURENCES, "Number of VMR3ReqAlloc causing races."); + STAM_REG(pVM, &pUVM->vm.s.StatReqAllocRecycled, STAMTYPE_COUNTER, "/VM/Req/AllocRecycled", STAMUNIT_OCCURENCES, "Number of VMR3ReqAlloc returning a recycled packet."); + STAM_REG(pVM, &pUVM->vm.s.StatReqFree, STAMTYPE_COUNTER, "/VM/Req/Free", STAMUNIT_OCCURENCES, "Number of VMR3ReqFree calls."); + STAM_REG(pVM, &pUVM->vm.s.StatReqFreeOverflow, STAMTYPE_COUNTER, "/VM/Req/FreeOverflow", STAMUNIT_OCCURENCES, "Number of times the request was actually freed."); + STAM_REG(pVM, &pUVM->vm.s.StatReqProcessed, STAMTYPE_COUNTER, "/VM/Req/Processed", STAMUNIT_OCCURENCES, "Number of processed requests (any queue)."); + STAM_REG(pVM, &pUVM->vm.s.StatReqMoreThan1, STAMTYPE_COUNTER, "/VM/Req/MoreThan1", STAMUNIT_OCCURENCES, "Number of times there are more than one request on the queue when processing it."); + STAM_REG(pVM, &pUVM->vm.s.StatReqPushBackRaces, STAMTYPE_COUNTER, "/VM/Req/PushBackRaces", STAMUNIT_OCCURENCES, "Number of push back races."); + + /* + * Init all R3 components, the order here might be important. + * NEM and HM shall be initialized first! + */ + Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NOT_SET); + rc = NEMR3InitConfig(pVM); + if (RT_SUCCESS(rc)) + rc = HMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + ASMCompilerBarrier(); /* HMR3Init will have modified bMainExecutionEngine */ +#ifdef VBOX_WITH_RAW_MODE + Assert( pVM->bMainExecutionEngine == VM_EXEC_ENGINE_HW_VIRT + || pVM->bMainExecutionEngine == VM_EXEC_ENGINE_RAW_MODE + || pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API); +#else + Assert( pVM->bMainExecutionEngine == VM_EXEC_ENGINE_HW_VIRT + || pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API); +#endif + rc = MMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = CPUMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = NEMR3InitAfterCPUM(pVM); + if (RT_SUCCESS(rc)) + rc = PGMR3Init(pVM); + if (RT_SUCCESS(rc)) + { +#ifdef VBOX_WITH_REM + rc = REMR3Init(pVM); +#endif + if (RT_SUCCESS(rc)) + { + rc = MMR3InitPaging(pVM); + if (RT_SUCCESS(rc)) + rc = TMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = FTMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = VMMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = SELMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = TRPMR3Init(pVM); + if (RT_SUCCESS(rc)) + { +#ifdef VBOX_WITH_RAW_MODE + rc = CSAMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = PATMR3Init(pVM); + if (RT_SUCCESS(rc)) + { +#endif + rc = IOMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = EMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = IEMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = DBGFR3Init(pVM); + if (RT_SUCCESS(rc)) + { + /* GIM must be init'd before PDM, gimdevR3Construct() + requires GIM provider to be setup. 
*/ + rc = GIMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = PDMR3Init(pVM); + if (RT_SUCCESS(rc)) + { + rc = PGMR3InitDynMap(pVM); + if (RT_SUCCESS(rc)) + rc = MMR3HyperInitFinalize(pVM); +#ifdef VBOX_WITH_RAW_MODE + if (RT_SUCCESS(rc)) + rc = PATMR3InitFinalize(pVM); +#endif + if (RT_SUCCESS(rc)) + rc = PGMR3InitFinalize(pVM); + if (RT_SUCCESS(rc)) + rc = SELMR3InitFinalize(pVM); + if (RT_SUCCESS(rc)) + rc = TMR3InitFinalize(pVM); +#ifdef VBOX_WITH_REM + if (RT_SUCCESS(rc)) + rc = REMR3InitFinalize(pVM); +#endif + if (RT_SUCCESS(rc)) + { + PGMR3MemSetup(pVM, false /*fAtReset*/); + PDMR3MemSetup(pVM, false /*fAtReset*/); + } + if (RT_SUCCESS(rc)) + rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RING3); + if (RT_SUCCESS(rc)) + { + LogFlow(("vmR3InitRing3: returns %Rrc\n", VINF_SUCCESS)); + return VINF_SUCCESS; + } + + int rc2 = PDMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = GIMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = DBGFR3Term(pVM); + AssertRC(rc2); + } + int rc2 = IEMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = EMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = IOMR3Term(pVM); + AssertRC(rc2); + } +#ifdef VBOX_WITH_RAW_MODE + int rc2 = PATMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = CSAMR3Term(pVM); + AssertRC(rc2); + } +#endif + int rc2 = TRPMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = SELMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = VMMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = FTMR3Term(pVM); + AssertRC(rc2); + } + int rc2 = TMR3Term(pVM); + AssertRC(rc2); + } +#ifdef VBOX_WITH_REM + int rc2 = REMR3Term(pVM); + AssertRC(rc2); +#endif + } + int rc2 = PGMR3Term(pVM); + AssertRC(rc2); + } + //int rc2 = CPUMR3Term(pVM); + //AssertRC(rc2); + } + /* MMR3Term is not called here because it'll kill the heap. */ + } + int rc2 = HMR3Term(pVM); + AssertRC(rc2); + } + NEMR3Term(pVM); + + LogFlow(("vmR3InitRing3: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Initializes all R0 components of the VM. + */ +static int vmR3InitRing0(PVM pVM) +{ + LogFlow(("vmR3InitRing0:\n")); + + /* + * Check for FAKE suplib mode. + */ + int rc = VINF_SUCCESS; + const char *psz = RTEnvGet("VBOX_SUPLIB_FAKE"); + if (!psz || strcmp(psz, "fake")) + { + /* + * Call the VMMR0 component and let it do the init. + */ + rc = VMMR3InitR0(pVM); + } + else + Log(("vmR3InitRing0: skipping because of VBOX_SUPLIB_FAKE=fake\n")); + + /* + * Do notifications and return. + */ + if (RT_SUCCESS(rc)) + rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RING0); + if (RT_SUCCESS(rc)) + rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_HM); + + LogFlow(("vmR3InitRing0: returns %Rrc\n", rc)); + return rc; +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Initializes all RC components of the VM + */ +static int vmR3InitRC(PVM pVM) +{ + LogFlow(("vmR3InitRC:\n")); + + /* + * Check for FAKE suplib mode. + */ + int rc = VINF_SUCCESS; + const char *psz = RTEnvGet("VBOX_SUPLIB_FAKE"); + if (!psz || strcmp(psz, "fake")) + { + /* + * Call the VMMR0 component and let it do the init. + */ + rc = VMMR3InitRC(pVM); + } + else + Log(("vmR3InitRC: skipping because of VBOX_SUPLIB_FAKE=fake\n")); + + /* + * Do notifications and return. + */ + if (RT_SUCCESS(rc)) + rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RC); + LogFlow(("vmR3InitRC: returns %Rrc\n", rc)); + return rc; +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Do init completed notifications. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat What's completed. 
+ */ +static int vmR3InitDoCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + int rc = VMMR3InitCompleted(pVM, enmWhat); + if (RT_SUCCESS(rc)) + rc = HMR3InitCompleted(pVM, enmWhat); + if (RT_SUCCESS(rc)) + rc = NEMR3InitCompleted(pVM, enmWhat); + if (RT_SUCCESS(rc)) + rc = PGMR3InitCompleted(pVM, enmWhat); + if (RT_SUCCESS(rc)) + rc = CPUMR3InitCompleted(pVM, enmWhat); + if (RT_SUCCESS(rc)) + rc = EMR3InitCompleted(pVM, enmWhat); + if (enmWhat == VMINITCOMPLETED_RING3) + { +#ifndef VBOX_WITH_RAW_MODE + if (RT_SUCCESS(rc)) + rc = SSMR3RegisterStub(pVM, "CSAM", 0); + if (RT_SUCCESS(rc)) + rc = SSMR3RegisterStub(pVM, "PATM", 0); +#endif +#ifndef VBOX_WITH_REM + if (RT_SUCCESS(rc)) + rc = SSMR3RegisterStub(pVM, "rem", 1); +#endif + } + if (RT_SUCCESS(rc)) + rc = PDMR3InitCompleted(pVM, enmWhat); + return rc; +} + + +/** + * Calls the relocation functions for all VMM components so they can update + * any GC pointers. When this function is called all the basic VM members + * have been updated and the actual memory relocation have been done + * by the PGM/MM. + * + * This is used both on init and on runtime relocations. + * + * @param pVM The cross context VM structure. + * @param offDelta Relocation delta relative to old location. + */ +VMMR3_INT_DECL(void) VMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + LogFlow(("VMR3Relocate: offDelta=%RGv\n", offDelta)); + + /* + * The order here is very important! + */ + PGMR3Relocate(pVM, offDelta); + PDMR3LdrRelocateU(pVM->pUVM, offDelta); + PGMR3Relocate(pVM, 0); /* Repeat after PDM relocation. */ + CPUMR3Relocate(pVM); + HMR3Relocate(pVM); + SELMR3Relocate(pVM); + VMMR3Relocate(pVM, offDelta); + SELMR3Relocate(pVM); /* !hack! fix stack! */ + TRPMR3Relocate(pVM, offDelta); +#ifdef VBOX_WITH_RAW_MODE + PATMR3Relocate(pVM, (RTRCINTPTR)offDelta); + CSAMR3Relocate(pVM, offDelta); +#endif + IOMR3Relocate(pVM, offDelta); + EMR3Relocate(pVM); + TMR3Relocate(pVM, offDelta); + IEMR3Relocate(pVM); + DBGFR3Relocate(pVM, offDelta); + PDMR3Relocate(pVM, offDelta); + GIMR3Relocate(pVM, offDelta); +} + + +/** + * EMT rendezvous worker for VMR3PowerOn. + * + * @returns VERR_VM_INVALID_VM_STATE or VINF_SUCCESS. (This is a strict return + * code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Ignored. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3PowerOn(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + LogFlow(("vmR3PowerOn: pVM=%p pVCpu=%p/#%u\n", pVM, pVCpu, pVCpu->idCpu)); + Assert(!pvUser); NOREF(pvUser); + + /* + * The first thread thru here tries to change the state. We shouldn't be + * called again if this fails. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "VMR3PowerOn", 1, VMSTATE_POWERING_ON, VMSTATE_CREATED); + if (RT_FAILURE(rc)) + return rc; + } + + VMSTATE enmVMState = VMR3GetState(pVM); + AssertMsgReturn(enmVMState == VMSTATE_POWERING_ON, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_UNEXPECTED_UNSTABLE_STATE); + + /* + * All EMTs changes their state to started. + */ + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); + + /* + * EMT(0) is last thru here and it will make the notification calls + * and advance the state. + */ + if (pVCpu->idCpu == 0) + { + PDMR3PowerOn(pVM); + vmR3SetState(pVM, VMSTATE_RUNNING, VMSTATE_POWERING_ON); + } + + return VINF_SUCCESS; +} + + +/** + * Powers on the virtual machine. + * + * @returns VBox status code. + * + * @param pUVM The VM to power on. + * + * @thread Any thread. 
+ * @vmstate Created + * @vmstateto PoweringOn+Running + */ +VMMR3DECL(int) VMR3PowerOn(PUVM pUVM) +{ + LogFlow(("VMR3PowerOn: pUVM=%p\n", pUVM)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* + * Gather all the EMTs to reduce the init TSC drift and keep + * the state changing APIs a bit uniform. + */ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR, + vmR3PowerOn, NULL); + LogFlow(("VMR3PowerOn: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Does the suspend notifications. + * + * @param pVM The cross context VM structure. + * @thread EMT(0) + */ +static void vmR3SuspendDoWork(PVM pVM) +{ + PDMR3Suspend(pVM); +} + + +/** + * EMT rendezvous worker for VMR3Suspend. + * + * @returns VERR_VM_INVALID_VM_STATE or VINF_EM_SUSPEND. (This is a strict + * return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Ignored. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3Suspend(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + VMSUSPENDREASON enmReason = (VMSUSPENDREASON)(uintptr_t)pvUser; + LogFlow(("vmR3Suspend: pVM=%p pVCpu=%p/#%u enmReason=%d\n", pVM, pVCpu, pVCpu->idCpu, enmReason)); + + /* + * The first EMT switches the state to suspending. If this fails because + * something was racing us in one way or the other, there will be no more + * calls and thus the state assertion below is not going to annoy anyone. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "VMR3Suspend", 2, + VMSTATE_SUSPENDING, VMSTATE_RUNNING, + VMSTATE_SUSPENDING_EXT_LS, VMSTATE_RUNNING_LS); + if (RT_FAILURE(rc)) + return rc; + pVM->pUVM->vm.s.enmSuspendReason = enmReason; + } + + VMSTATE enmVMState = VMR3GetState(pVM); + AssertMsgReturn( enmVMState == VMSTATE_SUSPENDING + || enmVMState == VMSTATE_SUSPENDING_EXT_LS, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_UNEXPECTED_UNSTABLE_STATE); + + /* + * EMT(0) does the actually suspending *after* all the other CPUs have + * been thru here. + */ + if (pVCpu->idCpu == 0) + { + vmR3SuspendDoWork(pVM); + + int rc = vmR3TrySetState(pVM, "VMR3Suspend", 2, + VMSTATE_SUSPENDED, VMSTATE_SUSPENDING, + VMSTATE_SUSPENDED_EXT_LS, VMSTATE_SUSPENDING_EXT_LS); + if (RT_FAILURE(rc)) + return VERR_VM_UNEXPECTED_UNSTABLE_STATE; + } + + return VINF_EM_SUSPEND; +} + + +/** + * Suspends a running VM. + * + * @returns VBox status code. When called on EMT, this will be a strict status + * code that has to be propagated up the call stack. + * + * @param pUVM The VM to suspend. + * @param enmReason The reason for suspending. + * + * @thread Any thread. + * @vmstate Running or RunningLS + * @vmstateto Suspending + Suspended or SuspendingExtLS + SuspendedExtLS + */ +VMMR3DECL(int) VMR3Suspend(PUVM pUVM, VMSUSPENDREASON enmReason) +{ + LogFlow(("VMR3Suspend: pUVM=%p\n", pUVM)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(enmReason > VMSUSPENDREASON_INVALID && enmReason < VMSUSPENDREASON_END, VERR_INVALID_PARAMETER); + + /* + * Gather all the EMTs to make sure there are no races before + * changing the VM state. 
+ */ + int rc = VMMR3EmtRendezvous(pUVM->pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR, + vmR3Suspend, (void *)(uintptr_t)enmReason); + LogFlow(("VMR3Suspend: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Retrieves the reason for the most recent suspend. + * + * @returns Suspend reason. VMSUSPENDREASON_INVALID if no suspend has been done + * or the handle is invalid. + * @param pUVM The user mode VM handle. + */ +VMMR3DECL(VMSUSPENDREASON) VMR3GetSuspendReason(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VMSUSPENDREASON_INVALID); + return pUVM->vm.s.enmSuspendReason; +} + + +/** + * EMT rendezvous worker for VMR3Resume. + * + * @returns VERR_VM_INVALID_VM_STATE or VINF_EM_RESUME. (This is a strict + * return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Reason. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3Resume(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + VMRESUMEREASON enmReason = (VMRESUMEREASON)(uintptr_t)pvUser; + LogFlow(("vmR3Resume: pVM=%p pVCpu=%p/#%u enmReason=%d\n", pVM, pVCpu, pVCpu->idCpu, enmReason)); + + /* + * The first thread thru here tries to change the state. We shouldn't be + * called again if this fails. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "VMR3Resume", 1, VMSTATE_RESUMING, VMSTATE_SUSPENDED); + if (RT_FAILURE(rc)) + return rc; + pVM->pUVM->vm.s.enmResumeReason = enmReason; + } + + VMSTATE enmVMState = VMR3GetState(pVM); + AssertMsgReturn(enmVMState == VMSTATE_RESUMING, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_UNEXPECTED_UNSTABLE_STATE); + +#if 0 + /* + * All EMTs changes their state to started. + */ + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); +#endif + + /* + * EMT(0) is last thru here and it will make the notification calls + * and advance the state. + */ + if (pVCpu->idCpu == 0) + { + PDMR3Resume(pVM); + vmR3SetState(pVM, VMSTATE_RUNNING, VMSTATE_RESUMING); + pVM->vm.s.fTeleportedAndNotFullyResumedYet = false; + } + + return VINF_EM_RESUME; +} + + +/** + * Resume VM execution. + * + * @returns VBox status code. When called on EMT, this will be a strict status + * code that has to be propagated up the call stack. + * + * @param pUVM The user mode VM handle. + * @param enmReason The reason we're resuming. + * + * @thread Any thread. + * @vmstate Suspended + * @vmstateto Running + */ +VMMR3DECL(int) VMR3Resume(PUVM pUVM, VMRESUMEREASON enmReason) +{ + LogFlow(("VMR3Resume: pUVM=%p\n", pUVM)); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(enmReason > VMRESUMEREASON_INVALID && enmReason < VMRESUMEREASON_END, VERR_INVALID_PARAMETER); + + /* + * Gather all the EMTs to make sure there are no races before + * changing the VM state. + */ + int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR, + vmR3Resume, (void *)(uintptr_t)enmReason); + LogFlow(("VMR3Resume: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Retrieves the reason for the most recent resume. + * + * @returns Resume reason. VMRESUMEREASON_INVALID if no suspend has been + * done or the handle is invalid. + * @param pUVM The user mode VM handle. 
+ */ +VMMR3DECL(VMRESUMEREASON) VMR3GetResumeReason(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VMRESUMEREASON_INVALID); + return pUVM->vm.s.enmResumeReason; +} + + +/** + * EMT rendezvous worker for VMR3Save and VMR3Teleport that suspends the VM + * after the live step has been completed. + * + * @returns VERR_VM_INVALID_VM_STATE or VINF_EM_RESUME. (This is a strict + * return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser The pfSuspended argument of vmR3SaveTeleport. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3LiveDoSuspend(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + LogFlow(("vmR3LiveDoSuspend: pVM=%p pVCpu=%p/#%u\n", pVM, pVCpu, pVCpu->idCpu)); + bool *pfSuspended = (bool *)pvUser; + + /* + * The first thread thru here tries to change the state. We shouldn't be + * called again if this fails. + */ + if (pVCpu->idCpu == pVM->cCpus - 1U) + { + PUVM pUVM = pVM->pUVM; + int rc; + + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + VMSTATE enmVMState = pVM->enmVMState; + switch (enmVMState) + { + case VMSTATE_RUNNING_LS: + vmR3SetStateLocked(pVM, pUVM, VMSTATE_SUSPENDING_LS, VMSTATE_RUNNING_LS, false /*fSetRatherThanClearFF*/); + rc = VINF_SUCCESS; + break; + + case VMSTATE_SUSPENDED_EXT_LS: + case VMSTATE_SUSPENDED_LS: /* (via reset) */ + rc = VINF_SUCCESS; + break; + + case VMSTATE_DEBUGGING_LS: + rc = VERR_TRY_AGAIN; + break; + + case VMSTATE_OFF_LS: + vmR3SetStateLocked(pVM, pUVM, VMSTATE_OFF, VMSTATE_OFF_LS, false /*fSetRatherThanClearFF*/); + rc = VERR_SSM_LIVE_POWERED_OFF; + break; + + case VMSTATE_FATAL_ERROR_LS: + vmR3SetStateLocked(pVM, pUVM, VMSTATE_FATAL_ERROR, VMSTATE_FATAL_ERROR_LS, false /*fSetRatherThanClearFF*/); + rc = VERR_SSM_LIVE_FATAL_ERROR; + break; + + case VMSTATE_GURU_MEDITATION_LS: + vmR3SetStateLocked(pVM, pUVM, VMSTATE_GURU_MEDITATION, VMSTATE_GURU_MEDITATION_LS, false /*fSetRatherThanClearFF*/); + rc = VERR_SSM_LIVE_GURU_MEDITATION; + break; + + case VMSTATE_POWERING_OFF_LS: + case VMSTATE_SUSPENDING_EXT_LS: + case VMSTATE_RESETTING_LS: + default: + AssertMsgFailed(("%s\n", VMR3GetStateName(enmVMState))); + rc = VERR_VM_UNEXPECTED_VM_STATE; + break; + } + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + if (RT_FAILURE(rc)) + { + LogFlow(("vmR3LiveDoSuspend: returns %Rrc (state was %s)\n", rc, VMR3GetStateName(enmVMState))); + return rc; + } + } + + VMSTATE enmVMState = VMR3GetState(pVM); + AssertMsgReturn(enmVMState == VMSTATE_SUSPENDING_LS, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_UNEXPECTED_UNSTABLE_STATE); + + /* + * Only EMT(0) have work to do since it's last thru here. + */ + if (pVCpu->idCpu == 0) + { + vmR3SuspendDoWork(pVM); + int rc = vmR3TrySetState(pVM, "VMR3Suspend", 1, + VMSTATE_SUSPENDED_LS, VMSTATE_SUSPENDING_LS); + if (RT_FAILURE(rc)) + return VERR_VM_UNEXPECTED_UNSTABLE_STATE; + + *pfSuspended = true; + } + + return VINF_EM_SUSPEND; +} + + +/** + * EMT rendezvous worker that VMR3Save and VMR3Teleport uses to clean up a + * SSMR3LiveDoStep1 failure. + * + * Doing this as a rendezvous operation avoids all annoying transition + * states. + * + * @returns VERR_VM_INVALID_VM_STATE, VINF_SUCCESS or some specific VERR_SSM_* + * status code. (This is a strict return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser The pfSuspended argument of vmR3SaveTeleport. 
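+ *
+ * @remarks (Editorial note:) vmR3TrySetState returns the 1-based ordinal of
+ *          the transition pair that succeeded, which is what the rc == 1..4
+ *          checks in the body rely on.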
+ */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3LiveDoStep1Cleanup(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + LogFlow(("vmR3LiveDoStep1Cleanup: pVM=%p pVCpu=%p/#%u\n", pVM, pVCpu, pVCpu->idCpu)); + bool *pfSuspended = (bool *)pvUser; + NOREF(pVCpu); + + int rc = vmR3TrySetState(pVM, "vmR3LiveDoStep1Cleanup", 8, + VMSTATE_OFF, VMSTATE_OFF_LS, /* 1 */ + VMSTATE_FATAL_ERROR, VMSTATE_FATAL_ERROR_LS, /* 2 */ + VMSTATE_GURU_MEDITATION, VMSTATE_GURU_MEDITATION_LS, /* 3 */ + VMSTATE_SUSPENDED, VMSTATE_SUSPENDED_LS, /* 4 */ + VMSTATE_SUSPENDED, VMSTATE_SAVING, + VMSTATE_SUSPENDED, VMSTATE_SUSPENDED_EXT_LS, + VMSTATE_RUNNING, VMSTATE_RUNNING_LS, + VMSTATE_DEBUGGING, VMSTATE_DEBUGGING_LS); + if (rc == 1) + rc = VERR_SSM_LIVE_POWERED_OFF; + else if (rc == 2) + rc = VERR_SSM_LIVE_FATAL_ERROR; + else if (rc == 3) + rc = VERR_SSM_LIVE_GURU_MEDITATION; + else if (rc == 4) + { + *pfSuspended = true; + rc = VINF_SUCCESS; + } + else if (rc > 0) + rc = VINF_SUCCESS; + return rc; +} + + +/** + * EMT(0) worker for VMR3Save and VMR3Teleport that completes the live save. + * + * @returns VBox status code. + * @retval VINF_SSM_LIVE_SUSPENDED if VMR3Suspend was called. + * + * @param pVM The cross context VM structure. + * @param pSSM The handle of saved state operation. + * + * @thread EMT(0) + */ +static DECLCALLBACK(int) vmR3LiveDoStep2(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("vmR3LiveDoStep2: pVM=%p pSSM=%p\n", pVM, pSSM)); + VM_ASSERT_EMT0(pVM); + + /* + * Advance the state and mark if VMR3Suspend was called. + */ + int rc = VINF_SUCCESS; + VMSTATE enmVMState = VMR3GetState(pVM); + if (enmVMState == VMSTATE_SUSPENDED_LS) + vmR3SetState(pVM, VMSTATE_SAVING, VMSTATE_SUSPENDED_LS); + else + { + if (enmVMState != VMSTATE_SAVING) + vmR3SetState(pVM, VMSTATE_SAVING, VMSTATE_SUSPENDED_EXT_LS); + rc = VINF_SSM_LIVE_SUSPENDED; + } + + /* + * Finish up and release the handle. Careful with the status codes. + */ + int rc2 = SSMR3LiveDoStep2(pSSM); + if (rc == VINF_SUCCESS || (RT_FAILURE(rc2) && RT_SUCCESS(rc))) + rc = rc2; + + rc2 = SSMR3LiveDone(pSSM); + if (rc == VINF_SUCCESS || (RT_FAILURE(rc2) && RT_SUCCESS(rc))) + rc = rc2; + + /* + * Advance to the final state and return. + */ + vmR3SetState(pVM, VMSTATE_SUSPENDED, VMSTATE_SAVING); + Assert(rc > VINF_EM_LAST || rc < VINF_EM_FIRST); + return rc; +} + + +/** + * Worker for vmR3SaveTeleport that validates the state and calls SSMR3Save or + * SSMR3LiveSave. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param cMsMaxDowntime The maximum downtime given as milliseconds. + * @param pszFilename The name of the file. NULL if pStreamOps is used. + * @param pStreamOps The stream methods. NULL if pszFilename is used. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param enmAfter What to do afterwards. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * @param ppSSM Where to return the saved state handle in case of a + * live snapshot scenario. 
+ * @param fSkipStateChanges Set if we're supposed to skip state changes (FTM delta case) + * + * @thread EMT + */ +static DECLCALLBACK(int) vmR3Save(PVM pVM, uint32_t cMsMaxDowntime, const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser, PSSMHANDLE *ppSSM, + bool fSkipStateChanges) +{ + int rc = VINF_SUCCESS; + + LogFlow(("vmR3Save: pVM=%p cMsMaxDowntime=%u pszFilename=%p:{%s} pStreamOps=%p pvStreamOpsUser=%p enmAfter=%d pfnProgress=%p pvProgressUser=%p ppSSM=%p\n", + pVM, cMsMaxDowntime, pszFilename, pszFilename, pStreamOps, pvStreamOpsUser, enmAfter, pfnProgress, pvProgressUser, ppSSM)); + + /* + * Validate input. + */ + AssertPtrNull(pszFilename); + AssertPtrNull(pStreamOps); + AssertPtr(pVM); + Assert( enmAfter == SSMAFTER_DESTROY + || enmAfter == SSMAFTER_CONTINUE + || enmAfter == SSMAFTER_TELEPORT); + AssertPtr(ppSSM); + *ppSSM = NULL; + + /* + * Change the state and perform/start the saving. + */ + if (!fSkipStateChanges) + { + rc = vmR3TrySetState(pVM, "VMR3Save", 2, + VMSTATE_SAVING, VMSTATE_SUSPENDED, + VMSTATE_RUNNING_LS, VMSTATE_RUNNING); + } + else + { + Assert(enmAfter != SSMAFTER_TELEPORT); + rc = 1; + } + + if (rc == 1 && enmAfter != SSMAFTER_TELEPORT) + { + rc = SSMR3Save(pVM, pszFilename, pStreamOps, pvStreamOpsUser, enmAfter, pfnProgress, pvProgressUser); + if (!fSkipStateChanges) + vmR3SetState(pVM, VMSTATE_SUSPENDED, VMSTATE_SAVING); + } + else if (rc == 2 || enmAfter == SSMAFTER_TELEPORT) + { + Assert(!fSkipStateChanges); + if (enmAfter == SSMAFTER_TELEPORT) + pVM->vm.s.fTeleportedAndNotFullyResumedYet = true; + rc = SSMR3LiveSave(pVM, cMsMaxDowntime, pszFilename, pStreamOps, pvStreamOpsUser, + enmAfter, pfnProgress, pvProgressUser, ppSSM); + /* (We're not subject to cancellation just yet.) */ + } + else + Assert(RT_FAILURE(rc)); + return rc; +} + + +/** + * Common worker for VMR3Save and VMR3Teleport. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param cMsMaxDowntime The maximum downtime given as milliseconds. + * @param pszFilename The name of the file. NULL if pStreamOps is used. + * @param pStreamOps The stream methods. NULL if pszFilename is used. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param enmAfter What to do afterwards. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * @param pfSuspended Set if we suspended the VM. + * @param fSkipStateChanges Set if we're supposed to skip state changes (FTM delta case) + * + * @thread Non-EMT + */ +static int vmR3SaveTeleport(PVM pVM, uint32_t cMsMaxDowntime, + const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + SSMAFTER enmAfter, PFNVMPROGRESS pfnProgress, void *pvProgressUser, bool *pfSuspended, + bool fSkipStateChanges) +{ + /* + * Request the operation in EMT(0). + */ + PSSMHANDLE pSSM; + int rc = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, + (PFNRT)vmR3Save, 10, pVM, cMsMaxDowntime, pszFilename, pStreamOps, pvStreamOpsUser, + enmAfter, pfnProgress, pvProgressUser, &pSSM, fSkipStateChanges); + if ( RT_SUCCESS(rc) + && pSSM) + { + Assert(!fSkipStateChanges); + + /* + * Live snapshot. + * + * The state handling here is kind of tricky, doing it on EMT(0) helps + * a bit. See the VMSTATE diagram for details. + */ + rc = SSMR3LiveDoStep1(pSSM); + if (RT_SUCCESS(rc)) + { + if (VMR3GetState(pVM) != VMSTATE_SAVING) + for (;;) + { + /* Try suspend the VM. 
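+ (Editorial note: vmR3LiveDoSuspend returns VERR_TRY_AGAIN while the
+ VM is in the DebuggingLS state, hence the polling below.)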
*/
+ rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
+ vmR3LiveDoSuspend, pfSuspended);
+ if (rc != VERR_TRY_AGAIN)
+ break;
+
+ /* Wait for the state to change. */
+ RTThreadSleep(250); /** @todo Live Migration: fix this polling wait by some smart use of multiple release event semaphores.. */
+ }
+ if (RT_SUCCESS(rc))
+ rc = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)vmR3LiveDoStep2, 2, pVM, pSSM);
+ else
+ {
+ int rc2 = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)SSMR3LiveDone, 1, pSSM);
+ AssertMsg(rc2 == rc, ("%Rrc != %Rrc\n", rc2, rc)); NOREF(rc2);
+ }
+ }
+ else
+ {
+ int rc2 = VMR3ReqCallWait(pVM, 0 /*idDstCpu*/, (PFNRT)SSMR3LiveDone, 1, pSSM);
+ AssertMsg(rc2 == rc, ("%Rrc != %Rrc\n", rc2, rc));
+
+ rc2 = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, vmR3LiveDoStep1Cleanup, pfSuspended);
+ if (RT_FAILURE(rc2) && rc == VERR_SSM_CANCELLED)
+ rc = rc2;
+ }
+ }
+
+ return rc;
+}
+
+
+/**
+ * Save current VM state.
+ *
+ * Can be used for both saving the state and creating snapshots.
+ *
+ * When called for a VM in the Running state, the saved state is created live
+ * and the VM is only suspended when the final part of the saving is performed.
+ * The VM state will not be restored to Running in this case and it's up to the
+ * caller to call VMR3Resume if this is desirable. (The rationale is that the
+ * caller probably wishes to reconfigure the disks before resuming the VM.)
+ *
+ * @returns VBox status code.
+ *
+ * @param pUVM The VM whose state should be saved.
+ * @param pszFilename The name of the save state file.
+ * @param fContinueAfterwards Whether to continue execution afterwards or not.
+ * When in doubt, set this to true.
+ * @param pfnProgress Progress callback. Optional.
+ * @param pvUser User argument for the progress callback.
+ * @param pfSuspended Set if we suspended the VM.
+ *
+ * @thread Non-EMT.
+ * @vmstate Suspended or Running
+ * @vmstateto Saving+Suspended or
+ * RunningLS+SuspendingLS+SuspendedLS+Saving+Suspended.
+ */
+VMMR3DECL(int) VMR3Save(PUVM pUVM, const char *pszFilename, bool fContinueAfterwards, PFNVMPROGRESS pfnProgress, void *pvUser,
+ bool *pfSuspended)
+{
+ LogFlow(("VMR3Save: pUVM=%p pszFilename=%p:{%s} fContinueAfterwards=%RTbool pfnProgress=%p pvUser=%p pfSuspended=%p\n",
+ pUVM, pszFilename, pszFilename, fContinueAfterwards, pfnProgress, pvUser, pfSuspended));
+
+ /*
+ * Validate input.
+ */
+ AssertPtr(pfSuspended);
+ *pfSuspended = false;
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
+ PVM pVM = pUVM->pVM;
+ VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
+ VM_ASSERT_OTHER_THREAD(pVM);
+ AssertReturn(VALID_PTR(pszFilename), VERR_INVALID_POINTER);
+ AssertReturn(*pszFilename, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(pfnProgress, VERR_INVALID_POINTER);
+
+ /*
+ * Join paths with VMR3Teleport.
+ */
+ SSMAFTER enmAfter = fContinueAfterwards ? SSMAFTER_CONTINUE : SSMAFTER_DESTROY;
+ int rc = vmR3SaveTeleport(pVM, 250 /*cMsMaxDowntime*/,
+ pszFilename, NULL /* pStreamOps */, NULL /* pvStreamOpsUser */,
+ enmAfter, pfnProgress, pvUser, pfSuspended,
+ false /* fSkipStateChanges */);
+ LogFlow(("VMR3Save: returns %Rrc (*pfSuspended=%RTbool)\n", rc, *pfSuspended));
+ return rc;
+}
+
+/**
+ * Save current VM state (used by FTM).
+ *
+ *
+ * @returns VBox status code.
+ *
+ * @param pUVM The user mode VM handle.
+ * @param pStreamOps The stream methods.
+ * @param pvStreamOpsUser The user argument to the stream methods.
+ * @param pfSuspended Set if we suspended the VM. + * @param fSkipStateChanges Set if we're supposed to skip state changes (FTM delta case) + * + * @thread Any + * @vmstate Suspended or Running + * @vmstateto Saving+Suspended or + * RunningLS+SuspendingLS+SuspendedLS+Saving+Suspended. + */ +VMMR3_INT_DECL(int) VMR3SaveFT(PUVM pUVM, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, bool *pfSuspended, bool fSkipStateChanges) +{ + LogFlow(("VMR3SaveFT: pUVM=%p pStreamOps=%p pvSteamOpsUser=%p pfSuspended=%p\n", + pUVM, pStreamOps, pvStreamOpsUser, pfSuspended)); + + /* + * Validate input. + */ + AssertPtr(pfSuspended); + *pfSuspended = false; + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(pStreamOps, VERR_INVALID_PARAMETER); + + /* + * Join paths with VMR3Teleport. + */ + int rc = vmR3SaveTeleport(pVM, 250 /*cMsMaxDowntime*/, + NULL, pStreamOps, pvStreamOpsUser, + SSMAFTER_CONTINUE, NULL, NULL, pfSuspended, + fSkipStateChanges); + LogFlow(("VMR3SaveFT: returns %Rrc (*pfSuspended=%RTbool)\n", rc, *pfSuspended)); + return rc; +} + + +/** + * Teleport the VM (aka live migration). + * + * @returns VBox status code. + * + * @param pUVM The VM which state should be saved. + * @param cMsMaxDowntime The maximum downtime given as milliseconds. + * @param pStreamOps The stream methods. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * @param pfSuspended Set if we suspended the VM. + * + * @thread Non-EMT. + * @vmstate Suspended or Running + * @vmstateto Saving+Suspended or + * RunningLS+SuspendingLS+SuspendedLS+Saving+Suspended. + */ +VMMR3DECL(int) VMR3Teleport(PUVM pUVM, uint32_t cMsMaxDowntime, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + PFNVMPROGRESS pfnProgress, void *pvProgressUser, bool *pfSuspended) +{ + LogFlow(("VMR3Teleport: pUVM=%p cMsMaxDowntime=%u pStreamOps=%p pvStreamOps=%p pfnProgress=%p pvProgressUser=%p\n", + pUVM, cMsMaxDowntime, pStreamOps, pvStreamOpsUser, pfnProgress, pvProgressUser)); + + /* + * Validate input. + */ + AssertPtr(pfSuspended); + *pfSuspended = false; + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + VM_ASSERT_OTHER_THREAD(pVM); + AssertPtrReturn(pStreamOps, VERR_INVALID_POINTER); + AssertPtrNullReturn(pfnProgress, VERR_INVALID_POINTER); + + /* + * Join paths with VMR3Save. + */ + int rc = vmR3SaveTeleport(pVM, cMsMaxDowntime, + NULL /*pszFilename*/, pStreamOps, pvStreamOpsUser, + SSMAFTER_TELEPORT, pfnProgress, pvProgressUser, pfSuspended, + false /* fSkipStateChanges */); + LogFlow(("VMR3Teleport: returns %Rrc (*pfSuspended=%RTbool)\n", rc, *pfSuspended)); + return rc; +} + + + +/** + * EMT(0) worker for VMR3LoadFromFile and VMR3LoadFromStream. + * + * @returns VBox status code. + * + * @param pUVM Pointer to the VM. + * @param pszFilename The name of the file. NULL if pStreamOps is used. + * @param pStreamOps The stream methods. NULL if pszFilename is used. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * @param fTeleporting Indicates whether we're teleporting or not. + * @param fSkipStateChanges Set if we're supposed to skip state changes (FTM delta case) + * + * @thread EMT. 
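+ *
+ * @remarks (Editorial note:) This worker is executed on EMT(0) via
+ *          VMR3ReqCallWaitU by VMR3LoadFromFile, VMR3LoadFromStream and
+ *          VMR3LoadFromStreamFT below.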
+ */ +static DECLCALLBACK(int) vmR3Load(PUVM pUVM, const char *pszFilename, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + PFNVMPROGRESS pfnProgress, void *pvProgressUser, bool fTeleporting, + bool fSkipStateChanges) +{ + int rc = VINF_SUCCESS; + + LogFlow(("vmR3Load: pUVM=%p pszFilename=%p:{%s} pStreamOps=%p pvStreamOpsUser=%p pfnProgress=%p pvProgressUser=%p fTeleporting=%RTbool\n", + pUVM, pszFilename, pszFilename, pStreamOps, pvStreamOpsUser, pfnProgress, pvProgressUser, fTeleporting)); + + /* + * Validate input (paranoia). + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrNull(pszFilename); + AssertPtrNull(pStreamOps); + AssertPtrNull(pfnProgress); + + if (!fSkipStateChanges) + { + /* + * Change the state and perform the load. + * + * Always perform a relocation round afterwards to make sure hypervisor + * selectors and such are correct. + */ + rc = vmR3TrySetState(pVM, "VMR3Load", 2, + VMSTATE_LOADING, VMSTATE_CREATED, + VMSTATE_LOADING, VMSTATE_SUSPENDED); + if (RT_FAILURE(rc)) + return rc; + } + pVM->vm.s.fTeleportedAndNotFullyResumedYet = fTeleporting; + + uint32_t cErrorsPriorToSave = VMR3GetErrorCount(pUVM); + rc = SSMR3Load(pVM, pszFilename, pStreamOps, pvStreamOpsUser, SSMAFTER_RESUME, pfnProgress, pvProgressUser); + if (RT_SUCCESS(rc)) + { + VMR3Relocate(pVM, 0 /*offDelta*/); + if (!fSkipStateChanges) + vmR3SetState(pVM, VMSTATE_SUSPENDED, VMSTATE_LOADING); + } + else + { + pVM->vm.s.fTeleportedAndNotFullyResumedYet = false; + if (!fSkipStateChanges) + vmR3SetState(pVM, VMSTATE_LOAD_FAILURE, VMSTATE_LOADING); + + if (cErrorsPriorToSave == VMR3GetErrorCount(pUVM)) + rc = VMSetError(pVM, rc, RT_SRC_POS, + N_("Unable to restore the virtual machine's saved state from '%s'. " + "It may be damaged or from an older version of VirtualBox. " + "Please discard the saved state before starting the virtual machine"), + pszFilename); + } + + return rc; +} + + +/** + * Loads a VM state into a newly created VM or a one that is suspended. + * + * To restore a saved state on VM startup, call this function and then resume + * the VM instead of powering it on. + * + * @returns VBox status code. + * + * @param pUVM The user mode VM structure. + * @param pszFilename The name of the save state file. + * @param pfnProgress Progress callback. Optional. + * @param pvUser User argument for the progress callback. + * + * @thread Any thread. + * @vmstate Created, Suspended + * @vmstateto Loading+Suspended + */ +VMMR3DECL(int) VMR3LoadFromFile(PUVM pUVM, const char *pszFilename, PFNVMPROGRESS pfnProgress, void *pvUser) +{ + LogFlow(("VMR3LoadFromFile: pUVM=%p pszFilename=%p:{%s} pfnProgress=%p pvUser=%p\n", + pUVM, pszFilename, pszFilename, pfnProgress, pvUser)); + + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pszFilename, VERR_INVALID_POINTER); + + /* + * Forward the request to EMT(0). No need to setup a rendezvous here + * since there is no execution taking place when this call is allowed. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)vmR3Load, 8, + pUVM, pszFilename, (uintptr_t)NULL /*pStreamOps*/, (uintptr_t)NULL /*pvStreamOpsUser*/, pfnProgress, pvUser, + false /*fTeleporting*/, false /* fSkipStateChanges */); + LogFlow(("VMR3LoadFromFile: returns %Rrc\n", rc)); + return rc; +} + + +/** + * VMR3LoadFromFile for arbitrary file streams. + * + * @returns VBox status code. + * + * @param pUVM Pointer to the VM. 
+ * @param pStreamOps The stream methods. + * @param pvStreamOpsUser The user argument to the stream methods. + * @param pfnProgress Progress callback. Optional. + * @param pvProgressUser User argument for the progress callback. + * + * @thread Any thread. + * @vmstate Created, Suspended + * @vmstateto Loading+Suspended + */ +VMMR3DECL(int) VMR3LoadFromStream(PUVM pUVM, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser, + PFNVMPROGRESS pfnProgress, void *pvProgressUser) +{ + LogFlow(("VMR3LoadFromStream: pUVM=%p pStreamOps=%p pvStreamOpsUser=%p pfnProgress=%p pvProgressUser=%p\n", + pUVM, pStreamOps, pvStreamOpsUser, pfnProgress, pvProgressUser)); + + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pStreamOps, VERR_INVALID_POINTER); + + /* + * Forward the request to EMT(0). No need to setup a rendezvous here + * since there is no execution taking place when this call is allowed. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)vmR3Load, 8, + pUVM, (uintptr_t)NULL /*pszFilename*/, pStreamOps, pvStreamOpsUser, pfnProgress, pvProgressUser, + true /*fTeleporting*/, false /* fSkipStateChanges */); + LogFlow(("VMR3LoadFromStream: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Special version for the FT component, it skips state changes. + * + * @returns VBox status code. + * + * @param pUVM The VM handle. + * @param pStreamOps The stream methods. + * @param pvStreamOpsUser The user argument to the stream methods. + * + * @thread Any thread. + * @vmstate Created, Suspended + * @vmstateto Loading+Suspended + */ +VMMR3_INT_DECL(int) VMR3LoadFromStreamFT(PUVM pUVM, PCSSMSTRMOPS pStreamOps, void *pvStreamOpsUser) +{ + LogFlow(("VMR3LoadFromStreamFT: pUVM=%p pStreamOps=%p pvStreamOpsUser=%p\n", pUVM, pStreamOps, pvStreamOpsUser)); + + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pStreamOps, VERR_INVALID_POINTER); + + /* + * Forward the request to EMT(0). No need to setup a rendezvous here + * since there is no execution taking place when this call is allowed. + */ + int rc = VMR3ReqCallWaitU(pUVM, 0 /*idDstCpu*/, (PFNRT)vmR3Load, 8, + pUVM, (uintptr_t)NULL /*pszFilename*/, pStreamOps, pvStreamOpsUser, NULL, NULL, + true /*fTeleporting*/, true /* fSkipStateChanges */); + LogFlow(("VMR3LoadFromStream: returns %Rrc\n", rc)); + return rc; +} + +/** + * EMT rendezvous worker for VMR3PowerOff. + * + * @returns VERR_VM_INVALID_VM_STATE or VINF_EM_OFF. (This is a strict + * return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Ignored. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3PowerOff(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + LogFlow(("vmR3PowerOff: pVM=%p pVCpu=%p/#%u\n", pVM, pVCpu, pVCpu->idCpu)); + Assert(!pvUser); NOREF(pvUser); + + /* + * The first EMT thru here will change the state to PoweringOff. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "VMR3PowerOff", 11, + VMSTATE_POWERING_OFF, VMSTATE_RUNNING, /* 1 */ + VMSTATE_POWERING_OFF, VMSTATE_SUSPENDED, /* 2 */ + VMSTATE_POWERING_OFF, VMSTATE_DEBUGGING, /* 3 */ + VMSTATE_POWERING_OFF, VMSTATE_LOAD_FAILURE, /* 4 */ + VMSTATE_POWERING_OFF, VMSTATE_GURU_MEDITATION, /* 5 */ + VMSTATE_POWERING_OFF, VMSTATE_FATAL_ERROR, /* 6 */ + VMSTATE_POWERING_OFF, VMSTATE_CREATED, /* 7 */ /** @todo update the diagram! 
*/ + VMSTATE_POWERING_OFF_LS, VMSTATE_RUNNING_LS, /* 8 */ + VMSTATE_POWERING_OFF_LS, VMSTATE_DEBUGGING_LS, /* 9 */ + VMSTATE_POWERING_OFF_LS, VMSTATE_GURU_MEDITATION_LS,/* 10 */ + VMSTATE_POWERING_OFF_LS, VMSTATE_FATAL_ERROR_LS); /* 11 */ + if (RT_FAILURE(rc)) + return rc; + if (rc >= 7) + SSMR3Cancel(pVM->pUVM); + } + + /* + * Check the state. + */ + VMSTATE enmVMState = VMR3GetState(pVM); + AssertMsgReturn( enmVMState == VMSTATE_POWERING_OFF + || enmVMState == VMSTATE_POWERING_OFF_LS, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_INVALID_VM_STATE); + + /* + * EMT(0) does the actual power off work here *after* all the other EMTs + * have been thru and entered the STOPPED state. + */ + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STOPPED); + if (pVCpu->idCpu == 0) + { + /* + * For debugging purposes, we will log a summary of the guest state at this point. + */ + if (enmVMState != VMSTATE_GURU_MEDITATION) + { + /** @todo make the state dumping at VMR3PowerOff optional. */ + bool fOldBuffered = RTLogRelSetBuffering(true /*fBuffered*/); + RTLogRelPrintf("****************** Guest state at power off for VCpu %u ******************\n", pVCpu->idCpu); + DBGFR3InfoEx(pVM->pUVM, pVCpu->idCpu, "cpumguest", "verbose", DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + DBGFR3InfoEx(pVM->pUVM, pVCpu->idCpu, "cpumguesthwvirt", "verbose", DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + DBGFR3InfoEx(pVM->pUVM, pVCpu->idCpu, "mode", NULL, DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + DBGFR3Info(pVM->pUVM, "activetimers", NULL, DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + DBGFR3Info(pVM->pUVM, "gdt", NULL, DBGFR3InfoLogRelHlp()); + /** @todo dump guest call stack. */ + RTLogRelSetBuffering(fOldBuffered); + RTLogRelPrintf("************** End of Guest state at power off ***************\n"); + } + + /* + * Perform the power off notifications and advance the state to + * Off or OffLS. + */ + PDMR3PowerOff(pVM); + DBGFR3PowerOff(pVM); + + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + enmVMState = pVM->enmVMState; + if (enmVMState == VMSTATE_POWERING_OFF_LS) + vmR3SetStateLocked(pVM, pUVM, VMSTATE_OFF_LS, VMSTATE_POWERING_OFF_LS, false /*fSetRatherThanClearFF*/); + else + vmR3SetStateLocked(pVM, pUVM, VMSTATE_OFF, VMSTATE_POWERING_OFF, false /*fSetRatherThanClearFF*/); + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + } + else if (enmVMState != VMSTATE_GURU_MEDITATION) + { + /** @todo make the state dumping at VMR3PowerOff optional. */ + bool fOldBuffered = RTLogRelSetBuffering(true /*fBuffered*/); + RTLogRelPrintf("****************** Guest state at power off for VCpu %u ******************\n", pVCpu->idCpu); + DBGFR3InfoEx(pVM->pUVM, pVCpu->idCpu, "cpumguest", "verbose", DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + DBGFR3InfoEx(pVM->pUVM, pVCpu->idCpu, "cpumguesthwvirt", "verbose", DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + DBGFR3InfoEx(pVM->pUVM, pVCpu->idCpu, "mode", NULL, DBGFR3InfoLogRelHlp()); + RTLogRelPrintf("***\n"); + RTLogRelSetBuffering(fOldBuffered); + RTLogRelPrintf("************** End of Guest state at power off for VCpu %u ***************\n", pVCpu->idCpu); + } + + return VINF_EM_OFF; +} + + +/** + * Power off the VM. + * + * @returns VBox status code. When called on EMT, this will be a strict status + * code that has to be propagated up the call stack. + * + * @param pUVM The handle of the VM to be powered off. + * + * @thread Any thread. 
+ * @vmstate Suspended, Running, Guru Meditation, Load Failure
+ * @vmstateto Off or OffLS
+ */
+VMMR3DECL(int) VMR3PowerOff(PUVM pUVM)
+{
+ LogFlow(("VMR3PowerOff: pUVM=%p\n", pUVM));
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
+ PVM pVM = pUVM->pVM;
+ VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
+
+ /*
+ * Gather all the EMTs to make sure there are no races before
+ * changing the VM state.
+ */
+ int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR,
+ vmR3PowerOff, NULL);
+ LogFlow(("VMR3PowerOff: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Destroys the VM.
+ *
+ * The VM must be powered off (or never really powered on) to call this
+ * function. The VM handle is destroyed and can no longer be used upon successful
+ * return.
+ *
+ * @returns VBox status code.
+ *
+ * @param pUVM The user mode VM handle.
+ *
+ * @thread Any non-emulation thread.
+ * @vmstate Off, Created
+ * @vmstateto N/A
+ */
+VMMR3DECL(int) VMR3Destroy(PUVM pUVM)
+{
+ LogFlow(("VMR3Destroy: pUVM=%p\n", pUVM));
+
+ /*
+ * Validate input.
+ */
+ if (!pUVM)
+ return VERR_INVALID_VM_HANDLE;
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
+ PVM pVM = pUVM->pVM;
+ VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
+ AssertLogRelReturn(!VM_IS_EMT(pVM), VERR_VM_THREAD_IS_EMT);
+
+ /*
+ * Change VM state to destroying and call vmR3Destroy on each of the EMTs
+ * ending with EMT(0) doing the bulk of the cleanup.
+ */
+ int rc = vmR3TrySetState(pVM, "VMR3Destroy", 1, VMSTATE_DESTROYING, VMSTATE_OFF);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ rc = VMR3ReqCallWait(pVM, VMCPUID_ALL_REVERSE, (PFNRT)vmR3Destroy, 1, pVM);
+ AssertLogRelRC(rc);
+
+ /*
+ * Wait for EMTs to quit and destroy the UVM.
+ */
+ vmR3DestroyUVM(pUVM, 30000);
+
+ LogFlow(("VMR3Destroy: returns VINF_SUCCESS\n"));
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Internal destruction worker.
+ *
+ * This is either called from VMR3Destroy via VMR3ReqCallU or from
+ * vmR3EmulationThreadWithId when EMT(0) terminates after having called
+ * VMR3Destroy().
+ *
+ * When called on EMT(0), it will perform the great bulk of the destruction.
+ * When called on the other EMTs, they will do nothing and the whole purpose is
+ * to return VINF_EM_TERMINATE so they break out of their run loops.
+ *
+ * @returns VINF_EM_TERMINATE.
+ * @param pVM The cross context VM structure.
+ */
+DECLCALLBACK(int) vmR3Destroy(PVM pVM)
+{
+ PUVM pUVM = pVM->pUVM;
+ PVMCPU pVCpu = VMMGetCpu(pVM);
+ Assert(pVCpu);
+ LogFlow(("vmR3Destroy: pVM=%p pUVM=%p pVCpu=%p idCpu=%u\n", pVM, pUVM, pVCpu, pVCpu->idCpu));
+
+ /*
+ * Only VCPU 0 does the full cleanup (last).
+ */
+ if (pVCpu->idCpu == 0)
+ {
+ /*
+ * Dump statistics to the log.
+ */
+#if defined(VBOX_WITH_STATISTICS) || defined(LOG_ENABLED)
+ RTLogFlags(NULL, "nodisabled nobuffered");
+#endif
+//#ifdef VBOX_WITH_STATISTICS
+// STAMR3Dump(pUVM, "*");
+//#else
+ LogRel(("************************* Statistics *************************\n"));
+ STAMR3DumpToReleaseLog(pUVM, "*");
+ LogRel(("********************* End of statistics **********************\n"));
+//#endif
+
+ /*
+ * Destroy the VM components.
+ */ + int rc = TMR3Term(pVM); + AssertRC(rc); +#ifdef VBOX_WITH_DEBUGGER + rc = DBGCTcpTerminate(pUVM, pUVM->vm.s.pvDBGC); + pUVM->vm.s.pvDBGC = NULL; +#endif + AssertRC(rc); + rc = FTMR3Term(pVM); + AssertRC(rc); + rc = PDMR3Term(pVM); + AssertRC(rc); + rc = GIMR3Term(pVM); + AssertRC(rc); + rc = DBGFR3Term(pVM); + AssertRC(rc); + rc = IEMR3Term(pVM); + AssertRC(rc); + rc = EMR3Term(pVM); + AssertRC(rc); + rc = IOMR3Term(pVM); + AssertRC(rc); +#ifdef VBOX_WITH_RAW_MODE + rc = CSAMR3Term(pVM); + AssertRC(rc); + rc = PATMR3Term(pVM); + AssertRC(rc); +#endif + rc = TRPMR3Term(pVM); + AssertRC(rc); + rc = SELMR3Term(pVM); + AssertRC(rc); +#ifdef VBOX_WITH_REM + rc = REMR3Term(pVM); + AssertRC(rc); +#endif + rc = HMR3Term(pVM); + AssertRC(rc); + rc = NEMR3Term(pVM); + AssertRC(rc); + rc = PGMR3Term(pVM); + AssertRC(rc); + rc = VMMR3Term(pVM); /* Terminates the ring-0 code! */ + AssertRC(rc); + rc = CPUMR3Term(pVM); + AssertRC(rc); + SSMR3Term(pVM); + rc = PDMR3CritSectBothTerm(pVM); + AssertRC(rc); + rc = MMR3Term(pVM); + AssertRC(rc); + + /* + * We're done, tell the other EMTs to quit. + */ + ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true); + ASMAtomicWriteU32(&pVM->fGlobalForcedActions, VM_FF_CHECK_VM_STATE); /* Can't hurt... */ + LogFlow(("vmR3Destroy: returning %Rrc\n", VINF_EM_TERMINATE)); + } + + /* + * Decrement the active EMT count here. + */ + PUVMCPU pUVCpu = &pUVM->aCpus[pVCpu->idCpu]; + if (!pUVCpu->vm.s.fBeenThruVmDestroy) + { + pUVCpu->vm.s.fBeenThruVmDestroy = true; + ASMAtomicDecU32(&pUVM->vm.s.cActiveEmts); + } + else + AssertFailed(); + + return VINF_EM_TERMINATE; +} + + +/** + * Destroys the UVM portion. + * + * This is called as the final step in the VM destruction or as the cleanup + * in case of a creation failure. + * + * @param pUVM The user mode VM structure. + * @param cMilliesEMTWait The number of milliseconds to wait for the emulation + * threads. + */ +static void vmR3DestroyUVM(PUVM pUVM, uint32_t cMilliesEMTWait) +{ + /* + * Signal termination of each the emulation threads and + * wait for them to complete. + */ + /* Signal them - in reverse order since EMT(0) waits for the others. */ + ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true); + if (pUVM->pVM) + VM_FF_SET(pUVM->pVM, VM_FF_CHECK_VM_STATE); /* Can't hurt... */ + VMCPUID iCpu = pUVM->cCpus; + while (iCpu-- > 0) + { + VMR3NotifyGlobalFFU(pUVM, VMNOTIFYFF_FLAGS_DONE_REM); + RTSemEventSignal(pUVM->aCpus[iCpu].vm.s.EventSemWait); + } + + /* Wait for EMT(0), it in turn waits for the rest. */ + ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true); + + RTTHREAD const hSelf = RTThreadSelf(); + RTTHREAD hThread = pUVM->aCpus[0].vm.s.ThreadEMT; + if ( hThread != NIL_RTTHREAD + && hThread != hSelf) + { + int rc2 = RTThreadWait(hThread, RT_MAX(cMilliesEMTWait, 2000), NULL); + if (rc2 == VERR_TIMEOUT) /* avoid the assertion when debugging. */ + rc2 = RTThreadWait(hThread, 1000, NULL); + AssertLogRelMsgRC(rc2, ("iCpu=0 rc=%Rrc\n", rc2)); + if (RT_SUCCESS(rc2)) + pUVM->aCpus[0].vm.s.ThreadEMT = NIL_RTTHREAD; + } + + /* Just in case we're in a weird failure situation w/o EMT(0) to do the + waiting, wait the other EMTs too. 
*/ + for (iCpu = 1; iCpu < pUVM->cCpus; iCpu++) + { + ASMAtomicXchgHandle(&pUVM->aCpus[iCpu].vm.s.ThreadEMT, NIL_RTTHREAD, &hThread); + if (hThread != NIL_RTTHREAD) + { + if (hThread != hSelf) + { + int rc2 = RTThreadWait(hThread, 250 /*ms*/, NULL); + AssertLogRelMsgRC(rc2, ("iCpu=%u rc=%Rrc\n", iCpu, rc2)); + if (RT_SUCCESS(rc2)) + continue; + } + pUVM->aCpus[iCpu].vm.s.ThreadEMT = hThread; + } + } + + /* Cleanup the semaphores. */ + iCpu = pUVM->cCpus; + while (iCpu-- > 0) + { + RTSemEventDestroy(pUVM->aCpus[iCpu].vm.s.EventSemWait); + pUVM->aCpus[iCpu].vm.s.EventSemWait = NIL_RTSEMEVENT; + } + + /* + * Free the event semaphores associated with the request packets. + */ + unsigned cReqs = 0; + for (unsigned i = 0; i < RT_ELEMENTS(pUVM->vm.s.apReqFree); i++) + { + PVMREQ pReq = pUVM->vm.s.apReqFree[i]; + pUVM->vm.s.apReqFree[i] = NULL; + for (; pReq; pReq = pReq->pNext, cReqs++) + { + pReq->enmState = VMREQSTATE_INVALID; + RTSemEventDestroy(pReq->EventSem); + } + } + Assert(cReqs == pUVM->vm.s.cReqFree); NOREF(cReqs); + + /* + * Kill all queued requests. (There really shouldn't be any!) + */ + for (unsigned i = 0; i < 10; i++) + { + PVMREQ pReqHead = ASMAtomicXchgPtrT(&pUVM->vm.s.pPriorityReqs, NULL, PVMREQ); + if (!pReqHead) + { + pReqHead = ASMAtomicXchgPtrT(&pUVM->vm.s.pNormalReqs, NULL, PVMREQ); + if (!pReqHead) + break; + } + AssertLogRelMsgFailed(("Requests pending! VMR3Destroy caller has to serialize this.\n")); + + for (PVMREQ pReq = pReqHead; pReq; pReq = pReq->pNext) + { + ASMAtomicUoWriteS32(&pReq->iStatus, VERR_VM_REQUEST_KILLED); + ASMAtomicWriteSize(&pReq->enmState, VMREQSTATE_INVALID); + RTSemEventSignal(pReq->EventSem); + RTThreadSleep(2); + RTSemEventDestroy(pReq->EventSem); + } + /* give them a chance to respond before we free the request memory. */ + RTThreadSleep(32); + } + + /* + * Now all queued VCPU requests (again, there shouldn't be any). + */ + for (VMCPUID idCpu = 0; idCpu < pUVM->cCpus; idCpu++) + { + PUVMCPU pUVCpu = &pUVM->aCpus[idCpu]; + + for (unsigned i = 0; i < 10; i++) + { + PVMREQ pReqHead = ASMAtomicXchgPtrT(&pUVCpu->vm.s.pPriorityReqs, NULL, PVMREQ); + if (!pReqHead) + { + pReqHead = ASMAtomicXchgPtrT(&pUVCpu->vm.s.pNormalReqs, NULL, PVMREQ); + if (!pReqHead) + break; + } + AssertLogRelMsgFailed(("Requests pending! VMR3Destroy caller has to serialize this.\n")); + + for (PVMREQ pReq = pReqHead; pReq; pReq = pReq->pNext) + { + ASMAtomicUoWriteS32(&pReq->iStatus, VERR_VM_REQUEST_KILLED); + ASMAtomicWriteSize(&pReq->enmState, VMREQSTATE_INVALID); + RTSemEventSignal(pReq->EventSem); + RTThreadSleep(2); + RTSemEventDestroy(pReq->EventSem); + } + /* give them a chance to respond before we free the request memory. */ + RTThreadSleep(32); + } + } + + /* + * Make sure the VMMR0.r0 module and whatever else is unloaded. + */ + PDMR3TermUVM(pUVM); + + RTCritSectDelete(&pUVM->vm.s.AtErrorCritSect); + RTCritSectDelete(&pUVM->vm.s.AtStateCritSect); + + /* + * Terminate the support library if initialized. + */ + if (pUVM->vm.s.pSession) + { + int rc = SUPR3Term(false /*fForced*/); + AssertRC(rc); + pUVM->vm.s.pSession = NIL_RTR0PTR; + } + + /* + * Release the UVM structure reference. + */ + VMR3ReleaseUVM(pUVM); + + /* + * Clean up and flush logs. + */ + RTLogFlush(NULL); +} + + +/** + * Worker which checks integrity of some internal structures. + * This is yet another attempt to track down that AVL tree crash. 
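+ *
+ * @param pVM The cross context VM structure.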
+ */ +static void vmR3CheckIntegrity(PVM pVM) +{ +#ifdef VBOX_STRICT + int rc = PGMR3CheckIntegrity(pVM); + AssertReleaseRC(rc); +#else + RT_NOREF_PV(pVM); +#endif +} + + +/** + * EMT rendezvous worker for VMR3ResetFF for doing soft/warm reset. + * + * @returns VERR_VM_INVALID_VM_STATE, VINF_EM_RESCHEDULE. + * (This is a strict return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser The reset flags. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3SoftReset(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + uint32_t fResetFlags = *(uint32_t *)pvUser; + + + /* + * The first EMT will try change the state to resetting. If this fails, + * we won't get called for the other EMTs. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "vmR3ResetSoft", 3, + VMSTATE_SOFT_RESETTING, VMSTATE_RUNNING, + VMSTATE_SOFT_RESETTING, VMSTATE_SUSPENDED, + VMSTATE_SOFT_RESETTING_LS, VMSTATE_RUNNING_LS); + if (RT_FAILURE(rc)) + return rc; + pVM->vm.s.cResets++; + pVM->vm.s.cSoftResets++; + } + + /* + * Check the state. + */ + VMSTATE enmVMState = VMR3GetState(pVM); + AssertLogRelMsgReturn( enmVMState == VMSTATE_SOFT_RESETTING + || enmVMState == VMSTATE_SOFT_RESETTING_LS, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_UNEXPECTED_UNSTABLE_STATE); + + /* + * EMT(0) does the full cleanup *after* all the other EMTs has been + * thru here and been told to enter the EMSTATE_WAIT_SIPI state. + * + * Because there are per-cpu reset routines and order may/is important, + * the following sequence looks a bit ugly... + */ + + /* Reset the VCpu state. */ + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED); + + /* + * Soft reset the VM components. + */ + if (pVCpu->idCpu == 0) + { +#ifdef VBOX_WITH_REM + REMR3Reset(pVM); +#endif + PDMR3SoftReset(pVM, fResetFlags); + TRPMR3Reset(pVM); + CPUMR3Reset(pVM); /* This must come *after* PDM (due to APIC base MSR caching). */ + EMR3Reset(pVM); + HMR3Reset(pVM); /* This must come *after* PATM, CSAM, CPUM, SELM and TRPM. */ + NEMR3Reset(pVM); + + /* + * Since EMT(0) is the last to go thru here, it will advance the state. + * (Unlike vmR3HardReset we won't be doing any suspending of live + * migration VMs here since memory is unchanged.) + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + enmVMState = pVM->enmVMState; + if (enmVMState == VMSTATE_SOFT_RESETTING) + { + if (pUVM->vm.s.enmPrevVMState == VMSTATE_SUSPENDED) + vmR3SetStateLocked(pVM, pUVM, VMSTATE_SUSPENDED, VMSTATE_SOFT_RESETTING, false /*fSetRatherThanClearFF*/); + else + vmR3SetStateLocked(pVM, pUVM, VMSTATE_RUNNING, VMSTATE_SOFT_RESETTING, false /*fSetRatherThanClearFF*/); + } + else + vmR3SetStateLocked(pVM, pUVM, VMSTATE_RUNNING_LS, VMSTATE_SOFT_RESETTING_LS, false /*fSetRatherThanClearFF*/); + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + } + + return VINF_EM_RESCHEDULE; +} + + +/** + * EMT rendezvous worker for VMR3Reset and VMR3ResetFF. + * + * This is called by the emulation threads as a response to the reset request + * issued by VMR3Reset(). + * + * @returns VERR_VM_INVALID_VM_STATE, VINF_EM_RESET or VINF_EM_SUSPEND. (This + * is a strict return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Ignored. 
+ */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3HardReset(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + Assert(!pvUser); NOREF(pvUser); + + /* + * The first EMT will try change the state to resetting. If this fails, + * we won't get called for the other EMTs. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "vmR3HardReset", 3, + VMSTATE_RESETTING, VMSTATE_RUNNING, + VMSTATE_RESETTING, VMSTATE_SUSPENDED, + VMSTATE_RESETTING_LS, VMSTATE_RUNNING_LS); + if (RT_FAILURE(rc)) + return rc; + pVM->vm.s.cResets++; + pVM->vm.s.cHardResets++; + } + + /* + * Check the state. + */ + VMSTATE enmVMState = VMR3GetState(pVM); + AssertLogRelMsgReturn( enmVMState == VMSTATE_RESETTING + || enmVMState == VMSTATE_RESETTING_LS, + ("%s\n", VMR3GetStateName(enmVMState)), + VERR_VM_UNEXPECTED_UNSTABLE_STATE); + + /* + * EMT(0) does the full cleanup *after* all the other EMTs has been + * thru here and been told to enter the EMSTATE_WAIT_SIPI state. + * + * Because there are per-cpu reset routines and order may/is important, + * the following sequence looks a bit ugly... + */ + if (pVCpu->idCpu == 0) + vmR3CheckIntegrity(pVM); + + /* Reset the VCpu state. */ + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED); + + /* Clear all pending forced actions. */ + VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_ALL_MASK & ~VMCPU_FF_REQUEST); + + /* + * Reset the VM components. + */ + if (pVCpu->idCpu == 0) + { +#ifdef VBOX_WITH_RAW_MODE + PATMR3Reset(pVM); + CSAMR3Reset(pVM); +#endif + GIMR3Reset(pVM); /* This must come *before* PDM and TM. */ + PDMR3Reset(pVM); + PGMR3Reset(pVM); + SELMR3Reset(pVM); + TRPMR3Reset(pVM); +#ifdef VBOX_WITH_REM + REMR3Reset(pVM); +#endif + IOMR3Reset(pVM); + CPUMR3Reset(pVM); /* This must come *after* PDM (due to APIC base MSR caching). */ + TMR3Reset(pVM); + EMR3Reset(pVM); + HMR3Reset(pVM); /* This must come *after* PATM, CSAM, CPUM, SELM and TRPM. */ + NEMR3Reset(pVM); + + /* + * Do memory setup. + */ + PGMR3MemSetup(pVM, true /*fAtReset*/); + PDMR3MemSetup(pVM, true /*fAtReset*/); + + /* + * Since EMT(0) is the last to go thru here, it will advance the state. + * When a live save is active, we will move on to SuspendingLS but + * leave it for VMR3Reset to do the actual suspending due to deadlock risks. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + enmVMState = pVM->enmVMState; + if (enmVMState == VMSTATE_RESETTING) + { + if (pUVM->vm.s.enmPrevVMState == VMSTATE_SUSPENDED) + vmR3SetStateLocked(pVM, pUVM, VMSTATE_SUSPENDED, VMSTATE_RESETTING, false /*fSetRatherThanClearFF*/); + else + vmR3SetStateLocked(pVM, pUVM, VMSTATE_RUNNING, VMSTATE_RESETTING, false /*fSetRatherThanClearFF*/); + } + else + vmR3SetStateLocked(pVM, pUVM, VMSTATE_SUSPENDING_LS, VMSTATE_RESETTING_LS, false /*fSetRatherThanClearFF*/); + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + + vmR3CheckIntegrity(pVM); + + /* + * Do the suspend bit as well. + * It only requires some EMT(0) work at present. + */ + if (enmVMState != VMSTATE_RESETTING) + { + vmR3SuspendDoWork(pVM); + vmR3SetState(pVM, VMSTATE_SUSPENDED_LS, VMSTATE_SUSPENDING_LS); + } + } + + return enmVMState == VMSTATE_RESETTING + ? VINF_EM_RESET + : VINF_EM_SUSPEND; /** @todo VINF_EM_SUSPEND has lower priority than VINF_EM_RESET, so fix races. Perhaps add a new code for this combined case. */ +} + + +/** + * Internal worker for VMR3Reset, VMR3ResetFF, VMR3TripleFault. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param fHardReset Whether it's a hard reset or not. 
+ * @param fResetFlags The reset flags (PDMVMRESET_F_XXX). + */ +static VBOXSTRICTRC vmR3ResetCommon(PVM pVM, bool fHardReset, uint32_t fResetFlags) +{ + LogFlow(("vmR3ResetCommon: fHardReset=%RTbool fResetFlags=%#x\n", fHardReset, fResetFlags)); + int rc; + if (fHardReset) + { + /* + * Hard reset. + */ + /* Check whether we're supposed to power off instead of resetting. */ + if (pVM->vm.s.fPowerOffInsteadOfReset) + { + PUVM pUVM = pVM->pUVM; + if ( pUVM->pVmm2UserMethods + && pUVM->pVmm2UserMethods->pfnNotifyResetTurnedIntoPowerOff) + pUVM->pVmm2UserMethods->pfnNotifyResetTurnedIntoPowerOff(pUVM->pVmm2UserMethods, pUVM); + return VMR3PowerOff(pUVM); + } + + /* Gather all the EMTs to make sure there are no races before changing + the VM state. */ + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR, + vmR3HardReset, NULL); + } + else + { + /* + * Soft reset. Since we only support this with a single CPU active, + * we must be on EMT #0 here. + */ + VM_ASSERT_EMT0(pVM); + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR, + vmR3SoftReset, &fResetFlags); + } + + LogFlow(("vmR3ResetCommon: returns %Rrc\n", rc)); + return rc; +} + + + +/** + * Reset the current VM. + * + * @returns VBox status code. + * @param pUVM The VM to reset. + */ +VMMR3DECL(int) VMR3Reset(PUVM pUVM) +{ + LogFlow(("VMR3Reset:\n")); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + return VBOXSTRICTRC_VAL(vmR3ResetCommon(pVM, true, 0)); +} + + +/** + * Handle the reset force flag or triple fault. + * + * This handles both soft and hard resets (see PDMVMRESET_F_XXX). + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @thread EMT + * + * @remarks Caller is expected to clear the VM_FF_RESET force flag. + */ +VMMR3_INT_DECL(VBOXSTRICTRC) VMR3ResetFF(PVM pVM) +{ + LogFlow(("VMR3ResetFF:\n")); + + /* + * First consult the firmware on whether this is a hard or soft reset. + */ + uint32_t fResetFlags; + bool fHardReset = PDMR3GetResetInfo(pVM, 0 /*fOverride*/, &fResetFlags); + return vmR3ResetCommon(pVM, fHardReset, fResetFlags); +} + + +/** + * For handling a CPU reset on triple fault. + * + * According to one mainboard manual, a CPU triple fault causes the 286 CPU to + * send a SHUTDOWN signal to the chipset. The chipset responds by sending a + * RESET signal to the CPU. So, it should be very similar to a soft/warm reset. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @thread EMT + */ +VMMR3_INT_DECL(VBOXSTRICTRC) VMR3ResetTripleFault(PVM pVM) +{ + LogFlow(("VMR3ResetTripleFault:\n")); + + /* + * First consult the firmware on whether this is a hard or soft reset. + */ + uint32_t fResetFlags; + bool fHardReset = PDMR3GetResetInfo(pVM, PDMVMRESET_F_TRIPLE_FAULT, &fResetFlags); + return vmR3ResetCommon(pVM, fHardReset, fResetFlags); +} + + +/** + * Gets the user mode VM structure pointer given Pointer to the VM. + * + * @returns Pointer to the user mode VM structure on success. NULL if @a pVM is + * invalid (asserted). + * @param pVM The cross context VM structure. + * @sa VMR3GetVM, VMR3RetainUVM + */ +VMMR3DECL(PUVM) VMR3GetUVM(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, NULL); + return pVM->pUVM; +} + + +/** + * Gets the shared VM structure pointer given the pointer to the user mode VM + * structure. + * + * @returns Pointer to the VM. 
+ * NULL if @a pUVM is invalid (asserted) or if no shared VM structure
+ * is currently associated with it.
+ * @param pUVM The user mode VM handle.
+ * @sa VMR3GetUVM
+ */
+VMMR3DECL(PVM) VMR3GetVM(PUVM pUVM)
+{
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL);
+ return pUVM->pVM;
+}
+
+
+/**
+ * Retain the user mode VM handle.
+ *
+ * @returns Reference count.
+ * UINT32_MAX if @a pUVM is invalid.
+ *
+ * @param pUVM The user mode VM handle.
+ * @sa VMR3ReleaseUVM
+ */
+VMMR3DECL(uint32_t) VMR3RetainUVM(PUVM pUVM)
+{
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT32_MAX);
+ uint32_t cRefs = ASMAtomicIncU32(&pUVM->vm.s.cUvmRefs);
+ AssertMsg(cRefs > 0 && cRefs < _64K, ("%u\n", cRefs));
+ return cRefs;
+}
+
+
+/**
+ * Does the final release of the UVM structure.
+ *
+ * @param pUVM The user mode VM handle.
+ */
+static void vmR3DoReleaseUVM(PUVM pUVM)
+{
+ /*
+ * Free the UVM.
+ */
+ Assert(!pUVM->pVM);
+
+ MMR3HeapFree(pUVM->vm.s.pszName);
+ pUVM->vm.s.pszName = NULL;
+
+ MMR3TermUVM(pUVM);
+ STAMR3TermUVM(pUVM);
+
+ ASMAtomicUoWriteU32(&pUVM->u32Magic, UINT32_MAX);
+ RTTlsFree(pUVM->vm.s.idxTLS);
+ RTMemPageFree(pUVM, RT_UOFFSETOF_DYN(UVM, aCpus[pUVM->cCpus]));
+}
+
+
+/**
+ * Releases a reference to the user mode VM handle.
+ *
+ * @returns The new reference count, 0 if destroyed.
+ * UINT32_MAX if @a pUVM is invalid.
+ *
+ * @param pUVM The user mode VM handle.
+ * @sa VMR3RetainUVM
+ */
+VMMR3DECL(uint32_t) VMR3ReleaseUVM(PUVM pUVM)
+{
+ if (!pUVM)
+ return 0;
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, UINT32_MAX);
+ uint32_t cRefs = ASMAtomicDecU32(&pUVM->vm.s.cUvmRefs);
+ if (!cRefs)
+ vmR3DoReleaseUVM(pUVM);
+ else
+ AssertMsg(cRefs < _64K, ("%u\n", cRefs));
+ return cRefs;
+}
+
+
+/**
+ * Gets the VM name.
+ *
+ * @returns Pointer to a read-only string containing the name. NULL if called
+ * too early.
+ * @param pUVM The user mode VM handle.
+ */
+VMMR3DECL(const char *) VMR3GetName(PUVM pUVM)
+{
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL);
+ return pUVM->vm.s.pszName;
+}
+
+
+/**
+ * Gets the VM UUID.
+ *
+ * @returns pUuid on success, NULL on failure.
+ * @param pUVM The user mode VM handle.
+ * @param pUuid Where to store the UUID.
+ */
+VMMR3DECL(PRTUUID) VMR3GetUuid(PUVM pUVM, PRTUUID pUuid)
+{
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL);
+ AssertPtrReturn(pUuid, NULL);
+
+ *pUuid = pUVM->vm.s.Uuid;
+ return pUuid;
+}
+
+
+/**
+ * Gets the current VM state.
+ *
+ * @returns The current VM state.
+ * @param pVM The cross context VM structure.
+ * @thread Any
+ */
+VMMR3DECL(VMSTATE) VMR3GetState(PVM pVM)
+{
+ AssertMsgReturn(RT_VALID_ALIGNED_PTR(pVM, PAGE_SIZE), ("%p\n", pVM), VMSTATE_TERMINATED);
+ VMSTATE enmVMState = pVM->enmVMState;
+ return enmVMState >= VMSTATE_CREATING && enmVMState <= VMSTATE_TERMINATED ? enmVMState : VMSTATE_TERMINATED;
+}
+
+
+/**
+ * Gets the current VM state.
+ *
+ * @returns The current VM state.
+ * @param pUVM The user-mode VM handle.
+ * @thread Any
+ */
+VMMR3DECL(VMSTATE) VMR3GetStateU(PUVM pUVM)
+{
+ UVM_ASSERT_VALID_EXT_RETURN(pUVM, VMSTATE_TERMINATED);
+ if (RT_UNLIKELY(!pUVM->pVM))
+ return VMSTATE_TERMINATED;
+ return pUVM->pVM->enmVMState;
+}
+
+
+/**
+ * Gets the state name string for a VM state.
+ *
+ * @returns Pointer to the state name. (readonly)
+ * @param enmState The state.
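+ *
+ * @remarks Illustrative usage only (editorial addition):
+ * @code
+ *      LogRel(("Current VM state: %s\n", VMR3GetStateName(VMR3GetState(pVM))));
+ * @endcode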
+ */ +VMMR3DECL(const char *) VMR3GetStateName(VMSTATE enmState) +{ + switch (enmState) + { + case VMSTATE_CREATING: return "CREATING"; + case VMSTATE_CREATED: return "CREATED"; + case VMSTATE_LOADING: return "LOADING"; + case VMSTATE_POWERING_ON: return "POWERING_ON"; + case VMSTATE_RESUMING: return "RESUMING"; + case VMSTATE_RUNNING: return "RUNNING"; + case VMSTATE_RUNNING_LS: return "RUNNING_LS"; + case VMSTATE_RUNNING_FT: return "RUNNING_FT"; + case VMSTATE_RESETTING: return "RESETTING"; + case VMSTATE_RESETTING_LS: return "RESETTING_LS"; + case VMSTATE_SOFT_RESETTING: return "SOFT_RESETTING"; + case VMSTATE_SOFT_RESETTING_LS: return "SOFT_RESETTING_LS"; + case VMSTATE_SUSPENDED: return "SUSPENDED"; + case VMSTATE_SUSPENDED_LS: return "SUSPENDED_LS"; + case VMSTATE_SUSPENDED_EXT_LS: return "SUSPENDED_EXT_LS"; + case VMSTATE_SUSPENDING: return "SUSPENDING"; + case VMSTATE_SUSPENDING_LS: return "SUSPENDING_LS"; + case VMSTATE_SUSPENDING_EXT_LS: return "SUSPENDING_EXT_LS"; + case VMSTATE_SAVING: return "SAVING"; + case VMSTATE_DEBUGGING: return "DEBUGGING"; + case VMSTATE_DEBUGGING_LS: return "DEBUGGING_LS"; + case VMSTATE_POWERING_OFF: return "POWERING_OFF"; + case VMSTATE_POWERING_OFF_LS: return "POWERING_OFF_LS"; + case VMSTATE_FATAL_ERROR: return "FATAL_ERROR"; + case VMSTATE_FATAL_ERROR_LS: return "FATAL_ERROR_LS"; + case VMSTATE_GURU_MEDITATION: return "GURU_MEDITATION"; + case VMSTATE_GURU_MEDITATION_LS:return "GURU_MEDITATION_LS"; + case VMSTATE_LOAD_FAILURE: return "LOAD_FAILURE"; + case VMSTATE_OFF: return "OFF"; + case VMSTATE_OFF_LS: return "OFF_LS"; + case VMSTATE_DESTROYING: return "DESTROYING"; + case VMSTATE_TERMINATED: return "TERMINATED"; + + default: + AssertMsgFailed(("Unknown state %d\n", enmState)); + return "Unknown!\n"; + } +} + + +/** + * Validates the state transition in strict builds. + * + * @returns true if valid, false if not. + * + * @param enmStateOld The old (current) state. + * @param enmStateNew The proposed new state. + * + * @remarks The reference for this is found in doc/vp/VMM.vpp, the VMSTATE + * diagram (under State Machine Diagram). 
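+ *
+ * @remarks (Editorial note:) In non-strict builds the checks below compile
+ *          out and the function simply returns true.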
+ */ +static bool vmR3ValidateStateTransition(VMSTATE enmStateOld, VMSTATE enmStateNew) +{ +#ifndef VBOX_STRICT + RT_NOREF2(enmStateOld, enmStateNew); +#else + switch (enmStateOld) + { + case VMSTATE_CREATING: + AssertMsgReturn(enmStateNew == VMSTATE_CREATED, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_CREATED: + AssertMsgReturn( enmStateNew == VMSTATE_LOADING + || enmStateNew == VMSTATE_POWERING_ON + || enmStateNew == VMSTATE_POWERING_OFF + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_LOADING: + AssertMsgReturn( enmStateNew == VMSTATE_SUSPENDED + || enmStateNew == VMSTATE_LOAD_FAILURE + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_POWERING_ON: + AssertMsgReturn( enmStateNew == VMSTATE_RUNNING + /*|| enmStateNew == VMSTATE_FATAL_ERROR ?*/ + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_RESUMING: + AssertMsgReturn( enmStateNew == VMSTATE_RUNNING + /*|| enmStateNew == VMSTATE_FATAL_ERROR ?*/ + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_RUNNING: + AssertMsgReturn( enmStateNew == VMSTATE_POWERING_OFF + || enmStateNew == VMSTATE_SUSPENDING + || enmStateNew == VMSTATE_RESETTING + || enmStateNew == VMSTATE_SOFT_RESETTING + || enmStateNew == VMSTATE_RUNNING_LS + || enmStateNew == VMSTATE_RUNNING_FT + || enmStateNew == VMSTATE_DEBUGGING + || enmStateNew == VMSTATE_FATAL_ERROR + || enmStateNew == VMSTATE_GURU_MEDITATION + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_RUNNING_LS: + AssertMsgReturn( enmStateNew == VMSTATE_POWERING_OFF_LS + || enmStateNew == VMSTATE_SUSPENDING_LS + || enmStateNew == VMSTATE_SUSPENDING_EXT_LS + || enmStateNew == VMSTATE_RESETTING_LS + || enmStateNew == VMSTATE_SOFT_RESETTING_LS + || enmStateNew == VMSTATE_RUNNING + || enmStateNew == VMSTATE_DEBUGGING_LS + || enmStateNew == VMSTATE_FATAL_ERROR_LS + || enmStateNew == VMSTATE_GURU_MEDITATION_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_RUNNING_FT: + AssertMsgReturn( enmStateNew == VMSTATE_POWERING_OFF + || enmStateNew == VMSTATE_FATAL_ERROR + || enmStateNew == VMSTATE_GURU_MEDITATION + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_RESETTING: + AssertMsgReturn(enmStateNew == VMSTATE_RUNNING, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SOFT_RESETTING: + AssertMsgReturn(enmStateNew == VMSTATE_RUNNING, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_RESETTING_LS: + AssertMsgReturn( enmStateNew == VMSTATE_SUSPENDING_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SOFT_RESETTING_LS: + AssertMsgReturn( enmStateNew == VMSTATE_RUNNING_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SUSPENDING: + AssertMsgReturn(enmStateNew == VMSTATE_SUSPENDED, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SUSPENDING_LS: + AssertMsgReturn( enmStateNew == 
VMSTATE_SUSPENDING + || enmStateNew == VMSTATE_SUSPENDED_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SUSPENDING_EXT_LS: + AssertMsgReturn( enmStateNew == VMSTATE_SUSPENDING + || enmStateNew == VMSTATE_SUSPENDED_EXT_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SUSPENDED: + AssertMsgReturn( enmStateNew == VMSTATE_POWERING_OFF + || enmStateNew == VMSTATE_SAVING + || enmStateNew == VMSTATE_RESETTING + || enmStateNew == VMSTATE_SOFT_RESETTING + || enmStateNew == VMSTATE_RESUMING + || enmStateNew == VMSTATE_LOADING + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SUSPENDED_LS: + AssertMsgReturn( enmStateNew == VMSTATE_SUSPENDED + || enmStateNew == VMSTATE_SAVING + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SUSPENDED_EXT_LS: + AssertMsgReturn( enmStateNew == VMSTATE_SUSPENDED + || enmStateNew == VMSTATE_SAVING + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_SAVING: + AssertMsgReturn(enmStateNew == VMSTATE_SUSPENDED, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_DEBUGGING: + AssertMsgReturn( enmStateNew == VMSTATE_RUNNING + || enmStateNew == VMSTATE_POWERING_OFF + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_DEBUGGING_LS: + AssertMsgReturn( enmStateNew == VMSTATE_DEBUGGING + || enmStateNew == VMSTATE_RUNNING_LS + || enmStateNew == VMSTATE_POWERING_OFF_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_POWERING_OFF: + AssertMsgReturn(enmStateNew == VMSTATE_OFF, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_POWERING_OFF_LS: + AssertMsgReturn( enmStateNew == VMSTATE_POWERING_OFF + || enmStateNew == VMSTATE_OFF_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_OFF: + AssertMsgReturn(enmStateNew == VMSTATE_DESTROYING, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_OFF_LS: + AssertMsgReturn(enmStateNew == VMSTATE_OFF, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_FATAL_ERROR: + AssertMsgReturn(enmStateNew == VMSTATE_POWERING_OFF, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_FATAL_ERROR_LS: + AssertMsgReturn( enmStateNew == VMSTATE_FATAL_ERROR + || enmStateNew == VMSTATE_POWERING_OFF_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_GURU_MEDITATION: + AssertMsgReturn( enmStateNew == VMSTATE_DEBUGGING + || enmStateNew == VMSTATE_POWERING_OFF + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_GURU_MEDITATION_LS: + AssertMsgReturn( enmStateNew == VMSTATE_GURU_MEDITATION + || enmStateNew == VMSTATE_DEBUGGING_LS + || enmStateNew == VMSTATE_POWERING_OFF_LS + , ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_LOAD_FAILURE: + 
AssertMsgReturn(enmStateNew == VMSTATE_POWERING_OFF, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_DESTROYING: + AssertMsgReturn(enmStateNew == VMSTATE_TERMINATED, ("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + + case VMSTATE_TERMINATED: + default: + AssertMsgFailedReturn(("%s -> %s\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew)), false); + break; + } +#endif /* VBOX_STRICT */ + return true; +} + + +/** + * Does the state change callouts. + * + * The caller owns the AtStateCritSect. + * + * @param pVM The cross context VM structure. + * @param pUVM The UVM handle. + * @param enmStateNew The New state. + * @param enmStateOld The old state. + */ +static void vmR3DoAtState(PVM pVM, PUVM pUVM, VMSTATE enmStateNew, VMSTATE enmStateOld) +{ + LogRel(("Changing the VM state from '%s' to '%s'\n", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew))); + + for (PVMATSTATE pCur = pUVM->vm.s.pAtState; pCur; pCur = pCur->pNext) + { + pCur->pfnAtState(pUVM, enmStateNew, enmStateOld, pCur->pvUser); + if ( enmStateNew != VMSTATE_DESTROYING + && pVM->enmVMState == VMSTATE_DESTROYING) + break; + AssertMsg(pVM->enmVMState == enmStateNew, + ("You are not allowed to change the state while in the change callback, except " + "from destroying the VM. There are restrictions in the way the state changes " + "are propagated up to the EM execution loop and it makes the program flow very " + "difficult to follow. (%s, expected %s, old %s)\n", + VMR3GetStateName(pVM->enmVMState), VMR3GetStateName(enmStateNew), + VMR3GetStateName(enmStateOld))); + } +} + + +/** + * Sets the current VM state, with the AtStatCritSect already entered. + * + * @param pVM The cross context VM structure. + * @param pUVM The UVM handle. + * @param enmStateNew The new state. + * @param enmStateOld The old state. + * @param fSetRatherThanClearFF The usual behavior is to clear the + * VM_FF_CHECK_VM_STATE force flag, but for + * some transitions (-> guru) we need to kick + * the other EMTs to stop what they're doing. + */ +static void vmR3SetStateLocked(PVM pVM, PUVM pUVM, VMSTATE enmStateNew, VMSTATE enmStateOld, bool fSetRatherThanClearFF) +{ + vmR3ValidateStateTransition(enmStateOld, enmStateNew); + + AssertMsg(pVM->enmVMState == enmStateOld, + ("%s != %s\n", VMR3GetStateName(pVM->enmVMState), VMR3GetStateName(enmStateOld))); + + pUVM->vm.s.enmPrevVMState = enmStateOld; + pVM->enmVMState = enmStateNew; + + if (!fSetRatherThanClearFF) + VM_FF_CLEAR(pVM, VM_FF_CHECK_VM_STATE); + else if (pVM->cCpus > 0) + VM_FF_SET(pVM, VM_FF_CHECK_VM_STATE); + + vmR3DoAtState(pVM, pUVM, enmStateNew, enmStateOld); +} + + +/** + * Sets the current VM state. + * + * @param pVM The cross context VM structure. + * @param enmStateNew The new state. + * @param enmStateOld The old state (for asserting only). + */ +static void vmR3SetState(PVM pVM, VMSTATE enmStateNew, VMSTATE enmStateOld) +{ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + + RT_NOREF_PV(enmStateOld); + AssertMsg(pVM->enmVMState == enmStateOld, + ("%s != %s\n", VMR3GetStateName(pVM->enmVMState), VMR3GetStateName(enmStateOld))); + vmR3SetStateLocked(pVM, pUVM, enmStateNew, pVM->enmVMState, false /*fSetRatherThanClearFF*/); + + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); +} + + +/** + * Tries to perform a state transition. + * + * @returns The 1-based ordinal of the succeeding transition. 
+ * VERR_VM_INVALID_VM_STATE and Assert+LogRel on failure. + * + * @param pVM The cross context VM structure. + * @param pszWho Who is trying to change it. + * @param cTransitions The number of transitions in the ellipsis. + * @param ... Transition pairs; new, old. + */ +static int vmR3TrySetState(PVM pVM, const char *pszWho, unsigned cTransitions, ...) +{ + va_list va; + VMSTATE enmStateNew = VMSTATE_CREATED; + VMSTATE enmStateOld = VMSTATE_CREATED; + +#ifdef VBOX_STRICT + /* + * Validate the input first. + */ + va_start(va, cTransitions); + for (unsigned i = 0; i < cTransitions; i++) + { + enmStateNew = (VMSTATE)va_arg(va, /*VMSTATE*/int); + enmStateOld = (VMSTATE)va_arg(va, /*VMSTATE*/int); + vmR3ValidateStateTransition(enmStateOld, enmStateNew); + } + va_end(va); +#endif + + /* + * Grab the lock and see if any of the proposed transitions works out. + */ + va_start(va, cTransitions); + int rc = VERR_VM_INVALID_VM_STATE; + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + + VMSTATE enmStateCur = pVM->enmVMState; + + for (unsigned i = 0; i < cTransitions; i++) + { + enmStateNew = (VMSTATE)va_arg(va, /*VMSTATE*/int); + enmStateOld = (VMSTATE)va_arg(va, /*VMSTATE*/int); + if (enmStateCur == enmStateOld) + { + vmR3SetStateLocked(pVM, pUVM, enmStateNew, enmStateOld, false /*fSetRatherThanClearFF*/); + rc = i + 1; + break; + } + } + + if (RT_FAILURE(rc)) + { + /* + * Complain about it. + */ + if (cTransitions == 1) + { + LogRel(("%s: %s -> %s failed, because the VM state is actually %s\n", + pszWho, VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew), VMR3GetStateName(enmStateCur))); + VMSetError(pVM, VERR_VM_INVALID_VM_STATE, RT_SRC_POS, + N_("%s failed because the VM state is %s instead of %s"), + pszWho, VMR3GetStateName(enmStateCur), VMR3GetStateName(enmStateOld)); + AssertMsgFailed(("%s: %s -> %s failed, because the VM state is actually %s\n", + pszWho, VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew), VMR3GetStateName(enmStateCur))); + } + else + { + va_end(va); + va_start(va, cTransitions); + LogRel(("%s:\n", pszWho)); + for (unsigned i = 0; i < cTransitions; i++) + { + enmStateNew = (VMSTATE)va_arg(va, /*VMSTATE*/int); + enmStateOld = (VMSTATE)va_arg(va, /*VMSTATE*/int); + LogRel(("%s%s -> %s", + i ? ", " : " ", VMR3GetStateName(enmStateOld), VMR3GetStateName(enmStateNew))); + } + LogRel((" failed, because the VM state is actually %s\n", VMR3GetStateName(enmStateCur))); + VMSetError(pVM, VERR_VM_INVALID_VM_STATE, RT_SRC_POS, + N_("%s failed because the current VM state, %s, was not found in the state transition table (old state %s)"), + pszWho, VMR3GetStateName(enmStateCur), VMR3GetStateName(enmStateOld)); + AssertMsgFailed(("%s - state=%s, see release log for full details. Check the cTransitions passed us.\n", + pszWho, VMR3GetStateName(enmStateCur))); + } + } + + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + va_end(va); + Assert(rc > 0 || rc < 0); + return rc; +} + + +/** + * Interface used by EM to signal that it's entering the guru meditation state. + * + * This will notifying other threads. + * + * @returns true if the state changed to Guru, false if no state change. + * @param pVM The cross context VM structure. 
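/* A minimal calling sketch for vmR3TrySetState() above (illustrative only; the
 * "PowerOn" caller tag and the particular transition pairs are made up, though
 * both pairs are legal per vmR3ValidateStateTransition). Pairs are listed as
 * new-state, old-state, and a successful return is the 1-based index of the
 * pair that matched the current state:
 *
 *     int rc = vmR3TrySetState(pVM, "PowerOn", 2,
 *                              VMSTATE_POWERING_ON, VMSTATE_CREATED,
 *                              VMSTATE_RESUMING,    VMSTATE_SUSPENDED);
 *     if (RT_FAILURE(rc))            // VERR_VM_INVALID_VM_STATE: no pair matched
 *         return rc;
 *     bool const fResumed = (rc == 2);   // second pair matched
 */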
+ */ +VMMR3_INT_DECL(bool) VMR3SetGuruMeditation(PVM pVM) +{ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + + VMSTATE enmStateCur = pVM->enmVMState; + bool fRc = true; + if (enmStateCur == VMSTATE_RUNNING) + vmR3SetStateLocked(pVM, pUVM, VMSTATE_GURU_MEDITATION, VMSTATE_RUNNING, true /*fSetRatherThanClearFF*/); + else if (enmStateCur == VMSTATE_RUNNING_LS) + { + vmR3SetStateLocked(pVM, pUVM, VMSTATE_GURU_MEDITATION_LS, VMSTATE_RUNNING_LS, true /*fSetRatherThanClearFF*/); + SSMR3Cancel(pUVM); + } + else + fRc = false; + + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + return fRc; +} + + +/** + * Called by vmR3EmulationThreadWithId just before the VM structure is freed. + * + * @param pVM The cross context VM structure. + */ +void vmR3SetTerminated(PVM pVM) +{ + vmR3SetState(pVM, VMSTATE_TERMINATED, VMSTATE_DESTROYING); +} + + +/** + * Checks if the VM was teleported and hasn't been fully resumed yet. + * + * This applies to both sides of the teleportation since we may leave a working + * clone behind and the user is allowed to resume this... + * + * @returns true / false. + * @param pVM The cross context VM structure. + * @thread Any thread. + */ +VMMR3_INT_DECL(bool) VMR3TeleportedAndNotFullyResumedYet(PVM pVM) +{ + VM_ASSERT_VALID_EXT_RETURN(pVM, false); + return pVM->vm.s.fTeleportedAndNotFullyResumedYet; +} + + +/** + * Registers a VM state change callback. + * + * You are not allowed to call any function which changes the VM state from a + * state callback. + * + * @returns VBox status code. + * @param pUVM The VM handle. + * @param pfnAtState Pointer to callback. + * @param pvUser User argument. + * @thread Any. + */ +VMMR3DECL(int) VMR3AtStateRegister(PUVM pUVM, PFNVMATSTATE pfnAtState, void *pvUser) +{ + LogFlow(("VMR3AtStateRegister: pfnAtState=%p pvUser=%p\n", pfnAtState, pvUser)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnAtState, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Allocate a new record. + */ + PVMATSTATE pNew = (PVMATSTATE)MMR3HeapAllocU(pUVM, MM_TAG_VM, sizeof(*pNew)); + if (!pNew) + return VERR_NO_MEMORY; + + /* fill */ + pNew->pfnAtState = pfnAtState; + pNew->pvUser = pvUser; + + /* insert */ + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + pNew->pNext = *pUVM->vm.s.ppAtStateNext; + *pUVM->vm.s.ppAtStateNext = pNew; + pUVM->vm.s.ppAtStateNext = &pNew->pNext; + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + + return VINF_SUCCESS; +} + + +/** + * Deregisters a VM state change callback. + * + * @returns VBox status code. + * @param pUVM The VM handle. + * @param pfnAtState Pointer to callback. + * @param pvUser User argument. + * @thread Any. + */ +VMMR3DECL(int) VMR3AtStateDeregister(PUVM pUVM, PFNVMATSTATE pfnAtState, void *pvUser) +{ + LogFlow(("VMR3AtStateDeregister: pfnAtState=%p pvUser=%p\n", pfnAtState, pvUser)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnAtState, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + RTCritSectEnter(&pUVM->vm.s.AtStateCritSect); + + /* + * Search the list for the entry. + */ + PVMATSTATE pPrev = NULL; + PVMATSTATE pCur = pUVM->vm.s.pAtState; + while ( pCur + && ( pCur->pfnAtState != pfnAtState + || pCur->pvUser != pvUser)) + { + pPrev = pCur; + pCur = pCur->pNext; + } + if (!pCur) + { + AssertMsgFailed(("pfnAtState=%p was not found\n", pfnAtState)); + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + return VERR_FILE_NOT_FOUND; + } + + /* + * Unlink it. 
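/* A minimal usage sketch for the at-state callback API above (the callback name
 * myAtState is hypothetical). The callback only observes the transition; as the
 * VMR3AtStateRegister docs note, it must not itself change the VM state:
 *
 *     static DECLCALLBACK(void) myAtState(PUVM pUVM, VMSTATE enmState, VMSTATE enmOldState, void *pvUser)
 *     {
 *         RT_NOREF(pUVM, pvUser);
 *         LogRel(("myAtState: %s -> %s\n", VMR3GetStateName(enmOldState), VMR3GetStateName(enmState)));
 *     }
 *     ...
 *     int rc = VMR3AtStateRegister(pUVM, myAtState, NULL);
 *     ...
 *     rc = VMR3AtStateDeregister(pUVM, myAtState, NULL);
 */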
+ */ + if (pPrev) + { + pPrev->pNext = pCur->pNext; + if (!pCur->pNext) + pUVM->vm.s.ppAtStateNext = &pPrev->pNext; + } + else + { + pUVM->vm.s.pAtState = pCur->pNext; + if (!pCur->pNext) + pUVM->vm.s.ppAtStateNext = &pUVM->vm.s.pAtState; + } + + RTCritSectLeave(&pUVM->vm.s.AtStateCritSect); + + /* + * Free it. + */ + pCur->pfnAtState = NULL; + pCur->pNext = NULL; + MMR3HeapFree(pCur); + + return VINF_SUCCESS; +} + + +/** + * Registers a VM error callback. + * + * @returns VBox status code. + * @param pUVM The VM handle. + * @param pfnAtError Pointer to callback. + * @param pvUser User argument. + * @thread Any. + */ +VMMR3DECL(int) VMR3AtErrorRegister(PUVM pUVM, PFNVMATERROR pfnAtError, void *pvUser) +{ + LogFlow(("VMR3AtErrorRegister: pfnAtError=%p pvUser=%p\n", pfnAtError, pvUser)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnAtError, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Allocate a new record. + */ + PVMATERROR pNew = (PVMATERROR)MMR3HeapAllocU(pUVM, MM_TAG_VM, sizeof(*pNew)); + if (!pNew) + return VERR_NO_MEMORY; + + /* fill */ + pNew->pfnAtError = pfnAtError; + pNew->pvUser = pvUser; + + /* insert */ + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + pNew->pNext = *pUVM->vm.s.ppAtErrorNext; + *pUVM->vm.s.ppAtErrorNext = pNew; + pUVM->vm.s.ppAtErrorNext = &pNew->pNext; + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + + return VINF_SUCCESS; +} + + +/** + * Deregisters a VM error callback. + * + * @returns VBox status code. + * @param pUVM The VM handle. + * @param pfnAtError Pointer to callback. + * @param pvUser User argument. + * @thread Any. + */ +VMMR3DECL(int) VMR3AtErrorDeregister(PUVM pUVM, PFNVMATERROR pfnAtError, void *pvUser) +{ + LogFlow(("VMR3AtErrorDeregister: pfnAtError=%p pvUser=%p\n", pfnAtError, pvUser)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnAtError, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + + /* + * Search the list for the entry. + */ + PVMATERROR pPrev = NULL; + PVMATERROR pCur = pUVM->vm.s.pAtError; + while ( pCur + && ( pCur->pfnAtError != pfnAtError + || pCur->pvUser != pvUser)) + { + pPrev = pCur; + pCur = pCur->pNext; + } + if (!pCur) + { + AssertMsgFailed(("pfnAtError=%p was not found\n", pfnAtError)); + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + return VERR_FILE_NOT_FOUND; + } + + /* + * Unlink it. + */ + if (pPrev) + { + pPrev->pNext = pCur->pNext; + if (!pCur->pNext) + pUVM->vm.s.ppAtErrorNext = &pPrev->pNext; + } + else + { + pUVM->vm.s.pAtError = pCur->pNext; + if (!pCur->pNext) + pUVM->vm.s.ppAtErrorNext = &pUVM->vm.s.pAtError; + } + + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + + /* + * Free it. + */ + pCur->pfnAtError = NULL; + pCur->pNext = NULL; + MMR3HeapFree(pCur); + + return VINF_SUCCESS; +} + + +/** + * Ellipsis to va_list wrapper for calling pfnAtError. + */ +static void vmR3SetErrorWorkerDoCall(PVM pVM, PVMATERROR pCur, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + pCur->pfnAtError(pVM->pUVM, pCur->pvUser, rc, RT_SRC_POS_ARGS, pszFormat, va); + va_end(va); +} + + +/** + * This is a worker function for GC and Ring-0 calls to VMSetError and VMSetErrorV. + * The message is found in VMINT. + * + * @param pVM The cross context VM structure. + * @thread EMT. + */ +VMMR3_INT_DECL(void) VMR3SetErrorWorker(PVM pVM) +{ + VM_ASSERT_EMT(pVM); + AssertReleaseMsgFailed(("And we have a winner! 
You get to implement Ring-0 and GC VMSetErrorV! Congrats!\n")); + + /* + * Unpack the error (if we managed to format one). + */ + PVMERROR pErr = pVM->vm.s.pErrorR3; + const char *pszFile = NULL; + const char *pszFunction = NULL; + uint32_t iLine = 0; + const char *pszMessage; + int32_t rc = VERR_MM_HYPER_NO_MEMORY; + if (pErr) + { + AssertCompile(sizeof(const char) == sizeof(uint8_t)); + if (pErr->offFile) + pszFile = (const char *)pErr + pErr->offFile; + iLine = pErr->iLine; + if (pErr->offFunction) + pszFunction = (const char *)pErr + pErr->offFunction; + if (pErr->offMessage) + pszMessage = (const char *)pErr + pErr->offMessage; + else + pszMessage = "No message!"; + } + else + pszMessage = "No message! (Failed to allocate memory to put the error message in!)"; + + /* + * Call the at error callbacks. + */ + PUVM pUVM = pVM->pUVM; + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + ASMAtomicIncU32(&pUVM->vm.s.cRuntimeErrors); + for (PVMATERROR pCur = pUVM->vm.s.pAtError; pCur; pCur = pCur->pNext) + vmR3SetErrorWorkerDoCall(pVM, pCur, rc, RT_SRC_POS_ARGS, "%s", pszMessage); + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); +} + + +/** + * Gets the number of errors raised via VMSetError. + * + * This can be used avoid double error messages. + * + * @returns The error count. + * @param pUVM The VM handle. + */ +VMMR3_INT_DECL(uint32_t) VMR3GetErrorCount(PUVM pUVM) +{ + AssertPtrReturn(pUVM, 0); + AssertReturn(pUVM->u32Magic == UVM_MAGIC, 0); + return pUVM->vm.s.cErrors; +} + + +/** + * Creation time wrapper for vmR3SetErrorUV. + * + * @returns rc. + * @param pUVM Pointer to the user mode VM structure. + * @param rc The VBox status code. + * @param SRC_POS The source position of this error. + * @param pszFormat Format string. + * @param ... The arguments. + * @thread Any thread. + */ +static int vmR3SetErrorU(PUVM pUVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + vmR3SetErrorUV(pUVM, rc, pszFile, iLine, pszFunction, pszFormat, &va); + va_end(va); + return rc; +} + + +/** + * Worker which calls everyone listening to the VM error messages. + * + * @param pUVM Pointer to the user mode VM structure. + * @param rc The VBox status code. + * @param SRC_POS The source position of this error. + * @param pszFormat Format string. + * @param pArgs Pointer to the format arguments. + * @thread EMT + */ +DECLCALLBACK(void) vmR3SetErrorUV(PUVM pUVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list *pArgs) +{ + /* + * Log the error. + */ + va_list va3; + va_copy(va3, *pArgs); + RTLogRelPrintf("VMSetError: %s(%d) %s; rc=%Rrc\n" + "VMSetError: %N\n", + pszFile, iLine, pszFunction, rc, + pszFormat, &va3); + va_end(va3); + +#ifdef LOG_ENABLED + va_copy(va3, *pArgs); + RTLogPrintf("VMSetError: %s(%d) %s; rc=%Rrc\n" + "%N\n", + pszFile, iLine, pszFunction, rc, + pszFormat, &va3); + va_end(va3); +#endif + + /* + * Make a copy of the message. + */ + if (pUVM->pVM) + vmSetErrorCopy(pUVM->pVM, rc, RT_SRC_POS_ARGS, pszFormat, *pArgs); + + /* + * Call the at error callbacks. + */ + bool fCalledSomeone = false; + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + ASMAtomicIncU32(&pUVM->vm.s.cErrors); + for (PVMATERROR pCur = pUVM->vm.s.pAtError; pCur; pCur = pCur->pNext) + { + va_list va2; + va_copy(va2, *pArgs); + pCur->pfnAtError(pUVM, pCur->pvUser, rc, RT_SRC_POS_ARGS, pszFormat, va2); + va_end(va2); + fCalledSomeone = true; + } + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); +} + + +/** + * Sets the error message. + * + * @returns rc. 
Meaning you can do: + * @code + * return VM_SET_ERROR_U(pUVM, VERR_OF_YOUR_CHOICE, "descriptive message"); + * @endcode + * @param pUVM The user mode VM handle. + * @param rc VBox status code. + * @param SRC_POS Use RT_SRC_POS. + * @param pszFormat Error message format string. + * @param ... Error message arguments. + * @thread Any + */ +VMMR3DECL(int) VMR3SetError(PUVM pUVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + int rcRet = VMR3SetErrorV(pUVM, rc, pszFile, iLine, pszFunction, pszFormat, va); + va_end(va); + return rcRet; +} + + +/** + * Sets the error message. + * + * @returns rc. Meaning you can do: + * @code + * return VM_SET_ERROR_U(pUVM, VERR_OF_YOUR_CHOICE, "descriptive message"); + * @endcode + * @param pUVM The user mode VM handle. + * @param rc VBox status code. + * @param SRC_POS Use RT_SRC_POS. + * @param pszFormat Error message format string. + * @param va Error message arguments. + * @thread Any + */ +VMMR3DECL(int) VMR3SetErrorV(PUVM pUVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* Take shortcut when called on EMT, skipping VM handle requirement + validation. */ + if (VMR3GetVMCPUThread(pUVM) != NIL_RTTHREAD) + { + va_list vaCopy; + va_copy(vaCopy, va); + vmR3SetErrorUV(pUVM, rc, RT_SRC_POS_ARGS, pszFormat, &vaCopy); + va_end(vaCopy); + return rc; + } + + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, VERR_INVALID_VM_HANDLE); + return VMSetErrorV(pUVM->pVM, rc, pszFile, iLine, pszFunction, pszFormat, va); +} + + + +/** + * Registers a VM runtime error callback. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param pfnAtRuntimeError Pointer to callback. + * @param pvUser User argument. + * @thread Any. + */ +VMMR3DECL(int) VMR3AtRuntimeErrorRegister(PUVM pUVM, PFNVMATRUNTIMEERROR pfnAtRuntimeError, void *pvUser) +{ + LogFlow(("VMR3AtRuntimeErrorRegister: pfnAtRuntimeError=%p pvUser=%p\n", pfnAtRuntimeError, pvUser)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnAtRuntimeError, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + /* + * Allocate a new record. + */ + PVMATRUNTIMEERROR pNew = (PVMATRUNTIMEERROR)MMR3HeapAllocU(pUVM, MM_TAG_VM, sizeof(*pNew)); + if (!pNew) + return VERR_NO_MEMORY; + + /* fill */ + pNew->pfnAtRuntimeError = pfnAtRuntimeError; + pNew->pvUser = pvUser; + + /* insert */ + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + pNew->pNext = *pUVM->vm.s.ppAtRuntimeErrorNext; + *pUVM->vm.s.ppAtRuntimeErrorNext = pNew; + pUVM->vm.s.ppAtRuntimeErrorNext = &pNew->pNext; + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + + return VINF_SUCCESS; +} + + +/** + * Deregisters a VM runtime error callback. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param pfnAtRuntimeError Pointer to callback. + * @param pvUser User argument. + * @thread Any. + */ +VMMR3DECL(int) VMR3AtRuntimeErrorDeregister(PUVM pUVM, PFNVMATRUNTIMEERROR pfnAtRuntimeError, void *pvUser) +{ + LogFlow(("VMR3AtRuntimeErrorDeregister: pfnAtRuntimeError=%p pvUser=%p\n", pfnAtRuntimeError, pvUser)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnAtRuntimeError, VERR_INVALID_PARAMETER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + + /* + * Search the list for the entry. 
+ */ + PVMATRUNTIMEERROR pPrev = NULL; + PVMATRUNTIMEERROR pCur = pUVM->vm.s.pAtRuntimeError; + while ( pCur + && ( pCur->pfnAtRuntimeError != pfnAtRuntimeError + || pCur->pvUser != pvUser)) + { + pPrev = pCur; + pCur = pCur->pNext; + } + if (!pCur) + { + AssertMsgFailed(("pfnAtRuntimeError=%p was not found\n", pfnAtRuntimeError)); + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + return VERR_FILE_NOT_FOUND; + } + + /* + * Unlink it. + */ + if (pPrev) + { + pPrev->pNext = pCur->pNext; + if (!pCur->pNext) + pUVM->vm.s.ppAtRuntimeErrorNext = &pPrev->pNext; + } + else + { + pUVM->vm.s.pAtRuntimeError = pCur->pNext; + if (!pCur->pNext) + pUVM->vm.s.ppAtRuntimeErrorNext = &pUVM->vm.s.pAtRuntimeError; + } + + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + + /* + * Free it. + */ + pCur->pfnAtRuntimeError = NULL; + pCur->pNext = NULL; + MMR3HeapFree(pCur); + + return VINF_SUCCESS; +} + + +/** + * EMT rendezvous worker that vmR3SetRuntimeErrorCommon uses to safely change + * the state to FatalError(LS). + * + * @returns VERR_VM_INVALID_VM_STATE or VINF_EM_SUSPEND. (This is a strict + * return code, see FNVMMEMTRENDEZVOUS.) + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser Ignored. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3SetRuntimeErrorChangeState(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + NOREF(pVCpu); + Assert(!pvUser); NOREF(pvUser); + + /* + * The first EMT thru here changes the state. + */ + if (pVCpu->idCpu == pVM->cCpus - 1) + { + int rc = vmR3TrySetState(pVM, "VMSetRuntimeError", 2, + VMSTATE_FATAL_ERROR, VMSTATE_RUNNING, + VMSTATE_FATAL_ERROR_LS, VMSTATE_RUNNING_LS); + if (RT_FAILURE(rc)) + return rc; + if (rc == 2) + SSMR3Cancel(pVM->pUVM); + + VM_FF_SET(pVM, VM_FF_CHECK_VM_STATE); + } + + /* This'll make sure we get out of whereever we are (e.g. REM). */ + return VINF_EM_SUSPEND; +} + + +/** + * Worker for VMR3SetRuntimeErrorWorker and vmR3SetRuntimeErrorV. + * + * This does the common parts after the error has been saved / retrieved. + * + * @returns VBox status code with modifications, see VMSetRuntimeErrorV. + * + * @param pVM The cross context VM structure. + * @param fFlags The error flags. + * @param pszErrorId Error ID string. + * @param pszFormat Format string. + * @param pVa Pointer to the format arguments. + */ +static int vmR3SetRuntimeErrorCommon(PVM pVM, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list *pVa) +{ + LogRel(("VM: Raising runtime error '%s' (fFlags=%#x)\n", pszErrorId, fFlags)); + PUVM pUVM = pVM->pUVM; + + /* + * Take actions before the call. + */ + int rc; + if (fFlags & VMSETRTERR_FLAGS_FATAL) + rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING | VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR, + vmR3SetRuntimeErrorChangeState, NULL); + else if (fFlags & VMSETRTERR_FLAGS_SUSPEND) + rc = VMR3Suspend(pUVM, VMSUSPENDREASON_RUNTIME_ERROR); + else + rc = VINF_SUCCESS; + + /* + * Do the callback round. + */ + RTCritSectEnter(&pUVM->vm.s.AtErrorCritSect); + ASMAtomicIncU32(&pUVM->vm.s.cRuntimeErrors); + for (PVMATRUNTIMEERROR pCur = pUVM->vm.s.pAtRuntimeError; pCur; pCur = pCur->pNext) + { + va_list va; + va_copy(va, *pVa); + pCur->pfnAtRuntimeError(pUVM, pCur->pvUser, fFlags, pszErrorId, pszFormat, va); + va_end(va); + } + RTCritSectLeave(&pUVM->vm.s.AtErrorCritSect); + + return rc; +} + + +/** + * Ellipsis to va_list wrapper for calling vmR3SetRuntimeErrorCommon. 
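/* A minimal listener sketch for the runtime error callbacks invoked by
 * vmR3SetRuntimeErrorCommon above (the callback name myAtRuntimeError is
 * hypothetical; the parameters mirror the pfnAtRuntimeError call made in the
 * callback loop):
 *
 *     static DECLCALLBACK(void) myAtRuntimeError(PUVM pUVM, void *pvUser, uint32_t fFlags,
 *                                                const char *pszErrorId, const char *pszFormat, va_list va)
 *     {
 *         RT_NOREF(pUVM, pvUser, pszFormat, va);
 *         LogRel(("myAtRuntimeError: '%s' fFlags=%#x\n", pszErrorId, fFlags));
 *     }
 *     ...
 *     int rc = VMR3AtRuntimeErrorRegister(pUVM, myAtRuntimeError, NULL);
 */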
+ */ +static int vmR3SetRuntimeErrorCommonF(PVM pVM, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + int rc = vmR3SetRuntimeErrorCommon(pVM, fFlags, pszErrorId, pszFormat, &va); + va_end(va); + return rc; +} + + +/** + * This is a worker function for RC and Ring-0 calls to VMSetError and + * VMSetErrorV. + * + * The message is found in VMINT. + * + * @returns VBox status code, see VMSetRuntimeError. + * @param pVM The cross context VM structure. + * @thread EMT. + */ +VMMR3_INT_DECL(int) VMR3SetRuntimeErrorWorker(PVM pVM) +{ + VM_ASSERT_EMT(pVM); + AssertReleaseMsgFailed(("And we have a winner! You get to implement Ring-0 and GC VMSetRuntimeErrorV! Congrats!\n")); + + /* + * Unpack the error (if we managed to format one). + */ + const char *pszErrorId = "SetRuntimeError"; + const char *pszMessage = "No message!"; + uint32_t fFlags = VMSETRTERR_FLAGS_FATAL; + PVMRUNTIMEERROR pErr = pVM->vm.s.pRuntimeErrorR3; + if (pErr) + { + AssertCompile(sizeof(const char) == sizeof(uint8_t)); + if (pErr->offErrorId) + pszErrorId = (const char *)pErr + pErr->offErrorId; + if (pErr->offMessage) + pszMessage = (const char *)pErr + pErr->offMessage; + fFlags = pErr->fFlags; + } + + /* + * Join cause with vmR3SetRuntimeErrorV. + */ + return vmR3SetRuntimeErrorCommonF(pVM, fFlags, pszErrorId, "%s", pszMessage); +} + + +/** + * Worker for VMSetRuntimeErrorV for doing the job on EMT in ring-3. + * + * @returns VBox status code with modifications, see VMSetRuntimeErrorV. + * + * @param pVM The cross context VM structure. + * @param fFlags The error flags. + * @param pszErrorId Error ID string. + * @param pszMessage The error message residing the MM heap. + * + * @thread EMT + */ +DECLCALLBACK(int) vmR3SetRuntimeError(PVM pVM, uint32_t fFlags, const char *pszErrorId, char *pszMessage) +{ +#if 0 /** @todo make copy of the error msg. */ + /* + * Make a copy of the message. + */ + va_list va2; + va_copy(va2, *pVa); + vmSetRuntimeErrorCopy(pVM, fFlags, pszErrorId, pszFormat, va2); + va_end(va2); +#endif + + /* + * Join paths with VMR3SetRuntimeErrorWorker. + */ + int rc = vmR3SetRuntimeErrorCommonF(pVM, fFlags, pszErrorId, "%s", pszMessage); + MMR3HeapFree(pszMessage); + return rc; +} + + +/** + * Worker for VMSetRuntimeErrorV for doing the job on EMT in ring-3. + * + * @returns VBox status code with modifications, see VMSetRuntimeErrorV. + * + * @param pVM The cross context VM structure. + * @param fFlags The error flags. + * @param pszErrorId Error ID string. + * @param pszFormat Format string. + * @param pVa Pointer to the format arguments. + * + * @thread EMT + */ +DECLCALLBACK(int) vmR3SetRuntimeErrorV(PVM pVM, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list *pVa) +{ + /* + * Make a copy of the message. + */ + va_list va2; + va_copy(va2, *pVa); + vmSetRuntimeErrorCopy(pVM, fFlags, pszErrorId, pszFormat, va2); + va_end(va2); + + /* + * Join paths with VMR3SetRuntimeErrorWorker. + */ + return vmR3SetRuntimeErrorCommon(pVM, fFlags, pszErrorId, pszFormat, pVa); +} + + +/** + * Gets the number of runtime errors raised via VMR3SetRuntimeError. + * + * This can be used avoid double error messages. + * + * @returns The runtime error count. + * @param pUVM The user mode VM handle. + */ +VMMR3_INT_DECL(uint32_t) VMR3GetRuntimeErrorCount(PUVM pUVM) +{ + return pUVM->vm.s.cRuntimeErrors; +} + + +/** + * Gets the ID virtual of the virtual CPU associated with the calling thread. + * + * @returns The CPU ID. 
NIL_VMCPUID if the thread isn't an EMT. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(RTCPUID) VMR3GetVMCPUId(PVM pVM) +{ + PUVMCPU pUVCpu = (PUVMCPU)RTTlsGet(pVM->pUVM->vm.s.idxTLS); + return pUVCpu + ? pUVCpu->idCpu + : NIL_VMCPUID; +} + + +/** + * Checks if the VM is long-mode (64-bit) capable or not. + * + * @returns true if VM can operate in long-mode, false otherwise. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(bool) VMR3IsLongModeAllowed(PVM pVM) +{ + switch (pVM->bMainExecutionEngine) + { + case VM_EXEC_ENGINE_HW_VIRT: + return HMIsLongModeAllowed(pVM); + + case VM_EXEC_ENGINE_NATIVE_API: +#ifndef IN_RC + return NEMHCIsLongModeAllowed(pVM); +#else + return false; +#endif + + case VM_EXEC_ENGINE_NOT_SET: + AssertFailed(); + RT_FALL_THRU(); + default: + return false; + } +} + + +/** + * Returns the native ID of the current EMT VMCPU thread. + * + * @returns Handle if this is an EMT thread; NIL_RTNATIVETHREAD otherwise + * @param pVM The cross context VM structure. + * @thread EMT + */ +VMMR3DECL(RTNATIVETHREAD) VMR3GetVMCPUNativeThread(PVM pVM) +{ + PUVMCPU pUVCpu = (PUVMCPU)RTTlsGet(pVM->pUVM->vm.s.idxTLS); + + if (!pUVCpu) + return NIL_RTNATIVETHREAD; + + return pUVCpu->vm.s.NativeThreadEMT; +} + + +/** + * Returns the native ID of the current EMT VMCPU thread. + * + * @returns Handle if this is an EMT thread; NIL_RTNATIVETHREAD otherwise + * @param pUVM The user mode VM structure. + * @thread EMT + */ +VMMR3DECL(RTNATIVETHREAD) VMR3GetVMCPUNativeThreadU(PUVM pUVM) +{ + PUVMCPU pUVCpu = (PUVMCPU)RTTlsGet(pUVM->vm.s.idxTLS); + + if (!pUVCpu) + return NIL_RTNATIVETHREAD; + + return pUVCpu->vm.s.NativeThreadEMT; +} + + +/** + * Returns the handle of the current EMT VMCPU thread. + * + * @returns Handle if this is an EMT thread; NIL_RTNATIVETHREAD otherwise + * @param pUVM The user mode VM handle. + * @thread EMT + */ +VMMR3DECL(RTTHREAD) VMR3GetVMCPUThread(PUVM pUVM) +{ + PUVMCPU pUVCpu = (PUVMCPU)RTTlsGet(pUVM->vm.s.idxTLS); + + if (!pUVCpu) + return NIL_RTTHREAD; + + return pUVCpu->vm.s.ThreadEMT; +} + + +/** + * Returns the handle of the current EMT VMCPU thread. + * + * @returns The IPRT thread handle. + * @param pUVCpu The user mode CPU handle. + * @thread EMT + */ +VMMR3_INT_DECL(RTTHREAD) VMR3GetThreadHandle(PUVMCPU pUVCpu) +{ + return pUVCpu->vm.s.ThreadEMT; +} + + +/** + * Return the package and core ID of a CPU. + * + * @returns VBOX status code. + * @param pUVM The user mode VM handle. + * @param idCpu Virtual CPU to get the ID from. + * @param pidCpuCore Where to store the core ID of the virtual CPU. + * @param pidCpuPackage Where to store the package ID of the virtual CPU. + * + */ +VMMR3DECL(int) VMR3GetCpuCoreAndPackageIdFromCpuId(PUVM pUVM, VMCPUID idCpu, uint32_t *pidCpuCore, uint32_t *pidCpuPackage) +{ + /* + * Validate input. + */ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertPtrReturn(pidCpuCore, VERR_INVALID_POINTER); + AssertPtrReturn(pidCpuPackage, VERR_INVALID_POINTER); + if (idCpu >= pVM->cCpus) + return VERR_INVALID_CPU_ID; + + /* + * Set return values. + */ +#ifdef VBOX_WITH_MULTI_CORE + *pidCpuCore = idCpu; + *pidCpuPackage = 0; +#else + *pidCpuCore = 0; + *pidCpuPackage = idCpu; +#endif + + return VINF_SUCCESS; +} + + +/** + * Worker for VMR3HotUnplugCpu. + * + * @returns VINF_EM_WAIT_SPIP (strict status code). + * @param pVM The cross context VM structure. + * @param idCpu The current CPU. 
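/* Usage sketch for VMR3GetCpuCoreAndPackageIdFromCpuId() above (illustrative;
 * assumes a VM with at least two VCPUs). With VBOX_WITH_MULTI_CORE each VCPU is
 * reported as a core of package 0, otherwise as core 0 of its own package:
 *
 *     uint32_t idCore = 0, idPackage = 0;
 *     int rc = VMR3GetCpuCoreAndPackageIdFromCpuId(pUVM, 1, &idCore, &idPackage);   // idCpu = 1
 */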
+ */ +static DECLCALLBACK(int) vmR3HotUnplugCpu(PVM pVM, VMCPUID idCpu) +{ + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + VMCPU_ASSERT_EMT(pVCpu); + + /* + * Reset per CPU resources. + * + * Actually only needed for VT-x because the CPU seems to be still in some + * paged mode and startup fails after a new hot plug event. SVM works fine + * even without this. + */ + Log(("vmR3HotUnplugCpu for VCPU %u\n", idCpu)); + PGMR3ResetCpu(pVM, pVCpu); + PDMR3ResetCpu(pVCpu); + TRPMR3ResetCpu(pVCpu); + CPUMR3ResetCpu(pVM, pVCpu); + EMR3ResetCpu(pVCpu); + HMR3ResetCpu(pVCpu); + NEMR3ResetCpu(pVCpu, false /*fInitIpi*/); + return VINF_EM_WAIT_SIPI; +} + + +/** + * Hot-unplugs a CPU from the guest. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu Virtual CPU to perform the hot unplugging operation on. + */ +VMMR3DECL(int) VMR3HotUnplugCpu(PUVM pUVM, VMCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + + /** @todo r=bird: Don't destroy the EMT, it'll break VMMR3EmtRendezvous and + * broadcast requests. Just note down somewhere that the CPU is + * offline and send it to SIPI wait. Maybe modify VMCPUSTATE and push + * it out of the EM loops when offline. */ + return VMR3ReqCallNoWaitU(pUVM, idCpu, (PFNRT)vmR3HotUnplugCpu, 2, pVM, idCpu); +} + + +/** + * Hot-plugs a CPU on the guest. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param idCpu Virtual CPU to perform the hot plugging operation on. + */ +VMMR3DECL(int) VMR3HotPlugCpu(PUVM pUVM, VMCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + + /** @todo r=bird: Just mark it online and make sure it waits on SIPI. */ + return VINF_SUCCESS; +} + + +/** + * Changes the VMM execution cap. + * + * @returns VBox status code. + * @param pUVM The user mode VM structure. + * @param uCpuExecutionCap New CPU execution cap in percent, 1-100. Where + * 100 is max performance (default). + */ +VMMR3DECL(int) VMR3SetCpuExecutionCap(PUVM pUVM, uint32_t uCpuExecutionCap) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(uCpuExecutionCap > 0 && uCpuExecutionCap <= 100, VERR_INVALID_PARAMETER); + + Log(("VMR3SetCpuExecutionCap: new priority = %d\n", uCpuExecutionCap)); + /* Note: not called from EMT. */ + pVM->uCpuExecutionCap = uCpuExecutionCap; + return VINF_SUCCESS; +} + + +/** + * Control whether the VM should power off when resetting. + * + * @returns VBox status code. + * @param pUVM The user mode VM handle. + * @param fPowerOffInsteadOfReset Flag whether the VM should power off when + * resetting. + */ +VMMR3DECL(int) VMR3SetPowerOffInsteadOfReset(PUVM pUVM, bool fPowerOffInsteadOfReset) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + PVM pVM = pUVM->pVM; + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + + /* Note: not called from EMT. 
*/ + pVM->vm.s.fPowerOffInsteadOfReset = fPowerOffInsteadOfReset; + return VINF_SUCCESS; +} + diff --git a/src/VBox/VMM/VMMR3/VMEmt.cpp b/src/VBox/VMM/VMMR3/VMEmt.cpp new file mode 100644 index 00000000..7c56c5b3 --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMEmt.cpp @@ -0,0 +1,1443 @@ +/* $Id: VMEmt.cpp $ */ +/** @file + * VM - Virtual Machine, The Emulation Thread. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VM +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include "VMInternal.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +int vmR3EmulationThreadWithId(RTTHREAD hThreadSelf, PUVMCPU pUVCpu, VMCPUID idCpu); + + +/** + * The emulation thread main function. + * + * @returns Thread exit code. + * @param hThreadSelf The handle to the executing thread. + * @param pvArgs Pointer to the user mode per-VCpu structure (UVMPCU). + */ +DECLCALLBACK(int) vmR3EmulationThread(RTTHREAD hThreadSelf, void *pvArgs) +{ + PUVMCPU pUVCpu = (PUVMCPU)pvArgs; + return vmR3EmulationThreadWithId(hThreadSelf, pUVCpu, pUVCpu->idCpu); +} + + +/** + * The emulation thread main function, with Virtual CPU ID for debugging. + * + * @returns Thread exit code. + * @param hThreadSelf The handle to the executing thread. + * @param pUVCpu Pointer to the user mode per-VCpu structure. + * @param idCpu The virtual CPU ID, for backtrace purposes. + */ +int vmR3EmulationThreadWithId(RTTHREAD hThreadSelf, PUVMCPU pUVCpu, VMCPUID idCpu) +{ + PUVM pUVM = pUVCpu->pUVM; + int rc; + RT_NOREF_PV(hThreadSelf); + + AssertReleaseMsg(VALID_PTR(pUVM) && pUVM->u32Magic == UVM_MAGIC, + ("Invalid arguments to the emulation thread!\n")); + + rc = RTTlsSet(pUVM->vm.s.idxTLS, pUVCpu); + AssertReleaseMsgRCReturn(rc, ("RTTlsSet %x failed with %Rrc\n", pUVM->vm.s.idxTLS, rc), rc); + + if ( pUVM->pVmm2UserMethods + && pUVM->pVmm2UserMethods->pfnNotifyEmtInit) + pUVM->pVmm2UserMethods->pfnNotifyEmtInit(pUVM->pVmm2UserMethods, pUVM, pUVCpu); + + /* + * The request loop. + */ + rc = VINF_SUCCESS; + Log(("vmR3EmulationThread: Emulation thread starting the days work... Thread=%#x pUVM=%p\n", hThreadSelf, pUVM)); + VMSTATE enmBefore = VMSTATE_CREATED; /* (only used for logging atm.) 
*/ + ASMAtomicIncU32(&pUVM->vm.s.cActiveEmts); + for (;;) + { + /* + * During early init there is no pVM and/or pVCpu, so make a special path + * for that to keep things clearly separate. + */ + PVM pVM = pUVM->pVM; + PVMCPU pVCpu = pUVCpu->pVCpu; + if (!pVCpu || !pVM) + { + /* + * Check for termination first. + */ + if (pUVM->vm.s.fTerminateEMT) + { + rc = VINF_EM_TERMINATE; + break; + } + + /* + * Only the first VCPU may initialize the VM during early init + * and must therefore service all VMCPUID_ANY requests. + * See also VMR3Create + */ + if ( (pUVM->vm.s.pNormalReqs || pUVM->vm.s.pPriorityReqs) + && pUVCpu->idCpu == 0) + { + /* + * Service execute in any EMT request. + */ + rc = VMR3ReqProcessU(pUVM, VMCPUID_ANY, false /*fPriorityOnly*/); + Log(("vmR3EmulationThread: Req rc=%Rrc, VM state %s -> %s\n", rc, VMR3GetStateName(enmBefore), pUVM->pVM ? VMR3GetStateName(pUVM->pVM->enmVMState) : "CREATING")); + } + else if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) + { + /* + * Service execute in specific EMT request. + */ + rc = VMR3ReqProcessU(pUVM, pUVCpu->idCpu, false /*fPriorityOnly*/); + Log(("vmR3EmulationThread: Req (cpu=%u) rc=%Rrc, VM state %s -> %s\n", pUVCpu->idCpu, rc, VMR3GetStateName(enmBefore), pUVM->pVM ? VMR3GetStateName(pUVM->pVM->enmVMState) : "CREATING")); + } + else + { + /* + * Nothing important is pending, so wait for something. + */ + rc = VMR3WaitU(pUVCpu); + if (RT_FAILURE(rc)) + { + AssertLogRelMsgFailed(("VMR3WaitU failed with %Rrc\n", rc)); + break; + } + } + } + else + { + /* + * Pending requests which needs servicing? + * + * We check for state changes in addition to status codes when + * servicing requests. (Look after the ifs.) + */ + enmBefore = pVM->enmVMState; + if (pUVM->vm.s.fTerminateEMT) + { + rc = VINF_EM_TERMINATE; + break; + } + + if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + rc = VMMR3EmtRendezvousFF(pVM, &pVM->aCpus[idCpu]); + Log(("vmR3EmulationThread: Rendezvous rc=%Rrc, VM state %s -> %s\n", rc, VMR3GetStateName(enmBefore), VMR3GetStateName(pVM->enmVMState))); + } + else if (pUVM->vm.s.pNormalReqs || pUVM->vm.s.pPriorityReqs) + { + /* + * Service execute in any EMT request. + */ + rc = VMR3ReqProcessU(pUVM, VMCPUID_ANY, false /*fPriorityOnly*/); + Log(("vmR3EmulationThread: Req rc=%Rrc, VM state %s -> %s\n", rc, VMR3GetStateName(enmBefore), VMR3GetStateName(pVM->enmVMState))); + } + else if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) + { + /* + * Service execute in specific EMT request. + */ + rc = VMR3ReqProcessU(pUVM, pUVCpu->idCpu, false /*fPriorityOnly*/); + Log(("vmR3EmulationThread: Req (cpu=%u) rc=%Rrc, VM state %s -> %s\n", pUVCpu->idCpu, rc, VMR3GetStateName(enmBefore), VMR3GetStateName(pVM->enmVMState))); + } + else if ( VM_FF_IS_SET(pVM, VM_FF_DBGF) + || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_DBGF)) + { + /* + * Service the debugger request. + */ + rc = DBGFR3VMMForcedAction(pVM, pVCpu); + Log(("vmR3EmulationThread: Dbg rc=%Rrc, VM state %s -> %s\n", rc, VMR3GetStateName(enmBefore), VMR3GetStateName(pVM->enmVMState))); + } + else if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_RESET)) + { + /* + * Service a delayed reset request. + */ + rc = VBOXSTRICTRC_VAL(VMR3ResetFF(pVM)); + VM_FF_CLEAR(pVM, VM_FF_RESET); + Log(("vmR3EmulationThread: Reset rc=%Rrc, VM state %s -> %s\n", rc, VMR3GetStateName(enmBefore), VMR3GetStateName(pVM->enmVMState))); + } + else + { + /* + * Nothing important is pending, so wait for something. 
+ */ + rc = VMR3WaitU(pUVCpu); + if (RT_FAILURE(rc)) + { + AssertLogRelMsgFailed(("VMR3WaitU failed with %Rrc\n", rc)); + break; + } + } + + /* + * Check for termination requests, these have extremely high priority. + */ + if ( rc == VINF_EM_TERMINATE + || pUVM->vm.s.fTerminateEMT) + break; + } + + /* + * Some requests (both VMR3Req* and the DBGF) can potentially resume + * or start the VM, in that case we'll get a change in VM status + * indicating that we're now running. + */ + if (RT_SUCCESS(rc)) + { + pVM = pUVM->pVM; + if (pVM) + { + pVCpu = &pVM->aCpus[idCpu]; + if ( pVM->enmVMState == VMSTATE_RUNNING + && VMCPUSTATE_IS_STARTED(VMCPU_GET_STATE(pVCpu))) + { + rc = EMR3ExecuteVM(pVM, pVCpu); + Log(("vmR3EmulationThread: EMR3ExecuteVM() -> rc=%Rrc, enmVMState=%d\n", rc, pVM->enmVMState)); + } + } + } + + } /* forever */ + + + /* + * Decrement the active EMT count if we haven't done it yet in vmR3Destroy. + */ + if (!pUVCpu->vm.s.fBeenThruVmDestroy) + ASMAtomicDecU32(&pUVM->vm.s.cActiveEmts); + + + /* + * Cleanup and exit. + * EMT0 does the VM destruction after all other EMTs have deregistered and terminated. + */ + Log(("vmR3EmulationThread: Terminating emulation thread! Thread=%#x pUVM=%p rc=%Rrc enmBefore=%d enmVMState=%d\n", + hThreadSelf, pUVM, rc, enmBefore, pUVM->pVM ? pUVM->pVM->enmVMState : VMSTATE_TERMINATED)); + PVM pVM; + if ( idCpu == 0 + && (pVM = pUVM->pVM) != NULL) + { + /* Wait for any other EMTs to terminate before we destroy the VM (see vmR3DestroyVM). */ + for (VMCPUID iCpu = 1; iCpu < pUVM->cCpus; iCpu++) + { + RTTHREAD hThread; + ASMAtomicXchgHandle(&pUVM->aCpus[iCpu].vm.s.ThreadEMT, NIL_RTTHREAD, &hThread); + if (hThread != NIL_RTTHREAD) + { + int rc2 = RTThreadWait(hThread, 5 * RT_MS_1SEC, NULL); + AssertLogRelMsgRC(rc2, ("iCpu=%u rc=%Rrc\n", iCpu, rc2)); + if (RT_FAILURE(rc2)) + pUVM->aCpus[iCpu].vm.s.ThreadEMT = hThread; + } + } + + /* Switch to the terminated state, clearing the VM pointer and finally destroy the VM. */ + vmR3SetTerminated(pVM); + + pUVM->pVM = NULL; + for (VMCPUID iCpu = 0; iCpu < pUVM->cCpus; iCpu++) + { + pUVM->aCpus[iCpu].pVM = NULL; + pUVM->aCpus[iCpu].pVCpu = NULL; + } + + int rc2 = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL); + AssertLogRelRC(rc2); + } + /* Deregister the EMT with VMMR0. */ + else if ( idCpu != 0 + && (pVM = pUVM->pVM) != NULL) + { + int rc2 = SUPR3CallVMMR0Ex(pVM->pVMR0, idCpu, VMMR0_DO_GVMM_DEREGISTER_VMCPU, 0, NULL); + AssertLogRelRC(rc2); + } + + if ( pUVM->pVmm2UserMethods + && pUVM->pVmm2UserMethods->pfnNotifyEmtTerm) + pUVM->pVmm2UserMethods->pfnNotifyEmtTerm(pUVM->pVmm2UserMethods, pUVM, pUVCpu); + + pUVCpu->vm.s.NativeThreadEMT = NIL_RTNATIVETHREAD; + Log(("vmR3EmulationThread: EMT is terminated.\n")); + return rc; +} + + +/** + * Gets the name of a halt method. + * + * @returns Pointer to a read only string. + * @param enmMethod The method. + */ +static const char *vmR3GetHaltMethodName(VMHALTMETHOD enmMethod) +{ + switch (enmMethod) + { + case VMHALTMETHOD_BOOTSTRAP: return "bootstrap"; + case VMHALTMETHOD_DEFAULT: return "default"; + case VMHALTMETHOD_OLD: return "old"; + case VMHALTMETHOD_1: return "method1"; + //case VMHALTMETHOD_2: return "method2"; + case VMHALTMETHOD_GLOBAL_1: return "global1"; + default: return "unknown"; + } +} + + +/** + * Signal a fatal wait error. + * + * @returns Fatal error code to be propagated up the call stack. + * @param pUVCpu The user mode per CPU structure of the calling + * EMT. + * @param pszFmt The error format with a single %Rrc in it. 
+ * @param rcFmt The status code to format. + */ +static int vmR3FatalWaitError(PUVMCPU pUVCpu, const char *pszFmt, int rcFmt) +{ + /** @todo This is wrong ... raise a fatal error / guru meditation + * instead. */ + AssertLogRelMsgFailed((pszFmt, rcFmt)); + ASMAtomicUoWriteBool(&pUVCpu->pUVM->vm.s.fTerminateEMT, true); + if (pUVCpu->pVM) + VM_FF_SET(pUVCpu->pVM, VM_FF_CHECK_VM_STATE); + return VERR_VM_FATAL_WAIT_ERROR; +} + + +/** + * The old halt loop. + */ +static DECLCALLBACK(int) vmR3HaltOldDoHalt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t /* u64Now*/) +{ + /* + * Halt loop. + */ + PVM pVM = pUVCpu->pVM; + PVMCPU pVCpu = pUVCpu->pVCpu; + + int rc = VINF_SUCCESS; + ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true); + //unsigned cLoops = 0; + for (;;) + { + /* + * Work the timers and check if we can exit. + * The poll call gives us the ticks left to the next event in + * addition to perhaps set an FF. + */ + uint64_t const u64StartTimers = RTTimeNanoTS(); + TMR3TimerQueuesDo(pVM); + uint64_t const cNsElapsedTimers = RTTimeNanoTS() - u64StartTimers; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltTimers, cNsElapsedTimers); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + uint64_t u64NanoTS; + TMTimerPollGIP(pVM, pVCpu, &u64NanoTS); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + + /* + * Wait for a while. Someone will wake us up or interrupt the call if + * anything needs our attention. + */ + if (u64NanoTS < 50000) + { + //RTLogPrintf("u64NanoTS=%RI64 cLoops=%d spin\n", u64NanoTS, cLoops++); + /* spin */; + } + else + { + VMMR3YieldStop(pVM); + //uint64_t u64Start = RTTimeNanoTS(); + if (u64NanoTS < 870000) /* this is a bit speculative... works fine on linux. */ + { + //RTLogPrintf("u64NanoTS=%RI64 cLoops=%d yield", u64NanoTS, cLoops++); + uint64_t const u64StartSchedYield = RTTimeNanoTS(); + RTThreadYield(); /* this is the best we can do here */ + uint64_t const cNsElapsedSchedYield = RTTimeNanoTS() - u64StartSchedYield; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltYield, cNsElapsedSchedYield); + } + else if (u64NanoTS < 2000000) + { + //RTLogPrintf("u64NanoTS=%RI64 cLoops=%d sleep 1ms", u64NanoTS, cLoops++); + uint64_t const u64StartSchedHalt = RTTimeNanoTS(); + rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1); + uint64_t const cNsElapsedSchedHalt = RTTimeNanoTS() - u64StartSchedHalt; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlock, cNsElapsedSchedHalt); + } + else + { + //RTLogPrintf("u64NanoTS=%RI64 cLoops=%d sleep %dms", u64NanoTS, cLoops++, (uint32_t)RT_MIN((u64NanoTS - 500000) / 1000000, 15)); + uint64_t const u64StartSchedHalt = RTTimeNanoTS(); + rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, RT_MIN((u64NanoTS - 1000000) / 1000000, 15)); + uint64_t const cNsElapsedSchedHalt = RTTimeNanoTS() - u64StartSchedHalt; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlock, cNsElapsedSchedHalt); + } + //uint64_t u64Slept = RTTimeNanoTS() - u64Start; + //RTLogPrintf(" -> rc=%Rrc in %RU64 ns / %RI64 ns delta\n", rc, u64Slept, u64NanoTS - u64Slept); + } + if (rc == VERR_TIMEOUT) + rc = VINF_SUCCESS; + else if (RT_FAILURE(rc)) + { + rc = vmR3FatalWaitError(pUVCpu, "RTSemEventWait->%Rrc\n", rc); + break; + } + } + + ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false); + return rc; +} + + +/** + * Initialize the configuration of halt method 1 & 2. + * + * @return VBox status code. Failure on invalid CFGM data. + * @param pUVM The user mode VM structure. 
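/* A sketch of how the overrides queried below can be supplied through CFGM
 * (illustrative; the 5 ms value simply restates the non-debug default, and the
 * node is assumed to be created while the VM configuration tree is being built):
 *
 *     PCFGMNODE pHaltCfg;
 *     int rc = CFGMR3InsertNode(CFGMR3GetChild(CFGMR3GetRoot(pVM), "VMM"), "HaltedMethod1", &pHaltCfg);
 *     if (RT_SUCCESS(rc))
 *         rc = CFGMR3InsertInteger(pHaltCfg, "MinBlockInterval", 5*1000000);   // nanoseconds (5 ms)
 */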
+ */ +static int vmR3HaltMethod12ReadConfigU(PUVM pUVM) +{ + /* + * The defaults. + */ +#if 1 /* DEBUGGING STUFF - REMOVE LATER */ + pUVM->vm.s.Halt.Method12.u32LagBlockIntervalDivisorCfg = 4; + pUVM->vm.s.Halt.Method12.u32MinBlockIntervalCfg = 2*1000000; + pUVM->vm.s.Halt.Method12.u32MaxBlockIntervalCfg = 75*1000000; + pUVM->vm.s.Halt.Method12.u32StartSpinningCfg = 30*1000000; + pUVM->vm.s.Halt.Method12.u32StopSpinningCfg = 20*1000000; +#else + pUVM->vm.s.Halt.Method12.u32LagBlockIntervalDivisorCfg = 4; + pUVM->vm.s.Halt.Method12.u32MinBlockIntervalCfg = 5*1000000; + pUVM->vm.s.Halt.Method12.u32MaxBlockIntervalCfg = 200*1000000; + pUVM->vm.s.Halt.Method12.u32StartSpinningCfg = 20*1000000; + pUVM->vm.s.Halt.Method12.u32StopSpinningCfg = 2*1000000; +#endif + + /* + * Query overrides. + * + * I don't have time to bother with niceties such as invalid value checks + * here right now. sorry. + */ + PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pUVM->pVM), "/VMM/HaltedMethod1"); + if (pCfg) + { + uint32_t u32; + if (RT_SUCCESS(CFGMR3QueryU32(pCfg, "LagBlockIntervalDivisor", &u32))) + pUVM->vm.s.Halt.Method12.u32LagBlockIntervalDivisorCfg = u32; + if (RT_SUCCESS(CFGMR3QueryU32(pCfg, "MinBlockInterval", &u32))) + pUVM->vm.s.Halt.Method12.u32MinBlockIntervalCfg = u32; + if (RT_SUCCESS(CFGMR3QueryU32(pCfg, "MaxBlockInterval", &u32))) + pUVM->vm.s.Halt.Method12.u32MaxBlockIntervalCfg = u32; + if (RT_SUCCESS(CFGMR3QueryU32(pCfg, "StartSpinning", &u32))) + pUVM->vm.s.Halt.Method12.u32StartSpinningCfg = u32; + if (RT_SUCCESS(CFGMR3QueryU32(pCfg, "StopSpinning", &u32))) + pUVM->vm.s.Halt.Method12.u32StopSpinningCfg = u32; + LogRel(("VMEmt: HaltedMethod1 config: %d/%d/%d/%d/%d\n", + pUVM->vm.s.Halt.Method12.u32LagBlockIntervalDivisorCfg, + pUVM->vm.s.Halt.Method12.u32MinBlockIntervalCfg, + pUVM->vm.s.Halt.Method12.u32MaxBlockIntervalCfg, + pUVM->vm.s.Halt.Method12.u32StartSpinningCfg, + pUVM->vm.s.Halt.Method12.u32StopSpinningCfg)); + } + + return VINF_SUCCESS; +} + + +/** + * Initialize halt method 1. + * + * @return VBox status code. + * @param pUVM Pointer to the user mode VM structure. + */ +static DECLCALLBACK(int) vmR3HaltMethod1Init(PUVM pUVM) +{ + return vmR3HaltMethod12ReadConfigU(pUVM); +} + + +/** + * Method 1 - Block whenever possible, and when lagging behind + * switch to spinning for 10-30ms with occasional blocking until + * the lag has been eliminated. + */ +static DECLCALLBACK(int) vmR3HaltMethod1Halt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now) +{ + PUVM pUVM = pUVCpu->pUVM; + PVMCPU pVCpu = pUVCpu->pVCpu; + PVM pVM = pUVCpu->pVM; + + /* + * To simplify things, we decide up-front whether we should switch to spinning or + * not. This makes some ASSUMPTIONS about the cause of the spinning (PIT/RTC/PCNet) + * and that it will generate interrupts or other events that will cause us to exit + * the halt loop. 
+ */ + bool fBlockOnce = false; + bool fSpinning = false; + uint32_t u32CatchUpPct = TMVirtualSyncGetCatchUpPct(pVM); + if (u32CatchUpPct /* non-zero if catching up */) + { + if (pUVCpu->vm.s.Halt.Method12.u64StartSpinTS) + { + fSpinning = TMVirtualSyncGetLag(pVM) >= pUVM->vm.s.Halt.Method12.u32StopSpinningCfg; + if (fSpinning) + { + uint64_t u64Lag = TMVirtualSyncGetLag(pVM); + fBlockOnce = u64Now - pUVCpu->vm.s.Halt.Method12.u64LastBlockTS + > RT_MAX(pUVM->vm.s.Halt.Method12.u32MinBlockIntervalCfg, + RT_MIN(u64Lag / pUVM->vm.s.Halt.Method12.u32LagBlockIntervalDivisorCfg, + pUVM->vm.s.Halt.Method12.u32MaxBlockIntervalCfg)); + } + else + { + //RTLogRelPrintf("Stopped spinning (%u ms)\n", (u64Now - pUVCpu->vm.s.Halt.Method12.u64StartSpinTS) / 1000000); + pUVCpu->vm.s.Halt.Method12.u64StartSpinTS = 0; + } + } + else + { + fSpinning = TMVirtualSyncGetLag(pVM) >= pUVM->vm.s.Halt.Method12.u32StartSpinningCfg; + if (fSpinning) + pUVCpu->vm.s.Halt.Method12.u64StartSpinTS = u64Now; + } + } + else if (pUVCpu->vm.s.Halt.Method12.u64StartSpinTS) + { + //RTLogRelPrintf("Stopped spinning (%u ms)\n", (u64Now - pUVCpu->vm.s.Halt.Method12.u64StartSpinTS) / 1000000); + pUVCpu->vm.s.Halt.Method12.u64StartSpinTS = 0; + } + + /* + * Halt loop. + */ + int rc = VINF_SUCCESS; + ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true); + unsigned cLoops = 0; + for (;; cLoops++) + { + /* + * Work the timers and check if we can exit. + */ + uint64_t const u64StartTimers = RTTimeNanoTS(); + TMR3TimerQueuesDo(pVM); + uint64_t const cNsElapsedTimers = RTTimeNanoTS() - u64StartTimers; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltTimers, cNsElapsedTimers); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + + /* + * Estimate time left to the next event. + */ + uint64_t u64NanoTS; + TMTimerPollGIP(pVM, pVCpu, &u64NanoTS); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + + /* + * Block if we're not spinning and the interval isn't all that small. + */ + if ( ( !fSpinning + || fBlockOnce) +#if 1 /* DEBUGGING STUFF - REMOVE LATER */ + && u64NanoTS >= 100000) /* 0.100 ms */ +#else + && u64NanoTS >= 250000) /* 0.250 ms */ +#endif + { + const uint64_t Start = pUVCpu->vm.s.Halt.Method12.u64LastBlockTS = RTTimeNanoTS(); + VMMR3YieldStop(pVM); + + uint32_t cMilliSecs = RT_MIN(u64NanoTS / 1000000, 15); + if (cMilliSecs <= pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLongAvg) + cMilliSecs = 1; + else + cMilliSecs -= pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLongAvg; + + //RTLogRelPrintf("u64NanoTS=%RI64 cLoops=%3d sleep %02dms (%7RU64) ", u64NanoTS, cLoops, cMilliSecs, u64NanoTS); + uint64_t const u64StartSchedHalt = RTTimeNanoTS(); + rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, cMilliSecs); + uint64_t const cNsElapsedSchedHalt = RTTimeNanoTS() - u64StartSchedHalt; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlock, cNsElapsedSchedHalt); + + if (rc == VERR_TIMEOUT) + rc = VINF_SUCCESS; + else if (RT_FAILURE(rc)) + { + rc = vmR3FatalWaitError(pUVCpu, "RTSemEventWait->%Rrc\n", rc); + break; + } + + /* + * Calc the statistics. + * Update averages every 16th time, and flush parts of the history every 64th time. 
+ */ + const uint64_t Elapsed = RTTimeNanoTS() - Start; + pUVCpu->vm.s.Halt.Method12.cNSBlocked += Elapsed; + if (Elapsed > u64NanoTS) + pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLong += Elapsed - u64NanoTS; + pUVCpu->vm.s.Halt.Method12.cBlocks++; + if (!(pUVCpu->vm.s.Halt.Method12.cBlocks & 0xf)) + { + pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLongAvg = pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLong / pUVCpu->vm.s.Halt.Method12.cBlocks; + if (!(pUVCpu->vm.s.Halt.Method12.cBlocks & 0x3f)) + { + pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLong = pUVCpu->vm.s.Halt.Method12.cNSBlockedTooLongAvg * 0x40; + pUVCpu->vm.s.Halt.Method12.cBlocks = 0x40; + } + } + //RTLogRelPrintf(" -> %7RU64 ns / %7RI64 ns delta%s\n", Elapsed, Elapsed - u64NanoTS, fBlockOnce ? " (block once)" : ""); + + /* + * Clear the block once flag if we actually blocked. + */ + if ( fBlockOnce + && Elapsed > 100000 /* 0.1 ms */) + fBlockOnce = false; + } + } + //if (fSpinning) RTLogRelPrintf("spun for %RU64 ns %u loops; lag=%RU64 pct=%d\n", RTTimeNanoTS() - u64Now, cLoops, TMVirtualSyncGetLag(pVM), u32CatchUpPct); + + ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false); + return rc; +} + + +/** + * Initialize the global 1 halt method. + * + * @return VBox status code. + * @param pUVM Pointer to the user mode VM structure. + */ +static DECLCALLBACK(int) vmR3HaltGlobal1Init(PUVM pUVM) +{ + /* + * The defaults. + */ + uint32_t cNsResolution = SUPSemEventMultiGetResolution(pUVM->vm.s.pSession); + if (cNsResolution > 5*RT_NS_100US) + pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg = 50000; + else if (cNsResolution > RT_NS_100US) + pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg = cNsResolution / 4; + else + pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg = 2000; + + /* + * Query overrides. + * + * I don't have time to bother with niceties such as invalid value checks + * here right now. sorry. + */ + PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pUVM->pVM), "/VMM/HaltedGlobal1"); + if (pCfg) + { + uint32_t u32; + if (RT_SUCCESS(CFGMR3QueryU32(pCfg, "SpinBlockThreshold", &u32))) + pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg = u32; + } + LogRel(("VMEmt: HaltedGlobal1 config: cNsSpinBlockThresholdCfg=%u\n", + pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg)); + return VINF_SUCCESS; +} + + +/** + * The global 1 halt method - Block in GMM (ring-0) and let it + * try take care of the global scheduling of EMT threads. + */ +static DECLCALLBACK(int) vmR3HaltGlobal1Halt(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now) +{ + PUVM pUVM = pUVCpu->pUVM; + PVMCPU pVCpu = pUVCpu->pVCpu; + PVM pVM = pUVCpu->pVM; + Assert(VMMGetCpu(pVM) == pVCpu); + NOREF(u64Now); + + /* + * Halt loop. + */ + //uint64_t u64NowLog, u64Start; + //u64Start = u64NowLog = RTTimeNanoTS(); + int rc = VINF_SUCCESS; + ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true); + unsigned cLoops = 0; + for (;; cLoops++) + { + /* + * Work the timers and check if we can exit. + */ + uint64_t const u64StartTimers = RTTimeNanoTS(); + TMR3TimerQueuesDo(pVM); + uint64_t const cNsElapsedTimers = RTTimeNanoTS() - u64StartTimers; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltTimers, cNsElapsedTimers); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + + /* + * Estimate time left to the next event. 
+ */ + //u64NowLog = RTTimeNanoTS(); + uint64_t u64Delta; + uint64_t u64GipTime = TMTimerPollGIP(pVM, pVCpu, &u64Delta); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + + /* + * Block if we're not spinning and the interval isn't all that small. + */ + if (u64Delta >= pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg) + { + VMMR3YieldStop(pVM); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + break; + + //RTLogPrintf("loop=%-3d u64GipTime=%'llu / %'llu now=%'llu / %'llu\n", cLoops, u64GipTime, u64Delta, u64NowLog, u64GipTime - u64NowLog); + uint64_t const u64StartSchedHalt = RTTimeNanoTS(); + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_HALT, u64GipTime, NULL); + uint64_t const u64EndSchedHalt = RTTimeNanoTS(); + uint64_t const cNsElapsedSchedHalt = u64EndSchedHalt - u64StartSchedHalt; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlock, cNsElapsedSchedHalt); + + if (rc == VERR_INTERRUPTED) + rc = VINF_SUCCESS; + else if (RT_FAILURE(rc)) + { + rc = vmR3FatalWaitError(pUVCpu, "vmR3HaltGlobal1Halt: VMMR0_DO_GVMM_SCHED_HALT->%Rrc\n", rc); + break; + } + else + { + int64_t const cNsOverslept = u64EndSchedHalt - u64GipTime; + if (cNsOverslept > 50000) + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlockOverslept, cNsOverslept); + else if (cNsOverslept < -50000) + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlockInsomnia, cNsElapsedSchedHalt); + else + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltBlockOnTime, cNsElapsedSchedHalt); + } + } + /* + * When spinning call upon the GVMM and do some wakups once + * in a while, it's not like we're actually busy or anything. + */ + else if (!(cLoops & 0x1fff)) + { + uint64_t const u64StartSchedYield = RTTimeNanoTS(); + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_POLL, false /* don't yield */, NULL); + uint64_t const cNsElapsedSchedYield = RTTimeNanoTS() - u64StartSchedYield; + STAM_REL_PROFILE_ADD_PERIOD(&pUVCpu->vm.s.StatHaltYield, cNsElapsedSchedYield); + } + } + //RTLogPrintf("*** %u loops %'llu; lag=%RU64\n", cLoops, u64NowLog - u64Start, TMVirtualSyncGetLag(pVM)); + + ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false); + return rc; +} + + +/** + * The global 1 halt method - VMR3Wait() worker. + * + * @returns VBox status code. + * @param pUVCpu Pointer to the user mode VMCPU structure. + */ +static DECLCALLBACK(int) vmR3HaltGlobal1Wait(PUVMCPU pUVCpu) +{ + ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true); + + PVM pVM = pUVCpu->pUVM->pVM; + PVMCPU pVCpu = VMMGetCpu(pVM); + Assert(pVCpu->idCpu == pUVCpu->idCpu); + + int rc = VINF_SUCCESS; + for (;;) + { + /* + * Check Relevant FFs. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK)) + break; + + /* + * Wait for a while. Someone will wake us up or interrupt the call if + * anything needs our attention. + */ + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, VMMR0_DO_GVMM_SCHED_HALT, RTTimeNanoTS() + 1000000000 /* +1s */, NULL); + if (rc == VERR_INTERRUPTED) + rc = VINF_SUCCESS; + else if (RT_FAILURE(rc)) + { + rc = vmR3FatalWaitError(pUVCpu, "vmR3HaltGlobal1Wait: VMMR0_DO_GVMM_SCHED_HALT->%Rrc\n", rc); + break; + } + } + + ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false); + return rc; +} + + +/** + * The global 1 halt method - VMR3NotifyFF() worker. + * + * @param pUVCpu Pointer to the user mode VMCPU structure. 
+ * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_*. + */ +static DECLCALLBACK(void) vmR3HaltGlobal1NotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags) +{ + /* + * With ring-0 halting, the fWait flag isn't set, so we have to check the + * CPU state to figure out whether to do a wakeup call. + */ + PVMCPU pVCpu = pUVCpu->pVCpu; + if (pVCpu) + { + VMCPUSTATE enmState = VMCPU_GET_STATE(pVCpu); + if (enmState == VMCPUSTATE_STARTED_HALTED || pUVCpu->vm.s.fWait) + { + int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_WAKE_UP, 0, NULL); + AssertRC(rc); + + } + else if ( (fFlags & VMNOTIFYFF_FLAGS_POKE) + || !(fFlags & VMNOTIFYFF_FLAGS_DONE_REM)) + { + if (enmState == VMCPUSTATE_STARTED_EXEC) + { + if (fFlags & VMNOTIFYFF_FLAGS_POKE) + { + int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_POKE, 0, NULL); + AssertRC(rc); + } + } + else if ( enmState == VMCPUSTATE_STARTED_EXEC_NEM + || enmState == VMCPUSTATE_STARTED_EXEC_NEM_WAIT) + NEMR3NotifyFF(pUVCpu->pVM, pVCpu, fFlags); +#ifdef VBOX_WITH_REM + else if (enmState == VMCPUSTATE_STARTED_EXEC_REM) + { + if (!(fFlags & VMNOTIFYFF_FLAGS_DONE_REM)) + REMR3NotifyFF(pUVCpu->pVM); + } +#endif + } + } + /* This probably makes little sense: */ + else if (pUVCpu->vm.s.fWait) + { + int rc = SUPR3CallVMMR0Ex(pUVCpu->pVM->pVMR0, pUVCpu->idCpu, VMMR0_DO_GVMM_SCHED_WAKE_UP, 0, NULL); + AssertRC(rc); + } +} + + +/** + * Bootstrap VMR3Wait() worker. + * + * @returns VBox status code. + * @param pUVCpu Pointer to the user mode VMCPU structure. + */ +static DECLCALLBACK(int) vmR3BootstrapWait(PUVMCPU pUVCpu) +{ + PUVM pUVM = pUVCpu->pUVM; + + ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true); + + int rc = VINF_SUCCESS; + for (;;) + { + /* + * Check Relevant FFs. + */ + if (pUVM->vm.s.pNormalReqs || pUVM->vm.s.pPriorityReqs) /* global requests pending? */ + break; + if (pUVCpu->vm.s.pNormalReqs || pUVCpu->vm.s.pPriorityReqs) /* local requests pending? */ + break; + + if ( pUVCpu->pVM + && ( VM_FF_IS_ANY_SET(pUVCpu->pVM, VM_FF_EXTERNAL_SUSPENDED_MASK) + || VMCPU_FF_IS_ANY_SET(VMMGetCpu(pUVCpu->pVM), VMCPU_FF_EXTERNAL_SUSPENDED_MASK) + ) + ) + break; + if (pUVM->vm.s.fTerminateEMT) + break; + + /* + * Wait for a while. Someone will wake us up or interrupt the call if + * anything needs our attention. + */ + rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000); + if (rc == VERR_TIMEOUT) + rc = VINF_SUCCESS; + else if (RT_FAILURE(rc)) + { + rc = vmR3FatalWaitError(pUVCpu, "RTSemEventWait->%Rrc\n", rc); + break; + } + } + + ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false); + return rc; +} + + +/** + * Bootstrap VMR3NotifyFF() worker. + * + * @param pUVCpu Pointer to the user mode VMCPU structure. + * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_*. + */ +static DECLCALLBACK(void) vmR3BootstrapNotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags) +{ + if (pUVCpu->vm.s.fWait) + { + int rc = RTSemEventSignal(pUVCpu->vm.s.EventSemWait); + AssertRC(rc); + } + NOREF(fFlags); +} + + +/** + * Default VMR3Wait() worker. + * + * @returns VBox status code. + * @param pUVCpu Pointer to the user mode VMCPU structure. + */ +static DECLCALLBACK(int) vmR3DefaultWait(PUVMCPU pUVCpu) +{ + ASMAtomicWriteBool(&pUVCpu->vm.s.fWait, true); + + PVM pVM = pUVCpu->pVM; + PVMCPU pVCpu = pUVCpu->pVCpu; + int rc = VINF_SUCCESS; + for (;;) + { + /* + * Check Relevant FFs. + */ + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK)) + break; + + /* + * Wait for a while. 
Someone will wake us up or interrupt the call if + * anything needs our attention. + */ + rc = RTSemEventWait(pUVCpu->vm.s.EventSemWait, 1000); + if (rc == VERR_TIMEOUT) + rc = VINF_SUCCESS; + else if (RT_FAILURE(rc)) + { + rc = vmR3FatalWaitError(pUVCpu, "RTSemEventWait->%Rrc", rc); + break; + } + } + + ASMAtomicUoWriteBool(&pUVCpu->vm.s.fWait, false); + return rc; +} + + +/** + * Default VMR3NotifyFF() worker. + * + * @param pUVCpu Pointer to the user mode VMCPU structure. + * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_*. + */ +static DECLCALLBACK(void) vmR3DefaultNotifyCpuFF(PUVMCPU pUVCpu, uint32_t fFlags) +{ + if (pUVCpu->vm.s.fWait) + { + int rc = RTSemEventSignal(pUVCpu->vm.s.EventSemWait); + AssertRC(rc); + } + else + { + PVMCPU pVCpu = pUVCpu->pVCpu; + if (pVCpu) + { + VMCPUSTATE enmState = pVCpu->enmState; + if ( enmState == VMCPUSTATE_STARTED_EXEC_NEM + || enmState == VMCPUSTATE_STARTED_EXEC_NEM_WAIT) + NEMR3NotifyFF(pUVCpu->pVM, pVCpu, fFlags); +#ifdef VBOX_WITH_REM + else if ( !(fFlags & VMNOTIFYFF_FLAGS_DONE_REM) + && enmState == VMCPUSTATE_STARTED_EXEC_REM) + REMR3NotifyFF(pUVCpu->pVM); +#endif + } + } +} + + +/** + * Array with halt method descriptors. + * VMINT::iHaltMethod contains an index into this array. + */ +static const struct VMHALTMETHODDESC +{ + /** The halt method ID. */ + VMHALTMETHOD enmHaltMethod; + /** Set if the method support halting directly in ring-0. */ + bool fMayHaltInRing0; + /** The init function for loading config and initialize variables. */ + DECLR3CALLBACKMEMBER(int, pfnInit,(PUVM pUVM)); + /** The term function. */ + DECLR3CALLBACKMEMBER(void, pfnTerm,(PUVM pUVM)); + /** The VMR3WaitHaltedU function. */ + DECLR3CALLBACKMEMBER(int, pfnHalt,(PUVMCPU pUVCpu, const uint32_t fMask, uint64_t u64Now)); + /** The VMR3WaitU function. */ + DECLR3CALLBACKMEMBER(int, pfnWait,(PUVMCPU pUVCpu)); + /** The VMR3NotifyCpuFFU function. */ + DECLR3CALLBACKMEMBER(void, pfnNotifyCpuFF,(PUVMCPU pUVCpu, uint32_t fFlags)); + /** The VMR3NotifyGlobalFFU function. */ + DECLR3CALLBACKMEMBER(void, pfnNotifyGlobalFF,(PUVM pUVM, uint32_t fFlags)); +} g_aHaltMethods[] = +{ + { VMHALTMETHOD_BOOTSTRAP, false, NULL, NULL, NULL, vmR3BootstrapWait, vmR3BootstrapNotifyCpuFF, NULL }, + { VMHALTMETHOD_OLD, false, NULL, NULL, vmR3HaltOldDoHalt, vmR3DefaultWait, vmR3DefaultNotifyCpuFF, NULL }, + { VMHALTMETHOD_1, false, vmR3HaltMethod1Init, NULL, vmR3HaltMethod1Halt, vmR3DefaultWait, vmR3DefaultNotifyCpuFF, NULL }, + { VMHALTMETHOD_GLOBAL_1, true, vmR3HaltGlobal1Init, NULL, vmR3HaltGlobal1Halt, vmR3HaltGlobal1Wait, vmR3HaltGlobal1NotifyCpuFF, NULL }, +}; + + +/** + * Notify the emulation thread (EMT) about pending Forced Action (FF). + * + * This function is called by thread other than EMT to make + * sure EMT wakes up and promptly service an FF request. + * + * @param pUVM Pointer to the user mode VM structure. + * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_*. + * @internal + */ +VMMR3_INT_DECL(void) VMR3NotifyGlobalFFU(PUVM pUVM, uint32_t fFlags) +{ + LogFlow(("VMR3NotifyGlobalFFU:\n")); + uint32_t iHaltMethod = pUVM->vm.s.iHaltMethod; + + if (g_aHaltMethods[iHaltMethod].pfnNotifyGlobalFF) /** @todo make mandatory. */ + g_aHaltMethods[iHaltMethod].pfnNotifyGlobalFF(pUVM, fFlags); + else + for (VMCPUID iCpu = 0; iCpu < pUVM->cCpus; iCpu++) + g_aHaltMethods[iHaltMethod].pfnNotifyCpuFF(&pUVM->aCpus[iCpu], fFlags); +} + + +/** + * Notify the emulation thread (EMT) about pending Forced Action (FF). 
+ * + * This function is called by thread other than EMT to make + * sure EMT wakes up and promptly service an FF request. + * + * @param pUVCpu Pointer to the user mode per CPU VM structure. + * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_*. + * @internal + */ +VMMR3_INT_DECL(void) VMR3NotifyCpuFFU(PUVMCPU pUVCpu, uint32_t fFlags) +{ + PUVM pUVM = pUVCpu->pUVM; + + LogFlow(("VMR3NotifyCpuFFU:\n")); + g_aHaltMethods[pUVM->vm.s.iHaltMethod].pfnNotifyCpuFF(pUVCpu, fFlags); +} + + +/** + * Halted VM Wait. + * Any external event will unblock the thread. + * + * @returns VINF_SUCCESS unless a fatal error occurred. In the latter + * case an appropriate status code is returned. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param fIgnoreInterrupts If set the VM_FF_INTERRUPT flags is ignored. + * @thread The emulation thread. + * @remarks Made visible for implementing vmsvga sync register. + * @internal + */ +VMMR3_INT_DECL(int) VMR3WaitHalted(PVM pVM, PVMCPU pVCpu, bool fIgnoreInterrupts) +{ + LogFlow(("VMR3WaitHalted: fIgnoreInterrupts=%d\n", fIgnoreInterrupts)); + + /* + * Check Relevant FFs. + */ + const uint32_t fMask = !fIgnoreInterrupts + ? VMCPU_FF_EXTERNAL_HALTED_MASK + : VMCPU_FF_EXTERNAL_HALTED_MASK & ~(VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC); + if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_HALTED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, fMask)) + { + LogFlow(("VMR3WaitHalted: returns VINF_SUCCESS (FF %#x FFCPU %#RX64)\n", pVM->fGlobalForcedActions, (uint64_t)pVCpu->fLocalForcedActions)); + return VINF_SUCCESS; + } + + /* + * The yielder is suspended while we're halting, while TM might have clock(s) running + * only at certain times and need to be notified.. + */ + if (pVCpu->idCpu == 0) + VMMR3YieldSuspend(pVM); + TMNotifyStartOfHalt(pVCpu); + + /* + * Record halt averages for the last second. + */ + PUVMCPU pUVCpu = pVCpu->pUVCpu; + uint64_t u64Now = RTTimeNanoTS(); + int64_t off = u64Now - pUVCpu->vm.s.u64HaltsStartTS; + if (off > 1000000000) + { + if (off > _4G || !pUVCpu->vm.s.cHalts) + { + pUVCpu->vm.s.HaltInterval = 1000000000 /* 1 sec */; + pUVCpu->vm.s.HaltFrequency = 1; + } + else + { + pUVCpu->vm.s.HaltInterval = (uint32_t)off / pUVCpu->vm.s.cHalts; + pUVCpu->vm.s.HaltFrequency = ASMMultU64ByU32DivByU32(pUVCpu->vm.s.cHalts, 1000000000, (uint32_t)off); + } + pUVCpu->vm.s.u64HaltsStartTS = u64Now; + pUVCpu->vm.s.cHalts = 0; + } + pUVCpu->vm.s.cHalts++; + + /* + * Do the halt. + */ + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HALTED); + PUVM pUVM = pUVCpu->pUVM; + int rc = g_aHaltMethods[pUVM->vm.s.iHaltMethod].pfnHalt(pUVCpu, fMask, u64Now); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); + + /* + * Notify TM and resume the yielder + */ + TMNotifyEndOfHalt(pVCpu); + if (pVCpu->idCpu == 0) + VMMR3YieldResume(pVM); + + LogFlow(("VMR3WaitHalted: returns %Rrc (FF %#x)\n", rc, pVM->fGlobalForcedActions)); + return rc; +} + + +/** + * Suspended VM Wait. + * Only a handful of forced actions will cause the function to + * return to the caller. + * + * @returns VINF_SUCCESS unless a fatal error occurred. In the latter + * case an appropriate status code is returned. + * @param pUVCpu Pointer to the user mode VMCPU structure. + * @thread The emulation thread. + * @internal + */ +VMMR3_INT_DECL(int) VMR3WaitU(PUVMCPU pUVCpu) +{ + LogFlow(("VMR3WaitU:\n")); + + /* + * Check Relevant FFs. 
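+ * (pUVCpu->pVM may still be NULL here, e.g. while the shared VM structure is
+ * being created, which is why the FF check below is guarded; in that case only
+ * the halt-method wait worker further down can end the wait.)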
+ */ + PVM pVM = pUVCpu->pVM; + PVMCPU pVCpu = pUVCpu->pVCpu; + + if ( pVM + && ( VM_FF_IS_ANY_SET(pVM, VM_FF_EXTERNAL_SUSPENDED_MASK) + || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_EXTERNAL_SUSPENDED_MASK) + ) + ) + { + LogFlow(("VMR3Wait: returns VINF_SUCCESS (FF %#x)\n", pVM->fGlobalForcedActions)); + return VINF_SUCCESS; + } + + /* + * Do waiting according to the halt method (so VMR3NotifyFF + * doesn't have to special case anything). + */ + PUVM pUVM = pUVCpu->pUVM; + int rc = g_aHaltMethods[pUVM->vm.s.iHaltMethod].pfnWait(pUVCpu); + LogFlow(("VMR3WaitU: returns %Rrc (FF %#x)\n", rc, pUVM->pVM ? pUVM->pVM->fGlobalForcedActions : 0)); + return rc; +} + + +/** + * Interface that PDMR3Suspend, PDMR3PowerOff and PDMR3Reset uses when they wait + * for the handling of asynchronous notifications to complete. + * + * @returns VINF_SUCCESS unless a fatal error occurred. In the latter + * case an appropriate status code is returned. + * @param pUVCpu Pointer to the user mode VMCPU structure. + * @thread The emulation thread. + */ +VMMR3_INT_DECL(int) VMR3AsyncPdmNotificationWaitU(PUVMCPU pUVCpu) +{ + LogFlow(("VMR3AsyncPdmNotificationWaitU:\n")); + return VMR3WaitU(pUVCpu); +} + + +/** + * Interface that PDM the helper asynchronous notification completed methods + * uses for EMT0 when it is waiting inside VMR3AsyncPdmNotificationWaitU(). + * + * @param pUVM Pointer to the user mode VM structure. + */ +VMMR3_INT_DECL(void) VMR3AsyncPdmNotificationWakeupU(PUVM pUVM) +{ + LogFlow(("VMR3AsyncPdmNotificationWakeupU:\n")); + VM_FF_SET(pUVM->pVM, VM_FF_REQUEST); /* this will have to do for now. */ + g_aHaltMethods[pUVM->vm.s.iHaltMethod].pfnNotifyCpuFF(&pUVM->aCpus[0], 0 /*fFlags*/); +} + + +/** + * Rendezvous callback that will be called once. + * + * @returns VBox strict status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvUser The new g_aHaltMethods index. + */ +static DECLCALLBACK(VBOXSTRICTRC) vmR3SetHaltMethodCallback(PVM pVM, PVMCPU pVCpu, void *pvUser) +{ + PUVM pUVM = pVM->pUVM; + uintptr_t i = (uintptr_t)pvUser; + Assert(i < RT_ELEMENTS(g_aHaltMethods)); + NOREF(pVCpu); + + /* + * Terminate the old one. + */ + if ( pUVM->vm.s.enmHaltMethod != VMHALTMETHOD_INVALID + && g_aHaltMethods[pUVM->vm.s.iHaltMethod].pfnTerm) + { + g_aHaltMethods[pUVM->vm.s.iHaltMethod].pfnTerm(pUVM); + pUVM->vm.s.enmHaltMethod = VMHALTMETHOD_INVALID; + } + + /* Assert that the failure fallback is where we expect. */ + Assert(g_aHaltMethods[0].enmHaltMethod == VMHALTMETHOD_BOOTSTRAP); + Assert(!g_aHaltMethods[0].pfnTerm && !g_aHaltMethods[0].pfnInit); + + /* + * Init the new one. + */ + int rc = VINF_SUCCESS; + memset(&pUVM->vm.s.Halt, 0, sizeof(pUVM->vm.s.Halt)); + if (g_aHaltMethods[i].pfnInit) + { + rc = g_aHaltMethods[i].pfnInit(pUVM); + if (RT_FAILURE(rc)) + { + /* Fall back on the bootstrap method. This requires no + init/term (see assertion above), and will always work. */ + AssertLogRelRC(rc); + i = 0; + } + } + + /* + * Commit it. + */ + pUVM->vm.s.enmHaltMethod = g_aHaltMethods[i].enmHaltMethod; + ASMAtomicWriteU32(&pUVM->vm.s.iHaltMethod, i); + + VMMR3SetMayHaltInRing0(pVCpu, g_aHaltMethods[i].fMayHaltInRing0, + g_aHaltMethods[i].enmHaltMethod == VMHALTMETHOD_GLOBAL_1 + ? pUVM->vm.s.Halt.Global1.cNsSpinBlockThresholdCfg : 0); + + return rc; +} + + +/** + * Changes the halt method. + * + * @returns VBox status code. + * @param pUVM Pointer to the user mode VM structure. + * @param enmHaltMethod The new halt method. 
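+ * @remarks When VMHALTMETHOD_DEFAULT is passed, the method is taken from the
+ *          CFGM value VM/HaltMethod if one is configured, with
+ *          VMHALTMETHOD_GLOBAL_1 as the fallback default. The switch itself is
+ *          performed in a once-only EMT rendezvous (vmR3SetHaltMethodCallback)
+ *          so no EMT is sleeping on the old method while the descriptor changes.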
+ * @thread EMT. + */ +int vmR3SetHaltMethodU(PUVM pUVM, VMHALTMETHOD enmHaltMethod) +{ + PVM pVM = pUVM->pVM; Assert(pVM); + VM_ASSERT_EMT(pVM); + AssertReturn(enmHaltMethod > VMHALTMETHOD_INVALID && enmHaltMethod < VMHALTMETHOD_END, VERR_INVALID_PARAMETER); + + /* + * Resolve default (can be overridden in the configuration). + */ + if (enmHaltMethod == VMHALTMETHOD_DEFAULT) + { + uint32_t u32; + int rc = CFGMR3QueryU32(CFGMR3GetChild(CFGMR3GetRoot(pVM), "VM"), "HaltMethod", &u32); + if (RT_SUCCESS(rc)) + { + enmHaltMethod = (VMHALTMETHOD)u32; + if (enmHaltMethod <= VMHALTMETHOD_INVALID || enmHaltMethod >= VMHALTMETHOD_END) + return VMSetError(pVM, VERR_INVALID_PARAMETER, RT_SRC_POS, N_("Invalid VM/HaltMethod value %d"), enmHaltMethod); + } + else if (rc == VERR_CFGM_VALUE_NOT_FOUND || rc == VERR_CFGM_CHILD_NOT_FOUND) + return VMSetError(pVM, rc, RT_SRC_POS, N_("Failed to Query VM/HaltMethod as uint32_t")); + else + enmHaltMethod = VMHALTMETHOD_GLOBAL_1; + //enmHaltMethod = VMHALTMETHOD_1; + //enmHaltMethod = VMHALTMETHOD_OLD; + } + LogRel(("VMEmt: Halt method %s (%d)\n", vmR3GetHaltMethodName(enmHaltMethod), enmHaltMethod)); + + /* + * Find the descriptor. + */ + unsigned i = 0; + while ( i < RT_ELEMENTS(g_aHaltMethods) + && g_aHaltMethods[i].enmHaltMethod != enmHaltMethod) + i++; + AssertReturn(i < RT_ELEMENTS(g_aHaltMethods), VERR_INVALID_PARAMETER); + + /* + * This needs to be done while the other EMTs are not sleeping or otherwise messing around. + */ + return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, vmR3SetHaltMethodCallback, (void *)(uintptr_t)i); +} + + +/** + * Special interface for implementing a HLT-like port on a device. + * + * This can be called directly from device code, provide the device is trusted + * to access the VMM directly. Since we may not have an accurate register set + * and the caller certainly shouldn't (device code does not access CPU + * registers), this function will return when interrupts are pending regardless + * of the actual EFLAGS.IF state. + * + * @returns VBox error status (never informational statuses). + * @param pVM The cross context VM structure. + * @param idCpu The id of the calling EMT. + */ +VMMR3DECL(int) VMR3WaitForDeviceReady(PVM pVM, VMCPUID idCpu) +{ + /* + * Validate caller and resolve the CPU ID. + */ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + VMCPU_ASSERT_EMT_RETURN(pVCpu, VERR_VM_THREAD_NOT_EMT); + + /* + * Tag along with the HLT mechanics for now. + */ + int rc = VMR3WaitHalted(pVM, pVCpu, false /*fIgnoreInterrupts*/); + if (RT_SUCCESS(rc)) + return VINF_SUCCESS; + return rc; +} + + +/** + * Wakes up a CPU that has called VMR3WaitForDeviceReady. + * + * @returns VBox error status (never informational statuses). + * @param pVM The cross context VM structure. + * @param idCpu The id of the calling EMT. + */ +VMMR3DECL(int) VMR3NotifyCpuDeviceReady(PVM pVM, VMCPUID idCpu) +{ + /* + * Validate caller and resolve the CPU ID. + */ + VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE); + AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + + /* + * Pretend it was an FF that got set since we've got logic for that already. + */ + VMR3NotifyCpuFFU(pVCpu->pUVCpu, VMNOTIFYFF_FLAGS_DONE_REM); + return VINF_SUCCESS; +} + + +/** + * Returns the number of active EMTs. 
+ * + * This is used by the rendezvous code during VM destruction to avoid waiting + * for EMTs that aren't around any more. + * + * @returns Number of active EMTs. 0 if invalid parameter. + * @param pUVM The user mode VM structure. + */ +VMMR3_INT_DECL(uint32_t) VMR3GetActiveEmts(PUVM pUVM) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, 0); + return pUVM->vm.s.cActiveEmts; +} + diff --git a/src/VBox/VMM/VMMR3/VMM.cpp b/src/VBox/VMM/VMMR3/VMM.cpp new file mode 100644 index 00000000..aebfdac8 --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMM.cpp @@ -0,0 +1,3233 @@ +/* $Id: VMM.cpp $ */ +/** @file + * VMM - The Virtual Machine Monitor Core. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +//#define NO_SUPCALLR0VMM + +/** @page pg_vmm VMM - The Virtual Machine Monitor + * + * The VMM component is two things at the moment, it's a component doing a few + * management and routing tasks, and it's the whole virtual machine monitor + * thing. For hysterical reasons, it is not doing all the management that one + * would expect, this is instead done by @ref pg_vm. We'll address this + * misdesign eventually, maybe. + * + * VMM is made up of these components: + * - @subpage pg_cfgm + * - @subpage pg_cpum + * - @subpage pg_csam + * - @subpage pg_dbgf + * - @subpage pg_em + * - @subpage pg_gim + * - @subpage pg_gmm + * - @subpage pg_gvmm + * - @subpage pg_hm + * - @subpage pg_iem + * - @subpage pg_iom + * - @subpage pg_mm + * - @subpage pg_patm + * - @subpage pg_pdm + * - @subpage pg_pgm + * - @subpage pg_rem + * - @subpage pg_selm + * - @subpage pg_ssm + * - @subpage pg_stam + * - @subpage pg_tm + * - @subpage pg_trpm + * - @subpage pg_vm + * + * + * @see @ref grp_vmm @ref grp_vm @subpage pg_vmm_guideline @subpage pg_raw + * + * + * @section sec_vmmstate VMM State + * + * @image html VM_Statechart_Diagram.gif + * + * To be written. + * + * + * @subsection subsec_vmm_init VMM Initialization + * + * To be written. + * + * + * @subsection subsec_vmm_term VMM Termination + * + * To be written. + * + * + * @section sec_vmm_limits VMM Limits + * + * There are various resource limits imposed by the VMM and it's + * sub-components. We'll list some of them here. + * + * On 64-bit hosts: + * - Max 8191 VMs. Imposed by GVMM's handle allocation (GVMM_MAX_HANDLES), + * can be increased up to 64K - 1. + * - Max 16TB - 64KB of the host memory can be used for backing VM RAM and + * ROM pages. The limit is imposed by the 32-bit page ID used by GMM. + * - A VM can be assigned all the memory we can use (16TB), however, the + * Main API will restrict this to 2TB (MM_RAM_MAX_IN_MB). + * - Max 32 virtual CPUs (VMM_MAX_CPU_COUNT). + * + * On 32-bit hosts: + * - Max 127 VMs. Imposed by GMM's per page structure. + * - Max 64GB - 64KB of the host memory can be used for backing VM RAM and + * ROM pages. The limit is imposed by the 28-bit page ID used + * internally in GMM. It is also limited by PAE. + * - A VM can be assigned all the memory GMM can allocate, however, the + * Main API will restrict this to 3584MB (MM_RAM_MAX_IN_MB). 
+ * - Max 32 virtual CPUs (VMM_MAX_CPU_COUNT). + * + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VMM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef VBOX_WITH_REM +# include +#endif +#include +#include +#include +#include "VMMInternal.h" +#include "VMMSwitcher.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** The saved state version. */ +#define VMM_SAVED_STATE_VERSION 4 +/** The saved state version used by v3.0 and earlier. (Teleportation) */ +#define VMM_SAVED_STATE_VERSION_3_0 3 + +/** Macro for flushing the ring-0 logging. */ +#define VMM_FLUSH_R0_LOG(a_pR0Logger, a_pR3Logger) \ + do { \ + PVMMR0LOGGER pVmmLogger = (a_pR0Logger); \ + if (!pVmmLogger || pVmmLogger->Logger.offScratch == 0) \ + { /* likely? */ } \ + else \ + RTLogFlushR0(a_pR3Logger, &pVmmLogger->Logger); \ + } while (0) + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int vmmR3InitStacks(PVM pVM); +static int vmmR3InitLoggers(PVM pVM); +static void vmmR3InitRegisterStats(PVM pVM); +static DECLCALLBACK(int) vmmR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) vmmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(void) vmmR3YieldEMT(PVM pVM, PTMTIMER pTimer, void *pvUser); +static VBOXSTRICTRC vmmR3EmtRendezvousCommon(PVM pVM, PVMCPU pVCpu, bool fIsCaller, + uint32_t fFlags, PFNVMMEMTRENDEZVOUS pfnRendezvous, void *pvUser); +static int vmmR3ServiceCallRing3Request(PVM pVM, PVMCPU pVCpu); +static DECLCALLBACK(void) vmmR3InfoFF(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + + +/** + * Initializes the VMM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) VMMR3Init(PVM pVM) +{ + LogFlow(("VMMR3Init\n")); + + /* + * Assert alignment, sizes and order. + */ + AssertMsg(pVM->vmm.s.offVM == 0, ("Already initialized!\n")); + AssertCompile(sizeof(pVM->vmm.s) <= sizeof(pVM->vmm.padding)); + AssertCompile(sizeof(pVM->aCpus[0].vmm.s) <= sizeof(pVM->aCpus[0].vmm.padding)); + + /* + * Init basic VM VMM members. 
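+ * (All the rendezvous semaphore handles are set to NIL up front, presumably so
+ * the termination path can destroy them unconditionally even if a later init
+ * step fails part-way through.)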
+ */ + pVM->vmm.s.offVM = RT_UOFFSETOF(VM, vmm); + pVM->vmm.s.pahEvtRendezvousEnterOrdered = NULL; + pVM->vmm.s.hEvtRendezvousEnterOneByOne = NIL_RTSEMEVENT; + pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce = NIL_RTSEMEVENTMULTI; + pVM->vmm.s.hEvtMulRendezvousDone = NIL_RTSEMEVENTMULTI; + pVM->vmm.s.hEvtRendezvousDoneCaller = NIL_RTSEMEVENT; + pVM->vmm.s.hEvtMulRendezvousRecursionPush = NIL_RTSEMEVENTMULTI; + pVM->vmm.s.hEvtMulRendezvousRecursionPop = NIL_RTSEMEVENTMULTI; + pVM->vmm.s.hEvtRendezvousRecursionPushCaller = NIL_RTSEMEVENT; + pVM->vmm.s.hEvtRendezvousRecursionPopCaller = NIL_RTSEMEVENT; + + /** @cfgm{/YieldEMTInterval, uint32_t, 1, UINT32_MAX, 23, ms} + * The EMT yield interval. The EMT yielding is a hack we employ to play a + * bit nicer with the rest of the system (like for instance the GUI). + */ + int rc = CFGMR3QueryU32Def(CFGMR3GetRoot(pVM), "YieldEMTInterval", &pVM->vmm.s.cYieldEveryMillies, + 23 /* Value arrived at after experimenting with the grub boot prompt. */); + AssertMsgRCReturn(rc, ("Configuration error. Failed to query \"YieldEMTInterval\", rc=%Rrc\n", rc), rc); + + + /** @cfgm{/VMM/UsePeriodicPreemptionTimers, boolean, true} + * Controls whether we employ per-cpu preemption timers to limit the time + * spent executing guest code. This option is not available on all + * platforms and we will silently ignore this setting then. If we are + * running in VT-x mode, we will use the VMX-preemption timer instead of + * this one when possible. + */ + PCFGMNODE pCfgVMM = CFGMR3GetChild(CFGMR3GetRoot(pVM), "VMM"); + rc = CFGMR3QueryBoolDef(pCfgVMM, "UsePeriodicPreemptionTimers", &pVM->vmm.s.fUsePeriodicPreemptionTimers, true); + AssertMsgRCReturn(rc, ("Configuration error. Failed to query \"VMM/UsePeriodicPreemptionTimers\", rc=%Rrc\n", rc), rc); + + /* + * Initialize the VMM rendezvous semaphores. + */ + pVM->vmm.s.pahEvtRendezvousEnterOrdered = (PRTSEMEVENT)MMR3HeapAlloc(pVM, MM_TAG_VMM, sizeof(RTSEMEVENT) * pVM->cCpus); + if (!pVM->vmm.s.pahEvtRendezvousEnterOrdered) + return VERR_NO_MEMORY; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + pVM->vmm.s.pahEvtRendezvousEnterOrdered[i] = NIL_RTSEMEVENT; + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + rc = RTSemEventCreate(&pVM->vmm.s.pahEvtRendezvousEnterOrdered[i]); + AssertRCReturn(rc, rc); + } + rc = RTSemEventCreate(&pVM->vmm.s.hEvtRendezvousEnterOneByOne); + AssertRCReturn(rc, rc); + rc = RTSemEventMultiCreate(&pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce); + AssertRCReturn(rc, rc); + rc = RTSemEventMultiCreate(&pVM->vmm.s.hEvtMulRendezvousDone); + AssertRCReturn(rc, rc); + rc = RTSemEventCreate(&pVM->vmm.s.hEvtRendezvousDoneCaller); + AssertRCReturn(rc, rc); + rc = RTSemEventMultiCreate(&pVM->vmm.s.hEvtMulRendezvousRecursionPush); + AssertRCReturn(rc, rc); + rc = RTSemEventMultiCreate(&pVM->vmm.s.hEvtMulRendezvousRecursionPop); + AssertRCReturn(rc, rc); + rc = RTSemEventCreate(&pVM->vmm.s.hEvtRendezvousRecursionPushCaller); + AssertRCReturn(rc, rc); + rc = RTSemEventCreate(&pVM->vmm.s.hEvtRendezvousRecursionPopCaller); + AssertRCReturn(rc, rc); + + /* + * Register the saved state data unit. + */ + rc = SSMR3RegisterInternal(pVM, "vmm", 1, VMM_SAVED_STATE_VERSION, VMM_STACK_SIZE + sizeof(RTGCPTR), + NULL, NULL, NULL, + NULL, vmmR3Save, NULL, + NULL, vmmR3Load, NULL); + if (RT_FAILURE(rc)) + return rc; + + /* + * Register the Ring-0 VM handle with the session for fast ioctl calls. + */ + rc = SUPR3SetVMForFastIOCtl(pVM->pVMR0); + if (RT_FAILURE(rc)) + return rc; + + /* + * Init various sub-components. 
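+ * (Switcher code first, then the per-EMT stacks, then the loggers; as the todo
+ * further down notes, the failure paths here still lack proper cleanup of the
+ * steps that already succeeded.)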
+ */ + rc = vmmR3SwitcherInit(pVM); + if (RT_SUCCESS(rc)) + { + rc = vmmR3InitStacks(pVM); + if (RT_SUCCESS(rc)) + { + rc = vmmR3InitLoggers(pVM); + +#ifdef VBOX_WITH_NMI + /* + * Allocate mapping for the host APIC. + */ + if (RT_SUCCESS(rc)) + { + rc = MMR3HyperReserve(pVM, PAGE_SIZE, "Host APIC", &pVM->vmm.s.GCPtrApicBase); + AssertRC(rc); + } +#endif + if (RT_SUCCESS(rc)) + { + /* + * Debug info and statistics. + */ + DBGFR3InfoRegisterInternal(pVM, "fflags", "Displays the current Forced actions Flags.", vmmR3InfoFF); + vmmR3InitRegisterStats(pVM); + vmmInitFormatTypes(); + + return VINF_SUCCESS; + } + } + /** @todo Need failure cleanup. */ + + //more todo in here? + //if (RT_SUCCESS(rc)) + //{ + //} + //int rc2 = vmmR3TermCoreCode(pVM); + //AssertRC(rc2)); + } + + return rc; +} + + +/** + * Allocate & setup the VMM RC stack(s) (for EMTs). + * + * The stacks are also used for long jumps in Ring-0. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * + * @remarks The optional guard page gets it protection setup up during R3 init + * completion because of init order issues. + */ +static int vmmR3InitStacks(PVM pVM) +{ + int rc = VINF_SUCCESS; +#ifdef VMM_R0_SWITCH_STACK + uint32_t fFlags = MMHYPER_AONR_FLAGS_KERNEL_MAPPING; +#else + uint32_t fFlags = 0; +#endif + + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + PVMCPU pVCpu = &pVM->aCpus[idCpu]; + +#ifdef VBOX_STRICT_VMM_STACK + rc = MMR3HyperAllocOnceNoRelEx(pVM, PAGE_SIZE + VMM_STACK_SIZE + PAGE_SIZE, +#else + rc = MMR3HyperAllocOnceNoRelEx(pVM, VMM_STACK_SIZE, +#endif + PAGE_SIZE, MM_TAG_VMM, fFlags, (void **)&pVCpu->vmm.s.pbEMTStackR3); + if (RT_SUCCESS(rc)) + { +#ifdef VBOX_STRICT_VMM_STACK + pVCpu->vmm.s.pbEMTStackR3 += PAGE_SIZE; +#endif +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + /* MMHyperR3ToR0 returns R3 when not doing hardware assisted virtualization. */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + pVCpu->vmm.s.CallRing3JmpBufR0.pvSavedStack = NIL_RTR0PTR; + else +#endif + pVCpu->vmm.s.CallRing3JmpBufR0.pvSavedStack = MMHyperR3ToR0(pVM, pVCpu->vmm.s.pbEMTStackR3); + pVCpu->vmm.s.pbEMTStackRC = MMHyperR3ToRC(pVM, pVCpu->vmm.s.pbEMTStackR3); + pVCpu->vmm.s.pbEMTStackBottomRC = pVCpu->vmm.s.pbEMTStackRC + VMM_STACK_SIZE; + AssertRelease(pVCpu->vmm.s.pbEMTStackRC); + + CPUMSetHyperESP(pVCpu, pVCpu->vmm.s.pbEMTStackBottomRC); + } + } + + return rc; +} + + +/** + * Initialize the loggers. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +static int vmmR3InitLoggers(PVM pVM) +{ + int rc; +#define RTLogCalcSizeForR0(cGroups, fFlags) (RT_UOFFSETOF_DYN(VMMR0LOGGER, Logger.afGroups[cGroups]) + PAGE_SIZE) + + /* + * Allocate RC & R0 Logger instances (they are finalized in the relocator). 
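+ * (RTLogCalcSizeForR0 above sizes each instance as the VMMR0LOGGER header up
+ * to Logger.afGroups[cGroups] plus one extra page of buffer space, so the
+ * per-EMT ring-0 logger allocations grow with the number of logging groups.)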
+ */ +#ifdef LOG_ENABLED + PRTLOGGER pLogger = RTLogDefaultInstance(); + if (pLogger) + { + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + pVM->vmm.s.cbRCLogger = RT_UOFFSETOF_DYN(RTLOGGERRC, afGroups[pLogger->cGroups]); + rc = MMR3HyperAllocOnceNoRel(pVM, pVM->vmm.s.cbRCLogger, 0, MM_TAG_VMM, (void **)&pVM->vmm.s.pRCLoggerR3); + if (RT_FAILURE(rc)) + return rc; + pVM->vmm.s.pRCLoggerRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pRCLoggerR3); + } + +# ifdef VBOX_WITH_R0_LOGGING + size_t const cbLogger = RTLogCalcSizeForR0(pLogger->cGroups, 0); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + rc = MMR3HyperAllocOnceNoRelEx(pVM, cbLogger, PAGE_SIZE, MM_TAG_VMM, MMHYPER_AONR_FLAGS_KERNEL_MAPPING, + (void **)&pVCpu->vmm.s.pR0LoggerR3); + if (RT_FAILURE(rc)) + return rc; + pVCpu->vmm.s.pR0LoggerR3->pVM = pVM->pVMR0; + //pVCpu->vmm.s.pR0LoggerR3->fCreated = false; + pVCpu->vmm.s.pR0LoggerR3->cbLogger = (uint32_t)cbLogger; + pVCpu->vmm.s.pR0LoggerR0 = MMHyperR3ToR0(pVM, pVCpu->vmm.s.pR0LoggerR3); + } +# endif + } +#endif /* LOG_ENABLED */ + + /* + * Release logging. + */ + PRTLOGGER pRelLogger = RTLogRelGetDefaultInstance(); + if (pRelLogger) + { +#ifdef VBOX_WITH_RC_RELEASE_LOGGING + /* + * Allocate RC release logger instances (finalized in the relocator). + */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + pVM->vmm.s.cbRCRelLogger = RT_UOFFSETOF_DYN(RTLOGGERRC, afGroups[pRelLogger->cGroups]); + rc = MMR3HyperAllocOnceNoRel(pVM, pVM->vmm.s.cbRCRelLogger, 0, MM_TAG_VMM, (void **)&pVM->vmm.s.pRCRelLoggerR3); + if (RT_FAILURE(rc)) + return rc; + pVM->vmm.s.pRCRelLoggerRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pRCRelLoggerR3); + } +#endif + + /* + * Ring-0 release logger. + */ + RTR0PTR pfnLoggerWrapper = NIL_RTR0PTR; + rc = PDMR3LdrGetSymbolR0(pVM, VMMR0_MAIN_MODULE_NAME, "vmmR0LoggerWrapper", &pfnLoggerWrapper); + AssertReleaseMsgRCReturn(rc, ("vmmR0LoggerWrapper not found! rc=%Rra\n", rc), rc); + + RTR0PTR pfnLoggerFlush = NIL_RTR0PTR; + rc = PDMR3LdrGetSymbolR0(pVM, VMMR0_MAIN_MODULE_NAME, "vmmR0LoggerFlush", &pfnLoggerFlush); + AssertReleaseMsgRCReturn(rc, ("vmmR0LoggerFlush not found! rc=%Rra\n", rc), rc); + + size_t const cbLogger = RTLogCalcSizeForR0(pRelLogger->cGroups, 0); + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + rc = MMR3HyperAllocOnceNoRelEx(pVM, cbLogger, PAGE_SIZE, MM_TAG_VMM, MMHYPER_AONR_FLAGS_KERNEL_MAPPING, + (void **)&pVCpu->vmm.s.pR0RelLoggerR3); + if (RT_FAILURE(rc)) + return rc; + PVMMR0LOGGER pVmmLogger = pVCpu->vmm.s.pR0RelLoggerR3; + RTR0PTR R0PtrVmmLogger = MMHyperR3ToR0(pVM, pVmmLogger); + pVCpu->vmm.s.pR0RelLoggerR0 = R0PtrVmmLogger; + pVmmLogger->pVM = pVM->pVMR0; + pVmmLogger->cbLogger = (uint32_t)cbLogger; + pVmmLogger->fCreated = false; + pVmmLogger->fFlushingDisabled = false; + pVmmLogger->fRegistered = false; + pVmmLogger->idCpu = i; + + char szR0ThreadName[16]; + RTStrPrintf(szR0ThreadName, sizeof(szR0ThreadName), "EMT-%u-R0", i); + rc = RTLogCreateForR0(&pVmmLogger->Logger, pVmmLogger->cbLogger, R0PtrVmmLogger + RT_UOFFSETOF(VMMR0LOGGER, Logger), + pfnLoggerWrapper, pfnLoggerFlush, + RTLOGFLAGS_BUFFERED, RTLOGDEST_DUMMY, szR0ThreadName); + AssertReleaseMsgRCReturn(rc, ("RTLogCreateForR0 failed! rc=%Rra\n", rc), rc); + + /* We only update the release log instance here. */ + rc = RTLogCopyGroupsAndFlagsForR0(&pVmmLogger->Logger, R0PtrVmmLogger + RT_UOFFSETOF(VMMR0LOGGER, Logger), + pRelLogger, RTLOGFLAGS_BUFFERED, UINT32_MAX); + AssertReleaseMsgRCReturn(rc, ("RTLogCopyGroupsAndFlagsForR0 failed! 
rc=%Rra\n", rc), rc); + + pVmmLogger->fCreated = true; + } + } + + return VINF_SUCCESS; +} + + +/** + * VMMR3Init worker that register the statistics with STAM. + * + * @param pVM The cross context VM structure. + */ +static void vmmR3InitRegisterStats(PVM pVM) +{ + RT_NOREF_PV(pVM); + + /* + * Statistics. + */ + STAM_REG(pVM, &pVM->vmm.s.StatRunRC, STAMTYPE_COUNTER, "/VMM/RunRC", STAMUNIT_OCCURENCES, "Number of context switches."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetNormal, STAMTYPE_COUNTER, "/VMM/RZRet/Normal", STAMUNIT_OCCURENCES, "Number of VINF_SUCCESS returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetInterrupt, STAMTYPE_COUNTER, "/VMM/RZRet/Interrupt", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_INTERRUPT returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetInterruptHyper, STAMTYPE_COUNTER, "/VMM/RZRet/InterruptHyper", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_INTERRUPT_HYPER returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetGuestTrap, STAMTYPE_COUNTER, "/VMM/RZRet/GuestTrap", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_GUEST_TRAP returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetRingSwitch, STAMTYPE_COUNTER, "/VMM/RZRet/RingSwitch", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_RING_SWITCH returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetRingSwitchInt, STAMTYPE_COUNTER, "/VMM/RZRet/RingSwitchInt", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_RING_SWITCH_INT returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetStaleSelector, STAMTYPE_COUNTER, "/VMM/RZRet/StaleSelector", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_STALE_SELECTOR returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetIRETTrap, STAMTYPE_COUNTER, "/VMM/RZRet/IRETTrap", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_IRET_TRAP returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetEmulate, STAMTYPE_COUNTER, "/VMM/RZRet/Emulate", STAMUNIT_OCCURENCES, "Number of VINF_EM_EXECUTE_INSTRUCTION returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPatchEmulate, STAMTYPE_COUNTER, "/VMM/RZRet/PatchEmulate", STAMUNIT_OCCURENCES, "Number of VINF_PATCH_EMULATE_INSTR returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetIORead, STAMTYPE_COUNTER, "/VMM/RZRet/IORead", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_IOPORT_READ returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetIOWrite, STAMTYPE_COUNTER, "/VMM/RZRet/IOWrite", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_IOPORT_WRITE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetIOCommitWrite, STAMTYPE_COUNTER, "/VMM/RZRet/IOCommitWrite", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_IOPORT_COMMIT_WRITE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMMIORead, STAMTYPE_COUNTER, "/VMM/RZRet/MMIORead", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_MMIO_READ returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMMIOWrite, STAMTYPE_COUNTER, "/VMM/RZRet/MMIOWrite", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_MMIO_WRITE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMMIOCommitWrite, STAMTYPE_COUNTER, "/VMM/RZRet/MMIOCommitWrite", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_MMIO_COMMIT_WRITE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMMIOReadWrite, STAMTYPE_COUNTER, "/VMM/RZRet/MMIOReadWrite", STAMUNIT_OCCURENCES, "Number of VINF_IOM_R3_MMIO_READ_WRITE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMMIOPatchRead, STAMTYPE_COUNTER, "/VMM/RZRet/MMIOPatchRead", STAMUNIT_OCCURENCES, "Number of VINF_IOM_HC_MMIO_PATCH_READ returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMMIOPatchWrite, STAMTYPE_COUNTER, "/VMM/RZRet/MMIOPatchWrite", STAMUNIT_OCCURENCES, "Number of VINF_IOM_HC_MMIO_PATCH_WRITE returns."); + STAM_REG(pVM, 
&pVM->vmm.s.StatRZRetMSRRead, STAMTYPE_COUNTER, "/VMM/RZRet/MSRRead", STAMUNIT_OCCURENCES, "Number of VINF_CPUM_R3_MSR_READ returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMSRWrite, STAMTYPE_COUNTER, "/VMM/RZRet/MSRWrite", STAMUNIT_OCCURENCES, "Number of VINF_CPUM_R3_MSR_WRITE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetLDTFault, STAMTYPE_COUNTER, "/VMM/RZRet/LDTFault", STAMUNIT_OCCURENCES, "Number of VINF_EM_EXECUTE_INSTRUCTION_GDT_FAULT returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetGDTFault, STAMTYPE_COUNTER, "/VMM/RZRet/GDTFault", STAMUNIT_OCCURENCES, "Number of VINF_EM_EXECUTE_INSTRUCTION_LDT_FAULT returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetIDTFault, STAMTYPE_COUNTER, "/VMM/RZRet/IDTFault", STAMUNIT_OCCURENCES, "Number of VINF_EM_EXECUTE_INSTRUCTION_IDT_FAULT returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetTSSFault, STAMTYPE_COUNTER, "/VMM/RZRet/TSSFault", STAMUNIT_OCCURENCES, "Number of VINF_EM_EXECUTE_INSTRUCTION_TSS_FAULT returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetCSAMTask, STAMTYPE_COUNTER, "/VMM/RZRet/CSAMTask", STAMUNIT_OCCURENCES, "Number of VINF_CSAM_PENDING_ACTION returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetSyncCR3, STAMTYPE_COUNTER, "/VMM/RZRet/SyncCR", STAMUNIT_OCCURENCES, "Number of VINF_PGM_SYNC_CR3 returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetMisc, STAMTYPE_COUNTER, "/VMM/RZRet/Misc", STAMUNIT_OCCURENCES, "Number of misc returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPatchInt3, STAMTYPE_COUNTER, "/VMM/RZRet/PatchInt3", STAMUNIT_OCCURENCES, "Number of VINF_PATM_PATCH_INT3 returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPatchPF, STAMTYPE_COUNTER, "/VMM/RZRet/PatchPF", STAMUNIT_OCCURENCES, "Number of VINF_PATM_PATCH_TRAP_PF returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPatchGP, STAMTYPE_COUNTER, "/VMM/RZRet/PatchGP", STAMUNIT_OCCURENCES, "Number of VINF_PATM_PATCH_TRAP_GP returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPatchIretIRQ, STAMTYPE_COUNTER, "/VMM/RZRet/PatchIret", STAMUNIT_OCCURENCES, "Number of VINF_PATM_PENDING_IRQ_AFTER_IRET returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetRescheduleREM, STAMTYPE_COUNTER, "/VMM/RZRet/ScheduleREM", STAMUNIT_OCCURENCES, "Number of VINF_EM_RESCHEDULE_REM returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3Total, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3Unknown, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/Unknown", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns without responsible force flag."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3FF, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/ToR3", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VMCPU_FF_TO_R3."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3TMVirt, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/TMVirt", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VM_FF_TM_VIRTUAL_SYNC."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3HandyPages, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/Handy", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VM_FF_PGM_NEED_HANDY_PAGES."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3PDMQueues, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/PDMQueue", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VM_FF_PDM_QUEUES."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3Rendezvous, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/Rendezvous", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VM_FF_EMT_RENDEZVOUS."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3Timer, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/Timer", 
STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VMCPU_FF_TIMER."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3DMA, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/DMA", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VM_FF_PDM_DMA."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3CritSect, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/CritSect", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VMCPU_FF_PDM_CRITSECT."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3Iem, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/IEM", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VMCPU_FF_IEM."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetToR3Iom, STAMTYPE_COUNTER, "/VMM/RZRet/ToR3/IOM", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TO_R3 returns with VMCPU_FF_IOM."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetTimerPending, STAMTYPE_COUNTER, "/VMM/RZRet/TimerPending", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_TIMER_PENDING returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetInterruptPending, STAMTYPE_COUNTER, "/VMM/RZRet/InterruptPending", STAMUNIT_OCCURENCES, "Number of VINF_EM_RAW_INTERRUPT_PENDING returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPATMDuplicateFn, STAMTYPE_COUNTER, "/VMM/RZRet/PATMDuplicateFn", STAMUNIT_OCCURENCES, "Number of VINF_PATM_DUPLICATE_FUNCTION returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPGMChangeMode, STAMTYPE_COUNTER, "/VMM/RZRet/PGMChangeMode", STAMUNIT_OCCURENCES, "Number of VINF_PGM_CHANGE_MODE returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPGMFlushPending, STAMTYPE_COUNTER, "/VMM/RZRet/PGMFlushPending", STAMUNIT_OCCURENCES, "Number of VINF_PGM_POOL_FLUSH_PENDING returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPendingRequest, STAMTYPE_COUNTER, "/VMM/RZRet/PendingRequest", STAMUNIT_OCCURENCES, "Number of VINF_EM_PENDING_REQUEST returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetPatchTPR, STAMTYPE_COUNTER, "/VMM/RZRet/PatchTPR", STAMUNIT_OCCURENCES, "Number of VINF_EM_HM_PATCH_TPR_INSTR returns."); + STAM_REG(pVM, &pVM->vmm.s.StatRZRetCallRing3, STAMTYPE_COUNTER, "/VMM/RZCallR3/Misc", STAMUNIT_OCCURENCES, "Number of Other ring-3 calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallPDMLock, STAMTYPE_COUNTER, "/VMM/RZCallR3/PDMLock", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_PDM_LOCK calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallPDMCritSectEnter, STAMTYPE_COUNTER, "/VMM/RZCallR3/PDMCritSectEnter", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_PDM_CRITSECT_ENTER calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallPGMLock, STAMTYPE_COUNTER, "/VMM/RZCallR3/PGMLock", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_PGM_LOCK calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallPGMPoolGrow, STAMTYPE_COUNTER, "/VMM/RZCallR3/PGMPoolGrow", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_PGM_POOL_GROW calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallPGMMapChunk, STAMTYPE_COUNTER, "/VMM/RZCallR3/PGMMapChunk", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_PGM_MAP_CHUNK calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallPGMAllocHandy, STAMTYPE_COUNTER, "/VMM/RZCallR3/PGMAllocHandy", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallRemReplay, STAMTYPE_COUNTER, "/VMM/RZCallR3/REMReplay", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_REM_REPLAY_HANDLER_NOTIFICATIONS calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallLogFlush, STAMTYPE_COUNTER, "/VMM/RZCallR3/VMMLogFlush", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_VMM_LOGGER_FLUSH calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallVMSetError, STAMTYPE_COUNTER, "/VMM/RZCallR3/VMSetError", 
STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_VM_SET_ERROR calls."); + STAM_REG(pVM, &pVM->vmm.s.StatRZCallVMSetRuntimeError, STAMTYPE_COUNTER, "/VMM/RZCallR3/VMRuntimeError", STAMUNIT_OCCURENCES, "Number of VMMCALLRING3_VM_SET_RUNTIME_ERROR calls."); + +#ifdef VBOX_WITH_STATISTICS + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.CallRing3JmpBufR0.cbUsedMax, STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Max amount of stack used.", "/VMM/Stack/CPU%u/Max", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.CallRing3JmpBufR0.cbUsedAvg, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES, "Average stack usage.", "/VMM/Stack/CPU%u/Avg", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.CallRing3JmpBufR0.cUsedTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of stack usages.", "/VMM/Stack/CPU%u/Uses", i); + } +#endif + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltBlock, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "", "/PROF/CPU%u/VM/Halt/R0HaltBlock", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltBlockOnTime, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "", "/PROF/CPU%u/VM/Halt/R0HaltBlockOnTime", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltBlockOverslept, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "", "/PROF/CPU%u/VM/Halt/R0HaltBlockOverslept", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltBlockInsomnia, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_NS_PER_CALL, "", "/PROF/CPU%u/VM/Halt/R0HaltBlockInsomnia", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltExec, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "", "/PROF/CPU%u/VM/Halt/R0HaltExec", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltExecFromSpin, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "", "/PROF/CPU%u/VM/Halt/R0HaltExec/FromSpin", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.StatR0HaltExecFromBlock, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "", "/PROF/CPU%u/VM/Halt/R0HaltExec/FromBlock", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.cR0Halts, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "", "/PROF/CPU%u/VM/Halt/R0HaltHistoryCounter", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.cR0HaltsSucceeded, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "", "/PROF/CPU%u/VM/Halt/R0HaltHistorySucceeded", i); + STAMR3RegisterF(pVM, &pVM->aCpus[i].vmm.s.cR0HaltsToRing3, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "", "/PROF/CPU%u/VM/Halt/R0HaltHistoryToRing3", i); + } +} + + +/** + * Worker for VMMR3InitR0 that calls ring-0 to do EMT specific initialization. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context per CPU structure. + * @thread EMT(pVCpu) + */ +static DECLCALLBACK(int) vmmR3InitR0Emt(PVM pVM, PVMCPU pVCpu) +{ + return VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_VMMR0_INIT_EMT, 0, NULL); +} + + +/** + * Initializes the R0 VMM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) VMMR3InitR0(PVM pVM) +{ + int rc; + PVMCPU pVCpu = VMMGetCpu(pVM); + Assert(pVCpu && pVCpu->idCpu == 0); + +#ifdef LOG_ENABLED + /* + * Initialize the ring-0 logger if we haven't done so yet. 
+ */ + if ( pVCpu->vmm.s.pR0LoggerR3 + && !pVCpu->vmm.s.pR0LoggerR3->fCreated) + { + rc = VMMR3UpdateLoggers(pVM); + if (RT_FAILURE(rc)) + return rc; + } +#endif + + /* + * Call Ring-0 entry with init code. + */ + for (;;) + { +#ifdef NO_SUPCALLR0VMM + //rc = VERR_GENERAL_FAILURE; + rc = VINF_SUCCESS; +#else + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_VMMR0_INIT, RT_MAKE_U64(VMMGetSvnRev(), vmmGetBuildType()), NULL); +#endif + /* + * Flush the logs. + */ +#ifdef LOG_ENABLED + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0LoggerR3, NULL); +#endif + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0RelLoggerR3, RTLogRelGetDefaultInstance()); + if (rc != VINF_VMM_CALL_HOST) + break; + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + break; + /* Resume R0 */ + } + + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + { + LogRel(("VMM: R0 init failed, rc=%Rra\n", rc)); + if (RT_SUCCESS(rc)) + rc = VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + /* Log whether thread-context hooks are used (on Linux this can depend on how the kernel is configured). */ + if (pVM->aCpus[0].vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK) + LogRel(("VMM: Enabled thread-context hooks\n")); + else + LogRel(("VMM: Thread-context hooks unavailable\n")); + + /* Log RTThreadPreemptIsPendingTrusty() and RTThreadPreemptIsPossible() results. */ + if (pVM->vmm.s.fIsPreemptPendingApiTrusty) + LogRel(("VMM: RTThreadPreemptIsPending() can be trusted\n")); + else + LogRel(("VMM: Warning! RTThreadPreemptIsPending() cannot be trusted! Need to update kernel info?\n")); + if (pVM->vmm.s.fIsPreemptPossible) + LogRel(("VMM: Kernel preemption is possible\n")); + else + LogRel(("VMM: Kernel preemption is not possible it seems\n")); + + /* + * Send all EMTs to ring-0 to get their logger initialized. + */ + for (VMCPUID idCpu = 0; RT_SUCCESS(rc) && idCpu < pVM->cCpus; idCpu++) + rc = VMR3ReqCallWait(pVM, idCpu, (PFNRT)vmmR3InitR0Emt, 2, pVM, &pVM->aCpus[idCpu]); + + return rc; +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Initializes the RC VMM. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) VMMR3InitRC(PVM pVM) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + Assert(pVCpu && pVCpu->idCpu == 0); + + /* In VMX mode, there's no need to init RC. */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return VINF_SUCCESS; + + AssertReturn(pVM->cCpus == 1, VERR_RAW_MODE_INVALID_SMP); + + /* + * Call VMMRCInit(): + * -# resolve the address. + * -# setup stackframe and EIP to use the trampoline. + * -# do a generic hypervisor call. + */ + RTRCPTR RCPtrEP; + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCEntry", &RCPtrEP); + if (RT_SUCCESS(rc)) + { + CPUMSetHyperESP(pVCpu, pVCpu->vmm.s.pbEMTStackBottomRC); /* Clear the stack. */ + uint64_t u64TS = RTTimeProgramStartNanoTS(); + CPUMPushHyper(pVCpu, RT_HI_U32(u64TS)); /* Param 4: The program startup TS - Hi. */ + CPUMPushHyper(pVCpu, RT_LO_U32(u64TS)); /* Param 4: The program startup TS - Lo. */ + CPUMPushHyper(pVCpu, vmmGetBuildType()); /* Param 3: Version argument. */ + CPUMPushHyper(pVCpu, VMMGetSvnRev()); /* Param 2: Version argument. */ + CPUMPushHyper(pVCpu, VMMRC_DO_VMMRC_INIT); /* Param 1: Operation. */ + CPUMPushHyper(pVCpu, pVM->pVMRC); /* Param 0: pVM */ + CPUMPushHyper(pVCpu, 6 * sizeof(RTRCPTR)); /* trampoline param: stacksize. */ + CPUMPushHyper(pVCpu, RCPtrEP); /* Call EIP. 
*/ + CPUMSetHyperEIP(pVCpu, pVM->vmm.s.pfnCallTrampolineRC); + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + + for (;;) + { +#ifdef NO_SUPCALLR0VMM + //rc = VERR_GENERAL_FAILURE; + rc = VINF_SUCCESS; +#else + rc = SUPR3CallVMMR0(pVM->pVMR0, 0 /* VCPU 0 */, VMMR0_DO_CALL_HYPERVISOR, NULL); +#endif +#ifdef LOG_ENABLED + PRTLOGGERRC pLogger = pVM->vmm.s.pRCLoggerR3; + if ( pLogger + && pLogger->offScratch > 0) + RTLogFlushRC(NULL, pLogger); +#endif +#ifdef VBOX_WITH_RC_RELEASE_LOGGING + PRTLOGGERRC pRelLogger = pVM->vmm.s.pRCRelLoggerR3; + if (RT_UNLIKELY(pRelLogger && pRelLogger->offScratch > 0)) + RTLogFlushRC(RTLogRelGetDefaultInstance(), pRelLogger); +#endif + if (rc != VINF_VMM_CALL_HOST) + break; + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + break; + } + + /* Don't trigger assertions or guru if raw-mode is unavailable. */ + if (rc != VERR_SUPDRV_NO_RAW_MODE_HYPER_V_ROOT) + { + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + { + VMMR3FatalDump(pVM, pVCpu, rc); + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + rc = VERR_IPE_UNEXPECTED_INFO_STATUS; + } + AssertRC(rc); + } + } + return rc; +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Called when an init phase completes. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmWhat Which init phase. + */ +VMMR3_INT_DECL(int) VMMR3InitCompleted(PVM pVM, VMINITCOMPLETED enmWhat) +{ + int rc = VINF_SUCCESS; + + switch (enmWhat) + { + case VMINITCOMPLETED_RING3: + { + /* + * Set page attributes to r/w for stack pages. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + rc = PGMMapSetPage(pVM, pVM->aCpus[idCpu].vmm.s.pbEMTStackRC, VMM_STACK_SIZE, + X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW); + AssertRCReturn(rc, rc); + } + + /* + * Create the EMT yield timer. + */ + rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL, vmmR3YieldEMT, NULL, "EMT Yielder", &pVM->vmm.s.pYieldTimer); + AssertRCReturn(rc, rc); + + rc = TMTimerSetMillies(pVM->vmm.s.pYieldTimer, pVM->vmm.s.cYieldEveryMillies); + AssertRCReturn(rc, rc); + +#ifdef VBOX_WITH_NMI + /* + * Map the host APIC into GC - This is AMD/Intel + Host OS specific! + */ + rc = PGMMap(pVM, pVM->vmm.s.GCPtrApicBase, 0xfee00000, PAGE_SIZE, + X86_PTE_P | X86_PTE_RW | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_A | X86_PTE_D); + AssertRCReturn(rc, rc); +#endif + +#ifdef VBOX_STRICT_VMM_STACK + /* + * Setup the stack guard pages: Two inaccessible pages at each sides of the + * stack to catch over/under-flows. + */ + for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) + { + uint8_t *pbEMTStackR3 = pVM->aCpus[idCpu].vmm.s.pbEMTStackR3; + + memset(pbEMTStackR3 - PAGE_SIZE, 0xcc, PAGE_SIZE); + MMR3HyperSetGuard(pVM, pbEMTStackR3 - PAGE_SIZE, PAGE_SIZE, true /*fSet*/); + + memset(pbEMTStackR3 + VMM_STACK_SIZE, 0xcc, PAGE_SIZE); + MMR3HyperSetGuard(pVM, pbEMTStackR3 + VMM_STACK_SIZE, PAGE_SIZE, true /*fSet*/); + } + pVM->vmm.s.fStackGuardsStationed = true; +#endif + break; + } + + case VMINITCOMPLETED_HM: + { + /* + * Disable the periodic preemption timers if we can use the + * VMX-preemption timer instead. 
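+ * (I.e. when HMR3IsVmxPreemptionTimerUsed() reports the hardware timer is in
+ * use, fUsePeriodicPreemptionTimers is cleared so both mechanisms are never
+ * active at the same time; the effective choice is logged right below.)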
+ */ + if ( pVM->vmm.s.fUsePeriodicPreemptionTimers + && HMR3IsVmxPreemptionTimerUsed(pVM)) + pVM->vmm.s.fUsePeriodicPreemptionTimers = false; + LogRel(("VMM: fUsePeriodicPreemptionTimers=%RTbool\n", pVM->vmm.s.fUsePeriodicPreemptionTimers)); + + /* + * Last chance for GIM to update its CPUID leaves if it requires + * knowledge/information from HM initialization. + */ + rc = GIMR3InitCompleted(pVM); + AssertRCReturn(rc, rc); + + /* + * CPUM's post-initialization (print CPUIDs). + */ + CPUMR3LogCpuIdAndMsrFeatures(pVM); + break; + } + + default: /* shuts up gcc */ + break; + } + + return rc; +} + + +/** + * Terminate the VMM bits. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) VMMR3Term(PVM pVM) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + Assert(pVCpu && pVCpu->idCpu == 0); + + /* + * Call Ring-0 entry with termination code. + */ + int rc; + for (;;) + { +#ifdef NO_SUPCALLR0VMM + //rc = VERR_GENERAL_FAILURE; + rc = VINF_SUCCESS; +#else + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_VMMR0_TERM, 0, NULL); +#endif + /* + * Flush the logs. + */ +#ifdef LOG_ENABLED + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0LoggerR3, NULL); +#endif + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0RelLoggerR3, RTLogRelGetDefaultInstance()); + if (rc != VINF_VMM_CALL_HOST) + break; + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + break; + /* Resume R0 */ + } + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + { + LogRel(("VMM: VMMR3Term: R0 term failed, rc=%Rra. (warning)\n", rc)); + if (RT_SUCCESS(rc)) + rc = VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + RTSemEventDestroy(pVM->vmm.s.pahEvtRendezvousEnterOrdered[i]); + pVM->vmm.s.pahEvtRendezvousEnterOrdered[i] = NIL_RTSEMEVENT; + } + RTSemEventDestroy(pVM->vmm.s.hEvtRendezvousEnterOneByOne); + pVM->vmm.s.hEvtRendezvousEnterOneByOne = NIL_RTSEMEVENT; + RTSemEventMultiDestroy(pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce); + pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce = NIL_RTSEMEVENTMULTI; + RTSemEventMultiDestroy(pVM->vmm.s.hEvtMulRendezvousDone); + pVM->vmm.s.hEvtMulRendezvousDone = NIL_RTSEMEVENTMULTI; + RTSemEventDestroy(pVM->vmm.s.hEvtRendezvousDoneCaller); + pVM->vmm.s.hEvtRendezvousDoneCaller = NIL_RTSEMEVENT; + RTSemEventMultiDestroy(pVM->vmm.s.hEvtMulRendezvousRecursionPush); + pVM->vmm.s.hEvtMulRendezvousRecursionPush = NIL_RTSEMEVENTMULTI; + RTSemEventMultiDestroy(pVM->vmm.s.hEvtMulRendezvousRecursionPop); + pVM->vmm.s.hEvtMulRendezvousRecursionPop = NIL_RTSEMEVENTMULTI; + RTSemEventDestroy(pVM->vmm.s.hEvtRendezvousRecursionPushCaller); + pVM->vmm.s.hEvtRendezvousRecursionPushCaller = NIL_RTSEMEVENT; + RTSemEventDestroy(pVM->vmm.s.hEvtRendezvousRecursionPopCaller); + pVM->vmm.s.hEvtRendezvousRecursionPopCaller = NIL_RTSEMEVENT; + +#ifdef VBOX_STRICT_VMM_STACK + /* + * Make the two stack guard pages present again. + */ + if (pVM->vmm.s.fStackGuardsStationed) + { + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + uint8_t *pbEMTStackR3 = pVM->aCpus[i].vmm.s.pbEMTStackR3; + MMR3HyperSetGuard(pVM, pbEMTStackR3 - PAGE_SIZE, PAGE_SIZE, false /*fSet*/); + MMR3HyperSetGuard(pVM, pbEMTStackR3 + VMM_STACK_SIZE, PAGE_SIZE, false /*fSet*/); + } + pVM->vmm.s.fStackGuardsStationed = false; + } +#endif + + vmmTermFormatTypes(); + return rc; +} + + +/** + * Applies relocations to data and code managed by this + * component. 
This function will be called at init and + * whenever the VMM needs to relocate itself inside the GC. + * + * The VMM will need to apply relocations to the core code. + * + * @param pVM The cross context VM structure. + * @param offDelta The relocation delta. + */ +VMMR3_INT_DECL(void) VMMR3Relocate(PVM pVM, RTGCINTPTR offDelta) +{ + LogFlow(("VMMR3Relocate: offDelta=%RGv\n", offDelta)); + + /* + * Recalc the RC address. + */ +#ifdef VBOX_WITH_RAW_MODE + pVM->vmm.s.pvCoreCodeRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pvCoreCodeR3); +#endif + + /* + * The stack. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + + CPUMSetHyperESP(pVCpu, CPUMGetHyperESP(pVCpu) + offDelta); + + pVCpu->vmm.s.pbEMTStackRC = MMHyperR3ToRC(pVM, pVCpu->vmm.s.pbEMTStackR3); + pVCpu->vmm.s.pbEMTStackBottomRC = pVCpu->vmm.s.pbEMTStackRC + VMM_STACK_SIZE; + } + + /* + * All the switchers. + */ + vmmR3SwitcherRelocate(pVM, offDelta); + + /* + * Get other RC entry points. + */ + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "CPUMGCResumeGuest", &pVM->vmm.s.pfnCPUMRCResumeGuest); + AssertReleaseMsgRC(rc, ("CPUMGCResumeGuest not found! rc=%Rra\n", rc)); + + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "CPUMGCResumeGuestV86", &pVM->vmm.s.pfnCPUMRCResumeGuestV86); + AssertReleaseMsgRC(rc, ("CPUMGCResumeGuestV86 not found! rc=%Rra\n", rc)); + } + + /* + * Update the logger. + */ + VMMR3UpdateLoggers(pVM); +} + + +/** + * Updates the settings for the RC and R0 loggers. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(int) VMMR3UpdateLoggers(PVM pVM) +{ + /* + * Simply clone the logger instance (for RC). + */ + int rc = VINF_SUCCESS; + RTRCPTR RCPtrLoggerFlush = 0; + + if ( pVM->vmm.s.pRCLoggerR3 +#ifdef VBOX_WITH_RC_RELEASE_LOGGING + || pVM->vmm.s.pRCRelLoggerR3 +#endif + ) + { + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "vmmGCLoggerFlush", &RCPtrLoggerFlush); + AssertReleaseMsgRC(rc, ("vmmGCLoggerFlush not found! rc=%Rra\n", rc)); + } + + if (pVM->vmm.s.pRCLoggerR3) + { + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + RTRCPTR RCPtrLoggerWrapper = 0; + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "vmmGCLoggerWrapper", &RCPtrLoggerWrapper); + AssertReleaseMsgRC(rc, ("vmmGCLoggerWrapper not found! rc=%Rra\n", rc)); + + pVM->vmm.s.pRCLoggerRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pRCLoggerR3); + rc = RTLogCloneRC(NULL /* default */, pVM->vmm.s.pRCLoggerR3, pVM->vmm.s.cbRCLogger, + RCPtrLoggerWrapper, RCPtrLoggerFlush, RTLOGFLAGS_BUFFERED); + AssertReleaseMsgRC(rc, ("RTLogCloneRC failed! rc=%Rra\n", rc)); + } + +#ifdef VBOX_WITH_RC_RELEASE_LOGGING + if (pVM->vmm.s.pRCRelLoggerR3) + { + Assert(VM_IS_RAW_MODE_ENABLED(pVM)); + RTRCPTR RCPtrLoggerWrapper = 0; + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "vmmGCRelLoggerWrapper", &RCPtrLoggerWrapper); + AssertReleaseMsgRC(rc, ("vmmGCRelLoggerWrapper not found! rc=%Rra\n", rc)); + + pVM->vmm.s.pRCRelLoggerRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pRCRelLoggerR3); + rc = RTLogCloneRC(RTLogRelGetDefaultInstance(), pVM->vmm.s.pRCRelLoggerR3, pVM->vmm.s.cbRCRelLogger, + RCPtrLoggerWrapper, RCPtrLoggerFlush, RTLOGFLAGS_BUFFERED); + AssertReleaseMsgRC(rc, ("RTLogCloneRC failed! rc=%Rra\n", rc)); + } +#endif /* VBOX_WITH_RC_RELEASE_LOGGING */ + +#ifdef LOG_ENABLED + /* + * For the ring-0 EMT logger, we use a per-thread logger instance + * in ring-0. Only initialize it once.
+ */ + PRTLOGGER const pDefault = RTLogDefaultInstance(); + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + PVMMR0LOGGER pR0LoggerR3 = pVCpu->vmm.s.pR0LoggerR3; + if (pR0LoggerR3) + { + if (!pR0LoggerR3->fCreated) + { + RTR0PTR pfnLoggerWrapper = NIL_RTR0PTR; + rc = PDMR3LdrGetSymbolR0(pVM, VMMR0_MAIN_MODULE_NAME, "vmmR0LoggerWrapper", &pfnLoggerWrapper); + AssertReleaseMsgRCReturn(rc, ("vmmR0LoggerWrapper not found! rc=%Rra\n", rc), rc); + + RTR0PTR pfnLoggerFlush = NIL_RTR0PTR; + rc = PDMR3LdrGetSymbolR0(pVM, VMMR0_MAIN_MODULE_NAME, "vmmR0LoggerFlush", &pfnLoggerFlush); + AssertReleaseMsgRCReturn(rc, ("vmmR0LoggerFlush not found! rc=%Rra\n", rc), rc); + + char szR0ThreadName[16]; + RTStrPrintf(szR0ThreadName, sizeof(szR0ThreadName), "EMT-%u-R0", i); + rc = RTLogCreateForR0(&pR0LoggerR3->Logger, pR0LoggerR3->cbLogger, + pVCpu->vmm.s.pR0LoggerR0 + RT_UOFFSETOF(VMMR0LOGGER, Logger), + pfnLoggerWrapper, pfnLoggerFlush, + RTLOGFLAGS_BUFFERED, RTLOGDEST_DUMMY, szR0ThreadName); + AssertReleaseMsgRCReturn(rc, ("RTLogCreateForR0 failed! rc=%Rra\n", rc), rc); + + pR0LoggerR3->idCpu = i; + pR0LoggerR3->fCreated = true; + pR0LoggerR3->fFlushingDisabled = false; + } + + rc = RTLogCopyGroupsAndFlagsForR0(&pR0LoggerR3->Logger, pVCpu->vmm.s.pR0LoggerR0 + RT_UOFFSETOF(VMMR0LOGGER, Logger), + pDefault, RTLOGFLAGS_BUFFERED, UINT32_MAX); + AssertRC(rc); + } + } +#endif + return rc; +} + + +/** + * Gets the pointer to a buffer containing the R0/RC RTAssertMsg1Weak output. + * + * @returns Pointer to the buffer. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(const char *) VMMR3GetRZAssertMsg1(PVM pVM) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return pVM->vmm.s.szRing0AssertMsg1; + + RTRCPTR RCPtr; + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_szRTAssertMsg1", &RCPtr); + if (RT_SUCCESS(rc)) + return (const char *)MMHyperRCToR3(pVM, RCPtr); + + return NULL; +} + + +/** + * Returns the VMCPU of the specified virtual CPU. + * + * @returns The VMCPU pointer. NULL if @a idCpu or @a pUVM is invalid. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the virtual CPU. + */ +VMMR3DECL(PVMCPU) VMMR3GetCpuByIdU(PUVM pUVM, RTCPUID idCpu) +{ + UVM_ASSERT_VALID_EXT_RETURN(pUVM, NULL); + AssertReturn(idCpu < pUVM->cCpus, NULL); + VM_ASSERT_VALID_EXT_RETURN(pUVM->pVM, NULL); + return &pUVM->pVM->aCpus[idCpu]; +} + + +/** + * Gets the pointer to a buffer containing the R0/RC RTAssertMsg2Weak output. + * + * @returns Pointer to the buffer. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(const char *) VMMR3GetRZAssertMsg2(PVM pVM) +{ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + return pVM->vmm.s.szRing0AssertMsg2; + + RTRCPTR RCPtr; + int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "g_szRTAssertMsg2", &RCPtr); + if (RT_SUCCESS(rc)) + return (const char *)MMHyperRCToR3(pVM, RCPtr); + + return NULL; +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) vmmR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + LogFlow(("vmmR3Save:\n")); + + /* + * Save the started/stopped state of all CPUs except 0 as it will always + * be running. This avoids breaking the saved state version. :-) + */ + for (VMCPUID i = 1; i < pVM->cCpus; i++) + SSMR3PutBool(pSSM, VMCPUSTATE_IS_STARTED(VMCPU_GET_STATE(&pVM->aCpus[i]))); + + return SSMR3PutU32(pSSM, UINT32_MAX); /* terminator */ +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. 
+ * @param pVM The cross context VM structure. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) vmmR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + LogFlow(("vmmR3Load:\n")); + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Validate version. + */ + if ( uVersion != VMM_SAVED_STATE_VERSION + && uVersion != VMM_SAVED_STATE_VERSION_3_0) + { + AssertMsgFailed(("vmmR3Load: Invalid version uVersion=%u!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + if (uVersion <= VMM_SAVED_STATE_VERSION_3_0) + { + /* Ignore the stack bottom, stack pointer and stack bits. */ + RTRCPTR RCPtrIgnored; + SSMR3GetRCPtr(pSSM, &RCPtrIgnored); + SSMR3GetRCPtr(pSSM, &RCPtrIgnored); +#ifdef RT_OS_DARWIN + if ( SSMR3HandleVersion(pSSM) >= VBOX_FULL_VERSION_MAKE(3,0,0) + && SSMR3HandleVersion(pSSM) < VBOX_FULL_VERSION_MAKE(3,1,0) + && SSMR3HandleRevision(pSSM) >= 48858 + && ( !strcmp(SSMR3HandleHostOSAndArch(pSSM), "darwin.x86") + || !strcmp(SSMR3HandleHostOSAndArch(pSSM), "") ) + ) + SSMR3Skip(pSSM, 16384); + else + SSMR3Skip(pSSM, 8192); +#else + SSMR3Skip(pSSM, 8192); +#endif + } + + /* + * Restore the VMCPU states. VCPU 0 is always started. + */ + VMCPU_SET_STATE(&pVM->aCpus[0], VMCPUSTATE_STARTED); + for (VMCPUID i = 1; i < pVM->cCpus; i++) + { + bool fStarted; + int rc = SSMR3GetBool(pSSM, &fStarted); + if (RT_FAILURE(rc)) + return rc; + VMCPU_SET_STATE(&pVM->aCpus[i], fStarted ? VMCPUSTATE_STARTED : VMCPUSTATE_STOPPED); + } + + /* terminator */ + uint32_t u32; + int rc = SSMR3GetU32(pSSM, &u32); + if (RT_FAILURE(rc)) + return rc; + if (u32 != UINT32_MAX) + { + AssertMsgFailed(("u32=%#x\n", u32)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + return VINF_SUCCESS; +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Resolve a builtin RC symbol. + * + * Called by PDM when loading or relocating RC modules. + * + * @returns VBox status + * @param pVM The cross context VM structure. + * @param pszSymbol Symbol to resolve. + * @param pRCPtrValue Where to store the symbol value. + * + * @remark This has to work before VMMR3Relocate() is called. + */ +VMMR3_INT_DECL(int) VMMR3GetImportRC(PVM pVM, const char *pszSymbol, PRTRCPTR pRCPtrValue) +{ + if (!strcmp(pszSymbol, "g_Logger")) + { + if (pVM->vmm.s.pRCLoggerR3) + pVM->vmm.s.pRCLoggerRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pRCLoggerR3); + *pRCPtrValue = pVM->vmm.s.pRCLoggerRC; + } + else if (!strcmp(pszSymbol, "g_RelLogger")) + { +# ifdef VBOX_WITH_RC_RELEASE_LOGGING + if (pVM->vmm.s.pRCRelLoggerR3) + pVM->vmm.s.pRCRelLoggerRC = MMHyperR3ToRC(pVM, pVM->vmm.s.pRCRelLoggerR3); + *pRCPtrValue = pVM->vmm.s.pRCRelLoggerRC; +# else + *pRCPtrValue = NIL_RTRCPTR; +# endif + } + else + return VERR_SYMBOL_NOT_FOUND; + return VINF_SUCCESS; +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Suspends the CPU yielder. + * + * @param pVM The cross context VM structure. 
+ */ +VMMR3_INT_DECL(void) VMMR3YieldSuspend(PVM pVM) +{ + VMCPU_ASSERT_EMT(&pVM->aCpus[0]); + if (!pVM->vmm.s.cYieldResumeMillies) + { + uint64_t u64Now = TMTimerGet(pVM->vmm.s.pYieldTimer); + uint64_t u64Expire = TMTimerGetExpire(pVM->vmm.s.pYieldTimer); + if (u64Now >= u64Expire || u64Expire == ~(uint64_t)0) + pVM->vmm.s.cYieldResumeMillies = pVM->vmm.s.cYieldEveryMillies; + else + pVM->vmm.s.cYieldResumeMillies = TMTimerToMilli(pVM->vmm.s.pYieldTimer, u64Expire - u64Now); + TMTimerStop(pVM->vmm.s.pYieldTimer); + } + pVM->vmm.s.u64LastYield = RTTimeNanoTS(); +} + + +/** + * Stops the CPU yielder. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) VMMR3YieldStop(PVM pVM) +{ + if (!pVM->vmm.s.cYieldResumeMillies) + TMTimerStop(pVM->vmm.s.pYieldTimer); + pVM->vmm.s.cYieldResumeMillies = pVM->vmm.s.cYieldEveryMillies; + pVM->vmm.s.u64LastYield = RTTimeNanoTS(); +} + + +/** + * Resumes the CPU yielder when it has been suspended or stopped. + * + * @param pVM The cross context VM structure. + */ +VMMR3_INT_DECL(void) VMMR3YieldResume(PVM pVM) +{ + if (pVM->vmm.s.cYieldResumeMillies) + { + TMTimerSetMillies(pVM->vmm.s.pYieldTimer, pVM->vmm.s.cYieldResumeMillies); + pVM->vmm.s.cYieldResumeMillies = 0; + } +} + + +/** + * Internal timer callback function. + * + * @param pVM The cross context VM structure. + * @param pTimer The timer handle. + * @param pvUser User argument specified upon timer creation. + */ +static DECLCALLBACK(void) vmmR3YieldEMT(PVM pVM, PTMTIMER pTimer, void *pvUser) +{ + NOREF(pvUser); + + /* + * This really needs some careful tuning. While we shouldn't be too greedy since + * that'll cause the rest of the system to stop up, we shouldn't be too nice either + * because that'll cause us to stop up. + * + * The current logic is to use the default interval when there is no lag worth + * mentioning, but when we start accumulating lag we don't bother yielding at all. + * + * (This depends on the TMCLOCK_VIRTUAL_SYNC to be scheduled before TMCLOCK_REAL + * so the lag is up to date.) + */ + const uint64_t u64Lag = TMVirtualSyncGetLag(pVM); + if ( u64Lag < 50000000 /* 50ms */ + || ( u64Lag < 1000000000 /* 1s */ + && RTTimeNanoTS() - pVM->vmm.s.u64LastYield < 500000000 /* 500 ms */) + ) + { + uint64_t u64Elapsed = RTTimeNanoTS(); + pVM->vmm.s.u64LastYield = u64Elapsed; + + RTThreadYield(); + +#ifdef LOG_ENABLED + u64Elapsed = RTTimeNanoTS() - u64Elapsed; + Log(("vmmR3YieldEMT: %RI64 ns\n", u64Elapsed)); +#endif + } + TMTimerSetMillies(pTimer, pVM->vmm.s.cYieldEveryMillies); +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Executes guest code in the raw-mode context. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) VMMR3RawRunGC(PVM pVM, PVMCPU pVCpu) +{ + Log2(("VMMR3RawRunGC: (cs:eip=%04x:%08x)\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); + + AssertReturn(pVM->cCpus == 1, VERR_RAW_MODE_INVALID_SMP); + + /* + * Set the hypervisor to resume executing a CPUM resume function + * in CPUMRCA.asm. + */ + CPUMSetHyperState(pVCpu, + CPUMGetGuestEFlags(pVCpu) & X86_EFL_VM + ? pVM->vmm.s.pfnCPUMRCResumeGuestV86 + : pVM->vmm.s.pfnCPUMRCResumeGuest, /* eip */ + pVCpu->vmm.s.pbEMTStackBottomRC, /* esp */ + 0, /* eax */ + VM_RC_ADDR(pVM, &pVCpu->cpum) /* edx */); + + /* + * We hide log flushes (outer) and hypervisor interrupts (inner).
+ */ + for (;;) + { +#ifdef VBOX_STRICT + if (RT_UNLIKELY(!CPUMGetHyperCR3(pVCpu) || CPUMGetHyperCR3(pVCpu) != PGMGetHyperCR3(pVCpu))) + EMR3FatalError(pVCpu, VERR_VMM_HYPER_CR3_MISMATCH); + PGMMapCheck(pVM); +# ifdef VBOX_WITH_SAFE_STR + SELMR3CheckShadowTR(pVM); +# endif +#endif + int rc; + do + { +#ifdef NO_SUPCALLR0VMM + rc = VERR_GENERAL_FAILURE; +#else + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; +#endif + } while (rc == VINF_EM_RAW_INTERRUPT_HYPER); + + /* + * Flush the logs. + */ +#ifdef LOG_ENABLED + PRTLOGGERRC pLogger = pVM->vmm.s.pRCLoggerR3; + if ( pLogger + && pLogger->offScratch > 0) + RTLogFlushRC(NULL, pLogger); +#endif +#ifdef VBOX_WITH_RC_RELEASE_LOGGING + PRTLOGGERRC pRelLogger = pVM->vmm.s.pRCRelLoggerR3; + if (RT_UNLIKELY(pRelLogger && pRelLogger->offScratch > 0)) + RTLogFlushRC(RTLogRelGetDefaultInstance(), pRelLogger); +#endif + if (rc != VINF_VMM_CALL_HOST) + { + Log2(("VMMR3RawRunGC: returns %Rrc (cs:eip=%04x:%08x)\n", rc, CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); + return rc; + } + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + /* Resume GC */ + } +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Executes guest code (Intel VT-x and AMD-V). + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3_INT_DECL(int) VMMR3HmRunGC(PVM pVM, PVMCPU pVCpu) +{ + Log2(("VMMR3HmRunGC: (cs:rip=%04x:%RX64)\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestRIP(pVCpu))); + + for (;;) + { + int rc; + do + { +#ifdef NO_SUPCALLR0VMM + rc = VERR_GENERAL_FAILURE; +#else + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_HM_RUN, pVCpu->idCpu); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; +#endif + } while (rc == VINF_EM_RAW_INTERRUPT_HYPER); + +#if 0 /** @todo triggers too often */ + Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TO_R3)); +#endif + + /* + * Flush the logs + */ +#ifdef LOG_ENABLED + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0LoggerR3, NULL); +#endif + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0RelLoggerR3, RTLogRelGetDefaultInstance()); + if (rc != VINF_VMM_CALL_HOST) + { + Log2(("VMMR3HmRunGC: returns %Rrc (cs:rip=%04x:%RX64)\n", rc, CPUMGetGuestCS(pVCpu), CPUMGetGuestRIP(pVCpu))); + return rc; + } + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + /* Resume R0 */ + } +} + + +/** + * Perform one of the fast I/O control VMMR0 operation. + * + * @returns VBox strict status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param enmOperation The operation to perform. + */ +VMMR3_INT_DECL(VBOXSTRICTRC) VMMR3CallR0EmtFast(PVM pVM, PVMCPU pVCpu, VMMR0OPERATION enmOperation) +{ + for (;;) + { + VBOXSTRICTRC rcStrict; + do + { +#ifdef NO_SUPCALLR0VMM + rcStrict = VERR_GENERAL_FAILURE; +#else + rcStrict = SUPR3CallVMMR0Fast(pVM->pVMR0, enmOperation, pVCpu->idCpu); + if (RT_LIKELY(rcStrict == VINF_SUCCESS)) + rcStrict = pVCpu->vmm.s.iLastGZRc; +#endif + } while (rcStrict == VINF_EM_RAW_INTERRUPT_HYPER); + + /* + * Flush the logs + */ +#ifdef LOG_ENABLED + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0LoggerR3, NULL); +#endif + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0RelLoggerR3, RTLogRelGetDefaultInstance()); + if (rcStrict != VINF_VMM_CALL_HOST) + return rcStrict; + int rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + /* Resume R0 */ + } +} + + +/** + * VCPU worker for VMMR3SendStartupIpi. 
+ * + * @param pVM The cross context VM structure. + * @param idCpu Virtual CPU to perform SIPI on. + * @param uVector The SIPI vector. + */ +static DECLCALLBACK(int) vmmR3SendStarupIpi(PVM pVM, VMCPUID idCpu, uint32_t uVector) +{ + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + VMCPU_ASSERT_EMT(pVCpu); + + /* + * In the INIT state, the target CPU is only responsive to an SIPI. + * This is also true for when the CPU is in VMX non-root mode. + * + * See AMD spec. 16.5 "Interprocessor Interrupts (IPI)". + * See Intel spec. 26.6.2 "Activity State". + */ + if (EMGetState(pVCpu) != EMSTATE_WAIT_SIPI) + return VINF_SUCCESS; + + PCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + if (CPUMIsGuestInVmxRootMode(pCtx)) + { + /* If the CPU is in VMX non-root mode we must cause a VM-exit. */ + if (CPUMIsGuestInVmxNonRootMode(pCtx)) + return VBOXSTRICTRC_TODO(IEMExecVmxVmexitStartupIpi(pVCpu, uVector)); + + /* If the CPU is in VMX root mode (and not in VMX non-root mode) SIPIs are blocked. */ + return VINF_SUCCESS; + } +#endif + + pCtx->cs.Sel = uVector << 8; + pCtx->cs.ValidSel = uVector << 8; + pCtx->cs.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->cs.u64Base = uVector << 12; + pCtx->cs.u32Limit = UINT32_C(0x0000ffff); + pCtx->rip = 0; + + Log(("vmmR3SendSipi for VCPU %d with vector %x\n", idCpu, uVector)); + +# if 1 /* If we keep the EMSTATE_WAIT_SIPI method, then move this to EM.cpp. */ + EMSetState(pVCpu, EMSTATE_HALTED); + return VINF_EM_RESCHEDULE; +# else /* And if we go the VMCPU::enmState way it can stay here. */ + VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STOPPED); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); + return VINF_SUCCESS; +# endif +} + + +/** + * VCPU worker for VMMR3SendInitIpi. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param idCpu Virtual CPU to perform SIPI on. + */ +static DECLCALLBACK(int) vmmR3SendInitIpi(PVM pVM, VMCPUID idCpu) +{ + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + VMCPU_ASSERT_EMT(pVCpu); + + Log(("vmmR3SendInitIpi for VCPU %d\n", idCpu)); + + /** @todo r=ramshankar: We should probably block INIT signal when the CPU is in + * wait-for-SIPI state. Verify. */ + + /* If the CPU is in VMX non-root mode, INIT signals cause VM-exits. */ +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX + PCCPUMCTX pCtx = CPUMQueryGuestCtxPtr(pVCpu); + if (CPUMIsGuestInVmxNonRootMode(pCtx)) + return VBOXSTRICTRC_TODO(IEMExecVmxVmexitInitIpi(pVCpu)); +#endif + + /** @todo Figure out how to handle a SVM nested-guest intercepts here for INIT + * IPI (e.g. SVM_EXIT_INIT). */ + + PGMR3ResetCpu(pVM, pVCpu); + PDMR3ResetCpu(pVCpu); /* Only clears pending interrupts force flags */ + APICR3InitIpi(pVCpu); + TRPMR3ResetCpu(pVCpu); + CPUMR3ResetCpu(pVM, pVCpu); + EMR3ResetCpu(pVCpu); + HMR3ResetCpu(pVCpu); + NEMR3ResetCpu(pVCpu, true /*fInitIpi*/); + + /* This will trickle up on the target EMT. */ + return VINF_EM_WAIT_SIPI; +} + + +/** + * Sends a Startup IPI to the virtual CPU by setting CS:EIP into + * vector-dependent state and unhalting processor. + * + * @param pVM The cross context VM structure. + * @param idCpu Virtual CPU to perform SIPI on. + * @param uVector SIPI vector. + */ +VMMR3_INT_DECL(void) VMMR3SendStartupIpi(PVM pVM, VMCPUID idCpu, uint32_t uVector) +{ + AssertReturnVoid(idCpu < pVM->cCpus); + + int rc = VMR3ReqCallNoWait(pVM, idCpu, (PFNRT)vmmR3SendStarupIpi, 3, pVM, idCpu, uVector); + AssertRC(rc); +} + + +/** + * Sends init IPI to the virtual CPU. + * + * @param pVM The cross context VM structure.
+ * @param idCpu Virtual CPU to perform init IPI on. + */ +VMMR3_INT_DECL(void) VMMR3SendInitIpi(PVM pVM, VMCPUID idCpu) +{ + AssertReturnVoid(idCpu < pVM->cCpus); + + int rc = VMR3ReqCallNoWait(pVM, idCpu, (PFNRT)vmmR3SendInitIpi, 2, pVM, idCpu); + AssertRC(rc); +} + + +/** + * Registers the guest memory range that can be used for patching. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatchMem Patch memory range. + * @param cbPatchMem Size of the memory range. + */ +VMMR3DECL(int) VMMR3RegisterPatchMemory(PVM pVM, RTGCPTR pPatchMem, unsigned cbPatchMem) +{ + VM_ASSERT_EMT(pVM); + if (HMIsEnabled(pVM)) + return HMR3EnablePatching(pVM, pPatchMem, cbPatchMem); + + return VERR_NOT_SUPPORTED; +} + + +/** + * Deregisters the guest memory range that can be used for patching. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pPatchMem Patch memory range. + * @param cbPatchMem Size of the memory range. + */ +VMMR3DECL(int) VMMR3DeregisterPatchMemory(PVM pVM, RTGCPTR pPatchMem, unsigned cbPatchMem) +{ + if (HMIsEnabled(pVM)) + return HMR3DisablePatching(pVM, pPatchMem, cbPatchMem); + + return VINF_SUCCESS; +} + + +/** + * Common recursion handler for the other EMTs. + * + * @returns Strict VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param rcStrict Current status code to be combined with the one + * from this recursion and returned. + */ +static VBOXSTRICTRC vmmR3EmtRendezvousCommonRecursion(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rcStrict) +{ + int rc2; + + /* + * We wait here while the initiator of this recursion reconfigures + * everything. The last EMT to get in signals the initiator. + */ + if (ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsRecursingPush) == pVM->cCpus) + { + rc2 = RTSemEventSignal(pVM->vmm.s.hEvtRendezvousRecursionPushCaller); + AssertLogRelRC(rc2); + } + + rc2 = RTSemEventMultiWait(pVM->vmm.s.hEvtMulRendezvousRecursionPush, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc2); + + /* + * Do the normal rendezvous processing. + */ + VBOXSTRICTRC rcStrict2 = vmmR3EmtRendezvousCommon(pVM, pVCpu, false /* fIsCaller */, pVM->vmm.s.fRendezvousFlags, + pVM->vmm.s.pfnRendezvous, pVM->vmm.s.pvRendezvousUser); + + /* + * Wait for the initiator to restore everything. + */ + rc2 = RTSemEventMultiWait(pVM->vmm.s.hEvtMulRendezvousRecursionPop, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc2); + + /* + * Last thread out of here signals the initiator. + */ + if (ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsRecursingPop) == pVM->cCpus) + { + rc2 = RTSemEventSignal(pVM->vmm.s.hEvtRendezvousRecursionPopCaller); + AssertLogRelRC(rc2); + } + + /* + * Merge status codes and return. + */ + AssertRC(VBOXSTRICTRC_VAL(rcStrict2)); + if ( rcStrict2 != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || rcStrict > rcStrict2)) + rcStrict = rcStrict2; + return rcStrict; +} + + +/** + * Count returns and have the last non-caller EMT wake up the caller. + * + * @returns VBox strict informational status code for EM scheduling. No failures + * will be returned here, those are for the caller only. + * + * @param pVM The cross context VM structure. + * @param rcStrict The current accumulated recursive status code, + * to be merged with i32RendezvousStatus and + * returned.
+ */ +DECL_FORCE_INLINE(VBOXSTRICTRC) vmmR3EmtRendezvousNonCallerReturn(PVM pVM, VBOXSTRICTRC rcStrict) +{ + VBOXSTRICTRC rcStrict2 = ASMAtomicReadS32(&pVM->vmm.s.i32RendezvousStatus); + + uint32_t cReturned = ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsReturned); + if (cReturned == pVM->cCpus - 1U) + { + int rc = RTSemEventSignal(pVM->vmm.s.hEvtRendezvousDoneCaller); + AssertLogRelRC(rc); + } + + /* + * Merge the status codes, ignoring error statuses in this code path. + */ + AssertLogRelMsgReturn( rcStrict2 <= VINF_SUCCESS + || (rcStrict2 >= VINF_EM_FIRST && rcStrict2 <= VINF_EM_LAST), + ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict2)), + VERR_IPE_UNEXPECTED_INFO_STATUS); + + if (RT_SUCCESS(rcStrict2)) + { + if ( rcStrict2 != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || rcStrict > rcStrict2)) + rcStrict = rcStrict2; + } + return rcStrict; +} + + +/** + * Common worker for VMMR3EmtRendezvous and VMMR3EmtRendezvousFF. + * + * @returns VBox strict informational status code for EM scheduling. No failures + * will be returned here, those are for the caller only. When + * fIsCaller is set, VINF_SUCCESS is always returned. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param fIsCaller Whether we're the VMMR3EmtRendezvous caller or + * not. + * @param fFlags The flags. + * @param pfnRendezvous The callback. + * @param pvUser The user argument for the callback. + */ +static VBOXSTRICTRC vmmR3EmtRendezvousCommon(PVM pVM, PVMCPU pVCpu, bool fIsCaller, + uint32_t fFlags, PFNVMMEMTRENDEZVOUS pfnRendezvous, void *pvUser) +{ + int rc; + VBOXSTRICTRC rcStrictRecursion = VINF_SUCCESS; + + /* + * Enter, the last EMT triggers the next callback phase. + */ + uint32_t cEntered = ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsEntered); + if (cEntered != pVM->cCpus) + { + if ((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE) + { + /* Wait for our turn. */ + for (;;) + { + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousEnterOneByOne, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrictRecursion = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrictRecursion); + } + } + else if ((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE) + { + /* Wait for the last EMT to arrive and wake everyone up. */ + rc = RTSemEventMultiWait(pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + Assert(!pVM->vmm.s.fRendezvousRecursion); + } + else if ( (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING + || (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING) + { + /* Wait for our turn. */ + for (;;) + { + rc = RTSemEventWait(pVM->vmm.s.pahEvtRendezvousEnterOrdered[pVCpu->idCpu], RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrictRecursion = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrictRecursion); + } + } + else + { + Assert((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE); + + /* + * The execute once is handled specially to optimize the code flow. + * + * The last EMT to arrive will perform the callback and the other + * EMTs will wait on the Done/DoneCaller semaphores (instead of + * the EnterOneByOne/AllAtOnce) in the meanwhile. When the callback + * returns, that EMT will initiate the normal return sequence. 
+ */ + if (!fIsCaller) + { + for (;;) + { + rc = RTSemEventMultiWait(pVM->vmm.s.hEvtMulRendezvousDone, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrictRecursion = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrictRecursion); + } + + return vmmR3EmtRendezvousNonCallerReturn(pVM, rcStrictRecursion); + } + return VINF_SUCCESS; + } + } + else + { + /* + * All EMTs are waiting, clear the FF and take action according to the + * execution method. + */ + VM_FF_CLEAR(pVM, VM_FF_EMT_RENDEZVOUS); + + if ((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE) + { + /* Wake up everyone. */ + rc = RTSemEventMultiSignal(pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce); + AssertLogRelRC(rc); + } + else if ( (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING + || (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING) + { + /* Figure out who to wake up and wake it up. If it's ourself, then + it's easy otherwise wait for our turn. */ + VMCPUID iFirst = (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING + ? 0 + : pVM->cCpus - 1U; + if (pVCpu->idCpu != iFirst) + { + rc = RTSemEventSignal(pVM->vmm.s.pahEvtRendezvousEnterOrdered[iFirst]); + AssertLogRelRC(rc); + for (;;) + { + rc = RTSemEventWait(pVM->vmm.s.pahEvtRendezvousEnterOrdered[pVCpu->idCpu], RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrictRecursion = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrictRecursion); + } + } + } + /* else: execute the handler on the current EMT and wake up one or more threads afterwards. */ + } + + + /* + * Do the callback and update the status if necessary. + */ + if ( !(fFlags & VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR) + || RT_SUCCESS(ASMAtomicUoReadS32(&pVM->vmm.s.i32RendezvousStatus)) ) + { + VBOXSTRICTRC rcStrict2 = pfnRendezvous(pVM, pVCpu, pvUser); + if (rcStrict2 != VINF_SUCCESS) + { + AssertLogRelMsg( rcStrict2 <= VINF_SUCCESS + || (rcStrict2 >= VINF_EM_FIRST && rcStrict2 <= VINF_EM_LAST), + ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict2))); + int32_t i32RendezvousStatus; + do + { + i32RendezvousStatus = ASMAtomicUoReadS32(&pVM->vmm.s.i32RendezvousStatus); + if ( rcStrict2 == i32RendezvousStatus + || RT_FAILURE(i32RendezvousStatus) + || ( i32RendezvousStatus != VINF_SUCCESS + && rcStrict2 > i32RendezvousStatus)) + break; + } while (!ASMAtomicCmpXchgS32(&pVM->vmm.s.i32RendezvousStatus, VBOXSTRICTRC_VAL(rcStrict2), i32RendezvousStatus)); + } + } + + /* + * Increment the done counter and take action depending on whether we're + * the last to finish callback execution. + */ + uint32_t cDone = ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsDone); + if ( cDone != pVM->cCpus + && (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) != VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE) + { + /* Signal the next EMT? 
*/ + if ((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE) + { + rc = RTSemEventSignal(pVM->vmm.s.hEvtRendezvousEnterOneByOne); + AssertLogRelRC(rc); + } + else if ((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING) + { + Assert(cDone == pVCpu->idCpu + 1U); + rc = RTSemEventSignal(pVM->vmm.s.pahEvtRendezvousEnterOrdered[pVCpu->idCpu + 1U]); + AssertLogRelRC(rc); + } + else if ((fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING) + { + Assert(pVM->cCpus - cDone == pVCpu->idCpu); + rc = RTSemEventSignal(pVM->vmm.s.pahEvtRendezvousEnterOrdered[pVM->cCpus - cDone - 1U]); + AssertLogRelRC(rc); + } + + /* Wait for the rest to finish (the caller waits on hEvtRendezvousDoneCaller). */ + if (!fIsCaller) + { + for (;;) + { + rc = RTSemEventMultiWait(pVM->vmm.s.hEvtMulRendezvousDone, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrictRecursion = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrictRecursion); + } + } + } + else + { + /* Callback execution is all done, tell the rest to return. */ + rc = RTSemEventMultiSignal(pVM->vmm.s.hEvtMulRendezvousDone); + AssertLogRelRC(rc); + } + + if (!fIsCaller) + return vmmR3EmtRendezvousNonCallerReturn(pVM, rcStrictRecursion); + return rcStrictRecursion; +} + + +/** + * Called in response to VM_FF_EMT_RENDEZVOUS. + * + * @returns VBox strict status code - EM scheduling. No errors will be returned + * here, nor will any non-EM scheduling status codes be returned. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * + * @thread EMT + */ +VMMR3_INT_DECL(int) VMMR3EmtRendezvousFF(PVM pVM, PVMCPU pVCpu) +{ + Assert(!pVCpu->vmm.s.fInRendezvous); + Log(("VMMR3EmtRendezvousFF: EMT%#u\n", pVCpu->idCpu)); + pVCpu->vmm.s.fInRendezvous = true; + VBOXSTRICTRC rcStrict = vmmR3EmtRendezvousCommon(pVM, pVCpu, false /* fIsCaller */, pVM->vmm.s.fRendezvousFlags, + pVM->vmm.s.pfnRendezvous, pVM->vmm.s.pvRendezvousUser); + pVCpu->vmm.s.fInRendezvous = false; + Log(("VMMR3EmtRendezvousFF: EMT%#u returns %Rrc\n", pVCpu->idCpu, VBOXSTRICTRC_VAL(rcStrict))); + return VBOXSTRICTRC_TODO(rcStrict); +} + + +/** + * Helper for resetting a single wakeup event semaphore. + * + * @returns VERR_TIMEOUT on success, RTSemEventWait status otherwise. + * @param hEvt The event semaphore to reset. + */ +static int vmmR3HlpResetEvent(RTSEMEVENT hEvt) +{ + for (uint32_t cLoops = 0; ; cLoops++) + { + int rc = RTSemEventWait(hEvt, 0 /*cMsTimeout*/); + if (rc != VINF_SUCCESS || cLoops > _4K) + return rc; + } +} + + +/** + * Worker for VMMR3EmtRendezvous that handles recursion. + * + * @returns VBox strict status code. This will be the first error, + * VINF_SUCCESS, or an EM scheduling status code. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the + * calling EMT. + * @param fFlags Flags indicating execution methods. See + * grp_VMMR3EmtRendezvous_fFlags. + * @param pfnRendezvous The callback. + * @param pvUser User argument for the callback.
+ * + * @thread EMT(pVCpu) + */ +static VBOXSTRICTRC vmmR3EmtRendezvousRecursive(PVM pVM, PVMCPU pVCpu, uint32_t fFlags, + PFNVMMEMTRENDEZVOUS pfnRendezvous, void *pvUser) +{ + Log(("vmmR3EmtRendezvousRecursive: %#x EMT#%u depth=%d\n", fFlags, pVCpu->idCpu, pVM->vmm.s.cRendezvousRecursions)); + AssertLogRelReturn(pVM->vmm.s.cRendezvousRecursions < 3, VERR_DEADLOCK); + Assert(pVCpu->vmm.s.fInRendezvous); + + /* + * Save the current state. + */ + uint32_t const fParentFlags = pVM->vmm.s.fRendezvousFlags; + uint32_t const cParentDone = pVM->vmm.s.cRendezvousEmtsDone; + int32_t const iParentStatus = pVM->vmm.s.i32RendezvousStatus; + PFNVMMEMTRENDEZVOUS const pfnParent = pVM->vmm.s.pfnRendezvous; + void * const pvParentUser = pVM->vmm.s.pvRendezvousUser; + + /* + * Check preconditions and save the current state. + */ + AssertReturn( (fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING + || (fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING + || (fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE + || (fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, + VERR_INTERNAL_ERROR); + AssertReturn(pVM->vmm.s.cRendezvousEmtsEntered == pVM->cCpus, VERR_INTERNAL_ERROR_2); + AssertReturn(pVM->vmm.s.cRendezvousEmtsReturned == 0, VERR_INTERNAL_ERROR_3); + + /* + * Reset the recursion prep and pop semaphores. + */ + int rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousRecursionPush); + AssertLogRelRCReturn(rc, rc); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousRecursionPop); + AssertLogRelRCReturn(rc, rc); + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousRecursionPushCaller); + AssertLogRelMsgReturn(rc == VERR_TIMEOUT, ("%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS); + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousRecursionPopCaller); + AssertLogRelMsgReturn(rc == VERR_TIMEOUT, ("%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS); + + /* + * Usher the other thread into the recursion routine. 
+ */ + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsRecursingPush, 0); + ASMAtomicWriteBool(&pVM->vmm.s.fRendezvousRecursion, true); + + uint32_t cLeft = pVM->cCpus - (cParentDone + 1U); + if ((fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE) + while (cLeft-- > 0) + { + rc = RTSemEventSignal(pVM->vmm.s.hEvtRendezvousEnterOneByOne); + AssertLogRelRC(rc); + } + else if ((fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING) + { + Assert(cLeft == pVM->cCpus - (pVCpu->idCpu + 1U)); + for (VMCPUID iCpu = pVCpu->idCpu + 1U; iCpu < pVM->cCpus; iCpu++) + { + rc = RTSemEventSignal(pVM->vmm.s.pahEvtRendezvousEnterOrdered[iCpu]); + AssertLogRelRC(rc); + } + } + else if ((fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING) + { + Assert(cLeft == pVCpu->idCpu); + for (VMCPUID iCpu = pVCpu->idCpu; iCpu > 0; iCpu--) + { + rc = RTSemEventSignal(pVM->vmm.s.pahEvtRendezvousEnterOrdered[iCpu - 1U]); + AssertLogRelRC(rc); + } + } + else + AssertLogRelReturn((fParentFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, + VERR_INTERNAL_ERROR_4); + + rc = RTSemEventMultiSignal(pVM->vmm.s.hEvtMulRendezvousDone); + AssertLogRelRC(rc); + rc = RTSemEventSignal(pVM->vmm.s.hEvtRendezvousDoneCaller); + AssertLogRelRC(rc); + + + /* + * Wait for the EMTs to wake up and get out of the parent rendezvous code. + */ + if (ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsRecursingPush) != pVM->cCpus) + { + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousRecursionPushCaller, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + } + + ASMAtomicWriteBool(&pVM->vmm.s.fRendezvousRecursion, false); + + /* + * Clear the slate and setup the new rendezvous. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + rc = vmmR3HlpResetEvent(pVM->vmm.s.pahEvtRendezvousEnterOrdered[i]); + AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + } + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousEnterOneByOne); AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce); AssertLogRelRC(rc); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousDone); AssertLogRelRC(rc); + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousDoneCaller); AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsEntered, 0); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsDone, 0); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsReturned, 0); + ASMAtomicWriteS32(&pVM->vmm.s.i32RendezvousStatus, VINF_SUCCESS); + ASMAtomicWritePtr((void * volatile *)&pVM->vmm.s.pfnRendezvous, (void *)(uintptr_t)pfnRendezvous); + ASMAtomicWritePtr(&pVM->vmm.s.pvRendezvousUser, pvUser); + ASMAtomicWriteU32(&pVM->vmm.s.fRendezvousFlags, fFlags); + ASMAtomicIncU32(&pVM->vmm.s.cRendezvousRecursions); + + /* + * We're ready to go now, do normal rendezvous processing. + */ + rc = RTSemEventMultiSignal(pVM->vmm.s.hEvtMulRendezvousRecursionPush); + AssertLogRelRC(rc); + + VBOXSTRICTRC rcStrict = vmmR3EmtRendezvousCommon(pVM, pVCpu, true /*fIsCaller*/, fFlags, pfnRendezvous, pvUser); + + /* + * The caller waits for the other EMTs to be done, return and waiting on the + * pop semaphore. 
+ */ + for (;;) + { + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousDoneCaller, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrict = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrict); + } + + /* + * Get the return code and merge it with the above recursion status. + */ + VBOXSTRICTRC rcStrict2 = pVM->vmm.s.i32RendezvousStatus; + if ( rcStrict2 != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || rcStrict > rcStrict2)) + rcStrict = rcStrict2; + + /* + * Restore the parent rendezvous state. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + rc = vmmR3HlpResetEvent(pVM->vmm.s.pahEvtRendezvousEnterOrdered[i]); + AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + } + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousEnterOneByOne); AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce); AssertLogRelRC(rc); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousDone); AssertLogRelRC(rc); + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousDoneCaller); AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsEntered, pVM->cCpus); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsReturned, 0); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsDone, cParentDone); + ASMAtomicWriteS32(&pVM->vmm.s.i32RendezvousStatus, iParentStatus); + ASMAtomicWriteU32(&pVM->vmm.s.fRendezvousFlags, fParentFlags); + ASMAtomicWritePtr(&pVM->vmm.s.pvRendezvousUser, pvParentUser); + ASMAtomicWritePtr((void * volatile *)&pVM->vmm.s.pfnRendezvous, (void *)(uintptr_t)pfnParent); + + /* + * Usher the other EMTs back to their parent recursion routine, waiting + * for them to all get there before we return (makes sure they've been + * scheduled and are past the pop event sem, see below). + */ + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsRecursingPop, 0); + rc = RTSemEventMultiSignal(pVM->vmm.s.hEvtMulRendezvousRecursionPop); + AssertLogRelRC(rc); + + if (ASMAtomicIncU32(&pVM->vmm.s.cRendezvousEmtsRecursingPop) != pVM->cCpus) + { + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousRecursionPopCaller, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + } + + /* + * We must reset the pop semaphore on the way out (doing the pop caller too, + * just in case). The parent may be another recursion. + */ + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousRecursionPop); AssertLogRelRC(rc); + rc = vmmR3HlpResetEvent(pVM->vmm.s.hEvtRendezvousRecursionPopCaller); AssertLogRelMsg(rc == VERR_TIMEOUT, ("%Rrc\n", rc)); + + ASMAtomicDecU32(&pVM->vmm.s.cRendezvousRecursions); + + Log(("vmmR3EmtRendezvousRecursive: %#x EMT#%u depth=%d returns %Rrc\n", + fFlags, pVCpu->idCpu, pVM->vmm.s.cRendezvousRecursions, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; +} + + +/** + * EMT rendezvous. + * + * Gathers all the EMTs and execute some code on each of them, either in a one + * by one fashion or all at once. + * + * @returns VBox strict status code. This will be the first error, + * VINF_SUCCESS, or an EM scheduling status code. + * + * @retval VERR_DEADLOCK if recursion is attempted using a rendezvous type that + * doesn't support it or if the recursion is too deep. + * + * @param pVM The cross context VM structure. + * @param fFlags Flags indicating execution methods. See + * grp_VMMR3EmtRendezvous_fFlags. The one-by-one, + * descending and ascending rendezvous types support + * recursion from inside @a pfnRendezvous. + * @param pfnRendezvous The callback. 
+ * @param pvUser User argument for the callback. + * + * @thread Any. + */ +VMMR3DECL(int) VMMR3EmtRendezvous(PVM pVM, uint32_t fFlags, PFNVMMEMTRENDEZVOUS pfnRendezvous, void *pvUser) +{ + /* + * Validate input. + */ + AssertReturn(pVM, VERR_INVALID_VM_HANDLE); + AssertMsg( (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) != VMMEMTRENDEZVOUS_FLAGS_TYPE_INVALID + && (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) <= VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING + && !(fFlags & ~VMMEMTRENDEZVOUS_FLAGS_VALID_MASK), ("%#x\n", fFlags)); + AssertMsg( !(fFlags & VMMEMTRENDEZVOUS_FLAGS_STOP_ON_ERROR) + || ( (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) != VMMEMTRENDEZVOUS_FLAGS_TYPE_ALL_AT_ONCE + && (fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) != VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE), + ("type %u\n", fFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK)); + + VBOXSTRICTRC rcStrict; + PVMCPU pVCpu = VMMGetCpu(pVM); + if (!pVCpu) + { + /* + * Forward the request to an EMT thread. + */ + Log(("VMMR3EmtRendezvous: %#x non-EMT\n", fFlags)); + if (!(fFlags & VMMEMTRENDEZVOUS_FLAGS_PRIORITY)) + rcStrict = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)VMMR3EmtRendezvous, 4, pVM, fFlags, pfnRendezvous, pvUser); + else + rcStrict = VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)VMMR3EmtRendezvous, 4, pVM, fFlags, pfnRendezvous, pvUser); + Log(("VMMR3EmtRendezvous: %#x non-EMT returns %Rrc\n", fFlags, VBOXSTRICTRC_VAL(rcStrict))); + } + else if ( pVM->cCpus == 1 + || ( pVM->enmVMState == VMSTATE_DESTROYING + && VMR3GetActiveEmts(pVM->pUVM) < pVM->cCpus ) ) + { + /* + * Shortcut for the single EMT case. + * + * We also ends up here if EMT(0) (or others) tries to issue a rendezvous + * during vmR3Destroy after other emulation threads have started terminating. + */ + if (!pVCpu->vmm.s.fInRendezvous) + { + Log(("VMMR3EmtRendezvous: %#x EMT (uni)\n", fFlags)); + pVCpu->vmm.s.fInRendezvous = true; + pVM->vmm.s.fRendezvousFlags = fFlags; + rcStrict = pfnRendezvous(pVM, pVCpu, pvUser); + pVCpu->vmm.s.fInRendezvous = false; + } + else + { + /* Recursion. Do the same checks as in the SMP case. */ + Log(("VMMR3EmtRendezvous: %#x EMT (uni), recursion depth=%d\n", fFlags, pVM->vmm.s.cRendezvousRecursions)); + uint32_t fType = pVM->vmm.s.fRendezvousFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK; + AssertLogRelReturn( !pVCpu->vmm.s.fInRendezvous + || fType == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING + || fType == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING + || fType == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE + || fType == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE + , VERR_DEADLOCK); + + AssertLogRelReturn(pVM->vmm.s.cRendezvousRecursions < 3, VERR_DEADLOCK); + pVM->vmm.s.cRendezvousRecursions++; + uint32_t const fParentFlags = pVM->vmm.s.fRendezvousFlags; + pVM->vmm.s.fRendezvousFlags = fFlags; + + rcStrict = pfnRendezvous(pVM, pVCpu, pvUser); + + pVM->vmm.s.fRendezvousFlags = fParentFlags; + pVM->vmm.s.cRendezvousRecursions--; + } + Log(("VMMR3EmtRendezvous: %#x EMT (uni) returns %Rrc\n", fFlags, VBOXSTRICTRC_VAL(rcStrict))); + } + else + { + /* + * Spin lock. If busy, check for recursion, if not recursing wait for + * the other EMT to finish while keeping a lookout for the RENDEZVOUS FF. + */ + int rc; + rcStrict = VINF_SUCCESS; + if (RT_UNLIKELY(!ASMAtomicCmpXchgU32(&pVM->vmm.s.u32RendezvousLock, 0x77778888, 0))) + { + /* Allow recursion in some cases. 
*/ + if ( pVCpu->vmm.s.fInRendezvous + && ( (pVM->vmm.s.fRendezvousFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ASCENDING + || (pVM->vmm.s.fRendezvousFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_DESCENDING + || (pVM->vmm.s.fRendezvousFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONE_BY_ONE + || (pVM->vmm.s.fRendezvousFlags & VMMEMTRENDEZVOUS_FLAGS_TYPE_MASK) == VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE + )) + return VBOXSTRICTRC_TODO(vmmR3EmtRendezvousRecursive(pVM, pVCpu, fFlags, pfnRendezvous, pvUser)); + + AssertLogRelMsgReturn(!pVCpu->vmm.s.fInRendezvous, ("fRendezvousFlags=%#x\n", pVM->vmm.s.fRendezvousFlags), + VERR_DEADLOCK); + + Log(("VMMR3EmtRendezvous: %#x EMT#%u, waiting for lock...\n", fFlags, pVCpu->idCpu)); + while (!ASMAtomicCmpXchgU32(&pVM->vmm.s.u32RendezvousLock, 0x77778888, 0)) + { + if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)) + { + rc = VMMR3EmtRendezvousFF(pVM, pVCpu); + if ( rc != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || rcStrict > rc)) + rcStrict = rc; + /** @todo Perhaps deal with termination here? */ + } + ASMNopPause(); + } + } + + Log(("VMMR3EmtRendezvous: %#x EMT#%u\n", fFlags, pVCpu->idCpu)); + Assert(!VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS)); + Assert(!pVCpu->vmm.s.fInRendezvous); + pVCpu->vmm.s.fInRendezvous = true; + + /* + * Clear the slate and setup the rendezvous. This is a semaphore ping-pong orgy. :-) + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + rc = RTSemEventWait(pVM->vmm.s.pahEvtRendezvousEnterOrdered[i], 0); + AssertLogRelMsg(rc == VERR_TIMEOUT || rc == VINF_SUCCESS, ("%Rrc\n", rc)); + } + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousEnterOneByOne, 0); AssertLogRelMsg(rc == VERR_TIMEOUT || rc == VINF_SUCCESS, ("%Rrc\n", rc)); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousEnterAllAtOnce); AssertLogRelRC(rc); + rc = RTSemEventMultiReset(pVM->vmm.s.hEvtMulRendezvousDone); AssertLogRelRC(rc); + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousDoneCaller, 0); AssertLogRelMsg(rc == VERR_TIMEOUT || rc == VINF_SUCCESS, ("%Rrc\n", rc)); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsEntered, 0); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsDone, 0); + ASMAtomicWriteU32(&pVM->vmm.s.cRendezvousEmtsReturned, 0); + ASMAtomicWriteS32(&pVM->vmm.s.i32RendezvousStatus, VINF_SUCCESS); + ASMAtomicWritePtr((void * volatile *)&pVM->vmm.s.pfnRendezvous, (void *)(uintptr_t)pfnRendezvous); + ASMAtomicWritePtr(&pVM->vmm.s.pvRendezvousUser, pvUser); + ASMAtomicWriteU32(&pVM->vmm.s.fRendezvousFlags, fFlags); + + /* + * Set the FF and poke the other EMTs. + */ + VM_FF_SET(pVM, VM_FF_EMT_RENDEZVOUS); + VMR3NotifyGlobalFFU(pVM->pUVM, VMNOTIFYFF_FLAGS_POKE); + + /* + * Do the same ourselves. + */ + VBOXSTRICTRC rcStrict2 = vmmR3EmtRendezvousCommon(pVM, pVCpu, true /* fIsCaller */, fFlags, pfnRendezvous, pvUser); + + /* + * The caller waits for the other EMTs to be done and return before doing + * the cleanup. This makes away with wakeup / reset races we would otherwise + * risk in the multiple release event semaphore code (hEvtRendezvousDoneCaller). + */ + for (;;) + { + rc = RTSemEventWait(pVM->vmm.s.hEvtRendezvousDoneCaller, RT_INDEFINITE_WAIT); + AssertLogRelRC(rc); + if (!pVM->vmm.s.fRendezvousRecursion) + break; + rcStrict2 = vmmR3EmtRendezvousCommonRecursion(pVM, pVCpu, rcStrict2); + } + + /* + * Get the return code and clean up a little bit. 
+ */ + VBOXSTRICTRC rcStrict3 = pVM->vmm.s.i32RendezvousStatus; + ASMAtomicWriteNullPtr((void * volatile *)&pVM->vmm.s.pfnRendezvous); + + ASMAtomicWriteU32(&pVM->vmm.s.u32RendezvousLock, 0); + pVCpu->vmm.s.fInRendezvous = false; + + /* + * Merge rcStrict, rcStrict2 and rcStrict3. + */ + AssertRC(VBOXSTRICTRC_VAL(rcStrict)); + AssertRC(VBOXSTRICTRC_VAL(rcStrict2)); + if ( rcStrict2 != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || rcStrict > rcStrict2)) + rcStrict = rcStrict2; + if ( rcStrict3 != VINF_SUCCESS + && ( rcStrict == VINF_SUCCESS + || rcStrict > rcStrict3)) + rcStrict = rcStrict3; + Log(("VMMR3EmtRendezvous: %#x EMT#%u returns %Rrc\n", fFlags, pVCpu->idCpu, VBOXSTRICTRC_VAL(rcStrict))); + } + + AssertLogRelMsgReturn( rcStrict <= VINF_SUCCESS + || (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST), + ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), + VERR_IPE_UNEXPECTED_INFO_STATUS); + return VBOXSTRICTRC_VAL(rcStrict); +} + + +/** + * Interface for vmR3SetHaltMethodU. + * + * @param pVCpu The cross context virtual CPU structure of the + * calling EMT. + * @param fMayHaltInRing0 The new state. + * @param cNsSpinBlockThreshold The spin-vs-blocking threshold. + * @thread EMT(pVCpu) + * + * @todo Move the EMT handling to VMM (or EM). I soooooo regret that VM + * component. + */ +VMMR3_INT_DECL(void) VMMR3SetMayHaltInRing0(PVMCPU pVCpu, bool fMayHaltInRing0, uint32_t cNsSpinBlockThreshold) +{ + pVCpu->vmm.s.fMayHaltInRing0 = fMayHaltInRing0; + pVCpu->vmm.s.cNsSpinBlockThreshold = cNsSpinBlockThreshold; +} + + +/** + * Read from the ring 0 jump buffer stack. + * + * @returns VBox status code. + * + * @param pVM The cross context VM structure. + * @param idCpu The ID of the source CPU context (for the address). + * @param R0Addr Where to start reading. + * @param pvBuf Where to store the data we've read. + * @param cbRead The number of bytes to read. + */ +VMMR3_INT_DECL(int) VMMR3ReadR0Stack(PVM pVM, VMCPUID idCpu, RTHCUINTPTR R0Addr, void *pvBuf, size_t cbRead) +{ + PVMCPU pVCpu = VMMGetCpuById(pVM, idCpu); + AssertReturn(pVCpu, VERR_INVALID_PARAMETER); + AssertReturn(cbRead < ~(size_t)0 / 2, VERR_INVALID_PARAMETER); + + int rc; +#ifdef VMM_R0_SWITCH_STACK + RTHCUINTPTR off = R0Addr - MMHyperCCToR0(pVM, pVCpu->vmm.s.pbEMTStackR3); +#else + RTHCUINTPTR off = pVCpu->vmm.s.CallRing3JmpBufR0.cbSavedStack - (pVCpu->vmm.s.CallRing3JmpBufR0.SpCheck - R0Addr); +#endif + if ( off < VMM_STACK_SIZE + && off + cbRead <= VMM_STACK_SIZE) + { + memcpy(pvBuf, &pVCpu->vmm.s.pbEMTStackR3[off], cbRead); + rc = VINF_SUCCESS; + } + else + rc = VERR_INVALID_POINTER; + + /* Supply the setjmp return RIP/EIP.
*/ + if ( pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcLocation + sizeof(RTR0UINTPTR) > R0Addr + && pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcLocation < R0Addr + cbRead) + { + uint8_t const *pbSrc = (uint8_t const *)&pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcValue; + size_t cbSrc = sizeof(pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcValue); + size_t offDst = 0; + if (R0Addr < pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcLocation) + offDst = pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcLocation - R0Addr; + else if (R0Addr > pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcLocation) + { + size_t offSrc = R0Addr - pVCpu->vmm.s.CallRing3JmpBufR0.UnwindRetPcLocation; + Assert(offSrc < cbSrc); + pbSrc -= offSrc; + cbSrc -= offSrc; + } + if (cbSrc > cbRead - offDst) + cbSrc = cbRead - offDst; + memcpy((uint8_t *)pvBuf + offDst, pbSrc, cbSrc); + + if (cbSrc == cbRead) + rc = VINF_SUCCESS; + } + + return rc; +} + + +/** + * Used by the DBGF stack unwinder to initialize the register state. + * + * @param pUVM The user mode VM handle. + * @param idCpu The ID of the CPU being unwound. + * @param pState The unwind state to initialize. + */ +VMMR3_INT_DECL(void) VMMR3InitR0StackUnwindState(PUVM pUVM, VMCPUID idCpu, struct RTDBGUNWINDSTATE *pState) +{ + PVMCPU pVCpu = VMMR3GetCpuByIdU(pUVM, idCpu); + AssertReturnVoid(pVCpu); + + /* + * Locate the resume point on the stack. + */ +#ifdef VMM_R0_SWITCH_STACK + uintptr_t off = pVCpu->vmm.s.CallRing3JmpBufR0.SpResume - MMHyperCCToR0(pVCpu->pVMR3, pVCpu->vmm.s.pbEMTStackR3); + AssertReturnVoid(off < VMM_STACK_SIZE); +#else + uintptr_t off = 0; +#endif + +#ifdef RT_ARCH_AMD64 + /* + * This code must match the .resume stuff in VMMR0JmpA-amd64.asm exactly. + */ +# ifdef VBOX_STRICT + Assert(*(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off] == UINT32_C(0x7eadf00d)); + off += 8; /* RESUME_MAGIC */ +# endif +# ifdef RT_OS_WINDOWS + off += 0xa0; /* XMM6 thru XMM15 */ +# endif + pState->u.x86.uRFlags = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->u.x86.auRegs[X86_GREG_xBX] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; +# ifdef RT_OS_WINDOWS + pState->u.x86.auRegs[X86_GREG_xSI] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->u.x86.auRegs[X86_GREG_xDI] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; +# endif + pState->u.x86.auRegs[X86_GREG_x12] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->u.x86.auRegs[X86_GREG_x13] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->u.x86.auRegs[X86_GREG_x14] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->u.x86.auRegs[X86_GREG_x15] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->u.x86.auRegs[X86_GREG_xBP] = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + pState->uPc = *(uint64_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 8; + +#elif defined(RT_ARCH_X86) + /* + * This code must match the .resume stuff in VMMR0JmpA-x86.asm exactly. 
+ */ +# ifdef VBOX_STRICT + Assert(*(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off] == UINT32_C(0x7eadf00d)); + off += 4; /* RESUME_MAGIC */ +# endif + pState->u.x86.uRFlags = *(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 4; + pState->u.x86.auRegs[X86_GREG_xBX] = *(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 4; + pState->u.x86.auRegs[X86_GREG_xSI] = *(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 4; + pState->u.x86.auRegs[X86_GREG_xDI] = *(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 4; + pState->u.x86.auRegs[X86_GREG_xBP] = *(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 4; + pState->uPc = *(uint32_t const *)&pVCpu->vmm.s.pbEMTStackR3[off]; + off += 4; +#else +# error "Port me" +#endif + + /* + * This is all we really need here, though the above helps if the assembly + * doesn't contain unwind info (currently only on win/64, so that is useful). + */ + pState->u.x86.auRegs[X86_GREG_xBP] = pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp; + pState->u.x86.auRegs[X86_GREG_xSP] = pVCpu->vmm.s.CallRing3JmpBufR0.SpResume; +} + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Calls a RC function. + * + * @param pVM The cross context VM structure. + * @param RCPtrEntry The address of the RC function. + * @param cArgs The number of arguments in the .... + * @param ... Arguments to the function. + */ +VMMR3DECL(int) VMMR3CallRC(PVM pVM, RTRCPTR RCPtrEntry, unsigned cArgs, ...) +{ + va_list args; + va_start(args, cArgs); + int rc = VMMR3CallRCV(pVM, RCPtrEntry, cArgs, args); + va_end(args); + return rc; +} + + +/** + * Calls a RC function. + * + * @param pVM The cross context VM structure. + * @param RCPtrEntry The address of the RC function. + * @param cArgs The number of arguments in the .... + * @param args Arguments to the function. + */ +VMMR3DECL(int) VMMR3CallRCV(PVM pVM, RTRCPTR RCPtrEntry, unsigned cArgs, va_list args) +{ + /* Raw mode implies 1 VCPU. */ + AssertReturn(pVM->cCpus == 1, VERR_RAW_MODE_INVALID_SMP); + PVMCPU pVCpu = &pVM->aCpus[0]; + + Log2(("VMMR3CallGCV: RCPtrEntry=%RRv cArgs=%d\n", RCPtrEntry, cArgs)); + + /* + * Setup the call frame using the trampoline. + */ + CPUMSetHyperState(pVCpu, + pVM->vmm.s.pfnCallTrampolineRC, /* eip */ + pVCpu->vmm.s.pbEMTStackBottomRC - cArgs * sizeof(RTGCUINTPTR32), /* esp */ + RCPtrEntry, /* eax */ + cArgs /* edx */ + ); + +#if 0 + memset(pVCpu->vmm.s.pbEMTStackR3, 0xaa, VMM_STACK_SIZE); /* Clear the stack. */ +#endif + PRTGCUINTPTR32 pFrame = (PRTGCUINTPTR32)(pVCpu->vmm.s.pbEMTStackR3 + VMM_STACK_SIZE) - cArgs; + int i = cArgs; + while (i-- > 0) + *pFrame++ = va_arg(args, RTGCUINTPTR32); + + CPUMPushHyper(pVCpu, cArgs * sizeof(RTGCUINTPTR32)); /* stack frame size */ + CPUMPushHyper(pVCpu, RCPtrEntry); /* what to call */ + + /* + * We hide log flushes (outer) and hypervisor interrupts (inner). + */ + for (;;) + { + int rc; + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + do + { +#ifdef NO_SUPCALLR0VMM + rc = VERR_GENERAL_FAILURE; +#else + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; +#endif + } while (rc == VINF_EM_RAW_INTERRUPT_HYPER); + + /* + * Flush the loggers. 
+ */ +#ifdef LOG_ENABLED + PRTLOGGERRC pLogger = pVM->vmm.s.pRCLoggerR3; + if ( pLogger + && pLogger->offScratch > 0) + RTLogFlushRC(NULL, pLogger); +#endif +#ifdef VBOX_WITH_RC_RELEASE_LOGGING + PRTLOGGERRC pRelLogger = pVM->vmm.s.pRCRelLoggerR3; + if (RT_UNLIKELY(pRelLogger && pRelLogger->offScratch > 0)) + RTLogFlushRC(RTLogRelGetDefaultInstance(), pRelLogger); +#endif + if (rc == VERR_TRPM_PANIC || rc == VERR_TRPM_DONT_PANIC) + VMMR3FatalDump(pVM, pVCpu, rc); + if (rc != VINF_VMM_CALL_HOST) + { + Log2(("VMMR3CallGCV: returns %Rrc (cs:eip=%04x:%08x)\n", rc, CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); + return rc; + } + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + } +} + +#endif /* VBOX_WITH_RAW_MODE */ + +/** + * Wrapper for SUPR3CallVMMR0Ex which will deal with VINF_VMM_CALL_HOST returns. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param uOperation Operation to execute. + * @param u64Arg Constant argument. + * @param pReqHdr Pointer to a request header. See SUPR3CallVMMR0Ex for + * details. + */ +VMMR3DECL(int) VMMR3CallR0(PVM pVM, uint32_t uOperation, uint64_t u64Arg, PSUPVMMR0REQHDR pReqHdr) +{ + PVMCPU pVCpu = VMMGetCpu(pVM); + AssertReturn(pVCpu, VERR_VM_THREAD_NOT_EMT); + return VMMR3CallR0Emt(pVM, pVCpu, (VMMR0OPERATION)uOperation, u64Arg, pReqHdr); +} + + +/** + * Wrapper for SUPR3CallVMMR0Ex which will deal with VINF_VMM_CALL_HOST returns. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context VM structure. + * @param enmOperation Operation to execute. + * @param u64Arg Constant argument. + * @param pReqHdr Pointer to a request header. See SUPR3CallVMMR0Ex for + * details. + */ +VMMR3_INT_DECL(int) VMMR3CallR0Emt(PVM pVM, PVMCPU pVCpu, VMMR0OPERATION enmOperation, uint64_t u64Arg, PSUPVMMR0REQHDR pReqHdr) +{ + int rc; + for (;;) + { +#ifdef NO_SUPCALLR0VMM + rc = VERR_GENERAL_FAILURE; +#else + rc = SUPR3CallVMMR0Ex(pVM->pVMR0, pVCpu->idCpu, enmOperation, u64Arg, pReqHdr); +#endif + /* + * Flush the logs. + */ +#ifdef LOG_ENABLED + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0LoggerR3, NULL); +#endif + VMM_FLUSH_R0_LOG(pVCpu->vmm.s.pR0RelLoggerR3, RTLogRelGetDefaultInstance()); + if (rc != VINF_VMM_CALL_HOST) + break; + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)) + break; + /* Resume R0 */ + } + + AssertLogRelMsgReturn(rc == VINF_SUCCESS || RT_FAILURE(rc), + ("enmOperation=%u rc=%Rrc\n", enmOperation, rc), + VERR_IPE_UNEXPECTED_INFO_STATUS); + return rc; +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Resumes executing hypervisor code when interrupted by a queue flush or a + * debug event. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +VMMR3DECL(int) VMMR3ResumeHyper(PVM pVM, PVMCPU pVCpu) +{ + Log(("VMMR3ResumeHyper: eip=%RRv esp=%RRv\n", CPUMGetHyperEIP(pVCpu), CPUMGetHyperESP(pVCpu))); + AssertReturn(pVM->cCpus == 1, VERR_RAW_MODE_INVALID_SMP); + + /* + * We hide log flushes (outer) and hypervisor interrupts (inner). 
+ */ + for (;;) + { + int rc; + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + do + { +# ifdef NO_SUPCALLR0VMM + rc = VERR_GENERAL_FAILURE; +# else + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; +# endif + } while (rc == VINF_EM_RAW_INTERRUPT_HYPER); + + /* + * Flush the loggers. + */ +# ifdef LOG_ENABLED + PRTLOGGERRC pLogger = pVM->vmm.s.pRCLoggerR3; + if ( pLogger + && pLogger->offScratch > 0) + RTLogFlushRC(NULL, pLogger); +# endif +# ifdef VBOX_WITH_RC_RELEASE_LOGGING + PRTLOGGERRC pRelLogger = pVM->vmm.s.pRCRelLoggerR3; + if (RT_UNLIKELY(pRelLogger && pRelLogger->offScratch > 0)) + RTLogFlushRC(RTLogRelGetDefaultInstance(), pRelLogger); +# endif + if (rc == VERR_TRPM_PANIC || rc == VERR_TRPM_DONT_PANIC) + VMMR3FatalDump(pVM, pVCpu, rc); + if (rc != VINF_VMM_CALL_HOST) + { + Log(("VMMR3ResumeHyper: returns %Rrc\n", rc)); + return rc; + } + rc = vmmR3ServiceCallRing3Request(pVM, pVCpu); + if (RT_FAILURE(rc)) + return rc; + } +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Service a call to the ring-3 host code. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @remarks Careful with critsects. + */ +static int vmmR3ServiceCallRing3Request(PVM pVM, PVMCPU pVCpu) +{ + /* + * We must also check for pending critsect exits or else we can deadlock + * when entering other critsects here. + */ + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PDM_CRITSECT)) + PDMCritSectBothFF(pVCpu); + + switch (pVCpu->vmm.s.enmCallRing3Operation) + { + /* + * Acquire a critical section. + */ + case VMMCALLRING3_PDM_CRIT_SECT_ENTER: + { + pVCpu->vmm.s.rcCallRing3 = PDMR3CritSectEnterEx((PPDMCRITSECT)(uintptr_t)pVCpu->vmm.s.u64CallRing3Arg, + true /*fCallRing3*/); + break; + } + + /* + * Enter a r/w critical section exclusively. + */ + case VMMCALLRING3_PDM_CRIT_SECT_RW_ENTER_EXCL: + { + pVCpu->vmm.s.rcCallRing3 = PDMR3CritSectRwEnterExclEx((PPDMCRITSECTRW)(uintptr_t)pVCpu->vmm.s.u64CallRing3Arg, + true /*fCallRing3*/); + break; + } + + /* + * Enter a r/w critical section shared. + */ + case VMMCALLRING3_PDM_CRIT_SECT_RW_ENTER_SHARED: + { + pVCpu->vmm.s.rcCallRing3 = PDMR3CritSectRwEnterSharedEx((PPDMCRITSECTRW)(uintptr_t)pVCpu->vmm.s.u64CallRing3Arg, + true /*fCallRing3*/); + break; + } + + /* + * Acquire the PDM lock. + */ + case VMMCALLRING3_PDM_LOCK: + { + pVCpu->vmm.s.rcCallRing3 = PDMR3LockCall(pVM); + break; + } + + /* + * Grow the PGM pool. + */ + case VMMCALLRING3_PGM_POOL_GROW: + { + pVCpu->vmm.s.rcCallRing3 = PGMR3PoolGrow(pVM); + break; + } + + /* + * Maps an page allocation chunk into ring-3 so ring-0 can use it. + */ + case VMMCALLRING3_PGM_MAP_CHUNK: + { + pVCpu->vmm.s.rcCallRing3 = PGMR3PhysChunkMap(pVM, pVCpu->vmm.s.u64CallRing3Arg); + break; + } + + /* + * Allocates more handy pages. + */ + case VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES: + { + pVCpu->vmm.s.rcCallRing3 = PGMR3PhysAllocateHandyPages(pVM); + break; + } + + /* + * Allocates a large page. + */ + case VMMCALLRING3_PGM_ALLOCATE_LARGE_HANDY_PAGE: + { + pVCpu->vmm.s.rcCallRing3 = PGMR3PhysAllocateLargeHandyPage(pVM, pVCpu->vmm.s.u64CallRing3Arg); + break; + } + + /* + * Acquire the PGM lock. + */ + case VMMCALLRING3_PGM_LOCK: + { + pVCpu->vmm.s.rcCallRing3 = PGMR3LockCall(pVM); + break; + } + + /* + * Acquire the MM hypervisor heap lock. 
+ */ + case VMMCALLRING3_MMHYPER_LOCK: + { + pVCpu->vmm.s.rcCallRing3 = MMR3LockCall(pVM); + break; + } + +#ifdef VBOX_WITH_REM + /* + * Flush REM handler notifications. + */ + case VMMCALLRING3_REM_REPLAY_HANDLER_NOTIFICATIONS: + { + REMR3ReplayHandlerNotifications(pVM); + pVCpu->vmm.s.rcCallRing3 = VINF_SUCCESS; + break; + } +#endif + + /* + * This is a noop. We just take this route to avoid unnecessary + * tests in the loops. + */ + case VMMCALLRING3_VMM_LOGGER_FLUSH: + pVCpu->vmm.s.rcCallRing3 = VINF_SUCCESS; + LogAlways(("*FLUSH*\n")); + break; + + /* + * Set the VM error message. + */ + case VMMCALLRING3_VM_SET_ERROR: + VMR3SetErrorWorker(pVM); + pVCpu->vmm.s.rcCallRing3 = VINF_SUCCESS; + break; + + /* + * Set the VM runtime error message. + */ + case VMMCALLRING3_VM_SET_RUNTIME_ERROR: + pVCpu->vmm.s.rcCallRing3 = VMR3SetRuntimeErrorWorker(pVM); + break; + + /* + * Signal a ring 0 hypervisor assertion. + * Cancel the longjmp operation that's in progress. + */ + case VMMCALLRING3_VM_R0_ASSERTION: + pVCpu->vmm.s.enmCallRing3Operation = VMMCALLRING3_INVALID; + pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call = false; +#ifdef RT_ARCH_X86 + pVCpu->vmm.s.CallRing3JmpBufR0.eip = 0; +#else + pVCpu->vmm.s.CallRing3JmpBufR0.rip = 0; +#endif +#ifdef VMM_R0_SWITCH_STACK + *(uint64_t *)pVCpu->vmm.s.pbEMTStackR3 = 0; /* clear marker */ +#endif + LogRel(("%s", pVM->vmm.s.szRing0AssertMsg1)); + LogRel(("%s", pVM->vmm.s.szRing0AssertMsg2)); + return VERR_VMM_RING0_ASSERTION; + + /* + * A forced switch to ring 0 for preemption purposes. + */ + case VMMCALLRING3_VM_R0_PREEMPT: + pVCpu->vmm.s.rcCallRing3 = VINF_SUCCESS; + break; + + case VMMCALLRING3_FTM_SET_CHECKPOINT: + pVCpu->vmm.s.rcCallRing3 = FTMR3SetCheckpoint(pVM, (FTMCHECKPOINTTYPE)pVCpu->vmm.s.u64CallRing3Arg); + break; + + default: + AssertMsgFailed(("enmCallRing3Operation=%d\n", pVCpu->vmm.s.enmCallRing3Operation)); + return VERR_VMM_UNKNOWN_RING3_CALL; + } + + pVCpu->vmm.s.enmCallRing3Operation = VMMCALLRING3_INVALID; + return VINF_SUCCESS; +} + + +/** + * Displays the Force action Flags. + * + * @param pVM The cross context VM structure. + * @param pHlp The output helpers. + * @param pszArgs The additional arguments (ignored). + */ +static DECLCALLBACK(void) vmmR3InfoFF(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + int c; + uint32_t f; + NOREF(pszArgs); + +#define PRINT_FLAG(prf,flag) do { \ + if (f & (prf##flag)) \ + { \ + static const char *s_psz = #flag; \ + if (!(c % 6)) \ + pHlp->pfnPrintf(pHlp, "%s\n %s", c ? "," : "", s_psz); \ + else \ + pHlp->pfnPrintf(pHlp, ", %s", s_psz); \ + c++; \ + f &= ~(prf##flag); \ + } \ + } while (0) + +#define PRINT_GROUP(prf,grp,sfx) do { \ + if (f & (prf##grp##sfx)) \ + { \ + static const char *s_psz = #grp; \ + if (!(c % 5)) \ + pHlp->pfnPrintf(pHlp, "%s %s", c ? ",\n" : " Groups:\n", s_psz); \ + else \ + pHlp->pfnPrintf(pHlp, ", %s", s_psz); \ + c++; \ + } \ + } while (0) + + /* + * The global flags. 
+ */ + const uint32_t fGlobalForcedActions = pVM->fGlobalForcedActions; + pHlp->pfnPrintf(pHlp, "Global FFs: %#RX32", fGlobalForcedActions); + + /* show the flag mnemonics */ + c = 0; + f = fGlobalForcedActions; + PRINT_FLAG(VM_FF_,TM_VIRTUAL_SYNC); + PRINT_FLAG(VM_FF_,PDM_QUEUES); + PRINT_FLAG(VM_FF_,PDM_DMA); + PRINT_FLAG(VM_FF_,DBGF); + PRINT_FLAG(VM_FF_,REQUEST); + PRINT_FLAG(VM_FF_,CHECK_VM_STATE); + PRINT_FLAG(VM_FF_,RESET); + PRINT_FLAG(VM_FF_,EMT_RENDEZVOUS); + PRINT_FLAG(VM_FF_,PGM_NEED_HANDY_PAGES); + PRINT_FLAG(VM_FF_,PGM_NO_MEMORY); + PRINT_FLAG(VM_FF_,PGM_POOL_FLUSH_PENDING); + PRINT_FLAG(VM_FF_,REM_HANDLER_NOTIFY); + PRINT_FLAG(VM_FF_,DEBUG_SUSPEND); + if (f) + pHlp->pfnPrintf(pHlp, "%s\n Unknown bits: %#RX32\n", c ? "," : "", f); + else + pHlp->pfnPrintf(pHlp, "\n"); + + /* the groups */ + c = 0; + f = fGlobalForcedActions; + PRINT_GROUP(VM_FF_,EXTERNAL_SUSPENDED,_MASK); + PRINT_GROUP(VM_FF_,EXTERNAL_HALTED,_MASK); + PRINT_GROUP(VM_FF_,HIGH_PRIORITY_PRE,_MASK); + PRINT_GROUP(VM_FF_,HIGH_PRIORITY_PRE_RAW,_MASK); + PRINT_GROUP(VM_FF_,HIGH_PRIORITY_POST,_MASK); + PRINT_GROUP(VM_FF_,NORMAL_PRIORITY_POST,_MASK); + PRINT_GROUP(VM_FF_,NORMAL_PRIORITY,_MASK); + PRINT_GROUP(VM_FF_,ALL_REM,_MASK); + if (c) + pHlp->pfnPrintf(pHlp, "\n"); + + /* + * Per CPU flags. + */ + for (VMCPUID i = 0; i < pVM->cCpus; i++) + { + const uint64_t fLocalForcedActions = pVM->aCpus[i].fLocalForcedActions; + pHlp->pfnPrintf(pHlp, "CPU %u FFs: %#RX64", i, fLocalForcedActions); + + /* show the flag mnemonics */ + c = 0; + f = fLocalForcedActions; + PRINT_FLAG(VMCPU_FF_,INTERRUPT_APIC); + PRINT_FLAG(VMCPU_FF_,INTERRUPT_PIC); + PRINT_FLAG(VMCPU_FF_,TIMER); + PRINT_FLAG(VMCPU_FF_,INTERRUPT_NMI); + PRINT_FLAG(VMCPU_FF_,INTERRUPT_SMI); + PRINT_FLAG(VMCPU_FF_,PDM_CRITSECT); + PRINT_FLAG(VMCPU_FF_,UNHALT); + PRINT_FLAG(VMCPU_FF_,IEM); + PRINT_FLAG(VMCPU_FF_,UPDATE_APIC); + PRINT_FLAG(VMCPU_FF_,DBGF); + PRINT_FLAG(VMCPU_FF_,REQUEST); + PRINT_FLAG(VMCPU_FF_,HM_UPDATE_CR3); + PRINT_FLAG(VMCPU_FF_,HM_UPDATE_PAE_PDPES); + PRINT_FLAG(VMCPU_FF_,PGM_SYNC_CR3); + PRINT_FLAG(VMCPU_FF_,PGM_SYNC_CR3_NON_GLOBAL); + PRINT_FLAG(VMCPU_FF_,TLB_FLUSH); + PRINT_FLAG(VMCPU_FF_,INHIBIT_INTERRUPTS); + PRINT_FLAG(VMCPU_FF_,BLOCK_NMIS); + PRINT_FLAG(VMCPU_FF_,TO_R3); + PRINT_FLAG(VMCPU_FF_,IOM); +#ifdef VBOX_WITH_RAW_MODE + PRINT_FLAG(VMCPU_FF_,TRPM_SYNC_IDT); + PRINT_FLAG(VMCPU_FF_,SELM_SYNC_TSS); + PRINT_FLAG(VMCPU_FF_,SELM_SYNC_GDT); + PRINT_FLAG(VMCPU_FF_,SELM_SYNC_LDT); + PRINT_FLAG(VMCPU_FF_,CSAM_SCAN_PAGE); + PRINT_FLAG(VMCPU_FF_,CSAM_PENDING_ACTION); + PRINT_FLAG(VMCPU_FF_,CPUM); +#endif + if (f) + pHlp->pfnPrintf(pHlp, "%s\n Unknown bits: %#RX64\n", c ? 
"," : "", f); + else + pHlp->pfnPrintf(pHlp, "\n"); + + if (fLocalForcedActions & VMCPU_FF_INHIBIT_INTERRUPTS) + pHlp->pfnPrintf(pHlp, " intr inhibit RIP: %RGp\n", EMGetInhibitInterruptsPC(&pVM->aCpus[i])); + + /* the groups */ + c = 0; + f = fLocalForcedActions; + PRINT_GROUP(VMCPU_FF_,EXTERNAL_SUSPENDED,_MASK); + PRINT_GROUP(VMCPU_FF_,EXTERNAL_HALTED,_MASK); + PRINT_GROUP(VMCPU_FF_,HIGH_PRIORITY_PRE,_MASK); + PRINT_GROUP(VMCPU_FF_,HIGH_PRIORITY_PRE_RAW,_MASK); + PRINT_GROUP(VMCPU_FF_,HIGH_PRIORITY_POST,_MASK); + PRINT_GROUP(VMCPU_FF_,NORMAL_PRIORITY_POST,_MASK); + PRINT_GROUP(VMCPU_FF_,NORMAL_PRIORITY,_MASK); + PRINT_GROUP(VMCPU_FF_,RESUME_GUEST,_MASK); + PRINT_GROUP(VMCPU_FF_,HM_TO_R3,_MASK); + PRINT_GROUP(VMCPU_FF_,ALL_REM,_MASK); + if (c) + pHlp->pfnPrintf(pHlp, "\n"); + } + +#undef PRINT_FLAG +#undef PRINT_GROUP +} + diff --git a/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp b/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp new file mode 100644 index 00000000..c04c2011 --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMMGuruMeditation.cpp @@ -0,0 +1,790 @@ +/* $Id: VMMGuruMeditation.cpp $ */ +/** @file + * VMM - The Virtual Machine Monitor, Guru Meditation Code. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VMM +#include +#include +#include +#include +#include +#include "VMMInternal.h" +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Structure to pass to DBGFR3Info() and for doing all other + * output during fatal dump. + */ +typedef struct VMMR3FATALDUMPINFOHLP +{ + /** The helper core. */ + DBGFINFOHLP Core; + /** The release logger instance. */ + PRTLOGGER pRelLogger; + /** The saved release logger flags. */ + uint32_t fRelLoggerFlags; + /** The logger instance. */ + PRTLOGGER pLogger; + /** The saved logger flags. */ + uint32_t fLoggerFlags; + /** The saved logger destination flags. */ + uint32_t fLoggerDestFlags; + /** Whether to output to stderr or not. */ + bool fStdErr; + /** Whether we're still recording the summary or not. */ + bool fRecSummary; + /** Buffer for the summary. */ + char szSummary[4096 - 2]; + /** The current summary offset. */ + size_t offSummary; + /** Standard error buffer. */ + char achStdErrBuf[4096 - 8]; + /** Standard error buffer offset. */ + size_t offStdErrBuf; +} VMMR3FATALDUMPINFOHLP, *PVMMR3FATALDUMPINFOHLP; +/** Pointer to a VMMR3FATALDUMPINFOHLP structure. 
 */
+typedef const VMMR3FATALDUMPINFOHLP *PCVMMR3FATALDUMPINFOHLP;
+
+
+/**
+ * Flushes the content of achStdErrBuf, setting offStdErrBuf to zero.
+ *
+ * @param   pHlp    The instance to flush.
+ */
+static void vmmR3FatalDumpInfoHlpFlushStdErr(PVMMR3FATALDUMPINFOHLP pHlp)
+{
+    size_t cch = pHlp->offStdErrBuf;
+    if (cch)
+    {
+        RTStrmWrite(g_pStdErr, pHlp->achStdErrBuf, cch);
+        pHlp->offStdErrBuf = 0;
+    }
+}
+
+/**
+ * @callback_method_impl{FNRTSTROUTPUT, For buffering stderr output.}
+ */
+static DECLCALLBACK(size_t) vmmR3FatalDumpInfoHlp_BufferedStdErrOutput(void *pvArg, const char *pachChars, size_t cbChars)
+{
+    PVMMR3FATALDUMPINFOHLP pHlp = (PVMMR3FATALDUMPINFOHLP)pvArg;
+    if (cbChars)
+    {
+        size_t offBuf = pHlp->offStdErrBuf;
+        if (cbChars < sizeof(pHlp->achStdErrBuf) - offBuf)
+        { /* likely */ }
+        else
+        {
+            vmmR3FatalDumpInfoHlpFlushStdErr(pHlp);
+            if (cbChars < sizeof(pHlp->achStdErrBuf))
+                offBuf = 0;
+            else
+            {
+                RTStrmWrite(g_pStdErr, pachChars, cbChars);
+                return cbChars;
+            }
+        }
+        memcpy(&pHlp->achStdErrBuf[offBuf], pachChars, cbChars);
+        pHlp->offStdErrBuf = offBuf + cbChars;
+    }
+    return cbChars;
+}
+
+
+/**
+ * Print formatted string.
+ *
+ * @param   pHlp        Pointer to this structure.
+ * @param   pszFormat   The format string.
+ * @param   ...         Arguments.
+ */
+static DECLCALLBACK(void) vmmR3FatalDumpInfoHlp_pfnPrintf(PCDBGFINFOHLP pHlp, const char *pszFormat, ...)
+{
+    va_list args;
+    va_start(args, pszFormat);
+    pHlp->pfnPrintfV(pHlp, pszFormat, args);
+    va_end(args);
+}
+
+/**
+ * Print formatted string.
+ *
+ * @param   pHlp        Pointer to this structure.
+ * @param   pszFormat   The format string.
+ * @param   args        Argument list.
+ */
+static DECLCALLBACK(void) vmmR3FatalDumpInfoHlp_pfnPrintfV(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list args)
+{
+    PVMMR3FATALDUMPINFOHLP pMyHlp = (PVMMR3FATALDUMPINFOHLP)pHlp;
+
+    if (pMyHlp->pRelLogger)
+    {
+        va_list args2;
+        va_copy(args2, args);
+        RTLogLoggerV(pMyHlp->pRelLogger, pszFormat, args2);
+        va_end(args2);
+    }
+    if (pMyHlp->pLogger)
+    {
+        va_list args2;
+        va_copy(args2, args);
+        RTLogLoggerV(pMyHlp->pLogger, pszFormat, args2);
+        va_end(args2);
+    }
+    if (pMyHlp->fStdErr)
+    {
+        va_list args2;
+        va_copy(args2, args);
+        RTStrFormatV(vmmR3FatalDumpInfoHlp_BufferedStdErrOutput, pMyHlp, NULL, NULL, pszFormat, args2);
+        //RTStrmPrintfV(g_pStdErr, pszFormat, args2);
+        va_end(args2);
+    }
+    if (pMyHlp->fRecSummary)
+    {
+        size_t cchLeft = sizeof(pMyHlp->szSummary) - pMyHlp->offSummary;
+        if (cchLeft > 1)
+        {
+            va_list args2;
+            va_copy(args2, args);
+            size_t cch = RTStrPrintfV(&pMyHlp->szSummary[pMyHlp->offSummary], cchLeft, pszFormat, args2);
+            va_end(args2);
+            Assert(cch <= cchLeft);
+            pMyHlp->offSummary += cch;
+        }
+    }
+}
+
+
+/**
+ * Initializes the fatal dump output helper.
+ *
+ * @param   pHlp    The structure to initialize.
+ */
+static void vmmR3FatalDumpInfoHlpInit(PVMMR3FATALDUMPINFOHLP pHlp)
+{
+    RT_BZERO(pHlp, sizeof(*pHlp));
+
+    pHlp->Core.pfnPrintf  = vmmR3FatalDumpInfoHlp_pfnPrintf;
+    pHlp->Core.pfnPrintfV = vmmR3FatalDumpInfoHlp_pfnPrintfV;
+
+    /*
+     * The loggers.
+ */ + pHlp->pRelLogger = RTLogRelGetDefaultInstance(); +#ifdef LOG_ENABLED + pHlp->pLogger = RTLogDefaultInstance(); +#else + if (pHlp->pRelLogger) + pHlp->pLogger = RTLogGetDefaultInstance(); + else + pHlp->pLogger = RTLogDefaultInstance(); +#endif + + if (pHlp->pRelLogger) + { + pHlp->fRelLoggerFlags = pHlp->pRelLogger->fFlags; + pHlp->pRelLogger->fFlags &= ~RTLOGFLAGS_DISABLED; + pHlp->pRelLogger->fFlags |= RTLOGFLAGS_BUFFERED; + } + + if (pHlp->pLogger) + { + pHlp->fLoggerFlags = pHlp->pLogger->fFlags; + pHlp->fLoggerDestFlags = pHlp->pLogger->fDestFlags; + pHlp->pLogger->fFlags &= ~RTLOGFLAGS_DISABLED; + pHlp->pLogger->fFlags |= RTLOGFLAGS_BUFFERED; +#ifndef DEBUG_sandervl + pHlp->pLogger->fDestFlags |= RTLOGDEST_DEBUGGER; +#endif + } + + /* + * Check if we need write to stderr. + */ + pHlp->fStdErr = (!pHlp->pRelLogger || !(pHlp->pRelLogger->fDestFlags & (RTLOGDEST_STDOUT | RTLOGDEST_STDERR))) + && (!pHlp->pLogger || !(pHlp->pLogger->fDestFlags & (RTLOGDEST_STDOUT | RTLOGDEST_STDERR))); +#ifdef DEBUG_sandervl + pHlp->fStdErr = false; /* takes too long to display here */ +#endif + pHlp->offStdErrBuf = 0; + + /* + * Init the summary recording. + */ + pHlp->fRecSummary = true; + pHlp->offSummary = 0; + pHlp->szSummary[0] = '\0'; +} + + +/** + * Deletes the fatal dump output helper. + * + * @param pHlp The structure to delete. + */ +static void vmmR3FatalDumpInfoHlpDelete(PVMMR3FATALDUMPINFOHLP pHlp) +{ + if (pHlp->pRelLogger) + { + RTLogFlush(pHlp->pRelLogger); + pHlp->pRelLogger->fFlags = pHlp->fRelLoggerFlags; + } + + if (pHlp->pLogger) + { + RTLogFlush(pHlp->pLogger); + pHlp->pLogger->fFlags = pHlp->fLoggerFlags; + pHlp->pLogger->fDestFlags = pHlp->fLoggerDestFlags; + } + + if (pHlp->fStdErr) + vmmR3FatalDumpInfoHlpFlushStdErr(pHlp); +} + + +/** + * Dumps the VM state on a fatal error. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rcErr VBox status code. + */ +VMMR3DECL(void) VMMR3FatalDump(PVM pVM, PVMCPU pVCpu, int rcErr) +{ + /* + * Create our output helper and sync it with the log settings. + * This helper will be used for all the output. + */ + VMMR3FATALDUMPINFOHLP Hlp; + PCDBGFINFOHLP pHlp = &Hlp.Core; + vmmR3FatalDumpInfoHlpInit(&Hlp); + + /* Release owned locks to make sure other VCPUs can continue in case they were waiting for one. */ + PDMR3CritSectLeaveAll(pVM); + + /* + * Header. + */ + pHlp->pfnPrintf(pHlp, + "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + "!!\n" + "!! VCPU%u: Guru Meditation %d (%Rrc)\n" + "!!\n", + pVCpu->idCpu, rcErr, rcErr); + + /* + * Continue according to context. + */ + bool fDoneHyper = false; + bool fDoneImport = false; + switch (rcErr) + { + /* + * Hypervisor errors. + */ + case VERR_VMM_RING0_ASSERTION: + case VINF_EM_DBG_HYPER_ASSERTION: + case VERR_VMM_RING3_CALL_DISABLED: + { + const char *pszMsg1 = VMMR3GetRZAssertMsg1(pVM); + while (pszMsg1 && *pszMsg1 == '\n') + pszMsg1++; + const char *pszMsg2 = VMMR3GetRZAssertMsg2(pVM); + while (pszMsg2 && *pszMsg2 == '\n') + pszMsg2++; + pHlp->pfnPrintf(pHlp, + "%s" + "%s", + pszMsg1, + pszMsg2); + if ( !pszMsg2 + || !*pszMsg2 + || strchr(pszMsg2, '\0')[-1] != '\n') + pHlp->pfnPrintf(pHlp, "\n"); + } + RT_FALL_THRU(); + case VERR_TRPM_DONT_PANIC: + case VERR_TRPM_PANIC: + case VINF_EM_RAW_STALE_SELECTOR: + case VINF_EM_RAW_IRET_TRAP: + case VINF_EM_DBG_HYPER_BREAKPOINT: + case VINF_EM_DBG_HYPER_STEPPED: + case VINF_EM_TRIPLE_FAULT: + case VERR_VMM_HYPER_CR3_MISMATCH: + { + /* + * Active trap? 
This is only of partial interest when in hardware + * assisted virtualization mode, thus the different messages. + */ + uint32_t uEIP = CPUMGetHyperEIP(pVCpu); + TRPMEVENT enmType; + uint8_t u8TrapNo = 0xce; + RTGCUINT uErrorCode = 0xdeadface; + RTGCUINTPTR uCR2 = 0xdeadface; + uint8_t cbInstr = UINT8_MAX; + int rc2 = TRPMQueryTrapAll(pVCpu, &u8TrapNo, &enmType, &uErrorCode, &uCR2, &cbInstr); + if (VM_IS_RAW_MODE_ENABLED(pVM)) + { + if (RT_SUCCESS(rc2)) + pHlp->pfnPrintf(pHlp, + "!! TRAP=%02x ERRCD=%RGv CR2=%RGv EIP=%RX32 Type=%d cbInstr=%02x\n", + u8TrapNo, uErrorCode, uCR2, uEIP, enmType, cbInstr); + else + pHlp->pfnPrintf(pHlp, + "!! EIP=%RX32 NOTRAP\n", + uEIP); + } + else if (RT_SUCCESS(rc2)) + pHlp->pfnPrintf(pHlp, + "!! ACTIVE TRAP=%02x ERRCD=%RGv CR2=%RGv PC=%RGr Type=%d cbInstr=%02x (Guest!)\n", + u8TrapNo, uErrorCode, uCR2, CPUMGetGuestRIP(pVCpu), enmType, cbInstr); + + /* + * Dump the relevant hypervisor registers and stack. + */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + { + if ( rcErr == VERR_VMM_RING0_ASSERTION /* fInRing3Call has already been cleared here. */ + || pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call) + { + /* Dump the jmpbuf. */ + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! CallRing3JmpBuf:\n" + "!!\n"); + pHlp->pfnPrintf(pHlp, + "SavedEsp=%RHv SavedEbp=%RHv SpResume=%RHv SpCheck=%RHv\n", + pVCpu->vmm.s.CallRing3JmpBufR0.SavedEsp, + pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp, + pVCpu->vmm.s.CallRing3JmpBufR0.SpResume, + pVCpu->vmm.s.CallRing3JmpBufR0.SpCheck); + pHlp->pfnPrintf(pHlp, + "pvSavedStack=%RHv cbSavedStack=%#x fInRing3Call=%RTbool\n", + pVCpu->vmm.s.CallRing3JmpBufR0.pvSavedStack, + pVCpu->vmm.s.CallRing3JmpBufR0.cbSavedStack, + pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call); + pHlp->pfnPrintf(pHlp, + "cbUsedMax=%#x cbUsedAvg=%#x cbUsedTotal=%#llx cUsedTotal=%#llx\n", + pVCpu->vmm.s.CallRing3JmpBufR0.cbUsedMax, + pVCpu->vmm.s.CallRing3JmpBufR0.cbUsedAvg, + pVCpu->vmm.s.CallRing3JmpBufR0.cbUsedTotal, + pVCpu->vmm.s.CallRing3JmpBufR0.cUsedTotal); + + /* Dump the resume register frame on the stack. 
*/ + PRTHCUINTPTR pBP; +#ifdef VMM_R0_SWITCH_STACK + pBP = (PRTHCUINTPTR)&pVCpu->vmm.s.pbEMTStackR3[ pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp + - MMHyperCCToR0(pVM, pVCpu->vmm.s.pbEMTStackR3)]; +#else + pBP = (PRTHCUINTPTR)&pVCpu->vmm.s.pbEMTStackR3[ pVCpu->vmm.s.CallRing3JmpBufR0.cbSavedStack + - pVCpu->vmm.s.CallRing3JmpBufR0.SpCheck + + pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp]; +#endif +#if HC_ARCH_BITS == 32 + pHlp->pfnPrintf(pHlp, + "eax=volatile ebx=%08x ecx=volatile edx=volatile esi=%08x edi=%08x\n" + "eip=%08x esp=%08x ebp=%08x efl=%08x\n" + , + pBP[-3], pBP[-2], pBP[-1], + pBP[1], pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp - 8, pBP[0], pBP[-4]); +#else +# ifdef RT_OS_WINDOWS + pHlp->pfnPrintf(pHlp, + "rax=volatile rbx=%016RX64 rcx=volatile rdx=volatile\n" + "rsi=%016RX64 rdi=%016RX64 r8=volatile r9=volatile \n" + "r10=volatile r11=volatile r12=%016RX64 r13=%016RX64\n" + "r14=%016RX64 r15=%016RX64\n" + "rip=%016RX64 rsp=%016RX64 rbp=%016RX64 rfl=%08RX64\n" + , + pBP[-7], + pBP[-6], pBP[-5], + pBP[-4], pBP[-3], + pBP[-2], pBP[-1], + pBP[1], pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp - 16, pBP[0], pBP[-8]); +# else + pHlp->pfnPrintf(pHlp, + "rax=volatile rbx=%016RX64 rcx=volatile rdx=volatile\n" + "rsi=volatile rdi=volatile r8=volatile r9=volatile \n" + "r10=volatile r11=volatile r12=%016RX64 r13=%016RX64\n" + "r14=%016RX64 r15=%016RX64\n" + "rip=%016RX64 rsp=%016RX64 rbp=%016RX64 rflags=%08RX64\n" + , + pBP[-5], + pBP[-4], pBP[-3], + pBP[-2], pBP[-1], + pBP[1], pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp - 16, pBP[0], pBP[-6]); +# endif +#endif + + /* Callstack. */ + DBGFADDRESS AddrPc, AddrBp, AddrSp; + PCDBGFSTACKFRAME pFirstFrame; + rc2 = DBGFR3StackWalkBeginEx(pVM->pUVM, pVCpu->idCpu, DBGFCODETYPE_RING0, + DBGFR3AddrFromHostR0(&AddrBp, pVCpu->vmm.s.CallRing3JmpBufR0.SavedEbp), + DBGFR3AddrFromHostR0(&AddrSp, pVCpu->vmm.s.CallRing3JmpBufR0.SpResume), + DBGFR3AddrFromHostR0(&AddrPc, pVCpu->vmm.s.CallRing3JmpBufR0.SavedEipForUnwind), + RTDBGRETURNTYPE_INVALID, &pFirstFrame); + if (RT_SUCCESS(rc2)) + { + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! 
Call Stack:\n" + "!!\n"); +#if HC_ARCH_BITS == 32 + pHlp->pfnPrintf(pHlp, "EBP Ret EBP Ret CS:EIP Arg0 Arg1 Arg2 Arg3 CS:EIP Symbol [line]\n"); +#else + pHlp->pfnPrintf(pHlp, "RBP Ret RBP Ret RIP RIP Symbol [line]\n"); +#endif + for (PCDBGFSTACKFRAME pFrame = pFirstFrame; + pFrame; + pFrame = DBGFR3StackWalkNext(pFrame)) + { +#if HC_ARCH_BITS == 32 + pHlp->pfnPrintf(pHlp, + "%RHv %RHv %04RX32:%RHv %RHv %RHv %RHv %RHv", + (RTHCUINTPTR)pFrame->AddrFrame.off, + (RTHCUINTPTR)pFrame->AddrReturnFrame.off, + (RTHCUINTPTR)pFrame->AddrReturnPC.Sel, + (RTHCUINTPTR)pFrame->AddrReturnPC.off, + pFrame->Args.au32[0], + pFrame->Args.au32[1], + pFrame->Args.au32[2], + pFrame->Args.au32[3]); + pHlp->pfnPrintf(pHlp, " %RTsel:%08RHv", pFrame->AddrPC.Sel, pFrame->AddrPC.off); +#else + pHlp->pfnPrintf(pHlp, + "%RHv %RHv %RHv %RHv", + (RTHCUINTPTR)pFrame->AddrFrame.off, + (RTHCUINTPTR)pFrame->AddrReturnFrame.off, + (RTHCUINTPTR)pFrame->AddrReturnPC.off, + (RTHCUINTPTR)pFrame->AddrPC.off); +#endif + if (pFrame->pSymPC) + { + RTGCINTPTR offDisp = pFrame->AddrPC.FlatPtr - pFrame->pSymPC->Value; + if (offDisp > 0) + pHlp->pfnPrintf(pHlp, " %s+%llx", pFrame->pSymPC->szName, (int64_t)offDisp); + else if (offDisp < 0) + pHlp->pfnPrintf(pHlp, " %s-%llx", pFrame->pSymPC->szName, -(int64_t)offDisp); + else + pHlp->pfnPrintf(pHlp, " %s", pFrame->pSymPC->szName); + } + if (pFrame->pLinePC) + pHlp->pfnPrintf(pHlp, " [%s @ 0i%d]", pFrame->pLinePC->szFilename, pFrame->pLinePC->uLineNo); + pHlp->pfnPrintf(pHlp, "\n"); + for (uint32_t iReg = 0; iReg < pFrame->cSureRegs; iReg++) + { + const char *pszName = pFrame->paSureRegs[iReg].pszName; + if (!pszName) + pszName = DBGFR3RegCpuName(pVM->pUVM, pFrame->paSureRegs[iReg].enmReg, + pFrame->paSureRegs[iReg].enmType); + char szValue[1024]; + szValue[0] = '\0'; + DBGFR3RegFormatValue(szValue, sizeof(szValue), &pFrame->paSureRegs[iReg].Value, + pFrame->paSureRegs[iReg].enmType, false); + pHlp->pfnPrintf(pHlp, " %-3s=%s\n", pszName, szValue); + } + } + DBGFR3StackWalkEnd(pFirstFrame); + } + + /* Symbols on the stack. */ +#ifdef VMM_R0_SWITCH_STACK + uint32_t const iLast = VMM_STACK_SIZE / sizeof(uintptr_t); + uint32_t iAddr = (uint32_t)( pVCpu->vmm.s.CallRing3JmpBufR0.SavedEsp + - MMHyperCCToR0(pVM, pVCpu->vmm.s.pbEMTStackR3)) / sizeof(uintptr_t); + if (iAddr > iLast) + iAddr = 0; +#else + uint32_t const iLast = RT_MIN(pVCpu->vmm.s.CallRing3JmpBufR0.cbSavedStack, VMM_STACK_SIZE) + / sizeof(uintptr_t); + uint32_t iAddr = 0; +#endif + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! 
Addresses on the stack (iAddr=%#x, iLast=%#x)\n" + "!!\n", + iAddr, iLast); + uintptr_t const *paAddr = (uintptr_t const *)pVCpu->vmm.s.pbEMTStackR3; + while (iAddr < iLast) + { + uintptr_t const uAddr = paAddr[iAddr]; + if (uAddr > X86_PAGE_SIZE) + { + DBGFADDRESS Addr; + DBGFR3AddrFromFlat(pVM->pUVM, &Addr, uAddr); + RTGCINTPTR offDisp = 0; + PRTDBGSYMBOL pSym = DBGFR3AsSymbolByAddrA(pVM->pUVM, DBGF_AS_R0, &Addr, + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + &offDisp, NULL); + RTGCINTPTR offLineDisp; + PRTDBGLINE pLine = DBGFR3AsLineByAddrA(pVM->pUVM, DBGF_AS_R0, &Addr, &offLineDisp, NULL); + if (pLine || pSym) + { + pHlp->pfnPrintf(pHlp, "%#06x: %p =>", iAddr * sizeof(uintptr_t), uAddr); + if (pSym) + pHlp->pfnPrintf(pHlp, " %s + %#x", pSym->szName, (intptr_t)offDisp); + if (pLine) + pHlp->pfnPrintf(pHlp, " [%s:%u + %#x]\n", pLine->szFilename, pLine->uLineNo, offLineDisp); + else + pHlp->pfnPrintf(pHlp, "\n"); + RTDbgSymbolFree(pSym); + RTDbgLineFree(pLine); + } + } + iAddr++; + } + + /* raw stack */ + Hlp.fRecSummary = false; + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! Raw stack (mind the direction).\n" + "!! pbEMTStackR0=%RHv pbEMTStackBottomR0=%RHv VMM_STACK_SIZE=%#x\n" + "!! pbEmtStackR3=%p\n" + "!!\n" + "%.*Rhxd\n", + MMHyperCCToR0(pVM, pVCpu->vmm.s.pbEMTStackR3), + MMHyperCCToR0(pVM, pVCpu->vmm.s.pbEMTStackR3) + VMM_STACK_SIZE, + VMM_STACK_SIZE, + pVCpu->vmm.s.pbEMTStackR3, + VMM_STACK_SIZE, pVCpu->vmm.s.pbEMTStackR3); + } + else + { + pHlp->pfnPrintf(pHlp, + "!! Skipping ring-0 registers and stack, rcErr=%Rrc\n", rcErr); + } + } + else + { + /* + * Try figure out where eip is. + */ + /* core code? */ + if (uEIP - (RTGCUINTPTR)pVM->vmm.s.pvCoreCodeRC < pVM->vmm.s.cbCoreCode) + pHlp->pfnPrintf(pHlp, + "!! EIP is in CoreCode, offset %#x\n", + uEIP - (RTGCUINTPTR)pVM->vmm.s.pvCoreCodeRC); + else + { /* ask PDM */ /** @todo ask DBGFR3Sym later? */ + char szModName[64]; + RTRCPTR RCPtrMod; + char szNearSym1[260]; + RTRCPTR RCPtrNearSym1; + char szNearSym2[260]; + RTRCPTR RCPtrNearSym2; + int rc = PDMR3LdrQueryRCModFromPC(pVM, uEIP, + &szModName[0], sizeof(szModName), &RCPtrMod, + &szNearSym1[0], sizeof(szNearSym1), &RCPtrNearSym1, + &szNearSym2[0], sizeof(szNearSym2), &RCPtrNearSym2); + if (RT_SUCCESS(rc)) + pHlp->pfnPrintf(pHlp, + "!! EIP in %s (%RRv) at rva %x near symbols:\n" + "!! %RRv rva %RRv off %08x %s\n" + "!! %RRv rva %RRv off -%08x %s\n", + szModName, RCPtrMod, (unsigned)(uEIP - RCPtrMod), + RCPtrNearSym1, RCPtrNearSym1 - RCPtrMod, (unsigned)(uEIP - RCPtrNearSym1), szNearSym1, + RCPtrNearSym2, RCPtrNearSym2 - RCPtrMod, (unsigned)(RCPtrNearSym2 - uEIP), szNearSym2); + else + pHlp->pfnPrintf(pHlp, + "!! EIP is not in any code known to VMM!\n"); + } + + /* Disassemble the instruction. */ + char szInstr[256]; + rc2 = DBGFR3DisasInstrEx(pVM->pUVM, pVCpu->idCpu, 0, 0, + DBGF_DISAS_FLAGS_CURRENT_HYPER | DBGF_DISAS_FLAGS_DEFAULT_MODE, + &szInstr[0], sizeof(szInstr), NULL); + if (RT_SUCCESS(rc2)) + pHlp->pfnPrintf(pHlp, + "!! %s\n", szInstr); + + /* Dump the hypervisor cpu state. */ + pHlp->pfnPrintf(pHlp, + "!!\n" + "!!\n" + "!!\n"); + rc2 = DBGFR3Info(pVM->pUVM, "cpumhyper", "verbose", pHlp); + fDoneHyper = true; + + /* Callstack. */ + PCDBGFSTACKFRAME pFirstFrame; + rc2 = DBGFR3StackWalkBegin(pVM->pUVM, pVCpu->idCpu, DBGFCODETYPE_HYPER, &pFirstFrame); + if (RT_SUCCESS(rc2)) + { + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! 
Call Stack:\n" + "!!\n" + "EBP Ret EBP Ret CS:EIP Arg0 Arg1 Arg2 Arg3 CS:EIP Symbol [line]\n"); + for (PCDBGFSTACKFRAME pFrame = pFirstFrame; + pFrame; + pFrame = DBGFR3StackWalkNext(pFrame)) + { + pHlp->pfnPrintf(pHlp, + "%08RX32 %08RX32 %04RX32:%08RX32 %08RX32 %08RX32 %08RX32 %08RX32", + (uint32_t)pFrame->AddrFrame.off, + (uint32_t)pFrame->AddrReturnFrame.off, + (uint32_t)pFrame->AddrReturnPC.Sel, + (uint32_t)pFrame->AddrReturnPC.off, + pFrame->Args.au32[0], + pFrame->Args.au32[1], + pFrame->Args.au32[2], + pFrame->Args.au32[3]); + pHlp->pfnPrintf(pHlp, " %RTsel:%08RGv", pFrame->AddrPC.Sel, pFrame->AddrPC.off); + if (pFrame->pSymPC) + { + RTGCINTPTR offDisp = pFrame->AddrPC.FlatPtr - pFrame->pSymPC->Value; + if (offDisp > 0) + pHlp->pfnPrintf(pHlp, " %s+%llx", pFrame->pSymPC->szName, (int64_t)offDisp); + else if (offDisp < 0) + pHlp->pfnPrintf(pHlp, " %s-%llx", pFrame->pSymPC->szName, -(int64_t)offDisp); + else + pHlp->pfnPrintf(pHlp, " %s", pFrame->pSymPC->szName); + } + if (pFrame->pLinePC) + pHlp->pfnPrintf(pHlp, " [%s @ 0i%d]", pFrame->pLinePC->szFilename, pFrame->pLinePC->uLineNo); + pHlp->pfnPrintf(pHlp, "\n"); + } + DBGFR3StackWalkEnd(pFirstFrame); + } + + /* raw stack */ + Hlp.fRecSummary = false; + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! Raw stack (mind the direction). pbEMTStackRC=%RRv pbEMTStackBottomRC=%RRv\n" + "!!\n" + "%.*Rhxd\n", + pVCpu->vmm.s.pbEMTStackRC, pVCpu->vmm.s.pbEMTStackBottomRC, + VMM_STACK_SIZE, pVCpu->vmm.s.pbEMTStackR3); + } /* !HMIsEnabled */ + break; + } + + case VERR_IEM_INSTR_NOT_IMPLEMENTED: + case VERR_IEM_ASPECT_NOT_IMPLEMENTED: + case VERR_PATM_IPE_TRAP_IN_PATCH_CODE: + case VERR_EM_GUEST_CPU_HANG: + { + CPUMImportGuestStateOnDemand(pVCpu, CPUMCTX_EXTRN_ABSOLUTELY_ALL); + fDoneImport = true; + + DBGFR3Info(pVM->pUVM, "cpumguest", NULL, pHlp); + DBGFR3Info(pVM->pUVM, "cpumguestinstr", NULL, pHlp); + DBGFR3Info(pVM->pUVM, "cpumguesthwvirt", NULL, pHlp); + break; + } + + default: + { + break; + } + + } /* switch (rcErr) */ + Hlp.fRecSummary = false; + + + /* + * Generic info dumper loop. + */ + if (!fDoneImport) + CPUMImportGuestStateOnDemand(pVCpu, CPUMCTX_EXTRN_ABSOLUTELY_ALL); + static struct + { + const char *pszInfo; + const char *pszArgs; + } const aInfo[] = + { + { "mappings", NULL }, + { "hma", NULL }, + { "cpumguest", "verbose" }, + { "cpumguesthwvirt", "verbose" }, + { "cpumguestinstr", "verbose" }, + { "cpumhyper", "verbose" }, + { "cpumhost", "verbose" }, + { "mode", "all" }, + { "cpuid", "verbose" }, + { "handlers", "phys virt hyper stats" }, + { "timers", NULL }, + { "activetimers", NULL }, + }; + for (unsigned i = 0; i < RT_ELEMENTS(aInfo); i++) + { + if (fDoneHyper && !strcmp(aInfo[i].pszInfo, "cpumhyper")) + continue; + pHlp->pfnPrintf(pHlp, + "!!\n" + "!! {%s, %s}\n" + "!!\n", + aInfo[i].pszInfo, aInfo[i].pszArgs); + DBGFR3Info(pVM->pUVM, aInfo[i].pszInfo, aInfo[i].pszArgs, pHlp); + } + + /* All other info items */ + DBGFR3InfoMulti(pVM, + "*", + "mappings|hma|cpum|cpumguest|cpumguesthwvirt|cpumguestinstr|cpumhyper|cpumhost|mode|cpuid" + "|pgmpd|pgmcr3|timers|activetimers|handlers|help|exithistory", + "!!\n" + "!! {%s}\n" + "!!\n", + pHlp); + + + /* done */ + pHlp->pfnPrintf(pHlp, + "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); + + + /* + * Repeat the summary to stderr so we don't have to scroll half a mile up. 
+ */ + vmmR3FatalDumpInfoHlpFlushStdErr(&Hlp); + if (Hlp.szSummary[0]) + RTStrmPrintf(g_pStdErr, + "%s" + "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", + Hlp.szSummary); + + /* + * Delete the output instance (flushing and restoring of flags). + */ + vmmR3FatalDumpInfoHlpDelete(&Hlp); +} + diff --git a/src/VBox/VMM/VMMR3/VMMR3.def b/src/VBox/VMM/VMMR3/VMMR3.def new file mode 100644 index 00000000..74087f11 --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMMR3.def @@ -0,0 +1,447 @@ +; $Id: VMMR3.def $ +;; @file +; VMM Ring-3 Context DLL - Definition file. + +; +; Copyright (C) 2010-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +LIBRARY VBoxVMM.dll +EXPORTS + ; data + + ; code + CFGMR3GetRoot + CFGMR3GetFirstChild + CFGMR3GetNextChild + CFGMR3GetNameLen + CFGMR3GetFirstValue + CFGMR3GetNextValue + CFGMR3GetValueNameLen + CFGMR3GetValueType + CFGMR3Dump + CFGMR3CreateTree + CFGMR3DestroyTree + CFGMR3GetValueName + CFGMR3GetName + CFGMR3RemoveNode + CFGMR3InsertBytes + CFGMR3InsertStringFV + CFGMR3InsertStringF + CFGMR3InsertStringN + CFGMR3InsertString + CFGMR3InsertStringW + CFGMR3InsertInteger + CFGMR3QueryStringAllocDef + CFGMR3RemoveValue + CFGMR3QueryIntegerDef + CFGMR3QueryGCPtrSDef + CFGMR3QueryGCPtrUDef + CFGMR3QueryGCPtrDef + CFGMR3QueryPtrDef + CFGMR3QueryBoolDef + CFGMR3QueryS8Def + CFGMR3QueryU8Def + CFGMR3QueryS16Def + CFGMR3QueryU16Def + CFGMR3QueryPortDef + CFGMR3QueryS32Def + CFGMR3QuerySIntDef + CFGMR3QueryU32Def + CFGMR3QueryUIntDef + CFGMR3QueryS64Def + CFGMR3QueryU64Def + CFGMR3QueryInteger + CFGMR3QueryGCPtrS + CFGMR3QueryGCPtrU + CFGMR3QueryGCPtr + CFGMR3QueryPtr + CFGMR3QueryBool + CFGMR3QueryS8 + CFGMR3QueryU8 + CFGMR3QueryS16 + CFGMR3QueryU16 + CFGMR3QueryPort + CFGMR3QueryS32 + CFGMR3QuerySInt + CFGMR3QueryU32 + CFGMR3QueryUInt + CFGMR3QueryS64 + CFGMR3QueryU64 + CFGMR3QuerySize + CFGMR3QueryType + CFGMR3AreValuesValid + CFGMR3AreChildrenValid + CFGMR3GetChildFV + CFGMR3GetChildF + CFGMR3GetChild + CFGMR3InsertNode + CFGMR3InsertNodeFV + CFGMR3InsertNodeF + CFGMR3InsertSubTree + CFGMR3ValidateConfig + CFGMR3QueryBytes + CFGMR3QueryStringDef + CFGMR3QueryString + CFGMR3QueryStringAlloc + CFGMR3GetParent + CFGMR3GetRootU + + CSAMR3IsEnabled + CSAMR3SetScanningEnabled + + DBGCCreate + + DBGFR3CoreWrite + DBGFR3Info + DBGFR3InfoRegisterExternal + DBGFR3InfoDeregisterExternal + DBGFR3InjectNMI + DBGFR3LogModifyDestinations + DBGFR3LogModifyFlags + DBGFR3LogModifyGroups + DBGFR3OSDetect + DBGFR3OSQueryNameAndVersion + DBGFR3RegCpuQueryU8 + DBGFR3RegCpuQueryU16 + DBGFR3RegCpuQueryU32 + DBGFR3RegCpuQueryU64 + DBGFR3RegCpuQueryXdtr + DBGFR3RegCpuQueryLrd + DBGFR3RegFormatValue + DBGFR3RegNmQuery + DBGFR3RegNmQueryAll + DBGFR3RegNmQueryAllCount + DBGFR3OSDeregister + DBGFR3OSRegister + DBGFR3OSQueryInterface + DBGFR3MemReadString + DBGFR3MemRead + DBGFR3MemScan + DBGFR3ModInMem + DBGFR3AddrFromFlat + DBGFR3AsSymbolByName + DBGFR3AsResolveAndRetain + DBGFR3AsSetAlias + DBGFR3AddrAdd + DBGFR3AddrSub + DBGFR3AsGetConfig + DBGFR3CpuGetMode + DBGFR3AddrFromSelOff + DBGFR3FlowCreate + DBGFR3FlowRetain + 
DBGFR3FlowRelease + DBGFR3FlowQueryStartBb + DBGFR3FlowQueryBbByAddress + DBGFR3FlowQueryBranchTblByAddress + DBGFR3FlowGetBbCount + DBGFR3FlowGetBranchTblCount + DBGFR3FlowBbRetain + DBGFR3FlowBbRelease + DBGFR3FlowBbGetStartAddress + DBGFR3FlowBbGetEndAddress + DBGFR3FlowBbGetBranchAddress + DBGFR3FlowBbGetFollowingAddress + DBGFR3FlowBbGetType + DBGFR3FlowBbGetInstrCount + DBGFR3FlowBbGetFlags + DBGFR3FlowBbQueryBranchTbl + DBGFR3FlowBbQueryError + DBGFR3FlowBbQueryInstr + DBGFR3FlowBbQuerySuccessors + DBGFR3FlowBbGetRefBbCount + DBGFR3FlowBbGetRefBb + DBGFR3FlowBranchTblRetain + DBGFR3FlowBranchTblRelease + DBGFR3FlowBranchTblGetSlots + DBGFR3FlowBranchTblGetStartAddress + DBGFR3FlowBranchTblGetAddrAtSlot + DBGFR3FlowBranchTblQueryAddresses + DBGFR3FlowItCreate + DBGFR3FlowItDestroy + DBGFR3FlowItNext + DBGFR3FlowItReset + DBGFR3FlowBranchTblItCreate + DBGFR3FlowBranchTblItDestroy + DBGFR3FlowBranchTblItNext + DBGFR3FlowBranchTblItReset + DBGFR3PlugInLoad + DBGFR3PlugInUnload + DBGFR3PlugInLoadAll + DBGFR3PlugInUnloadAll + DBGFR3SelQueryInfo + DBGFR3StackWalkBegin + DBGFR3StackWalkNext + DBGFR3StackWalkEnd + DBGFR3TypeDeregister + DBGFR3TypeDumpEx + DBGFR3TypeQueryReg + DBGFR3TypeQuerySize + DBGFR3TypeQueryValByType + DBGFR3TypeRegister + DBGFR3TypeSetSize + DBGFR3TypeValFree + DBGFR3TypeValDumpEx + + EMR3QueryExecutionPolicy + EMR3QueryMainExecutionEngine + EMR3SetExecutionPolicy + + FTMR3CancelStandby + FTMR3PowerOn + + MMHyperR3ToR0 + MMHyperR3ToRC + + HMR3IsEnabled + HMR3IsNestedPagingActive + HMR3IsUXActive + HMR3IsVpidActive + + MMR3HeapFree + MMR3HeapRealloc + MMR3HeapAllocU + + MMR3HyperAllocOnceNoRel + + PATMR3AllowPatching + PATMR3IsEnabled + + PDMR3AsyncCompletionBwMgrSetMaxForFile + PDMR3DeviceAttach + PDMR3DeviceDetach + PDMR3DriverAttach + PDMR3DriverDetach + PDMR3NsBwGroupSetLimit + PDMR3QueryDeviceLun + PDMR3QueryDriverOnLun + PDMR3QueryLun + + PDMCritSectEnter + PDMCritSectEnterDebug + PDMCritSectTryEnter + PDMCritSectTryEnterDebug + PDMR3CritSectEnterEx + PDMCritSectLeave + PDMCritSectIsOwner + PDMCritSectIsOwnerEx + PDMCritSectIsOwned + PDMCritSectIsInitialized + PDMCritSectHasWaiters + PDMCritSectGetRecursion + PDMR3CritSectYield + PDMR3CritSectName + PDMR3CritSectScheduleExitEvent + PDMR3CritSectDelete + + PDMR3QueueDestroy + PDMQueueAlloc + PDMQueueInsert + PDMQueueInsertEx + PDMQueueR0Ptr + PDMQueueRCPtr + + PDMR3ThreadDestroy + PDMR3ThreadIAmRunning + PDMR3ThreadIAmSuspending + PDMR3ThreadResume + PDMR3ThreadSleep + PDMR3ThreadSuspend + + PDMR3UsbCreateEmulatedDevice + PDMR3UsbCreateProxyDevice + PDMR3UsbDetachDevice + PDMR3UsbHasHub + PDMR3UsbDriverAttach + PDMR3UsbDriverDetach + PDMR3UsbQueryLun + PDMR3UsbQueryDriverOnLun + + PGMHandlerPhysicalPageTempOff + PGMPhysReadGCPtr + PGMPhysSimpleDirtyWriteGCPtr + PGMPhysSimpleReadGCPtr + PGMPhysSimpleWriteGCPhys + PGMPhysSimpleWriteGCPtr + PGMPhysWriteGCPtr + PGMShwMakePageWritable + PGMR3QueryGlobalMemoryStats + PGMR3QueryMemoryStats + + SSMR3Close + SSMR3DeregisterExternal + SSMR3DeregisterInternal + SSMR3GetBool + SSMR3GetGCPhys + SSMR3GetGCPhys32 + SSMR3GetGCPhys64 + SSMR3GetGCPtr + SSMR3GetGCUInt + SSMR3GetGCUIntPtr + SSMR3GetGCUIntReg + SSMR3GetIOPort + SSMR3GetMem + SSMR3GetRCPtr + SSMR3GetS128 + SSMR3GetS16 + SSMR3GetS32 + SSMR3GetS64 + SSMR3GetS8 + SSMR3GetSInt + SSMR3GetSel + SSMR3GetStrZ + SSMR3GetStrZEx + SSMR3GetStruct + SSMR3GetStructEx + SSMR3GetU128 + SSMR3GetU16 + SSMR3GetU32 + SSMR3GetU64 + SSMR3GetU8 + SSMR3GetUInt + SSMR3HandleGetAfter + SSMR3HandleGetStatus + SSMR3HandleHostBits + 
SSMR3HandleHostOSAndArch + SSMR3HandleIsLiveSave + SSMR3HandleMaxDowntime + SSMR3HandleReportLivePercent + SSMR3HandleRevision + SSMR3HandleSetStatus + SSMR3HandleVersion + SSMR3Open + SSMR3PutBool + SSMR3PutGCPhys + SSMR3PutGCPhys32 + SSMR3PutGCPhys64 + SSMR3PutGCPtr + SSMR3PutGCUInt + SSMR3PutGCUIntPtr + SSMR3PutGCUIntReg + SSMR3PutIOPort + SSMR3PutMem + SSMR3PutRCPtr + SSMR3PutS128 + SSMR3PutS16 + SSMR3PutS32 + SSMR3PutS64 + SSMR3PutS8 + SSMR3PutSInt + SSMR3PutSel + SSMR3PutStrZ + SSMR3PutStruct + SSMR3PutStructEx + SSMR3PutU128 + SSMR3PutU16 + SSMR3PutU32 + SSMR3PutU64 + SSMR3PutU8 + SSMR3PutUInt + SSMR3Seek + SSMR3SetCfgError + SSMR3SetLoadError + SSMR3SetLoadErrorV + SSMR3Skip + SSMR3SkipToEndOfUnit + SSMR3ValidateFile + SSMR3Cancel + SSMR3RegisterExternal + + STAMR3Dump + STAMR3Enum + STAMR3Reset + STAMR3Snapshot + STAMR3SnapshotFree + STAMR3GetUnit + STAMR3RegisterFU + STAMR3RegisterVU + STAMR3DeregisterF + STAMR3DeregisterV + + TMR3TimerSetCritSect + TMR3TimerLoad + TMR3TimerSave + TMR3TimerSkip + TMR3TimerDestroy + TMTimerFromMicro + TMTimerFromMilli + TMTimerFromNano + TMTimerGet + TMTimerGetFreq + TMTimerGetMicro + TMTimerGetMilli + TMTimerGetNano + TMTimerIsActive + TMTimerIsLockOwner + TMTimerLock + TMTimerR0Ptr + TMTimerR3Ptr + TMTimerRCPtr + TMTimerSet + TMTimerSetFrequencyHint + TMTimerSetMicro + TMTimerSetMillies + TMTimerSetNano + TMTimerSetRelative + TMTimerStop + TMTimerToMicro + TMTimerToMilli + TMTimerToNano + TMTimerUnlock + TMR3GetWarpDrive + TMR3SetWarpDrive + TMR3TimeVirtGet + TMR3TimeVirtGetMicro + TMR3TimeVirtGetMilli + TMR3TimeVirtGetNano + + VMMGetCpu + + VMMGetSvnRev + VMSetError + VMSetErrorV + VMR3AtErrorDeregister + VMR3AtErrorRegister + VMR3AtRuntimeErrorRegister + VMR3AtStateRegister + VMR3Create + VMR3Destroy + VMR3GetCpuCoreAndPackageIdFromCpuId + VMR3GetStateName + VMR3GetStateU + VMR3GetSuspendReason + VMR3GetVM + VMR3HotPlugCpu + VMR3HotUnplugCpu + VMR3LoadFromFile + VMR3LoadFromStream + VMR3PowerOff + VMR3PowerOn + VMR3ReleaseUVM + VMR3ReqCallNoWaitU + VMR3ReqCallU + VMR3ReqCallVoidWaitU + VMR3ReqCallWaitU + VMR3ReqFree + VMR3ReqPriorityCallWaitU + VMR3ReqWait + VMR3Reset + VMR3Resume + VMR3RetainUVM + VMR3Save + VMR3SetCpuExecutionCap + VMR3SetError + VMR3SetPowerOffInsteadOfReset + VMR3Suspend + VMR3Teleport + VMR3AtStateDeregister + VMR3GetUVM + diff --git a/src/VBox/VMM/VMMR3/VMMSwitcher.cpp b/src/VBox/VMM/VMMR3/VMMSwitcher.cpp new file mode 100644 index 00000000..ac15c7ca --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMMSwitcher.cpp @@ -0,0 +1,1188 @@ +/* $Id: VMMSwitcher.cpp $ */ +/** @file + * VMM - The Virtual Machine Monitor, World Switcher(s). + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VMM +#include +#include +#include +#include +#include +#include +#include "VMMInternal.h" +#include "VMMSwitcher.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#if defined(VBOX_WITH_RAW_MODE) || HC_ARCH_BITS != 64 + +/** Array of switcher definitions. + * The type and index shall match! + */ +static PVMMSWITCHERDEF g_apRawModeSwitchers[VMMSWITCHER_MAX] = +{ + NULL, /* invalid entry */ +# ifdef VBOX_WITH_RAW_MODE +# ifndef RT_ARCH_AMD64 + &vmmR3Switcher32BitTo32Bit_Def, + &vmmR3Switcher32BitToPAE_Def, + NULL, //&vmmR3Switcher32BitToAMD64_Def, + &vmmR3SwitcherPAETo32Bit_Def, + &vmmR3SwitcherPAEToPAE_Def, + NULL, //&vmmR3SwitcherPAEToAMD64_Def, + NULL, //&vmmR3SwitcherPAETo32Bit_Def, + NULL, //&vmmR3SwitcherAMD64ToPAE_Def, + NULL, //&vmmR3SwitcherAMD64ToAMD64_Def, +# else /* RT_ARCH_AMD64 */ + NULL, //&vmmR3Switcher32BitTo32Bit_Def, + NULL, //&vmmR3Switcher32BitToPAE_Def, + NULL, //&vmmR3Switcher32BitToAMD64_Def, + NULL, //&vmmR3SwitcherPAETo32Bit_Def, + NULL, //&vmmR3SwitcherPAEToPAE_Def, + NULL, //&vmmR3SwitcherPAEToAMD64_Def, + &vmmR3SwitcherAMD64To32Bit_Def, + &vmmR3SwitcherAMD64ToPAE_Def, + NULL, //&vmmR3SwitcherAMD64ToAMD64_Def, +# endif /* RT_ARCH_AMD64 */ +# else /* !VBOX_WITH_RAW_MODE */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +# endif /* !VBOX_WITH_RAW_MODE */ +# ifndef RT_ARCH_AMD64 + &vmmR3SwitcherX86Stub_Def, + NULL, +# else + NULL, + &vmmR3SwitcherAMD64Stub_Def, +# endif +}; + +/** Array of switcher definitions. + * The type and index shall match! + */ +static PVMMSWITCHERDEF g_apHmSwitchers[VMMSWITCHER_MAX] = +{ + NULL, /* invalid entry */ +# if HC_ARCH_BITS == 32 + NULL, //&vmmR3Switcher32BitTo32Bit_Def, + NULL, //&vmmR3Switcher32BitToPAE_Def, + &vmmR3Switcher32BitToAMD64_Def, + NULL, //&vmmR3SwitcherPAETo32Bit_Def, + NULL, //&vmmR3SwitcherPAEToPAE_Def, + &vmmR3SwitcherPAEToAMD64_Def, + NULL, //&vmmR3SwitcherPAETo32Bit_Def, + NULL, //&vmmR3SwitcherAMD64ToPAE_Def, + NULL, //&vmmR3SwitcherAMD64ToAMD64_Def, +# else /* !VBOX_WITH_RAW_MODE */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +# endif /* !VBOX_WITH_RAW_MODE */ +# ifndef RT_ARCH_AMD64 + &vmmR3SwitcherX86Stub_Def, + NULL, +# else + NULL, + &vmmR3SwitcherAMD64Stub_Def, +# endif +}; + +#endif /* VBOX_WITH_RAW_MODE || HC_ARCH_BITS != 64 */ + + +# ifdef VBOX_WITH_64ON32_IDT +/** + * Initializes the 64-bit IDT for 64-bit guest on 32-bit host switchers. + * + * This is only used as a debugging aid when we cannot find out why something + * goes haywire in the intermediate context. + * + * @param pVM The cross context VM structure. + * @param pSwitcher The switcher descriptor. + * @param pbDst Where the switcher code was just copied. + * @param HCPhysDst The host physical address corresponding to @a pbDst. 
+ */ +static void vmmR3Switcher32On64IdtInit(PVM pVM, PVMMSWITCHERDEF pSwitcher, uint8_t *pbDst, RTHCPHYS HCPhysDst) +{ + AssertRelease(pSwitcher->offGCCode > 0 && pSwitcher->offGCCode < pSwitcher->cbCode); + AssertRelease(pSwitcher->cbCode < _64K); + RTSEL uCs64 = SELMGetHyperCS64(pVM); + + PX86DESC64GATE paIdt = (PX86DESC64GATE)(pbDst + pSwitcher->offGCCode); + for (uint32_t i = 0 ; i < 256; i++) + { + AssertRelease(((uint64_t *)&paIdt[i])[0] < pSwitcher->cbCode); + AssertRelease(((uint64_t *)&paIdt[i])[1] == 0); + uint64_t uHandler = HCPhysDst + paIdt[i].u16OffsetLow; + paIdt[i].u16OffsetLow = (uint16_t)uHandler; + paIdt[i].u16Sel = uCs64; + paIdt[i].u3IST = 0; + paIdt[i].u5Reserved = 0; + paIdt[i].u4Type = AMD64_SEL_TYPE_SYS_INT_GATE; + paIdt[i].u1DescType = 0 /* system */; + paIdt[i].u2Dpl = 3; + paIdt[i].u1Present = 1; + paIdt[i].u16OffsetHigh = (uint16_t)(uHandler >> 16); + paIdt[i].u32Reserved = (uint32_t)(uHandler >> 32); + } + + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + uint64_t uIdtr = HCPhysDst + pSwitcher->offGCCode; AssertRelease(uIdtr < UINT32_MAX); + CPUMSetHyperIDTR(&pVM->aCpus[iCpu], uIdtr, 16*256 + iCpu); + } +} + + +/** + * Relocates the 64-bit IDT for 64-bit guest on 32-bit host switchers. + * + * @param pVM The cross context VM structure. + * @param pSwitcher The switcher descriptor. + * @param pbDst Where the switcher code was just copied. + * @param HCPhysDst The host physical address corresponding to @a pbDst. + */ +static void vmmR3Switcher32On64IdtRelocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, uint8_t *pbDst, RTHCPHYS HCPhysDst) +{ + AssertRelease(pSwitcher->offGCCode > 0 && pSwitcher->offGCCode < pSwitcher->cbCode && pSwitcher->cbCode < _64K); + + /* The intermediate context doesn't move, but the CS may. */ + RTSEL uCs64 = SELMGetHyperCS64(pVM); + PX86DESC64GATE paIdt = (PX86DESC64GATE)(pbDst + pSwitcher->offGCCode); + for (uint32_t i = 0 ; i < 256; i++) + paIdt[i].u16Sel = uCs64; + + /* Just in case... */ + for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++) + { + uint64_t uIdtr = HCPhysDst + pSwitcher->offGCCode; AssertRelease(uIdtr < UINT32_MAX); + CPUMSetHyperIDTR(&pVM->aCpus[iCpu], uIdtr, 16*256 + iCpu); + } +} +# endif /* VBOX_WITH_64ON32_IDT */ + + +/** + * VMMR3Init worker that initiates the switcher code (aka core code). + * + * This is core per VM code which might need fixups and/or for ease of use are + * put on linear contiguous backing. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +int vmmR3SwitcherInit(PVM pVM) +{ +#if !defined(VBOX_WITH_RAW_MODE) && (HC_ARCH_BITS == 64) + RT_NOREF(pVM); + return VINF_SUCCESS; +#else + + /* + * Calc the size. + */ + const PVMMSWITCHERDEF *papSwitchers = VM_IS_RAW_MODE_ENABLED(pVM) ? g_apRawModeSwitchers : g_apHmSwitchers; + unsigned cbCoreCode = 0; + for (unsigned iSwitcher = 0; iSwitcher < VMMSWITCHER_MAX; iSwitcher++) + { + pVM->vmm.s.aoffSwitchers[iSwitcher] = cbCoreCode; + PVMMSWITCHERDEF pSwitcher = papSwitchers[iSwitcher]; + if (pSwitcher) + { + AssertRelease((unsigned)pSwitcher->enmType == iSwitcher); + cbCoreCode += RT_ALIGN_32(pSwitcher->cbCode + 1, 32); + } + } + + /* + * Allocate contiguous pages for switchers and deal with + * conflicts in the intermediate mapping of the code. 
+ */ + pVM->vmm.s.cbCoreCode = RT_ALIGN_32(cbCoreCode, PAGE_SIZE); + pVM->vmm.s.pvCoreCodeR3 = SUPR3ContAlloc(pVM->vmm.s.cbCoreCode >> PAGE_SHIFT, &pVM->vmm.s.pvCoreCodeR0, &pVM->vmm.s.HCPhysCoreCode); + int rc = VERR_NO_MEMORY; + if (pVM->vmm.s.pvCoreCodeR3) + { + rc = PGMR3MapIntermediate(pVM, pVM->vmm.s.pvCoreCodeR0, pVM->vmm.s.HCPhysCoreCode, cbCoreCode); + if (rc == VERR_PGM_INTERMEDIATE_PAGING_CONFLICT) + { + /* try more allocations - Solaris, Linux. */ + const unsigned cTries = 8234; + struct VMMInitBadTry + { + RTR0PTR pvR0; + void *pvR3; + RTHCPHYS HCPhys; + RTUINT cb; + } *paBadTries = (struct VMMInitBadTry *)RTMemTmpAlloc(sizeof(*paBadTries) * cTries); + AssertReturn(paBadTries, VERR_NO_TMP_MEMORY); + unsigned i = 0; + do + { + paBadTries[i].pvR3 = pVM->vmm.s.pvCoreCodeR3; + paBadTries[i].pvR0 = pVM->vmm.s.pvCoreCodeR0; + paBadTries[i].HCPhys = pVM->vmm.s.HCPhysCoreCode; + i++; + pVM->vmm.s.pvCoreCodeR0 = NIL_RTR0PTR; + pVM->vmm.s.HCPhysCoreCode = NIL_RTHCPHYS; + pVM->vmm.s.pvCoreCodeR3 = SUPR3ContAlloc(pVM->vmm.s.cbCoreCode >> PAGE_SHIFT, &pVM->vmm.s.pvCoreCodeR0, &pVM->vmm.s.HCPhysCoreCode); + if (!pVM->vmm.s.pvCoreCodeR3) + break; + rc = PGMR3MapIntermediate(pVM, pVM->vmm.s.pvCoreCodeR0, pVM->vmm.s.HCPhysCoreCode, cbCoreCode); + } while ( rc == VERR_PGM_INTERMEDIATE_PAGING_CONFLICT + && i < cTries - 1); + + /* cleanup */ + if (RT_FAILURE(rc)) + { + paBadTries[i].pvR3 = pVM->vmm.s.pvCoreCodeR3; + paBadTries[i].pvR0 = pVM->vmm.s.pvCoreCodeR0; + paBadTries[i].HCPhys = pVM->vmm.s.HCPhysCoreCode; + paBadTries[i].cb = pVM->vmm.s.cbCoreCode; + i++; + LogRel(("VMM: Failed to allocated and map core code: rc=%Rrc\n", rc)); + } + while (i-- > 0) + { + LogRel(("VMM: Core code alloc attempt #%d: pvR3=%p pvR0=%RKv HCPhys=%RHp\n", + i, paBadTries[i].pvR3, paBadTries[i].pvR0, paBadTries[i].HCPhys)); + SUPR3ContFree(paBadTries[i].pvR3, paBadTries[i].cb >> PAGE_SHIFT); + } + RTMemTmpFree(paBadTries); + } + } + if (RT_SUCCESS(rc)) + { + /* + * Copy the code. + */ + for (unsigned iSwitcher = 0; iSwitcher < VMMSWITCHER_MAX; iSwitcher++) + { + PVMMSWITCHERDEF pSwitcher = papSwitchers[iSwitcher]; + if (pSwitcher) + { + uint8_t *pbDst = (uint8_t *)pVM->vmm.s.pvCoreCodeR3 + pVM->vmm.s.aoffSwitchers[iSwitcher]; + memcpy(pbDst, pSwitcher->pvCode, pSwitcher->cbCode); +# ifdef VBOX_WITH_64ON32_IDT + if ( pSwitcher->enmType == VMMSWITCHER_32_TO_AMD64 + || pSwitcher->enmType == VMMSWITCHER_PAE_TO_AMD64) + vmmR3Switcher32On64IdtInit(pVM, pSwitcher, pbDst, + pVM->vmm.s.HCPhysCoreCode + pVM->vmm.s.aoffSwitchers[iSwitcher]); +# endif + } + } + + /* + * Map the code into the GC address space. + */ + RTGCPTR GCPtr; + rc = MMR3HyperMapHCPhys(pVM, pVM->vmm.s.pvCoreCodeR3, pVM->vmm.s.pvCoreCodeR0, pVM->vmm.s.HCPhysCoreCode, + cbCoreCode, "Core Code", &GCPtr); + if (RT_SUCCESS(rc)) + { + pVM->vmm.s.pvCoreCodeRC = GCPtr; + MMR3HyperReserve(pVM, PAGE_SIZE, "fence", NULL); + LogRel(("VMM: CoreCode: R3=%RHv R0=%RKv RC=%RRv Phys=%RHp cb=%#x\n", + pVM->vmm.s.pvCoreCodeR3, pVM->vmm.s.pvCoreCodeR0, pVM->vmm.s.pvCoreCodeRC, pVM->vmm.s.HCPhysCoreCode, pVM->vmm.s.cbCoreCode)); + + /* + * Finally, PGM probably has selected a switcher already but we need + * to get the routine addresses, so we'll reselect it. + * This may legally fail so, we're ignoring the rc. + * Note! See HMIsEnabled hack in selector function. 
+ */ + VMMR3SelectSwitcher(pVM, pVM->vmm.s.enmSwitcher); + return rc; + } + + /* shit */ + AssertMsgFailed(("PGMR3Map(,%RRv, %RHp, %#x, 0) failed with rc=%Rrc\n", pVM->vmm.s.pvCoreCodeRC, pVM->vmm.s.HCPhysCoreCode, cbCoreCode, rc)); + SUPR3ContFree(pVM->vmm.s.pvCoreCodeR3, pVM->vmm.s.cbCoreCode >> PAGE_SHIFT); + } + else + VMSetError(pVM, rc, RT_SRC_POS, + N_("Failed to allocate %d bytes of contiguous memory for the world switcher code"), + cbCoreCode); + + pVM->vmm.s.pvCoreCodeR3 = NULL; + pVM->vmm.s.pvCoreCodeR0 = NIL_RTR0PTR; + pVM->vmm.s.pvCoreCodeRC = 0; + return rc; +#endif +} + +/** + * Relocate the switchers, called by VMMR#Relocate. + * + * @param pVM The cross context VM structure. + * @param offDelta The relocation delta. + */ +void vmmR3SwitcherRelocate(PVM pVM, RTGCINTPTR offDelta) +{ +#if defined(VBOX_WITH_RAW_MODE) || (HC_ARCH_BITS != 64) + /* + * Relocate all the switchers. + */ + const PVMMSWITCHERDEF *papSwitchers = VM_IS_RAW_MODE_ENABLED(pVM) ? g_apRawModeSwitchers : g_apHmSwitchers; + for (unsigned iSwitcher = 0; iSwitcher < VMMSWITCHER_MAX; iSwitcher++) + { + PVMMSWITCHERDEF pSwitcher = papSwitchers[iSwitcher]; + if (pSwitcher && pSwitcher->pfnRelocate) + { + unsigned off = pVM->vmm.s.aoffSwitchers[iSwitcher]; + pSwitcher->pfnRelocate(pVM, + pSwitcher, + pVM->vmm.s.pvCoreCodeR0 + off, + (uint8_t *)pVM->vmm.s.pvCoreCodeR3 + off, + pVM->vmm.s.pvCoreCodeRC + off, + pVM->vmm.s.HCPhysCoreCode + off); +# ifdef VBOX_WITH_64ON32_IDT + if ( pSwitcher->enmType == VMMSWITCHER_32_TO_AMD64 + || pSwitcher->enmType == VMMSWITCHER_PAE_TO_AMD64) + vmmR3Switcher32On64IdtRelocate(pVM, pSwitcher, + (uint8_t *)pVM->vmm.s.pvCoreCodeR3 + off, + pVM->vmm.s.HCPhysCoreCode + off); +# endif + } + } + + /* + * Recalc the RC address for the current switcher. + */ + PVMMSWITCHERDEF pSwitcher = papSwitchers[pVM->vmm.s.enmSwitcher]; + if (pSwitcher) + { + RTRCPTR RCPtr = pVM->vmm.s.pvCoreCodeRC + pVM->vmm.s.aoffSwitchers[pVM->vmm.s.enmSwitcher]; + pVM->vmm.s.pfnRCToHost = RCPtr + pSwitcher->offRCToHost; + pVM->vmm.s.pfnCallTrampolineRC = RCPtr + pSwitcher->offRCCallTrampoline; + pVM->pfnVMMRCToHostAsm = RCPtr + pSwitcher->offRCToHostAsm; + pVM->pfnVMMRCToHostAsmNoReturn = RCPtr + pSwitcher->offRCToHostAsmNoReturn; + } + else + AssertRelease(!VM_IS_RAW_MODE_ENABLED(pVM)); + +#else + NOREF(pVM); +#endif + NOREF(offDelta); +} + + +#if defined(VBOX_WITH_RAW_MODE) || (HC_ARCH_BITS != 64) + +/** + * Generic switcher code relocator. + * + * @param pVM The cross context VM structure. + * @param pSwitcher The switcher definition. + * @param pu8CodeR3 Pointer to the core code block for the switcher, ring-3 mapping. + * @param R0PtrCode Pointer to the core code block for the switcher, ring-0 mapping. + * @param GCPtrCode The guest context address corresponding to pu8Code. + * @param u32IDCode The identity mapped (ID) address corresponding to pu8Code. + * @param SelCS The hypervisor CS selector. + * @param SelDS The hypervisor DS selector. + * @param SelTSS The hypervisor TSS selector. + * @param GCPtrGDT The GC address of the hypervisor GDT. + * @param SelCS64 The 64-bit mode hypervisor CS selector. 
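+ *
+ * @remarks The fixup table (pSwitcher->pvFixups) is a byte stream of records:
+ * one fixup type byte, a 32-bit source offset into the switcher code,
+ * and for most fixup types a 32-bit target offset, terminated by
+ * FIX_THE_END.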
+ */ +static void vmmR3SwitcherGenericRelocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, + RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode, + RTSEL SelCS, RTSEL SelDS, RTSEL SelTSS, RTGCPTR GCPtrGDT, RTSEL SelCS64) +{ + union + { + const uint8_t *pu8; + const uint16_t *pu16; + const uint32_t *pu32; + const uint64_t *pu64; + const void *pv; + uintptr_t u; + } u; + u.pv = pSwitcher->pvFixups; + + /* + * Process fixups. + */ + uint8_t u8; + while ((u8 = *u.pu8++) != FIX_THE_END) + { + /* + * Get the source (where to write the fixup). + */ + uint32_t offSrc = *u.pu32++; + Assert(offSrc < pSwitcher->cbCode); + union + { + uint8_t *pu8; + uint16_t *pu16; + uint32_t *pu32; + uint64_t *pu64; + uintptr_t u; + } uSrc; + uSrc.pu8 = pu8CodeR3 + offSrc; + + /* The fixup target and method depends on the type. */ + switch (u8) + { + /* + * 32-bit relative, source in HC and target in GC. + */ + case FIX_HC_2_GC_NEAR_REL: + { + Assert(offSrc - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0 || offSrc - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1); + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offGCCode < pSwitcher->cbGCCode); + *uSrc.pu32 = (uint32_t)((GCPtrCode + offTrg) - (uSrc.u + 4)); + break; + } + + /* + * 32-bit relative, source in HC and target in ID. + */ + case FIX_HC_2_ID_NEAR_REL: + { + Assert(offSrc - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0 || offSrc - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1); + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offTrg - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + *uSrc.pu32 = (uint32_t)((u32IDCode + offTrg) - (R0PtrCode + offSrc + 4)); + break; + } + + /* + * 32-bit relative, source in GC and target in HC. + */ + case FIX_GC_2_HC_NEAR_REL: + { + Assert(offSrc - pSwitcher->offGCCode < pSwitcher->cbGCCode); + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0 || offTrg - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1); + *uSrc.pu32 = (uint32_t)((R0PtrCode + offTrg) - (GCPtrCode + offSrc + 4)); + break; + } + + /* + * 32-bit relative, source in GC and target in ID. + */ + case FIX_GC_2_ID_NEAR_REL: + { + AssertMsg(offSrc - pSwitcher->offGCCode < pSwitcher->cbGCCode, ("%x - %x < %x\n", offSrc, pSwitcher->offGCCode, pSwitcher->cbGCCode)); + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offTrg - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + *uSrc.pu32 = (uint32_t)((u32IDCode + offTrg) - (GCPtrCode + offSrc + 4)); + break; + } + + /* + * 32-bit relative, source in ID and target in HC. + */ + case FIX_ID_2_HC_NEAR_REL: + { + Assert(offSrc - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offSrc - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0 || offTrg - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1); + *uSrc.pu32 = (uint32_t)((R0PtrCode + offTrg) - (u32IDCode + offSrc + 4)); + break; + } + + /* + * 32-bit relative, source in ID and target in HC. + */ + case FIX_ID_2_GC_NEAR_REL: + { + Assert(offSrc - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offSrc - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offGCCode < pSwitcher->cbGCCode); + *uSrc.pu32 = (uint32_t)((GCPtrCode + offTrg) - (u32IDCode + offSrc + 4)); + break; + } + + /* + * 16:32 far jump, target in GC. 
+ */ + case FIX_GC_FAR32: + { + uint32_t offTrg = *u.pu32++; + Assert(offTrg - pSwitcher->offGCCode < pSwitcher->cbGCCode); + *uSrc.pu32++ = (uint32_t)(GCPtrCode + offTrg); + *uSrc.pu16++ = SelCS; + break; + } + + /* + * Make 32-bit GC pointer given CPUM offset. + */ + case FIX_GC_CPUM_OFF: + { + uint32_t offCPUM = *u.pu32++; + Assert(offCPUM < sizeof(pVM->cpum)); + *uSrc.pu32 = (uint32_t)(VM_RC_ADDR(pVM, &pVM->cpum) + offCPUM); + break; + } + + /* + * Make 32-bit GC pointer given CPUMCPU offset. + */ + case FIX_GC_CPUMCPU_OFF: + { + uint32_t offCPUM = *u.pu32++; + Assert(offCPUM < sizeof(pVM->aCpus[0].cpum)); + *uSrc.pu32 = (uint32_t)(VM_RC_ADDR(pVM, &pVM->aCpus[0].cpum) + offCPUM); + break; + } + + /* + * Make 32-bit GC pointer given VM offset. + */ + case FIX_GC_VM_OFF: + { + uint32_t offVM = *u.pu32++; + Assert(offVM < sizeof(VM)); + *uSrc.pu32 = (uint32_t)(VM_RC_ADDR(pVM, pVM) + offVM); + break; + } + + /* + * Make 32-bit HC pointer given CPUM offset. + */ + case FIX_HC_CPUM_OFF: + { + uint32_t offCPUM = *u.pu32++; + Assert(offCPUM < sizeof(pVM->cpum)); + *uSrc.pu32 = (uint32_t)pVM->pVMR0 + RT_UOFFSETOF(VM, cpum) + offCPUM; + break; + } + + /* + * Make 32-bit R0 pointer given VM offset. + */ + case FIX_HC_VM_OFF: + { + uint32_t offVM = *u.pu32++; + Assert(offVM < sizeof(VM)); + *uSrc.pu32 = (uint32_t)pVM->pVMR0 + offVM; + break; + } + + /* + * Store the 32-Bit CR3 (32-bit) for the intermediate memory context. + */ + case FIX_INTER_32BIT_CR3: + { + + *uSrc.pu32 = PGMGetInter32BitCR3(pVM); + break; + } + + /* + * Store the PAE CR3 (32-bit) for the intermediate memory context. + */ + case FIX_INTER_PAE_CR3: + { + + *uSrc.pu32 = PGMGetInterPaeCR3(pVM); + break; + } + + /* + * Store the AMD64 CR3 (32-bit) for the intermediate memory context. + */ + case FIX_INTER_AMD64_CR3: + { + + *uSrc.pu32 = PGMGetInterAmd64CR3(pVM); + break; + } + + /* + * Store Hypervisor CS (16-bit). + */ + case FIX_HYPER_CS: + { + *uSrc.pu16 = SelCS; + break; + } + + /* + * Store Hypervisor DS (16-bit). + */ + case FIX_HYPER_DS: + { + *uSrc.pu16 = SelDS; + break; + } + + /* + * Store Hypervisor TSS (16-bit). + */ + case FIX_HYPER_TSS: + { + *uSrc.pu16 = SelTSS; + break; + } + + /* + * Store the 32-bit GC address of the 2nd dword of the TSS descriptor (in the GDT). + */ + case FIX_GC_TSS_GDTE_DW2: + { + RTGCPTR GCPtr = GCPtrGDT + (SelTSS & ~7) + 4; + *uSrc.pu32 = (uint32_t)GCPtr; + break; + } + + /* + * Store the EFER or mask for the 32->64 bit switcher. + */ + case FIX_EFER_OR_MASK: + { + uint32_t u32OrMask = MSR_K6_EFER_LME | MSR_K6_EFER_SCE; + /* + * We don't care if cpuid 0x8000001 isn't supported as that implies + * long mode isn't supported either, so this switched would never be used. + */ + if (!!(ASMCpuId_EDX(0x80000001) & X86_CPUID_EXT_FEATURE_EDX_NX)) + u32OrMask |= MSR_K6_EFER_NXE; + + *uSrc.pu32 = u32OrMask; + break; + } + +#if 0 /* Reusable for XSAVE. */ + /* + * Insert relative jump to specified target it FXSAVE/FXRSTOR isn't supported by the cpu. + */ + case FIX_NO_FXSAVE_JMP: + { + uint32_t offTrg = *u.pu32++; + Assert(offTrg < pSwitcher->cbCode); + if (!CPUMSupportsXSave(pVM)) + { + *uSrc.pu8++ = 0xe9; /* jmp rel32 */ + *uSrc.pu32++ = offTrg - (offSrc + 5); + } + else + { + *uSrc.pu8++ = *((uint8_t *)pSwitcher->pvCode + offSrc); + *uSrc.pu32++ = *(uint32_t *)((uint8_t *)pSwitcher->pvCode + offSrc + 1); + } + break; + } +#endif + + /* + * Insert relative jump to specified target it SYSENTER isn't used by the host. 
+ */ + case FIX_NO_SYSENTER_JMP: + { + uint32_t offTrg = *u.pu32++; + Assert(offTrg < pSwitcher->cbCode); + if (!CPUMIsHostUsingSysEnter(pVM)) + { + *uSrc.pu8++ = 0xe9; /* jmp rel32 */ + *uSrc.pu32++ = offTrg - (offSrc + 5); + } + else + { + *uSrc.pu8++ = *((uint8_t *)pSwitcher->pvCode + offSrc); + *uSrc.pu32++ = *(uint32_t *)((uint8_t *)pSwitcher->pvCode + offSrc + 1); + } + break; + } + + /* + * Insert relative jump to specified target it SYSCALL isn't used by the host. + */ + case FIX_NO_SYSCALL_JMP: + { + uint32_t offTrg = *u.pu32++; + Assert(offTrg < pSwitcher->cbCode); + if (!CPUMIsHostUsingSysCall(pVM)) + { + *uSrc.pu8++ = 0xe9; /* jmp rel32 */ + *uSrc.pu32++ = offTrg - (offSrc + 5); + } + else + { + *uSrc.pu8++ = *((uint8_t *)pSwitcher->pvCode + offSrc); + *uSrc.pu32++ = *(uint32_t *)((uint8_t *)pSwitcher->pvCode + offSrc + 1); + } + break; + } + + /* + * 32-bit HC pointer fixup to (HC) target within the code (32-bit offset). + */ + case FIX_HC_32BIT: + { + uint32_t offTrg = *u.pu32++; + Assert(offSrc < pSwitcher->cbCode); + Assert(offTrg - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0 || offTrg - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1); + *uSrc.pu32 = R0PtrCode + offTrg; + break; + } + +# if defined(RT_ARCH_AMD64) + /* + * 64-bit HC Code Selector (no argument). + */ + case FIX_HC_64BIT_CS: + { + Assert(offSrc < pSwitcher->cbCode); + AssertFatalMsgFailed(("FIX_HC_64BIT_CS not implemented for this host\n")); + break; + } + + /* + * 64-bit HC pointer to the CPUM instance data (no argument). + */ + case FIX_HC_64BIT_CPUM: + { + Assert(offSrc < pSwitcher->cbCode); + *uSrc.pu64 = pVM->pVMR0 + RT_UOFFSETOF(VM, cpum); + break; + } +# endif + /* + * 64-bit HC pointer fixup to (HC) target within the code (32-bit offset). + */ + case FIX_HC_64BIT: + { + uint32_t offTrg = *u.pu32++; + Assert(offSrc < pSwitcher->cbCode); + Assert(offTrg - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0 || offTrg - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1); + *uSrc.pu64 = R0PtrCode + offTrg; + break; + } + +# ifdef RT_ARCH_X86 + case FIX_GC_64_BIT_CPUM_OFF: + { + uint32_t offCPUM = *u.pu32++; + Assert(offCPUM < sizeof(pVM->cpum)); + *uSrc.pu64 = (uint32_t)(VM_RC_ADDR(pVM, &pVM->cpum) + offCPUM); + break; + } +# endif + + /* + * 32-bit ID pointer to (ID) target within the code (32-bit offset). + */ + case FIX_ID_32BIT: + { + uint32_t offTrg = *u.pu32++; + Assert(offSrc < pSwitcher->cbCode); + Assert(offTrg - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offTrg - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + *uSrc.pu32 = u32IDCode + offTrg; + break; + } + + /* + * 64-bit ID pointer to (ID) target within the code (32-bit offset). + */ + case FIX_ID_64BIT: + case FIX_HC_64BIT_NOCHECK: + { + uint32_t offTrg = *u.pu32++; + Assert(offSrc < pSwitcher->cbCode); + Assert(u8 == FIX_HC_64BIT_NOCHECK || offTrg - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offTrg - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + *uSrc.pu64 = u32IDCode + offTrg; + break; + } + + /* + * Far 16:32 ID pointer to 64-bit mode (ID) target within the code (32-bit offset). + */ + case FIX_ID_FAR32_TO_64BIT_MODE: + { + uint32_t offTrg = *u.pu32++; + Assert(offSrc < pSwitcher->cbCode); + Assert(offTrg - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0 || offTrg - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1); + *uSrc.pu32++ = u32IDCode + offTrg; + *uSrc.pu16 = SelCS64; + AssertRelease(SelCS64); + break; + } + +# ifdef VBOX_WITH_NMI + /* + * 32-bit address to the APIC base. 
+ */ + case FIX_GC_APIC_BASE_32BIT: + { + *uSrc.pu32 = pVM->vmm.s.GCPtrApicBase; + break; + } +# endif + + default: + AssertReleaseMsgFailed(("Unknown fixup %d in switcher %s\n", u8, pSwitcher->pszDesc)); + break; + } + } + +# ifdef LOG_ENABLED + /* + * If Log2 is enabled disassemble the switcher code. + * + * The switcher code have 1-2 HC parts, 1 GC part and 0-2 ID parts. + */ + if (LogIs2Enabled()) + { + RTLogPrintf("*** Disassembly of switcher %d '%s' %#x bytes ***\n" + " R0PtrCode = %p\n" + " pu8CodeR3 = %p\n" + " GCPtrCode = %RGv\n" + " u32IDCode = %08x\n" + " pVMRC = %RRv\n" + " pCPUMRC = %RRv\n" + " pVMR3 = %p\n" + " pCPUMR3 = %p\n" + " GCPtrGDT = %RGv\n" + " InterCR3s = %08RHp, %08RHp, %08RHp (32-Bit, PAE, AMD64)\n" + " HyperCR3s = %08RHp (32-Bit, PAE & AMD64)\n" + " SelCS = %04x\n" + " SelDS = %04x\n" + " SelCS64 = %04x\n" + " SelTSS = %04x\n", + pSwitcher->enmType, pSwitcher->pszDesc, pSwitcher->cbCode, + R0PtrCode, + pu8CodeR3, + GCPtrCode, + u32IDCode, + VM_RC_ADDR(pVM, pVM), + VM_RC_ADDR(pVM, &pVM->cpum), + pVM, + &pVM->cpum, + GCPtrGDT, + PGMGetInter32BitCR3(pVM), PGMGetInterPaeCR3(pVM), PGMGetInterAmd64CR3(pVM), + PGMGetHyperCR3(VMMGetCpu(pVM)), + SelCS, SelDS, SelCS64, SelTSS); + + uint32_t offCode = 0; + while (offCode < pSwitcher->cbCode) + { + /* + * Figure out where this is. + */ + const char *pszDesc = NULL; + RTUINTPTR uBase; + uint32_t cbCode; + if (offCode - pSwitcher->offHCCode0 < pSwitcher->cbHCCode0) + { + pszDesc = "HCCode0"; + uBase = R0PtrCode; + offCode = pSwitcher->offHCCode0; + cbCode = pSwitcher->cbHCCode0; + } + else if (offCode - pSwitcher->offHCCode1 < pSwitcher->cbHCCode1) + { + pszDesc = "HCCode1"; + uBase = R0PtrCode; + offCode = pSwitcher->offHCCode1; + cbCode = pSwitcher->cbHCCode1; + } + else if (offCode - pSwitcher->offGCCode < pSwitcher->cbGCCode) + { + pszDesc = "GCCode"; + uBase = GCPtrCode; + offCode = pSwitcher->offGCCode; + cbCode = pSwitcher->cbGCCode; + } + else if (offCode - pSwitcher->offIDCode0 < pSwitcher->cbIDCode0) + { + pszDesc = "IDCode0"; + uBase = u32IDCode; + offCode = pSwitcher->offIDCode0; + cbCode = pSwitcher->cbIDCode0; + } + else if (offCode - pSwitcher->offIDCode1 < pSwitcher->cbIDCode1) + { + pszDesc = "IDCode1"; + uBase = u32IDCode; + offCode = pSwitcher->offIDCode1; + cbCode = pSwitcher->cbIDCode1; + } + else + { + RTLogPrintf(" %04x: %02x '%c' (nowhere)\n", + offCode, pu8CodeR3[offCode], RT_C_IS_PRINT(pu8CodeR3[offCode]) ? pu8CodeR3[offCode] : ' '); + offCode++; + continue; + } + + /* + * Disassemble it. 
+ */ + RTLogPrintf(" %s: offCode=%#x cbCode=%#x\n", pszDesc, offCode, cbCode); + + while (cbCode > 0) + { + /* try label it */ + if (pSwitcher->offR0ToRawMode == offCode) + RTLogPrintf(" *R0ToRawMode:\n"); + if (pSwitcher->offRCToHost == offCode) + RTLogPrintf(" *RCToHost:\n"); + if (pSwitcher->offRCCallTrampoline == offCode) + RTLogPrintf(" *RCCallTrampoline:\n"); + if (pSwitcher->offRCToHostAsm == offCode) + RTLogPrintf(" *RCToHostAsm:\n"); + if (pSwitcher->offRCToHostAsmNoReturn == offCode) + RTLogPrintf(" *RCToHostAsmNoReturn:\n"); + + /* disas */ + uint32_t cbInstr = 0; + DISCPUSTATE Cpu; + char szDisas[256]; + int rc = DISInstr(pu8CodeR3 + offCode, DISCPUMODE_32BIT, &Cpu, &cbInstr); + if (RT_SUCCESS(rc)) + { + Cpu.uInstrAddr += uBase - (uintptr_t)pu8CodeR3; + DISFormatYasmEx(&Cpu, szDisas, sizeof(szDisas), + DIS_FMT_FLAGS_ADDR_LEFT | DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_BYTES_SPACED + | DIS_FMT_FLAGS_RELATIVE_BRANCH, + NULL, NULL); + } + if (RT_SUCCESS(rc)) + RTLogPrintf(" %04x: %s\n", offCode, szDisas); + else + { + RTLogPrintf(" %04x: %02x '%c' (rc=%Rrc\n", + offCode, pu8CodeR3[offCode], RT_C_IS_PRINT(pu8CodeR3[offCode]) ? pu8CodeR3[offCode] : ' ', rc); + cbInstr = 1; + } + offCode += cbInstr; + cbCode -= RT_MIN(cbInstr, cbCode); + } + } + } +# endif +} + +/** + * Wrapper around SELMGetHyperGDT() that avoids calling it when raw-mode context + * is not initialized. + * + * @returns Raw-mode contet GDT address. Null pointer if not applicable. + * @param pVM The cross context VM structure. + */ +static RTRCPTR vmmR3SwitcherGetHyperGDT(PVM pVM) +{ + if (VM_IS_RAW_MODE_ENABLED(pVM) || HMIsRawModeCtxNeeded(pVM)) + return SELMGetHyperGDT(pVM); +# if HC_ARCH_BITS != 32 + AssertFailed(); /* This path is only applicable to some 32-bit hosts. */ +# endif + return NIL_RTRCPTR; +} + +/** + * Relocator for the 32-Bit to 32-Bit world switcher. + */ +DECLCALLBACK(void) vmmR3Switcher32BitTo32Bit_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), SELMGetHyperGDT(pVM), 0); +} + + +/** + * Relocator for the 32-Bit to PAE world switcher. + */ +DECLCALLBACK(void) vmmR3Switcher32BitToPAE_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), SELMGetHyperGDT(pVM), 0); +} + + +/** + * Relocator for the 32-Bit to AMD64 world switcher. + */ +DECLCALLBACK(void) vmmR3Switcher32BitToAMD64_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), vmmR3SwitcherGetHyperGDT(pVM), SELMGetHyperCS64(pVM)); +} + + +/** + * Relocator for the PAE to 32-Bit world switcher. 
+ */ +DECLCALLBACK(void) vmmR3SwitcherPAETo32Bit_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), SELMGetHyperGDT(pVM), 0); +} + + +/** + * Relocator for the PAE to PAE world switcher. + */ +DECLCALLBACK(void) vmmR3SwitcherPAEToPAE_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), SELMGetHyperGDT(pVM), 0); +} + +/** + * Relocator for the PAE to AMD64 world switcher. + */ +DECLCALLBACK(void) vmmR3SwitcherPAEToAMD64_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), vmmR3SwitcherGetHyperGDT(pVM), SELMGetHyperCS64(pVM)); +} + + +/** + * Relocator for the AMD64 to 32-bit world switcher. + */ +DECLCALLBACK(void) vmmR3SwitcherAMD64To32Bit_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), SELMGetHyperGDT(pVM), SELMGetHyperCS64(pVM)); +} + + +/** + * Relocator for the AMD64 to PAE world switcher. + */ +DECLCALLBACK(void) vmmR3SwitcherAMD64ToPAE_Relocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, RTR0PTR R0PtrCode, uint8_t *pu8CodeR3, RTGCPTR GCPtrCode, uint32_t u32IDCode) +{ + vmmR3SwitcherGenericRelocate(pVM, pSwitcher, R0PtrCode, pu8CodeR3, GCPtrCode, u32IDCode, + SELMGetHyperCS(pVM), SELMGetHyperDS(pVM), SELMGetHyperTSS(pVM), SELMGetHyperGDT(pVM), SELMGetHyperCS64(pVM)); +} + + +/** + * Selects the switcher to be used for switching to raw-mode context. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param enmSwitcher The new switcher. + * @remark This function may be called before the VMM is initialized. + */ +VMMR3_INT_DECL(int) VMMR3SelectSwitcher(PVM pVM, VMMSWITCHER enmSwitcher) +{ + /* + * Validate input. + */ + if ( enmSwitcher < VMMSWITCHER_INVALID + || enmSwitcher >= VMMSWITCHER_MAX) + { + AssertMsgFailed(("Invalid input enmSwitcher=%d\n", enmSwitcher)); + return VERR_INVALID_PARAMETER; + } + + /* + * Override it if HM is active. + */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + pVM->vmm.s.enmSwitcher = HC_ARCH_BITS == 64 ? VMMSWITCHER_AMD64_STUB : VMMSWITCHER_X86_STUB; + + /* + * Select the new switcher. + */ + const PVMMSWITCHERDEF *papSwitchers = VM_IS_RAW_MODE_ENABLED(pVM) ? 
g_apRawModeSwitchers : g_apHmSwitchers; + PVMMSWITCHERDEF pSwitcher = papSwitchers[enmSwitcher]; + if (pSwitcher) + { + Log(("VMMR3SelectSwitcher: enmSwitcher %d -> %d %s\n", pVM->vmm.s.enmSwitcher, enmSwitcher, pSwitcher->pszDesc)); + pVM->vmm.s.enmSwitcher = enmSwitcher; + + RTR0PTR pbCodeR0 = (RTR0PTR)pVM->vmm.s.pvCoreCodeR0 + pVM->vmm.s.aoffSwitchers[enmSwitcher]; /** @todo fix the pvCoreCodeR0 type */ + pVM->vmm.s.pfnR0ToRawMode = pbCodeR0 + pSwitcher->offR0ToRawMode; + + RTRCPTR RCPtr = pVM->vmm.s.pvCoreCodeRC + pVM->vmm.s.aoffSwitchers[enmSwitcher]; + pVM->vmm.s.pfnRCToHost = RCPtr + pSwitcher->offRCToHost; + pVM->vmm.s.pfnCallTrampolineRC = RCPtr + pSwitcher->offRCCallTrampoline; + pVM->pfnVMMRCToHostAsm = RCPtr + pSwitcher->offRCToHostAsm; + pVM->pfnVMMRCToHostAsmNoReturn = RCPtr + pSwitcher->offRCToHostAsmNoReturn; + return VINF_SUCCESS; + } + + return VERR_NOT_IMPLEMENTED; +} + +#endif /* #defined(VBOX_WITH_RAW_MODE) || (HC_ARCH_BITS != 64) */ + + +#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) +/** + * Gets the switcher to be used for switching to GC. + * + * This is for 64-on-32 with HM. Caller is HMR3Relocate(). + * + * @returns host to guest ring 0 switcher entrypoint + * @param pVM The cross context VM structure. + * @param enmSwitcher The new switcher. + */ +VMMR3_INT_DECL(RTR0PTR) VMMR3GetHostToGuestSwitcher(PVM pVM, VMMSWITCHER enmSwitcher) +{ + /* + * Validate input. + */ + AssertMsgReturn( enmSwitcher == VMMSWITCHER_32_TO_AMD64 + || enmSwitcher == VMMSWITCHER_PAE_TO_AMD64, + ("%d\n", enmSwitcher), + NIL_RTR0PTR); + AssertReturn(HMIsEnabled(pVM), NIL_RTR0PTR); + + /* + * Select the new switcher. + */ + const PVMMSWITCHERDEF *papSwitchers = g_apHmSwitchers; + PVMMSWITCHERDEF pSwitcher = papSwitchers[enmSwitcher]; + if (pSwitcher) + { + /** @todo fix the pvCoreCodeR0 type */ + RTR0PTR pbCodeR0 = (RTR0PTR)pVM->vmm.s.pvCoreCodeR0 + pVM->vmm.s.aoffSwitchers[enmSwitcher]; + return pbCodeR0 + pSwitcher->offR0ToRawMode; + } + return NIL_RTR0PTR; +} +#endif + diff --git a/src/VBox/VMM/VMMR3/VMMTests.cpp b/src/VBox/VMM/VMMR3/VMMTests.cpp new file mode 100644 index 00000000..38d1f150 --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMMTests.cpp @@ -0,0 +1,960 @@ +/* $Id: VMMTests.cpp $ */ +/** @file + * VMM - The Virtual Machine Monitor Core, Tests. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +//#define NO_SUPCALLR0VMM + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VMM +#include /* for SUPGetCpuHzFromGIP */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "VMMInternal.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +#ifdef VBOX_WITH_RAW_MODE + +static void vmmR3TestClearStack(PVMCPU pVCpu) +{ + /* We leave the first 64 bytes of the stack alone because of strict + ring-0 long jump code uses it. */ + memset(pVCpu->vmm.s.pbEMTStackR3 + 64, 0xaa, VMM_STACK_SIZE - 64); +} + + +static int vmmR3ReportMsrRange(PVM pVM, uint32_t uMsr, uint64_t cMsrs, PRTSTREAM pReportStrm, uint32_t *pcMsrsFound) +{ + /* + * Preps. + */ + RTRCPTR RCPtrEP; + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCTestReadMsrs", &RCPtrEP); + AssertMsgRCReturn(rc, ("Failed to resolved VMMRC.rc::VMMRCEntry(), rc=%Rrc\n", rc), rc); + + uint32_t const cMsrsPerCall = 16384; + uint32_t cbResults = cMsrsPerCall * sizeof(VMMTESTMSRENTRY); + PVMMTESTMSRENTRY paResults; + rc = MMHyperAlloc(pVM, cbResults, 0, MM_TAG_VMM, (void **)&paResults); + AssertMsgRCReturn(rc, ("Error allocating %#x bytes off the hyper heap: %Rrc\n", cbResults, rc), rc); + /* + * The loop. + */ + RTRCPTR RCPtrResults = MMHyperR3ToRC(pVM, paResults); + uint32_t cMsrsFound = 0; + uint32_t uLastMsr = uMsr; + uint64_t uNsTsStart = RTTimeNanoTS(); + + for (;;) + { + if ( pReportStrm + && uMsr - uLastMsr > _64K + && (uMsr & (_4M - 1)) == 0) + { + if (uMsr - uLastMsr < 16U*_1M) + RTStrmFlush(pReportStrm); + RTPrintf("... %#010x [%u ns/msr] ...\n", uMsr, (RTTimeNanoTS() - uNsTsStart) / uMsr); + } + + /*RT_BZERO(paResults, cbResults);*/ + uint32_t const cBatch = RT_MIN(cMsrsPerCall, cMsrs); + rc = VMMR3CallRC(pVM, RCPtrEP, 4, pVM->pVMRC, uMsr, cBatch, RCPtrResults); + if (RT_FAILURE(rc)) + { + RTPrintf("VMM: VMMR3CallRC failed rc=%Rrc, uMsr=%#x\n", rc, uMsr); + break; + } + + for (uint32_t i = 0; i < cBatch; i++) + if (paResults[i].uMsr != UINT64_MAX) + { + if (paResults[i].uValue == 0) + { + if (pReportStrm) + RTStrmPrintf(pReportStrm, + " MVO(%#010llx, \"MSR\", UINT64_C(%#018llx)),\n", paResults[i].uMsr, paResults[i].uValue); + RTPrintf("%#010llx = 0\n", paResults[i].uMsr); + } + else + { + if (pReportStrm) + RTStrmPrintf(pReportStrm, + " MVO(%#010llx, \"MSR\", UINT64_C(%#018llx)),\n", paResults[i].uMsr, paResults[i].uValue); + RTPrintf("%#010llx = %#010x`%08x\n", paResults[i].uMsr, + RT_HI_U32(paResults[i].uValue), RT_LO_U32(paResults[i].uValue)); + } + cMsrsFound++; + uLastMsr = paResults[i].uMsr; + } + + /* Advance. */ + if (cMsrs <= cMsrsPerCall) + break; + cMsrs -= cMsrsPerCall; + uMsr += cMsrsPerCall; + } + + *pcMsrsFound += cMsrsFound; + MMHyperFree(pVM, paResults); + return rc; +} + + +/** + * Produces a quick report of MSRs. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pReportStrm Pointer to the report output stream. Optional. + * @param fWithCpuId Whether CPUID should be included. 
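+ *
+ * @remarks Probes a fixed set of MSR ranges (s_aRanges below) in batches of
+ * 16384 MSRs per raw-mode call via vmmR3ReportMsrRange.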
+ */ +static int vmmR3DoMsrQuickReport(PVM pVM, PRTSTREAM pReportStrm, bool fWithCpuId) +{ + uint64_t uTsStart = RTTimeNanoTS(); + RTPrintf("=== MSR Quick Report Start ===\n"); + RTStrmFlush(g_pStdOut); + if (fWithCpuId) + { + DBGFR3InfoStdErr(pVM->pUVM, "cpuid", "verbose"); + RTPrintf("\n"); + } + if (pReportStrm) + RTStrmPrintf(pReportStrm, "\n\n{\n"); + + static struct { uint32_t uFirst, cMsrs; } const s_aRanges[] = + { + { 0x00000000, 0x00042000 }, + { 0x10000000, 0x00001000 }, + { 0x20000000, 0x00001000 }, + { 0x40000000, 0x00012000 }, + { 0x80000000, 0x00012000 }, +// Need 0xc0000000..0xc001106f (at least), but trouble on solaris w/ 10h and 0fh family cpus: +// { 0xc0000000, 0x00022000 }, + { 0xc0000000, 0x00010000 }, + { 0xc0010000, 0x00001040 }, + { 0xc0011040, 0x00004040 }, /* should cause trouble... */ + }; + uint32_t cMsrsFound = 0; + int rc = VINF_SUCCESS; + for (unsigned i = 0; i < RT_ELEMENTS(s_aRanges) && RT_SUCCESS(rc); i++) + { +//if (i >= 3) +//{ +//RTStrmFlush(g_pStdOut); +//RTThreadSleep(40); +//} + rc = vmmR3ReportMsrRange(pVM, s_aRanges[i].uFirst, s_aRanges[i].cMsrs, pReportStrm, &cMsrsFound); + } + + if (pReportStrm) + RTStrmPrintf(pReportStrm, "}; /* %u (%#x) MSRs; rc=%Rrc */\n", cMsrsFound, cMsrsFound, rc); + RTPrintf("Total %u (%#x) MSRs\n", cMsrsFound, cMsrsFound); + RTPrintf("=== MSR Quick Report End (rc=%Rrc, %'llu ns) ===\n", rc, RTTimeNanoTS() - uTsStart); + return rc; +} + + +/** + * Performs a testcase. + * + * @returns return value from the test. + * @param pVM The cross context VM structure. + * @param enmTestcase The testcase operation to perform. + * @param uVariation The testcase variation id. + */ +static int vmmR3DoGCTest(PVM pVM, VMMRCOPERATION enmTestcase, unsigned uVariation) +{ + PVMCPU pVCpu = &pVM->aCpus[0]; + + RTRCPTR RCPtrEP; + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCEntry", &RCPtrEP); + if (RT_FAILURE(rc)) + return rc; + + Log(("vmmR3DoGCTest: %d %#x\n", enmTestcase, uVariation)); + CPUMSetHyperState(pVCpu, pVM->vmm.s.pfnCallTrampolineRC, pVCpu->vmm.s.pbEMTStackBottomRC, 0, 0); + vmmR3TestClearStack(pVCpu); + CPUMPushHyper(pVCpu, uVariation); + CPUMPushHyper(pVCpu, enmTestcase); + CPUMPushHyper(pVCpu, pVM->pVMRC); + CPUMPushHyper(pVCpu, 3 * sizeof(RTRCPTR)); /* stack frame size */ + CPUMPushHyper(pVCpu, RCPtrEP); /* what to call */ + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + +# if 1 + /* flush the raw-mode logs. */ +# ifdef LOG_ENABLED + PRTLOGGERRC pLogger = pVM->vmm.s.pRCLoggerR3; + if ( pLogger + && pLogger->offScratch > 0) + RTLogFlushRC(NULL, pLogger); +# endif +# ifdef VBOX_WITH_RC_RELEASE_LOGGING + PRTLOGGERRC pRelLogger = pVM->vmm.s.pRCRelLoggerR3; + if (RT_UNLIKELY(pRelLogger && pRelLogger->offScratch > 0)) + RTLogFlushRC(RTLogRelGetDefaultInstance(), pRelLogger); +# endif +# endif + + Log(("vmmR3DoGCTest: rc=%Rrc iLastGZRc=%Rrc\n", rc, pVCpu->vmm.s.iLastGZRc)); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; + return rc; +} + + +/** + * Performs a trap test. + * + * @returns Return value from the trap test. + * @param pVM The cross context VM structure. + * @param u8Trap The trap number to test. + * @param uVariation The testcase variation. + * @param rcExpect The expected result. + * @param u32Eax The expected eax value. + * @param pszFaultEIP The fault address. Pass NULL if this isn't available or doesn't apply. + * @param pszDesc The test description. 
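+ *
+ * @remarks Pushes the variation and trap number onto the hypervisor stack,
+ * enters raw-mode via VMMR0_DO_RAW_RUN, and then compares the return
+ * code, trap number, faulting EIP and scratch registers against the
+ * expected values.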
+ */ +static int vmmR3DoTrapTest(PVM pVM, uint8_t u8Trap, unsigned uVariation, int rcExpect, uint32_t u32Eax, const char *pszFaultEIP, const char *pszDesc) +{ + PVMCPU pVCpu = &pVM->aCpus[0]; + + RTPrintf("VMM: testing 0%x / %d - %s\n", u8Trap, uVariation, pszDesc); + + RTRCPTR RCPtrEP; + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCEntry", &RCPtrEP); + if (RT_FAILURE(rc)) + return rc; + + CPUMSetHyperState(pVCpu, pVM->vmm.s.pfnCallTrampolineRC, pVCpu->vmm.s.pbEMTStackBottomRC, 0, 0); + vmmR3TestClearStack(pVCpu); + CPUMPushHyper(pVCpu, uVariation); + CPUMPushHyper(pVCpu, u8Trap + VMMRC_DO_TESTCASE_TRAP_FIRST); + CPUMPushHyper(pVCpu, pVM->pVMRC); + CPUMPushHyper(pVCpu, 3 * sizeof(RTRCPTR)); /* stack frame size */ + CPUMPushHyper(pVCpu, RCPtrEP); /* what to call */ + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; + bool fDump = false; + if (rc != rcExpect) + { + RTPrintf("VMM: FAILURE - rc=%Rrc expected %Rrc\n", rc, rcExpect); + if (rc != VERR_NOT_IMPLEMENTED) + fDump = true; + } + else if ( rcExpect != VINF_SUCCESS + && u8Trap != 8 /* double fault doesn't dare set TrapNo. */ + && u8Trap != 3 /* guest only, we're not in guest. */ + && u8Trap != 1 /* guest only, we're not in guest. */ + && u8Trap != TRPMGetTrapNo(pVCpu)) + { + RTPrintf("VMM: FAILURE - Trap %#x expected %#x\n", TRPMGetTrapNo(pVCpu), u8Trap); + fDump = true; + } + else if (pszFaultEIP) + { + RTRCPTR RCPtrFault; + int rc2 = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, pszFaultEIP, &RCPtrFault); + if (RT_FAILURE(rc2)) + RTPrintf("VMM: FAILURE - Failed to resolve symbol '%s', %Rrc!\n", pszFaultEIP, rc); + else if (RCPtrFault != CPUMGetHyperEIP(pVCpu)) + { + RTPrintf("VMM: FAILURE - EIP=%08RX32 expected %RRv (%s)\n", CPUMGetHyperEIP(pVCpu), RCPtrFault, pszFaultEIP); + fDump = true; + } + } + else if (rcExpect != VINF_SUCCESS) + { + if (CPUMGetHyperSS(pVCpu) == SELMGetHyperDS(pVM)) + RTPrintf("VMM: FAILURE - ss=%x expected %x\n", CPUMGetHyperSS(pVCpu), SELMGetHyperDS(pVM)); + if (CPUMGetHyperES(pVCpu) == SELMGetHyperDS(pVM)) + RTPrintf("VMM: FAILURE - es=%x expected %x\n", CPUMGetHyperES(pVCpu), SELMGetHyperDS(pVM)); + if (CPUMGetHyperDS(pVCpu) == SELMGetHyperDS(pVM)) + RTPrintf("VMM: FAILURE - ds=%x expected %x\n", CPUMGetHyperDS(pVCpu), SELMGetHyperDS(pVM)); + if (CPUMGetHyperFS(pVCpu) == SELMGetHyperDS(pVM)) + RTPrintf("VMM: FAILURE - fs=%x expected %x\n", CPUMGetHyperFS(pVCpu), SELMGetHyperDS(pVM)); + if (CPUMGetHyperGS(pVCpu) == SELMGetHyperDS(pVM)) + RTPrintf("VMM: FAILURE - gs=%x expected %x\n", CPUMGetHyperGS(pVCpu), SELMGetHyperDS(pVM)); + if (CPUMGetHyperEDI(pVCpu) == 0x01234567) + RTPrintf("VMM: FAILURE - edi=%x expected %x\n", CPUMGetHyperEDI(pVCpu), 0x01234567); + if (CPUMGetHyperESI(pVCpu) == 0x42000042) + RTPrintf("VMM: FAILURE - esi=%x expected %x\n", CPUMGetHyperESI(pVCpu), 0x42000042); + if (CPUMGetHyperEBP(pVCpu) == 0xffeeddcc) + RTPrintf("VMM: FAILURE - ebp=%x expected %x\n", CPUMGetHyperEBP(pVCpu), 0xffeeddcc); + if (CPUMGetHyperEBX(pVCpu) == 0x89abcdef) + RTPrintf("VMM: FAILURE - ebx=%x expected %x\n", CPUMGetHyperEBX(pVCpu), 0x89abcdef); + if (CPUMGetHyperECX(pVCpu) == 0xffffaaaa) + RTPrintf("VMM: FAILURE - ecx=%x expected %x\n", CPUMGetHyperECX(pVCpu), 0xffffaaaa); + if (CPUMGetHyperEDX(pVCpu) == 0x77778888) + RTPrintf("VMM: FAILURE - edx=%x expected %x\n", CPUMGetHyperEDX(pVCpu), 0x77778888); + if (CPUMGetHyperEAX(pVCpu) == 
u32Eax) + RTPrintf("VMM: FAILURE - eax=%x expected %x\n", CPUMGetHyperEAX(pVCpu), u32Eax); + } + if (fDump) + VMMR3FatalDump(pVM, pVCpu, rc); + return rc; +} + +#endif /* VBOX_WITH_RAW_MODE */ + + +/* execute the switch. */ +VMMR3DECL(int) VMMDoTest(PVM pVM) +{ + int rc = VINF_SUCCESS; + +#ifdef VBOX_WITH_RAW_MODE + PVMCPU pVCpu = &pVM->aCpus[0]; + PUVM pUVM = pVM->pUVM; + +# ifdef NO_SUPCALLR0VMM + RTPrintf("NO_SUPCALLR0VMM\n"); + return rc; +# endif + + /* + * Setup stack for calling VMMRCEntry(). + */ + RTRCPTR RCPtrEP; + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCEntry", &RCPtrEP); + if (RT_SUCCESS(rc)) + { + RTPrintf("VMM: VMMRCEntry=%RRv\n", RCPtrEP); + + /* + * Test various crashes which we must be able to recover from. + */ + vmmR3DoTrapTest(pVM, 0x3, 0, VINF_EM_DBG_HYPER_ASSERTION, 0xf0f0f0f0, "vmmGCTestTrap3_FaultEIP", "int3"); + vmmR3DoTrapTest(pVM, 0x3, 1, VINF_EM_DBG_HYPER_ASSERTION, 0xf0f0f0f0, "vmmGCTestTrap3_FaultEIP", "int3 WP"); + +# if 0//defined(DEBUG_bird) /* guess most people would like to skip these since they write to com1. */ + vmmR3DoTrapTest(pVM, 0x8, 0, VERR_TRPM_PANIC, 0x00000000, "vmmGCTestTrap8_FaultEIP", "#DF [#PG]"); + SELMR3Relocate(pVM); /* this resets the busy flag of the Trap 08 TSS */ + bool f; + rc = CFGMR3QueryBool(CFGMR3GetRoot(pVM), "DoubleFault", &f); +# if !defined(DEBUG_bird) + if (RT_SUCCESS(rc) && f) +# endif + { + /* see triple fault warnings in SELM and VMMRC.cpp. */ + vmmR3DoTrapTest(pVM, 0x8, 1, VERR_TRPM_PANIC, 0x00000000, "vmmGCTestTrap8_FaultEIP", "#DF [#PG] WP"); + SELMR3Relocate(pVM); /* this resets the busy flag of the Trap 08 TSS */ + } +# endif + + vmmR3DoTrapTest(pVM, 0xd, 0, VERR_TRPM_DONT_PANIC, 0xf0f0f0f0, "vmmGCTestTrap0d_FaultEIP", "ltr #GP"); + /// @todo find a better \#GP case, on intel ltr will \#PF (busy update?) and not \#GP. + //vmmR3DoTrapTest(pVM, 0xd, 1, VERR_TRPM_DONT_PANIC, 0xf0f0f0f0, "vmmGCTestTrap0d_FaultEIP", "ltr #GP WP"); + + vmmR3DoTrapTest(pVM, 0xe, 0, VERR_TRPM_DONT_PANIC, 0x00000000, "vmmGCTestTrap0e_FaultEIP", "#PF (NULL)"); + vmmR3DoTrapTest(pVM, 0xe, 1, VERR_TRPM_DONT_PANIC, 0x00000000, "vmmGCTestTrap0e_FaultEIP", "#PF (NULL) WP"); + vmmR3DoTrapTest(pVM, 0xe, 2, VINF_SUCCESS, 0x00000000, NULL, "#PF w/Tmp Handler"); + /* This test is no longer relevant as fs and gs are loaded with NULL + selectors and we will always return to HC if a #GP occurs while + returning to guest code. + vmmR3DoTrapTest(pVM, 0xe, 4, VINF_SUCCESS, 0x00000000, NULL, "#PF w/Tmp Handler and bad fs"); + */ + + /* + * Set a debug register and perform a context switch. + */ + rc = vmmR3DoGCTest(pVM, VMMRC_DO_TESTCASE_NOP, 0); + if (rc != VINF_SUCCESS) + { + RTPrintf("VMM: Nop test failed, rc=%Rrc not VINF_SUCCESS\n", rc); + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + /* a harmless breakpoint */ + RTPrintf("VMM: testing hardware bp at 0x10000 (not hit)\n"); + DBGFADDRESS Addr; + DBGFR3AddrFromFlat(pUVM, &Addr, 0x10000); + RTUINT iBp0; + rc = DBGFR3BpSetReg(pUVM, &Addr, 0, ~(uint64_t)0, X86_DR7_RW_EO, 1, &iBp0); + AssertReleaseRC(rc); + rc = vmmR3DoGCTest(pVM, VMMRC_DO_TESTCASE_NOP, 0); + if (rc != VINF_SUCCESS) + { + RTPrintf("VMM: DR0=0x10000 test failed with rc=%Rrc!\n", rc); + return RT_FAILURE(rc) ? 
rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + /* a bad one at VMMRCEntry */ + RTPrintf("VMM: testing hardware bp at VMMRCEntry (hit)\n"); + DBGFR3AddrFromFlat(pUVM, &Addr, RCPtrEP); + RTUINT iBp1; + rc = DBGFR3BpSetReg(pUVM, &Addr, 0, ~(uint64_t)0, X86_DR7_RW_EO, 1, &iBp1); + AssertReleaseRC(rc); + rc = vmmR3DoGCTest(pVM, VMMRC_DO_TESTCASE_NOP, 0); + if (rc != VINF_EM_DBG_HYPER_BREAKPOINT) + { + RTPrintf("VMM: DR1=VMMRCEntry test failed with rc=%Rrc! expected VINF_EM_RAW_BREAKPOINT_HYPER\n", rc); + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + /* resume the breakpoint */ + RTPrintf("VMM: resuming hyper after breakpoint\n"); + CPUMSetHyperEFlags(pVCpu, CPUMGetHyperEFlags(pVCpu) | X86_EFL_RF); + rc = VMMR3ResumeHyper(pVM, pVCpu); + if (rc != VINF_SUCCESS) + { + RTPrintf("VMM: failed to resume on hyper breakpoint, rc=%Rrc = KNOWN BUG\n", rc); /** @todo fix VMMR3ResumeHyper */ + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + /* engage the breakpoint again and try single stepping. */ + RTPrintf("VMM: testing hardware bp at VMMRCEntry + stepping\n"); + rc = vmmR3DoGCTest(pVM, VMMRC_DO_TESTCASE_NOP, 0); + if (rc != VINF_EM_DBG_HYPER_BREAKPOINT) + { + RTPrintf("VMM: DR1=VMMRCEntry test failed with rc=%Rrc! expected VINF_EM_RAW_BREAKPOINT_HYPER\n", rc); + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + RTGCUINTREG OldPc = CPUMGetHyperEIP(pVCpu); + RTPrintf("%RGr=>", OldPc); + unsigned i; + for (i = 0; i < 8; i++) + { + CPUMSetHyperEFlags(pVCpu, CPUMGetHyperEFlags(pVCpu) | X86_EFL_TF | X86_EFL_RF); + rc = VMMR3ResumeHyper(pVM, pVCpu); + if (rc != VINF_EM_DBG_HYPER_STEPPED) + { + RTPrintf("\nVMM: failed to step on hyper breakpoint, rc=%Rrc\n", rc); + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + RTGCUINTREG Pc = CPUMGetHyperEIP(pVCpu); + RTPrintf("%RGr=>", Pc); + if (Pc == OldPc) + { + RTPrintf("\nVMM: step failed, PC: %RGr -> %RGr\n", OldPc, Pc); + return VERR_GENERAL_FAILURE; + } + OldPc = Pc; + } + RTPrintf("ok\n"); + + /* done, clear it */ + if ( RT_FAILURE(DBGFR3BpClear(pUVM, iBp0)) + || RT_FAILURE(DBGFR3BpClear(pUVM, iBp1))) + { + RTPrintf("VMM: Failed to clear breakpoints!\n"); + return VERR_GENERAL_FAILURE; + } + rc = vmmR3DoGCTest(pVM, VMMRC_DO_TESTCASE_NOP, 0); + if (rc != VINF_SUCCESS) + { + RTPrintf("VMM: NOP failed, rc=%Rrc\n", rc); + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + + /* + * Interrupt masking. Failure may indiate NMI watchdog activity. + */ + RTPrintf("VMM: interrupt masking...\n"); RTStrmFlush(g_pStdOut); RTThreadSleep(250); + for (i = 0; i < 10000; i++) + { + uint64_t StartTick = ASMReadTSC(); + rc = vmmR3DoGCTest(pVM, VMMRC_DO_TESTCASE_INTERRUPT_MASKING, 0); + if (rc != VINF_SUCCESS) + { + RTPrintf("VMM: Interrupt masking failed: rc=%Rrc\n", rc); + return RT_FAILURE(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS; + } + uint64_t Ticks = ASMReadTSC() - StartTick; + if (Ticks < (SUPGetCpuHzFromGip(g_pSUPGlobalInfoPage) / 10000)) + RTPrintf("Warning: Ticks=%RU64 (< %RU64)\n", Ticks, SUPGetCpuHzFromGip(g_pSUPGlobalInfoPage) / 10000); + } + + /* + * Interrupt forwarding. 
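+ * (Re-enters raw-mode running VMMRC_DO_TESTCASE_HYPER_INTERRUPT until a status
+ * other than VINF_EM_RAW_INTERRUPT_HYPER is returned, measuring nanoseconds
+ * and TSC ticks per round trip.)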
+ */ + CPUMSetHyperState(pVCpu, pVM->vmm.s.pfnCallTrampolineRC, pVCpu->vmm.s.pbEMTStackBottomRC, 0, 0); + CPUMPushHyper(pVCpu, 0); + CPUMPushHyper(pVCpu, VMMRC_DO_TESTCASE_HYPER_INTERRUPT); + CPUMPushHyper(pVCpu, pVM->pVMRC); + CPUMPushHyper(pVCpu, 3 * sizeof(RTRCPTR)); /* stack frame size */ + CPUMPushHyper(pVCpu, RCPtrEP); /* what to call */ + Log(("trampoline=%x\n", pVM->vmm.s.pfnCallTrampolineRC)); + + /* + * Switch and do da thing. + */ + RTPrintf("VMM: interrupt forwarding...\n"); RTStrmFlush(g_pStdOut); RTThreadSleep(250); + i = 0; + uint64_t tsBegin = RTTimeNanoTS(); + uint64_t TickStart = ASMReadTSC(); + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + do + { + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; + if (RT_FAILURE(rc)) + { + Log(("VMM: GC returned fatal %Rra in iteration %d\n", rc, i)); + VMMR3FatalDump(pVM, pVCpu, rc); + return rc; + } + i++; + if (!(i % 32)) + Log(("VMM: iteration %d, esi=%08x edi=%08x ebx=%08x\n", + i, CPUMGetHyperESI(pVCpu), CPUMGetHyperEDI(pVCpu), CPUMGetHyperEBX(pVCpu))); + } while (rc == VINF_EM_RAW_INTERRUPT_HYPER); + uint64_t TickEnd = ASMReadTSC(); + uint64_t tsEnd = RTTimeNanoTS(); + + uint64_t Elapsed = tsEnd - tsBegin; + uint64_t PerIteration = Elapsed / (uint64_t)i; + uint64_t cTicksElapsed = TickEnd - TickStart; + uint64_t cTicksPerIteration = cTicksElapsed / (uint64_t)i; + + RTPrintf("VMM: %8d interrupts in %11llu ns (%11llu ticks), %10llu ns/iteration (%11llu ticks)\n", + i, Elapsed, cTicksElapsed, PerIteration, cTicksPerIteration); + Log(("VMM: %8d interrupts in %11llu ns (%11llu ticks), %10llu ns/iteration (%11llu ticks)\n", + i, Elapsed, cTicksElapsed, PerIteration, cTicksPerIteration)); + + /* + * These forced actions are not necessary for the test and trigger breakpoints too. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_SELM_SYNC_TSS); + + /* + * Profile switching. 
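+ * (One million VMMRC_DO_TESTCASE_NOP world switches, recording the total
+ * elapsed time and the minimum TSC tick count for a single switch.)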
+ */ + RTPrintf("VMM: profiling switcher...\n"); + Log(("VMM: profiling switcher...\n")); + uint64_t TickMin = UINT64_MAX; + tsBegin = RTTimeNanoTS(); + TickStart = ASMReadTSC(); + Assert(CPUMGetHyperCR3(pVCpu) && CPUMGetHyperCR3(pVCpu) == PGMGetHyperCR3(pVCpu)); + for (i = 0; i < 1000000; i++) + { + CPUMSetHyperState(pVCpu, pVM->vmm.s.pfnCallTrampolineRC, pVCpu->vmm.s.pbEMTStackBottomRC, 0, 0); + CPUMPushHyper(pVCpu, 0); + CPUMPushHyper(pVCpu, VMMRC_DO_TESTCASE_NOP); + CPUMPushHyper(pVCpu, pVM->pVMRC); + CPUMPushHyper(pVCpu, 3 * sizeof(RTRCPTR)); /* stack frame size */ + CPUMPushHyper(pVCpu, RCPtrEP); /* what to call */ + + uint64_t TickThisStart = ASMReadTSC(); + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_RAW_RUN, 0); + if (RT_LIKELY(rc == VINF_SUCCESS)) + rc = pVCpu->vmm.s.iLastGZRc; + uint64_t TickThisElapsed = ASMReadTSC() - TickThisStart; + if (RT_FAILURE(rc)) + { + Log(("VMM: GC returned fatal %Rra in iteration %d\n", rc, i)); + VMMR3FatalDump(pVM, pVCpu, rc); + return rc; + } + if (TickThisElapsed < TickMin) + TickMin = TickThisElapsed; + } + TickEnd = ASMReadTSC(); + tsEnd = RTTimeNanoTS(); + + Elapsed = tsEnd - tsBegin; + PerIteration = Elapsed / (uint64_t)i; + cTicksElapsed = TickEnd - TickStart; + cTicksPerIteration = cTicksElapsed / (uint64_t)i; + + RTPrintf("VMM: %8d cycles in %11llu ns (%11lld ticks), %10llu ns/iteration (%11lld ticks) Min %11lld ticks\n", + i, Elapsed, cTicksElapsed, PerIteration, cTicksPerIteration, TickMin); + Log(("VMM: %8d cycles in %11llu ns (%11lld ticks), %10llu ns/iteration (%11lld ticks) Min %11lld ticks\n", + i, Elapsed, cTicksElapsed, PerIteration, cTicksPerIteration, TickMin)); + + rc = VINF_SUCCESS; + +# if 0 /* drop this for now as it causes trouble on AMDs (Opteron 2384 and possibly others). */ + /* + * A quick MSR report. + */ + vmmR3DoMsrQuickReport(pVM, NULL, true); +# endif + } + else + AssertMsgFailed(("Failed to resolved VMMRC.rc::VMMRCEntry(), rc=%Rrc\n", rc)); +#else /* !VBOX_WITH_RAW_MODE */ + RT_NOREF(pVM); +#endif /* !VBOX_WITH_RAW_MODE */ + return rc; +} + +#define SYNC_SEL(pHyperCtx, reg) \ + if (pHyperCtx->reg.Sel) \ + { \ + DBGFSELINFO selInfo; \ + int rc2 = SELMR3GetShadowSelectorInfo(pVM, pHyperCtx->reg.Sel, &selInfo); \ + AssertRC(rc2); \ + \ + pHyperCtx->reg.u64Base = selInfo.GCPtrBase; \ + pHyperCtx->reg.u32Limit = selInfo.cbLimit; \ + pHyperCtx->reg.Attr.n.u1Present = selInfo.u.Raw.Gen.u1Present; \ + pHyperCtx->reg.Attr.n.u1DefBig = selInfo.u.Raw.Gen.u1DefBig; \ + pHyperCtx->reg.Attr.n.u1Granularity = selInfo.u.Raw.Gen.u1Granularity; \ + pHyperCtx->reg.Attr.n.u4Type = selInfo.u.Raw.Gen.u4Type; \ + pHyperCtx->reg.Attr.n.u2Dpl = selInfo.u.Raw.Gen.u2Dpl; \ + pHyperCtx->reg.Attr.n.u1DescType = selInfo.u.Raw.Gen.u1DescType; \ + pHyperCtx->reg.Attr.n.u1Long = selInfo.u.Raw.Gen.u1Long; \ + } + +/* execute the switch. */ +VMMR3DECL(int) VMMDoHmTest(PVM pVM) +{ + uint32_t i; + int rc; + PCPUMCTX pHyperCtx, pGuestCtx; + RTGCPHYS CR3Phys = 0x0; /* fake address */ + PVMCPU pVCpu = &pVM->aCpus[0]; + + if (!HMIsEnabled(pVM)) + { + RTPrintf("VMM: Hardware accelerated test not available!\n"); + return VERR_ACCESS_DENIED; + } + +#ifdef VBOX_WITH_RAW_MODE + /* + * These forced actions are not necessary for the test and trigger breakpoints too. + */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + + /* Enable mapping of the hypervisor into the shadow page table. 
*/ + uint32_t cb; + rc = PGMR3MappingsSize(pVM, &cb); + AssertRCReturn(rc, rc); + + /* Pretend the mappings are now fixed; to force a refresh of the reserved PDEs. */ + rc = PGMR3MappingsFix(pVM, MM_HYPER_AREA_ADDRESS, cb); + AssertRCReturn(rc, rc); + + pHyperCtx = CPUMGetHyperCtxPtr(pVCpu); + + pHyperCtx->cr0 = X86_CR0_PE | X86_CR0_WP | X86_CR0_PG | X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP; + pHyperCtx->cr4 = X86_CR4_PGE | X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT; + PGMChangeMode(pVCpu, pHyperCtx->cr0, pHyperCtx->cr4, pHyperCtx->msrEFER); + PGMSyncCR3(pVCpu, pHyperCtx->cr0, CR3Phys, pHyperCtx->cr4, true); + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TIMER); + VM_FF_CLEAR(pVM, VM_FF_TM_VIRTUAL_SYNC); + VM_FF_CLEAR(pVM, VM_FF_REQUEST); + + /* + * Setup stack for calling VMMRCEntry(). + */ + RTRCPTR RCPtrEP; + rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCEntry", &RCPtrEP); + if (RT_SUCCESS(rc)) + { + RTPrintf("VMM: VMMRCEntry=%RRv\n", RCPtrEP); + + pHyperCtx = CPUMGetHyperCtxPtr(pVCpu); + + /* Fill in hidden selector registers for the hypervisor state. */ + SYNC_SEL(pHyperCtx, cs); + SYNC_SEL(pHyperCtx, ds); + SYNC_SEL(pHyperCtx, es); + SYNC_SEL(pHyperCtx, fs); + SYNC_SEL(pHyperCtx, gs); + SYNC_SEL(pHyperCtx, ss); + SYNC_SEL(pHyperCtx, tr); + + /* + * Profile switching. + */ + RTPrintf("VMM: profiling switcher...\n"); + Log(("VMM: profiling switcher...\n")); + uint64_t TickMin = UINT64_MAX; + uint64_t tsBegin = RTTimeNanoTS(); + uint64_t TickStart = ASMReadTSC(); + for (i = 0; i < 1000000; i++) + { + CPUMSetHyperState(pVCpu, pVM->vmm.s.pfnCallTrampolineRC, pVCpu->vmm.s.pbEMTStackBottomRC, 0, 0); + CPUMPushHyper(pVCpu, 0); + CPUMPushHyper(pVCpu, VMMRC_DO_TESTCASE_HM_NOP); + CPUMPushHyper(pVCpu, pVM->pVMRC); + CPUMPushHyper(pVCpu, 3 * sizeof(RTRCPTR)); /* stack frame size */ + CPUMPushHyper(pVCpu, RCPtrEP); /* what to call */ + + pHyperCtx = CPUMGetHyperCtxPtr(pVCpu); + pGuestCtx = CPUMQueryGuestCtxPtr(pVCpu); + + /* Copy the hypervisor context to make sure we have a valid guest context. */ + *pGuestCtx = *pHyperCtx; + pGuestCtx->cr3 = CR3Phys; + + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TIMER); + VM_FF_CLEAR(pVM, VM_FF_TM_VIRTUAL_SYNC); + + uint64_t TickThisStart = ASMReadTSC(); + rc = SUPR3CallVMMR0Fast(pVM->pVMR0, VMMR0_DO_HM_RUN, 0); + uint64_t TickThisElapsed = ASMReadTSC() - TickThisStart; + if (RT_FAILURE(rc)) + { + Log(("VMM: R0 returned fatal %Rrc in iteration %d\n", rc, i)); + VMMR3FatalDump(pVM, pVCpu, rc); + return rc; + } + if (TickThisElapsed < TickMin) + TickMin = TickThisElapsed; + } + uint64_t TickEnd = ASMReadTSC(); + uint64_t tsEnd = RTTimeNanoTS(); + + uint64_t Elapsed = tsEnd - tsBegin; + uint64_t PerIteration = Elapsed / (uint64_t)i; + uint64_t cTicksElapsed = TickEnd - TickStart; + uint64_t cTicksPerIteration = cTicksElapsed / (uint64_t)i; + + RTPrintf("VMM: %8d cycles in %11llu ns (%11lld ticks), %10llu ns/iteration (%11lld ticks) Min %11lld ticks\n", + i, Elapsed, cTicksElapsed, PerIteration, cTicksPerIteration, TickMin); + Log(("VMM: %8d cycles in %11llu ns (%11lld ticks), %10llu ns/iteration (%11lld ticks) Min %11lld ticks\n", + i, Elapsed, cTicksElapsed, PerIteration, cTicksPerIteration, TickMin)); + + rc = VINF_SUCCESS; + } + else + AssertMsgFailed(("Failed to resolved VMMRC.rc::VMMRCEntry(), rc=%Rrc\n", rc)); + + return rc; +} + + +#ifdef VBOX_WITH_RAW_MODE + +/** + * Used by VMMDoBruteForceMsrs to dump the CPUID info of the host CPU as a + * prefix to the MSR report. 
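+ *
+ * @remarks Relies on the caller embedding the DBGFINFOHLP immediately after a
+ * PRTSTREAM member in a local struct, so that ((PRTSTREAM *)pHlp)[-1]
+ * recovers the output stream (see MyHlp in VMMDoBruteForceMsrs).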
+ */ +static DECLCALLBACK(void) vmmDoPrintfVToStream(PCDBGFINFOHLP pHlp, const char *pszFormat, va_list va) +{ + PRTSTREAM pOutStrm = ((PRTSTREAM *)pHlp)[-1]; + RTStrmPrintfV(pOutStrm, pszFormat, va); +} + +/** + * Used by VMMDoBruteForceMsrs to dump the CPUID info of the host CPU as a + * prefix to the MSR report. + */ +static DECLCALLBACK(void) vmmDoPrintfToStream(PCDBGFINFOHLP pHlp, const char *pszFormat, ...) +{ + va_list va; + va_start(va, pszFormat); + vmmDoPrintfVToStream(pHlp, pszFormat, va); + va_end(va); +} + +#endif + + +/** + * Uses raw-mode to query all possible MSRs on the real hardware. + * + * This generates a msr-report.txt file (appending, no overwriting) as well as + * writing the values and process to stdout. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) VMMDoBruteForceMsrs(PVM pVM) +{ +#ifdef VBOX_WITH_RAW_MODE + PRTSTREAM pOutStrm; + int rc = RTStrmOpen("msr-report.txt", "a", &pOutStrm); + if (RT_SUCCESS(rc)) + { + /* Header */ + struct + { + PRTSTREAM pOutStrm; + DBGFINFOHLP Hlp; + } MyHlp = { pOutStrm, { vmmDoPrintfToStream, vmmDoPrintfVToStream } }; + DBGFR3Info(pVM->pUVM, "cpuid", "verbose", &MyHlp.Hlp); + RTStrmPrintf(pOutStrm, "\n"); + + uint32_t cMsrsFound = 0; + vmmR3ReportMsrRange(pVM, 0, _4G, pOutStrm, &cMsrsFound); + + RTStrmPrintf(pOutStrm, "Total %u (%#x) MSRs\n", cMsrsFound, cMsrsFound); + RTPrintf("Total %u (%#x) MSRs\n", cMsrsFound, cMsrsFound); + + RTStrmClose(pOutStrm); + } + return rc; +#else + RT_NOREF(pVM); + return VERR_NOT_SUPPORTED; +#endif +} + + +/** + * Uses raw-mode to query all known MSRS on the real hardware. + * + * This generates a known-msr-report.txt file (appending, no overwriting) as + * well as writing the values and process to stdout. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) VMMDoKnownMsrs(PVM pVM) +{ +#ifdef VBOX_WITH_RAW_MODE + PRTSTREAM pOutStrm; + int rc = RTStrmOpen("known-msr-report.txt", "a", &pOutStrm); + if (RT_SUCCESS(rc)) + { + vmmR3DoMsrQuickReport(pVM, pOutStrm, false); + RTStrmClose(pOutStrm); + } + return rc; +#else + RT_NOREF(pVM); + return VERR_NOT_SUPPORTED; +#endif +} + + +/** + * MSR experimentation. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + */ +VMMR3DECL(int) VMMDoMsrExperiments(PVM pVM) +{ +#ifdef VBOX_WITH_RAW_MODE + /* + * Preps. + */ + RTRCPTR RCPtrEP; + int rc = PDMR3LdrGetSymbolRC(pVM, VMMRC_MAIN_MODULE_NAME, "VMMRCTestTestWriteMsr", &RCPtrEP); + AssertMsgRCReturn(rc, ("Failed to resolved VMMRC.rc::VMMRCEntry(), rc=%Rrc\n", rc), rc); + + uint64_t *pauValues; + rc = MMHyperAlloc(pVM, 2 * sizeof(uint64_t), 0, MM_TAG_VMM, (void **)&pauValues); + AssertMsgRCReturn(rc, ("Error allocating %#x bytes off the hyper heap: %Rrc\n", 2 * sizeof(uint64_t), rc), rc); + RTRCPTR RCPtrValues = MMHyperR3ToRC(pVM, pauValues); + + /* + * Do the experiments. 
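+ * (The blocks below write MSR 0x00000277, i.e. IA32_PAT, via
+ * VMMRCTestTestWriteMsr, flipping individual bits and printing the
+ * before/written/after values to see which bits stick.)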
+ */ + uint32_t uMsr = 0x00000277; + uint64_t uValue = UINT64_C(0x0007010600070106); +# if 0 + uValue &= ~(RT_BIT_64(17) | RT_BIT_64(16) | RT_BIT_64(15) | RT_BIT_64(14) | RT_BIT_64(13)); + uValue |= RT_BIT_64(13); + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc\n", + uMsr, pauValues[0], uValue, pauValues[1], rc); +# elif 1 + const uint64_t uOrgValue = uValue; + uint32_t cChanges = 0; + for (int iBit = 63; iBit >= 58; iBit--) + { + uValue = uOrgValue & ~RT_BIT_64(iBit); + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc\nclear bit=%u -> %s\n", + uMsr, pauValues[0], uValue, pauValues[1], rc, iBit, + (pauValues[0] ^ pauValues[1]) & RT_BIT_64(iBit) ? "changed" : "unchanged"); + cChanges += RT_BOOL(pauValues[0] ^ pauValues[1]); + + uValue = uOrgValue | RT_BIT_64(iBit); + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc\nset bit=%u -> %s\n", + uMsr, pauValues[0], uValue, pauValues[1], rc, iBit, + (pauValues[0] ^ pauValues[1]) & RT_BIT_64(iBit) ? "changed" : "unchanged"); + cChanges += RT_BOOL(pauValues[0] ^ pauValues[1]); + } + RTPrintf("%u change(s)\n", cChanges); +# else + uint64_t fWriteable = 0; + for (uint32_t i = 0; i <= 63; i++) + { + uValue = RT_BIT_64(i); +# if 0 + if (uValue & (0x7)) + continue; +# endif + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc\n", + uMsr, pauValues[0], uValue, pauValues[1], rc); + if (RT_SUCCESS(rc)) + fWriteable |= RT_BIT_64(i); + } + + uValue = 0; + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc\n", + uMsr, pauValues[0], uValue, pauValues[1], rc); + + uValue = UINT64_MAX; + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc\n", + uMsr, pauValues[0], uValue, pauValues[1], rc); + + uValue = fWriteable; + rc = VMMR3CallRC(pVM, RCPtrEP, 6, pVM->pVMRC, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue), + RCPtrValues, RCPtrValues + sizeof(uint64_t)); + RTPrintf("uMsr=%#010x before=%#018llx written=%#018llx after=%#018llx rc=%Rrc [fWriteable]\n", + uMsr, pauValues[0], uValue, pauValues[1], rc); + +# endif + + /* + * Cleanups. + */ + MMHyperFree(pVM, pauValues); + return rc; +#else + RT_NOREF(pVM); + return VERR_NOT_SUPPORTED; +#endif +} + diff --git a/src/VBox/VMM/VMMR3/VMReq.cpp b/src/VBox/VMM/VMMR3/VMReq.cpp new file mode 100644 index 00000000..61723eaf --- /dev/null +++ b/src/VBox/VMM/VMMR3/VMReq.cpp @@ -0,0 +1,1333 @@ +/* $Id: VMReq.cpp $ */ +/** @file + * VM - Virtual Machine + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_VM +#include +#include +#include "VMInternal.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static int vmR3ReqProcessOne(PVMREQ pReq); + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns an VBox status code, + * (2) that you want it's return code on success, and (3) that you wish to wait + * for ever for it to return. + * + * @returns VBox status code. In the unlikely event that VMR3ReqCallVU fails, + * its status code is return. Otherwise, the status of pfnFunction is + * returned. + * + * @param pVM The cross context VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + * @internal + */ +VMMR3_INT_DECL(int) VMR3ReqCallWait(PVM pVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pVM->pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS, + pfnFunction, cArgs, va); + va_end(va); + if (RT_SUCCESS(rc)) + rc = pReq->iStatus; + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns an VBox status code, + * (2) that you want it's return code on success, and (3) that you wish to wait + * for ever for it to return. + * + * @returns VBox status code. In the unlikely event that VMR3ReqCallVU fails, + * its status code is return. Otherwise, the status of pfnFunction is + * returned. + * + * @param pUVM The user mode VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + * @internal + */ +VMMR3DECL(int) VMR3ReqCallWaitU(PUVM pUVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) 
+{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS, + pfnFunction, cArgs, va); + va_end(va); + if (RT_SUCCESS(rc)) + rc = pReq->iStatus; + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns an VBox status code + * and that you do not wish to wait for it to complete. + * + * @returns VBox status code returned by VMR3ReqCallVU. + * + * @param pVM The cross context VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + * @internal + */ +VMMR3DECL(int) VMR3ReqCallNoWait(PVM pVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pVM->pUVM, idDstCpu, NULL, 0, VMREQFLAGS_VBOX_STATUS | VMREQFLAGS_NO_WAIT, + pfnFunction, cArgs, va); + va_end(va); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns an VBox status code + * and that you do not wish to wait for it to complete. + * + * @returns VBox status code returned by VMR3ReqCallVU. + * + * @param pUVM Pointer to the VM. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + */ +VMMR3DECL(int) VMR3ReqCallNoWaitU(PUVM pUVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pUVM, idDstCpu, NULL, 0, VMREQFLAGS_VBOX_STATUS | VMREQFLAGS_NO_WAIT, + pfnFunction, cArgs, va); + va_end(va); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns void, and (2) that + * you wish to wait for ever for it to return. + * + * @returns VBox status code of VMR3ReqCallVU. + * + * @param pVM The cross context VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + * @internal + */ +VMMR3_INT_DECL(int) VMR3ReqCallVoidWait(PVM pVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pVM->pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VOID, + pfnFunction, cArgs, va); + va_end(va); + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns void, and (2) that + * you wish to wait for ever for it to return. + * + * @returns VBox status code of VMR3ReqCallVU. + * + * @param pUVM Pointer to the VM. 
+ * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + */ +VMMR3DECL(int) VMR3ReqCallVoidWaitU(PUVM pUVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VOID, + pfnFunction, cArgs, va); + va_end(va); + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns void, and (2) that + * you do not wish to wait for it to complete. + * + * @returns VBox status code of VMR3ReqCallVU. + * + * @param pVM The cross context VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + * @internal + */ +VMMR3DECL(int) VMR3ReqCallVoidNoWait(PVM pVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pVM->pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VOID | VMREQFLAGS_NO_WAIT, + pfnFunction, cArgs, va); + va_end(va); + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns an VBox status code, + * (2) that you want it's return code on success, (3) that you wish to wait for + * ever for it to return, and (4) that it's priority request that can be safely + * be handled during async suspend and power off. + * + * @returns VBox status code. In the unlikely event that VMR3ReqCallVU fails, + * its status code is return. Otherwise, the status of pfnFunction is + * returned. + * + * @param pVM The cross context VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + * @internal + */ +VMMR3DECL(int) VMR3ReqPriorityCallWait(PVM pVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pVM->pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS | VMREQFLAGS_PRIORITY, + pfnFunction, cArgs, va); + va_end(va); + if (RT_SUCCESS(rc)) + rc = pReq->iStatus; + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns an VBox status code, + * (2) that you want it's return code on success, (3) that you wish to wait for + * ever for it to return, and (4) that it's priority request that can be safely + * be handled during async suspend and power off. + * + * @returns VBox status code. 
In the unlikely event that VMR3ReqCallVU fails, + * its status code is return. Otherwise, the status of pfnFunction is + * returned. + * + * @param pUVM The user mode VM handle. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + */ +VMMR3DECL(int) VMR3ReqPriorityCallWaitU(PUVM pUVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VBOX_STATUS | VMREQFLAGS_PRIORITY, + pfnFunction, cArgs, va); + va_end(va); + if (RT_SUCCESS(rc)) + rc = pReq->iStatus; + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Convenience wrapper for VMR3ReqCallU. + * + * This assumes (1) you're calling a function that returns void, (2) that you + * wish to wait for ever for it to return, and (3) that it's priority request + * that can be safely be handled during async suspend and power off. + * + * @returns VBox status code of VMR3ReqCallVU. + * + * @param pUVM The user mode VM handle. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. + */ +VMMR3DECL(int) VMR3ReqPriorityCallVoidWaitU(PUVM pUVM, VMCPUID idDstCpu, PFNRT pfnFunction, unsigned cArgs, ...) +{ + PVMREQ pReq; + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pUVM, idDstCpu, &pReq, RT_INDEFINITE_WAIT, VMREQFLAGS_VOID | VMREQFLAGS_PRIORITY, + pfnFunction, cArgs, va); + va_end(va); + VMR3ReqFree(pReq); + return rc; +} + + +/** + * Allocate and queue a call request to a void function. + * + * If it's desired to poll on the completion of the request set cMillies + * to 0 and use VMR3ReqWait() to check for completion. In the other case + * use RT_INDEFINITE_WAIT. + * The returned request packet must be freed using VMR3ReqFree(). + * + * @returns VBox status code. + * Will not return VERR_INTERRUPTED. + * @returns VERR_TIMEOUT if cMillies was reached without the packet being completed. + * + * @param pUVM Pointer to the user mode VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param ppReq Where to store the pointer to the request. + * This will be NULL or a valid request pointer not matter what happens, unless fFlags + * contains VMREQFLAGS_NO_WAIT when it will be optional and always NULL. + * @param cMillies Number of milliseconds to wait for the request to + * be completed. Use RT_INDEFINITE_WAIT to only + * wait till it's completed. + * @param fFlags A combination of the VMREQFLAGS values. + * @param pfnFunction Pointer to the function to call. + * @param cArgs Number of arguments following in the ellipsis. + * @param ... Function arguments. + * + * @remarks See remarks on VMR3ReqCallVU. 
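+ *
+ * @remarks A minimal usage sketch (illustrative only; pfnMyWorker is a
+ *          hypothetical function taking (PUVM, uintptr_t) and returning a
+ *          VBox status code; note that (uintptr_t)0 is passed instead of
+ *          NULL, per the caveats on VMR3ReqCallVU):
+ *          @code
+ *              PVMREQ pReq = NULL;
+ *              int rc = VMR3ReqCallU(pUVM, VMCPUID_ANY, &pReq, RT_INDEFINITE_WAIT,
+ *                                    VMREQFLAGS_VBOX_STATUS, (PFNRT)pfnMyWorker,
+ *                                    2, pUVM, (uintptr_t)0);
+ *              if (RT_SUCCESS(rc))
+ *                  rc = pReq->iStatus;
+ *              VMR3ReqFree(pReq);
+ *          @endcode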
+ */ +VMMR3DECL(int) VMR3ReqCallU(PUVM pUVM, VMCPUID idDstCpu, PVMREQ *ppReq, RTMSINTERVAL cMillies, uint32_t fFlags, + PFNRT pfnFunction, unsigned cArgs, ...) +{ + va_list va; + va_start(va, cArgs); + int rc = VMR3ReqCallVU(pUVM, idDstCpu, ppReq, cMillies, fFlags, pfnFunction, cArgs, va); + va_end(va); + return rc; +} + + +/** + * Allocate and queue a call request. + * + * If it's desired to poll on the completion of the request set cMillies + * to 0 and use VMR3ReqWait() to check for completion. In the other case + * use RT_INDEFINITE_WAIT. + * The returned request packet must be freed using VMR3ReqFree(). + * + * @returns VBox status code. + * Will not return VERR_INTERRUPTED. + * @returns VERR_TIMEOUT if cMillies was reached without the packet being completed. + * + * @param pUVM Pointer to the user mode VM structure. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + * @param ppReq Where to store the pointer to the request. + * This will be NULL or a valid request pointer not matter what happens, unless fFlags + * contains VMREQFLAGS_NO_WAIT when it will be optional and always NULL. + * @param cMillies Number of milliseconds to wait for the request to + * be completed. Use RT_INDEFINITE_WAIT to only + * wait till it's completed. + * @param pfnFunction Pointer to the function to call. + * @param fFlags A combination of the VMREQFLAGS values. + * @param cArgs Number of arguments following in the ellipsis. + * Stuff which differs in size from uintptr_t is gonna make trouble, so don't try! + * @param Args Argument vector. + * + * @remarks Caveats: + * - Do not pass anything which is larger than an uintptr_t. + * - 64-bit integers are larger than uintptr_t on 32-bit hosts. + * Pass integers > 32-bit by reference (pointers). + * - Don't use NULL since it should be the integer 0 in C++ and may + * therefore end up with garbage in the bits 63:32 on 64-bit + * hosts because 'int' is 32-bit. + * Use (void *)NULL or (uintptr_t)0 instead of NULL. + */ +VMMR3DECL(int) VMR3ReqCallVU(PUVM pUVM, VMCPUID idDstCpu, PVMREQ *ppReq, RTMSINTERVAL cMillies, uint32_t fFlags, + PFNRT pfnFunction, unsigned cArgs, va_list Args) +{ + LogFlow(("VMR3ReqCallV: idDstCpu=%u cMillies=%d fFlags=%#x pfnFunction=%p cArgs=%d\n", idDstCpu, cMillies, fFlags, pfnFunction, cArgs)); + + /* + * Validate input. + */ + AssertPtrReturn(pfnFunction, VERR_INVALID_POINTER); + UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE); + AssertReturn(!(fFlags & ~(VMREQFLAGS_RETURN_MASK | VMREQFLAGS_NO_WAIT | VMREQFLAGS_POKE | VMREQFLAGS_PRIORITY)), VERR_INVALID_PARAMETER); + if (!(fFlags & VMREQFLAGS_NO_WAIT) || ppReq) + { + AssertPtrReturn(ppReq, VERR_INVALID_POINTER); + *ppReq = NULL; + } + PVMREQ pReq = NULL; + AssertMsgReturn(cArgs * sizeof(uintptr_t) <= sizeof(pReq->u.Internal.aArgs), + ("cArg=%d\n", cArgs), + VERR_TOO_MUCH_DATA); + + /* + * Allocate request + */ + int rc = VMR3ReqAlloc(pUVM, &pReq, VMREQTYPE_INTERNAL, idDstCpu); + if (RT_FAILURE(rc)) + return rc; + + /* + * Initialize the request data. + */ + pReq->fFlags = fFlags; + pReq->u.Internal.pfn = pfnFunction; + pReq->u.Internal.cArgs = cArgs; + for (unsigned iArg = 0; iArg < cArgs; iArg++) + pReq->u.Internal.aArgs[iArg] = va_arg(Args, uintptr_t); + + /* + * Queue the request and return. 
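+     * On any failure other than VERR_TIMEOUT the request is freed again right
+     * here; with VMREQFLAGS_NO_WAIT the caller never gets a request pointer
+     * back at all.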
+ */ + rc = VMR3ReqQueue(pReq, cMillies); + if ( RT_FAILURE(rc) + && rc != VERR_TIMEOUT) + { + VMR3ReqFree(pReq); + pReq = NULL; + } + if (!(fFlags & VMREQFLAGS_NO_WAIT)) + { + *ppReq = pReq; + LogFlow(("VMR3ReqCallV: returns %Rrc *ppReq=%p\n", rc, pReq)); + } + else + LogFlow(("VMR3ReqCallV: returns %Rrc\n", rc)); + Assert(rc != VERR_INTERRUPTED); + return rc; +} + + +/** + * Joins the list pList with whatever is linked up at *pHead. + */ +static void vmr3ReqJoinFreeSub(volatile PVMREQ *ppHead, PVMREQ pList) +{ + for (unsigned cIterations = 0;; cIterations++) + { + PVMREQ pHead = ASMAtomicXchgPtrT(ppHead, pList, PVMREQ); + if (!pHead) + return; + PVMREQ pTail = pHead; + while (pTail->pNext) + pTail = pTail->pNext; + ASMAtomicWritePtr(&pTail->pNext, pList); + ASMCompilerBarrier(); + if (ASMAtomicCmpXchgPtr(ppHead, pHead, pList)) + return; + ASMAtomicWriteNullPtr(&pTail->pNext); + ASMCompilerBarrier(); + if (ASMAtomicCmpXchgPtr(ppHead, pHead, NULL)) + return; + pList = pHead; + Assert(cIterations != 32); + Assert(cIterations != 64); + } +} + + +/** + * Joins the list pList with whatever is linked up at *pHead. + */ +static void vmr3ReqJoinFree(PVMINTUSERPERVM pVMInt, PVMREQ pList) +{ + /* + * Split the list if it's too long. + */ + unsigned cReqs = 1; + PVMREQ pTail = pList; + while (pTail->pNext) + { + if (cReqs++ > 25) + { + const uint32_t i = pVMInt->iReqFree; + vmr3ReqJoinFreeSub(&pVMInt->apReqFree[(i + 2) % RT_ELEMENTS(pVMInt->apReqFree)], pTail->pNext); + + pTail->pNext = NULL; + vmr3ReqJoinFreeSub(&pVMInt->apReqFree[(i + 2 + (i == pVMInt->iReqFree)) % RT_ELEMENTS(pVMInt->apReqFree)], pTail->pNext); + return; + } + pTail = pTail->pNext; + } + vmr3ReqJoinFreeSub(&pVMInt->apReqFree[(pVMInt->iReqFree + 2) % RT_ELEMENTS(pVMInt->apReqFree)], pList); +} + + +/** + * Allocates a request packet. + * + * The caller allocates a request packet, fills in the request data + * union and queues the request. + * + * @returns VBox status code. + * + * @param pUVM Pointer to the user mode VM structure. + * @param ppReq Where to store the pointer to the allocated packet. + * @param enmType Package type. + * @param idDstCpu The destination CPU(s). Either a specific CPU ID or + * one of the following special values: + * VMCPUID_ANY, VMCPUID_ANY_QUEUE, VMCPUID_ALL or VMCPUID_ALL_REVERSE. + */ +VMMR3DECL(int) VMR3ReqAlloc(PUVM pUVM, PVMREQ *ppReq, VMREQTYPE enmType, VMCPUID idDstCpu) +{ + /* + * Validate input. + */ + AssertMsgReturn(enmType > VMREQTYPE_INVALID && enmType < VMREQTYPE_MAX, + ("Invalid package type %d valid range %d-%d inclusively.\n", + enmType, VMREQTYPE_INVALID + 1, VMREQTYPE_MAX - 1), + VERR_VM_REQUEST_INVALID_TYPE); + AssertPtrReturn(ppReq, VERR_INVALID_POINTER); + AssertMsgReturn( idDstCpu == VMCPUID_ANY + || idDstCpu == VMCPUID_ANY_QUEUE + || idDstCpu < pUVM->cCpus + || idDstCpu == VMCPUID_ALL + || idDstCpu == VMCPUID_ALL_REVERSE, + ("Invalid destination %u (max=%u)\n", idDstCpu, pUVM->cCpus), VERR_INVALID_PARAMETER); + + /* + * Try get a recycled packet. + * While this could all be solved with a single list with a lock, it's a sport + * of mine to avoid locks. + */ + int cTries = RT_ELEMENTS(pUVM->vm.s.apReqFree) * 2; + while (--cTries >= 0) + { + PVMREQ volatile *ppHead = &pUVM->vm.s.apReqFree[ASMAtomicIncU32(&pUVM->vm.s.iReqFree) % RT_ELEMENTS(pUVM->vm.s.apReqFree)]; +#if 0 /* sad, but this won't work safely because the reading of pReq->pNext. 
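+          Another EMT can pop and start recycling the request between the read
+          of *ppHead and the dereference of pReq->pNext, so the compare-exchange
+          may succeed with a stale pNext (the classic lock-free pop race).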
*/ + PVMREQ pNext = NULL; + PVMREQ pReq = *ppHead; + if ( pReq + && !ASMAtomicCmpXchgPtr(ppHead, (pNext = pReq->pNext), pReq) + && (pReq = *ppHead) + && !ASMAtomicCmpXchgPtr(ppHead, (pNext = pReq->pNext), pReq)) + pReq = NULL; + if (pReq) + { + Assert(pReq->pNext == pNext); NOREF(pReq); +#else + PVMREQ pReq = ASMAtomicXchgPtrT(ppHead, NULL, PVMREQ); + if (pReq) + { + PVMREQ pNext = pReq->pNext; + if ( pNext + && !ASMAtomicCmpXchgPtr(ppHead, pNext, NULL)) + { + STAM_COUNTER_INC(&pUVM->vm.s.StatReqAllocRaces); + vmr3ReqJoinFree(&pUVM->vm.s, pReq->pNext); + } +#endif + ASMAtomicDecU32(&pUVM->vm.s.cReqFree); + + /* + * Make sure the event sem is not signaled. + */ + if (!pReq->fEventSemClear) + { + int rc = RTSemEventWait(pReq->EventSem, 0); + if (rc != VINF_SUCCESS && rc != VERR_TIMEOUT) + { + /* + * This shall not happen, but if it does we'll just destroy + * the semaphore and create a new one. + */ + AssertMsgFailed(("rc=%Rrc from RTSemEventWait(%#x).\n", rc, pReq->EventSem)); + RTSemEventDestroy(pReq->EventSem); + rc = RTSemEventCreate(&pReq->EventSem); + AssertRC(rc); + if (RT_FAILURE(rc)) + return rc; +#if 0 /// @todo @bugref{4725} - def RT_LOCK_STRICT + for (VMCPUID idCpu = 0; idCpu < pUVM->cCpus; idCpu++) + RTSemEventAddSignaller(pReq->EventSem, pUVM->aCpus[idCpu].vm.s.ThreadEMT); +#endif + } + pReq->fEventSemClear = true; + } + else + Assert(RTSemEventWait(pReq->EventSem, 0) == VERR_TIMEOUT); + + /* + * Initialize the packet and return it. + */ + Assert(pReq->enmType == VMREQTYPE_INVALID); + Assert(pReq->enmState == VMREQSTATE_FREE); + Assert(pReq->pUVM == pUVM); + ASMAtomicXchgSize(&pReq->pNext, NULL); + pReq->enmState = VMREQSTATE_ALLOCATED; + pReq->iStatus = VERR_VM_REQUEST_STATUS_STILL_PENDING; + pReq->fFlags = VMREQFLAGS_VBOX_STATUS; + pReq->enmType = enmType; + pReq->idDstCpu = idDstCpu; + + *ppReq = pReq; + STAM_COUNTER_INC(&pUVM->vm.s.StatReqAllocRecycled); + LogFlow(("VMR3ReqAlloc: returns VINF_SUCCESS *ppReq=%p recycled\n", pReq)); + return VINF_SUCCESS; + } + } + + /* + * Ok allocate one. + */ + PVMREQ pReq = (PVMREQ)MMR3HeapAllocU(pUVM, MM_TAG_VM_REQ, sizeof(*pReq)); + if (!pReq) + return VERR_NO_MEMORY; + + /* + * Create the semaphore. + */ + int rc = RTSemEventCreate(&pReq->EventSem); + AssertRC(rc); + if (RT_FAILURE(rc)) + { + MMR3HeapFree(pReq); + return rc; + } +#if 0 /// @todo @bugref{4725} - def RT_LOCK_STRICT + for (VMCPUID idCpu = 0; idCpu < pUVM->cCpus; idCpu++) + RTSemEventAddSignaller(pReq->EventSem, pUVM->aCpus[idCpu].vm.s.ThreadEMT); +#endif + + /* + * Initialize the packet and return it. + */ + pReq->pNext = NULL; + pReq->pUVM = pUVM; + pReq->enmState = VMREQSTATE_ALLOCATED; + pReq->iStatus = VERR_VM_REQUEST_STATUS_STILL_PENDING; + pReq->fEventSemClear = true; + pReq->fFlags = VMREQFLAGS_VBOX_STATUS; + pReq->enmType = enmType; + pReq->idDstCpu = idDstCpu; + + *ppReq = pReq; + STAM_COUNTER_INC(&pUVM->vm.s.StatReqAllocNew); + LogFlow(("VMR3ReqAlloc: returns VINF_SUCCESS *ppReq=%p new\n", pReq)); + return VINF_SUCCESS; +} + + +/** + * Free a request packet. + * + * @returns VBox status code. + * + * @param pReq Package to free. + * @remark The request packet must be in allocated or completed state! + */ +VMMR3DECL(int) VMR3ReqFree(PVMREQ pReq) +{ + /* + * Ignore NULL (all free functions should do this imho). + */ + if (!pReq) + return VINF_SUCCESS; + + /* + * Check packet state. 
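+     * Only ALLOCATED and COMPLETED packets may be freed; anything else points
+     * to a confused caller or a double free.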
+ */ + switch (pReq->enmState) + { + case VMREQSTATE_ALLOCATED: + case VMREQSTATE_COMPLETED: + break; + default: + AssertMsgFailed(("Invalid state %d!\n", pReq->enmState)); + return VERR_VM_REQUEST_STATE; + } + + /* + * Make it a free packet and put it into one of the free packet lists. + */ + pReq->enmState = VMREQSTATE_FREE; + pReq->iStatus = VERR_VM_REQUEST_STATUS_FREED; + pReq->enmType = VMREQTYPE_INVALID; + + PUVM pUVM = pReq->pUVM; + STAM_COUNTER_INC(&pUVM->vm.s.StatReqFree); + + if (pUVM->vm.s.cReqFree < 128) + { + ASMAtomicIncU32(&pUVM->vm.s.cReqFree); + PVMREQ volatile *ppHead = &pUVM->vm.s.apReqFree[ASMAtomicIncU32(&pUVM->vm.s.iReqFree) % RT_ELEMENTS(pUVM->vm.s.apReqFree)]; + PVMREQ pNext; + do + { + pNext = ASMAtomicUoReadPtrT(ppHead, PVMREQ); + ASMAtomicWritePtr(&pReq->pNext, pNext); + ASMCompilerBarrier(); + } while (!ASMAtomicCmpXchgPtr(ppHead, pReq, pNext)); + } + else + { + STAM_COUNTER_INC(&pReq->pUVM->vm.s.StatReqFreeOverflow); + RTSemEventDestroy(pReq->EventSem); + MMR3HeapFree(pReq); + } + return VINF_SUCCESS; +} + + +/** + * Queue a request. + * + * The quest must be allocated using VMR3ReqAlloc() and contain + * all the required data. + * If it's desired to poll on the completion of the request set cMillies + * to 0 and use VMR3ReqWait() to check for completion. In the other case + * use RT_INDEFINITE_WAIT. + * + * @returns VBox status code. + * Will not return VERR_INTERRUPTED. + * @returns VERR_TIMEOUT if cMillies was reached without the packet being completed. + * + * @param pReq The request to queue. + * @param cMillies Number of milliseconds to wait for the request to + * be completed. Use RT_INDEFINITE_WAIT to only + * wait till it's completed. + */ +VMMR3DECL(int) VMR3ReqQueue(PVMREQ pReq, RTMSINTERVAL cMillies) +{ + LogFlow(("VMR3ReqQueue: pReq=%p cMillies=%d\n", pReq, cMillies)); + /* + * Verify the supplied package. + */ + AssertMsgReturn(pReq->enmState == VMREQSTATE_ALLOCATED, ("%d\n", pReq->enmState), VERR_VM_REQUEST_STATE); + AssertMsgReturn( VALID_PTR(pReq->pUVM) + && !pReq->pNext + && pReq->EventSem != NIL_RTSEMEVENT, + ("Invalid request package! Anyone cooking their own packages???\n"), + VERR_VM_REQUEST_INVALID_PACKAGE); + AssertMsgReturn( pReq->enmType > VMREQTYPE_INVALID + && pReq->enmType < VMREQTYPE_MAX, + ("Invalid package type %d valid range %d-%d inclusively. This was verified on alloc too...\n", + pReq->enmType, VMREQTYPE_INVALID + 1, VMREQTYPE_MAX - 1), + VERR_VM_REQUEST_INVALID_TYPE); + Assert(!(pReq->fFlags & ~(VMREQFLAGS_RETURN_MASK | VMREQFLAGS_NO_WAIT | VMREQFLAGS_POKE | VMREQFLAGS_PRIORITY))); + + /* + * Are we the EMT or not? + * Also, store pVM (and fFlags) locally since pReq may be invalid after queuing it. + */ + int rc = VINF_SUCCESS; + PUVM pUVM = ((VMREQ volatile *)pReq)->pUVM; /* volatile paranoia */ + PUVMCPU pUVCpu = (PUVMCPU)RTTlsGet(pUVM->vm.s.idxTLS); + + if (pReq->idDstCpu == VMCPUID_ALL) + { + /* One-by-one. */ + Assert(!(pReq->fFlags & VMREQFLAGS_NO_WAIT)); + for (unsigned i = 0; i < pUVM->cCpus; i++) + { + /* Reinit some members. */ + pReq->enmState = VMREQSTATE_ALLOCATED; + pReq->idDstCpu = i; + rc = VMR3ReqQueue(pReq, cMillies); + if (RT_FAILURE(rc)) + break; + } + } + else if (pReq->idDstCpu == VMCPUID_ALL_REVERSE) + { + /* One-by-one. */ + Assert(!(pReq->fFlags & VMREQFLAGS_NO_WAIT)); + for (int i = pUVM->cCpus-1; i >= 0; i--) + { + /* Reinit some members. 
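+               The same packet is re-queued once per VCPU, so its state and
+               target CPU have to be reset on every iteration.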
*/ + pReq->enmState = VMREQSTATE_ALLOCATED; + pReq->idDstCpu = i; + rc = VMR3ReqQueue(pReq, cMillies); + if (RT_FAILURE(rc)) + break; + } + } + else if ( pReq->idDstCpu != VMCPUID_ANY /* for a specific VMCPU? */ + && pReq->idDstCpu != VMCPUID_ANY_QUEUE + && ( !pUVCpu /* and it's not the current thread. */ + || pUVCpu->idCpu != pReq->idDstCpu)) + { + VMCPUID idTarget = pReq->idDstCpu; Assert(idTarget < pUVM->cCpus); + PVMCPU pVCpu = &pUVM->pVM->aCpus[idTarget]; + unsigned fFlags = ((VMREQ volatile *)pReq)->fFlags; /* volatile paranoia */ + + /* Fetch the right UVMCPU */ + pUVCpu = &pUVM->aCpus[idTarget]; + + /* + * Insert it. + */ + volatile PVMREQ *ppQueueHead = pReq->fFlags & VMREQFLAGS_PRIORITY ? &pUVCpu->vm.s.pPriorityReqs : &pUVCpu->vm.s.pNormalReqs; + pReq->enmState = VMREQSTATE_QUEUED; + PVMREQ pNext; + do + { + pNext = ASMAtomicUoReadPtrT(ppQueueHead, PVMREQ); + ASMAtomicWritePtr(&pReq->pNext, pNext); + ASMCompilerBarrier(); + } while (!ASMAtomicCmpXchgPtr(ppQueueHead, pReq, pNext)); + + /* + * Notify EMT. + */ + if (pUVM->pVM) + VMCPU_FF_SET(pVCpu, VMCPU_FF_REQUEST); + VMR3NotifyCpuFFU(pUVCpu, fFlags & VMREQFLAGS_POKE ? VMNOTIFYFF_FLAGS_POKE : 0); + + /* + * Wait and return. + */ + if (!(fFlags & VMREQFLAGS_NO_WAIT)) + rc = VMR3ReqWait(pReq, cMillies); + LogFlow(("VMR3ReqQueue: returns %Rrc\n", rc)); + } + else if ( ( pReq->idDstCpu == VMCPUID_ANY + && !pUVCpu /* only EMT threads have a valid pointer stored in the TLS slot. */) + || pReq->idDstCpu == VMCPUID_ANY_QUEUE) + { + unsigned fFlags = ((VMREQ volatile *)pReq)->fFlags; /* volatile paranoia */ + + /* Note: pUVCpu may or may not be NULL in the VMCPUID_ANY_QUEUE case; we don't care. */ + + /* + * Insert it. + */ + volatile PVMREQ *ppQueueHead = pReq->fFlags & VMREQFLAGS_PRIORITY ? &pUVM->vm.s.pPriorityReqs : &pUVM->vm.s.pNormalReqs; + pReq->enmState = VMREQSTATE_QUEUED; + PVMREQ pNext; + do + { + pNext = ASMAtomicUoReadPtrT(ppQueueHead, PVMREQ); + ASMAtomicWritePtr(&pReq->pNext, pNext); + ASMCompilerBarrier(); + } while (!ASMAtomicCmpXchgPtr(ppQueueHead, pReq, pNext)); + + /* + * Notify EMT. + */ + if (pUVM->pVM) + VM_FF_SET(pUVM->pVM, VM_FF_REQUEST); + VMR3NotifyGlobalFFU(pUVM, fFlags & VMREQFLAGS_POKE ? VMNOTIFYFF_FLAGS_POKE : 0); + + /* + * Wait and return. + */ + if (!(fFlags & VMREQFLAGS_NO_WAIT)) + rc = VMR3ReqWait(pReq, cMillies); + LogFlow(("VMR3ReqQueue: returns %Rrc\n", rc)); + } + else + { + Assert(pUVCpu); + + /* + * The requester was an EMT, just execute it. + */ + pReq->enmState = VMREQSTATE_QUEUED; + rc = vmR3ReqProcessOne(pReq); + LogFlow(("VMR3ReqQueue: returns %Rrc (processed)\n", rc)); + } + return rc; +} + + +/** + * Wait for a request to be completed. + * + * @returns VBox status code. + * @returns VERR_TIMEOUT if cMillies was reached without the packet being completed. + * + * @param pReq The request to wait for. + * @param cMillies Number of milliseconds to wait. + * Use RT_INDEFINITE_WAIT to only wait till it's completed. + */ +VMMR3DECL(int) VMR3ReqWait(PVMREQ pReq, RTMSINTERVAL cMillies) +{ + LogFlow(("VMR3ReqWait: pReq=%p cMillies=%d\n", pReq, cMillies)); + + /* + * Verify the supplied package. + */ + AssertMsgReturn( pReq->enmState == VMREQSTATE_QUEUED + || pReq->enmState == VMREQSTATE_PROCESSING + || pReq->enmState == VMREQSTATE_COMPLETED, + ("Invalid state %d\n", pReq->enmState), + VERR_VM_REQUEST_STATE); + AssertMsgReturn( VALID_PTR(pReq->pUVM) + && pReq->EventSem != NIL_RTSEMEVENT, + ("Invalid request package! 
Anyone cooking their own packages???\n"), + VERR_VM_REQUEST_INVALID_PACKAGE); + AssertMsgReturn( pReq->enmType > VMREQTYPE_INVALID + && pReq->enmType < VMREQTYPE_MAX, + ("Invalid package type %d valid range %d-%d inclusively. This was verified on alloc too...\n", + pReq->enmType, VMREQTYPE_INVALID + 1, VMREQTYPE_MAX - 1), + VERR_VM_REQUEST_INVALID_TYPE); + + /* + * Check for deadlock condition + */ + PUVM pUVM = pReq->pUVM; + NOREF(pUVM); + + /* + * Wait on the package. + */ + int rc; + if (cMillies != RT_INDEFINITE_WAIT) + rc = RTSemEventWait(pReq->EventSem, cMillies); + else + { + do + { + rc = RTSemEventWait(pReq->EventSem, RT_INDEFINITE_WAIT); + Assert(rc != VERR_TIMEOUT); + } while ( pReq->enmState != VMREQSTATE_COMPLETED + && pReq->enmState != VMREQSTATE_INVALID); + } + if (RT_SUCCESS(rc)) + ASMAtomicXchgSize(&pReq->fEventSemClear, true); + if (pReq->enmState == VMREQSTATE_COMPLETED) + rc = VINF_SUCCESS; + LogFlow(("VMR3ReqWait: returns %Rrc\n", rc)); + Assert(rc != VERR_INTERRUPTED); + return rc; +} + + +/** + * Sets the relevant FF. + * + * @param pUVM Pointer to the user mode VM structure. + * @param idDstCpu VMCPUID_ANY or the ID of the current CPU. + */ +DECLINLINE(void) vmR3ReqSetFF(PUVM pUVM, VMCPUID idDstCpu) +{ + if (RT_LIKELY(pUVM->pVM)) + { + if (idDstCpu == VMCPUID_ANY) + VM_FF_SET(pUVM->pVM, VM_FF_REQUEST); + else + VMCPU_FF_SET(&pUVM->pVM->aCpus[idDstCpu], VMCPU_FF_REQUEST); + } +} + + +/** + * VMR3ReqProcessU helper that handles cases where there are more than one + * pending request. + * + * @returns The oldest request. + * @param pUVM Pointer to the user mode VM structure + * @param idDstCpu VMCPUID_ANY or virtual CPU ID. + * @param pReqList The list of requests. + * @param ppReqs Pointer to the list head. + */ +static PVMREQ vmR3ReqProcessUTooManyHelper(PUVM pUVM, VMCPUID idDstCpu, PVMREQ pReqList, PVMREQ volatile *ppReqs) +{ + STAM_COUNTER_INC(&pUVM->vm.s.StatReqMoreThan1); + + /* + * Chop off the last one (pReq). + */ + PVMREQ pPrev; + PVMREQ pReqRet = pReqList; + do + { + pPrev = pReqRet; + pReqRet = pReqRet->pNext; + } while (pReqRet->pNext); + ASMAtomicWriteNullPtr(&pPrev->pNext); + + /* + * Push the others back onto the list (end of it). + */ + Log2(("VMR3ReqProcess: Pushing back %p %p...\n", pReqList, pReqList->pNext)); + if (RT_UNLIKELY(!ASMAtomicCmpXchgPtr(ppReqs, pReqList, NULL))) + { + STAM_COUNTER_INC(&pUVM->vm.s.StatReqPushBackRaces); + do + { + ASMNopPause(); + PVMREQ pReqList2 = ASMAtomicXchgPtrT(ppReqs, NULL, PVMREQ); + if (pReqList2) + { + PVMREQ pLast = pReqList2; + while (pLast->pNext) + pLast = pLast->pNext; + ASMAtomicWritePtr(&pLast->pNext, pReqList); + pReqList = pReqList2; + } + } while (!ASMAtomicCmpXchgPtr(ppReqs, pReqList, NULL)); + } + + vmR3ReqSetFF(pUVM, idDstCpu); + return pReqRet; +} + + +/** + * Process pending request(s). + * + * This function is called from a forced action handler in the EMT + * or from one of the EMT loops. + * + * @returns VBox status code. + * + * @param pUVM Pointer to the user mode VM structure. + * @param idDstCpu Pass VMCPUID_ANY to process the common request queue + * and the CPU ID for a CPU specific one. In the latter + * case the calling thread must be the EMT of that CPU. + * @param fPriorityOnly When set, only process the priority request queue. + * + * @note SMP safe (multiple EMTs trying to satisfy VM_FF_REQUESTs). + * + * @remarks This was made reentrant for async PDM handling, the debugger and + * others. 
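+ *
+ *          Priority requests are always serviced before normal ones, and the
+ *          loop stops as soon as a request returns an informational VINF_EM_*
+ *          status so the caller (usually EM) can act on it.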
+ * @internal + */ +VMMR3_INT_DECL(int) VMR3ReqProcessU(PUVM pUVM, VMCPUID idDstCpu, bool fPriorityOnly) +{ + LogFlow(("VMR3ReqProcessU: (enmVMState=%d) idDstCpu=%d\n", pUVM->pVM ? pUVM->pVM->enmVMState : VMSTATE_CREATING, idDstCpu)); + + /* + * Determine which queues to process. + */ + PVMREQ volatile *ppNormalReqs; + PVMREQ volatile *ppPriorityReqs; + if (idDstCpu == VMCPUID_ANY) + { + ppPriorityReqs = &pUVM->vm.s.pPriorityReqs; + ppNormalReqs = !fPriorityOnly ? &pUVM->vm.s.pNormalReqs : ppPriorityReqs; + } + else + { + Assert(idDstCpu < pUVM->cCpus); + Assert(pUVM->aCpus[idDstCpu].vm.s.NativeThreadEMT == RTThreadNativeSelf()); + ppPriorityReqs = &pUVM->aCpus[idDstCpu].vm.s.pPriorityReqs; + ppNormalReqs = !fPriorityOnly ? &pUVM->aCpus[idDstCpu].vm.s.pNormalReqs : ppPriorityReqs; + } + + /* + * Process loop. + * + * We do not repeat the outer loop if we've got an informational status code + * since that code needs processing by our caller (usually EM). + */ + int rc = VINF_SUCCESS; + for (;;) + { + /* + * Get the pending requests. + * + * If there are more than one request, unlink the oldest and put the + * rest back so that we're reentrant. + */ + if (RT_LIKELY(pUVM->pVM)) + { + if (idDstCpu == VMCPUID_ANY) + VM_FF_CLEAR(pUVM->pVM, VM_FF_REQUEST); + else + VMCPU_FF_CLEAR(&pUVM->pVM->aCpus[idDstCpu], VMCPU_FF_REQUEST); + } + + PVMREQ pReq = ASMAtomicXchgPtrT(ppPriorityReqs, NULL, PVMREQ); + if (pReq) + { + if (RT_UNLIKELY(pReq->pNext)) + pReq = vmR3ReqProcessUTooManyHelper(pUVM, idDstCpu, pReq, ppPriorityReqs); + else if (ASMAtomicReadPtrT(ppNormalReqs, PVMREQ)) + vmR3ReqSetFF(pUVM, idDstCpu); + } + else + { + pReq = ASMAtomicXchgPtrT(ppNormalReqs, NULL, PVMREQ); + if (!pReq) + break; + if (RT_UNLIKELY(pReq->pNext)) + pReq = vmR3ReqProcessUTooManyHelper(pUVM, idDstCpu, pReq, ppNormalReqs); + } + + /* + * Process the request + */ + STAM_COUNTER_INC(&pUVM->vm.s.StatReqProcessed); + int rc2 = vmR3ReqProcessOne(pReq); + if ( rc2 >= VINF_EM_FIRST + && rc2 <= VINF_EM_LAST) + { + rc = rc2; + break; + } + } + + LogFlow(("VMR3ReqProcess: returns %Rrc (enmVMState=%d)\n", rc, pUVM->pVM ? pUVM->pVM->enmVMState : VMSTATE_CREATING)); + return rc; +} + + +/** + * Process one request. + * + * @returns VBox status code. + * + * @param pReq Request packet to process. + */ +static int vmR3ReqProcessOne(PVMREQ pReq) +{ + LogFlow(("vmR3ReqProcessOne: pReq=%p type=%d fFlags=%#x\n", pReq, pReq->enmType, pReq->fFlags)); + + /* + * Process the request. + */ + Assert(pReq->enmState == VMREQSTATE_QUEUED); + pReq->enmState = VMREQSTATE_PROCESSING; + int rcRet = VINF_SUCCESS; /* the return code of this function. */ + int rcReq = VERR_NOT_IMPLEMENTED; /* the request status. */ + switch (pReq->enmType) + { + /* + * A packed down call frame. 
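+         * Every argument was stored as a uintptr_t when the request was
+         * created, so the call is replayed by picking the prototype with the
+         * matching arity from the union below and feeding the saved words
+         * straight back in.  On AMD64 this is a plain switch on the argument
+         * count; on x86 the inline assembly further down copies the words
+         * onto the stack and calls the function directly.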
+ */ + case VMREQTYPE_INTERNAL: + { + uintptr_t *pauArgs = &pReq->u.Internal.aArgs[0]; + union + { + PFNRT pfn; + DECLCALLBACKMEMBER(int, pfn00)(void); + DECLCALLBACKMEMBER(int, pfn01)(uintptr_t); + DECLCALLBACKMEMBER(int, pfn02)(uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn03)(uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn04)(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn05)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn06)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn07)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn08)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn09)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn10)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn11)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn12)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn13)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn14)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + DECLCALLBACKMEMBER(int, pfn15)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + } u; + u.pfn = pReq->u.Internal.pfn; +#ifdef RT_ARCH_AMD64 + switch (pReq->u.Internal.cArgs) + { + case 0: rcRet = u.pfn00(); break; + case 1: rcRet = u.pfn01(pauArgs[0]); break; + case 2: rcRet = u.pfn02(pauArgs[0], pauArgs[1]); break; + case 3: rcRet = u.pfn03(pauArgs[0], pauArgs[1], pauArgs[2]); break; + case 4: rcRet = u.pfn04(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3]); break; + case 5: rcRet = u.pfn05(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4]); break; + case 6: rcRet = u.pfn06(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5]); break; + case 7: rcRet = u.pfn07(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6]); break; + case 8: rcRet = u.pfn08(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7]); break; + case 9: rcRet = u.pfn09(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8]); break; + case 10: rcRet = u.pfn10(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8], pauArgs[9]); break; + case 11: rcRet = u.pfn11(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8], pauArgs[9], pauArgs[10]); break; + case 12: rcRet = u.pfn12(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8], pauArgs[9], pauArgs[10], pauArgs[11]); break; + case 13: rcRet = u.pfn13(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], 
pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8], pauArgs[9], pauArgs[10], pauArgs[11], pauArgs[12]); break; + case 14: rcRet = u.pfn14(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8], pauArgs[9], pauArgs[10], pauArgs[11], pauArgs[12], pauArgs[13]); break; + case 15: rcRet = u.pfn15(pauArgs[0], pauArgs[1], pauArgs[2], pauArgs[3], pauArgs[4], pauArgs[5], pauArgs[6], pauArgs[7], pauArgs[8], pauArgs[9], pauArgs[10], pauArgs[11], pauArgs[12], pauArgs[13], pauArgs[14]); break; + default: + AssertReleaseMsgFailed(("cArgs=%d\n", pReq->u.Internal.cArgs)); + rcRet = rcReq = VERR_VM_REQUEST_TOO_MANY_ARGS_IPE; + break; + } +#else /* x86: */ + size_t cbArgs = pReq->u.Internal.cArgs * sizeof(uintptr_t); +# ifdef __GNUC__ + __asm__ __volatile__("movl %%esp, %%edx\n\t" + "subl %2, %%esp\n\t" + "andl $0xfffffff0, %%esp\n\t" + "shrl $2, %2\n\t" + "movl %%esp, %%edi\n\t" + "rep movsl\n\t" + "movl %%edx, %%edi\n\t" + "call *%%eax\n\t" + "mov %%edi, %%esp\n\t" + : "=a" (rcRet), + "=S" (pauArgs), + "=c" (cbArgs) + : "0" (u.pfn), + "1" (pauArgs), + "2" (cbArgs) + : "edi", "edx"); +# else + __asm + { + xor edx, edx /* just mess it up. */ + mov eax, u.pfn + mov ecx, cbArgs + shr ecx, 2 + mov esi, pauArgs + mov ebx, esp + sub esp, cbArgs + and esp, 0xfffffff0 + mov edi, esp + rep movsd + call eax + mov esp, ebx + mov rcRet, eax + } +# endif +#endif /* x86 */ + if ((pReq->fFlags & (VMREQFLAGS_RETURN_MASK)) == VMREQFLAGS_VOID) + rcRet = VINF_SUCCESS; + rcReq = rcRet; + break; + } + + default: + AssertMsgFailed(("pReq->enmType=%d\n", pReq->enmType)); + rcReq = VERR_NOT_IMPLEMENTED; + break; + } + + /* + * Complete the request. + */ + pReq->iStatus = rcReq; + pReq->enmState = VMREQSTATE_COMPLETED; + if (pReq->fFlags & VMREQFLAGS_NO_WAIT) + { + /* Free the packet, nobody is waiting. */ + LogFlow(("vmR3ReqProcessOne: Completed request %p: rcReq=%Rrc rcRet=%Rrc - freeing it\n", + pReq, rcReq, rcRet)); + VMR3ReqFree(pReq); + } + else + { + /* Notify the waiter and him free up the packet. */ + LogFlow(("vmR3ReqProcessOne: Completed request %p: rcReq=%Rrc rcRet=%Rrc - notifying waiting thread\n", + pReq, rcReq, rcRet)); + ASMAtomicXchgSize(&pReq->fEventSemClear, false); + int rc2 = RTSemEventSignal(pReq->EventSem); + if (RT_FAILURE(rc2)) + { + AssertRC(rc2); + rcRet = rc2; + } + } + + return rcRet; +} + diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h new file mode 100644 index 00000000..1784ffce --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_3200.h @@ -0,0 +1,224 @@ +/* $Id: AMD_Athlon_64_3200.h $ */ +/** @file + * CPU database entry "AMD Athlon 64 3200+". + * Generated at 2013-07-12T02:09:05Z by VBoxCpuReport v4.3.53r91376 on win.x86. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VBOX_CPUDB_AMD_Athlon_64_3200_h +#define VBOX_CPUDB_AMD_Athlon_64_3200_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for AMD Athlon(tm) 64 Processor 3200+. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_AMD_Athlon_64_3200[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000f48, 0x00000800, 0x00000000, 0x078bfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000018, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000f48, 0x0000010a, 0x00000000, 0xe1d3fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000002, 0x00000000, 0x00000000, 0x20444d41, 0x6c687441, 0x74286e6f, 0x3620296d, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x72502034, 0x7365636f, 0x20726f73, 0x30303233, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x0000002b, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x42004200, 0x04008140, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000000f, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003028, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000014, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000015, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000016, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000017, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8fffffff, 0x00000000, 0x00000000, 0x53275449, 0x4d414820, 0x2052454d, 0x454d4954, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for AMD Athlon(tm) 64 Processor 3200+. 
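+ *
+ * Each entry describes how reads and writes of one MSR (or a range of MSRs)
+ * are handled and which bits may be written, as probed on the real CPU by
+ * VBoxCpuReport; the trailing comments record the values observed on the
+ * reference machine.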
+ */ +static CPUMMSRRANGE const g_aMsrRanges_AMD_Athlon_64_3200[] = +{ + MAL(0x00000000, "IA32_P5_MC_ADDR", 0x00000402), + MAL(0x00000001, "IA32_P5_MC_TYPE", 0x00000401), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x28`4505cb65 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00900), 0, UINT64_C(0xffffff00000006ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFO(0x0000008b, "AMD_K8_PATCH_LEVEL", AmdK8PatchLevel), /* value=0x39 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x8 */ + MFX(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x8059e000 */ + MFX(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x81872950 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x105, 0, 0), /* value=0x105 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + MFX(0x0000017b, "IA32_MCG_CTL", Ia32McgCtl, Ia32McgCtl, 0, UINT64_C(0xffffffffffffffe0), 0), /* value=0x1f */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, UINT64_C(0xffffffffffffff80), 0x40), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xffffffed`bf1be178 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffff7f49`bf1bedec */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0x0 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffff00000007ff)), /* value=0xff`c0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffff0000000ff8)), /* value=0xf8000001 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffff00000007ff)), /* value=0xff`fc000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, 
UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RFN(0x00000400, 0x00000413, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0x800, 0xfe, UINT64_C(0xfffffffffffff200)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x0 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0x0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0x0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x81913800 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0x0 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x0 */ + RSN(0xc0010000, 0xc0010003, "AMD_K8_PERF_CTL_n", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x0, UINT64_C(0xffffffff00200000), 0), + RSN(0xc0010004, 0xc0010007, "AMD_K8_PERF_CTR_n", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x0, UINT64_C(0xffff000000000000), 0), + MFX(0xc0010010, "AMD_K8_SYS_CFG", AmdK8SysCfg, AmdK8SysCfg, 0x160601, UINT64_C(0xffffffffffc0f800), 0), /* value=0x160601 */ + MFX(0xc0010015, "AMD_K8_HW_CFG", AmdK8HwCr, AmdK8HwCr, 0xc000000, UINT64_C(0xffffffff3ff00000), 0), /* value=0xc000000 */ + MFW(0xc0010016, "AMD_K8_IORR_BASE_0", AmdK8IorrBaseN, AmdK8IorrBaseN, UINT64_C(0xffffff0000000fe7)), /* value=0x0 */ + MFW(0xc0010017, "AMD_K8_IORR_MASK_0", AmdK8IorrMaskN, AmdK8IorrMaskN, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0xc0010018, "AMD_K8_IORR_BASE_1", AmdK8IorrBaseN, AmdK8IorrBaseN, 0x1, UINT64_C(0xffffff0000000fe7), 0), /* 
value=0xf8000018 */ + MFX(0xc0010019, "AMD_K8_IORR_MASK_1", AmdK8IorrMaskN, AmdK8IorrMaskN, 0x1, UINT64_C(0xffffff00000007ff), 0), /* value=0xff`fc000800 */ + MFW(0xc001001a, "AMD_K8_TOP_MEM", AmdK8TopOfMemN, AmdK8TopOfMemN, UINT64_C(0xffffff00007fffff)), /* value=0x40000000 */ + MFX(0xc001001d, "AMD_K8_TOP_MEM2", AmdK8TopOfMemN, AmdK8TopOfMemN, 0x1, UINT64_C(0xffffff00007fffff), 0), /* value=0x0 */ + MVI(0xc001001e, "AMD_K8_MANID", 0x20), + MFX(0xc001001f, "AMD_K8_NB_CFG1", AmdK8NbCfg1, AmdK8NbCfg1, 0, UINT64_C(0xffffff0000000000), 0), /* value=0x11`00000008 */ + MFN(0xc0010020, "AMD_K8_PATCH_LOADER", WriteOnly, AmdK8PatchLoader), + MVX(0xc0010021, "AMD_K8_UNK_c001_0021", 0, UINT64_C(0xfffffffe00000000), 0), + MFX(0xc0010022, "AMD_K8_MC_XCPT_REDIR", AmdK8McXcptRedir, AmdK8McXcptRedir, 0, UINT64_C(0xfffffffeffffffff), 0), /* value=0x0 */ + RFN(0xc0010030, 0xc0010035, "AMD_K8_CPU_NAME_n", AmdK8CpuNameN, AmdK8CpuNameN), + MFX(0xc001003e, "AMD_K8_HTC", AmdK8HwThermalCtrl, AmdK8HwThermalCtrl, 0, UINT64_MAX, 0), /* value=0x0 */ + MFI(0xc001003f, "AMD_K8_STC", AmdK8SwThermalCtrl), /* value=0x0 */ + MFX(0xc0010041, "AMD_K8_FIDVID_CTL", AmdK8FidVidControl, AmdK8FidVidControl, UINT64_C(0x4e200000000c), 0x33, UINT64_C(0xfff00000fffee0c0)), /* value=0x4e20`0000000c */ + MFX(0xc0010042, "AMD_K8_FIDVID_STATUS", AmdK8FidVidStatus, ReadOnly, UINT64_C(0x200000c0c0c), 0, 0), /* value=0x200`000c0c0c */ + MVO(0xc0010043, "AMD_K8_THERMTRIP_STATUS", 0x521020), + RFN(0xc0010044, 0xc0010048, "AMD_K8_MC_CTL_MASK_n", AmdK8McCtlMaskN, AmdK8McCtlMaskN), + RSN(0xc0010050, 0xc0010053, "AMD_K8_SMI_ON_IO_TRAP_n", AmdK8SmiOnIoTrapN, AmdK8SmiOnIoTrapN, 0x0, 0, UINT64_C(0x1f00000000000000)), + MFX(0xc0010054, "AMD_K8_SMI_ON_IO_TRAP_CTL_STS", AmdK8SmiOnIoTrapCtlSts, AmdK8SmiOnIoTrapCtlSts, 0, 0, UINT64_C(0xffffffffffff1f00)), /* value=0x0 */ + MFX(0xc0010111, "AMD_K8_SMM_BASE", AmdK8SmmBase, AmdK8SmmBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x98000 */ + MFX(0xc0010112, "AMD_K8_SMM_ADDR", AmdK8SmmAddr, AmdK8SmmAddr, 0, UINT64_C(0xffffff000001ffff), 0), /* value=0x0 */ + MFX(0xc0010113, "AMD_K8_SMM_MASK", AmdK8SmmMask, AmdK8SmmMask, 0, UINT64_C(0xffffff00000188c0), 0), /* value=0x1 */ + MVX(0xc0010114, "AMD_K8_UNK_c001_0114", 0, 0, UINT64_C(0xffffffffffffffe4)), + MVX(0xc0010115, "AMD_K8_UNK_c001_0115", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0xc0010116, "AMD_K8_UNK_c001_0116", 0, 0, UINT64_C(0xffff0000ffff0000)), + MVX(0xc0010117, "AMD_K8_UNK_c001_0117", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0xc0010118, "AMD_K8_UNK_c001_0118",0,0,0), + MVX(0xc0010119, "AMD_K8_UNK_c001_0119",0,0,0), + MVX(0xc001011a, "AMD_K8_UNK_c001_011a", 0, 0, UINT64_C(0xffffffff00000fff)), + MVX(0xc001011b, "AMD_K8_UNK_c001_011b", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0xc001011c, "AMD_K8_UNK_c001_011c", UINT32_C(0xdb1f5000), 0, UINT64_C(0xffffffff00000fff)), + MFX(0xc0011000, "AMD_K7_MCODE_CTL", AmdK7MicrocodeCtl, AmdK7MicrocodeCtl, 0, ~(uint64_t)UINT32_MAX, 0x204), /* value=0x0 */ + MFX(0xc0011001, "AMD_K7_APIC_CLUSTER_ID", AmdK7ClusterIdMaybe, AmdK7ClusterIdMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0011004, "AMD_K8_CPUID_CTL_STD01", AmdK8CpuIdCtlStd01hEdcx, AmdK8CpuIdCtlStd01hEdcx, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x78bfbff */ + MFX(0xc0011005, "AMD_K8_CPUID_CTL_EXT01", AmdK8CpuIdCtlExt01hEdcx, AmdK8CpuIdCtlExt01hEdcx, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xf1f3fbff */ + MFX(0xc0011006, "AMD_K7_DEBUG_STS?", AmdK7DebugStatusMaybe, AmdK7DebugStatusMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0xc0011007, 
"AMD_K7_BH_TRACE_BASE?", AmdK7BHTraceBaseMaybe, AmdK7BHTraceBaseMaybe), /* value=0x0 */ + MFN(0xc0011008, "AMD_K7_BH_TRACE_PTR?", AmdK7BHTracePtrMaybe, AmdK7BHTracePtrMaybe), /* value=0x0 */ + MFN(0xc0011009, "AMD_K7_BH_TRACE_LIM?", AmdK7BHTraceLimitMaybe, AmdK7BHTraceLimitMaybe), /* value=0x0 */ + MFX(0xc001100a, "AMD_K7_HDT_CFG?", AmdK7HardwareDebugToolCfgMaybe, AmdK7HardwareDebugToolCfgMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc001100b, "AMD_K7_FAST_FLUSH_COUNT?", AmdK7FastFlushCountMaybe, AmdK7FastFlushCountMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x7c0 */ + MFX(0xc001100c, "AMD_K7_NODE_ID", AmdK7NodeId, AmdK7NodeId, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x20906 */ + MVX(0xc001100d, "AMD_K8_LOGICAL_CPUS_NUM?", 0x10a, 0, 0), + MVX(0xc001100e, "AMD_K8_WRMSR_BP?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100f, "AMD_K8_WRMSR_BP_MASK?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011010, "AMD_K8_BH_TRACE_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011011, "AMD_K8_BH_TRACE_USRD?", 0, 0, 0), /* value=0xc0011011`00000283 */ + MVX(0xc0011014, "AMD_K8_XCPT_BP_RIP?", 0, 0, 0), + MVX(0xc0011015, "AMD_K8_XCPT_BP_RIP_MASK?", 0, 0, 0), + MVX(0xc0011016, "AMD_K8_COND_HDT_VAL?", 0, 0, 0), + MVX(0xc0011017, "AMD_K8_COND_HDT_VAL_MASK?", 0, 0, 0), + MVX(0xc0011018, "AMD_K8_XCPT_BP_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001101d, "AMD_K8_NB_BIST?", 0, UINT64_C(0xfffffffffc000000), 0), + MVI(0xc001101e, "AMD_K8_THERMTRIP_2?", 0x521020), /* Villain? */ + MVX(0xc001101f, "AMD_K8_NB_CFG?", UINT64_C(0x1100000008), UINT64_C(0xffffff0000000000), 0), + MFX(0xc0011020, "AMD_K7_LS_CFG", AmdK7LoadStoreCfg, AmdK7LoadStoreCfg, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x1000 */ + MFX(0xc0011021, "AMD_K7_IC_CFG", AmdK7InstrCacheCfg, AmdK7InstrCacheCfg, 0x800, ~(uint64_t)UINT32_MAX, 0), /* value=0x800 */ + MFX(0xc0011022, "AMD_K7_DC_CFG", AmdK7DataCacheCfg, AmdK7DataCacheCfg, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x24000008 */ + MFN(0xc0011023, "AMD_K7_BU_CFG", AmdK7BusUnitCfg, AmdK7BusUnitCfg), /* Villain? value=0x2020 */ + MFX(0xc0011024, "AMD_K7_DEBUG_CTL_2?", AmdK7DebugCtl2Maybe, AmdK7DebugCtl2Maybe, 0, UINT64_C(0xffffffffffffff00), 0), /* value=0x0 */ + MFN(0xc0011025, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMatchMaybe, AmdK7Dr0DataMatchMaybe), /* value=0x0 */ + MFN(0xc0011026, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMaskMaybe, AmdK7Dr0DataMaskMaybe), /* value=0x0 */ + MFX(0xc0011027, "AMD_K7_DR0_ADDR_MASK", AmdK7DrXAddrMaskN, AmdK7DrXAddrMaskN, 0x0, UINT64_C(0xfffffffffffff000), 0), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for AMD Athlon(tm) 64 Processor 3200+. + */ +static CPUMDBENTRY const g_Entry_AMD_Athlon_64_3200 = +{ + /*.pszName = */ "AMD Athlon 64 3200+", + /*.pszFullName = */ "AMD Athlon(tm) 64 Processor 3200+", + /*.enmVendor = */ CPUMCPUVENDOR_AMD, + /*.uFamily = */ 15, + /*.uModel = */ 4, + /*.uStepping = */ 8, + /*.enmMicroarch = */ kCpumMicroarch_AMD_K8_130nm, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 40, + /*.fMxCsrMask = */ 0xffff, ///< @todo check. 
+ /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_AMD_Athlon_64_3200), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_Athlon_64_3200)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_AMD_Athlon_64_3200)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_AMD_Athlon_64_3200), +}; + +#endif /* !VBOX_CPUDB_AMD_Athlon_64_3200_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h new file mode 100644 index 00000000..8f3f8704 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/AMD_Athlon_64_X2_Dual_Core_4200.h @@ -0,0 +1,232 @@ +/* $Id: AMD_Athlon_64_X2_Dual_Core_4200.h $ */ +/** @file + * CPU database entry "AMD Athlon 64 X2 Dual Core 4200+". + * Generated at 2014-02-28T15:19:16Z by VBoxCpuReport v4.3.53r92578 on linux.amd64 . + * . + * @remarks Possible that we're missing a few special MSRs due to no . + * magic register value capabilities in the linux hosted . + * MSR probing code. + * @todo Regenerate this using windows/whatever where we can get to the secret AMD MSRs. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_AMD_Athlon_64_X2_Dual_Core_4200_h +#define VBOX_CPUDB_AMD_Athlon_64_X2_Dual_Core_4200_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for AMD Athlon(tm) 64 X2 Dual Core Processor 4200+. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_AMD_Athlon_64_X2_Dual_Core_4200[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00040fb2, 0x01020800, 0x00002001, 0x178bfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000018, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00040fb2, 0x000008d1, 0x0000001f, 0xebd3fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000002, 0x00000000, 0x00000000, 0x20444d41, 0x6c687441, 0x74286e6f, 0x3620296d, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x32582034, 0x61754420, 0x6f43206c, 0x50206572, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x65636f72, 0x726f7373, 0x30323420, 0x00002b30, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x42004200, 0x02008140, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000003f, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003028, 0x00000000, 0x00000001, 0x00000000, 0 }, + { 0x80000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000a, 0x00000000, 0x00000000, 0x00000001, 0x00000040, 0x00000000, 0x00000000, 0 }, + { 0x8000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000014, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000015, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000016, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000017, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for AMD Athlon(tm) 64 X2 Dual Core Processor 4200+. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_AMD_Athlon_64_X2_Dual_Core_4200[] = +{ + MAL(0x00000000, "IA32_P5_MC_ADDR", 0x00000402), + MAL(0x00000001, "IA32_P5_MC_TYPE", 0x00000401), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x7e`171166b8 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffffff00000006ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFO(0x0000008b, "AMD_K8_PATCH_LEVEL", AmdK8PatchLevel), /* value=0x0 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFX(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x8103ca80 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x105, 0, 0), /* value=0x105 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + MFX(0x0000017b, "IA32_MCG_CTL", Ia32McgCtl, Ia32McgCtl, 0, UINT64_C(0xffffffffffffffe0), 0), /* value=0x1f */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, UINT64_C(0xffffffffffffff80), 0x40), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xffffffff`a0425995 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffffff`8103124a */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0x0 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffff00000007ff)), /* value=0xff`80000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x80000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffff00000007ff)), /* value=0xff`c0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffff0000000ff8)), /* value=0xf8000001 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffff00000007ff)), /* value=0xff`ff000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 
0x6, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RFN(0x00000400, 0x00000413, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0xfe, UINT64_C(0xffffffffffff8200)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffffff`81011d20 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xffffffff`8103ccb0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x3700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x1da4880 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffff8800`28300000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x0 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x1 */ + RSN(0xc0010000, 0xc0010003, "AMD_K8_PERF_CTL_n", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x0, UINT64_C(0xffffffff00200000), 0), + RSN(0xc0010004, 0xc0010007, "AMD_K8_PERF_CTR_n", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x0, UINT64_C(0xffff000000000000), 0), + MFX(0xc0010010, "AMD_K8_SYS_CFG", AmdK8SysCfg, AmdK8SysCfg, 0x760601, UINT64_C(0xffffffffff80f800), 0), /* value=0x760601 */ + MFX(0xc0010015, "AMD_K8_HW_CFG", AmdK8HwCr, AmdK8HwCr, 0x2000060, UINT64_C(0xffffffff3ff00020), 0), /* value=0x2000060 */ + MFW(0xc0010016, "AMD_K8_IORR_BASE_0", AmdK8IorrBaseN, AmdK8IorrBaseN, UINT64_C(0xffffff0000000fe7)), /* value=0xa30000 */ + MFW(0xc0010017, "AMD_K8_IORR_MASK_0", 
AmdK8IorrMaskN, AmdK8IorrMaskN, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0xc0010018, "AMD_K8_IORR_BASE_1", AmdK8IorrBaseN, AmdK8IorrBaseN, 0x1, UINT64_C(0xffffff0000000fe7), 0), /* value=0x0 */ + MFX(0xc0010019, "AMD_K8_IORR_MASK_1", AmdK8IorrMaskN, AmdK8IorrMaskN, 0x1, UINT64_C(0xffffff00000007ff), 0), /* value=0x0 */ + MFW(0xc001001a, "AMD_K8_TOP_MEM", AmdK8TopOfMemN, AmdK8TopOfMemN, UINT64_C(0xffffff00007fffff)), /* value=0xc0000000 */ + MFX(0xc001001d, "AMD_K8_TOP_MEM2", AmdK8TopOfMemN, AmdK8TopOfMemN, 0x1, UINT64_C(0xffffff00007fffff), 0), /* value=0x1`40000000 */ + MVI(0xc001001e, "AMD_K8_MANID", 0x52), + MFX(0xc001001f, "AMD_K8_NB_CFG1", AmdK8NbCfg1, AmdK8NbCfg1, 0, UINT64_C(0x3fbf000000000000), 0), /* value=0x400001`00100008 */ + MFN(0xc0010020, "AMD_K8_PATCH_LOADER", WriteOnly, AmdK8PatchLoader), + MFN(0xc0010021, "AMD_K8_UNK_c001_0021", WriteOnly, IgnoreWrite), + RFN(0xc0010030, 0xc0010035, "AMD_K8_CPU_NAME_n", AmdK8CpuNameN, AmdK8CpuNameN), + MFX(0xc001003e, "AMD_K8_HTC", AmdK8HwThermalCtrl, AmdK8HwThermalCtrl, 0, UINT64_C(0xfffffffff0e088fc), 0), /* value=0x0 */ + MFX(0xc001003f, "AMD_K8_STC", AmdK8SwThermalCtrl, AmdK8SwThermalCtrl, 0, UINT64_C(0xfffffffff0e088e0), 0), /* value=0x0 */ + MFX(0xc0010041, "AMD_K8_FIDVID_CTL", AmdK8FidVidControl, AmdK8FidVidControl, UINT64_C(0x100001202), 0xc31, UINT64_C(0xfff00000fffec0c0)), /* value=0x1`00001202 */ + MFX(0xc0010042, "AMD_K8_FIDVID_STATUS", AmdK8FidVidStatus, ReadOnly, UINT64_C(0x310c12120c0e0202), 0, 0), /* value=0x310c1212`0c0e0202 */ + MVO(0xc0010043, "AMD_K8_THERMTRIP_STATUS", 0x4e1a24), + RFN(0xc0010044, 0xc0010048, "AMD_K8_MC_CTL_MASK_n", AmdK8McCtlMaskN, AmdK8McCtlMaskN), + RSN(0xc0010050, 0xc0010053, "AMD_K8_SMI_ON_IO_TRAP_n", AmdK8SmiOnIoTrapN, AmdK8SmiOnIoTrapN, 0x0, 0, UINT64_C(0x1f00000000000000)), + MFX(0xc0010054, "AMD_K8_SMI_ON_IO_TRAP_CTL_STS", AmdK8SmiOnIoTrapCtlSts, AmdK8SmiOnIoTrapCtlSts, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xffff1f00)), /* value=0x0 */ + MFX(0xc0010055, "AMD_K8_INT_PENDING_MSG", AmdK8IntPendingMessage, AmdK8IntPendingMessage, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xe0000000)), /* value=0x3000000 */ + MVO(0xc0010060, "AMD_K8_BIST_RESULT", 0), + MFX(0xc0010111, "AMD_K8_SMM_BASE", AmdK8SmmBase, AmdK8SmmBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x98200 */ + MFX(0xc0010112, "AMD_K8_SMM_ADDR", AmdK8SmmAddr, AmdK8SmmAddr, 0, UINT64_C(0xffffff000001ffff), 0), /* value=0x0 */ + MFX(0xc0010113, "AMD_K8_SMM_MASK", AmdK8SmmMask, AmdK8SmmMask, 0, UINT64_C(0xffffff00000188c0), 0), /* value=0x1 */ + MFX(0xc0010114, "AMD_K8_VM_CR", AmdK8VmCr, AmdK8VmCr, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xffffffe0)), /* value=0x0 */ + MFX(0xc0010115, "AMD_K8_IGNNE", AmdK8IgnNe, AmdK8IgnNe, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xfffffffe)), /* value=0x0 */ + MFN(0xc0010116, "AMD_K8_SMM_CTL", WriteOnly, AmdK8SmmCtl), + MFX(0xc0010117, "AMD_K8_VM_HSAVE_PA", AmdK8VmHSavePa, AmdK8VmHSavePa, 0, 0, UINT64_C(0xffffff0000000fff)), /* value=0x0 */ + + /* Copy & paste from the AMD_Athlon_64_3200 (130nm) profile: */ + MVX(0xc0010118, "AMD_K8_UNK_c001_0118",0,0,0), + MVX(0xc0010119, "AMD_K8_UNK_c001_0119",0,0,0), + MVX(0xc001011a, "AMD_K8_UNK_c001_011a", 0, 0, UINT64_C(0xffffffff00000fff)), + MVX(0xc001011b, "AMD_K8_UNK_c001_011b", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0xc001011c, "AMD_K8_UNK_c001_011c", UINT32_C(0xdb1f5000), 0, UINT64_C(0xffffffff00000fff)), + MFX(0xc0011000, "AMD_K7_MCODE_CTL", AmdK7MicrocodeCtl, AmdK7MicrocodeCtl, 0, ~(uint64_t)UINT32_MAX, 0x204), /* value=0x0 */ + MFX(0xc0011001, "AMD_K7_APIC_CLUSTER_ID", 
AmdK7ClusterIdMaybe, AmdK7ClusterIdMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0011004, "AMD_K8_CPUID_CTL_STD01", AmdK8CpuIdCtlStd01hEdcx, AmdK8CpuIdCtlStd01hEdcx, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x78bfbff */ + MFX(0xc0011005, "AMD_K8_CPUID_CTL_EXT01", AmdK8CpuIdCtlExt01hEdcx, AmdK8CpuIdCtlExt01hEdcx, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xf1f3fbff */ + MFX(0xc0011006, "AMD_K7_DEBUG_STS?", AmdK7DebugStatusMaybe, AmdK7DebugStatusMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0xc0011007, "AMD_K7_BH_TRACE_BASE?", AmdK7BHTraceBaseMaybe, AmdK7BHTraceBaseMaybe), /* value=0x0 */ + MFN(0xc0011008, "AMD_K7_BH_TRACE_PTR?", AmdK7BHTracePtrMaybe, AmdK7BHTracePtrMaybe), /* value=0x0 */ + MFN(0xc0011009, "AMD_K7_BH_TRACE_LIM?", AmdK7BHTraceLimitMaybe, AmdK7BHTraceLimitMaybe), /* value=0x0 */ + MFX(0xc001100a, "AMD_K7_HDT_CFG?", AmdK7HardwareDebugToolCfgMaybe, AmdK7HardwareDebugToolCfgMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc001100b, "AMD_K7_FAST_FLUSH_COUNT?", AmdK7FastFlushCountMaybe, AmdK7FastFlushCountMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x7c0 */ + MFX(0xc001100c, "AMD_K7_NODE_ID", AmdK7NodeId, AmdK7NodeId, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x20906 */ + MVX(0xc001100d, "AMD_K8_LOGICAL_CPUS_NUM?", 0x10a, 0, 0), + MVX(0xc001100e, "AMD_K8_WRMSR_BP?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100f, "AMD_K8_WRMSR_BP_MASK?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011010, "AMD_K8_BH_TRACE_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011011, "AMD_K8_BH_TRACE_USRD?", 0, 0, 0), /* value=0xc0011011`00000283 */ + MVX(0xc0011014, "AMD_K8_XCPT_BP_RIP?", 0, 0, 0), + MVX(0xc0011015, "AMD_K8_XCPT_BP_RIP_MASK?", 0, 0, 0), + MVX(0xc0011016, "AMD_K8_COND_HDT_VAL?", 0, 0, 0), + MVX(0xc0011017, "AMD_K8_COND_HDT_VAL_MASK?", 0, 0, 0), + MVX(0xc0011018, "AMD_K8_XCPT_BP_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001101d, "AMD_K8_NB_BIST?", 0, UINT64_C(0xfffffffffc000000), 0), + MVI(0xc001101e, "AMD_K8_THERMTRIP_2?", 0x521020), /* Villain? */ + MVX(0xc001101f, "AMD_K8_NB_CFG?", UINT64_C(0x1100000008), UINT64_C(0xffffff0000000000), 0), + MFX(0xc0011020, "AMD_K7_LS_CFG", AmdK7LoadStoreCfg, AmdK7LoadStoreCfg, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x1000 */ + MFX(0xc0011021, "AMD_K7_IC_CFG", AmdK7InstrCacheCfg, AmdK7InstrCacheCfg, 0x800, ~(uint64_t)UINT32_MAX, 0), /* value=0x800 */ + MFX(0xc0011022, "AMD_K7_DC_CFG", AmdK7DataCacheCfg, AmdK7DataCacheCfg, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x24000008 */ + MFN(0xc0011023, "AMD_K7_BU_CFG", AmdK7BusUnitCfg, AmdK7BusUnitCfg), /* Villain? value=0x2020 */ + MFX(0xc0011024, "AMD_K7_DEBUG_CTL_2?", AmdK7DebugCtl2Maybe, AmdK7DebugCtl2Maybe, 0, UINT64_C(0xffffffffffffff00), 0), /* value=0x0 */ + MFN(0xc0011025, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMatchMaybe, AmdK7Dr0DataMatchMaybe), /* value=0x0 */ + MFN(0xc0011026, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMaskMaybe, AmdK7Dr0DataMaskMaybe), /* value=0x0 */ + MFX(0xc0011027, "AMD_K7_DR0_ADDR_MASK", AmdK7DrXAddrMaskN, AmdK7DrXAddrMaskN, 0x0, UINT64_C(0xfffffffffffff000), 0), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for AMD Athlon(tm) 64 X2 Dual Core Processor 4200+. 
+ */ +static CPUMDBENTRY const g_Entry_AMD_Athlon_64_X2_Dual_Core_4200 = +{ + /*.pszName = */ "AMD Athlon 64 X2 Dual Core 4200+", + /*.pszFullName = */ "AMD Athlon(tm) 64 X2 Dual Core Processor 4200+", + /*.enmVendor = */ CPUMCPUVENDOR_AMD, + /*.uFamily = */ 15, + /*.uModel = */ 75, + /*.uStepping = */ 2, + /*.enmMicroarch = */ kCpumMicroarch_AMD_K8_90nm_AMDV, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 40, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_AMD_Athlon_64_X2_Dual_Core_4200), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_Athlon_64_X2_Dual_Core_4200)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_AMD_Athlon_64_X2_Dual_Core_4200)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_AMD_Athlon_64_X2_Dual_Core_4200), +}; + +#endif /* !VBOX_CPUDB_AMD_Athlon_64_X2_Dual_Core_4200_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h b/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h new file mode 100644 index 00000000..9c1a767b --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/AMD_FX_8150_Eight_Core.h @@ -0,0 +1,383 @@ +/* $Id: AMD_FX_8150_Eight_Core.h $ */ +/** @file + * CPU database entry "AMD FX-8150 Eight-Core". + * Generated at 2013-12-09T11:27:04Z by VBoxCpuReport v4.3.51r91084 on win.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_AMD_FX_8150_Eight_Core_h +#define VBOX_CPUDB_AMD_FX_8150_Eight_Core_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for AMD FX(tm)-8150 Eight-Core Processor. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_AMD_FX_8150_Eight_Core[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000d, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00600f12, 0x02080800, 0x1e98220b, 0x178bfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00000000, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, UINT32_MAX, 0x00000007, 0x00000340, 0x000003c0, 0x40000000, 0 }, + { 0x0000000d, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x8000001e, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00600f12, 0x10000000, 0x01c9bfff, 0x2fd3fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000002, 0x00000000, 0x00000000, 0x20444d41, 0x74285846, 0x382d296d, 0x20303531, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x68676945, 0x6f432d74, 0x50206572, 0x65636f72, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x726f7373, 0x20202020, 0x20202020, 0x00202020, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0xff20ff18, 0xff20ff30, 0x10040140, 0x40020140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x64000000, 0x64004200, 0x08008140, 0x0040c140, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000003d9, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003030, 0x00000000, 0x00004007, 0x00000000, 0 }, + { 0x80000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000a, 0x00000000, 0x00000000, 0x00000001, 0x00010000, 0x00000000, 0x000014ff, 0 }, + { 0x8000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000014, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000015, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000016, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000017, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000019, 0x00000000, 0x00000000, 0xf020f018, 0x64000000, 0x00000000, 0x00000000, 0 }, + { 0x8000001a, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000001b, 0x00000000, 0x00000000, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000001c, 0x00000000, 0x00000000, 0x00000000, 0x80032013, 0x00010200, 0x8000000f, 0 }, + { 0x8000001d, 0x00000000, UINT32_MAX, 0x00000121, 0x00c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x8000001d, 0x00000001, UINT32_MAX, 0x00004122, 0x0040003f, 0x000001ff, 0x00000000, 0 }, + { 0x8000001d, 0x00000002, UINT32_MAX, 0x00004143, 0x03c0003f, 0x000007ff, 0x00000001, 0 }, + { 0x8000001d, 0x00000003, UINT32_MAX, 0x0001c163, 0x0fc0003f, 0x000007ff, 0x00000001, 0 }, + { 0x8000001d, 0x00000004, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000001e, 0x00000000, 0x00000000, 0x00000012, 0x00000101, 0x00000000, 0x00000000, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for AMD FX(tm)-8150 Eight-Core Processor. + */ +static CPUMMSRRANGE const g_aMsrRanges_AMD_FX_8150_Eight_Core[] = +{ + MAL(0x00000000, "IA32_P5_MC_ADDR", 0x00000402), + MAL(0x00000001, "IA32_P5_MC_TYPE", 0x00000401), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffff0000000006ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVO(0x0000008b, "BBL_CR_D3|BIOS_SIGN", 0x6000626), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x107, 0, 0), /* value=0x107 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + MFX(0x0000017b, "IA32_MCG_CTL", Ia32McgCtl, Ia32McgCtl, 0, UINT64_C(0xffffffffffffff88), 0), /* value=0x77 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, UINT64_C(0xffffffffffffff80), 0x40), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x0 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0x0 */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0x0 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffff000000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`80000800 */ + 
MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffff000000000ff8)), /* value=0x80000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`c0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffff000000000ff8)), /* value=0xc0000006 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`f0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffff000000000ff8)), /* value=0xcdf00000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`fff00800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffff000000000ff8)), /* value=0xce000000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`fe000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RFN(0x00000400, 0x0000041b, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0x4d01, 0xfe, UINT64_C(0xffffffffffff8200)), + 
MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff800`02ed0bc0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff800`02ed0900 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0xfffe0000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xfffff880`02f65000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7ff`fffde000 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0000104, "AMD_15H_TSC_RATE", AmdFam15hTscRate, AmdFam15hTscRate, 0, 0, UINT64_C(0xffffff0000000000)), /* value=0x1`00000000 */ + MFX(0xc0000105, "AMD_15H_LWP_CFG", AmdFam15hLwpCfg, AmdFam15hLwpCfg, 0, UINT64_C(0xffff000000000001), 0x7ffffff0), /* value=0x0 */ + MFX(0xc0000106, "AMD_15H_LWP_CBADDR", AmdFam15hLwpCbAddr, AmdFam15hLwpCbAddr, 0, 0, UINT64_MAX), /* value=0x0 */ + RSN(0xc0000408, 0xc0000409, "AMD_10H_MC4_MISCn", AmdFam10hMc4MiscN, AmdFam10hMc4MiscN, 0, UINT64_C(0xff00f000ffffffff), 0), + RVI(0xc000040a, 0xc000040f, "AMD_10H_MC4_MISCn", 0), + MAL(0xc0010000, "AMD_K8_PERF_CTL_0", 0xc0010200), + MAL(0xc0010001, "AMD_K8_PERF_CTL_1", 0xc0010202), + MAL(0xc0010002, "AMD_K8_PERF_CTL_2", 0xc0010204), + MAL(0xc0010003, "AMD_K8_PERF_CTL_3", 0xc0010206), + MAL(0xc0010004, "AMD_K8_PERF_CTR_0", 0xc0010201), + MAL(0xc0010005, "AMD_K8_PERF_CTR_1", 0xc0010203), + MAL(0xc0010006, "AMD_K8_PERF_CTR_2", 0xc0010205), + MAL(0xc0010007, "AMD_K8_PERF_CTR_3", 0xc0010207), + MFX(0xc0010010, "AMD_K8_SYS_CFG", AmdK8SysCfg, AmdK8SysCfg, 0x740000, UINT64_C(0xffffffffff82ffff), 0), /* value=0x740000 */ + MFX(0xc0010015, "AMD_K8_HW_CFG", AmdK8HwCr, AmdK8HwCr, 0, UINT64_C(0xffffffff01006020), 0), /* value=0x1001031 */ + MFW(0xc0010016, "AMD_K8_IORR_BASE_0", AmdK8IorrBaseN, AmdK8IorrBaseN, UINT64_C(0xffff000000000fe7)), /* value=0x0 */ + MFW(0xc0010017, "AMD_K8_IORR_MASK_0", AmdK8IorrMaskN, AmdK8IorrMaskN, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0xc0010018, "AMD_K8_IORR_BASE_1", AmdK8IorrBaseN, AmdK8IorrBaseN, 0x1, UINT64_C(0xffff000000000fe7), 0), /* value=0x0 */ + MFX(0xc0010019, "AMD_K8_IORR_MASK_1", AmdK8IorrMaskN, AmdK8IorrMaskN, 0x1, UINT64_C(0xffff0000000007ff), 0), /* value=0x0 */ + MFW(0xc001001a, "AMD_K8_TOP_MEM", AmdK8TopOfMemN, AmdK8TopOfMemN, UINT64_C(0xffff0000007fffff)), /* value=0xd0000000 */ + MFX(0xc001001d, "AMD_K8_TOP_MEM2", AmdK8TopOfMemN, AmdK8TopOfMemN, 0x1, UINT64_C(0xffff0000007fffff), 0), /* value=0x4`2f000000 */ + MFN(0xc001001f, "AMD_K8_NB_CFG1", AmdK8NbCfg1, AmdK8NbCfg1), /* value=0x400000`00810008 */ + MFN(0xc0010020, "AMD_K8_PATCH_LOADER", WriteOnly, AmdK8PatchLoader), + MFX(0xc0010022, "AMD_K8_MC_XCPT_REDIR", AmdK8McXcptRedir, AmdK8McXcptRedir, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0x0 */ + MVO(0xc0010028, "AMD_K8_UNK_c001_0028", 0), + MVO(0xc0010029, "AMD_K8_UNK_c001_0029", 0), + MVO(0xc001002a, "AMD_K8_UNK_c001_002a", 0), + MVO(0xc001002b, "AMD_K8_UNK_c001_002b", 0), + MVO(0xc001002c, "AMD_K8_UNK_c001_002c", 0), + MVO(0xc001002d, "AMD_K8_UNK_c001_002d", 0), + RFN(0xc0010030, 0xc0010035, "AMD_K8_CPU_NAME_n", AmdK8CpuNameN, AmdK8CpuNameN), + MFX(0xc001003e, "AMD_K8_HTC", AmdK8HwThermalCtrl, 
AmdK8HwThermalCtrl, 0x664c0005, UINT64_C(0xffffffff90008838), 0), /* value=0x664c0005 */ + MFX(0xc001003f, "AMD_K8_STC", AmdK8SwThermalCtrl, AmdK8SwThermalCtrl, 0, UINT64_C(0xffffffff9fffffdf), 0), /* value=0x60000000 */ + MVO(0xc0010043, "AMD_K8_THERMTRIP_STATUS", 0x20), + MFX(0xc0010044, "AMD_K8_MC_CTL_MASK_0", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x0, UINT64_C(0xfffffffffffffc00), 0), /* value=0x0 */ + MFX(0xc0010045, "AMD_K8_MC_CTL_MASK_1", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x1, UINT64_C(0xffffffffff004d01), 0), /* value=0x48080 */ + MFX(0xc0010046, "AMD_K8_MC_CTL_MASK_2", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x2, UINT64_C(0xffffffffffff8000), 0), /* value=0x0 */ + MFX(0xc0010047, "AMD_K8_MC_CTL_MASK_3", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x3, UINT64_MAX, 0), /* value=0x0 */ + MFX(0xc0010048, "AMD_K8_MC_CTL_MASK_4", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x4, ~(uint64_t)UINT32_MAX, 0), /* value=0x780400 */ + MFX(0xc0010049, "AMD_K8_MC_CTL_MASK_5", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x5, UINT64_C(0xffffffffffffe000), 0), /* value=0x0 */ + MFX(0xc001004a, "AMD_K8_MC_CTL_MASK_6", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x6, UINT64_C(0xffffffffffffffc0), 0), /* value=0x0 */ + RFN(0xc0010050, 0xc0010053, "AMD_K8_SMI_ON_IO_TRAP_n", AmdK8SmiOnIoTrapN, AmdK8SmiOnIoTrapN), + MFX(0xc0010054, "AMD_K8_SMI_ON_IO_TRAP_CTL_STS", AmdK8SmiOnIoTrapCtlSts, AmdK8SmiOnIoTrapCtlSts, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010055, "AMD_K8_INT_PENDING_MSG", AmdK8IntPendingMessage, AmdK8IntPendingMessage, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x20000800 */ + MFX(0xc0010056, "AMD_K8_SMI_TRIGGER_IO_CYCLE", AmdK8SmiTriggerIoCycle, AmdK8SmiTriggerIoCycle, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x2000000 */ + MFX(0xc0010058, "AMD_10H_MMIO_CFG_BASE_ADDR", AmdFam10hMmioCfgBaseAddr, AmdFam10hMmioCfgBaseAddr, 0, UINT64_C(0xffff0000000fffc0), 0), /* value=0xe0000021 */ + MFX(0xc0010059, "AMD_10H_TRAP_CTL?", AmdFam10hTrapCtlMaybe, AmdFam10hTrapCtlMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVX(0xc001005a, "AMD_10H_UNK_c001_005a", 0, 0, 0), + MVX(0xc001005b, "AMD_10H_UNK_c001_005b", 0, 0, 0), + MVX(0xc001005c, "AMD_10H_UNK_c001_005c", 0, 0, 0), + MVX(0xc001005d, "AMD_10H_UNK_c001_005d", 0, 0, 0), + MVO(0xc0010060, "AMD_K8_BIST_RESULT", 0), + MFX(0xc0010061, "AMD_10H_P_ST_CUR_LIM", AmdFam10hPStateCurLimit, ReadOnly, 0x40, 0, 0), /* value=0x40 */ + MFX(0xc0010062, "AMD_10H_P_ST_CTL", AmdFam10hPStateControl, AmdFam10hPStateControl, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + MFX(0xc0010063, "AMD_10H_P_ST_STS", AmdFam10hPStateStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0xc0010064, "AMD_10H_P_ST_0", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x800001b10000161a), UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x800001b1`0000161a */ + MFX(0xc0010065, "AMD_10H_P_ST_1", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x800001b100001a17), UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x800001b1`00001a17 */ + MFX(0xc0010066, "AMD_10H_P_ST_2", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000017300003014), UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x80000173`00003014 */ + MFX(0xc0010067, "AMD_10H_P_ST_3", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000016300003a11), UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x80000163`00003a11 */ + MFX(0xc0010068, "AMD_10H_P_ST_4", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000014900004c0b), UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x80000149`00004c0b */ + MFX(0xc0010069, "AMD_10H_P_ST_5", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000013100006205), 
UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x80000131`00006205 */ + MFX(0xc001006a, "AMD_10H_P_ST_6", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x800001200000724c), UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x80000120`0000724c */ + MFX(0xc001006b, "AMD_10H_P_ST_7", AmdFam10hPStateN, AmdFam10hPStateN, 0, UINT64_C(0x7ffffc00ffbf0000), 0), /* value=0x0 */ + MFX(0xc0010070, "AMD_10H_COFVID_CTL", AmdFam10hCofVidControl, AmdFam10hCofVidControl, 0x40011a17, UINT64_C(0xffffffff00b80000), 0), /* value=0x40011a17 */ + MFX(0xc0010071, "AMD_10H_COFVID_STS", AmdFam10hCofVidStatus, AmdFam10hCofVidStatus, UINT64_C(0x18000064006724c), UINT64_MAX, 0), /* value=0x1800006`4006724c */ + MFX(0xc0010073, "AMD_10H_C_ST_IO_BASE_ADDR", AmdFam10hCStateIoBaseAddr, AmdFam10hCStateIoBaseAddr, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0x814 */ + MFX(0xc0010074, "AMD_10H_CPU_WD_TMR_CFG", AmdFam10hCpuWatchdogTimer, AmdFam10hCpuWatchdogTimer, 0, UINT64_C(0xffffffffffffff80), 0), /* value=0x0 */ + MFX(0xc0010111, "AMD_K8_SMM_BASE", AmdK8SmmBase, AmdK8SmmBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xcdef8800 */ + MFX(0xc0010112, "AMD_K8_SMM_ADDR", AmdK8SmmAddr, AmdK8SmmAddr, 0, UINT64_C(0xffff00000001ffff), 0), /* value=0xcdf00000 */ + MFX(0xc0010113, "AMD_K8_SMM_MASK", AmdK8SmmMask, AmdK8SmmMask, 0, UINT64_C(0xffff0000000188c0), 0), /* value=0xffff`fff00003 */ + MFX(0xc0010114, "AMD_K8_VM_CR", AmdK8VmCr, AmdK8VmCr, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xffffffe0)), /* value=0x8 */ + MFX(0xc0010115, "AMD_K8_IGNNE", AmdK8IgnNe, AmdK8IgnNe, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xfffffffe)), /* value=0x0 */ + MFX(0xc0010117, "AMD_K8_VM_HSAVE_PA", AmdK8VmHSavePa, AmdK8VmHSavePa, 0, 0, UINT64_C(0xffff000000000fff)), /* value=0x0 */ + MFN(0xc0010118, "AMD_10H_VM_LOCK_KEY", AmdFam10hVmLockKey, AmdFam10hVmLockKey), /* value=0x0 */ + MFN(0xc0010119, "AMD_10H_SSM_LOCK_KEY", AmdFam10hSmmLockKey, AmdFam10hSmmLockKey), /* value=0x0 */ + MFX(0xc001011a, "AMD_10H_LOCAL_SMI_STS", AmdFam10hLocalSmiStatus, AmdFam10hLocalSmiStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010140, "AMD_10H_OSVW_ID_LEN", AmdFam10hOsVisWrkIdLength, AmdFam10hOsVisWrkIdLength, 0x4, 0, 0), /* value=0x4 */ + MFN(0xc0010141, "AMD_10H_OSVW_STS", AmdFam10hOsVisWrkStatus, AmdFam10hOsVisWrkStatus), /* value=0x0 */ + MFX(0xc0010200, "AMD_K8_PERF_CTL_0", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x0, UINT64_C(0xfffffcf000200000), 0), /* value=0x0 */ + MFX(0xc0010201, "AMD_K8_PERF_CTR_0", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x0, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010202, "AMD_K8_PERF_CTL_1", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x1, UINT64_C(0xfffffcf000200000), 0), /* value=0x0 */ + MFX(0xc0010203, "AMD_K8_PERF_CTR_1", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x1, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010204, "AMD_K8_PERF_CTL_2", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x2, UINT64_C(0xfffffcf000200000), 0), /* value=0x0 */ + MFX(0xc0010205, "AMD_K8_PERF_CTR_2", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x2, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010206, "AMD_K8_PERF_CTL_3", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x3, UINT64_C(0xfffffcf000200000), 0), /* value=0x0 */ + MFX(0xc0010207, "AMD_K8_PERF_CTR_3", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x3, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010208, "AMD_K8_PERF_CTL_4", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x4, UINT64_C(0xfffffcf000200000), 0), /* value=0x0 */ + MFX(0xc0010209, "AMD_K8_PERF_CTR_4", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x4, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc001020a, 
"AMD_K8_PERF_CTL_5", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x5, UINT64_C(0xfffffcf000200000), 0), /* value=0x0 */ + MFX(0xc001020b, "AMD_K8_PERF_CTR_5", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x5, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010240, "AMD_15H_NB_PERF_CTL_0", AmdFam15hNorthbridgePerfCtlN, AmdFam15hNorthbridgePerfCtlN, 0x0, UINT64_C(0xfffffe00ffa70000), 0), /* value=0x0 */ + MFX(0xc0010241, "AMD_15H_NB_PERF_CTR_0", AmdFam15hNorthbridgePerfCtrN, AmdFam15hNorthbridgePerfCtrN, 0x0, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010242, "AMD_15H_NB_PERF_CTL_1", AmdFam15hNorthbridgePerfCtlN, AmdFam15hNorthbridgePerfCtlN, 0x1, UINT64_C(0xfffffe00ffa70000), 0), /* value=0x0 */ + MFX(0xc0010243, "AMD_15H_NB_PERF_CTR_1", AmdFam15hNorthbridgePerfCtrN, AmdFam15hNorthbridgePerfCtrN, 0x1, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010244, "AMD_15H_NB_PERF_CTL_2", AmdFam15hNorthbridgePerfCtlN, AmdFam15hNorthbridgePerfCtlN, 0x2, UINT64_C(0xfffffe00ffa70000), 0), /* value=0x0 */ + MFX(0xc0010245, "AMD_15H_NB_PERF_CTR_2", AmdFam15hNorthbridgePerfCtrN, AmdFam15hNorthbridgePerfCtrN, 0x2, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0010246, "AMD_15H_NB_PERF_CTL_3", AmdFam15hNorthbridgePerfCtlN, AmdFam15hNorthbridgePerfCtlN, 0x3, UINT64_C(0xfffffe00ffa70000), 0), /* value=0x0 */ + MFX(0xc0010247, "AMD_15H_NB_PERF_CTR_3", AmdFam15hNorthbridgePerfCtrN, AmdFam15hNorthbridgePerfCtrN, 0x3, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFX(0xc0011000, "AMD_K7_MCODE_CTL", AmdK7MicrocodeCtl, AmdK7MicrocodeCtl, 0x30000, ~(uint64_t)UINT32_MAX, 0x204), /* value=0x30000 */ + MFX(0xc0011001, "AMD_K7_APIC_CLUSTER_ID", AmdK7ClusterIdMaybe, AmdK7ClusterIdMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0011003, "AMD_K8_CPUID_CTL_STD06", AmdK8CpuIdCtlStd06hEcx, AmdK8CpuIdCtlStd06hEcx, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x1 */ + MFN(0xc0011004, "AMD_K8_CPUID_CTL_STD01", AmdK8CpuIdCtlStd01hEdcx, AmdK8CpuIdCtlStd01hEdcx), /* value=0x1e98220b`178bfbff */ + MFN(0xc0011005, "AMD_K8_CPUID_CTL_EXT01", AmdK8CpuIdCtlExt01hEdcx, AmdK8CpuIdCtlExt01hEdcx), /* value=0x1c9ffff`2fd3fbff */ + MFX(0xc0011006, "AMD_K7_DEBUG_STS?", AmdK7DebugStatusMaybe, AmdK7DebugStatusMaybe, 0, UINT64_C(0xffffffff00000080), 0), /* value=0x10 */ + MFN(0xc0011007, "AMD_K7_BH_TRACE_BASE?", AmdK7BHTraceBaseMaybe, AmdK7BHTraceBaseMaybe), /* value=0x0 */ + MFN(0xc0011008, "AMD_K7_BH_TRACE_PTR?", AmdK7BHTracePtrMaybe, AmdK7BHTracePtrMaybe), /* value=0x0 */ + MFN(0xc0011009, "AMD_K7_BH_TRACE_LIM?", AmdK7BHTraceLimitMaybe, AmdK7BHTraceLimitMaybe), /* value=0x0 */ + MFX(0xc001100a, "AMD_K7_HDT_CFG?", AmdK7HardwareDebugToolCfgMaybe, AmdK7HardwareDebugToolCfgMaybe, 0, UINT64_C(0xffffffff00800000), 0), /* value=0x0 */ + MFX(0xc001100b, "AMD_K7_FAST_FLUSH_COUNT?", AmdK7FastFlushCountMaybe, AmdK7FastFlushCountMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x7c0 */ + MFX(0xc001100c, "AMD_K7_NODE_ID", AmdK7NodeId, AmdK7NodeId, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x80 */ + MVX(0xc001100e, "AMD_K8_WRMSR_BP?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100f, "AMD_K8_WRMSR_BP_MASK?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011010, "AMD_K8_BH_TRACE_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011011, "AMD_K8_BH_TRACE_USRD?", 0, 0, 0), /* value=0xfffffcf0`093634f0 */ + MVI(0xc0011012, "AMD_K7_UNK_c001_1012", UINT32_MAX), + MVI(0xc0011013, "AMD_K7_UNK_c001_1013", UINT64_MAX), + MVX(0xc0011014, "AMD_K8_XCPT_BP_RIP?", 0, 0, 0), + MVX(0xc0011015, "AMD_K8_XCPT_BP_RIP_MASK?", 0, 0, 0), + MVX(0xc0011016, 
"AMD_K8_COND_HDT_VAL?", 0, 0, 0), + MVX(0xc0011017, "AMD_K8_COND_HDT_VAL_MASK?", 0, 0, 0), + MVX(0xc0011018, "AMD_K8_XCPT_BP_CTL?", 0, 0, 0), + MVX(0xc001101d, "AMD_K8_NB_BIST?", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0xc001101e, "AMD_K8_THERMTRIP_2?", 0x20), /* Villain? */ + MVX(0xc001101f, "AMD_K8_NB_CFG?", UINT64_C(0x40000000810008), 0, 0), + MFX(0xc0011020, "AMD_K7_LS_CFG", AmdK7LoadStoreCfg, AmdK7LoadStoreCfg, 0, UINT64_C(0x3fffedafbffe2a), 0), /* value=0x0 */ + MFW(0xc0011021, "AMD_K7_IC_CFG", AmdK7InstrCacheCfg, AmdK7InstrCacheCfg, UINT64_C(0xffffff0000000000)), /* value=0x0 */ + MFX(0xc0011022, "AMD_K7_DC_CFG", AmdK7DataCacheCfg, AmdK7DataCacheCfg, 0, UINT64_C(0x1ffffbfffff13e0), 0), /* value=0x0 */ + MFX(0xc0011023, "AMD_15H_CU_CFG", AmdFam15hCombUnitCfg, AmdFam15hCombUnitCfg, 0x220, UINT64_C(0x3ff03c760042000), 0), /* value=0x80004000`00000220 */ + MFX(0xc0011024, "AMD_K7_DEBUG_CTL_2?", AmdK7DebugCtl2Maybe, AmdK7DebugCtl2Maybe, 0, UINT64_C(0xfffffffffffffe04), 0), /* value=0x0 */ + MFN(0xc0011025, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMatchMaybe, AmdK7Dr0DataMatchMaybe), /* value=0x0 */ + MFN(0xc0011026, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMaskMaybe, AmdK7Dr0DataMaskMaybe), /* value=0x0 */ + MFX(0xc0011027, "AMD_K7_DR0_ADDR_MASK", AmdK7DrXAddrMaskN, AmdK7DrXAddrMaskN, 0x0, UINT64_C(0xfffffffffffff000), 0), /* value=0x0 */ + MFX(0xc0011028, "AMD_15H_FP_CFG", AmdFam15hFpuCfg, AmdFam15hFpuCfg, 0, UINT64_C(0xffffe000000000ff), 0), /* value=0x40`e91d0000 */ + MFX(0xc0011029, "AMD_15H_DC_CFG", AmdFam15hDecoderCfg, AmdFam15hDecoderCfg, 0, UINT64_C(0xffffffffc0188001), 0), /* value=0x488400 */ + MFX(0xc001102a, "AMD_15H_CU_CFG2", AmdFam15hCombUnitCfg2, AmdFam15hCombUnitCfg2, 0, UINT64_C(0xfffbfb8ff2fc623f), 0), /* value=0x40040`00000cc0 */ + MFX(0xc001102b, "AMD_15H_CU_CFG3", AmdFam15hCombUnitCfg3, AmdFam15hCombUnitCfg3, 0, UINT64_C(0xffe0027afff00000), 0), /* value=0x33400`00002b93 */ + MFX(0xc001102c, "AMD_15H_EX_CFG", AmdFam15hExecUnitCfg, AmdFam15hExecUnitCfg, 0x7aac0, UINT64_C(0xffb0c003fbe00024), 0), /* value=0x400`0007aac0 */ + MFX(0xc0011030, "AMD_10H_IBS_FETCH_CTL", AmdFam10hIbsFetchCtl, AmdFam10hIbsFetchCtl, 0, UINT64_C(0xfdfeffffffff0000), 0), /* value=0x0 */ + MFI(0xc0011031, "AMD_10H_IBS_FETCH_LIN_ADDR", AmdFam10hIbsFetchLinAddr), /* value=0x0 */ + MFI(0xc0011032, "AMD_10H_IBS_FETCH_PHYS_ADDR", AmdFam10hIbsFetchPhysAddr), /* value=0x0 */ + MFX(0xc0011033, "AMD_10H_IBS_OP_EXEC_CTL", AmdFam10hIbsOpExecCtl, AmdFam10hIbsOpExecCtl, 0, UINT64_C(0xf8000000f8010000), 0), /* value=0x0 */ + MFN(0xc0011034, "AMD_10H_IBS_OP_RIP", AmdFam10hIbsOpRip, AmdFam10hIbsOpRip), /* value=0x0 */ + MFX(0xc0011035, "AMD_10H_IBS_OP_DATA", AmdFam10hIbsOpData, AmdFam10hIbsOpData, 0, UINT64_C(0xffffffc000000000), 0), /* value=0x0 */ + MFX(0xc0011036, "AMD_10H_IBS_OP_DATA2", AmdFam10hIbsOpData2, AmdFam10hIbsOpData2, 0, UINT64_C(0xffffffffffffffc8), 0), /* value=0x0 */ + MFX(0xc0011037, "AMD_10H_IBS_OP_DATA3", AmdFam10hIbsOpData3, AmdFam10hIbsOpData3, 0, UINT64_C(0xffff0000fff00400), 0), /* value=0x0 */ + MFN(0xc0011038, "AMD_10H_IBS_DC_LIN_ADDR", AmdFam10hIbsDcLinAddr, AmdFam10hIbsDcLinAddr), /* value=0x0 */ + MFX(0xc0011039, "AMD_10H_IBS_DC_PHYS_ADDR", AmdFam10hIbsDcPhysAddr, AmdFam10hIbsDcPhysAddr, 0, UINT64_C(0xffff000000000000), 0), /* value=0x0 */ + MFO(0xc001103a, "AMD_10H_IBS_CTL", AmdFam10hIbsCtl), /* value=0x100 */ + MFN(0xc001103b, "AMD_14H_IBS_BR_TARGET", AmdFam14hIbsBrTarget, AmdFam14hIbsBrTarget), /* value=0x0 */ + MVX(0xc0011040, "AMD_15H_UNK_c001_1040", 0, UINT64_C(0xffe0000000000003), 0), + 
MVX(0xc0011041, "AMD_15H_UNK_c001_1041", UINT64_C(0x99dd57b219), 0xa0c820, 0), + MVX(0xc0011042, "AMD_15H_UNK_c001_1042", 0, 0, 0), + MVX(0xc0011043, "AMD_15H_UNK_c001_1043", UINT64_C(0x300000438), 0, 0), + MVX(0xc0011044, "AMD_15H_UNK_c001_1044", UINT64_C(0x300000438), 0, 0), + MVX(0xc0011045, "AMD_15H_UNK_c001_1045", UINT64_C(0x300000420), 0, 0), + MVX(0xc0011046, "AMD_15H_UNK_c001_1046", UINT64_C(0x300000420), 0, 0), + MVX(0xc0011047, "AMD_15H_UNK_c001_1047", 0, UINT64_C(0xffff000000000000), 0), + MVX(0xc0011048, "AMD_15H_UNK_c001_1048", 0xc000001, UINT64_C(0xffff000000000000), 0), + MVX(0xc0011049, "AMD_15H_UNK_c001_1049", 0, UINT64_C(0xffff000000000000), 0), + MVX(0xc001104a, "AMD_15H_UNK_c001_104a", 0, UINT64_C(0xffff000000000000), 0), + MVX(0xc001104b, "AMD_15H_UNK_c001_104b", 0, 0, 0), + MVX(0xc001104c, "AMD_15H_UNK_c001_104c", 0, 0, 0), + MVX(0xc001104d, "AMD_15H_UNK_c001_104d", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001104e, "AMD_15H_UNK_c001_104e", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc001104f, "AMD_15H_UNK_c001_104f", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011050, "AMD_15H_UNK_c001_1050", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011051, "AMD_15H_UNK_c001_1051", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011052, "AMD_15H_UNK_c001_1052", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011053, "AMD_15H_UNK_c001_1053", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011054, "AMD_15H_UNK_c001_1054", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011055, "AMD_15H_UNK_c001_1055", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011056, "AMD_15H_UNK_c001_1056", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011057, "AMD_15H_UNK_c001_1057", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011058, "AMD_15H_UNK_c001_1058", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc0011059, "AMD_15H_UNK_c001_1059", 0, UINT64_C(0xfffffc0000000000), 0), + MVX(0xc001105a, "AMD_15H_UNK_c001_105a", UINT64_C(0x3060c183060c183), UINT64_C(0x8000000000000000), 0), + MVX(0xc001105b, "AMD_15H_UNK_c001_105b", UINT64_C(0x318c6318c60c183), UINT64_C(0xe000000000000000), 0), + MVX(0xc001105c, "AMD_15H_UNK_c001_105c", 0, UINT64_C(0xff00000000000000), 0), + MVX(0xc001105d, "AMD_15H_UNK_c001_105d", 0, UINT64_C(0xff00000000000000), 0), + MVX(0xc001105e, "AMD_15H_UNK_c001_105e", 0, UINT64_C(0xfffffffffffffc00), 0), + MVX(0xc001105f, "AMD_15H_UNK_c001_105f", 0, UINT64_C(0xffff000000000000), 0), + MVX(0xc0011060, "AMD_15H_UNK_c001_1060", 0, UINT64_C(0xffff000000000000), 0), + MVX(0xc0011061, "AMD_15H_UNK_c001_1061", 0, 0, 0), + MVX(0xc0011062, "AMD_15H_UNK_c001_1062", 0, UINT64_C(0xffffffffffffe000), 0), + MVX(0xc0011063, "AMD_15H_UNK_c001_1063", 0, UINT64_C(0xfffffffffffe4000), 0), + MVX(0xc0011064, "AMD_15H_UNK_c001_1064", 0x1, UINT64_C(0xfffffffffffff000), 0), + MVX(0xc0011065, "AMD_15H_UNK_c001_1065", 0x1, UINT64_C(0xfffffffff0000000), 0), + MVX(0xc0011066, "AMD_15H_UNK_c001_1066", 0, 0, 0), + MVX(0xc0011067, "AMD_15H_UNK_c001_1067", 0x1, UINT64_C(0xffffffffffffff80), 0), + MVX(0xc0011068, "AMD_15H_UNK_c001_1068", 0, 0, 0), + MVX(0xc0011069, "AMD_15H_UNK_c001_1069", 0, UINT64_C(0xffffffffffff0000), 0), + MVX(0xc001106a, "AMD_15H_UNK_c001_106a", 0x1, 0, 0), + MVX(0xc001106b, "AMD_15H_UNK_c001_106b", 0, UINT64_C(0xfffffffffffffff0), 0), + MVX(0xc001106c, "AMD_15H_UNK_c001_106c", 0x1, UINT64_C(0xffffffffffff0000), 0), + MVX(0xc001106d, "AMD_15H_UNK_c001_106d", 0x1, UINT64_C(0xf000000000000080), 0), + MVX(0xc001106e, "AMD_15H_UNK_c001_106e", 0x1, UINT64_C(0xffffffffffff0000), 0), + MVX(0xc001106f, 
"AMD_15H_UNK_c001_106f", 0x1, UINT64_C(0xfffffffffffff800), 0), + MVI(0xc0011070, "AMD_15H_UNK_c001_1070", UINT64_C(0x20000000000)), + MVX(0xc0011071, "AMD_15H_UNK_c001_1071", 0x400000, UINT64_C(0xffffffff01ffffff), 0), + MVI(0xc0011072, "AMD_15H_UNK_c001_1072", UINT64_C(0x101592c00000021)), + MVI(0xc0011073, "AMD_15H_UNK_c001_1073", UINT64_C(0xec541c0050000000)), + MVX(0xc0011080, "AMD_15H_UNK_c001_1080", 0, 0, 0), +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for AMD FX(tm)-8150 Eight-Core Processor. + */ +static CPUMDBENTRY const g_Entry_AMD_FX_8150_Eight_Core = +{ + /*.pszName = */ "AMD FX-8150 Eight-Core", + /*.pszFullName = */ "AMD FX(tm)-8150 Eight-Core Processor", + /*.enmVendor = */ CPUMCPUVENDOR_AMD, + /*.uFamily = */ 21, + /*.uModel = */ 1, + /*.uStepping = */ 2, + /*.enmMicroarch = */ kCpumMicroarch_AMD_15h_Bulldozer, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 48, + /*.fMxCsrMask = */ 0x2ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_AMD_FX_8150_Eight_Core), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_FX_8150_Eight_Core)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_AMD_FX_8150_Eight_Core)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_AMD_FX_8150_Eight_Core), +}; + +#endif /* !VBOX_CPUDB_AMD_FX_8150_Eight_Core_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h b/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h new file mode 100644 index 00000000..ba260643 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/AMD_Phenom_II_X6_1100T.h @@ -0,0 +1,272 @@ +/* $Id: AMD_Phenom_II_X6_1100T.h $ */ +/** @file + * CPU database entry "AMD Phenom II X6 1100T". + * Generated at 2013-12-17T13:39:08Z by VBoxCpuReport v4.3.53r91360 on linux.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_AMD_Phenom_II_X6_1100T_h +#define VBOX_CPUDB_AMD_Phenom_II_X6_1100T_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for AMD Phenom(tm) II X6 1100T Processor. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_AMD_Phenom_II_X6_1100T[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000006, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00100fa0, 0x01060800, 0x00802009, 0x178bfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00000000, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x8000001b, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00100fa0, 0x100000a1, 0x000837ff, 0xefd3fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000002, 0x00000000, 0x00000000, 0x20444d41, 0x6e656850, 0x74286d6f, 0x4920296d, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x36582049, 0x30313120, 0x50205430, 0x65636f72, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x726f7373, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0xff30ff10, 0xff30ff20, 0x40020140, 0x40020140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x20800000, 0x42004200, 0x02008140, 0x0030b140, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000003f9, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003030, 0x00000000, 0x00003005, 0x00000000, 0 }, + { 0x80000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000a, 0x00000000, 0x00000000, 0x00000001, 0x00000040, 0x00000000, 0x0000040f, 0 }, + { 0x8000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000014, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000015, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000016, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000017, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000019, 0x00000000, 0x00000000, 0xf0300000, 0x60100000, 0x00000000, 0x00000000, 0 }, + { 0x8000001a, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000001b, 0x00000000, 0x00000000, 0x0000001f, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for AMD Phenom(tm) II X6 1100T 
Processor. + */ +static CPUMMSRRANGE const g_aMsrRanges_AMD_Phenom_II_X6_1100T[] = +{ + MAL(0x00000000, "IA32_P5_MC_ADDR", 0x00000402), + MAL(0x00000001, "IA32_P5_MC_TYPE", 0x00000401), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x6db`c482d0b9 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffff0000000006ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVO(0x0000008b, "BBL_CR_D3|BIOS_SIGN", 0x10000bf), + MFX(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf, 0, UINT64_C(0x8644930520000000), 0), /* value=0xa66664d9`32c329b1 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x25`092f34be */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFX(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x8174c700 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x106, 0, 0), /* value=0x106 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + MFX(0x0000017b, "IA32_MCG_CTL", Ia32McgCtl, Ia32McgCtl, 0, UINT64_C(0xffffffffffffffc0), 0), /* value=0x3f */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, UINT64_C(0xffffffffffffff80), 0x40), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xffffefdf`00890004 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffeed0`c7b3ffbc */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0x0 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffff000000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`00000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffff000000000ff8)), /* value=0xbdf00000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`fff00800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffff000000000ff8)), /* value=0xbe000000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`fe000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffff000000000ff8)), /* value=0xc0000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`c0000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + 
MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0xfe, UINT64_C(0xffffffffffff8200)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffffff`8174b4f0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xffffffff`8174c860 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x3700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x7f01`3f916740 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffff8804`3fc00000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0xf2c95840 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RSN(0xc0000408, 0xc000040a, "AMD_10H_MC4_MISCn", AmdFam10hMc4MiscN, AmdFam10hMc4MiscN, 0, UINT64_C(0xff00f000ffffffff), 0), + RVI(0xc000040b, 0xc000040f, "AMD_10H_MC4_MISCn", 0), + RSN(0xc0010000, 0xc0010003, "AMD_K8_PERF_CTL_n", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x0, UINT64_C(0xfffffcf000200000), 0), + RSN(0xc0010004, 0xc0010007, "AMD_K8_PERF_CTR_n", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x0, UINT64_C(0xffff000000000000), 0), + MFX(0xc0010010, 
"AMD_K8_SYS_CFG", AmdK8SysCfg, AmdK8SysCfg, 0x760600, UINT64_C(0xffffffffff80f8ff), 0), /* value=0x760600 */ + MFX(0xc0010015, "AMD_K8_HW_CFG", AmdK8HwCr, AmdK8HwCr, 0x1000031, UINT64_C(0xffffffff00006020), 0), /* value=0x1000031 */ + MFW(0xc0010016, "AMD_K8_IORR_BASE_0", AmdK8IorrBaseN, AmdK8IorrBaseN, UINT64_C(0xffff000000000fe7)), /* value=0x3`40200000 */ + MFW(0xc0010017, "AMD_K8_IORR_MASK_0", AmdK8IorrMaskN, AmdK8IorrMaskN, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0xc0010018, "AMD_K8_IORR_BASE_1", AmdK8IorrBaseN, AmdK8IorrBaseN, 0x1, UINT64_C(0xffff000000000fe7), 0), /* value=0x0 */ + MFX(0xc0010019, "AMD_K8_IORR_MASK_1", AmdK8IorrMaskN, AmdK8IorrMaskN, 0x1, UINT64_C(0xffff0000000007ff), 0), /* value=0x0 */ + MFW(0xc001001a, "AMD_K8_TOP_MEM", AmdK8TopOfMemN, AmdK8TopOfMemN, UINT64_C(0xffff0000007fffff)), /* value=0xc0000000 */ + MFX(0xc001001d, "AMD_K8_TOP_MEM2", AmdK8TopOfMemN, AmdK8TopOfMemN, 0x1, UINT64_C(0xffff0000007fffff), 0), /* value=0x4`40000000 */ + MFN(0xc001001f, "AMD_K8_NB_CFG1", AmdK8NbCfg1, AmdK8NbCfg1), /* value=0x584000`00000008 */ + MFN(0xc0010020, "AMD_K8_PATCH_LOADER", WriteOnly, AmdK8PatchLoader), + MFN(0xc0010021, "AMD_10H_UNK_c001_0021", WriteOnly, IgnoreWrite), + MFX(0xc0010022, "AMD_K8_MC_XCPT_REDIR", AmdK8McXcptRedir, AmdK8McXcptRedir, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RFN(0xc0010030, 0xc0010035, "AMD_K8_CPU_NAME_n", AmdK8CpuNameN, AmdK8CpuNameN), + MFX(0xc001003e, "AMD_K8_HTC", AmdK8HwThermalCtrl, AmdK8HwThermalCtrl, 0x4a4c0005, UINT64_C(0xffffffffb0008838), 0), /* value=0x4a4c0005 */ + MFX(0xc001003f, "AMD_K8_STC", AmdK8SwThermalCtrl, AmdK8SwThermalCtrl, 0, UINT64_C(0xffffffffc00088c0), 0), /* value=0x10000000 */ + MVO(0xc0010043, "AMD_K8_THERMTRIP_STATUS", 0x1dc01430), + MFX(0xc0010044, "AMD_K8_MC_CTL_MASK_0", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x0, UINT64_C(0xffffffffffffff00), 0), /* value=0x80 */ + MFX(0xc0010045, "AMD_K8_MC_CTL_MASK_1", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x1, ~(uint64_t)UINT32_MAX, 0), /* value=0x80 */ + MFX(0xc0010046, "AMD_K8_MC_CTL_MASK_2", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x2, UINT64_C(0xfffffffffffff000), 0), /* value=0x200 */ + MFX(0xc0010047, "AMD_K8_MC_CTL_MASK_3", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x3, UINT64_C(0xfffffffffffffffc), 0), /* value=0x0 */ + MFX(0xc0010048, "AMD_K8_MC_CTL_MASK_4", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x4, UINT64_C(0xffffffffc0000000), 0), /* value=0x780400 */ + MFX(0xc0010049, "AMD_K8_MC_CTL_MASK_5", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x5, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + RFN(0xc0010050, 0xc0010053, "AMD_K8_SMI_ON_IO_TRAP_n", AmdK8SmiOnIoTrapN, AmdK8SmiOnIoTrapN), + MFX(0xc0010054, "AMD_K8_SMI_ON_IO_TRAP_CTL_STS", AmdK8SmiOnIoTrapCtlSts, AmdK8SmiOnIoTrapCtlSts, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010055, "AMD_K8_INT_PENDING_MSG", AmdK8IntPendingMessage, AmdK8IntPendingMessage, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x14000815 */ + MFX(0xc0010056, "AMD_K8_SMI_TRIGGER_IO_CYCLE", AmdK8SmiTriggerIoCycle, AmdK8SmiTriggerIoCycle, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x2000000 */ + MFX(0xc0010058, "AMD_10H_MMIO_CFG_BASE_ADDR", AmdFam10hMmioCfgBaseAddr, AmdFam10hMmioCfgBaseAddr, 0, UINT64_C(0xffff0000000fffc0), 0), /* value=0xe0000021 */ + MFX(0xc0010059, "AMD_10H_TRAP_CTL?", AmdFam10hTrapCtlMaybe, AmdFam10hTrapCtlMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVX(0xc001005a, "AMD_10H_UNK_c001_005a", 0, 0, 0), + MVX(0xc001005b, "AMD_10H_UNK_c001_005b", 0, 0, 0), + MVX(0xc001005c, "AMD_10H_UNK_c001_005c", 0, 0, 0), + MVX(0xc001005d, 
"AMD_10H_UNK_c001_005d", 0, 0, 0), + MVO(0xc0010060, "AMD_K8_BIST_RESULT", 0), + MFX(0xc0010061, "AMD_10H_P_ST_CUR_LIM", AmdFam10hPStateCurLimit, ReadOnly, 0x30, 0, 0), /* value=0x30 */ + MFX(0xc0010062, "AMD_10H_P_ST_CTL", AmdFam10hPStateControl, AmdFam10hPStateControl, 0x3, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x3 */ + MFX(0xc0010063, "AMD_10H_P_ST_STS", AmdFam10hPStateStatus, ReadOnly, 0x3, 0, 0), /* value=0x3 */ + MFX(0xc0010064, "AMD_10H_P_ST_0", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000019e40001015), 0, 0), /* value=0x8000019e`40001015 */ + MFX(0xc0010065, "AMD_10H_P_ST_1", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000019f40002411), 0, 0), /* value=0x8000019f`40002411 */ + MFX(0xc0010066, "AMD_10H_P_ST_2", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000017540002809), 0, 0), /* value=0x80000175`40002809 */ + MFX(0xc0010067, "AMD_10H_P_ST_3", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000015540002c01), 0, 0), /* value=0x80000155`40002c01 */ + MFX(0xc0010068, "AMD_10H_P_ST_4", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000013340003840), 0, 0), /* value=0x80000133`40003840 */ + MFX(0xc0010070, "AMD_10H_COFVID_CTL", AmdFam10hCofVidControl, AmdFam10hCofVidControl, 0x40043840, UINT64_C(0xffffffff01b80000), 0), /* value=0x40043840 */ + MFX(0xc0010071, "AMD_10H_COFVID_STS", AmdFam10hCofVidStatus, AmdFam10hCofVidStatus, UINT64_C(0x140043840), UINT64_MAX, 0), /* value=0x1`40043840 */ + MFO(0xc0010073, "AMD_10H_C_ST_IO_BASE_ADDR", AmdFam10hCStateIoBaseAddr), /* value=0x814 */ + MFX(0xc0010074, "AMD_10H_CPU_WD_TMR_CFG", AmdFam10hCpuWatchdogTimer, AmdFam10hCpuWatchdogTimer, 0, UINT64_C(0xffffffffffffff80), 0), /* value=0x0 */ + MFX(0xc0010111, "AMD_K8_SMM_BASE", AmdK8SmmBase, AmdK8SmmBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xbdef8000 */ + MFX(0xc0010112, "AMD_K8_SMM_ADDR", AmdK8SmmAddr, AmdK8SmmAddr, 0, UINT64_C(0xffff00000001ffff), 0), /* value=0xbdf00000 */ + MFX(0xc0010113, "AMD_K8_SMM_MASK", AmdK8SmmMask, AmdK8SmmMask, 0, UINT64_C(0xffff0000000188c0), 0), /* value=0xffff`fff00003 */ + MFX(0xc0010114, "AMD_K8_VM_CR", AmdK8VmCr, AmdK8VmCr, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xffffffe0)), /* value=0x8 */ + MFX(0xc0010115, "AMD_K8_IGNNE", AmdK8IgnNe, AmdK8IgnNe, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xfffffffe)), /* value=0x0 */ + MFX(0xc0010117, "AMD_K8_VM_HSAVE_PA", AmdK8VmHSavePa, AmdK8VmHSavePa, 0, 0, UINT64_C(0xffff000000000fff)), /* value=0x0 */ + MFN(0xc0010118, "AMD_10H_VM_LOCK_KEY", AmdFam10hVmLockKey, AmdFam10hVmLockKey), /* value=0x0 */ + MFN(0xc0010119, "AMD_10H_SSM_LOCK_KEY", AmdFam10hSmmLockKey, AmdFam10hSmmLockKey), /* value=0x0 */ + MFX(0xc001011a, "AMD_10H_LOCAL_SMI_STS", AmdFam10hLocalSmiStatus, AmdFam10hLocalSmiStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010140, "AMD_10H_OSVW_ID_LEN", AmdFam10hOsVisWrkIdLength, AmdFam10hOsVisWrkIdLength, 0x4, 0, 0), /* value=0x4 */ + MFX(0xc0010141, "AMD_10H_OSVW_STS", AmdFam10hOsVisWrkStatus, AmdFam10hOsVisWrkStatus, 0xe, 0, 0), /* value=0xe */ + MFX(0xc0011000, "AMD_K7_MCODE_CTL", AmdK7MicrocodeCtl, AmdK7MicrocodeCtl, 0, ~(uint64_t)UINT32_MAX, 0x4), /* value=0x0 */ + MFX(0xc0011001, "AMD_K7_APIC_CLUSTER_ID", AmdK7ClusterIdMaybe, AmdK7ClusterIdMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0xc0011004, "AMD_K8_CPUID_CTL_STD01", AmdK8CpuIdCtlStd01hEdcx, AmdK8CpuIdCtlStd01hEdcx), /* value=0x802009`178bfbff */ + MFN(0xc0011005, "AMD_K8_CPUID_CTL_EXT01", AmdK8CpuIdCtlExt01hEdcx, AmdK8CpuIdCtlExt01hEdcx), /* value=0x837ff`efd3fbff */ + MFX(0xc0011006, "AMD_K7_DEBUG_STS?", 
AmdK7DebugStatusMaybe, AmdK7DebugStatusMaybe, 0, UINT64_C(0xffffffff00000080), 0), /* value=0x10 */ + MFN(0xc0011007, "AMD_K7_BH_TRACE_BASE?", AmdK7BHTraceBaseMaybe, AmdK7BHTraceBaseMaybe), /* value=0x0 */ + MFN(0xc0011008, "AMD_K7_BH_TRACE_PTR?", AmdK7BHTracePtrMaybe, AmdK7BHTracePtrMaybe), /* value=0x0 */ + MFN(0xc0011009, "AMD_K7_BH_TRACE_LIM?", AmdK7BHTraceLimitMaybe, AmdK7BHTraceLimitMaybe), /* value=0x0 */ + MFX(0xc001100a, "AMD_K7_HDT_CFG?", AmdK7HardwareDebugToolCfgMaybe, AmdK7HardwareDebugToolCfgMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc001100b, "AMD_K7_FAST_FLUSH_COUNT?", AmdK7FastFlushCountMaybe, AmdK7FastFlushCountMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x7c0 */ + MFX(0xc001100c, "AMD_K7_NODE_ID", AmdK7NodeId, AmdK7NodeId, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVX(0xc001100d, "AMD_K8_LOGICAL_CPUS_NUM?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100e, "AMD_K8_WRMSR_BP?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100f, "AMD_K8_WRMSR_BP_MASK?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011010, "AMD_K8_BH_TRACE_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011011, "AMD_K8_BH_TRACE_USRD?", 0, 0, 0), /* value=0x259a5de0`ffffffff */ + MVX(0xc0011014, "AMD_K8_XCPT_BP_RIP?", 0, 0, 0), + MVX(0xc0011015, "AMD_K8_XCPT_BP_RIP_MASK?", 0, 0, 0), + MVX(0xc0011016, "AMD_K8_COND_HDT_VAL?", 0, 0, 0), + MVX(0xc0011017, "AMD_K8_COND_HDT_VAL_MASK?", 0, 0, 0), + MVX(0xc0011018, "AMD_K8_XCPT_BP_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001101d, "AMD_K8_NB_BIST?", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0xc001101e, "AMD_K8_THERMTRIP_2?", 0x1dc01430), /* Villain? */ + MVX(0xc001101f, "AMD_K8_NB_CFG?", UINT64_C(0x58400000000008), 0, 0), + MFX(0xc0011020, "AMD_K7_LS_CFG", AmdK7LoadStoreCfg, AmdK7LoadStoreCfg, 0, UINT64_C(0xfffc012000000000), 0), /* value=0x20010`00001000 */ + MFW(0xc0011021, "AMD_K7_IC_CFG", AmdK7InstrCacheCfg, AmdK7InstrCacheCfg, ~(uint64_t)UINT32_MAX), /* value=0x0 */ + MFX(0xc0011022, "AMD_K7_DC_CFG", AmdK7DataCacheCfg, AmdK7DataCacheCfg, 0, UINT64_C(0xffc0000000000000), 0), /* value=0x9c`49000000 */ + MFN(0xc0011023, "AMD_K7_BU_CFG", AmdK7BusUnitCfg, AmdK7BusUnitCfg), /* Villain? 
value=0x10200020 */ + MFX(0xc0011024, "AMD_K7_DEBUG_CTL_2?", AmdK7DebugCtl2Maybe, AmdK7DebugCtl2Maybe, 0, UINT64_C(0xffffffffffffff00), 0), /* value=0x0 */ + MFN(0xc0011025, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMatchMaybe, AmdK7Dr0DataMatchMaybe), /* value=0x0 */ + MFN(0xc0011026, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMaskMaybe, AmdK7Dr0DataMaskMaybe), /* value=0x0 */ + MFX(0xc0011027, "AMD_K7_DR0_ADDR_MASK", AmdK7DrXAddrMaskN, AmdK7DrXAddrMaskN, 0x0, UINT64_C(0xfffffffffffff000), 0), /* value=0x0 */ + MVX(0xc0011028, "AMD_10H_UNK_c001_1028", 0, UINT64_C(0xfffffffffffffff8), 0), + MVX(0xc0011029, "AMD_10H_UNK_c001_1029", 0, ~(uint64_t)UINT32_MAX, 0), + MFX(0xc001102a, "AMD_10H_BU_CFG2", AmdFam10hBusUnitCfg2, AmdFam10hBusUnitCfg2, 0, UINT64_C(0xfff00000c0000000), 0), /* value=0x40050`01000040 */ + MFX(0xc0011030, "AMD_10H_IBS_FETCH_CTL", AmdFam10hIbsFetchCtl, AmdFam10hIbsFetchCtl, 0, UINT64_C(0xfdfcffff00000000), 0), /* value=0x140003`00000000 */ + MFI(0xc0011031, "AMD_10H_IBS_FETCH_LIN_ADDR", AmdFam10hIbsFetchLinAddr), /* value=0xffffffff`a08cf13e */ + MFI(0xc0011032, "AMD_10H_IBS_FETCH_PHYS_ADDR", AmdFam10hIbsFetchPhysAddr), /* value=0x4`24ce313e */ + MFX(0xc0011033, "AMD_10H_IBS_OP_EXEC_CTL", AmdFam10hIbsOpExecCtl, AmdFam10hIbsOpExecCtl, 0, UINT64_C(0xfffffffffff00000), 0), /* value=0x0 */ + MFN(0xc0011034, "AMD_10H_IBS_OP_RIP", AmdFam10hIbsOpRip, AmdFam10hIbsOpRip), /* value=0x4d231923 */ + MFI(0xc0011035, "AMD_10H_IBS_OP_DATA", AmdFam10hIbsOpData), /* value=0x12`7fc7bc0e */ + MFX(0xc0011036, "AMD_10H_IBS_OP_DATA2", AmdFam10hIbsOpData2, AmdFam10hIbsOpData2, 0, UINT64_C(0xffffffffffffffc8), 0), /* value=0x0 */ + MFI(0xc0011037, "AMD_10H_IBS_OP_DATA3", AmdFam10hIbsOpData3), /* value=0x0 */ + MFX(0xc0011038, "AMD_10H_IBS_DC_LIN_ADDR", AmdFam10hIbsDcLinAddr, AmdFam10hIbsDcLinAddr, 0, UINT64_C(0x7fffffffffff), 0), /* value=0x0 */ + MFI(0xc0011039, "AMD_10H_IBS_DC_PHYS_ADDR", AmdFam10hIbsDcPhysAddr), /* value=0x0 */ + MFO(0xc001103a, "AMD_10H_IBS_CTL", AmdFam10hIbsCtl), /* value=0x101 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for AMD Phenom(tm) II X6 1100T Processor. + */ +static CPUMDBENTRY const g_Entry_AMD_Phenom_II_X6_1100T = +{ + /*.pszName = */ "AMD Phenom II X6 1100T", + /*.pszFullName = */ "AMD Phenom(tm) II X6 1100T Processor", + /*.enmVendor = */ CPUMCPUVENDOR_AMD, + /*.uFamily = */ 16, + /*.uModel = */ 10, + /*.uStepping = */ 0, + /*.enmMicroarch = */ kCpumMicroarch_AMD_K10, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 48, + /*.fMxCsrMask = */ 0x2ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_AMD_Phenom_II_X6_1100T), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_AMD_Phenom_II_X6_1100T)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_AMD_Phenom_II_X6_1100T)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_AMD_Phenom_II_X6_1100T), +}; + +#endif /* !VBOX_CPUDB_AMD_Phenom_II_X6_1100T_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80186.h b/src/VBox/VMM/VMMR3/cpus/Intel_80186.h new file mode 100644 index 00000000..48551323 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_80186.h @@ -0,0 +1,75 @@ +/* $Id: Intel_80186.h $ */ +/** @file + * CPU database entry "Intel 80186". + * Handcrafted. 
+ */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_80186_h +#define VBOX_CPUDB_Intel_80186_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef CPUM_DB_STANDALONE +/** + * Fake CPUID leaves for Intel(R) 80186. + * + * We fake these to keep the CPUM ignorant of CPUs wihtout CPUID leaves + * and avoid having to seed CPUM::GuestFeatures filling with bits from the + * CPUMDBENTRY. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_80186[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000100, 0x00000100, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x31303820, 0x20203638, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00001414, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + +/** + * Database entry for Intel(R) 80186. + */ +static CPUMDBENTRY const g_Entry_Intel_80186 = +{ + /*.pszName = */ "Intel 80186", + /*.pszFullName = */ "Intel(R) 80186", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 2, + /*.uModel = */ 0, + /*.uStepping = */ 0, + /*.enmMicroarch = */ kCpumMicroarch_Intel_80186, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ CPUDB_F_EXECUTE_ALL_IN_IEM, + /*.cMaxPhysAddrWidth= */ 20, + /*.fMxCsrMask = */ 0, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80186), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80186)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ 0, + /*.cMsrRanges = */ 0, + /*.paMsrRanges = */ NULL, +}; + +#endif /* !VBOX_CPUDB_Intel_80186_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80286.h b/src/VBox/VMM/VMMR3/cpus/Intel_80286.h new file mode 100644 index 00000000..409b82a9 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_80286.h @@ -0,0 +1,75 @@ +/* $Id: Intel_80286.h $ */ +/** @file + * CPU database entry "Intel 80286". + * Handcrafted. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_80286_h +#define VBOX_CPUDB_Intel_80286_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef CPUM_DB_STANDALONE +/** + * Fake CPUID leaves for Intel(R) 80286. + * + * We fake these to keep the CPUM ignorant of CPUs wihtout CPUID leaves + * and avoid having to seed CPUM::GuestFeatures filling with bits from the + * CPUMDBENTRY. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_80286[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000200, 0x00000100, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x32303820, 0x20203638, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00001818, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + +/** + * Database entry for Intel(R) 80286. + */ +static CPUMDBENTRY const g_Entry_Intel_80286 = +{ + /*.pszName = */ "Intel 80286", + /*.pszFullName = */ "Intel(R) 80286", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 2, + /*.uModel = */ 0, + /*.uStepping = */ 0, + /*.enmMicroarch = */ kCpumMicroarch_Intel_80286, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ CPUDB_F_EXECUTE_ALL_IN_IEM, + /*.cMaxPhysAddrWidth= */ 24, + /*.fMxCsrMask = */ 0, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80286), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80286)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ 0, + /*.cMsrRanges = */ 0, + /*.paMsrRanges = */ NULL, +}; + +#endif /* !VBOX_CPUDB_Intel_80286_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80386.h b/src/VBox/VMM/VMMR3/cpus/Intel_80386.h new file mode 100644 index 00000000..9a931cbe --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_80386.h @@ -0,0 +1,75 @@ +/* $Id: Intel_80386.h $ */ +/** @file + * CPU database entry "Intel 80386". + * Handcrafted. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_80386_h +#define VBOX_CPUDB_Intel_80386_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef CPUM_DB_STANDALONE +/** + * Fake CPUID leaves for Intel(R) 80386. + * + * We fake these to keep the CPUM ignorant of CPUs withou CPUID leaves + * and avoid having to seed CPUM::GuestFeatures filling with bits from the + * CPUMDBENTRY. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_80386[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000300, 0x00000100, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x33303820, 0x20203638, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00001818, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + +/** + * Database entry for Intel(R) 80386. + */ +static CPUMDBENTRY const g_Entry_Intel_80386 = +{ + /*.pszName = */ "Intel 80386", + /*.pszFullName = */ "Intel(R) 80386", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 3, + /*.uModel = */ 0, + /*.uStepping = */ 0, + /*.enmMicroarch = */ kCpumMicroarch_Intel_80386, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ CPUDB_F_EXECUTE_ALL_IN_IEM, + /*.cMaxPhysAddrWidth= */ 24, + /*.fMxCsrMask = */ 0, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80386), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80386)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ 0, + /*.cMsrRanges = */ 0, + /*.paMsrRanges = */ NULL, +}; + +#endif /* !VBOX_CPUDB_Intel_80386_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_80486.h b/src/VBox/VMM/VMMR3/cpus/Intel_80486.h new file mode 100644 index 00000000..dcb2a3b4 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_80486.h @@ -0,0 +1,73 @@ +/* $Id: Intel_80486.h $ */ +/** @file + * CPU database entry "Intel 80486". + * Handcrafted. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VBOX_CPUDB_Intel_80486_h +#define VBOX_CPUDB_Intel_80486_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef CPUM_DB_STANDALONE +/** + * Fake CPUID leaves for Intel(R) 80486(DX2). + * + * The extended leaves are fake to make CPUM happy. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_80486[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000430, 0x00000100, 0x00000000, 0x00000111, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x34303820, 0x58443638, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x20202032, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00002020, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + +/** + * Database entry for Intel(R) 80486. + */ +static CPUMDBENTRY const g_Entry_Intel_80486 = +{ + /*.pszName = */ "Intel 80486", + /*.pszFullName = */ "Intel(R) 80486DX2", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 4, + /*.uModel = */ 3, + /*.uStepping = */ 0, + /*.enmMicroarch = */ kCpumMicroarch_Intel_80486, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 32, + /*.fMxCsrMask = */ 0, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_80486), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_80486)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ 0, + /*.cMsrRanges = */ 0, + /*.paMsrRanges = */ NULL, +}; + +#endif /* !VBOX_CPUDB_Intel_80486_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_8086.h b/src/VBox/VMM/VMMR3/cpus/Intel_8086.h new file mode 100644 index 00000000..c9af86ee --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_8086.h @@ -0,0 +1,75 @@ +/* $Id: Intel_8086.h $ */ +/** @file + * CPU database entry "Intel 8086". + * Handcrafted. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_8086_h +#define VBOX_CPUDB_Intel_8086_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef CPUM_DB_STANDALONE +/** + * Fake CPUID leaves for Intel(R) 8086. + * + * We fake these to keep the CPUM ignorant of CPUs wihtout CPUID leaves + * and avoid having to seed CPUM::GuestFeatures filling with bits from the + * CPUMDBENTRY. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_8086[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x38303820, 0x20202036, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00001414, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + +/** + * Database entry for Intel(R) 8086. + */ +static CPUMDBENTRY const g_Entry_Intel_8086 = +{ + /*.pszName = */ "Intel 8086", + /*.pszFullName = */ "Intel(R) 8086", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 2, + /*.uModel = */ 0, + /*.uStepping = */ 0, + /*.enmMicroarch = */ kCpumMicroarch_Intel_8086, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ CPUDB_F_EXECUTE_ALL_IN_IEM, + /*.cMaxPhysAddrWidth= */ 20, + /*.fMxCsrMask = */ 0, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_8086), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_8086)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ 0, + /*.cMsrRanges = */ 0, + /*.paMsrRanges = */ NULL, +}; + +#endif /* !VBOX_CPUDB_Intel_8086_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h new file mode 100644 index 00000000..7766b7c6 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Atom_330_1_60GHz.h @@ -0,0 +1,210 @@ +/* $Id: Intel_Atom_330_1_60GHz.h $ */ +/** @file + * CPU database entry "Intel Atom 330 1.60GHz". + * Generated at 2015-11-04T12:58:59Z by VBoxCpuReport v5.0.51r103818 on linux.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Atom_330_1_60GHz_h +#define VBOX_CPUDB_Intel_Atom_330_1_60GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Atom(TM) CPU 330 @ 1.60GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Atom_330_1_60GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000106c2, 0x01040800, 0x0040e31d, 0xbfe9fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x4fba5901, 0x0e3080c0, 0x00000000, 0x00000000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x04004121, 0x0140003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x04004122, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x04004143, 0x01c0003f, 0x000003ff, 0x00000001, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00000010, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000001, 0x00000002, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07280203, 0x00000000, 0x00000000, 0x00002501, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x20100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x746e4920, 0x52286c65, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x74412029, 0x54286d6f, 0x4320294d, 0x20205550, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20303333, 0x20402020, 0x30362e31, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02008040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003020, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Atom(TM) CPU 330 @ 1.60GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Atom_330_1_60GHz[] = +{ + MFI(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0x0 */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x5a7`e94bd2c0 */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0xc00008836ac1b), 0, 0), /* value=0xc0000`8836ac1b */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffffffff000006ff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000039, "C2_UNK_0000_0039", 0x1), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x1 */ + MVO(0x0000003f, "P6_UNK_0000_003f", 0), + RFN(0x00000040, 0x00000047, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchToN, IntelLastBranchToN), + RFN(0x00000060, 0x00000067, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, 0, UINT32_MAX), /* value=0x20d`00000000 */ + RSN(0x000000c1, 0x000000c2, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFX(0x000000c7, "IA32_PMC6", Ia32PmcN, Ia32PmcN, 0, UINT64_C(0xfff7bdefff7df7df), 0), /* value=0x16101c00`00000000 */ + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0x101, 0, 0), /* value=0x101 */ + MVO(0x000000ce, "IA32_PLATFORM_INFO", UINT64_C(0x1b1b0c004e4e0000)), + MVO(0x000000cf, "C2_UNK_0000_00cf", 0x1f), + MVO(0x000000e0, "C2_UNK_0000_00e0", 0x6800f0), + MVO(0x000000e1, "C2_UNK_0000_00e1", UINT32_C(0xf0f00000)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0xbfff, UINT64_C(0xfffffffffc804000)), /* value=0x26b001 */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x0 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xffffffffff800000)), /* value=0x0 */ + MVO(0x000000e5, "C2_UNK_0000_00e5", UINT32_C(0xd00a00f8)), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x63`19743600 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x63`199424b8 */ + MFX(0x000000ee, "C1_EXT_CONFIG", IntelCore1ExtConfig, IntelCore1ExtConfig, 0, UINT64_C(0xff7bdeffffc5ffff), 0), /* value=0x3384103 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MVX(0x00000116, "BBL_CR_ADDR", 0x3fc0, UINT64_C(0xfffffff00000001f), 0), + MVX(0x00000118, "BBL_CR_DECC", 0, UINT64_C(0xfffc0000fffc0000), 0), + MFX(0x00000119, "BBL_CR_CTL", IntelBblCrCtl, IntelBblCrCtl, 0x938008, 0x4080017f, ~(uint64_t)UINT32_MAX), /* value=0x938008 */ + MFN(0x0000011a, "BBL_CR_TRIG", WriteOnly, IgnoreWrite), + MVX(0x0000011b, "P6_UNK_0000_011b", 0, 0x1, UINT64_C(0xfffffffffffffffe)), + MVX(0x0000011c, "C2_UNK_0000_011c", 0xd96000, 0, UINT64_C(0xfffffffff0000000)), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, 0x7f00011f, UINT32_C(0xff83f81f), UINT64_C(0xffffffff007c06e0)), /* value=0x7f00011f */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, 
~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0xffffffff`81573970 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x805, 0, 0), /* value=0x805 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_MAX), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, ~(uint64_t)UINT32_MAX), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0, UINT32_C(0xfffee0c0), ~(uint64_t)UINT32_MAX), /* value=0x0 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0xc1b0c1b06000c1b), 0, 0), /* value=0xc1b0c1b`06000c1b */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0xc1b, 0, 0), /* Might bite. value=0xc1b */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, 0, UINT64_C(0xffffffffffffffe1)), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x3, 0, UINT64_C(0xffffffffff0000e0)), /* value=0x3 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x884c0000), UINT32_C(0xf87f03ff), UINT64_C(0xffffffff0780fc00)), /* value=0x884c0000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x61b, 0, 0), /* value=0x61b */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0, UINT32_C(0xffff0000), ~(uint64_t)UINT32_MAX), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x60940488, UINT64_C(0x366131884), UINT64_C(0xfffffff89908c372)), /* value=0x60940488 */ + MVX(0x000001aa, "P6_PIC_SENS_CFG", UINT32_C(0x800f0421), UINT64_C(0xffffffffff80000e), 0), + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffe03c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xffffffff`a07ac16e */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffffff`8105c4f0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffffff00000ff8)), /* value=0xe0000000 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xe0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x6 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x0 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", 
Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffffff00000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MVX(0x000002e0, "I7_SB_NO_EVICT_MODE", 0, 0, UINT64_C(0xffffffff7ffffffc)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MFX(0x00000309, "IA32_FIXED_CTR0", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffffff0000000000)), /* value=0x8c */ + MFX(0x0000030a, "IA32_FIXED_CTR1", Ia32FixedCtrN, Ia32FixedCtrN, 0x1, 0x81201, UINT64_C(0xffffff0000000000)), /* value=0xff`ad893763 */ + MFX(0x0000030b, "IA32_FIXED_CTR2", Ia32FixedCtrN, Ia32FixedCtrN, 0x2, 0, UINT64_C(0xffffff0000000000)), /* value=0x8f4 */ + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0xc1, 0, 0), /* value=0xc1 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0xb0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8fffffffc)), /* value=0x7`00000003 */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xc000000700000003), UINT64_C(0x3ffffff8fffffffc)), /* value=0x0 */ + MVX(0x000003ca, "TODO_0000_03ca", 0x10510, 0, UINT64_C(0xffffffffffe00000)), + MFX(0x000003f1, 
"IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0xfffffffffffffffe)), /* value=0x0 */ + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MVX(0x000004f8, "C2_UNK_0000_04f8", 0, 0, 0), + MVX(0x000004f9, "C2_UNK_0000_04f9", 0, 0, 0), + MVX(0x000004fa, "C2_UNK_0000_04fa", 0, 0, 0), + MVX(0x000004fb, "C2_UNK_0000_04fb", 0, 0, 0), + MVX(0x000004fc, "C2_UNK_0000_04fc", 0, 0, 0), + MVX(0x000004fd, "C2_UNK_0000_04fd", 0, 0, 0), + MVX(0x000004fe, "C2_UNK_0000_04fe", 0, 0, 0), + MVX(0x000004ff, "C2_UNK_0000_04ff", 0, 0, 0), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0xffff8800`d6ee1c00 */ + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffffff`815715d0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xffffffff`81573ad0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x47700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x7fe4`93136740 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffff8800`db500000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Atom(TM) CPU 330 @ 1.60GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Atom_330_1_60GHz = +{ + /*.pszName = */ "Intel Atom 330 1.60GHz", + /*.pszFullName = */ "Intel(R) Atom(TM) CPU 330 @ 1.60GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 28, + /*.uStepping = */ 2, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Atom_Bonnell, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_133MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 32, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Atom_330_1_60GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Atom_330_1_60GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x07280203, 0x00000000, 0x00000000, 0x00002501 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Atom_330_1_60GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Atom_330_1_60GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Atom_330_1_60GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core2_T7600_2_33GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core2_T7600_2_33GHz.h new file mode 100644 index 00000000..0411ef9a --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core2_T7600_2_33GHz.h @@ -0,0 +1,195 @@ +/* $Id: Intel_Core2_T7600_2_33GHz.h $ */ +/** @file + * CPU database entry "Intel Core2 T7600 2.33GHz". + * Generated at 2017-10-12T18:17:56Z by VBoxCpuReport v5.2.0_RC1r118339 on linux.x86. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core2_T7600_2_33GHz_h +#define VBOX_CPUDB_Intel_Core2_T7600_2_33GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM)2 CPU T7600 @ 2.33GHz. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core2_T7600_2_33GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x04000121, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x04000122, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x04004143, 0x03c0003f, 0x00000fff, 0x00000001, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00022220, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000001, 0x00000002, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000400, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07280202, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x20100000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x43203229, 0x20205550, 0x20202020, 0x54202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x30303637, 0x20402020, 0x33332e32, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x10008040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM)2 CPU T7600 @ 2.33GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core2_T7600_2_33GHz[] = +{ + MFO(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0x12c5e80 */ + MFO(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType), /* value=0x0 */ + MFO(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize), /* value=0x40 */ + MFO(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter), /* value=0x215`a3e44b5c */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x14000098548e25), 0, 0), /* value=0x140000`98548e25 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00900), 0, UINT64_C(0xfffffffffffff7ff)), + MVO(0x00000021, "C2_UNK_0000_0021", 0), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0x41880000, 0, 0), /* value=0x41880000 */ + MVO(0x0000002f, "P6_UNK_0000_002f", 0), + MVO(0x00000032, "P6_UNK_0000_0032", 0), + MVO(0x00000033, "TEST_CTL", 0), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVO(0x0000003f, "P6_UNK_0000_003f", 0), + RFN(0x00000040, 0x00000043, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchToN, ReadOnly), + MVO(0x0000004a, "P6_UNK_0000_004a", 0), /* value=0x0 */ + MVO(0x0000004b, "P6_UNK_0000_004b", 0), /* value=0x0 */ + MVO(0x0000004c, "P6_UNK_0000_004c", 0), /* value=0x0 */ + MVO(0x0000004d, "P6_UNK_0000_004d", 0), /* value=0x3c3a9b64`1d8552bb */ + MVO(0x0000004e, "P6_UNK_0000_004e", 0), /* value=0x3b96f62f`156143b9 */ + MVO(0x0000004f, "P6_UNK_0000_004f", 0), /* value=0xb8 */ + RFN(0x00000060, 0x00000063, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, ReadOnly), + MVO(0x0000006c, "P6_UNK_0000_006c", 0), + MVO(0x0000006d, "P6_UNK_0000_006d", 0), + MVO(0x0000006e, "P6_UNK_0000_006e", 0), + MVO(0x0000006f, "P6_UNK_0000_006f", 0xadb), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFO(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId), /* value=0xc7`00000000 */ + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + MFX(0x000000a8, "C2_EMTTM_CR_TABLES_0", IntelCore2EmttmCrTablesN, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MFX(0x000000a9, "C2_EMTTM_CR_TABLES_1", IntelCore2EmttmCrTablesN, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MFX(0x000000aa, "C2_EMTTM_CR_TABLES_2", IntelCore2EmttmCrTablesN, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MFX(0x000000ab, "C2_EMTTM_CR_TABLES_3", IntelCore2EmttmCrTablesN, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MFX(0x000000ac, "C2_EMTTM_CR_TABLES_4", IntelCore2EmttmCrTablesN, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MFX(0x000000ad, "C2_EMTTM_CR_TABLES_5", IntelCore2EmttmCrTablesN, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + RFN(0x000000c1, 0x000000c2, "IA32_PMCn", Ia32PmcN, ReadOnly), + MVO(0x000000c7, "P6_UNK_0000_00c7", UINT64_C(0x1e00000042000000)), + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0x933, 0, 0), /* value=0x933 */ + MVO(0x000000ce, "P6_UNK_0000_00ce", UINT64_C(0x130e253b530613)), + MVO(0x000000e0, "C2_UNK_0000_00e0", 0x14860f0), + MVO(0x000000e1, "C2_UNK_0000_00e1", UINT32_C(0xf0f00000)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0x404000, UINT64_C(0xfffffffffc001000)), /* value=0x202a01 */ + MFO(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo), /* value=0x8040414 */ + MFO(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase), /* value=0x20414 */ + MVO(0x000000e5, "C2_UNK_0000_00e5", UINT32_C(0xd0220dc8)), + MFO(0x000000e7, "IA32_MPERF", Ia32MPerf), /* value=0xc7`b82ef32a */ + 
MFO(0x000000e8, "IA32_APERF", Ia32APerf), /* value=0x55`9818510c */ + MFO(0x000000ee, "C1_EXT_CONFIG", IntelCore1ExtConfig), /* value=0x80b90400 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MVO(0x00000116, "BBL_CR_ADDR", 0), + MVO(0x00000118, "BBL_CR_DECC", 0xffebe), + MVO(0x0000011b, "P6_UNK_0000_011b", 0), + MVO(0x0000011c, "C2_UNK_0000_011c", UINT32_C(0xe00000cc)), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, ReadOnly, 0x74702109, 0, 0), /* value=0x74702109 */ + MVO(0x0000014a, "TODO_0000_014a", 0), + MVO(0x0000014b, "TODO_0000_014b", 0), + MVO(0x0000014c, "TODO_0000_014c", 0), + MVO(0x0000014e, "P6_UNK_0000_014e", UINT32_C(0xe4dfe927)), + MVO(0x0000014f, "P6_UNK_0000_014f", 0), + MVO(0x00000151, "P6_UNK_0000_0151", 0x3bfcb56f), + MFO(0x0000015f, "C1_DTS_CAL_CTRL", IntelCore1DtsCalControl), /* value=0x230613 */ + MFO(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs), /* value=0x60 */ + MFO(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp), /* value=0xf5a07c40 */ + MFO(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip), /* value=0xc15af09c */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x806, 0, 0), /* value=0x806 */ + MFO(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus), /* value=0x0 */ + RFN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, ReadOnly), + MVO(0x00000193, "C2_UNK_0000_0193", 0), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x6130e2506040613), 0, 0), /* value=0x6130e25`06040613 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, ReadOnly, 0x613, 0, UINT64_MAX), /* Might bite. value=0x613 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, ReadOnly, 0x2, 0, 0), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, ReadOnly, 0x3, 0, 0), /* value=0x3 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, ReadOnly, UINT32_C(0x8831000c), 0, 0), /* value=0x8831000c */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MVO(0x0000019e, "P6_UNK_0000_019e", 0xb240000), + MVO(0x0000019f, "P6_UNK_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, UINT64_C(0x4066a52489), UINT64_C(0x52600099f6), UINT64_C(0xffffff0019004000)), /* value=0x40`66a52489 */ + MVO(0x000001a1, "P6_UNK_0000_01a1", 0), + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVO(0x000001aa, "P6_PIC_SENS_CFG", 0x5ebf042f), + MVO(0x000001bf, "C2_UNK_0000_01bf", 0x404), + MFO(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos), /* value=0x3 */ + MVO(0x000001d3, "P6_UNK_0000_01d3", 0x8000), + MFO(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl), /* value=0x1 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xc12c5d73 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xc10357d0 */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0xc132a284 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0xc1329543 */ + MVO(0x000001e0, "MSR_ROB_CR_BKUPTMPDR6", 0xff0), + MFO(0x000001f8, "IA32_PLATFORM_DCA_CAP", Ia32PlatformDcaCap), /* value=0x0 */ + MFO(0x000001f9, "IA32_CPU_DCA_CAP", Ia32CpuDcaCap), /* value=0x0 */ + MFO(0x000001fa, "IA32_DCA_0_CAP", Ia32Dca0Cap), /* value=0xc01e488 */ + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + 
MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x1b0008`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffff80`0d2ce6c0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0x0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x0 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffffff82`0dcfd000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7fff`7c7511e0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM)2 CPU T7600 @ 2.33GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Core2_T7600_2_33GHz = +{ + /*.pszName = */ "Intel Core2 T7600 2.33GHz", + /*.pszFullName = */ "Intel(R) Core(TM)2 CPU T7600 @ 2.33GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 15, + /*.uStepping = */ 6, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core2_Merom, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_167MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0x0000ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core2_T7600_2_33GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core2_T7600_2_33GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x07280202, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core2_T7600_2_33GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core2_T7600_2_33GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Core2_T7600_2_33GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core2_X6800_2_93GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core2_X6800_2_93GHz.h new file mode 100644 index 00000000..a55815a2 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core2_X6800_2_93GHz.h @@ -0,0 +1,260 @@ +/* $Id: Intel_Core2_X6800_2_93GHz.h $ */ +/** @file + * CPU database entry "Intel Core2 X6800 2.93GHz". + * Generated at 2017-11-04T22:32:41Z by VBoxCpuReport v5.2.1r118907 on linux.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core2_X6800_2_93GHz_h +#define VBOX_CPUDB_Intel_Core2_X6800_2_93GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM)2 CPU X6800 @ 2.93GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core2_X6800_2_93GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x04000121, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x04000122, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x04004143, 0x03c0003f, 0x00000fff, 0x00000001, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00000020, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000001, 0x00000002, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000400, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07280202, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x20100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x43203229, 0x20205550, 0x20202020, 0x58202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x30303836, 0x20402020, 0x33392e32, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x10008040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM)2 CPU X6800 @ 2.93GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core2_X6800_2_93GHz[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xfffffffffffbffff), 0), /* value=0x1398780 */ + MFI(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* Villain? 
value=0x11d1`f468a982 */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT32_C(0x88040b27), 0, 0), /* value=0x88040b27 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00900), 0x600, UINT64_C(0xfffffff0000000ff)), + MVX(0x00000021, "C2_UNK_0000_0021", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xffffffe0)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, 0x41880000, UINT64_C(0xffffffffdff7ffbe), 0), /* value=0x41880000 */ + MVI(0x0000002f, "P6_UNK_0000_002f", 0), + MVX(0x00000032, "P6_UNK_0000_0032", 0, UINT64_C(0xffffffff01fe0000), 0), + MVX(0x00000033, "TEST_CTL", 0, UINT64_C(0xffffffff7fffffff), 0), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVO(0x0000003f, "P6_UNK_0000_003f", 0xea), + RFN(0x00000040, 0x00000043, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchToN, IntelLastBranchToN), + MVX(0x0000004a, "P6_UNK_0000_004a", 0, UINT64_C(0xffffff0000000000), 0), /* value=0x0 */ + MVX(0x0000004b, "P6_UNK_0000_004b", 0, UINT64_C(0xffffff0000000000), 0), /* value=0x0 */ + MVX(0x0000004c, "P6_UNK_0000_004c", 0, UINT64_C(0xffffff0000000000), 0), /* value=0x0 */ + MVX(0x0000004d, "P6_UNK_0000_004d", 0, 0, 0), /* value=0xf53ed6ff`f9f9e16e */ + MVX(0x0000004e, "P6_UNK_0000_004e", 0, 0, 0), /* value=0xf7ffbdfb`bfbfabeb */ + MVX(0x0000004f, "P6_UNK_0000_004f", 0, UINT64_C(0xffffffffffffff00), 0), /* value=0xff */ + RFN(0x00000060, 0x00000063, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MVX(0x0000006c, "P6_UNK_0000_006c", 0, UINT64_C(0xffffffff00000080), 0), + MVX(0x0000006d, "P6_UNK_0000_006d", 0, UINT64_C(0xffffffff00000080), 0), + MVX(0x0000006e, "P6_UNK_0000_006e", 0, UINT64_C(0xffffffff00000080), 0), + MVO(0x0000006f, "P6_UNK_0000_006f", 0xadb), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, UINT32_MAX, 0), /* value=0xc6`00000000 */ + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + MFX(0x000000a8, "C2_EMTTM_CR_TABLES_0", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x61b, UINT64_MAX, 0), /* value=0x61b */ + MFX(0x000000a9, "C2_EMTTM_CR_TABLES_1", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x61b, UINT64_MAX, 0), /* value=0x61b */ + MFX(0x000000aa, "C2_EMTTM_CR_TABLES_2", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x61b, UINT64_MAX, 0), /* value=0x61b */ + MFX(0x000000ab, "C2_EMTTM_CR_TABLES_3", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x61b, UINT64_MAX, 0), /* value=0x61b */ + MFX(0x000000ac, "C2_EMTTM_CR_TABLES_4", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x61b, UINT64_MAX, 0), /* value=0x61b */ + MFX(0x000000ad, "C2_EMTTM_CR_TABLES_5", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, UINT32_C(0x8000061b), UINT64_MAX, 0), /* value=0x8000061b */ + RSN(0x000000c1, 0x000000c2, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x000000c7, "P6_UNK_0000_00c7", UINT64_C(0x3200000058000000)), + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0x800, 0, 0), /* value=0x800 */ + MVO(0x000000ce, "P6_UNK_0000_00ce", UINT64_C(0x1b0b277f7f071b)), + MVO(0x000000e0, "C2_UNK_0000_00e0", 0x7820f0), + MVO(0x000000e1, "C2_UNK_0000_00e1", UINT32_C(0xf0f00000)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, UINT64_C(0xffffffff0000ffff), UINT32_C(0xff000000)), /* value=0x26b204 */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", 
IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xff800000)), /* value=0x0 */ + MVO(0x000000e5, "C2_UNK_0000_00e5", UINT32_C(0xd00201c8)), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0xa0`16e07631 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x7e`79c4e805 */ + MFX(0x000000ee, "C1_EXT_CONFIG", IntelCore1ExtConfig, IntelCore1ExtConfig, 0, UINT64_C(0xffffffffefc5ffff), 0), /* value=0xa8000000`c17d4300 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MVX(0x00000116, "BBL_CR_ADDR", 0, UINT64_C(0xffffff000000001f), 0), + MVX(0x00000118, "BBL_CR_DECC", 0xffdfe, UINT64_C(0xfffffffffff00000), 0), + MFN(0x0000011a, "BBL_CR_TRIG", WriteOnly, IgnoreWrite), + MVI(0x0000011b, "P6_UNK_0000_011b", 0), + MVX(0x0000011c, "C2_UNK_0000_011c", UINT32_C(0xe003cf6f), UINT64_C(0xffffffff07f80000), 0), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, UINT32_C(0xbf702109), UINT64_C(0xfffffffffff3fe9f), 0), /* value=0xbf702109 */ + MVX(0x0000014a, "TODO_0000_014a", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x0000014b, "TODO_0000_014b", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x0000014c, "TODO_0000_014c", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x0000014e, "P6_UNK_0000_014e", 0x7ab9f777, UINT64_C(0xffffffff00000080), 0), + MVI(0x0000014f, "P6_UNK_0000_014f", 0xf000), + MVX(0x00000151, "P6_UNK_0000_0151", 0x42100400, ~(uint64_t)UINT32_MAX, 0), + MFX(0x0000015f, "C1_DTS_CAL_CTRL", IntelCore1DtsCalControl, IntelCore1DtsCalControl, 0, UINT64_C(0xffffffffffc0ffff), 0), /* value=0x230820 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0xffffffff`81846c20 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x6, 0, 0), /* value=0x6 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MVO(0x00000193, "C2_UNK_0000_0193", 0), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0, UINT64_C(0xfffffffffffee0c0), 0), /* value=0x0 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0xb270b2786320620), 0, 0), /* value=0xb270b27`86320620 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x820, 0, 0), /* Might bite. 
value=0x820 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, UINT64_C(0xffffffffffffffe1), 0), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x3, UINT64_C(0xffffffff00010100), UINT32_C(0xff0000e0)), /* value=0x3 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x881c0000), UINT64_C(0xfffffffff87f017f), 0x780fc00), /* value=0x881c0000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x61b, 0, 0), /* value=0x61b */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0x6930000, UINT64_C(0xffffffffffff0000), 0), + MVI(0x0000019f, "P6_UNK_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, UINT64_C(0x4062972489), UINT64_C(0x52603199f6), 0), /* value=0x40`62972489 */ + MVX(0x000001a1, "P6_UNK_0000_01a1", 0, UINT64_C(0xffff000000000000), 0), + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, ReadOnly, 0x1000, 0, 0), /* value=0x1000 */ + MVX(0x000001aa, "P6_PIC_SENS_CFG", 0x7e1f042f, ~(uint64_t)UINT32_MAX, 0), + MVX(0x000001bf, "C2_UNK_0000_01bf", 0x404, UINT64_C(0xffffffffffff0000), 0), + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x3 */ + MVX(0x000001d3, "P6_UNK_0000_01d3", 0x8000, UINT64_C(0xffffffffffff7fff), 0), + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffe03c)), /* value=0x1 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xffffffff`8142d5f6 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffffff`810644e0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0xffffffff`81039669 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0xffffffff`81039020 */ + MVO(0x000001e0, "MSR_ROB_CR_BKUPTMPDR6", 0xff0), + MFX(0x000001f8, "IA32_PLATFORM_DCA_CAP", Ia32PlatformDcaCap, Ia32PlatformDcaCap, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + MFO(0x000001f9, "IA32_CPU_DCA_CAP", Ia32CpuDcaCap), /* value=0x0 */ + MFX(0x000001fa, "IA32_DCA_0_CAP", Ia32Dca0Cap, Ia32Dca0Cap, 0, UINT64_C(0xfffffffffefe17ff), 0), /* value=0xc01e488 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`80000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x80000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`c0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xc0000006 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`f0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xcff00000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`fff00800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* 
value=0x1`00000006 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`e0000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x1`20000006 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`f0000800 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MFX(0x00000309, "IA32_FIXED_CTR0", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, UINT64_C(0xffffff0000000000), 0), /* value=0xc4e */ + MFX(0x0000030a, "IA32_FIXED_CTR1", Ia32FixedCtrN, Ia32FixedCtrN, 0x1, UINT64_C(0xffffff0000000c00), 0), /* value=0xff`9dd0e550 */ + MFX(0x0000030b, "IA32_FIXED_CTR2", Ia32FixedCtrN, Ia32FixedCtrN, 0x2, UINT64_C(0xffffff0000000000), 0), /* value=0x205b */ + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x82, 0, 0), /* value=0x82 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, UINT64_C(0xfffffffffffff000), 0), /* value=0xf0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFN(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl), /* value=0x7`00000003 */ + MFI(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl), /* value=0x7`00000007 */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MVX(0x00000478, "TODO_0000_0478", 
0, 0, 0), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0x1a040000000007), 0, 0), /* value=0x1a0400`00000007 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x1f00000016), 0, 0), /* value=0x1f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0x77b9fffe0401e172), 0, 0), /* value=0x77b9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x3efff00036dff), 0, 0), /* value=0x3efff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0x1fff000011ff), 0, 0), /* value=0x1fff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x403c0, 0, 0), /* value=0x403c0 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x27ff, 0, 0), /* value=0x27ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2c, 0, 0), /* value=0x2c */ + MVX(0x000004f8, "C2_UNK_0000_04f8", UINT64_C(0xf5d5fc5e567f6a8e), 0, 0), + MVX(0x000004f9, "C2_UNK_0000_04f9", UINT64_C(0xb595ed5afff3a8ff), 0, 0), + MVX(0x000004fa, "C2_UNK_0000_04fa", UINT64_C(0xfddfae7f5bfb7c47), 0, 0), + MVX(0x000004fb, "C2_UNK_0000_04fb", UINT64_C(0xf7ffbc5f93fd6fde), 0, 0), + MVX(0x000004fc, "C2_UNK_0000_04fc", UINT64_C(0xb7c5c95891fb71c6), 0, 0), + MVX(0x000004fd, "C2_UNK_0000_04fd", UINT64_C(0xb5d5cc5c95799df6), 0, 0), + MVX(0x000004fe, "C2_UNK_0000_04fe", UINT64_C(0xba95c85ad1fb3973), 0, 0), + MVX(0x000004ff, "C2_UNK_0000_04ff", UINT64_C(0xf5bdda4f9aff3943), 0, 0), + MVX(0x00000590, "C2_UNK_0000_0590", 0, 0, 0), + MVX(0x00000591, "C2_UNK_0000_0591", 0, ~(uint64_t)UINT32_MAX, 0), + MFN(0x000005a0, "C2_PECI_CTL", IntelCore2PeciControl, IntelCore2PeciControl), /* Might bite. value=0x1 */ + MVI(0x000005a1, "C2_UNK_0000_05a1", 0), /* Might bite. */ + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0xffff8801`2aaeba00 */ + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* Might bite. value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* Might bite. value=0xffffffff`81844650 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* Might bite. value=0xffffffff`81846c90 */ + MFN(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask), /* Might bite. value=0x47700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* Might bite. value=0x7fb5`e58d0740 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* Might bite. value=0xffff8801`2fc00000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* Might bite. value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM)2 CPU X6800 @ 2.93GHz. 
+ */ +static CPUMDBENTRY const g_Entry_Intel_Core2_X6800_2_93GHz = +{ + /*.pszName = */ "Intel Core2 X6800 2.93GHz", + /*.pszFullName = */ "Intel(R) Core(TM)2 CPU X6800 @ 2.93GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 15, + /*.uStepping = */ 6, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core2_Merom, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_267MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0x0000ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core2_X6800_2_93GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core2_X6800_2_93GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x07280202, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core2_X6800_2_93GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core2_X6800_2_93GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Core2_X6800_2_93GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_Duo_T2600_2_16GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_Duo_T2600_2_16GHz.h new file mode 100644 index 00000000..f08a5569 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_Duo_T2600_2_16GHz.h @@ -0,0 +1,225 @@ +/* $Id: Intel_Core_Duo_T2600_2_16GHz.h $ */ +/** @file + * CPU database entry "Intel Core Duo T2600 2.16GHz". + * Generated at 2017-11-02T10:39:16Z by VBoxCpuReport v5.2.0_RC1r118339 on linux.x86. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core_Duo_T2600_2_16GHz_h +#define VBOX_CPUDB_Intel_Core_Duo_T2600_2_16GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM) Duo CPU T2600 @ 2.16GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_Duo_T2600_2_16GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000006e8, 0x01020800, 0x0000c1a9, 0xbfe9fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x02b3b001, 0x000000f0, 0x00000000, 0x2c04307d, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x04000121, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x04000122, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x04004143, 0x01c0003f, 0x00000fff, 0x00000001, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00022220, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000001, 0x00000002, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07280201, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00100000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x756e6547, 0x20656e69, 0x65746e49, 0x2952286c, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x55504320, 0x20202020, 0x20202020, 0x54202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x30303632, 0x20402020, 0x36312e32, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x08006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00002020, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) Duo CPU T2600 @ 2.16GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_Duo_T2600_2_16GHz[] = +{ + MFI(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0xf`eeda5160 */ + MFI(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* Villain? 
value=0x243`e2b88071 */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x140000d80486ac), 0, 0), /* value=0x140000`d80486ac */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00900), 0x600, UINT64_C(0xfffffff0000000ff)), + MVX(0x00000021, "C2_UNK_0000_0021", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xfffffffe)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, 0x41880000, UINT64_C(0xfffffffffff7fffe), 0), /* value=0x41880000 */ + MVI(0x0000002f, "P6_UNK_0000_002f", 0), + MVX(0x00000032, "P6_UNK_0000_0032", 0, UINT64_C(0xffffffff01fe0000), 0), + MVX(0x00000033, "TEST_CTL", 0, UINT64_C(0xffffffff7fffffff), 0), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVO(0x0000003f, "P6_UNK_0000_003f", 0), + RFN(0x00000040, 0x00000047, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchToN, IntelLastBranchToN), + MVX(0x0000004a, "P6_UNK_0000_004a", 0, 0, 0), /* value=0x0 */ + MVX(0x0000004b, "P6_UNK_0000_004b", 0, 0, 0), /* value=0x0 */ + MVX(0x0000004c, "P6_UNK_0000_004c", 0, 0, 0), /* value=0x0 */ + MVX(0x0000004d, "P6_UNK_0000_004d", 0, 0, 0), /* value=0x3392fbd9`ffbefffd */ + MVX(0x0000004e, "P6_UNK_0000_004e", 0, 0, 0), /* value=0xa6b77ad3`7ffbffe7 */ + MVX(0x0000004f, "P6_UNK_0000_004f", 0, UINT64_C(0xffffffffffffff00), 0), /* value=0x9d`0000009d */ + MVX(0x0000006c, "P6_UNK_0000_006c", 0, UINT64_C(0xffffffff00000082), 0), + MVX(0x0000006d, "P6_UNK_0000_006d", 0, UINT64_C(0xffffffff00000082), 0), + MVX(0x0000006e, "P6_UNK_0000_006e", UINT32_C(0x80000000), UINT64_C(0xffffffff00000082), 0), + MVO(0x0000006f, "P6_UNK_0000_006f", 0xadb), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, UINT32_MAX, 0), /* value=0x39`00000000 */ + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + MFX(0x000000c1, "IA32_PMC0", Ia32PmcN, Ia32PmcN, 0x0, UINT64_C(0xffffffff00124101), 0), /* XXX: The range ended earlier than expected! 
*/ + MFX(0x000000c2, "IA32_PMC1", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVI(0x000000c7, "P6_UNK_0000_00c7", UINT64_C(0x1f00000044000000)), + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0x133, 0, 0), /* value=0x133 */ + MVO(0x000000ce, "P6_UNK_0000_00ce", UINT64_C(0x2c130d003b538000)), + MVO(0x000000e0, "C2_UNK_0000_00e0", 0x14ce0f0), + MVO(0x000000e1, "C2_UNK_0000_00e1", UINT32_C(0xf0f00000)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xff000000)), /* value=0x26740c */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x8040414 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xff800000)), /* value=0x20414 */ + MVO(0x000000e5, "C2_UNK_0000_00e5", 0x51c20cc0), + MFX(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf, 0, UINT64_C(0xffffffffe0000000), 0), /* value=0x5e`dc779a5a */ + MFX(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf, 0, UINT64_C(0xffffffffe0000000), 0), /* value=0x2b`c8585b9a */ + MFX(0x000000ee, "C1_EXT_CONFIG", IntelCore1ExtConfig, IntelCore1ExtConfig, 0, UINT64_C(0xffffffffffc5ffff), 0), /* value=0x82b90000 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MVX(0x00000116, "BBL_CR_ADDR", 0, UINT64_C(0xffffff000000001f), 0), + MVX(0x00000118, "BBL_CR_DECC", UINT64_C(0x88000fef00030892), UINT64_C(0x4780000fff00000), 0), + MFN(0x0000011a, "BBL_CR_TRIG", WriteOnly, IgnoreWrite), + MVI(0x0000011b, "P6_UNK_0000_011b", 0), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, 0x7874211f, UINT64_C(0xffffffffc0f3feff), 0), /* value=0x7874211f */ + MVX(0x0000014e, "P6_UNK_0000_014e", 0x49a49f20, UINT64_C(0xffffffff0000008f), 0), + MVX(0x0000014f, "P6_UNK_0000_014f", UINT32_MAX, UINT64_C(0xffffffff00100000), 0), + MVX(0x00000151, "P6_UNK_0000_0151", 0x25febbf6, ~(uint64_t)UINT32_MAX, 0), + MFX(0x0000015f, "C1_DTS_CAL_CTRL", IntelCore1DtsCalControl, IntelCore1DtsCalControl, 0, UINT64_C(0xffffffffffc0ffff), 0), /* value=0x260613 */ + MFN(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs), /* Villain? value=0x60 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* Villain? value=0xf5a07c40 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* Villain? value=0xc15af09c */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x6, 0, 0), /* value=0x6 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0, UINT64_C(0xfffffffffffee0c0), 0), /* value=0x0 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x6130d2c060c0613), 0, 0), /* value=0x6130d2c`060c0613 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x613, 0, 0), /* Might bite. 
value=0x613 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, UINT64_C(0xffffffffffffffe1), 0), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x3, UINT64_C(0xffffffff00616100), UINT32_C(0xff0000e0)), /* value=0x3 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x8838000c), UINT64_C(0xfffffffff87f017f), 0x780fc00), /* value=0x8838000c */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x613, 0, 0), /* value=0x613 */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0x11b0000, UINT64_C(0xffffffffffff0000), 0), + MVI(0x0000019f, "P6_UNK_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, UINT64_C(0x264973488), 0x60319bf7, 0), /* value=0x2`64973488 */ + MVX(0x000001a1, "P6_UNK_0000_01a1", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x000001aa, "P6_PIC_SENS_CFG", 0x263f04b7, ~(uint64_t)UINT32_MAX, 0), + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, UINT64_C(0xfffffffffffffffa), 0), /* value=0x8000003 */ + MVX(0x000001d3, "P6_UNK_0000_01d3", 0x8000, UINT64_C(0xffffffffffff7fff), 0), + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xfffffffffffffe3c)), /* value=0x1 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xc12c5d73 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xc10357d0 */ + MFX(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp, 0, UINT64_C(0xffffffffff97dc5d), 0), /* value=0xc132a284 */ + MFX(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp, 0, UINT64_C(0xfffffffffffffff0), 0), /* value=0xc1329543 */ + MVO(0x000001e0, "MSR_ROB_CR_BKUPTMPDR6", 0xff0), + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xffe00005 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ffe00800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`80000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x7ff00000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`fff00800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x80000001 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf0000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, 
UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0, 0, 0), /* value=0x0 */ + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0x1b040000000005), 0, 0), /* value=0x1b0400`00000005 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x1f00000016), 0, 0), /* value=0x1f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0x7781fffe0401e172), 0, 0), /* value=0x7781fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x3edff00036dff), 0, 0), /* value=0x3edff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0x1dff000011ff), 0, 0), /* value=0x1dff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x403c0, 0, 0), /* value=0x403c0 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x27ff, 0, 0), /* value=0x27ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2c, 0, 0), /* value=0x2c */ + MVX(0x000004f8, "C2_UNK_0000_04f8", UINT64_C(0x1f5e86fb9f7f6dce), 0, 0), + MVX(0x000004f9, "C2_UNK_0000_04f9", UINT64_C(0xafb14bb80b893244), 0, 0), + MVX(0x000004fa, "C2_UNK_0000_04fa", UINT64_C(0xfecd26a6e39aeefe), 0, 0), + 
MVX(0x000004fb, "C2_UNK_0000_04fb", UINT64_C(0xd5baca676b503675), 0, 0), + MVX(0x000004fc, "C2_UNK_0000_04fc", UINT64_C(0x2e9b76a2bdde6ed7), 0, 0), + MVX(0x000004fd, "C2_UNK_0000_04fd", UINT64_C(0xfdbb141e45043200), 0, 0), + MVX(0x000004fe, "C2_UNK_0000_04fe", UINT64_C(0x4a68f426372a837f), 0, 0), + MVX(0x000004ff, "C2_UNK_0000_04ff", UINT64_C(0x4104628e2e437f40), 0, 0), + MFX(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0x800, 0, UINT64_C(0xfffffffffffff3ff)), +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM) Duo CPU T2600 @ 2.16GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Core_Duo_T2600_2_16GHz = +{ + /*.pszName = */ "Intel Core Duo T2600 2.16GHz", + /*.pszFullName = */ "Genuine Intel(R) CPU T2600 @ 2.16GHz", +// /*.pszFullName = */ "Intel(R) Core(TM) Duo CPU T2600 @ 2.16GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 14, + /*.uStepping = */ 8, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core_Yonah, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_167MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 32, + /*.fMxCsrMask = */ 0x0000ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_Duo_T2600_2_16GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_Duo_T2600_2_16GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x07280201, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core_Duo_T2600_2_16GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core_Duo_T2600_2_16GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Core_Duo_T2600_2_16GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h new file mode 100644 index 00000000..416cb87d --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i5_3570.h @@ -0,0 +1,339 @@ +/* $Id: Intel_Core_i5_3570.h $ */ +/** @file + * CPU database entry "Intel Core i5-3570". + * Generated at 2013-12-13T16:13:56Z by VBoxCpuReport v4.3.53r91216 on linux.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core_i5_3570_h +#define VBOX_CPUDB_Intel_Core_i5_3570_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM) i5-3570 CPU @ 3.40GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_i5_3570[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000306a9, 0x04100800, 0x7fbae3ff, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, 0x00000000, 0x1c004121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00001120, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000077, 0x00000002, 0x00000009, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000281, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300803, 0x00000000, 0x00000000, 0x00000603, 0 }, + { 0x0000000b, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x00000100, 0x00000004, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, 0x00000000, 0x00000007, 0x00000340, 0x00000340, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x28100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x65746e49, 0x2952286c, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x726f4320, 0x4d542865, 0x35692029, 0x3735332d, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x50432030, 0x20402055, 0x30342e33, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) i5-3570 CPU @ 3.40GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_i5_3570[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xffffffffffffffe0), 0), /* value=0x1f */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x4293`b0a3f54a */ + MFV(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x4000000000000)), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00c00), 0, UINT64_C(0xfffffff0000002ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVX(0x0000002e, "I7_UNK_0000_002e", 0, 0x400, UINT64_C(0xfffffffffffffbff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000034, "P6_UNK_0000_0034", 0x285), + MFO(0x00000035, "MSR_CORE_THREAD_COUNT", IntelI7CoreThreadCount), /* value=0x40004 */ + MVO(0x00000036, "I7_UNK_0000_0036", UINT64_C(0x1000000000105df2)), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVX(0x0000003e, "I7_UNK_0000_003e", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MVX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", UINT64_C(0x1900000000), 0x1, UINT32_C(0xfffffffe)), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RSN(0x000000c1, 0x000000c8, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFO(0x000000ce, "MSR_PLATFORM_INFO", IntelPlatformInfo), /* value=0x81010`e0012200 */ + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffffe1ffffff)), /* value=0x1e008403 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xfffffffffff80000)), /* value=0x10414 */ + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x3a`2c710584 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x39`f97c8410 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd0a, 0, 0), /* value=0xd0a */ + MVX(0x00000102, "I7_IB_UNK_0000_0102", 0, 0, UINT64_C(0xffffffff7fff8000)), + MVX(0x00000103, "I7_IB_UNK_0000_0103", 0, 0, UINT64_C(0xffffffffffffff00)), + MVX(0x00000104, "I7_IB_UNK_0000_0104", 0, 0, UINT64_C(0xfffffffffffffffe)), + MFN(0x00000132, "CPUID1_FEATURE_MASK", IntelCpuId1FeatureMaskEax, IntelCpuId1FeatureMaskEax), /* value=0xffffffff`ffffffff */ + MFN(0x00000133, "CPUIDD_01_FEATURE_MASK", IntelCpuId1FeatureMaskEcdx, IntelCpuId1FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFN(0x00000134, "CPUID80000001_FEATURE_MASK", IntelCpuId80000001FeatureMaskEcdx, IntelCpuId80000001FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFX(0x0000013c, "I7_SB_AES_NI_CTL", IntelI7SandyAesNiCtl, IntelI7SandyAesNiCtl, 0, 0, UINT64_C(0xfffffffffffffffc)), /* value=0x0 */ + MVX(0x00000140, "I7_IB_UNK_0000_0140", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000142, "I7_IB_UNK_0000_0142", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip,
Ia32SysEnterEip), /* value=0xffffffff`8159cbe0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0xc09, 0, 0), /* value=0xc09 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + RSN(0x00000186, 0x0000018d, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, UINT64_C(0xffffffff00080000)), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0x190000, 0x1e00ff, UINT64_C(0xffffffffffe00000)), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x1d2400001000), 0, 0), /* value=0x1d24`00001000 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x1000, 0, 0), /* Might bite. value=0x1000 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x1000013, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0x1000013 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x884c0000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), /* value=0x884c0000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef72)), /* value=0x850089 */ + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x691400, 0xffff00, UINT64_C(0xfffffffff00000ff)), /* value=0x691400 */ + MVX(0x000001a4, "I7_UNK_0000_01a4", 0, 0, UINT64_C(0xfffffffffffff7f0)), + RSN(0x000001a6, 0x000001a7, "I7_MSR_OFFCORE_RSP_n", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffffc000007000)), + MVX(0x000001a8, "I7_UNK_0000_01a8", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x000001aa, "MSR_MISC_PWR_MGMT", IntelI7MiscPwrMgmt, IntelI7MiscPwrMgmt, 0, 0, UINT64_C(0xffffffffffbffffe)), /* value=0x400000 */ + MFX(0x000001ad, "I7_MSR_TURBO_RATIO_LIMIT", IntelI7TurboRatioLimit, ReadOnly, 0x24252626, 0, 0), /* value=0x24252626 */ + MVX(0x000001b0, "IA32_ENERGY_PERF_BIAS", 0x6, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x000001b1, "IA32_PACKAGE_THERM_STATUS", UINT32_C(0x88490000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), + MVX(0x000001b2, "IA32_PACKAGE_THERM_INTERRUPT", 0x1000003, 0, UINT64_C(0xfffffffffe0000e8)), + MVO(0x000001c6, "I7_UNK_0000_01c6", 0x3), + MFX(0x000001c8, "MSR_LBR_SELECT", IntelI7LbrSelect, IntelI7LbrSelect, 0, 0, UINT64_C(0xfffffffffffffe00)), /* value=0x0 */ + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff0)), /* value=0x8 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffff803c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x7fffffff`a061f4c9 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffffff`810473c0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MFO(0x000001f0, "I7_VLW_CAPABILITY", IntelI7VirtualLegacyWireCap), /* value=0x74 */ + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0xdb000006 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0xff800800 */ + MFX(0x000001fc, "I7_MSR_POWER_CTL", IntelI7PowerCtl, IntelI7PowerCtl, 
0, 0x20, UINT64_C(0xffffffffffc20000)), /* value=0x14005f */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xc`00000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x4`00000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`e0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xe0000000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`e0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xdc000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`fc000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xdb800000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff800800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x4`1f000000 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff000800 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x4`1e800000 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff800800 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x4`1e600000 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ffe00800 */ + MFX(0x00000210, "IA32_MTRR_PHYS_BASE8", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x8, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000211, "IA32_MTRR_PHYS_MASK8", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x8, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000212, "IA32_MTRR_PHYS_BASE9", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x9, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000213, "IA32_MTRR_PHYS_MASK9", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x9, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, 
"IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + RSN(0x00000280, 0x00000281, "IA32_MC0_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x0, 0, UINT64_C(0xffffffffbfff8000)), + MFX(0x00000282, "IA32_MC2_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x2, 0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + MFX(0x00000283, "IA32_MC3_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x3, 0, UINT64_C(0xffffffffbfff8000)), /* value=0x40000001 */ + MFX(0x00000284, "IA32_MC4_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x4, 0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + RSN(0x00000285, 0x00000288, "IA32_MC5_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x5, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x000002e0, "I7_SB_NO_EVICT_MODE", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFN(0x000002e6, "I7_IB_UNK_0000_02e6", WriteOnly, IgnoreWrite), + MVX(0x000002e7, "I7_IB_UNK_0000_02e7", 0x1, 0x1, UINT64_C(0xfffffffffffffffe)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVO(0x00000305, "I7_SB_UNK_0000_0305", 0), + MFX(0x00000309, "IA32_FIXED_CTR0", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffff000000000000)), /* value=0x46 */ + MFX(0x0000030a, "IA32_FIXED_CTR1", Ia32FixedCtrN, Ia32FixedCtrN, 0x1, 0x816506, UINT64_C(0xffff000000000000)), /* value=0xffff`d65aa6fb */ + MFX(0x0000030b, "IA32_FIXED_CTR2", Ia32FixedCtrN, Ia32FixedCtrN, 0x2, 0, UINT64_C(0xffff000000000000)), /* value=0x264 */ + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x31c3, 0, 0), /* value=0x31c3 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0xb0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8ffffff00)), /* value=0x7`000000ff */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xe0000007000000ff), UINT64_C(0x1ffffff8ffffff00)), /* value=0x0 */ + MFX(0x00000391, "I7_UNC_PERF_GLOBAL_CTRL", IntelI7UncPerfGlobalCtrl, IntelI7UncPerfGlobalCtrl, 0, 0, UINT64_C(0xffffffff1fffffe0)), /* value=0x2000000f */ + MFX(0x00000392, "I7_UNC_PERF_GLOBAL_STATUS", IntelI7UncPerfGlobalStatus, IntelI7UncPerfGlobalStatus, 0, 0xf, UINT64_C(0xfffffffffffffff0)), /* value=0x0 */ + MFX(0x00000393, "I7_UNC_PERF_GLOBAL_OVF_CTRL", IntelI7UncPerfGlobalOvfCtrl, IntelI7UncPerfGlobalOvfCtrl, 0, 0x3, UINT64_C(0xfffffffffffffffc)), /* value=0x0 */ + MFX(0x00000394, "I7_UNC_PERF_FIXED_CTR_CTRL", IntelI7UncPerfFixedCtrCtrl, IntelI7UncPerfFixedCtrCtrl, 0, 0, UINT64_C(0xffffffffffafffff)), /* value=0x0 */ + MFX(0x00000395, "I7_UNC_PERF_FIXED_CTR", IntelI7UncPerfFixedCtr, IntelI7UncPerfFixedCtr, 0, 0, UINT64_C(0xffff000000000000)), /* value=0x1950 */ + MFO(0x00000396, "I7_UNC_CBO_CONFIG", IntelI7UncCBoxConfig), /* value=0x5 */ + MVX(0x00000397, "I7_IB_UNK_0000_0397", 0, 0, 
UINT64_C(0xfffffffffffffff0)), + MFX(0x000003b0, "I7_UNC_ARB_PERF_CTR0", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b1, "I7_UNC_ARB_PERF_CTR1", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b2, "I7_UNC_ARB_PERF_EVT_SEL0", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffc0230000)), /* value=0x0 */ + MFX(0x000003b3, "I7_UNC_ARB_PERF_EVT_SEL1", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffc0230000)), /* value=0x0 */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0x7ffffff0fffffff0)), /* value=0x0 */ + MFX(0x000003f6, "I7_MSR_PEBS_LD_LAT", IntelI7PebsLdLat, IntelI7PebsLdLat, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0xffff */ + MFX(0x000003f8, "I7_MSR_PKG_C3_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x7`7827f19a */ + RSN(0x000003f9, 0x000003fa, "I7_MSR_PKG_Cn_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + MFX(0x000003fc, "I7_MSR_CORE_C3_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x1`3e604592 */ + RSN(0x000003fd, 0x000003fe, "I7_MSR_CORE_Cn_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + RFN(0x00000400, 0x00000423, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0xda040000000010), 0, 0), /* value=0xda0400`00000010 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe0401e172), 0, 0), /* value=0xfff9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x7fffff00036dff), 0, 0), /* value=0x7fffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0xffff000011ff), 0, 0), /* value=0xffff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x100401e5, 0, 0), /* value=0x100401e5 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x1767ff, 0, 0), /* value=0x1767ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2a, 0, 0), /* value=0x2a */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0x8ff00000000), 0, 0), /* value=0x8ff`00000000 */ + MFX(0x0000048c, "IA32_VMX_EPT_VPID_CAP", Ia32VmxEptVpidCap, ReadOnly, UINT64_C(0xf0106114141), 0, 0), /* value=0xf01`06114141 */ + MFX(0x0000048d, "IA32_VMX_TRUE_PINBASED_CTLS", Ia32VmxTruePinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x0000048e, "IA32_VMX_TRUE_PROCBASED_CTLS", Ia32VmxTrueProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe04006172), 0, 0), /* value=0xfff9fffe`04006172 */ + MFX(0x0000048f, "IA32_VMX_TRUE_EXIT_CTLS", Ia32VmxTrueExitCtls, ReadOnly, UINT64_C(0x7fffff00036dfb), 0, 0), /* value=0x7fffff`00036dfb */ + MFX(0x00000490, "IA32_VMX_TRUE_ENTRY_CTLS", Ia32VmxTrueEntryCtls, ReadOnly, 
UINT64_C(0xffff000011fb), 0, 0), /* value=0xffff`000011fb */ + RSN(0x000004c1, 0x000004c8, "IA32_A_PMCn", Ia32PmcN, Ia32PmcN, 0x0, 0, UINT64_C(0xffff000000000000)), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0xffff8804`07da1cc0 */ + MFX(0x00000601, "I7_SB_MSR_VR_CURRENT_CONFIG", IntelI7SandyVrCurrentConfig, IntelI7SandyVrCurrentConfig, 0, UINT32_C(0x80001fff), 0x7fffe000), /* value=0x18141494`80000380 */ + MVX(0x00000602, "I7_IB_UNK_0000_0602", UINT64_C(0x1814149480000170), UINT32_C(0x80001fff), 0x7fffe000), + MFX(0x00000603, "I7_SB_MSR_VR_MISC_CONFIG", IntelI7SandyVrMiscConfig, IntelI7SandyVrMiscConfig, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x802c2c2c */ + MVX(0x00000604, "I7_IB_UNK_0000_0602", UINT32_C(0x80686868), UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), + MFO(0x00000606, "I7_SB_MSR_RAPL_POWER_UNIT", IntelI7SandyRaplPowerUnit), /* value=0xa1003 */ + MFX(0x0000060a, "I7_SB_MSR_PKGC3_IRTL", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x3, 0, UINT64_C(0xffffffffffff6000)), /* value=0x883b */ + RSN(0x0000060b, 0x0000060c, "I7_SB_MSR_PKGC6_IRTn", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x6, 0, UINT64_C(0xffffffffffff6000)), + MFO(0x0000060d, "I7_SB_MSR_PKG_C2_RESIDENCY", IntelI7SandyPkgC2Residency), /* value=0x76c`bd67b914 */ + MFX(0x00000610, "I7_SB_MSR_PKG_POWER_LIMIT", IntelI7RaplPkgPowerLimit, IntelI7RaplPkgPowerLimit, 0, UINT64_C(0x80ffffff00ffffff), UINT64_C(0x7f000000ff000000)), /* value=0x80008302`00148268 */ + MFO(0x00000611, "I7_SB_MSR_PKG_ENERGY_STATUS", IntelI7RaplPkgEnergyStatus), /* value=0x3451b969 */ + MFO(0x00000614, "I7_SB_MSR_PKG_POWER_INFO", IntelI7RaplPkgPowerInfo), /* value=0xd0000`01e00268 */ + MFX(0x00000638, "I7_SB_MSR_PP0_POWER_LIMIT", IntelI7RaplPp0PowerLimit, IntelI7RaplPp0PowerLimit, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80000000 */ + MFO(0x00000639, "I7_SB_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp0EnergyStatus), /* value=0x357de52e */ + MFX(0x0000063a, "I7_SB_MSR_PP0_POLICY", IntelI7RaplPp0Policy, IntelI7RaplPp0Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x00000640, "I7_HW_MSR_PP0_POWER_LIMIT", IntelI7RaplPp1PowerLimit, IntelI7RaplPp1PowerLimit, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80000000 */ + MFO(0x00000641, "I7_HW_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp1EnergyStatus), /* value=0x6eeef */ + MFX(0x00000642, "I7_HW_MSR_PP0_POLICY", IntelI7RaplPp1Policy, IntelI7RaplPp1Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x10 */ + MFO(0x00000648, "I7_IB_MSR_CONFIG_TDP_NOMINAL", IntelI7IvyConfigTdpNominal), /* value=0x22 */ + MFO(0x00000649, "I7_IB_MSR_CONFIG_TDP_LEVEL1", IntelI7IvyConfigTdpLevel1), /* value=0x1e00000`00000000 */ + MFO(0x0000064a, "I7_IB_MSR_CONFIG_TDP_LEVEL2", IntelI7IvyConfigTdpLevel2), /* value=0x1e00000`00000000 */ + MFO(0x0000064b, "I7_IB_MSR_CONFIG_TDP_CONTROL", IntelI7IvyConfigTdpControl), /* value=0x80000000 */ + MFX(0x0000064c, "I7_IB_MSR_TURBO_ACTIVATION_RATIO", IntelI7IvyTurboActivationRatio, IntelI7IvyTurboActivationRatio, 0, 0, UINT64_C(0xffffffff7fffff00)), /* value=0x80000000 */ + RFN(0x00000680, 0x0000068f, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchFromN, IntelLastBranchFromN), + RFN(0x000006c0, 0x000006cf, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MFX(0x000006e0, "IA32_TSC_DEADLINE", Ia32TscDeadline, Ia32TscDeadline, 0, UINT64_C(0xb280452208b), 0), /* value=0x4293`ef1535a6 */ + MVX(0x00000700, "I7_IB_UNK_0000_0700", 0, 0, 
UINT64_C(0xffffffffe0230000)), + MVX(0x00000701, "I7_IB_UNK_0000_0701", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000702, "I7_IB_UNK_0000_0702", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000703, "I7_IB_UNK_0000_0703", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000704, "I7_IB_UNK_0000_0704", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000705, "I7_IB_UNK_0000_0705", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000706, "I7_IB_UNK_0000_0706", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000707, "I7_IB_UNK_0000_0707", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000708, "I7_IB_UNK_0000_0708", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000709, "I7_IB_UNK_0000_0709", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000710, "I7_IB_UNK_0000_0710", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000711, "I7_IB_UNK_0000_0711", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000712, "I7_IB_UNK_0000_0712", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000713, "I7_IB_UNK_0000_0713", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000714, "I7_IB_UNK_0000_0714", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000715, "I7_IB_UNK_0000_0715", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000716, "I7_IB_UNK_0000_0716", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000717, "I7_IB_UNK_0000_0717", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000718, "I7_IB_UNK_0000_0718", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000719, "I7_IB_UNK_0000_0719", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000720, "I7_IB_UNK_0000_0720", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000721, "I7_IB_UNK_0000_0721", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000722, "I7_IB_UNK_0000_0722", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000723, "I7_IB_UNK_0000_0723", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000724, "I7_IB_UNK_0000_0724", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000725, "I7_IB_UNK_0000_0725", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000726, "I7_IB_UNK_0000_0726", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000727, "I7_IB_UNK_0000_0727", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000728, "I7_IB_UNK_0000_0728", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000729, "I7_IB_UNK_0000_0729", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000730, "I7_IB_UNK_0000_0730", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000731, "I7_IB_UNK_0000_0731", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000732, "I7_IB_UNK_0000_0732", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000733, "I7_IB_UNK_0000_0733", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000734, "I7_IB_UNK_0000_0734", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000735, "I7_IB_UNK_0000_0735", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000736, "I7_IB_UNK_0000_0736", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000737, "I7_IB_UNK_0000_0737", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000738, "I7_IB_UNK_0000_0738", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000739, "I7_IB_UNK_0000_0739", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000740, "I7_IB_UNK_0000_0740", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000741, "I7_IB_UNK_0000_0741", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000742, "I7_IB_UNK_0000_0742", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000743, "I7_IB_UNK_0000_0743", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000744, "I7_IB_UNK_0000_0744", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000745, "I7_IB_UNK_0000_0745", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + 
MVX(0x00000746, "I7_IB_UNK_0000_0746", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000747, "I7_IB_UNK_0000_0747", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000748, "I7_IB_UNK_0000_0748", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000749, "I7_IB_UNK_0000_0749", 0, 0, UINT64_C(0xfffff00000000000)), + RFN(0x00000800, 0x000008ff, "IA32_X2APIC_n", Ia32X2ApicN, Ia32X2ApicN), + MFN(0x00000c80, "IA32_DEBUG_INTERFACE", Ia32DebugInterface, Ia32DebugInterface), /* value=0x0 */ + MVX(0x00000c81, "I7_IB_UNK_0000_0c81", 0, 0, 0), + MVX(0x00000c82, "I7_IB_UNK_0000_0c82", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00000c83, "I7_IB_UNK_0000_0c83", 0, ~(uint64_t)UINT32_MAX, 0), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffffff`8159b620 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xffffffff`8159ce10 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x43700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x908880 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffff8804`1e200000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x0 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM) i5-3570 CPU @ 3.40GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Core_i5_3570 = +{ + /*.pszName = */ "Intel Core i5-3570", + /*.pszFullName = */ "Intel(R) Core(TM) i5-3570 CPU @ 3.40GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 58, + /*.uStepping = */ 9, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core7_IvyBridge, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i5_3570), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i5_3570)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX, + /*.DefUnknownCpuId = */ { 0x00000007, 0x00000340, 0x00000340, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core_i5_3570)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core_i5_3570), +}; + +#endif /* !VBOX_CPUDB_Intel_Core_i5_3570_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h new file mode 100644 index 00000000..e718b0ea --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_2635QM.h @@ -0,0 +1,332 @@ +/* $Id: Intel_Core_i7_2635QM.h $ */ +/** @file + * CPU database entry "Intel Core i7-2635QM". + * Generated at 2014-02-28T18:53:09Z by VBoxCpuReport v4.3.53r92586 on darwin.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core_i7_2635QM_h +#define VBOX_CPUDB_Intel_Core_i7_2635QM_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM) i7-2635QM CPU @ 2.00GHz. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_i7_2635QM[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000206a7, 0x04100800, 0x1fbae3bf, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x76035a01, 0x00f0b2ff, 0x00000000, 0x00ca0000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x1c004121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x1c004122, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x1c004143, 0x01c0003f, 0x000001ff, 0x00000000, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x1c03c163, 0x02c0003f, 0x00001fff, 0x00000006, 0 }, + { 0x00000004, 0x00000004, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00021120, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000077, 0x00000002, 0x00000009, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300403, 0x00000000, 0x00000000, 0x00000603, 0 }, + /** @todo the b entry here is WRONG! 
*/ + { 0x0000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, UINT32_MAX, 0x00000007, 0x00000340, 0x00000340, 0x00000000, 0 }, + { 0x0000000d, 0x00000001, UINT32_MAX, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000002, UINT32_MAX, 0x00000100, 0x00000240, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x28100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x6e492020, 0x286c6574, 0x43202952, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x2865726f, 0x20294d54, 0x322d3769, 0x51353336, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x5043204d, 0x20402055, 0x30302e32, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) i7-2635QM CPU @ 2.00GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_i7_2635QM[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xffffffffffffffe0), 0), /* value=0x1f */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x94d`1967512c */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x10000000000000), 0, 0), /* value=0x100000`00000000 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xfffffff0000002ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVX(0x0000002e, "I7_UNK_0000_002e", 0, 0x400, UINT64_C(0xfffffffffffffbff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000034, "P6_UNK_0000_0034", 0x5), + MFO(0x00000035, "MSR_CORE_THREAD_COUNT", IntelI7CoreThreadCount), /* value=0x40008 */ + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVX(0x0000003e, "I7_UNK_0000_003e", 0, 0, UINT64_C(0xfffffffffffffffe)), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, 0, UINT32_C(0xfffffffe)), /* value=0x28`00000000 */ + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RSN(0x000000c1, 0x000000c4, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFO(0x000000ce, "IA32_PLATFORM_INFO", IntelPlatformInfo), /* value=0x800`60011400 */ + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffffe1ffffff)), /* 
value=0x405 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xfffffffffff80000)), /* value=0x20414 */ + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x6a`9190b14b */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x69`df4de05c */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd0a, 0, 0), /* value=0xd0a */ + MFN(0x00000132, "CPUID1_FEATURE_MASK", IntelCpuId1FeatureMaskEax, IntelCpuId1FeatureMaskEax), /* value=0xffffffff`ffffffff */ + MFN(0x00000133, "CPUIDD_01_FEATURE_MASK", IntelCpuId1FeatureMaskEcdx, IntelCpuId1FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFN(0x00000134, "CPUID80000001_FEATURE_MASK", IntelCpuId80000001FeatureMaskEcdx, IntelCpuId80000001FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFX(0x0000013c, "I7_SB_AES_NI_CTL", IntelI7SandyAesNiCtl, IntelI7SandyAesNiCtl, 0, 0, UINT64_C(0xfffffffffffffffc)), /* value=0x0 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xb */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0xffffff80`22904080 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0xffffff80`222f3030 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0xc09, 0, 0), /* value=0xc09 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + RSN(0x00000186, 0x00000189, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, UINT64_C(0xffffffff00080000)), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0, 0xe0000, UINT64_C(0xfffffffffff00000)), /* value=0x0 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x1d4d00000e00), 0, 0), /* value=0x1d4d`00000e00 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x1d00, 0, 0), /* Might bite. 
value=0x1d00 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x883d0000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), /* value=0x883d0000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef72)), /* value=0x850089 */ + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x640e00, 0xffff00, UINT64_C(0xfffffffff00000ff)), /* value=0x640e00 */ + MVX(0x000001a4, "I7_UNK_0000_01a4", 0, 0, UINT64_C(0xfffffffffffff7f0)), + RSN(0x000001a6, 0x000001a7, "I7_MSR_OFFCORE_RSP_n", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffffc000007000)), + MVX(0x000001a8, "I7_UNK_0000_01a8", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x000001aa, "MSR_MISC_PWR_MGMT", IntelI7MiscPwrMgmt, IntelI7MiscPwrMgmt, 0, 0, UINT64_C(0xffffffffffbffffe)), /* value=0x400001 */ + MFX(0x000001ad, "I7_MSR_TURBO_RATIO_LIMIT", IntelI7TurboRatioLimit, ReadOnly, 0x1a1a1c1d, 0, 0), /* value=0x1a1a1c1d */ + MVX(0x000001b0, "IA32_ENERGY_PERF_BIAS", 0x4, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x000001b1, "IA32_PACKAGE_THERM_STATUS", UINT32_C(0x883a0000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), + MVX(0x000001b2, "IA32_PACKAGE_THERM_INTERRUPT", 0, 0, UINT64_C(0xfffffffffe0000e8)), + MVO(0x000001c6, "I7_UNK_0000_01c6", 0x3), + MFX(0x000001c8, "MSR_LBR_SELECT", IntelI7LbrSelect, IntelI7LbrSelect, 0, 0, UINT64_C(0xfffffffffffffe00)), /* value=0x0 */ + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff0)), /* value=0xc */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffff803c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x7fffff7f`a4a6e188 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffff80`222d5ad0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MVO(0x000001e1, "I7_SB_UNK_0000_01e1", 0x2), + MFO(0x000001f0, "I7_VLW_CAPABILITY", IntelI7VirtualLegacyWireCap), /* value=0x74 */ + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0x0 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0x0 */ + MFX(0x000001fc, "I7_MSR_POWER_CTL", IntelI7PowerCtl, IntelI7PowerCtl, 0, 0x20, UINT64_C(0xfffffffffff20000)), /* value=0x4005f */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xc0000000 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`c0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xa0000000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`e0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, 
Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x90000000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`f0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x8c000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`fc000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x8b800000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff800800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000210, "IA32_MTRR_PHYS_BASE8", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x8, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000211, "IA32_MTRR_PHYS_MASK8", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x8, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000212, "IA32_MTRR_PHYS_BASE9", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x9, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000213, "IA32_MTRR_PHYS_MASK9", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x9, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + RSN(0x00000280, 0x00000281, "IA32_MC0_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x0, 0, UINT64_C(0xffffffffbfff8000)), + MFX(0x00000282, "IA32_MC2_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x2, 
0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + MFX(0x00000283, "IA32_MC3_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x3, 0, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + MFX(0x00000284, "IA32_MC4_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x4, 0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + RSN(0x00000285, 0x00000288, "IA32_MC5_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x5, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x000002e0, "I7_SB_NO_EVICT_MODE", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFN(0x000002e6, "I7_IB_UNK_0000_02e6", WriteOnly, IgnoreWrite), + MVX(0x000002e7, "I7_IB_UNK_0000_02e7", 0x1, 0x1, UINT64_C(0xfffffffffffffffe)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVO(0x00000305, "I7_SB_UNK_0000_0305", 0), + RSN(0x00000309, 0x0000030b, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffff000000000000)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x31c3, 0, 0), /* value=0x31c3 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8fffffff0)), /* value=0xf */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xe00000070000000f), UINT64_C(0x1ffffff8fffffff0)), /* value=0x0 */ + MFX(0x00000391, "I7_UNC_PERF_GLOBAL_CTRL", IntelI7UncPerfGlobalCtrl, IntelI7UncPerfGlobalCtrl, 0, 0, UINT64_C(0xffffffff1fffffe0)), /* value=0x0 */ + MFX(0x00000392, "I7_UNC_PERF_GLOBAL_STATUS", IntelI7UncPerfGlobalStatus, IntelI7UncPerfGlobalStatus, 0, 0xf, UINT64_C(0xfffffffffffffff0)), /* value=0x0 */ + MFX(0x00000393, "I7_UNC_PERF_GLOBAL_OVF_CTRL", IntelI7UncPerfGlobalOvfCtrl, IntelI7UncPerfGlobalOvfCtrl, 0, 0x3, UINT64_C(0xfffffffffffffffc)), /* value=0x0 */ + MFX(0x00000394, "I7_UNC_PERF_FIXED_CTR_CTRL", IntelI7UncPerfFixedCtrCtrl, IntelI7UncPerfFixedCtrCtrl, 0, 0, UINT64_C(0xffffffffffafffff)), /* value=0x0 */ + MFX(0x00000395, "I7_UNC_PERF_FIXED_CTR", IntelI7UncPerfFixedCtr, IntelI7UncPerfFixedCtr, 0, 0, UINT64_C(0xffff000000000000)), /* value=0x0 */ + MFO(0x00000396, "I7_UNC_CBO_CONFIG", IntelI7UncCBoxConfig), /* value=0x5 */ + MVX(0x00000397, "I7_SB_UNK_0000_0397", 0, 0, UINT64_C(0xfffffffffffffff0)), + MFX(0x000003b0, "I7_UNC_ARB_PERF_CTR0", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b1, "I7_UNC_ARB_PERF_CTR1", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b2, "I7_UNC_ARB_PERF_EVT_SEL0", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffe0230000)), /* value=0x0 */ + MFX(0x000003b3, "I7_UNC_ARB_PERF_EVT_SEL1", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffe0230000)), /* value=0x0 */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0x7ffffff0fffffff0)), /* value=0x0 */ + MFX(0x000003f6, "I7_MSR_PEBS_LD_LAT", IntelI7PebsLdLat, IntelI7PebsLdLat, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0xffff */ + MFX(0x000003f8, "I7_MSR_PKG_C3_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x0 */ + RSN(0x000003f9, 0x000003fa, "I7_MSR_PKG_Cn_RESIDENCY", IntelI7PkgCnResidencyN, 
ReadOnly, 0x6, 0, UINT64_MAX), + MFX(0x000003fc, "I7_MSR_CORE_C3_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x278ad50 */ + RSN(0x000003fd, 0x000003fe, "I7_MSR_CORE_Cn_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + RFN(0x00000400, 0x00000423, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0xda040000000010), 0, 0), /* value=0xda0400`00000010 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe0401e172), 0, 0), /* value=0xfff9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x7fffff00036dff), 0, 0), /* value=0x7fffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0xffff000011ff), 0, 0), /* value=0xffff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x100401e5, 0, 0), /* value=0x100401e5 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x627ff, 0, 0), /* value=0x627ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2a, 0, 0), /* value=0x2a */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0xff00000000), 0, 0), /* value=0xff`00000000 */ + MFX(0x0000048c, "IA32_VMX_EPT_VPID_CAP", Ia32VmxEptVpidCap, ReadOnly, UINT64_C(0xf0106114141), 0, 0), /* value=0xf01`06114141 */ + MFX(0x0000048d, "IA32_VMX_TRUE_PINBASED_CTLS", Ia32VmxTruePinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x0000048e, "IA32_VMX_TRUE_PROCBASED_CTLS", Ia32VmxTrueProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe04006172), 0, 0), /* value=0xfff9fffe`04006172 */ + MFX(0x0000048f, "IA32_VMX_TRUE_EXIT_CTLS", Ia32VmxTrueExitCtls, ReadOnly, UINT64_C(0x7fffff00036dfb), 0, 0), /* value=0x7fffff`00036dfb */ + MFX(0x00000490, "IA32_VMX_TRUE_ENTRY_CTLS", Ia32VmxTrueEntryCtls, ReadOnly, UINT64_C(0xffff000011fb), 0, 0), /* value=0xffff`000011fb */ + RSN(0x000004c1, 0x000004c4, "IA32_A_PMCn", Ia32PmcN, Ia32PmcN, 0x0, 0, UINT64_C(0xffff000000000000)), + MVO(0x00000502, "I7_SB_UNK_0000_0502", 0), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + MFX(0x00000601, "I7_SB_MSR_VR_CURRENT_CONFIG", IntelI7SandyVrCurrentConfig, IntelI7SandyVrCurrentConfig, 0, UINT32_C(0x80001fff), 0x7fffe000), /* value=0x18141494`8000030c */ + MVX(0x00000602, "I7_IB_UNK_0000_0602", UINT64_C(0x1814149480000104), UINT32_C(0x80001fff), 0x7fffe000), + MFX(0x00000603, "I7_SB_MSR_VR_MISC_CONFIG", IntelI7SandyVrMiscConfig, IntelI7SandyVrMiscConfig, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80303030 */ + MVX(0x00000604, "I7_IB_UNK_0000_0602", UINT32_C(0x80646464), UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), + MFO(0x00000606, "I7_SB_MSR_RAPL_POWER_UNIT", IntelI7SandyRaplPowerUnit), /* value=0xa1003 */ + MVX(0x00000609, "I7_SB_UNK_0000_0609", 0, 0, UINT64_C(0xffffffffffffff00)), + MFX(0x0000060a, "I7_SB_MSR_PKGC3_IRTL", 
IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x3, 0, UINT64_C(0xffffffffffff6000)), /* value=0x8c02 */ + RSN(0x0000060b, 0x0000060c, "I7_SB_MSR_PKGC6_IRTn", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x6, 0, UINT64_C(0xffffffffffff6000)), + MFO(0x0000060d, "I7_SB_MSR_PKG_C2_RESIDENCY", IntelI7SandyPkgC2Residency), /* value=0x11`06f311d4 */ + MFX(0x00000610, "I7_SB_MSR_PKG_POWER_LIMIT", IntelI7RaplPkgPowerLimit, IntelI7RaplPkgPowerLimit, 0, UINT64_C(0x80ffffff00ffffff), UINT64_C(0x7f000000ff000000)), /* value=0x800001c2`00dc8168 */ + MFO(0x00000611, "I7_SB_MSR_PKG_ENERGY_STATUS", IntelI7RaplPkgEnergyStatus), /* value=0x55a9ec99 */ + MFO(0x00000614, "I7_SB_MSR_PKG_POWER_INFO", IntelI7RaplPkgPowerInfo), /* value=0x100240`01200168 */ + MFX(0x00000638, "I7_SB_MSR_PP0_POWER_LIMIT", IntelI7RaplPp0PowerLimit, IntelI7RaplPp0PowerLimit, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80000000 */ + MFO(0x00000639, "I7_SB_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp0EnergyStatus), /* value=0x1dcdc9a0 */ + MFX(0x0000063a, "I7_SB_MSR_PP0_POLICY", IntelI7RaplPp0Policy, IntelI7RaplPp0Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x00000640, "I7_HW_MSR_PP0_POWER_LIMIT", IntelI7RaplPp1PowerLimit, IntelI7RaplPp1PowerLimit, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80000000 */ + MFO(0x00000641, "I7_HW_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp1EnergyStatus), /* value=0x39748b6 */ + MFX(0x00000642, "I7_HW_MSR_PP0_POLICY", IntelI7RaplPp1Policy, IntelI7RaplPp1Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x10 */ + RFN(0x00000680, 0x0000068f, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchFromN, IntelLastBranchFromN), + RFN(0x000006c0, 0x000006cf, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchToN, IntelLastBranchToN), + MFX(0x000006e0, "IA32_TSC_DEADLINE", Ia32TscDeadline, Ia32TscDeadline, 0, UINT64_C(0x1000000018), 0), /* value=0x94d`402e841f */ + MVX(0x00000700, "MSR_UNC_CBO_0_PERFEVTSEL0", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000701, "MSR_UNC_CBO_0_PERFEVTSEL1", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000702, "MSR_UNC_CBO_0_PERFEVTSEL2?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000703, "MSR_UNC_CBO_0_PERFEVTSEL3?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000704, "MSR_UNC_CBO_0_UNK_4", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000705, "MSR_UNC_CBO_0_UNK_5", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000706, "MSR_UNC_CBO_0_PER_CTR0", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000707, "MSR_UNC_CBO_0_PER_CTR1", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000708, "MSR_UNC_CBO_0_PER_CTR2?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000709, "MSR_UNC_CBO_0_PER_CTR3?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000710, "MSR_UNC_CBO_1_PERFEVTSEL0", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000711, "MSR_UNC_CBO_1_PERFEVTSEL1", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000712, "MSR_UNC_CBO_1_PERFEVTSEL2?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000713, "MSR_UNC_CBO_1_PERFEVTSEL3?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000714, "MSR_UNC_CBO_1_UNK_4", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000715, "MSR_UNC_CBO_1_UNK_5", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000716, "MSR_UNC_CBO_1_PER_CTR0", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000717, "MSR_UNC_CBO_1_PER_CTR1", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000718, "MSR_UNC_CBO_1_PER_CTR2?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000719, "MSR_UNC_CBO_1_PER_CTR3?", 0, 0, 
UINT64_C(0xfffff00000000000)), + MVX(0x00000720, "MSR_UNC_CBO_2_PERFEVTSEL0", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000721, "MSR_UNC_CBO_2_PERFEVTSEL1", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000722, "MSR_UNC_CBO_2_PERFEVTSEL2?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000723, "MSR_UNC_CBO_2_PERFEVTSEL3?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000724, "MSR_UNC_CBO_2_UNK_4", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000725, "MSR_UNC_CBO_2_UNK_5", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000726, "MSR_UNC_CBO_2_PER_CTR0", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000727, "MSR_UNC_CBO_2_PER_CTR1", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000728, "MSR_UNC_CBO_2_PER_CTR2?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000729, "MSR_UNC_CBO_2_PER_CTR3?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000730, "MSR_UNC_CBO_3_PERFEVTSEL0", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000731, "MSR_UNC_CBO_3_PERFEVTSEL1", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000732, "MSR_UNC_CBO_3_PERFEVTSEL2?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000733, "MSR_UNC_CBO_3_PERFEVTSEL3?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000734, "MSR_UNC_CBO_3_UNK_4", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000735, "MSR_UNC_CBO_3_UNK_5", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000736, "MSR_UNC_CBO_3_PER_CTR0", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000737, "MSR_UNC_CBO_3_PER_CTR1", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000738, "MSR_UNC_CBO_3_PER_CTR2?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000739, "MSR_UNC_CBO_3_PER_CTR3?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000740, "MSR_UNC_CBO_4_PERFEVTSEL0?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000741, "MSR_UNC_CBO_4_PERFEVTSEL1?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000742, "MSR_UNC_CBO_4_PERFEVTSEL2?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000743, "MSR_UNC_CBO_4_PERFEVTSEL3?", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000744, "MSR_UNC_CBO_4_UNK_4", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000745, "MSR_UNC_CBO_4_UNK_5", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000746, "MSR_UNC_CBO_4_PER_CTR0?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000747, "MSR_UNC_CBO_4_PER_CTR1?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000748, "MSR_UNC_CBO_4_PER_CTR2?", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000749, "MSR_UNC_CBO_4_PER_CTR3?", 0, 0, UINT64_C(0xfffff00000000000)), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x1b0008`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffff80`222f2fd0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0x0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x0 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffffff81`0500f000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7fff`7b14d3f0 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM) i7-2635QM CPU @ 
2.00GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Core_i7_2635QM = +{ + /*.pszName = */ "Intel Core i7-2635QM", + /*.pszFullName = */ "Intel(R) Core(TM) i7-2635QM CPU @ 2.00GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 42, + /*.uStepping = */ 7, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core7_SandyBridge, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_2635QM), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_2635QM)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX, + /*.DefUnknownCpuId = */ { 0x00000007, 0x00000340, 0x00000340, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core_i7_2635QM)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core_i7_2635QM), +}; + +#endif /* !VBOX_CPUDB_Intel_Core_i7_2635QM_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3820QM.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3820QM.h new file mode 100644 index 00000000..b9df82ee --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3820QM.h @@ -0,0 +1,386 @@ +/* $Id: Intel_Core_i7_3820QM.h $ */ +/** @file + * CPU database entry "Intel Core i7-3820QM". + * Generated at 2013-12-04T12:54:32Z by VBoxCpuReport v4.3.51r91071 on darwin.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core_i7_3820QM_h +#define VBOX_CPUDB_Intel_Core_i7_3820QM_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM) i7-3820QM CPU @ 2.70GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_i7_3820QM[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000306a9, 0x02100800, 0x7fbae3ff, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x76035a01, 0x00f0b2ff, 0x00000000, 0x00ca0000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, 0x00000000, 0x1c004121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00021120, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000077, 0x00000002, 0x00000009, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000281, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300403, 0x00000000, 0x00000000, 0x00000603, 0 }, + { 0x0000000b, 0x00000000, 0x00000000, 0x00000001, 0x00000002, 0x00000100, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, 0x00000000, 0x00000007, 0x00000340, 0x00000340, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x28100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x6e492020, 0x286c6574, 0x43202952, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x2865726f, 0x20294d54, 0x332d3769, 0x51303238, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x5043204d, 0x20402055, 0x30372e32, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) i7-3820QM CPU @ 2.70GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_i7_3820QM[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xffffffffffffffe0), 0), /* value=0x1f */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFX(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter, 0, 0, 0), + MFV(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x10000000000000)), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00900), 0, UINT64_C(0xfffffff0000002ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVX(0x0000002e, "I7_UNK_0000_002e", 0, 0x400, UINT64_C(0xfffffffffffffbff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000034, "P6_UNK_0000_0034", 0xe), + MFO(0x00000035, "MSR_CORE_THREAD_COUNT", IntelI7CoreThreadCount), /* value=0x40008*/ + MVO(0x00000036, "I7_UNK_0000_0036", 0x6c405eec), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0xff07 */ + MVX(0x0000003e, "I7_UNK_0000_003e", 0, 0, UINT64_C(0xfffffffffffffffe)), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, Ia32BiosUpdateTrigger), + MVX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", UINT64_C(0x1500000000), 0x1, UINT32_C(0xfffffffe)), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + MFX(0x000000c1, "IA32_PMC0", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000c2, "IA32_PMC1", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000c3, "IA32_PMC2", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000c4, "IA32_PMC3", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVO(0x000000ce, "P6_UNK_0000_00ce", UINT64_C(0x80c10f0011b00)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffffe1fffbf8)), /* value=0x8405 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xfffffffffff80000)), /* value=0x20414 */ + MFX(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf, 0, 0x47810, 0), /* value=0x6b`5d075e9c */ + MFX(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf, 0, 0x1121880, 0), /* value=0x55`2bec768b */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd0a, 0, 0), /* value=0xd0a */ + MVX(0x00000102, "I7_IVY_UNK_0000_0102", 0, 0, UINT64_C(0xffffffff7fff8000)), + MVX(0x00000103, "I7_IVY_UNK_0000_0103", 0, 0, UINT64_C(0xffffffffffffff00)), + MVX(0x00000104, "I7_IVY_UNK_0000_0104", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000132, "I7_UNK_0000_0132", UINT64_MAX, 0, 0), + MVX(0x00000133, "I7_UNK_0000_0133", UINT64_MAX, 0, 0), + MVX(0x00000134, "I7_UNK_0000_0134", UINT64_MAX, 0, 0), + MVO(0x0000013c, "TODO_0000_013c", 0x1), + MVX(0x00000140, "I7_IVY_UNK_0000_0140", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000142, "I7_IVY_UNK_0000_0142", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xb */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0xffffff80`21af5080 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* 
value=0xffffff80`214ce720 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0xc09, 0, 0), /* value=0xc09 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + RSN(0x00000186, 0x00000189, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0, 0, UINT64_C(0xffffffff00080000)), + MVX(0x00000194, "CLOCK_FLEX_MAX", 0x180000, 0x1e00ff, UINT64_C(0xffffffffffe00000)), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x240700002400), 0, 0), /* value=0x2407`00002400 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x2500, 0, 0), /* Might bite. value=0x2500 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x10, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0x10 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x88340000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), /* value=0x88340000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef72)), /* value=0x850089 */ + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x691200, 0xffff00, UINT64_C(0xfffffffff00000ff)), /* value=0x691200 */ + MVX(0x000001a4, "I7_UNK_0000_01a4", 0, 0, UINT64_C(0xfffffffffffff7f0)), + RSN(0x000001a6, 0x000001a7, "I7_MSR_OFFCORE_RSP_n", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0, 0, UINT64_C(0xffffffc000007000)), /* XXX: The range ended earlier than expected! 
*/ + MVX(0x000001a8, "I7_UNK_0000_01a8", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x000001aa, "MSR_MISC_PWR_MGMT", IntelI7MiscPwrMgmt, IntelI7MiscPwrMgmt, 0, 0, UINT64_C(0xffffffffffbffffe)), /* value=0x400001 */ + MVX(0x000001ad, "TODO_0000_01ad", 0x23232425, UINT32_MAX, ~(uint64_t)UINT32_MAX), + MVX(0x000001b0, "IA32_ENERGY_PERF_BIAS", 0x4, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x000001b1, "IA32_PACKAGE_THERM_STATUS", UINT32_C(0x88300000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), + MVX(0x000001b2, "IA32_PACKAGE_THERM_INTERRUPT", 0, 0, UINT64_C(0xfffffffffe0000e8)), + MVO(0x000001c6, "TODO_0000_01c6", 0x3), + MVX(0x000001c8, "TODO_0000_01c8", 0, 0, UINT64_C(0xfffffffffffffe00)), + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff0)), /* value=0x8 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffff803c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x7fffff7f`a38c2298 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffff80`214b24e0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MVO(0x000001f0, "TODO_0000_01f0", 0x74), + MVO(0x000001f2, "TODO_0000_01f2", UINT32_C(0x8b000006)), + MVO(0x000001f3, "TODO_0000_01f3", UINT32_C(0xff800800)), + MVX(0x000001fc, "TODO_0000_01fc", 0x340047, 0x20, UINT64_C(0xffffffffffc20000)), + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xc0000000 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`c0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xa0000000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`e0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x90000000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`f0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x8c000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`fc000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x8b000000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, 
"IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000210, "IA32_MTRR_PHYS_BASE8", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x8, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000211, "IA32_MTRR_PHYS_MASK8", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x8, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000212, "IA32_MTRR_PHYS_BASE9", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x9, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000213, "IA32_MTRR_PHYS_MASK9", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x9, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MVX(0x00000280, "TODO_0000_0280", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000281, "TODO_0000_0281", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000282, "TODO_0000_0282", 0, 0x40007fff, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000283, "TODO_0000_0283", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000284, "TODO_0000_0284", 0, 0x40007fff, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000285, "TODO_0000_0285", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000286, "TODO_0000_0286", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000287, "TODO_0000_0287", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x00000288, "TODO_0000_0288", 0, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x000002e0, "TODO_0000_02e0", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFN(0x000002e6, "TODO_0000_02e6", WriteOnly, IgnoreWrite), + MVX(0x000002e7, "TODO_0000_02e7", 0x1, 0x1, UINT64_C(0xfffffffffffffffe)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVO(0x00000305, "TODO_0000_0305", 0), + MVX(0x00000309, "TODO_0000_0309", 0, 0, UINT64_C(0xffff000000000000)), + MVX(0x0000030a, "TODO_0000_030a", 0, 0, UINT64_C(0xffff000000000000)), + MVX(0x0000030b, "TODO_0000_030b", 0, 0, UINT64_C(0xffff000000000000)), + MVO(0x00000345, "TODO_0000_0345", 0x31c3), + MVX(0x0000038d, "TODO_0000_038d", 0, 0, UINT64_C(0xfffffffffffff000)), + MVO(0x0000038e, "TODO_0000_038e", UINT64_C(0x8000000000000000)), + MVX(0x0000038f, "TODO_0000_038f", 0xf, 0, 
UINT64_C(0xfffffff8fffffff0)), + MVX(0x00000390, "TODO_0000_0390", 0, UINT64_C(0xe00000070000000f), UINT64_C(0x1ffffff8fffffff0)), + MVX(0x00000391, "TODO_0000_0391", 0, 0, UINT64_C(0xffffffff1fffffe0)), + MVX(0x00000392, "TODO_0000_0392", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000393, "TODO_0000_0393", 0, 0x3, UINT64_C(0xfffffffffffffffc)), + MVX(0x00000394, "TODO_0000_0394", 0, 0, UINT64_C(0xffffffffffafffff)), + MVX(0x00000395, "TODO_0000_0395", 0, 0, UINT64_C(0xffff000000000000)), + MVO(0x00000396, "TODO_0000_0396", 0x5), + MVX(0x00000397, "TODO_0000_0397", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x000003b0, "TODO_0000_03b0", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x000003b1, "TODO_0000_03b1", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x000003b2, "TODO_0000_03b2", 0, 0, UINT64_C(0xffffffffc0230000)), + MVX(0x000003b3, "TODO_0000_03b3", 0, 0, UINT64_C(0xffffffffc0230000)), + MVX(0x000003f1, "TODO_0000_03f1", 0, 0, UINT64_C(0x7ffffff0fffffff0)), + MVX(0x000003f6, "TODO_0000_03f6", UINT16_MAX, UINT64_C(0xffffffffffff0000), 0), + MVO(0x000003f8, "TODO_0000_03f8", 0), + MVO(0x000003f9, "TODO_0000_03f9", UINT64_C(0x27495a818)), + MVO(0x000003fa, "TODO_0000_03fa", UINT64_C(0x428fa6c6207)), + MVO(0x000003fc, "TODO_0000_03fc", 0x389bb693), + MVO(0x000003fd, "TODO_0000_03fd", 0x13323393), + MVO(0x000003fe, "TODO_0000_03fe", UINT64_C(0x48d7ffc9bd1)), + RFN(0x00000400, 0x00000423, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MVO(0x00000480, "TODO_0000_0480", UINT64_C(0xda040000000010)), + MVO(0x00000481, "TODO_0000_0481", UINT64_C(0x7f00000016)), + MVO(0x00000482, "TODO_0000_0482", UINT64_C(0xfff9fffe0401e172)), + MVO(0x00000483, "TODO_0000_0483", UINT64_C(0x7fffff00036dff)), + MVO(0x00000484, "TODO_0000_0484", UINT64_C(0xffff000011ff)), + MVO(0x00000485, "TODO_0000_0485", 0x100401e5), + MVO(0x00000486, "TODO_0000_0486", UINT32_C(0x80000021)), + MVO(0x00000487, "TODO_0000_0487", UINT32_MAX), + MVO(0x00000488, "TODO_0000_0488", 0x2000), + MVO(0x00000489, "TODO_0000_0489", 0x1767ff), + MVO(0x0000048a, "TODO_0000_048a", 0x2a), + MVO(0x0000048b, "TODO_0000_048b", UINT64_C(0x8ff00000000)), + MVO(0x0000048c, "TODO_0000_048c", UINT64_C(0xf0106114141)), + MVO(0x0000048d, "TODO_0000_048d", UINT64_C(0x7f00000016)), + MVO(0x0000048e, "TODO_0000_048e", UINT64_C(0xfff9fffe04006172)), + MVO(0x0000048f, "TODO_0000_048f", UINT64_C(0x7fffff00036dfb)), + MVO(0x00000490, "TODO_0000_0490", UINT64_C(0xffff000011fb)), + MVX(0x000004c1, "TODO_0000_04c1", 0, 0, UINT64_C(0xffff000000000000)), + MVX(0x000004c2, "TODO_0000_04c2", 0, 0, UINT64_C(0xffff000000000000)), + MVX(0x000004c3, "TODO_0000_04c3", 0, 0, UINT64_C(0xffff000000000000)), + MVX(0x000004c4, "TODO_0000_04c4", 0, 0, UINT64_C(0xffff000000000000)), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + MVX(0x00000601, "TODO_0000_0601", UINT64_C(0x1814149480000380), UINT32_C(0x80001fff), 0x7fffe000), + MVX(0x00000602, "TODO_0000_0602", UINT64_C(0x1814149480000170), UINT32_C(0x80001fff), 0x7fffe000), + MVX(0x00000603, "TODO_0000_0603", UINT32_C(0x80303030), UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), + MVX(0x00000604, "TODO_0000_0604", UINT32_C(0x80646464), UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), + MVO(0x00000606, "TODO_0000_0606", 0xa1003), + MVX(0x0000060a, "TODO_0000_060a", 0x8894, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x0000060b, "TODO_0000_060b", 0x88a9, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x0000060c, "TODO_0000_060c", 0x88c6, 0, 
UINT64_C(0xffffffffffff6000)), + MVO(0x0000060d, "TODO_0000_060d", UINT64_C(0xd0fd23dd9)), + MVX(0x00000610, "TODO_0000_0610", UINT64_C(0x800083e800dd8320), UINT64_C(0x80ffffff00ffffff), UINT64_C(0x7f000000ff000000)), + MVO(0x00000611, "TODO_0000_0611", 0x2ed06e3b), + MVO(0x00000614, "TODO_0000_0614", 0x1200168), + MVX(0x00000638, "TODO_0000_0638", UINT32_C(0x80000000), UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), + MVO(0x00000639, "TODO_0000_0639", 0x106344fd), + MVX(0x0000063a, "TODO_0000_063a", 0, 0, UINT64_C(0xffffffffffffffe0)), + MVX(0x00000640, "TODO_0000_0640", UINT32_C(0x80000000), UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), + MVO(0x00000641, "TODO_0000_0641", 0xb39e93), + MVX(0x00000642, "TODO_0000_0642", 0x10, 0, UINT64_C(0xffffffffffffffe0)), + MVO(0x00000648, "TODO_0000_0648", 0x1b), + MVO(0x00000649, "TODO_0000_0649", UINT64_C(0x120000000000000)), + MVO(0x0000064a, "TODO_0000_064a", UINT64_C(0x120000000000000)), + MVO(0x0000064b, "TODO_0000_064b", UINT32_C(0x80000000)), + MVX(0x0000064c, "TODO_0000_064c", UINT32_C(0x80000000), UINT32_C(0x800000ff), UINT64_C(0xffffffff7fffff00)), + MVX(0x00000680, "TODO_0000_0680", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000681, "TODO_0000_0681", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000682, "TODO_0000_0682", UINT64_C(0x7fffff7fa38c2289), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000683, "TODO_0000_0683", UINT64_C(0x7fffff80214b24cb), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000684, "TODO_0000_0684", UINT64_C(0x7fffff7fa38c2298), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000685, "TODO_0000_0685", UINT64_C(0x7fffff80214b24ee), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000686, "TODO_0000_0686", UINT64_C(0x7fffff7fa38c2289), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000687, "TODO_0000_0687", UINT64_C(0x7fffff80214b24cb), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000688, "TODO_0000_0688", UINT64_C(0x7fffff7fa38c2298), 0, UINT64_C(0x7fff800000000000)), + MVX(0x00000689, "TODO_0000_0689", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x0000068a, "TODO_0000_068a", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x0000068b, "TODO_0000_068b", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x0000068c, "TODO_0000_068c", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x0000068d, "TODO_0000_068d", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x0000068e, "TODO_0000_068e", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x0000068f, "TODO_0000_068f", 0, 0, UINT64_C(0x7fff800000000000)), + MVX(0x000006c0, "TODO_0000_06c0", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c1, "TODO_0000_06c1", UINT64_C(0xffffff7fa38c227f), 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c2, "TODO_0000_06c2", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c3, "TODO_0000_06c3", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c4, "TODO_0000_06c4", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c5, "TODO_0000_06c5", UINT64_C(0xffffff7fa38c227f), 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c6, "TODO_0000_06c6", UINT64_C(0xffffff80214b24c0), 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c7, "TODO_0000_06c7", UINT64_C(0xffffff7fa38c228f), 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c8, "TODO_0000_06c8", UINT64_C(0xffffff80214b24e0), 0, UINT64_C(0xffff800000000000)), + MVX(0x000006c9, "TODO_0000_06c9", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006ca, "TODO_0000_06ca", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006cb, "TODO_0000_06cb", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006cc, "TODO_0000_06cc", 0, 0, 
UINT64_C(0xffff800000000000)), + MVX(0x000006cd, "TODO_0000_06cd", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006ce, "TODO_0000_06ce", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006cf, "TODO_0000_06cf", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006e0, "TODO_0000_06e0", UINT64_C(0x535157ca1ca), 0x80000, 0), + MVX(0x00000700, "TODO_0000_0700", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000701, "TODO_0000_0701", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000702, "TODO_0000_0702", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000703, "TODO_0000_0703", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000704, "TODO_0000_0704", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000705, "TODO_0000_0705", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000706, "TODO_0000_0706", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000707, "TODO_0000_0707", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000708, "TODO_0000_0708", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000709, "TODO_0000_0709", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000710, "TODO_0000_0710", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000711, "TODO_0000_0711", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000712, "TODO_0000_0712", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000713, "TODO_0000_0713", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000714, "TODO_0000_0714", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000715, "TODO_0000_0715", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000716, "TODO_0000_0716", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000717, "TODO_0000_0717", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000718, "TODO_0000_0718", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000719, "TODO_0000_0719", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000720, "TODO_0000_0720", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000721, "TODO_0000_0721", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000722, "TODO_0000_0722", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000723, "TODO_0000_0723", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000724, "TODO_0000_0724", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000725, "TODO_0000_0725", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000726, "TODO_0000_0726", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000727, "TODO_0000_0727", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000728, "TODO_0000_0728", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000729, "TODO_0000_0729", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000730, "TODO_0000_0730", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000731, "TODO_0000_0731", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000732, "TODO_0000_0732", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000733, "TODO_0000_0733", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000734, "TODO_0000_0734", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000735, "TODO_0000_0735", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000736, "TODO_0000_0736", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000737, "TODO_0000_0737", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000738, "TODO_0000_0738", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000739, "TODO_0000_0739", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000740, "TODO_0000_0740", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000741, "TODO_0000_0741", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000742, "TODO_0000_0742", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000743, "TODO_0000_0743", 0, 0, UINT64_C(0xffffffffe0230000)), + 
MVX(0x00000744, "TODO_0000_0744", 0, 0, UINT64_C(0xfffffffffffffff0)),
+ MVX(0x00000745, "TODO_0000_0745", 0, 0xf, UINT64_C(0xfffffffffffffff0)),
+ MVX(0x00000746, "TODO_0000_0746", 0, 0, UINT64_C(0xfffff00000000000)),
+ MVX(0x00000747, "TODO_0000_0747", 0, 0, UINT64_C(0xfffff00000000000)),
+ MVX(0x00000748, "TODO_0000_0748", 0, 0, UINT64_C(0xfffff00000000000)),
+ MVX(0x00000749, "TODO_0000_0749", 0, 0, UINT64_C(0xfffff00000000000)),
+ MVX(0x00000c80, "TODO_0000_0c80", 0, 0, 0),
+ MVX(0x00000c81, "TODO_0000_0c81", 0, 0, 0),
+ MVX(0x00000c82, "TODO_0000_0c82", 0, ~(uint64_t)UINT32_MAX, 0),
+ MVX(0x00000c83, "TODO_0000_0c83", 0, ~(uint64_t)UINT32_MAX, 0),
+ MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)),
+ MFX(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget, 0, 0, 0), /* value=0x1b0008`00000000 */
+ MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffff80`214ce6c0 */
+ MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0x0 */
+ MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */
+ MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x0 */
+ MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffffff81`e942f000 */
+ MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7fff`7ccad1e0 */
+ MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x0 */
+};
+#endif /* !CPUM_DB_STANDALONE */
+
+
+/**
+ * Database entry for Intel(R) Core(TM) i7-3820QM CPU @ 2.70GHz.
+ */
+static CPUMDBENTRY const g_Entry_Intel_Core_i7_3820QM =
+{
+ /*.pszName = */ "Intel Core i7-3820QM",
+ /*.pszFullName = */ "Intel(R) Core(TM) i7-3820QM CPU @ 2.70GHz",
+ /*.enmVendor = */ CPUMCPUVENDOR_INTEL,
+ /*.uFamily = */ 6,
+ /*.uModel = */ 58,
+ /*.uStepping = */ 9,
+ /*.enmMicroarch = */ kCpumMicroarch_Intel_Core7_IvyBridge,
+ /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN,
+ /*.fFlags = */ 0,
+ /*.cMaxPhysAddrWidth= */ 36,
+ /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_3820QM),
+ /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_3820QM)),
+ /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
+ /*.DefUnknownCpuId = */ { 0x00000007, 0x00000340, 0x00000340, 0x00000000 },
+ /*.fMsrMask = */ UINT32_MAX,
+ /*.apaMsrRanges[] = */
+ {
+ NULL_ALONE(g_aMsrRanges_Intel_Core_i7_3820QM),
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ }
+};
+
+#endif /* !VBOX_CPUDB_Intel_Core_i7_3820QM_h */
+
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h
new file mode 100644
index 00000000..de803e55
--- /dev/null
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_3960X.h
@@ -0,0 +1,369 @@
+/* $Id: Intel_Core_i7_3960X.h $ */
+/** @file
+ * CPU database entry "Intel Core i7-3960X".
+ * Generated at 2013-12-12T15:29:11Z by VBoxCpuReport v4.3.53r91237 on win.amd64.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution.
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core_i7_3960X_h +#define VBOX_CPUDB_Intel_Core_i7_3960X_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM) i7-3960X CPU @ 3.30GHz. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_i7_3960X[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000206d6, 0x02200800, 0x1fbee3bf, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x76035a01, 0x00f0b2ff, 0x00000000, 0x00ca0000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x3c004121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x3c004122, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x3c004143, 0x01c0003f, 0x000001ff, 0x00000000, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x3c07c163, 0x04c0003f, 0x00002fff, 0x00000006, 0 }, + { 0x00000004, 0x00000004, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00021120, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000077, 0x00000002, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300403, 0x00000000, 0x00000000, 0x00000603, 0 }, + { 0x0000000b, 0x00000000, UINT32_MAX, 0x00000001, 0x00000002, 0x00000100, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000001, UINT32_MAX, 0x00000005, 0x0000000c, 0x00000201, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000002, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, UINT32_MAX, 0x00000007, 0x00000340, 0x00000340, 0x00000000, 0 }, + { 0x0000000d, 0x00000001, UINT32_MAX, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000002, UINT32_MAX, 0x00000100, 0x00000240, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x2c100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x49202020, 0x6c65746e, 0x20295228, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x65726f43, 0x294d5428, 0x2d376920, 0x30363933, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x50432058, 0x20402055, 0x30332e33, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x0000302e, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) i7-3960X CPU @ 3.30GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_i7_3960X[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xffffffffffffffe0), 0), /* value=0x1f */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x177ab4`48466b19 */ + MFV(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x8000000000000)), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffffc000000002ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVX(0x0000002e, "I7_UNK_0000_002e", 0, 0x400, UINT64_C(0xfffffffffffffbff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000034, "P6_UNK_0000_0034", 0x4cb), + MFO(0x00000035, "MSR_CORE_THREAD_COUNT", IntelI7CoreThreadCount), /* value=0x6000c*/ + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVX(0x0000003e, "I7_UNK_0000_003e", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, Ia32BiosUpdateTrigger), + MVX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", UINT64_C(0x61600000000), 0, UINT32_C(0xfffffffe)), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RSN(0x000000c1, 0x000000c4, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFO(0x000000ce, "MSR_PLATFORM_INFO", IntelPlatformInfo), /* value=0xc00'70012100*/ + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffffe1ffffff)), /* value=0x1e008400 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xfffffffffff80000)), /* value=0x20414 */ + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x2be98e4 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x2d84ced */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd0a, 0, 0), /* value=0xd0a */ + MFN(0x00000132, "CPUID1_FEATURE_MASK", IntelCpuId1FeatureMaskEax, IntelCpuId1FeatureMaskEax), /* value=0xffffffff`ffffffff */ + MFN(0x00000133, "CPUIDD_01_FEATURE_MASK", IntelCpuId1FeatureMaskEcdx, IntelCpuId1FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFN(0x00000134, "CPUID80000001_FEATURE_MASK", IntelCpuId80000001FeatureMaskEcdx, IntelCpuId80000001FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFO(0x0000013c, "I7_SB_AES_NI_CTL", IntelI7SandyAesNiCtl), /* value=0x1 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0x0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0xc12, 0, 0), /* value=0xc12 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, 
UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + MFX(0x0000017f, "I7_SB_ERROR_CONTROL", IntelI7SandyErrorControl, IntelI7SandyErrorControl, 0, 0xc, UINT64_C(0xffffffffffffffe1)), /* value=0x0 */ + RSN(0x00000186, 0x00000189, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, UINT64_C(0xffffffff00080000)), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0xf2100, 0xe0000, UINT64_C(0xfffffffffff00000)), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x288300002400), 0, 0), /* value=0x2883`00002400 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x2700, 0, 0), /* Might bite. value=0x2700 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x88380000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), /* value=0x88380000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef72)), /* value=0x850089 */ + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x5b0a00, 0xffff00, UINT64_C(0xfffffffff00000ff)), /* value=0x5b0a00 */ + MVX(0x000001a4, "I7_UNK_0000_01a4", 0, 0, UINT64_C(0xfffffffffffff7f0)), + RSN(0x000001a6, 0x000001a7, "I7_MSR_OFFCORE_RSP_n", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffffc000007000)), + MVX(0x000001a8, "I7_UNK_0000_01a8", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x000001aa, "MSR_MISC_PWR_MGMT", IntelI7MiscPwrMgmt, IntelI7MiscPwrMgmt, 0, 0, UINT64_C(0xffffffffffbffffe)), /* value=0x400000 */ + MFX(0x000001ad, "I7_MSR_TURBO_RATIO_LIMIT", IntelI7TurboRatioLimit, IntelI7TurboRatioLimit, UINT64_C(0x2424242425252727), 0, 0), /* value=0x24242424`25252727 */ + MVX(0x000001b1, "IA32_PACKAGE_THERM_STATUS", UINT32_C(0x88310000), UINT32_C(0xf87f0fff), UINT64_C(0xffffffff0780f000)), + MVX(0x000001b2, "IA32_PACKAGE_THERM_INTERRUPT", 0, 0, UINT64_C(0xfffffffffe0000e8)), + MVO(0x000001c6, "I7_UNK_0000_01c6", 0x3), + MFX(0x000001c8, "MSR_LBR_SELECT", IntelI7LbrSelect, IntelI7LbrSelect, 0, 0, UINT64_C(0xfffffffffffffe00)), /* value=0x0 */ + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff0)), /* value=0xc */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffff803c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x7ffff880`093814ea */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xfffff880`093a60e0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MVO(0x000001e1, "I7_SB_UNK_0000_01e1", 0x2), + MVX(0x000001ef, "I7_SB_UNK_0000_01ef", 0xff, 0, UINT64_MAX), + MFO(0x000001f0, "I7_VLW_CAPABILITY", IntelI7VirtualLegacyWireCap), /* value=0x74 */ + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0xad800006 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0xff800800 */ + MFX(0x000001f8, "IA32_PLATFORM_DCA_CAP", Ia32PlatformDcaCap, Ia32PlatformDcaCap, 0, 0, 
UINT64_C(0xfffffffffffffffe)), /* value=0x1 */ + MFO(0x000001f9, "IA32_CPU_DCA_CAP", Ia32CpuDcaCap), /* value=0x1 */ + MFX(0x000001fa, "IA32_DCA_0_CAP", Ia32Dca0Cap, Ia32Dca0Cap, 0, 0x40007ff, UINT64_C(0xfffffffffafe1800)), /* value=0x1e489 */ + MFX(0x000001fc, "I7_MSR_POWER_CTL", IntelI7PowerCtl, IntelI7PowerCtl, 0, 0, UINT64_C(0xffffffff00320020)), /* value=0x2500005b */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffc000000007ff)), /* value=0x3ffc`00000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x4`00000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffc000000007ff)), /* value=0x3fff`c0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x4`40000006 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffc000000007ff)), /* value=0x3fff`f0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffc00000000ff8)), /* value=0xae000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffffc000000007ff)), /* value=0x3fff`fe000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffc00000000ff8)), /* value=0xb0000000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffc000000007ff)), /* value=0x3fff`f0000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffc00000000ff8)), /* value=0xc0000000 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffc000000007ff)), /* value=0x3fff`c0000800 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffc000000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffc000000007ff)), /* value=0x0 */ + MFX(0x00000210, "IA32_MTRR_PHYS_BASE8", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x8, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x0 */ + MFX(0x00000211, "IA32_MTRR_PHYS_MASK8", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x8, 0, UINT64_C(0xffffc000000007ff)), /* value=0x0 */ + MFX(0x00000212, "IA32_MTRR_PHYS_BASE9", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x9, 0, UINT64_C(0xffffc00000000ff8)), /* value=0x0 */ + MFX(0x00000213, "IA32_MTRR_PHYS_MASK9", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x9, 0, UINT64_C(0xffffc000000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", 
Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + RSN(0x00000280, 0x00000281, "IA32_MC0_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x0, 0, UINT64_C(0xffffffffbfff8000)), + MFX(0x00000282, "IA32_MC2_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x2, 0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + MFX(0x00000283, "IA32_MC3_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x3, 0, UINT64_C(0xffffffffbfff8000)), /* value=0x40000001 */ + MFX(0x00000284, "IA32_MC4_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x4, 0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + RSN(0x00000285, 0x00000287, "IA32_MC5_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x5, 0, UINT64_C(0xffffffffbfff8000)), + RSN(0x00000288, 0x0000028b, "IA32_MC8_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x8, 0x1, UINT64_C(0xffffffffbfff8000)), + RSN(0x0000028c, 0x00000291, "IA32_MC12_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0xc, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x000002e0, "I7_SB_NO_EVICT_MODE", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVO(0x00000300, "I7_SB_UNK_0000_0300", UINT32_C(0x8000ff00)), + MVO(0x00000305, "I7_SB_UNK_0000_0305", 0), + RSN(0x00000309, 0x0000030b, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffff000000000000)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x31c3, 0, 0), /* value=0x31c3 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8fffffff0)), /* value=0xf */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xe00000070000000f), UINT64_C(0x1ffffff8fffffff0)), /* value=0x0 */ + MFX(0x0000039c, "I7_SB_MSR_PEBS_NUM_ALT", IntelI7SandyPebsNumAlt, IntelI7SandyPebsNumAlt, 0, 0, UINT64_C(0xfffffffffffffffe)), /* value=0x0 */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0x7ffffff0fffffff0)), /* value=0x0 */ + MFX(0x000003f6, "I7_MSR_PEBS_LD_LAT", IntelI7PebsLdLat, IntelI7PebsLdLat, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0xffff */ + MFX(0x000003f8, "I7_MSR_PKG_C3_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x0 */ + RSN(0x000003f9, 0x000003fa, "I7_MSR_PKG_Cn_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + MFX(0x000003fc, "I7_MSR_CORE_C3_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x3f8f`5718a87c */ + RSN(0x000003fd, 
0x000003fe, "I7_MSR_CORE_Cn_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + RFN(0x00000400, 0x00000447, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0xda040000000010), 0, 0), /* value=0xda0400`00000010 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe0401e172), 0, 0), /* value=0xfff9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x7fffff00036dff), 0, 0), /* value=0x7fffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0xffff000011ff), 0, 0), /* value=0xffff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x100401e5, 0, 0), /* value=0x100401e5 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x627ff, 0, 0), /* value=0x627ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2a, 0, 0), /* value=0x2a */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0x4ff00000000), 0, 0), /* value=0x4ff`00000000 */ + MFX(0x0000048c, "IA32_VMX_EPT_VPID_CAP", Ia32VmxEptVpidCap, ReadOnly, UINT64_C(0xf0106134141), 0, 0), /* value=0xf01`06134141 */ + MFX(0x0000048d, "IA32_VMX_TRUE_PINBASED_CTLS", Ia32VmxTruePinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x0000048e, "IA32_VMX_TRUE_PROCBASED_CTLS", Ia32VmxTrueProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe04006172), 0, 0), /* value=0xfff9fffe`04006172 */ + MFX(0x0000048f, "IA32_VMX_TRUE_EXIT_CTLS", Ia32VmxTrueExitCtls, ReadOnly, UINT64_C(0x7fffff00036dfb), 0, 0), /* value=0x7fffff`00036dfb */ + MFX(0x00000490, "IA32_VMX_TRUE_ENTRY_CTLS", Ia32VmxTrueEntryCtls, ReadOnly, UINT64_C(0xffff000011fb), 0, 0), /* value=0xffff`000011fb */ + RSN(0x000004c1, 0x000004c4, "IA32_A_PMCn", Ia32PmcN, Ia32PmcN, 0x0, 0, UINT64_C(0xffff000000000000)), + MVO(0x00000502, "I7_SB_UNK_0000_0502", 0), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + MFX(0x00000601, "I7_SB_MSR_VR_CURRENT_CONFIG", IntelI7SandyVrCurrentConfig, IntelI7SandyVrCurrentConfig, 0, UINT32_C(0x80001fff), 0x7fffe000), /* value=0x141494`80000640 */ + MFX(0x00000603, "I7_SB_MSR_VR_MISC_CONFIG", IntelI7SandyVrMiscConfig, IntelI7SandyVrMiscConfig, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80151515 */ + MFO(0x00000606, "I7_SB_MSR_RAPL_POWER_UNIT", IntelI7SandyRaplPowerUnit), /* value=0xa1003 */ + MFX(0x0000060a, "I7_SB_MSR_PKGC3_IRTL", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x3, 0, UINT64_C(0xffffffffffff6000)), /* value=0x0 */ + RSN(0x0000060b, 0x0000060c, "I7_SB_MSR_PKGC6_IRTn", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x6, 0, UINT64_C(0xffffffffffff6000)), + MFO(0x0000060d, "I7_SB_MSR_PKG_C2_RESIDENCY", IntelI7SandyPkgC2Residency), /* value=0x0 */ + MFX(0x00000610, "I7_SB_MSR_PKG_POWER_LIMIT", IntelI7RaplPkgPowerLimit, IntelI7RaplPkgPowerLimit, 0, UINT64_C(0x80ffffff00ffffff), 
UINT64_C(0x7f000000ff000000)), /* value=0x80068960`005affff */ + MFO(0x00000611, "I7_SB_MSR_PKG_ENERGY_STATUS", IntelI7RaplPkgEnergyStatus), /* value=0xc120ff02 */ + MFO(0x00000613, "I7_SB_MSR_PKG_PERF_STATUS", IntelI7RaplPkgPerfStatus), /* value=0x0 */ + MFO(0x00000614, "I7_SB_MSR_PKG_POWER_INFO", IntelI7RaplPkgPowerInfo), /* value=0x1a80410 */ + MFX(0x00000618, "I7_SB_MSR_DRAM_POWER_LIMIT", IntelI7RaplDramPowerLimit, IntelI7RaplDramPowerLimit, 0, UINT32_C(0x80feffff), UINT64_C(0xffffffff7f010000)), /* value=0x80000000 */ + MFO(0x00000619, "I7_SB_MSR_DRAM_ENERGY_STATUS", IntelI7RaplDramEnergyStatus), /* value=0x0 */ + MFO(0x0000061b, "I7_SB_MSR_DRAM_PERF_STATUS", IntelI7RaplDramPerfStatus), /* value=0x0 */ + MFO(0x0000061c, "I7_SB_MSR_DRAM_POWER_INFO", IntelI7RaplDramPowerInfo), /* value=0x280258`00780118 */ + MFX(0x00000638, "I7_SB_MSR_PP0_POWER_LIMIT", IntelI7RaplPp0PowerLimit, IntelI7RaplPp0PowerLimit, 0, UINT32_C(0x80ffffff), UINT64_C(0xffffffff7f000000)), /* value=0x80000000 */ + MFO(0x00000639, "I7_SB_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp0EnergyStatus), /* value=0x448bc04 */ + MFX(0x0000063a, "I7_SB_MSR_PP0_POLICY", IntelI7RaplPp0Policy, IntelI7RaplPp0Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFO(0x0000063b, "I7_SB_MSR_PP0_PERF_STATUS", IntelI7RaplPp0PerfStatus), /* value=0x0 */ + RFN(0x00000680, 0x0000068f, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchFromN, IntelLastBranchFromN), + RFN(0x000006c0, 0x000006cf, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MFI(0x000006e0, "IA32_TSC_DEADLINE", Ia32TscDeadline), /* value=0x0 */ + MVX(0x00000a00, "I7_SB_UNK_0000_0a00", 0, 0, UINT64_C(0xfffffffffffffec0)), + MVX(0x00000a01, "I7_SB_UNK_0000_0a01", 0x178fa000, 0, UINT64_C(0xffffffff00000f80)), + MVX(0x00000a02, "I7_SB_UNK_0000_0a02", 0, 0, UINT64_C(0xffffffff20002000)), + MVX(0x00000c00, "I7_SB_UNK_0000_0c00", 0, 0, UINT64_C(0xffffffffbfffff00)), + MVX(0x00000c01, "I7_SB_UNK_0000_0c01", 0, 0x9229fe7, UINT64_C(0xfffffffff6dd6018)), + MVO(0x00000c06, "I7_SB_UNK_0000_0c06", 0x6), + MVX(0x00000c08, "I7_SB_UNK_0000_0c08", 0, 0, UINT64_C(0xffffffffffafffff)), + MVX(0x00000c09, "I7_SB_UNK_0000_0c09", 0x301a, 0, UINT64_C(0xffff000000000000)), + MVX(0x00000c10, "I7_SB_UNK_0000_0c10", 0, 0x20000, UINT64_C(0xffffffffe0210000)), + MVX(0x00000c11, "I7_SB_UNK_0000_0c11", 0, 0x20000, UINT64_C(0xffffffffe0210000)), + MVX(0x00000c14, "I7_SB_UNK_0000_0c14", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000c15, "I7_SB_UNK_0000_0c15", 0, 0x3, UINT64_C(0xfffffffffffffffc)), + MVX(0x00000c16, "I7_SB_UNK_0000_0c16", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000c17, "I7_SB_UNK_0000_0c17", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000c24, "I7_SB_UNK_0000_0c24", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000c30, "I7_SB_UNK_0000_0c30", 0, 0x20000, UINT64_C(0xffffffff20013f00)), + MVX(0x00000c31, "I7_SB_UNK_0000_0c31", 0, 0x20000, UINT64_C(0xffffffff20013f00)), + MVX(0x00000c32, "I7_SB_UNK_0000_0c32", 0, 0x20000, UINT64_C(0xffffffff20013f00)), + MVX(0x00000c33, "I7_SB_UNK_0000_0c33", 0, 0x20000, UINT64_C(0xffffffff20013f00)), + MVX(0x00000c34, "I7_SB_UNK_0000_0c34", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0x00000c35, "I7_SB_UNK_0000_0c35", 0, 0x7f, UINT64_C(0xffffffffffffff80)), + MVX(0x00000c36, "I7_SB_UNK_0000_0c36", 0x203, 0, UINT64_C(0xffff000000000000)), + MVX(0x00000c37, "I7_SB_UNK_0000_0c37", 0x203, 0, UINT64_C(0xffff000000000000)), + MVX(0x00000c38, "I7_SB_UNK_0000_0c38", 0x20c, 0, UINT64_C(0xffff000000000000)), + MVX(0x00000c39, 
"I7_SB_UNK_0000_0c39", 0x203, 0, UINT64_C(0xffff000000000000)), + MVX(0x00000d04, "I7_SB_UNK_0000_0d04", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000d10, "I7_SB_UNK_0000_0d10", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d11, "I7_SB_UNK_0000_0d11", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d12, "I7_SB_UNK_0000_0d12", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d13, "I7_SB_UNK_0000_0d13", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d14, "I7_SB_UNK_0000_0d14", 0x20, 0, UINT64_C(0xffffffff00000300)), + MVX(0x00000d15, "I7_SB_UNK_0000_0d15", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000d16, "I7_SB_UNK_0000_0d16", 0x81c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d17, "I7_SB_UNK_0000_0d17", 0x80c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d18, "I7_SB_UNK_0000_0d18", 0x80c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d19, "I7_SB_UNK_0000_0d19", 0x810, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d24, "I7_SB_UNK_0000_0d24", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000d30, "I7_SB_UNK_0000_0d30", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d31, "I7_SB_UNK_0000_0d31", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d32, "I7_SB_UNK_0000_0d32", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d33, "I7_SB_UNK_0000_0d33", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d34, "I7_SB_UNK_0000_0d34", 0x20, 0, UINT64_C(0xffffffff00000300)), + MVX(0x00000d35, "I7_SB_UNK_0000_0d35", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000d36, "I7_SB_UNK_0000_0d36", 0x864, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d37, "I7_SB_UNK_0000_0d37", 0x804, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d38, "I7_SB_UNK_0000_0d38", 0x822, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d39, "I7_SB_UNK_0000_0d39", 0x81c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d44, "I7_SB_UNK_0000_0d44", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000d50, "I7_SB_UNK_0000_0d50", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d51, "I7_SB_UNK_0000_0d51", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d52, "I7_SB_UNK_0000_0d52", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d53, "I7_SB_UNK_0000_0d53", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d54, "I7_SB_UNK_0000_0d54", 0x20, 0, UINT64_C(0xffffffff00000300)), + MVX(0x00000d55, "I7_SB_UNK_0000_0d55", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000d56, "I7_SB_UNK_0000_0d56", 0x848, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d57, "I7_SB_UNK_0000_0d57", 0x866, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d58, "I7_SB_UNK_0000_0d58", 0x83c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d59, "I7_SB_UNK_0000_0d59", 0x83c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d64, "I7_SB_UNK_0000_0d64", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000d70, "I7_SB_UNK_0000_0d70", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d71, "I7_SB_UNK_0000_0d71", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d72, "I7_SB_UNK_0000_0d72", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d73, "I7_SB_UNK_0000_0d73", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d74, "I7_SB_UNK_0000_0d74", 0x20, 0, UINT64_C(0xffffffff00000300)), + MVX(0x00000d75, "I7_SB_UNK_0000_0d75", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000d76, "I7_SB_UNK_0000_0d76", 0x846, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d77, "I7_SB_UNK_0000_0d77", 0x90c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d78, 
"I7_SB_UNK_0000_0d78", 0x846, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d79, "I7_SB_UNK_0000_0d79", 0x842, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d84, "I7_SB_UNK_0000_0d84", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000d90, "I7_SB_UNK_0000_0d90", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d91, "I7_SB_UNK_0000_0d91", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d92, "I7_SB_UNK_0000_0d92", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d93, "I7_SB_UNK_0000_0d93", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000d94, "I7_SB_UNK_0000_0d94", 0x20, 0, UINT64_C(0xffffffff00000300)), + MVX(0x00000d95, "I7_SB_UNK_0000_0d95", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000d96, "I7_SB_UNK_0000_0d96", 0x8c6, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d97, "I7_SB_UNK_0000_0d97", 0x840, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d98, "I7_SB_UNK_0000_0d98", 0x81a, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000d99, "I7_SB_UNK_0000_0d99", 0x910, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000da4, "I7_SB_UNK_0000_0da4", 0, 0x3, UINT64_C(0xfffffffffffcfefc)), + MVX(0x00000db0, "I7_SB_UNK_0000_0db0", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000db1, "I7_SB_UNK_0000_0db1", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000db2, "I7_SB_UNK_0000_0db2", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000db3, "I7_SB_UNK_0000_0db3", 0, 0x30000, UINT64_C(0xffffffff00200000)), + MVX(0x00000db4, "I7_SB_UNK_0000_0db4", 0x20, 0, UINT64_C(0xffffffff00000300)), + MVX(0x00000db5, "I7_SB_UNK_0000_0db5", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000db6, "I7_SB_UNK_0000_0db6", 0x80c, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000db7, "I7_SB_UNK_0000_0db7", 0x81e, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000db8, "I7_SB_UNK_0000_0db8", 0x810, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000db9, "I7_SB_UNK_0000_0db9", 0x80a, 0, UINT64_C(0xfffff00000000000)), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff800`030dac00 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff800`030da940 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0xfffe0000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xfffff880`061e6000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7ff`fffde000 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM) i7-3960X CPU @ 3.30GHz. 
+ */
+static CPUMDBENTRY const g_Entry_Intel_Core_i7_3960X =
+{
+ /*.pszName = */ "Intel Core i7-3960X",
+ /*.pszFullName = */ "Intel(R) Core(TM) i7-3960X CPU @ 3.30GHz",
+ /*.enmVendor = */ CPUMCPUVENDOR_INTEL,
+ /*.uFamily = */ 6,
+ /*.uModel = */ 45,
+ /*.uStepping = */ 6,
+ /*.enmMicroarch = */ kCpumMicroarch_Intel_Core7_SandyBridge,
+ /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ,
+ /*.fFlags = */ 0,
+ /*.cMaxPhysAddrWidth= */ 46,
+ /*.fMxCsrMask = */ 0xffff,
+ /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_3960X),
+ /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_3960X)),
+ /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX,
+ /*.DefUnknownCpuId = */ { 0x00000007, 0x00000340, 0x00000340, 0x00000000 },
+ /*.fMsrMask = */ UINT32_MAX,
+ /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core_i7_3960X)),
+ /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core_i7_3960X),
+};
+
+#endif /* !VBOX_CPUDB_Intel_Core_i7_3960X_h */
+
diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h
new file mode 100644
index 00000000..a3145d2f
--- /dev/null
+++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_5600U.h
@@ -0,0 +1,368 @@
+/* $Id: Intel_Core_i7_5600U.h $ */
+/** @file
+ * CPU database entry "Intel Core i7-5600U".
+ * Generated at 2015-11-04T14:14:27Z by VBoxCpuReport v5.0.51r103906 on win.amd64.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VBOX_CPUDB_Intel_Core_i7_5600U_h
+#define VBOX_CPUDB_Intel_Core_i7_5600U_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+
+#ifndef CPUM_DB_STANDALONE
+/**
+ * CPUID leaves for Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz.
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_i7_5600U[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000014, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000306d4, 0x00100800, 0x7ffafbff, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x76036301, 0x00f0b5ff, 0x00000000, 0x00c30000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x1c004121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x1c004122, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x1c004143, 0x01c0003f, 0x000001ff, 0x00000000, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x1c03c163, 0x03c0003f, 0x00000fff, 0x00000006, 0 }, + { 0x00000004, 0x00000004, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x11142120, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000077, 0x00000002, 0x00000009, 0x00000000, 0 }, + { 0x00000007, 0x00000000, UINT32_MAX, 0x00000000, 0x021c2fbb, 0x00000000, 0x00000000, 0 }, + { 0x00000007, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300403, 0x00000000, 0x00000000, 0x00000603, 0 }, + { 0x0000000b, 0x00000000, UINT32_MAX, 0x00000001, 0x00000002, 0x00000100, 0x00000000, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000001, UINT32_MAX, 0x00000004, 0x00000004, 0x00000201, 0x00000000, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, UINT32_MAX, 0x00000000, 0x00000001, 0x00000001, 0x00000000, 0 }, + { 0x0000000c, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, UINT32_MAX, 0x00000007, 0x00000340, 0x00000340, 0x00000000, 0 }, + { 0x0000000d, 0x00000001, UINT32_MAX, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000002, UINT32_MAX, 0x00000100, 0x00000240, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000014, 0x00000000, UINT32_MAX, 0x00000000, 0x00000001, 0x00000001, 0x00000000, 0 }, + { 0x00000014, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000121, 0x2c100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x37692029, 0x3036352d, 0x43205530, 0x40205550, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x362e3220, 0x7a484730, 0x00000000, 0x00000000, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003027, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_i7_5600U[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xffffffffffffff00), 0), /* value=0xff */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x1c93`50dd535c */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x18000000000000), 0, 0), /* value=0x180000`00000000 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00900), 0, UINT64_C(0xffffff80000002ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVX(0x0000002e, "I7_UNK_0000_002e", 0, 0x400, UINT64_C(0xfffffffffffffbff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000034, "P6_UNK_0000_0034", 0x97b), + MFO(0x00000035, "MSR_CORE_THREAD_COUNT", IntelI7CoreThreadCount), /* value=0x20004 */ + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVX(0x0000003b, "P6_UNK_0000_003b", UINT64_C(0xfffffffffffffffe), 0, 0), + MVX(0x0000003e, "I7_UNK_0000_003e", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, 0, UINT32_MAX), /* value=0x1f`00000000 */ + MVX(0x00000095, "TODO_0000_0095", 0, 0, UINT64_C(0xfffffffffffffffe)), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RSN(0x000000c1, 0x000000c4, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFO(0x000000ce, "IA32_PLATFORM_INFO", IntelPlatformInfo), /* value=0x5053b`f3011a00 */ + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffff01ffffff)), /* value=0x1e008408 */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, UINT32_C(0xffff7000), ~(uint64_t)UINT32_MAX), /* value=0x8b800000 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xfffffffffff80000)), /* value=0x51814 */ + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x23c`764b31c5 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x2af`f518152c */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd0a, 0, 0), /* value=0xd0a */ + MVX(0x00000102, 
"I7_IB_UNK_0000_0102", 0, 0, UINT64_C(0xffffffff7fff8000)), + MVX(0x00000103, "I7_IB_UNK_0000_0103", 0, 0, UINT64_C(0xfffffffffffff000)), + MVX(0x00000104, "I7_IB_UNK_0000_0104", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000110, "TODO_0000_0110", 0x3), + MVX(0x0000011f, "TODO_0000_011f", 0, 0, UINT64_C(0xffffffffffffff00)), + MVO(0x0000013a, "TODO_0000_013a", UINT64_C(0x30000007f)), + MFO(0x0000013c, "I7_SB_AES_NI_CTL", IntelI7SandyAesNiCtl), /* value=0x1 */ + MVX(0x00000140, "I7_IB_UNK_0000_0140", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000142, "I7_IB_UNK_0000_0142", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000150, "P6_UNK_0000_0150", 0, UINT64_C(0x8000ffffffffffff), UINT64_C(0x7fff000000000000)), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0x0 */ + MVX(0x00000178, "TODO_0000_0178", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x1000c07, 0, 0), /* value=0x1000c07 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, UINT64_C(0xfffffffe00080000)), /* XXX: The range ended earlier than expected! */ + MVX(0x00000188, "IA32_PERFEVTSEL2", 0, 0, UINT64_C(0xfffffffc00080000)), + MVX(0x00000189, "IA32_PERFEVTSEL3", 0, 0, UINT64_C(0xfffffffe00080000)), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0x90000, 0xe0000, UINT64_C(0xffffffffffe00000)), /* value=0x90000 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x273c00002000), 0, 0), /* value=0x273c`00002000 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x2000, 0, 0), /* Might bite. value=0x2000 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x10, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0x10 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x88150800), UINT32_C(0xf87f07fd), UINT64_C(0xffffffff0780f000)), /* value=0x88150800 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0, 0, 0), /* value=0x0 */ +/// @todo WARNING: IA32_MISC_ENABLE probing needs hacking on this CPU! 
+ MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef72)), /* value=0x850089 */ + MVO(0x000001a1, "P6_UNK_0000_01a1", 0x995), + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x5690000, 0xffff00, UINT64_C(0xffffffffc00000ff)), /* value=0x5690000 */ + MVX(0x000001a4, "I7_UNK_0000_01a4", 0, 0, UINT64_C(0xfffffffffffff7f0)), + RSN(0x000001a6, 0x000001a7, "I7_MSR_OFFCORE_RSP_n", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffffc000007000)), + MVX(0x000001a8, "I7_UNK_0000_01a8", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x000001aa, "MSR_MISC_PWR_MGMT", IntelI7MiscPwrMgmt, IntelI7MiscPwrMgmt, 0, 0, UINT64_C(0xffffffffffbffffe)), /* value=0x400000 */ + MFX(0x000001ad, "I7_MSR_TURBO_RATIO_LIMIT", IntelI7TurboRatioLimit, IntelI7TurboRatioLimit, UINT64_C(0x1f1f1f1f1f20), UINT64_MAX, 0), /* value=0x1f1f`1f1f1f20 */ + MVX(0x000001b0, "IA32_ENERGY_PERF_BIAS", 0x6, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x000001b1, "IA32_PACKAGE_THERM_STATUS", UINT32_C(0x880d0802), UINT32_C(0xf87f07fd), UINT64_C(0xffffffff0780f000)), + MVX(0x000001b2, "IA32_PACKAGE_THERM_INTERRUPT", 0, 0, UINT64_C(0xfffffffffe0000e8)), + MVO(0x000001c6, "I7_UNK_0000_01c6", 0x3), + MFX(0x000001c8, "MSR_LBR_SELECT", IntelI7LbrSelect, IntelI7LbrSelect, 0, 0, UINT64_C(0xfffffffffffffc00)), /* value=0x0 */ + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff0)), /* value=0x0 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffff003c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x0 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0x0 */ + MFX(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp, 0, 0, UINT64_C(0x1fff800000000000)), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MFO(0x000001f0, "I7_VLW_CAPABILITY", IntelI7VirtualLegacyWireCap), /* value=0x74 */ + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0xdc000006 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0xff000800 */ + MFX(0x000001fc, "I7_MSR_POWER_CTL", IntelI7PowerCtl, IntelI7PowerCtl, 0, UINT32_C(0x80000020), UINT64_C(0xffffffff3e100000)), /* value=0x4005f */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7e`00000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x2`00000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`f0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x2`10000006 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`f8000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x2`18000006 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, 
UINT64_C(0xffffff80000007ff)), /* value=0x7f`fc000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x2`1c000006 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`fe000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x2`1e000006 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`ff800800 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffffff8000000ff8)), /* value=0xe0000000 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`e0000800 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffff8000000ff8)), /* value=0xde000000 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`fe000800 */ + MFX(0x00000210, "IA32_MTRR_PHYS_BASE8", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x8, 0, UINT64_C(0xffffff8000000ff8)), /* value=0xdd000000 */ + MFX(0x00000211, "IA32_MTRR_PHYS_MASK8", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x8, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`ff000800 */ + MFX(0x00000212, "IA32_MTRR_PHYS_BASE9", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x9, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x0 */ + MFX(0x00000213, "IA32_MTRR_PHYS_MASK9", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x9, 0, UINT64_C(0xffffff80000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFX(0x00000280, "IA32_MC0_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x0, 0x40000000, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + RSN(0x00000281, 0x00000283, "IA32_MC1_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x1, 0, UINT64_C(0xffffffffbfff8000)), + MFX(0x00000284, "IA32_MC4_CTL2", Ia32McNCtl2, Ia32McNCtl2, 0x4, 0x40007fff, UINT64_C(0xffffffffbfff8000)), /* value=0x0 */ + RSN(0x00000285, 0x00000286, "IA32_MC5_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x5, 0, UINT64_C(0xffffffffbfff8000)), + MVX(0x000002e0, "I7_SB_NO_EVICT_MODE", 0, 0, UINT64_C(0xfffffffffffffffc)), + 
MVX(0x000002e7, "I7_IB_UNK_0000_02e7", 0x1, 0x1, UINT64_C(0xfffffffffffffffe)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVO(0x00000305, "I7_SB_UNK_0000_0305", 0), + RSN(0x00000309, 0x0000030b, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffff000000000000)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x32c4, 0, 0), /* value=0x32c4 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8fffffff0)), /* value=0xf */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xe08000070000000f), UINT64_C(0x1f7ffff8fffffff0)), /* value=0x0 */ + MFX(0x00000391, "I7_UNC_PERF_GLOBAL_CTRL", IntelI7UncPerfGlobalCtrl, IntelI7UncPerfGlobalCtrl, 0, 0, UINT64_C(0xffffffff1fffff80)), /* value=0x0 */ + MFX(0x00000392, "I7_UNC_PERF_GLOBAL_STATUS", IntelI7UncPerfGlobalStatus, IntelI7UncPerfGlobalStatus, 0, 0xf, UINT64_C(0xfffffffffffffff0)), /* value=0x0 */ + MFX(0x00000393, "I7_UNC_PERF_GLOBAL_OVF_CTRL", IntelI7UncPerfGlobalOvfCtrl, IntelI7UncPerfGlobalOvfCtrl, 0, 0x3, UINT64_C(0xfffffffffffffffc)), /* value=0x0 */ + MFX(0x00000394, "I7_UNC_PERF_FIXED_CTR_CTRL", IntelI7UncPerfFixedCtrCtrl, IntelI7UncPerfFixedCtrCtrl, 0, 0, UINT64_C(0xffffffffffafffff)), /* value=0x0 */ + MFX(0x00000395, "I7_UNC_PERF_FIXED_CTR", IntelI7UncPerfFixedCtr, IntelI7UncPerfFixedCtr, 0, 0, UINT64_C(0xffff000000000000)), /* value=0x0 */ + MFO(0x00000396, "I7_UNC_CBO_CONFIG", IntelI7UncCBoxConfig), /* value=0x3 */ + MVX(0x00000397, "I7_SB_UNK_0000_0397", 0, 0, UINT64_C(0xfffffffffffffff0)), + MFX(0x000003b0, "I7_UNC_ARB_PERF_CTR0", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b1, "I7_UNC_ARB_PERF_CTR1", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b2, "I7_UNC_ARB_PERF_EVT_SEL0", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffe0230000)), /* value=0x0 */ + MFX(0x000003b3, "I7_UNC_ARB_PERF_EVT_SEL1", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffe0230000)), /* value=0x0 */ + MVO(0x000003f0, "TODO_0000_03f0", 0), + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0xfffffff0fffffff0)), /* value=0x0 */ + MFX(0x000003f6, "I7_MSR_PEBS_LD_LAT", IntelI7PebsLdLat, IntelI7PebsLdLat, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0xffff */ + MFX(0x000003f8, "I7_MSR_PKG_C3_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x4`465710e6 */ + RSN(0x000003f9, 0x000003fa, "I7_MSR_PKG_Cn_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + MFX(0x000003fc, "I7_MSR_CORE_C3_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x2`3a8a1eca */ + RSN(0x000003fd, 0x000003fe, "I7_MSR_CORE_Cn_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + RFN(0x00000400, 0x0000041b, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0xda040000000012), 0, 0), /* value=0xda0400`00000012 */ 
+ MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe0401e172), 0, 0), /* value=0xfff9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x7fffff00036dff), 0, 0), /* value=0x7fffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0xffff000011ff), 0, 0), /* value=0xffff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x300481e5, 0, 0), /* value=0x300481e5 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x3767ff, 0, 0), /* value=0x3767ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2a, 0, 0), /* value=0x2a */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0x57cff00000000), 0, 0), /* value=0x57cff`00000000 */ + MFX(0x0000048c, "IA32_VMX_EPT_VPID_CAP", Ia32VmxEptVpidCap, ReadOnly, UINT64_C(0xf0106334141), 0, 0), /* value=0xf01`06334141 */ + MFX(0x0000048d, "IA32_VMX_TRUE_PINBASED_CTLS", Ia32VmxTruePinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x0000048e, "IA32_VMX_TRUE_PROCBASED_CTLS", Ia32VmxTrueProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe04006172), 0, 0), /* value=0xfff9fffe`04006172 */ + MFX(0x0000048f, "IA32_VMX_TRUE_EXIT_CTLS", Ia32VmxTrueExitCtls, ReadOnly, UINT64_C(0x7fffff00036dfb), 0, 0), /* value=0x7fffff`00036dfb */ + MFX(0x00000490, "IA32_VMX_TRUE_ENTRY_CTLS", Ia32VmxTrueEntryCtls, ReadOnly, UINT64_C(0xffff000011fb), 0, 0), /* value=0xffff`000011fb */ + MFX(0x00000491, "IA32_VMX_VMFUNC", Ia32VmxVmFunc, ReadOnly, 0x1, 0, 0), /* value=0x1 */ + RSN(0x000004c1, 0x000004c4, "IA32_A_PMCn", Ia32PmcN, Ia32PmcN, 0x0, 0, UINT64_C(0xffff000000000000)), + MVO(0x000004e0, "TODO_0000_04e0", 0x1), + MVO(0x000004e2, "TODO_0000_04e2", 0x5), + MVO(0x000004e3, "TODO_0000_04e3", 0xff0), + MVX(0x00000560, "TODO_0000_0560", 0, 0, UINT64_C(0xffffff800000007f)), + MVX(0x00000561, "TODO_0000_0561", 0x7f, UINT64_C(0x70000007f), UINT32_C(0xffffff80)), + MVX(0x00000570, "TODO_0000_0570", 0x2100, 0x2100, UINT64_C(0xffffffffffffd272)), + MVX(0x00000571, "TODO_0000_0571", 0, 0x7, UINT64_C(0xffffffffffffffc8)), + MVX(0x00000572, "TODO_0000_0572", 0, 0, UINT64_C(0xffff00000000001f)), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + MFX(0x00000601, "I7_SB_MSR_VR_CURRENT_CONFIG", IntelI7SandyVrCurrentConfig, IntelI7SandyVrCurrentConfig, 0, UINT32_C(0x80001fff), UINT64_C(0x800000007fffe000)), /* value=0x40101414`80000100 */ + MFX(0x00000603, "I7_SB_MSR_VR_MISC_CONFIG", IntelI7SandyVrMiscConfig, IntelI7SandyVrMiscConfig, 0, 0, UINT64_C(0xff80000000000000)), /* value=0x360000`00333333 */ + MFO(0x00000606, "I7_SB_MSR_RAPL_POWER_UNIT", IntelI7SandyRaplPowerUnit), /* value=0xa0e03 */ + MVX(0x00000609, "I7_SB_UNK_0000_0609", 0x1a, 0xc0, UINT64_C(0xffffffffffffff00)), + MFX(0x0000060a, "I7_SB_MSR_PKGC3_IRTL", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x3, 0, UINT64_C(0xffffffffffff6000)), /* value=0x8842 */ + RSN(0x0000060b, 0x0000060c, 
"I7_SB_MSR_PKGC6_IRTn", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x6, 0, UINT64_C(0xffffffffffff6000)), + MFO(0x0000060d, "I7_SB_MSR_PKG_C2_RESIDENCY", IntelI7SandyPkgC2Residency), /* value=0x1b`88fad668 */ + MFX(0x00000610, "I7_SB_MSR_PKG_POWER_LIMIT", IntelI7RaplPkgPowerLimit, IntelI7RaplPkgPowerLimit, 0, UINT64_C(0x80ffffff00ffffff), UINT64_C(0x7f000000ff000000)), /* value=0x804280c8`00dd8078 */ + MFO(0x00000611, "I7_SB_MSR_PKG_ENERGY_STATUS", IntelI7RaplPkgEnergyStatus), /* value=0x7e40b254 */ + MFO(0x00000613, "I7_SB_MSR_PKG_PERF_STATUS", IntelI7RaplPkgPerfStatus), /* value=0xff3 */ + MFO(0x00000614, "I7_SB_MSR_PKG_POWER_INFO", IntelI7RaplPkgPowerInfo), /* value=0x78 */ + MVX(0x00000615, "TODO_0000_0615", 0, 0, UINT64_C(0xffffffff00010000)), + MFX(0x00000618, "I7_SB_MSR_DRAM_POWER_LIMIT", IntelI7RaplDramPowerLimit, IntelI7RaplDramPowerLimit, 0, UINT64_C(0x80feffff00feffff), UINT64_C(0x7f010000ff010000)), /* value=0x805400de`00000000 */ + MFO(0x00000619, "I7_SB_MSR_DRAM_ENERGY_STATUS", IntelI7RaplDramEnergyStatus), /* value=0x9dbe152 */ + MFO(0x0000061b, "I7_SB_MSR_DRAM_PERF_STATUS", IntelI7RaplDramPerfStatus), /* value=0x0 */ + MVO(0x0000061d, "TODO_0000_061d", UINT64_C(0x6e231cb3da)), + MVX(0x00000620, "TODO_0000_0620", 0x71d, 0, UINT64_C(0xffffffffffff8080)), + MVO(0x00000621, "TODO_0000_0621", 0x1d), + MVX(0x00000622, "TODO_0000_0622", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000623, "TODO_0000_0623", 0x1), + MVO(0x00000630, "TODO_0000_0630", 0), + MVO(0x00000631, "TODO_0000_0631", 0), + MVO(0x00000632, "TODO_0000_0632", 0), + MVX(0x00000633, "TODO_0000_0633", 0x88e4, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x00000634, "TODO_0000_0634", 0x8945, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x00000635, "TODO_0000_0635", 0x89ef, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x00000636, "TODO_0000_0636", 0x6a, 0, UINT64_C(0xffffffffffff0000)), + MVO(0x00000637, "TODO_0000_0637", UINT64_C(0x43af89cfdf)), + MFX(0x00000638, "I7_SB_MSR_PP0_POWER_LIMIT", IntelI7RaplPp0PowerLimit, IntelI7RaplPp0PowerLimit, 0, 0, UINT64_C(0xffffffff7f000000)), /* value=0x0 */ + MFO(0x00000639, "I7_SB_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp0EnergyStatus), /* value=0x6f9c685f */ + MFX(0x0000063a, "I7_SB_MSR_PP0_POLICY", IntelI7RaplPp0Policy, IntelI7RaplPp0Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x7 */ + MFX(0x00000640, "I7_HW_MSR_PP0_POWER_LIMIT", IntelI7RaplPp1PowerLimit, IntelI7RaplPp1PowerLimit, 0, 0, UINT64_C(0xffffffff7f000000)), /* value=0x0 */ + MFO(0x00000641, "I7_HW_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp1EnergyStatus), /* value=0x4d471 */ + MFX(0x00000642, "I7_HW_MSR_PP0_POLICY", IntelI7RaplPp1Policy, IntelI7RaplPp1Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0xb */ + MFO(0x00000648, "I7_IB_MSR_CONFIG_TDP_NOMINAL", IntelI7IvyConfigTdpNominal), /* value=0x1a */ + MFO(0x00000649, "I7_IB_MSR_CONFIG_TDP_LEVEL1", IntelI7IvyConfigTdpLevel1), /* value=0x6003c */ + MFO(0x0000064a, "I7_IB_MSR_CONFIG_TDP_LEVEL2", IntelI7IvyConfigTdpLevel2), /* value=0x0 */ + MFX(0x0000064b, "I7_IB_MSR_CONFIG_TDP_CONTROL", IntelI7IvyConfigTdpControl, IntelI7IvyConfigTdpControl, 0, 0, UINT64_C(0xffffffff7ffffffc)), /* value=0x80000000 */ + MFX(0x0000064c, "I7_IB_MSR_TURBO_ACTIVATION_RATIO", IntelI7IvyTurboActivationRatio, IntelI7IvyTurboActivationRatio, 0, 0, UINT64_C(0xffffffff7fffff00)), /* value=0x80000019 */ + RFN(0x00000680, 0x0000068f, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MVX(0x00000690, "TODO_0000_0690", 0x1d200000, UINT32_C(0xe6dfffff), ~(uint64_t)UINT32_MAX), + 
MVX(0x000006b0, "TODO_0000_06b0", 0x1d000000, UINT32_C(0xe2ffffff), ~(uint64_t)UINT32_MAX), + MVX(0x000006b1, "TODO_0000_06b1", 0xd000000, UINT32_C(0xf2ffffff), ~(uint64_t)UINT32_MAX), + RFN(0x000006c0, 0x000006cf, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchToN, IntelLastBranchToN), + MFI(0x000006e0, "IA32_TSC_DEADLINE", Ia32TscDeadline), /* value=0x0 */ + MVX(0x00000700, "TODO_0000_0700", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000701, "TODO_0000_0701", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000702, "TODO_0000_0702", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000703, "TODO_0000_0703", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000704, "TODO_0000_0704", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000705, "TODO_0000_0705", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000706, "TODO_0000_0706", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000707, "TODO_0000_0707", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000708, "TODO_0000_0708", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000709, "TODO_0000_0709", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000710, "TODO_0000_0710", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000711, "TODO_0000_0711", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000712, "TODO_0000_0712", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000713, "TODO_0000_0713", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000714, "TODO_0000_0714", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000715, "TODO_0000_0715", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000716, "TODO_0000_0716", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000717, "TODO_0000_0717", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000718, "TODO_0000_0718", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000719, "TODO_0000_0719", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000720, "TODO_0000_0720", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000721, "TODO_0000_0721", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000722, "TODO_0000_0722", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000723, "TODO_0000_0723", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000724, "TODO_0000_0724", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000725, "TODO_0000_0725", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000726, "TODO_0000_0726", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000727, "TODO_0000_0727", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000728, "TODO_0000_0728", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000729, "TODO_0000_0729", 0, 0, UINT64_C(0xfffff00000000000)), + MFO(0x00000c80, "IA32_DEBUG_INTERFACE", Ia32DebugInterface), /* value=0x40000000 */ + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff802`f9b59200 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff802`f9b58f40 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x212000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffffd001`83740000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x210000 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 
0, ~(uint64_t)UINT32_MAX), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Core_i7_5600U = +{ + /*.pszName = */ "Intel Core i7-5600U", + /*.pszFullName = */ "Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 61, + /*.uStepping = */ 4, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core7_Broadwell, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 39, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_5600U), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_5600U)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000001, 0x00000001, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core_i7_5600U)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core_i7_5600U), +}; + +#endif /* !VBOX_CPUDB_Intel_Core_i7_5600U_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h new file mode 100644 index 00000000..52687ee7 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Core_i7_6700K.h @@ -0,0 +1,510 @@ +/* $Id: Intel_Core_i7_6700K.h $ */ +/** @file + * CPU database entry "Intel Core i7-6700K". + * Generated at 2015-11-04T14:22:26Z by VBoxCpuReport v5.0.51r103906 on win.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Core_i7_6700K_h +#define VBOX_CPUDB_Intel_Core_i7_6700K_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Core_i7_6700K[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000016, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000506e3, 0x02100800, 0x7ffafbbf, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x76036301, 0x00f0b5ff, 0x00000000, 0x00c30000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x1c004121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x1c004122, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x1c004143, 0x00c0003f, 0x000003ff, 0x00000000, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x1c03c163, 0x03c0003f, 0x00001fff, 0x00000006, 0 }, + { 0x00000004, 0x00000004, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00142120, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x000027f7, 0x00000002, 0x00000009, 0x00000000, 0 }, + { 0x00000007, 0x00000000, UINT32_MAX, 0x00000000, 0x029c6fbf, 0x00000000, 0x00000000, 0 }, + { 0x00000007, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300404, 0x00000000, 0x00000000, 0x00000603, 0 }, + { 0x0000000b, 0x00000000, UINT32_MAX, 0x00000001, 0x00000002, 0x00000100, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000001, UINT32_MAX, 0x00000004, 0x00000008, 0x00000201, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000002, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, UINT32_MAX, 0x0000001f, 0x00000440, 0x00000440, 0x00000000, 0 }, + { 0x0000000d, 0x00000001, UINT32_MAX, 0x0000000f, 0x000003c0, 0x00000100, 0x00000000, 0 }, + { 0x0000000d, 0x00000002, UINT32_MAX, 0x00000100, 0x00000240, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000003, UINT32_MAX, 0x00000040, 0x000003c0, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000004, UINT32_MAX, 0x00000040, 0x00000400, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000005, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000006, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000007, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000008, UINT32_MAX, 0x00000080, 0x00000000, 0x00000001, 0x00000000, 0 }, + { 0x0000000d, 0x00000009, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000012, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000014, 0x00000000, UINT32_MAX, 0x00000001, 0x0000000f, 0x00000007, 0x00000000, 0 }, + { 0x00000014, 0x00000001, UINT32_MAX, 0x02490002, 0x003f3fff, 0x00000000, 0x00000000, 0 }, + { 0x00000014, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000015, 0x00000000, 0x00000000, 0x00000002, 0x0000014e, 0x00000000, 0x00000000, 0 }, + { 0x00000016, 0x00000000, 0x00000000, 0x00000fa0, 0x00001068, 0x00000064, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000121, 0x2c100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x37692029, 0x3037362d, 0x43204b30, 0x40205550, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x302e3420, 0x7a484730, 0x00000000, 0x00000000, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x01006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003027, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Core_i7_6700K[] = +{ + MFX(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr, Ia32P5McAddr, 0, UINT64_C(0xfffffffffffff000), 0), /* value=0xfff */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x12fdb`64facbdf */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x4000000000000), 0, 0), /* value=0x40000`00000000 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffffff80000002ff)), + MVO(0x00000020, "TODO_0000_0020", UINT64_C(0xaab8e94b4b4ac1)), + MVO(0x00000021, "C2_UNK_0000_0021", UINT64_C(0x52d289e67f37651b)), + MVO(0x00000022, "TODO_0000_0022", UINT64_C(0xce7bd366cd8dc6e6)), + MVO(0x00000023, "TODO_0000_0023", UINT64_C(0xfd0cd1679876a91d)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVX(0x0000002e, "I7_UNK_0000_002e", 0, 0x400, UINT64_C(0xfffffffffffffbff)), + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MVO(0x00000034, "P6_UNK_0000_0034", 0x8), + MFO(0x00000035, "MSR_CORE_THREAD_COUNT", IntelI7CoreThreadCount), /* value=0x40008 */ + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVX(0x0000003b, "P6_UNK_0000_003b", UINT64_C(0xfff5c5f4e22b), 0, 0), + MVX(0x0000003e, "I7_UNK_0000_003e", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000059, "TODO_0000_0059", 0), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MVX(0x0000007a, "TODO_0000_007a", 0, 0x1, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000080, "P4_UNK_0000_0080", 0, UINT64_C(0x7ffffffffe), 
UINT64_C(0xffffff8000000000)), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, 0, UINT32_MAX), /* value=0x33`00000000 */ + MVX(0x00000095, "TODO_0000_0095", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RSN(0x000000c1, 0x000000c4, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFO(0x000000ce, "IA32_PLATFORM_INFO", IntelPlatformInfo), /* value=0x80838`f1012800 */ + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffff01fffbf0)), /* value=0x1e000006 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xfffffffffff80000)), /* value=0x31814 */ + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x693`992a0bba */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x2d8`96416f36 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x1d0a, 0, 0), /* value=0x1d0a */ + MVX(0x00000102, "I7_IB_UNK_0000_0102", 0, 0, UINT64_C(0xffffffff7fff8000)), + MVX(0x00000103, "I7_IB_UNK_0000_0103", 0, 0, UINT64_C(0xffffffffffffff00)), + MVX(0x00000104, "I7_IB_UNK_0000_0104", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000110, "TODO_0000_0110", 0x1), + MVO(0x00000118, "BBL_CR_DECC", 0), + MVX(0x0000011f, "TODO_0000_011f", 0, 0, UINT64_C(0xffffffffffffff00)), + MVO(0x00000121, "TODO_0000_0121", 0), + MVO(0x0000013a, "TODO_0000_013a", 0), + MFO(0x0000013c, "I7_SB_AES_NI_CTL", IntelI7SandyAesNiCtl), /* value=0x1 */ + MVX(0x0000013d, "TODO_0000_013d", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000140, "I7_IB_UNK_0000_0140", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000150, "P6_UNK_0000_0150", 0, UINT64_C(0x8000ffffffffffff), UINT64_C(0x7fff000000000000)), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0x0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0xc0a, 0, 0), /* value=0xc0a */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_C(0xfffffffffffffff0)), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, UINT64_C(0xfffffffe00080000)), /* XXX: The range ended earlier than expected! */ + MVX(0x00000188, "IA32_PERFEVTSEL2", 0, 0, UINT64_C(0xfffffffc00080000)), + MVX(0x00000189, "IA32_PERFEVTSEL3", 0, 0, UINT64_C(0xfffffffe00080000)), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0xf0000, 0xe0000, UINT64_C(0xffffffffffe00000)), /* value=0xf0000 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x29eb00002a00), 0, 0), /* value=0x29eb`00002a00 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x2800, 0, 0), /* Might bite. 
value=0x2800 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x88430800), UINT32_C(0xf87fa7ff), UINT64_C(0xffffffff07805000)), /* value=0x88430800 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0, 0, 0), /* value=0x0 */ +/// @todo WARNING: IA32_MISC_ENABLE probing needs hacking on this CPU! + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef76)), /* value=0x850089 */ + MVO(0x000001a1, "P6_UNK_0000_01a1", 0x2858), + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x641400, 0xffff00, UINT64_C(0xffffffff40000000)), /* value=0x641400 */ + MVX(0x000001a4, "I7_UNK_0000_01a4", 0, 0, UINT64_C(0xfffffffffffff7f0)), + RSN(0x000001a6, 0x000001a7, "I7_MSR_OFFCORE_RSP_n", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffffc000006000)), + MVX(0x000001a8, "I7_UNK_0000_01a8", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFX(0x000001aa, "MSR_MISC_PWR_MGMT", IntelI7MiscPwrMgmt, IntelI7MiscPwrMgmt, 0, 0x800, UINT64_C(0xffffffffffbff7fe)), /* value=0x401cc0 */ + MFX(0x000001ad, "I7_MSR_TURBO_RATIO_LIMIT", IntelI7TurboRatioLimit, IntelI7TurboRatioLimit, 0x2a2a2a2a, UINT64_MAX, 0), /* value=0x2a2a2a2a */ + MVX(0x000001b0, "IA32_ENERGY_PERF_BIAS", 0x6, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x000001b1, "IA32_PACKAGE_THERM_STATUS", UINT32_C(0x88370800), UINT32_C(0xf87f07ff), UINT64_C(0xffffffff0780f000)), + MVX(0x000001b2, "IA32_PACKAGE_THERM_INTERRUPT", 0, 0, UINT64_C(0xfffffffffe0000e8)), + MVO(0x000001c6, "I7_UNK_0000_01c6", 0x3), + MFX(0x000001c8, "MSR_LBR_SELECT", IntelI7LbrSelect, IntelI7LbrSelect, 0, 0, UINT64_C(0xfffffffffffffc00)), /* value=0x0 */ + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x0 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffff003c)), /* value=0x0 */ + MVO(0x000001da, "TODO_0000_01da", 0), + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x0 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0x0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MVX(0x000001e0, "MSR_ROB_CR_BKUPTMPDR6", 0, 0, UINT64_C(0x1fffffffffff0000)), + MFO(0x000001f0, "I7_VLW_CAPABILITY", IntelI7VirtualLegacyWireCap), /* value=0x74 */ + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0x88400006 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0xffc00800 */ + MVO(0x000001f4, "TODO_0000_01f4", UINT32_C(0x88000006)), + MVO(0x000001f5, "TODO_0000_01f5", UINT64_C(0x7ffff00c00)), + MVO(0x000001fb, "TODO_0000_01fb", 0xe1), + MFX(0x000001fc, "I7_MSR_POWER_CTL", IntelI7PowerCtl, IntelI7PowerCtl, 0, UINT32_C(0x80000020), UINT64_C(0xffffffff20000000)), /* value=0x3c005f */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffff8000000ff8)), /* value=0xc0000000 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffff80000007ff)), /* 
value=0x7f`c0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffff8000000ff8)), /* value=0xa0000000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`e0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x90000000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`f0000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x8c000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`fc000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x8a000000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`fe000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x89000000 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`ff000800 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x88800000 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffff80000007ff)), /* value=0x7f`ff800800 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffff80000007ff)), /* value=0x0 */ + MFX(0x00000210, "IA32_MTRR_PHYS_BASE8", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x8, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x0 */ + MFX(0x00000211, "IA32_MTRR_PHYS_MASK8", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x8, 0, UINT64_C(0xffffff80000007ff)), /* value=0x0 */ + MFX(0x00000212, "IA32_MTRR_PHYS_BASE9", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x9, 0, UINT64_C(0xffffff8000000ff8)), /* value=0x0 */ + MFX(0x00000213, "IA32_MTRR_PHYS_MASK9", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x9, 0, UINT64_C(0xffffff80000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", 
Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + RSN(0x00000280, 0x00000283, "IA32_MC0_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x0, 0, UINT64_C(0xffffffffbfff8000)), + RSN(0x00000284, 0x00000285, "IA32_MC4_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x4, 0x40007fff, UINT64_C(0xffffffffbfff8000)), + RSN(0x00000286, 0x00000289, "IA32_MC6_CTLn", Ia32McNCtl2, Ia32McNCtl2, 0x6, 0, UINT64_C(0xffffffffbfff8000)), + MVO(0x000002e0, "I7_SB_NO_EVICT_MODE", 0), + MVX(0x000002e7, "I7_IB_UNK_0000_02e7", 0x1, 0x1, UINT64_C(0xfffffffffffffffe)), + MVO(0x000002f4, "TODO_0000_02f4", UINT32_C(0x88000000)), + MVO(0x000002f5, "TODO_0000_02f5", UINT64_C(0x7ffff00c00)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVX(0x00000302, "TODO_0000_0302", UINT64_C(0x1ffff020000), UINT64_C(0xfe0000fd0000), UINT64_C(0xffff00000000ffff)), + MVO(0x00000305, "I7_SB_UNK_0000_0305", 0), + RSN(0x00000309, 0x0000030b, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffff000000000000)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x33c5, 0, 0), /* value=0x33c5 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0x1, 0, 0), /* value=0x1 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8fffffff0)), /* value=0xf */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xfc8000070000000f), UINT64_C(0x37ffff8fffffff0)), /* value=0x0 */ + MFX(0x00000391, "I7_UNC_PERF_GLOBAL_CTRL", IntelI7UncPerfGlobalCtrl, IntelI7UncPerfGlobalCtrl, 0, UINT64_C(0xfc8000070000000f), UINT64_C(0x37ffff8fffffff0)), /* value=0x0 */ + MFO(0x00000392, "I7_UNC_PERF_GLOBAL_STATUS", IntelI7UncPerfGlobalStatus), /* value=0x0 */ + MFX(0x00000393, "I7_UNC_PERF_GLOBAL_OVF_CTRL", IntelI7UncPerfGlobalOvfCtrl, IntelI7UncPerfGlobalOvfCtrl, 0, 0x3, UINT64_C(0xfffffffffffffffc)), /* value=0x0 */ + MFX(0x00000394, "I7_UNC_PERF_FIXED_CTR_CTRL", IntelI7UncPerfFixedCtrCtrl, IntelI7UncPerfFixedCtrCtrl, 0, 0, UINT64_C(0xffffffffffafffff)), /* value=0x0 */ + MFX(0x00000395, "I7_UNC_PERF_FIXED_CTR", IntelI7UncPerfFixedCtr, IntelI7UncPerfFixedCtr, 0, 0, UINT64_C(0xffff000000000000)), /* value=0x0 */ + MFO(0x00000396, "I7_UNC_CBO_CONFIG", IntelI7UncCBoxConfig), /* value=0x5 */ + MVX(0x00000397, "I7_SB_UNK_0000_0397", 0, 0, UINT64_C(0xfffffffffffffff0)), + MFX(0x000003b0, "I7_UNC_ARB_PERF_CTR0", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b1, "I7_UNC_ARB_PERF_CTR1", IntelI7UncArbPerfCtrN, IntelI7UncArbPerfCtrN, 0, 0, UINT64_C(0xfffff00000000000)), /* value=0x0 */ + MFX(0x000003b2, "I7_UNC_ARB_PERF_EVT_SEL0", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffe0230000)), /* value=0x0 */ + MFX(0x000003b3, "I7_UNC_ARB_PERF_EVT_SEL1", IntelI7UncArbPerfEvtSelN, IntelI7UncArbPerfEvtSelN, 0, 0, UINT64_C(0xffffffffe0230000)), /* value=0x0 */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0xfffffff0fffffff0)), /* value=0x0 */ + MFX(0x000003f6, "I7_MSR_PEBS_LD_LAT", IntelI7PebsLdLat, 
IntelI7PebsLdLat, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0xffff */ + MVX(0x000003f7, "I7_MSR_PEBS_LD_LAT", 0x800, 0, UINT64_C(0xffffffffff8000e8)), + MFX(0x000003f8, "I7_MSR_PKG_C3_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x4fd`b403a690 */ + RSN(0x000003f9, 0x000003fa, "I7_MSR_PKG_Cn_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + MFX(0x000003fc, "I7_MSR_CORE_C3_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x8d`96b4ea78 */ + RSN(0x000003fd, 0x000003fe, "I7_MSR_CORE_Cn_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + RFN(0x00000400, 0x00000427, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0xda040000000004), 0, 0), /* value=0xda0400`00000004 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe0401e172), 0, 0), /* value=0xfff9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x1ffffff00036dff), 0, 0), /* value=0x1ffffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0x3ffff000011ff), 0, 0), /* value=0x3ffff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x7004c1e7, 0, 0), /* value=0x7004c1e7 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x3727ff, 0, 0), /* value=0x3727ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2e, 0, 0), /* value=0x2e */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0x1ffcff00000000), 0, 0), /* value=0x1ffcff`00000000 */ + MFX(0x0000048c, "IA32_VMX_EPT_VPID_CAP", Ia32VmxEptVpidCap, ReadOnly, UINT64_C(0xf0106334141), 0, 0), /* value=0xf01`06334141 */ + MFX(0x0000048d, "IA32_VMX_TRUE_PINBASED_CTLS", Ia32VmxTruePinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x0000048e, "IA32_VMX_TRUE_PROCBASED_CTLS", Ia32VmxTrueProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe04006172), 0, 0), /* value=0xfff9fffe`04006172 */ + MFX(0x0000048f, "IA32_VMX_TRUE_EXIT_CTLS", Ia32VmxTrueExitCtls, ReadOnly, UINT64_C(0x1ffffff00036dfb), 0, 0), /* value=0x1ffffff`00036dfb */ + MFX(0x00000490, "IA32_VMX_TRUE_ENTRY_CTLS", Ia32VmxTrueEntryCtls, ReadOnly, UINT64_C(0x3ffff000011fb), 0, 0), /* value=0x3ffff`000011fb */ + MFX(0x00000491, "IA32_VMX_VMFUNC", Ia32VmxVmFunc, ReadOnly, 0x1, 0, 0), /* value=0x1 */ + RSN(0x000004c1, 0x000004c4, "IA32_A_PMCn", Ia32PmcN, Ia32PmcN, 0x0, 0, UINT64_C(0xffff000000000000)), + MVO(0x000004e0, "TODO_0000_04e0", 0x5), + MVO(0x000004e2, "TODO_0000_04e2", 0x2), + MVO(0x000004e3, "TODO_0000_04e3", 0xf00), + MVO(0x00000500, "TODO_0000_0500", 0), + MVX(0x00000503, "TODO_0000_0503", 0, 0x2, UINT64_C(0xfffffffffffffffd)), + MVX(0x00000560, "TODO_0000_0560", 0, 0, UINT64_C(0xffffff800000007f)), + MVX(0x00000561, "TODO_0000_0561", 0x7f, UINT64_C(0xf0000007f), 0), + MVX(0x00000570, "TODO_0000_0570", 0, 0, 
UINT64_C(0xffffffccf887d070)), + MVX(0x00000571, "TODO_0000_0571", 0, UINT64_C(0xf00000007), UINT64_C(0xfffe0000ffffffc8)), + MVX(0x00000572, "TODO_0000_0572", 0, 0, 0xfff), + MVX(0x00000580, "TODO_0000_0580", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000581, "TODO_0000_0581", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000582, "TODO_0000_0582", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000583, "TODO_0000_0583", 0, 0, UINT64_C(0xffff800000000000)), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + MFX(0x00000601, "I7_SB_MSR_VR_CURRENT_CONFIG", IntelI7SandyVrCurrentConfig, IntelI7SandyVrCurrentConfig, 0, 0, UINT64_C(0x800000007fffe000)), /* value=0x0 */ + MFX(0x00000603, "I7_SB_MSR_VR_MISC_CONFIG", IntelI7SandyVrMiscConfig, IntelI7SandyVrMiscConfig, 0, 0, UINT64_C(0xff80000000000000)), /* value=0x360000`00363636 */ + MFO(0x00000606, "I7_SB_MSR_RAPL_POWER_UNIT", IntelI7SandyRaplPowerUnit), /* value=0xa0e03 */ + MVX(0x00000607, "TODO_0000_0607", 0, 0, UINT64_C(0xffffffff60000000)), + MVX(0x00000608, "TODO_0000_0608", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0x00000609, "I7_SB_UNK_0000_0609", 0, 0xc0, UINT64_C(0xffffffffffffff00)), + MFX(0x0000060a, "I7_SB_MSR_PKGC3_IRTL", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x3, 0, UINT64_C(0xffffffffffff6000)), /* value=0x884e */ + RSN(0x0000060b, 0x0000060c, "I7_SB_MSR_PKGC6_IRTn", IntelI7SandyPkgCnIrtlN, IntelI7SandyPkgCnIrtlN, 0x6, 0, UINT64_C(0xffffffffffff6000)), + MFO(0x0000060d, "I7_SB_MSR_PKG_C2_RESIDENCY", IntelI7SandyPkgC2Residency), /* value=0x3c6`052d9140 */ + MFX(0x00000610, "I7_SB_MSR_PKG_POWER_LIMIT", IntelI7RaplPkgPowerLimit, IntelI7RaplPkgPowerLimit, 0, 0x8000, UINT64_C(0x7f000000ff000000)), /* value=0x42fff8`0015fff8 */ + MFO(0x00000611, "I7_SB_MSR_PKG_ENERGY_STATUS", IntelI7RaplPkgEnergyStatus), /* value=0x79ba094a */ + MFO(0x00000613, "I7_SB_MSR_PKG_PERF_STATUS", IntelI7RaplPkgPerfStatus), /* value=0x1 */ + MFO(0x00000614, "I7_SB_MSR_PKG_POWER_INFO", IntelI7RaplPkgPowerInfo), /* value=0x2f8 */ + MVX(0x00000615, "TODO_0000_0615", UINT32_C(0x80000000), UINT32_C(0xfffeffff), UINT64_C(0xffffffff00010000)), + MFX(0x00000618, "I7_SB_MSR_DRAM_POWER_LIMIT", IntelI7RaplDramPowerLimit, IntelI7RaplDramPowerLimit, 0, 0, UINT64_C(0x7f010000ff010000)), /* value=0x5400de`00000000 */ + MFO(0x00000619, "I7_SB_MSR_DRAM_ENERGY_STATUS", IntelI7RaplDramEnergyStatus), /* value=0xf282d33 */ + MFO(0x0000061b, "I7_SB_MSR_DRAM_PERF_STATUS", IntelI7RaplDramPerfStatus), /* value=0x0 */ + MVO(0x0000061d, "TODO_0000_061d", UINT64_C(0x7db7e4dfa38)), + MVX(0x00000620, "TODO_0000_0620", 0x829, 0, UINT64_C(0xffffffffffff8080)), + MVO(0x00000621, "TODO_0000_0621", 0x29), + MVX(0x00000622, "TODO_0000_0622", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000623, "TODO_0000_0623", 0x1), + MVO(0x00000630, "TODO_0000_0630", 0), + MVO(0x00000631, "TODO_0000_0631", 0), + MVO(0x00000632, "TODO_0000_0632", 0), + MVX(0x00000633, "TODO_0000_0633", 0, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x00000634, "TODO_0000_0634", 0, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x00000635, "TODO_0000_0635", 0, 0, UINT64_C(0xffffffffffff6000)), + MVX(0x00000636, "TODO_0000_0636", 0, 0, UINT64_C(0xffffffffffff0000)), + MVO(0x00000637, "TODO_0000_0637", UINT64_C(0x496ce31e72)), + MFX(0x00000638, "I7_SB_MSR_PP0_POWER_LIMIT", IntelI7RaplPp0PowerLimit, IntelI7RaplPp0PowerLimit, 0, 0, UINT64_C(0xffffffff7f000000)), /* value=0x0 */ + MFO(0x00000639, "I7_SB_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp0EnergyStatus), /* value=0x2aa89b8a */ + MFX(0x0000063a, 
"I7_SB_MSR_PP0_POLICY", IntelI7RaplPp0Policy, IntelI7RaplPp0Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x10 */ + MFX(0x00000640, "I7_HW_MSR_PP0_POWER_LIMIT", IntelI7RaplPp1PowerLimit, IntelI7RaplPp1PowerLimit, 0, 0, UINT64_C(0xffffffff7f000000)), /* value=0x0 */ + MFO(0x00000641, "I7_HW_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp1EnergyStatus), /* value=0x0 */ + MFX(0x00000642, "I7_HW_MSR_PP0_POLICY", IntelI7RaplPp1Policy, IntelI7RaplPp1Policy, 0, 0, UINT64_C(0xffffffffffffffe0)), /* value=0x10 */ + MFO(0x00000648, "I7_IB_MSR_CONFIG_TDP_NOMINAL", IntelI7IvyConfigTdpNominal), /* value=0x28 */ + MFO(0x00000649, "I7_IB_MSR_CONFIG_TDP_LEVEL1", IntelI7IvyConfigTdpLevel1), /* value=0x0 */ + MFO(0x0000064a, "I7_IB_MSR_CONFIG_TDP_LEVEL2", IntelI7IvyConfigTdpLevel2), /* value=0x0 */ + MFO(0x0000064b, "I7_IB_MSR_CONFIG_TDP_CONTROL", IntelI7IvyConfigTdpControl), /* value=0x80000000 */ + MFX(0x0000064c, "I7_IB_MSR_TURBO_ACTIVATION_RATIO", IntelI7IvyTurboActivationRatio, IntelI7IvyTurboActivationRatio, 0, 0, UINT64_C(0xffffffff7fffff00)), /* value=0x0 */ + MVO(0x0000064d, "TODO_0000_064d", 0), + MVO(0x0000064e, "TODO_0000_064e", UINT64_C(0x1fdf361be5b)), + MVX(0x0000064f, "TODO_0000_064f", 0x4000000, UINT32_C(0xfbffffff), ~(uint64_t)UINT32_MAX), + MVX(0x00000652, "TODO_0000_0652", 0x2, 0, UINT64_C(0xfffffffff8000800)), + MVO(0x00000653, "TODO_0000_0653", 0), + MVO(0x00000655, "TODO_0000_0655", 0), + MVO(0x00000656, "TODO_0000_0656", 0), + MVO(0x00000658, "TODO_0000_0658", UINT64_C(0x296db63257a5)), + MVO(0x00000659, "TODO_0000_0659", UINT64_C(0x195cb5c8d10c)), + MVO(0x0000065a, "TODO_0000_065a", 0), + MVO(0x0000065b, "TODO_0000_065b", 0), + MVX(0x0000065c, "TODO_0000_065c", 0x1402f8, 0, UINT64_C(0x7ffe0000ff000000)), + RFN(0x00000680, 0x0000068f, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MVX(0x00000690, "TODO_0000_0690", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000691, "TODO_0000_0691", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000692, "TODO_0000_0692", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000693, "TODO_0000_0693", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000694, "TODO_0000_0694", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000695, "TODO_0000_0695", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000696, "TODO_0000_0696", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000697, "TODO_0000_0697", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000698, "TODO_0000_0698", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x00000699, "TODO_0000_0699", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x0000069a, "TODO_0000_069a", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x0000069b, "TODO_0000_069b", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x0000069c, "TODO_0000_069c", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x0000069d, "TODO_0000_069d", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x0000069e, "TODO_0000_069e", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x0000069f, "TODO_0000_069f", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006b0, "TODO_0000_06b0", 0, UINT32_MAX, ~(uint64_t)UINT32_MAX), + MVX(0x000006b1, "TODO_0000_06b1", 0xc000000, UINT32_C(0xf3ffffff), ~(uint64_t)UINT32_MAX), + RFN(0x000006c0, 0x000006cf, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchToN, IntelLastBranchToN), + MVX(0x000006d0, "TODO_0000_06d0", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d1, "TODO_0000_06d1", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d2, "TODO_0000_06d2", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d3, "TODO_0000_06d3", 0, 0, 
UINT64_C(0xffff800000000000)), + MVX(0x000006d4, "TODO_0000_06d4", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d5, "TODO_0000_06d5", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d6, "TODO_0000_06d6", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d7, "TODO_0000_06d7", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d8, "TODO_0000_06d8", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006d9, "TODO_0000_06d9", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006da, "TODO_0000_06da", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006db, "TODO_0000_06db", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006dc, "TODO_0000_06dc", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006dd, "TODO_0000_06dd", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006de, "TODO_0000_06de", 0, 0, UINT64_C(0xffff800000000000)), + MVX(0x000006df, "TODO_0000_06df", 0, 0, UINT64_C(0xffff800000000000)), + MFI(0x000006e0, "IA32_TSC_DEADLINE", Ia32TscDeadline), /* value=0x0 */ + MVX(0x00000700, "TODO_0000_0700", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000701, "TODO_0000_0701", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000702, "TODO_0000_0702", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000703, "TODO_0000_0703", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000704, "TODO_0000_0704", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000705, "TODO_0000_0705", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000706, "TODO_0000_0706", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000707, "TODO_0000_0707", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000708, "TODO_0000_0708", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000709, "TODO_0000_0709", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000710, "TODO_0000_0710", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000711, "TODO_0000_0711", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000712, "TODO_0000_0712", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000713, "TODO_0000_0713", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000714, "TODO_0000_0714", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000715, "TODO_0000_0715", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000716, "TODO_0000_0716", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000717, "TODO_0000_0717", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000718, "TODO_0000_0718", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000719, "TODO_0000_0719", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000720, "TODO_0000_0720", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000721, "TODO_0000_0721", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000722, "TODO_0000_0722", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000723, "TODO_0000_0723", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000724, "TODO_0000_0724", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000725, "TODO_0000_0725", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000726, "TODO_0000_0726", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000727, "TODO_0000_0727", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000728, "TODO_0000_0728", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000729, "TODO_0000_0729", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000730, "TODO_0000_0730", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000731, "TODO_0000_0731", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000732, "TODO_0000_0732", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000733, "TODO_0000_0733", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000734, "TODO_0000_0734", 0, 0, UINT64_C(0xfffffffffffffff0)), + 
MVX(0x00000735, "TODO_0000_0735", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000736, "TODO_0000_0736", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000737, "TODO_0000_0737", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000738, "TODO_0000_0738", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000739, "TODO_0000_0739", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000740, "TODO_0000_0740", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000741, "TODO_0000_0741", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000742, "TODO_0000_0742", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000743, "TODO_0000_0743", 0, 0, UINT64_C(0xffffffffe0230000)), + MVX(0x00000744, "TODO_0000_0744", 0, 0, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000745, "TODO_0000_0745", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MVX(0x00000746, "TODO_0000_0746", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000747, "TODO_0000_0747", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000748, "TODO_0000_0748", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000749, "TODO_0000_0749", 0, 0, UINT64_C(0xfffff00000000000)), + MVX(0x00000770, "TODO_0000_0770", 0x1, 0x1, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000771, "TODO_0000_0771", 0x109282a), + MVX(0x00000773, "TODO_0000_0773", 0x1, 0, UINT64_C(0xfffffffffffffffc)), + MVX(0x00000774, "TODO_0000_0774", UINT64_C(0x19e7f2a2a02), 0, UINT64_C(0xfffffc0000000000)), + MVX(0x00000777, "TODO_0000_0777", 0, 0x5, UINT64_C(0xfffffffffffffffa)), + MFO(0x00000c80, "IA32_DEBUG_INTERFACE", Ia32DebugInterface), /* value=0x40000000 */ + MVX(0x00000c8f, "TODO_0000_0c8f", 0, 0, UINT64_C(0xffffffffffffffe0)), + MVX(0x00000c90, "TODO_0000_0c90", UINT16_MAX, 0, UINT64_C(0xffffffff7fff0000)), + MVX(0x00000c91, "TODO_0000_0c91", 0xf, 0, UINT64_C(0xffffffff7fff0000)), + MVX(0x00000c92, "TODO_0000_0c92", 0x3ff, 0, UINT64_C(0xffffffff7fff0000)), + MVX(0x00000c93, "TODO_0000_0c93", 0xfff, 0, UINT64_C(0xffffffff7fff0000)), + MVX(0x00000d90, "TODO_0000_0d90", 0, 0, UINT64_C(0xffff800000000ffc)), + MVX(0x00000da0, "TODO_0000_0da0", 0, 0, UINT64_C(0xfffffffffffffeff)), + MVX(0x00000db0, "TODO_0000_0db0", 0, 0, UINT64_C(0xfffffffffffffffe)), + MVX(0x00000db1, "TODO_0000_0db1", 0x1, 0, UINT64_C(0xfffffffffffffffe)), + MVO(0x00000db2, "TODO_0000_0db2", 0), + MVX(0x00000dc0, "TODO_0000_0dc0", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc1, "TODO_0000_0dc1", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc2, "TODO_0000_0dc2", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc3, "TODO_0000_0dc3", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc4, "TODO_0000_0dc4", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc5, "TODO_0000_0dc5", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc6, "TODO_0000_0dc6", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc7, "TODO_0000_0dc7", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc8, "TODO_0000_0dc8", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dc9, "TODO_0000_0dc9", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dca, "TODO_0000_0dca", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dcb, "TODO_0000_0dcb", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dcc, "TODO_0000_0dcc", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dcd, "TODO_0000_0dcd", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dce, "TODO_0000_0dce", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dcf, "TODO_0000_0dcf", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd0, "TODO_0000_0dd0", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd1, "TODO_0000_0dd1", 0, 0, 
UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd2, "TODO_0000_0dd2", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd3, "TODO_0000_0dd3", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd4, "TODO_0000_0dd4", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd5, "TODO_0000_0dd5", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd6, "TODO_0000_0dd6", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd7, "TODO_0000_0dd7", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd8, "TODO_0000_0dd8", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dd9, "TODO_0000_0dd9", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dda, "TODO_0000_0dda", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000ddb, "TODO_0000_0ddb", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000ddc, "TODO_0000_0ddc", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000ddd, "TODO_0000_0ddd", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000dde, "TODO_0000_0dde", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000ddf, "TODO_0000_0ddf", 0, 0, UINT64_C(0x1fffffffffff0000)), + MVX(0x00000e01, "TODO_0000_0e01", 0, 0, UINT64_C(0xffffffff1fffffe0)), + MVX(0x00000e02, "TODO_0000_0e02", 0, 0xf, UINT64_C(0xfffffffffffffff0)), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff801`a09745c0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff801`a0974300 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x9a90000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffffd000`c5800000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7ff7`09a8e000 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x2 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz. 
+ */ +static CPUMDBENTRY const g_Entry_Intel_Core_i7_6700K = +{ + /*.pszName = */ "Intel Core i7-6700K", + /*.pszFullName = */ "Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 94, + /*.uStepping = */ 3, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core7_Skylake, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_100MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 39, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Core_i7_6700K), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Core_i7_6700K)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x00000fa0, 0x00001068, 0x00000064, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Core_i7_6700K)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Core_i7_6700K), +}; + +#endif /* !VBOX_CPUDB_Intel_Core_i7_6700K_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h new file mode 100644 index 00000000..c3463a98 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_4_3_00GHz.h @@ -0,0 +1,277 @@ +/* $Id: Intel_Pentium_4_3_00GHz.h $ */ +/** @file + * CPU database entry "Intel Pentium 4 3.00GHz". + * Generated at 2013-12-18T06:37:54Z by VBoxCpuReport v4.3.53r91376 on win.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Pentium_4_3_00GHz_h +#define VBOX_CPUDB_Intel_Pentium_4_3_00GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Pentium(R) 4 CPU 3.00GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Pentium_4_3_00GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000005, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00000f43, 0x00020800, 0x0000649d, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x605b5001, 0x00000000, 0x00000000, 0x007d7040, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x00004121, 0x01c0003f, 0x0000001f, 0x00000000, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x00004143, 0x01c0103f, 0x000007ff, 0x00000000, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x20100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x6e492020, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x286c6574, 0x50202952, 0x69746e65, 0x52286d75, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20342029, 0x20555043, 0x30302e33, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x08006040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Pentium(R) 4 CPU 3.00GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Pentium_4_3_00GHz[] = +{ + MFO(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0xc55df88 */ + MFO(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType), /* value=0xbe000300`1008081f */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x1ac`2077a134 */ + MFV(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x12000000000000)), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0x600, UINT64_C(0xffffff00000000ff)), + MFX(0x0000002a, "P4_EBC_HARD_POWERON", IntelP4EbcHardPowerOn, IntelP4EbcHardPowerOn, 0, UINT64_MAX, 0), /* value=0x0 */ + MFX(0x0000002b, "P4_EBC_SOFT_POWERON", IntelP4EbcSoftPowerOn, IntelP4EbcSoftPowerOn, 0x7e, UINT64_C(0xffffffffffffff80), 0), /* value=0x7e */ + MFX(0x0000002c, "P4_EBC_FREQUENCY_ID", IntelP4EbcFrequencyId, IntelP4EbcFrequencyId, 0xf12010f, UINT64_MAX, 0), /* value=0xf12010f */ + MVX(0x00000039, "C2_UNK_0000_0039", 0x1, 0x1f, ~(uint64_t)UINT32_MAX), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MVX(0x00000080, "P4_UNK_0000_0080", 0, ~(uint64_t)UINT32_MAX, UINT32_MAX), + MFX(0x0000008b, "IA32_BIOS_SIGN_ID", Ia32BiosSignId, Ia32BiosSignId, 0, UINT32_MAX, 0), /* value=0x5`00000000 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MFX(0x00000119, "BBL_CR_CTL", IntelBblCrCtl, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0x0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x180204, 0, 0), /* value=0x180204 */ + MFN(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus), /* value=0x0 */ + MVX(0x00000180, "MSR_MCG_RAX", 0, 0, UINT64_MAX), + MVX(0x00000181, "MSR_MCG_RBX", 0, 0, UINT64_MAX), + MVX(0x00000182, "MSR_MCG_RCX", 0, 0, UINT64_MAX), + MVX(0x00000183, "MSR_MCG_RDX", 0, 0, UINT64_MAX), + MVX(0x00000184, "MSR_MCG_RSI", 0, 0, UINT64_MAX), + MVX(0x00000185, "MSR_MCG_RDI", 0, 0, UINT64_MAX), + MFX(0x00000186, "MSR_MCG_RBP", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000187, "MSR_MCG_RSP", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0, 0, UINT64_MAX), /* value=0x0 */ + MVX(0x00000188, "MSR_MCG_RFLAGS", 0, 0, UINT64_MAX), + MVX(0x00000189, "MSR_MCG_RIP", 0, 0, UINT64_MAX), + MVX(0x0000018a, "MSR_MCG_MISC", 0, 0, UINT64_MAX), + MVX(0x0000018b, "MSR_MCG_RESERVED1", 0, 0, UINT64_MAX), + MVX(0x0000018c, "MSR_MCG_RESERVED2", 0, 0, UINT64_MAX), + MVX(0x0000018d, "MSR_MCG_RESERVED3", 0, 0, UINT64_MAX), + MVX(0x0000018e, "MSR_MCG_RESERVED4", 0, 0, UINT64_MAX), + MVX(0x0000018f, "MSR_MCG_RESERVED5", 0, 0, UINT64_MAX), + MVX(0x00000190, "MSR_MCG_R8", 0, 0, UINT64_MAX), + MVX(0x00000191, "MSR_MCG_R9", 0, 0, UINT64_MAX), + MVX(0x00000192, "MSR_MCG_R10", 0, 0, UINT64_MAX), + MVX(0x00000193, "MSR_MCG_R11", 0, 0, UINT64_MAX), + MVX(0x00000194, "MSR_MCG_R12", 0, 0, UINT64_MAX), + MVX(0x00000195, "MSR_MCG_R13", 0, 0, UINT64_MAX), + MVX(0x00000196, "MSR_MCG_R14", 0, 0, UINT64_MAX), + MVX(0x00000197, "MSR_MCG_R15", 0, 0, UINT64_MAX), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, 
Ia32PerfStatus, UINT64_C(0xf2d00000f2d), UINT64_MAX, 0), /* value=0xf2d`00000f2d */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0xf2d, 0, 0), /* Might bite. value=0xf2d */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, UINT64_C(0xffffffffffffffe1), 0), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, UINT64_C(0xfffffffffffffffc), 0), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, 0, UINT64_C(0xfffffffffffffff5), 0), /* value=0x0 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0xe2d, 0, 0), /* value=0xe2d */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0, UINT64_C(0xffffffffffff0000), 0), + MVX(0x0000019f, "P6_UNK_0000_019f", UINT64_C(0x32050500000101), UINT64_C(0xff000000fff0c0c0), 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x22850089, 0x20800080, UINT64_C(0xfffffffbdc10f800)), /* value=0x22850089 */ + MVX(0x000001a1, "MSR_PLATFORM_BRV", 0, UINT64_C(0xfffffffffffcc0c0), 0), + MFX(0x000001a2, "P4_UNK_0000_01a2", IntelI7TemperatureTarget, ReadOnly, 0x61048, 0, 0), /* value=0x61048 */ + MFO(0x000001d7, "MSR_LER_FROM_LIP", P6LastIntFromIp), /* value=0x0 */ + MFO(0x000001d8, "MSR_LER_TO_LIP", P6LastIntToIp), /* value=0x0 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffff80)), /* value=0x0 */ + MFX(0x000001da, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, UINT64_C(0xfffffffffffffff0), 0), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffff00000007ff)), /* value=0xf`c0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x3f600000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffff00000007ff)), /* value=0xf`ffe00800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x3f800000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffff00000007ff)), /* value=0xf`ff800800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", 
Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffff0000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffff00000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + MVX(0x00000300, "P4_MSR_BPU_COUNTER0", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000301, "P4_MSR_BPU_COUNTER1", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000302, "P4_MSR_BPU_COUNTER2", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000303, "P4_MSR_BPU_COUNTER3", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000304, "P4_MSR_MS_COUNTER0", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000305, "P4_MSR_MS_COUNTER1", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000306, "P4_MSR_MS_COUNTER2", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000307, "P4_MSR_MS_COUNTER3", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000308, "P4_MSR_FLAME_COUNTER0", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000309, "P4_MSR_FLAME_COUNTER1", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x0000030a, "P4_MSR_FLAME_COUNTER2", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x0000030b, "P4_MSR_FLAME_COUNTER3", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x0000030c, "P4_MSR_IQ_COUNTER0", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x0000030d, "P4_MSR_IQ_COUNTER1", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x0000030e, "P4_MSR_IQ_COUNTER2", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x0000030f, "P4_MSR_IQ_COUNTER3", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000310, "P4_MSR_IQ_COUNTER4", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000311, "P4_MSR_IQ_COUNTER5", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00000360, "P4_MSR_BPU_CCCR0", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000361, "P4_MSR_BPU_CCCR1", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000362, "P4_MSR_BPU_CCCR2", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000363, "P4_MSR_BPU_CCCR3", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000364, "P4_MSR_MS_CCCR0", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000365, "P4_MSR_MS_CCCR1", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000366, "P4_MSR_MS_CCCR2", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000367, "P4_MSR_MS_CCCR3", 0, 
UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000368, "P4_MSR_FLAME_CCCR0", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x00000369, "P4_MSR_FLAME_CCCR1", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x0000036a, "P4_MSR_FLAME_CCCR2", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x0000036b, "P4_MSR_FLAME_CCCR3", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x0000036c, "P4_MSR_IQ_CCCR0", 0, UINT64_C(0xffffffff000007ff), 0), + MVX(0x0000036d, "P4_MSR_IQ_CCCR1", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x0000036e, "P4_MSR_IQ_CCCR2", 0, UINT64_C(0xffffffff00000fff), 0), + MVX(0x0000036f, "P4_MSR_IQ_CCCR3", 0, UINT64_C(0xffffffff000007ff), 0), + MVX(0x00000370, "P4_MSR_IQ_CCCR4", 0, UINT64_C(0xffffffff000000ff), 0), + MVX(0x00000371, "P4_MSR_IQ_CCCR5", 0, UINT64_C(0xffffffff000000ff), 0), + MVX(0x000003a0, "P4_MSR_BSU_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003a1, "P4_MSR_BSU_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003a2, "P4_MSR_FSB_ESCR0", 0, UINT64_C(0xffffffff40000000), UINT32_C(0x80000000)), + MVX(0x000003a3, "P4_MSR_FSB_ESCR1", 0, UINT64_C(0xffffffff40000000), UINT32_C(0x80000000)), + MVX(0x000003a4, "P4_MSR_FIRM_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003a5, "P4_MSR_FIRM_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003a6, "P4_MSR_FLAME_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003a7, "P4_MSR_FLAME_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003a8, "P4_MSR_DAC_ESCR0", 0, UINT64_C(0xffffffff61fe01f0), UINT32_C(0x80000000)), + MVX(0x000003a9, "P4_MSR_DAC_ESCR1", 0, UINT64_C(0xffffffff61fe01f0), UINT32_C(0x80000000)), + MVX(0x000003aa, "P4_MSR_MOB_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003ab, "P4_MSR_MOB_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003ac, "P4_MSR_PMH_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003ad, "P4_MSR_PMH_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003ae, "P4_MSR_SAAT_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003af, "P4_MSR_SAAT_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003b0, "P4_MSR_U2L_ESCR0", 0, UINT64_C(0xffffffff71c001f0), UINT32_C(0x80000000)), + MVX(0x000003b1, "P4_MSR_U2L_ESCR1", 0, UINT64_C(0xffffffff71c001f0), UINT32_C(0x80000000)), + MVX(0x000003b2, "P4_MSR_BPU_ESCR0", 0, UINT64_C(0xffffffff61fc0000), UINT32_C(0x80000000)), + MVX(0x000003b3, "P4_MSR_BPU_ESCR1", 0, UINT64_C(0xffffffff61fc0000), UINT32_C(0x80000000)), + MVX(0x000003b4, "P4_MSR_IS_ESCR0", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003b5, "P4_MSR_IS_ESCR1", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003b6, "P4_MSR_ITLB_ESCR0", 0, UINT64_C(0xffffffff0ffff1e0), UINT32_C(0x80000000)), + MVX(0x000003b7, "P4_MSR_ITLB_ESCR1", 0, UINT64_C(0xffffffff0ffff1e0), UINT32_C(0x80000000)), + MVX(0x000003b8, "P4_MSR_CRU_ESCR0", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003b9, "P4_MSR_CRU_ESCR1", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003ba, "P4_MSR_IQ_ESCR0", 0, UINT64_C(0xffffffff7fffffff), UINT32_C(0x80000000)), + MVX(0x000003bb, "P4_MSR_IQ_ESCR1", 0, UINT64_C(0xffffffff7fffffff), UINT32_C(0x80000000)), + MVX(0x000003bc, "P4_MSR_RAT_ESCR0", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003bd, "P4_MSR_RAT_ESCR1", 0, ~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003be, "P4_MSR_SSU_ESCR0", 0, 
~(uint64_t)UINT32_MAX, UINT32_C(0x80000000)), + MVX(0x000003c0, "P4_MSR_MS_ESCR0", 0, UINT64_C(0xffffffff61ff81e0), UINT32_C(0x80000000)), + MVX(0x000003c1, "P4_MSR_MS_ESCR1", 0, UINT64_C(0xffffffff61ff81e0), UINT32_C(0x80000000)), + MVX(0x000003c2, "P4_MSR_TBPU_ESCR0", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003c3, "P4_MSR_TBPU_ESCR1", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003c4, "P4_MSR_TC_ESCR0", 0, UINT64_C(0xffffffff61f801f0), UINT32_C(0x80000000)), + MVX(0x000003c5, "P4_MSR_TC_ESCR1", 0, UINT64_C(0xffffffff61f801f0), UINT32_C(0x80000000)), + MVX(0x000003c8, "P4_MSR_IX_ESCR0", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003c9, "P4_MSR_IX_ESCR0", 0, UINT64_C(0xffffffff71fe01f0), UINT32_C(0x80000000)), + MVX(0x000003ca, "P4_MSR_ALF_ESCR0", 0, UINT64_C(0xffffffff700001f0), UINT32_C(0x80000000)), + MVX(0x000003cb, "P4_MSR_ALF_ESCR1", 0, UINT64_C(0xffffffff700001f0), UINT32_C(0x80000000)), + MVX(0x000003cc, "P4_MSR_CRU_ESCR2", 0, UINT64_C(0xffffffff61f001f0), UINT32_C(0x80000000)), + MVX(0x000003cd, "P4_MSR_CRU_ESCR3", 0, UINT64_C(0xffffffff61f001f0), UINT32_C(0x80000000)), + MVX(0x000003e0, "P4_MSR_CRU_ESCR4", 0, UINT64_C(0xffffffff71ff01f0), UINT32_C(0x80000000)), + MVX(0x000003e1, "P4_MSR_CRU_ESCR5", 0, UINT64_C(0xffffffff71ff01f0), UINT32_C(0x80000000)), + MVX(0x000003f0, "P4_MSR_TC_PRECISE_EVENT", 0xfc00, UINT64_C(0xfffffffffffc001f), 0), + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, UINT64_C(0xfffffffff8000000), 0), /* value=0x0 */ + MVX(0x000003f2, "P4_MSR_PEBS_MATRIX_VERT", 0, UINT64_C(0xffffffffffffe000), 0), + MVX(0x000003f5, "P4_UNK_0000_03f5", 0, UINT64_C(0xffffffffffff0000), 0), + MVX(0x000003f6, "P4_UNK_0000_03f6", 0, UINT64_C(0xffffffffffe00000), 0), + MVX(0x000003f7, "P4_UNK_0000_03f7", 0, UINT64_C(0xfffe000000000000), 0), + MVX(0x000003f8, "P4_UNK_0000_03f8", 0, UINT64_C(0xffffff000000003f), 0), + RFN(0x00000400, 0x0000040f, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + RFN(0x00000680, 0x0000068f, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchFromN, IntelLastBranchFromN), + RFN(0x000006c0, 0x000006cf, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchToN, IntelLastBranchToN), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff800`654efdc0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff800`654efb00 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0xeed1e000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xfffff880`009bf000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7f7`eed1c000 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Pentium(R) 4 CPU 3.00GHz. 
+ */ +static CPUMDBENTRY const g_Entry_Intel_Pentium_4_3_00GHz = +{ + /*.pszName = */ "Intel Pentium 4 3.00GHz", + /*.pszFullName = */ "Intel(R) Pentium(R) 4 CPU 3.00GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 15, + /*.uModel = */ 4, + /*.uStepping = */ 3, + /*.enmMicroarch = */ kCpumMicroarch_Intel_NB_Prescott2M, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Pentium_4_3_00GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Pentium_4_3_00GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x00000040, 0x00000040, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Pentium_4_3_00GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Pentium_4_3_00GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Pentium_4_3_00GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h new file mode 100644 index 00000000..7e4ebe14 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_M_processor_2_00GHz.h @@ -0,0 +1,216 @@ +/* $Id: Intel_Pentium_M_processor_2_00GHz.h $ */ +/** @file + * CPU database entry "Intel Pentium M processor 2.00GHz". + * Generated at 2013-12-09T14:18:00Z by VBoxCpuReport v4.3.51r91027 on win.x86. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Pentium_M_processor_2_00GHz_h +#define VBOX_CPUDB_Intel_Pentium_M_processor_2_00GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Pentium(R) M processor 2.00GHz. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Pentium_M_processor_2_00GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000006d6, 0x00000816, 0x00000180, 0xafe9f9bf, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x02b3b001, 0x000000f0, 0x00000000, 0x2c04307d, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000004, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x65746e49, 0x2952286c, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x6e655020, 0x6d756974, 0x20295228, 0x7270204d, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x7365636f, 0x20726f73, 0x30302e32, 0x007a4847, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Pentium(R) M processor 2.00GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Pentium_M_processor_2_00GHz[] = +{ + MFI(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0x0 */ + MFI(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType), /* value=0x0 */ + MFX(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x22`4d44782e */ + MFV(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x140000d0248a28)), + MVX(0x00000018, "P6_UNK_0000_0018", 0, 0, 0), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00100), UINT64_C(0xffffffff00000600), 0xff), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, 0x45080000, UINT64_C(0xfffffffffff7ff7e), 0), /* value=0x45080000 */ + MVX(0x0000002f, "P6_UNK_0000_002f", 0, UINT64_C(0xfffffffffffffff5), 0), + MVX(0x00000032, "P6_UNK_0000_0032", 0, UINT64_C(0xfffffffffffe0000), 0), + MVX(0x00000033, "TEST_CTL", 0, UINT64_C(0xffffffff40000000), 0), + MVX(0x00000034, "P6_UNK_0000_0034", 0x77ff, ~(uint64_t)UINT32_MAX, UINT32_C(0xfff80000)), + MVO(0x00000035, "P6_UNK_0000_0035", 0x300008), + MVX(0x0000003b, "P6_UNK_0000_003b", 0, UINT64_C(0xafffffffe), UINT64_C(0xfffffff500000001)), + MVO(0x0000003f, "P6_UNK_0000_003f", 0x4), + RFN(0x00000040, 0x00000047, "MSR_LASTBRANCH_n", IntelLastBranchFromToN, ReadOnly), + MVX(0x0000004a, "P6_UNK_0000_004a", 0, 0, 0), /* value=0x0 */ + MVX(0x0000004b, "P6_UNK_0000_004b", 0, 0, 0), /* value=0x0 */ + MVX(0x0000004c, "P6_UNK_0000_004c", 0, 0, 0), /* value=0x0 */ + MVX(0x0000004d, "P6_UNK_0000_004d", 0, 0, 0), /* value=0xeb1cffbf`8918200a */ + MVX(0x0000004e, "P6_UNK_0000_004e", 0, 0, 0), /* value=0x8204c60a`e8009512 */ + MVX(0x0000004f, "P6_UNK_0000_004f", 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVI(0x00000050, "P6_UNK_0000_0050", 0), /* Villain? value=0x0 */ + MVI(0x00000051, "P6_UNK_0000_0051", 0), /* Villain? value=0x0 */ + MVI(0x00000052, "P6_UNK_0000_0052", 0), /* Villain? value=0x0 */ + MVI(0x00000053, "P6_UNK_0000_0053", 0), /* Villain? value=0x0 */ + MVI(0x00000054, "P6_UNK_0000_0054", 0), /* Villain? 
value=0x0 */ + MVX(0x0000006c, "P6_UNK_0000_006c", 0, UINT64_C(0xffffffff00000082), 0), + MVX(0x0000006d, "P6_UNK_0000_006d", 0, UINT64_C(0xffffffff00000082), 0), + MVX(0x0000006e, "P6_UNK_0000_006e", 0, UINT64_C(0xffffffff00000082), 0), + MVO(0x0000006f, "P6_UNK_0000_006f", 0xadb), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, Ia32BiosUpdateTrigger), + MVX(0x00000088, "BBL_CR_D0", 0, 0, 0), /* value=0xfcaeffff`d779fd3e */ + MVX(0x00000089, "BBL_CR_D1", 0, 0, 0), /* value=0xefffbcb7`ff77fbef */ + MVX(0x0000008a, "BBL_CR_D2", 0, 0, 0), /* value=0xdfff3f2f`fb367d9f */ + MVX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", UINT64_C(0x1800000000), 0, 0), + MVX(0x0000008c, "P6_UNK_0000_008c", 0, 0, 0), /* value=0xeffff3ff`ef39bfff */ + MVX(0x0000008d, "P6_UNK_0000_008d", 0, 0, 0), /* value=0xf773adfb`ef3ff3fc */ + MVX(0x0000008e, "P6_UNK_0000_008e", 0, 0, 0), /* value=0xfeb7f6ff`ebbffeff */ + MVX(0x0000008f, "P6_UNK_0000_008f", 0, 0, 0), /* value=0xd6ffb7af`ffad9e7e */ + MVX(0x00000090, "P6_UNK_0000_0090", 0, UINT64_C(0xfffffffffffffffa), 0), /* value=0x9ebdb4b5 */ + MVX(0x000000ae, "P6_UNK_0000_00ae", UINT64_C(0x1000000000007efc), 0, 0), + MFX(0x000000c1, "IA32_PMC0", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000c2, "IA32_PMC1", Ia32PmcN, Ia32PmcN, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVI(0x000000c7, "P6_UNK_0000_00c7", UINT64_C(0x5a000000ac000000)), + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0, 0, 0), + MVO(0x000000ce, "P6_UNK_0000_00ce", UINT64_C(0x2812140000000000)), + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MVX(0x00000116, "BBL_CR_ADDR", UINT32_C(0xfe7efff0), UINT64_C(0xffffffff0000000f), 0), + MVX(0x00000118, "BBL_CR_DECC", UINT64_C(0xc0000000c1ae9fda), UINT64_C(0xfffffff00000000), 0), + MFX(0x00000119, "BBL_CR_CTL", IntelBblCrCtl, IntelBblCrCtl, 0x8, UINT64_C(0xffffffffc00001ff), 0), /* value=0x8 */ + MVI(0x0000011b, "P6_UNK_0000_011b", 0), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, 0x34272b, UINT64_C(0xfffffffffffbfc1f), 0), /* value=0x34272b */ + MVI(0x00000131, "P6_UNK_0000_0131", 0), + MVX(0x0000014e, "P6_UNK_0000_014e", 0xd31f40, UINT64_C(0xfffffffff000008f), 0), + MVI(0x0000014f, "P6_UNK_0000_014f", 0xd31f40), + MVX(0x00000150, "P6_UNK_0000_0150", 0, UINT64_C(0xffffffffdfffe07f), 0x20000000), + MVX(0x00000151, "P6_UNK_0000_0151", 0x3c531fc6, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00000154, "P6_UNK_0000_0154", 0), + MVX(0x0000015b, "P6_UNK_0000_015b", 0, ~(uint64_t)UINT32_MAX, 0), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x8 */ + MFX(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xf78af000 */ + MFX(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x804de6f0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x5, 0, 0), /* value=0x5 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00000194, "CLOCK_FLEX_MAX", 0, UINT64_C(0xfffffffffffee0c0), 0), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x612142806000612), 0, 0), /* value=0x6121428`06000612 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x612, 0, 0), /* Might bite. 
value=0x612 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, UINT64_C(0xffffffffffffffe1), 0), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, UINT64_C(0xfffffffffffffffc), 0), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, 0, UINT64_C(0xfffffffffffffffd), 0), /* value=0x0 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, Ia32Therm2Ctl, 0x10612, UINT64_C(0xfffffffffffee0c0), 0), /* value=0x10612 */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0, UINT64_C(0xffffffffffff0000), 0), + MVI(0x0000019f, "P6_UNK_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x111088, UINT64_C(0xffffffff001ffb77), 0), /* value=0x111088 */ + MVX(0x000001a1, "P6_UNK_0000_01a1", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x000001aa, "P6_PIC_SENS_CFG", 0x3, UINT64_C(0xfffffffffffffffc), 0), + MVX(0x000001ae, "P6_UNK_0000_01ae", 0, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x000001af, "P6_UNK_0000_01af", 0x3ff, UINT64_C(0xfffffffffffffc00), 0), + MVO(0x000001c9, "TODO_0000_01c9", 0x8000000), + MVX(0x000001d3, "P6_UNK_0000_01d3", 0x8000, UINT64_C(0xffffffffffff7fff), 0), + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, UINT64_C(0xffffffffffffc200), 0), /* value=0x1 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xaad05fa1 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xaad06480 */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0x7dba1245 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0x806f5d54 */ + MVO(0x000001e0, "MSR_ROB_CR_BKUPTMPDR6", 0xff0), + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`c0000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x40000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`e0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x5ff80000 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`fff80800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`00000000 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`00000000 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, 
Ia32MtrrPhysMaskN, 0x5, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`00000000 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`00000000 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, UINT64_C(0xf00000000), UINT64_C(0xfffffff000000fff)), /* value=0xf`00000000 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RFN(0x00000400, 0x00000413, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVX(0x00001000, "P6_DEBUG_REGISTER_0", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001001, "P6_DEBUG_REGISTER_1", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001002, "P6_DEBUG_REGISTER_2", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001003, "P6_DEBUG_REGISTER_3", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001004, "P6_DEBUG_REGISTER_4", UINT32_C(0xffff0ff0), ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001005, "P6_DEBUG_REGISTER_5", 0x400, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001006, "P6_DEBUG_REGISTER_6", UINT32_C(0xffff0ff0)), /* Villain? */ + MVI(0x00001007, "P6_DEBUG_REGISTER_7", 0x400), /* Villain? 
*/ + MVO(0x0000103f, "P6_UNK_0000_103f", 0x4), + MVO(0x000010cd, "P6_UNK_0000_10cd", 0), + MFW(0x00002000, "P6_CR0", IntelP6CrN, IntelP6CrN, UINT64_C(0xffffffff00000010)), /* value=0x8001003b */ + MFX(0x00002002, "P6_CR2", IntelP6CrN, IntelP6CrN, 0x2, ~(uint64_t)UINT32_MAX, 0), /* value=0xc30000 */ + MFX(0x00002003, "P6_CR3", IntelP6CrN, IntelP6CrN, 0x3, ~(uint64_t)UINT32_MAX, 0), /* value=0x29765000 */ + MFX(0x00002004, "P6_CR4", IntelP6CrN, IntelP6CrN, 0x4, ~(uint64_t)UINT32_MAX, 0), /* value=0x6d9 */ + MVO(0x0000203f, "P6_UNK_0000_203f", 0x4), + MVO(0x000020cd, "P6_UNK_0000_20cd", 0), + MVO(0x0000303f, "P6_UNK_0000_303f", 0x4), + MVO(0x000030cd, "P6_UNK_0000_30cd", 0), +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Pentium(R) M processor 2.00GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Pentium_M_processor_2_00GHz = +{ + /*.pszName = */ "Intel Pentium M processor 2.00GHz", + /*.pszFullName = */ "Intel(R) Pentium(R) M processor 2.00GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 13, + /*.uStepping = */ 6, + /*.enmMicroarch = */ kCpumMicroarch_Intel_P6_M_Dothan, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 32, + /*.fMxCsrMask = */ 0xffbf, ///< @todo check this + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Pentium_M_processor_2_00GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Pentium_M_processor_2_00GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x02b3b001, 0x000000f0, 0x00000000, 0x2c04307d }, + /*.fMsrMask = */ UINT32_C(0x3fff), + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Pentium_M_processor_2_00GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Pentium_M_processor_2_00GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Pentium_M_processor_2_00GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h new file mode 100644 index 00000000..e2c30940 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Pentium_N3530_2_16GHz.h @@ -0,0 +1,265 @@ +/* $Id: Intel_Pentium_N3530_2_16GHz.h $ */ +/** @file + * CPU database entry "Intel Pentium N3530 2.16GHz". + * Generated at 2016-04-29T13:34:27Z by VBoxCpuReport v5.0.51r106929 on win.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Pentium_N3530_2_16GHz_h +#define VBOX_CPUDB_Intel_Pentium_N3530_2_16GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Pentium(R) CPU N3530 @ 2.16GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Pentium_N3530_2_16GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00030678, 0x02100800, 0x41d8e3bf, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x61b3a001, 0x0000ffc2, 0x00000000, 0x00000000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x1c000121, 0x0140003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x1c000122, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x1c00c143, 0x03c0003f, 0x000003ff, 0x00000001, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x33000020, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009, 0x00000000, 0 }, + { 0x00000007, 0x00000000, UINT32_MAX, 0x00000000, 0x00002282, 0x00000000, 0x00000000, 0 }, + { 0x00000007, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07280203, 0x00000000, 0x00000000, 0x00004503, 0 }, + { 0x0000000b, 0x00000000, UINT32_MAX, 0x00000001, 0x00000001, 0x00000100, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000001, UINT32_MAX, 0x00000004, 0x00000004, 0x00000201, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000002, 0x00000002, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000101, 0x28100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x6e492020, 0x286c6574, 0x50202952, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x69746e65, 0x52286d75, 0x50432029, 0x4e202055, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x30333533, 0x20402020, 0x36312e32, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04008040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Pentium(R) CPU N3530 @ 2.16GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Pentium_N3530_2_16GHz[] = +{ + MFI(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0x0 */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x4c5e`43033c62 */ + MFX(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0xc000090341f52), 0, 0), /* value=0xc0000`90341f52 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xfffffff0000006ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, 0x40080000, UINT32_C(0xfff7ffff), ~(uint64_t)UINT32_MAX), /* value=0x40080000 */ + MVX(0x00000033, "TEST_CTL", 0, 0, UINT64_C(0xffffffff7fffffff)), + MFO(0x00000034, "MSR_SMI_COUNT", IntelI7SmiCount), /* value=0xa */ + MVO(0x00000039, "C2_UNK_0000_0039", 0x2), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVX(0x0000003b, "P6_UNK_0000_003b", UINT64_C(0x4c27f41f3066), 0x800, 0), + RFN(0x00000040, 0x00000047, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchToN, IntelLastBranchToN), + RFN(0x00000060, 0x00000067, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MFX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId, 0, 0, UINT32_MAX), /* value=0x809`00000000 */ + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RSN(0x000000c1, 0x000000c2, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MFI(0x000000c7, "IA32_PMC6", Ia32PmcN), /* value=0x36c9 */ + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVO(0x000000ce, "IA32_PLATFORM_INFO", UINT64_C(0x60000001a00)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0, UINT64_C(0xffffffffffc073f0)), /* value=0x1a000f */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xffffffffff800000)), /* value=0x20000 */ + MVO(0x000000e5, "C2_UNK_0000_00e5", UINT32_C(0x80031838)), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x1f8f8 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x9875 */ + MFX(0x000000ee, "C1_EXT_CONFIG", IntelCore1ExtConfig, IntelCore1ExtConfig, 0, UINT32_C(0xefc5ffff), UINT64_C(0xffffffff10000000)), /* value=0x2380002 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd08, 0, 0), /* value=0xd08 */ + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, 0x7e2801ff, UINT32_C(0xfe83f8ff), UINT64_C(0xffffffff00400600)), /* value=0x7e2801ff */ + MVX(0x00000120, "SILV_UNK_0000_0120", 0x44, 0x40, UINT64_C(0xffffffffffffff33)), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0x0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x806, 0, 0), /* value=0x806 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + RSN(0x00000186, 
0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, ~(uint64_t)UINT32_MAX), + MFX(0x00000194, "CLOCK_FLEX_MAX", IntelFlexRatio, IntelFlexRatio, 0, UINT32_C(0xfffec080), ~(uint64_t)UINT32_MAX), /* value=0x0 */ + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x880000001f52), 0, 0), /* value=0x8800`00001f52 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x1f52, 0, 0), /* Might bite. value=0x1f52 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0, 0, UINT64_C(0xffffffffffffffe1)), /* value=0x0 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0xcbb700, 0, UINT64_C(0xfffffffffe0000e8)), /* value=0xcbb700 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x88420100), UINT32_C(0xfffff555), ~(uint64_t)UINT32_MAX), /* value=0x88420100 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x623, 0, 0), /* value=0x623 */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0, UINT32_MAX, ~(uint64_t)UINT32_MAX), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x850089, 0x1080, UINT64_C(0xffffffbbff3aef76)), /* value=0x850089 */ + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, IntelI7TemperatureTarget, 0x690000, 0xff0000, UINT64_C(0xffffffffc000ffff)), /* value=0x690000 */ + MFX(0x000001a6, "I7_MSR_OFFCORE_RSP_0", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffff897ffa0000)), /* XXX: The range ended earlier than expected! */ + MFX(0x000001a7, "I7_MSR_OFFCORE_RSP_1", IntelI7MsrOffCoreResponseN, IntelI7MsrOffCoreResponseN, 0x0, 0, UINT64_C(0xffffffc97ffa0000)), /* value=0x0 */ + MFX(0x000001ad, "I7_MSR_TURBO_RATIO_LIMIT", IntelI7TurboRatioLimit, IntelI7TurboRatioLimit, 0, UINT64_C(0x3f3f3f3f00000000), UINT64_C(0xc0c0c0c0c0c0c0c0)), /* value=0x0 */ + MVX(0x000001b0, "IA32_ENERGY_PERF_BIAS", 0x6, 0, UINT64_C(0xfffffffffffffff0)), + MVO(0x000001c6, "I7_UNK_0000_01c6", 0x3), + MFX(0x000001c8, "MSR_LBR_SELECT", IntelI7LbrSelect, IntelI7LbrSelect, 0, 0x200, UINT64_C(0xfffffffffffffc00)), /* value=0x0 */ + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x0 */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffa03c)), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0x0 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0x0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0x0 */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0x0 */ + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0x7a000006 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0xff800800 */ + MFX(0x000001fc, "I7_MSR_POWER_CTL", IntelI7PowerCtl, IntelI7PowerCtl, 0, 0, UINT64_C(0xfffffffffffffffd)), /* value=0x0 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xffc00005 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ffc00800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xffb80000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, 
UINT64_C(0xfffffff0000007ff)), /* value=0xf`fff80800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`80000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x7c000000 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`fc000800 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x7b000000 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff000800 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x7ae00000 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ffe00800 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x1`00000006 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`80000800 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MVX(0x000002e0, "I7_SB_NO_EVICT_MODE", 0, 0, UINT64_C(0xfffffffffffffffc)), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RSN(0x00000309, 0x0000030b, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffffff0000000000)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x32c1, 0, 0), /* value=0x32c1 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff000)), /* value=0x0 */ + 
MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl, 0, 0, UINT64_C(0xfffffff8fffffffc)), /* value=0x3 */ + MFX(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl, Ia32PerfGlobalOvfCtrl, 0, UINT64_C(0xc000000700000003), UINT64_C(0x3ffffff8fffffffc)), /* value=0x0 */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, 0, UINT64_C(0xfffffffffffffffe)), /* value=0x0 */ + MFX(0x000003f8, "I7_MSR_PKG_C3_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x0 */ + RSN(0x000003f9, 0x000003fa, "I7_MSR_PKG_Cn_RESIDENCY", IntelI7PkgCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), + MFX(0x000003fc, "I7_MSR_CORE_C3_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x3, 0, UINT64_MAX), /* value=0x80000000`0000ad5b */ + MFX(0x000003fd, "I7_MSR_CORE_C6_RESIDENCY", IntelI7CoreCnResidencyN, ReadOnly, 0x6, 0, UINT64_MAX), /* value=0x5`51eddedc */ + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0xda040000000002), 0, 0), /* value=0xda0400`00000002 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe0401e172), 0, 0), /* value=0xfff9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x7fffff00036dff), 0, 0), /* value=0x7fffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0xffff000011ff), 0, 0), /* value=0xffff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x481e6, 0, 0), /* value=0x481e6 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x1027ff, 0, 0), /* value=0x1027ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2e, 0, 0), /* value=0x2e */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0x28ef00000000), 0, 0), /* value=0x28ef`00000000 */ + MFX(0x0000048c, "IA32_VMX_EPT_VPID_CAP", Ia32VmxEptVpidCap, ReadOnly, UINT64_C(0xf0106114141), 0, 0), /* value=0xf01`06114141 */ + MFX(0x0000048d, "IA32_VMX_TRUE_PINBASED_CTLS", Ia32VmxTruePinbasedCtls, ReadOnly, UINT64_C(0x7f00000016), 0, 0), /* value=0x7f`00000016 */ + MFX(0x0000048e, "IA32_VMX_TRUE_PROCBASED_CTLS", Ia32VmxTrueProcbasedCtls, ReadOnly, UINT64_C(0xfff9fffe04006172), 0, 0), /* value=0xfff9fffe`04006172 */ + MFX(0x0000048f, "IA32_VMX_TRUE_EXIT_CTLS", Ia32VmxTrueExitCtls, ReadOnly, UINT64_C(0x7fffff00036dfb), 0, 0), /* value=0x7fffff`00036dfb */ + MFX(0x00000490, "IA32_VMX_TRUE_ENTRY_CTLS", Ia32VmxTrueEntryCtls, ReadOnly, UINT64_C(0xffff000011fb), 0, 0), /* value=0xffff`000011fb */ + MFX(0x00000491, "IA32_VMX_VMFUNC", Ia32VmxVmFunc, ReadOnly, 0x1, 0, 0), /* value=0x1 */ + RSN(0x000004c1, 0x000004c2, "IA32_A_PMCn", Ia32PmcN, Ia32PmcN, 0x0, 0, UINT64_C(0xffffff0000000000)), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + 
MFX(0x00000601, "I7_SB_MSR_VR_CURRENT_CONFIG", IntelI7SandyVrCurrentConfig, IntelI7SandyVrCurrentConfig, 0, UINT64_C(0xc00000007fffe000), 0), /* value=0x0 */ + MFX(0x00000606, "I7_SB_MSR_RAPL_POWER_UNIT", IntelI7SandyRaplPowerUnit, IntelI7SandyRaplPowerUnit, 0x505, 0, UINT64_C(0xfffffffffff0e0f0)), /* value=0x505 */ + MFN(0x0000060d, "I7_SB_MSR_PKG_C2_RESIDENCY", IntelI7SandyPkgC2Residency, IntelI7SandyPkgC2Residency), /* value=0x0 */ + MFX(0x00000610, "I7_SB_MSR_PKG_POWER_LIMIT", IntelI7RaplPkgPowerLimit, IntelI7RaplPkgPowerLimit, 0x3880fa, 0x8000, UINT64_C(0xff000000ff000000)), /* value=0x3880fa */ + MFX(0x00000611, "I7_SB_MSR_PKG_ENERGY_STATUS", IntelI7RaplPkgEnergyStatus, ReadOnly, 0x21823a, 0, 0), /* value=0x21823a */ + MFX(0x00000638, "I7_SB_MSR_PP0_POWER_LIMIT", IntelI7RaplPp0PowerLimit, IntelI7RaplPp0PowerLimit, 0x20000, 0, UINT64_C(0xffffffffff000000)), /* value=0x20000 */ + MFX(0x00000639, "I7_SB_MSR_PP0_ENERGY_STATUS", IntelI7RaplPp0EnergyStatus, ReadOnly, 0x792fa, 0, 0), /* value=0x792fa */ + MFO(0x00000660, "SILV_CORE_C1_RESIDENCY", IntelAtSilvCoreC1Recidency), /* value=0x22`70ff1790 */ + MVO(0x00000661, "SILV_UNK_0000_0661", 0), + MVO(0x00000662, "SILV_UNK_0000_0662", 0), + MVO(0x00000663, "SILV_UNK_0000_0663", 0), + MVO(0x00000664, "SILV_UNK_0000_0664", 0), + MVO(0x00000665, "SILV_UNK_0000_0665", 0), + MVO(0x00000666, "SILV_UNK_0000_0666", 0), + MVO(0x00000667, "SILV_UNK_0000_0667", 0x9), + MVX(0x00000668, "SILV_UNK_0000_0668", 0x13130f0b, 0, ~(uint64_t)UINT32_MAX), + MVX(0x00000669, "SILV_UNK_0000_0669", 0x1010f20, 0, ~(uint64_t)UINT32_MAX), + MVO(0x0000066a, "SILV_UNK_0000_066a", 0x1a0602), + MVO(0x0000066b, "SILV_UNK_0000_066b", 0x442323), + MVO(0x0000066c, "SILV_UNK_0000_066c", 0x1f1f1f1f), + MVO(0x0000066d, "SILV_UNK_0000_066d", 0x52525252), + MVX(0x0000066e, "SILV_UNK_0000_066e", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0x0000066f, "SILV_UNK_0000_066f", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0x00000670, "SILV_UNK_0000_0670", 0, 0, ~(uint64_t)UINT32_MAX), + MVX(0x00000671, "SILV_UNK_0000_0671", 0, 0, UINT64_C(0xffffffff000000c0)), + MVX(0x00000672, "SILV_UNK_0000_0672", 0, 0, UINT64_C(0xffffffffc0000000)), + MVX(0x00000673, "SILV_UNK_0000_0673", 0x205, 0, UINT64_C(0xffffffffffffc000)), + MVX(0x00000674, "SILV_UNK_0000_0674", 0x4050006, 0, UINT64_C(0xfffffffff8000000)), + MVX(0x00000675, "SILV_UNK_0000_0675", 0x27, 0x20, UINT64_C(0xffffffffffffffc0)), + MVX(0x00000676, "SILV_UNK_0000_0676", 0, UINT64_C(0x7f7f7f7f00000000), UINT64_C(0x8080808080808080)), + MVX(0x00000677, "SILV_UNK_0000_0677", 0, 0, ~(uint64_t)UINT32_MAX), + MFI(0x000006e0, "IA32_TSC_DEADLINE", Ia32TscDeadline), /* value=0x0 */ + MVX(0x00000768, "SILV_UNK_0000_0768", 0, 0, UINT64_C(0xffffffffffff0060)), + MVX(0x00000769, "SILV_UNK_0000_0769", 0, 0x6, UINT64_C(0xfffffffffffffff0)), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff802`6e9de200 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff802`6e9ddf40 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x9b440000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* 
value=0xffffd000`20661000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7ff7`9b43e000 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, 0, ~(uint64_t)UINT32_MAX), /* value=0x0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Pentium(R) CPU N3530 @ 2.16GHz. + */ +static CPUMDBENTRY const g_Entry_Intel_Pentium_N3530_2_16GHz = +{ + /*.pszName = */ "Intel Pentium N3530 2.16GHz", + /*.pszFullName = */ "Intel(R) Pentium(R) CPU N3530 @ 2.16GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 55, + /*.uStepping = */ 8, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Atom_Silvermont, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_267MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Pentium_N3530_2_16GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Pentium_N3530_2_16GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX, + /*.DefUnknownCpuId = */ { 0x00000001, 0x00000001, 0x00000100, 0x00000004 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Pentium_N3530_2_16GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Pentium_N3530_2_16GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Pentium_N3530_2_16GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h b/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h new file mode 100644 index 00000000..1926a62c --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Intel_Xeon_X5482_3_20GHz.h @@ -0,0 +1,248 @@ +/* $Id: Intel_Xeon_X5482_3_20GHz.h $ */ +/** @file + * CPU database entry "Intel Xeon X5482 3.20GHz". + * Generated at 2013-12-16T12:10:52Z by VBoxCpuReport v4.3.53r91299 on darwin.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Intel_Xeon_X5482_3_20GHz_h +#define VBOX_CPUDB_Intel_Xeon_X5482_3_20GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Intel(R) Xeon(R) CPU X5482 @ 3.20GHz. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Intel_Xeon_X5482_3_20GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00010676, 0x04040800, 0x000ce3bd, 0xbfebfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x05b0b101, 0x005657f0, 0x00000000, 0x2cb4304e, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x0c000121, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x0c000122, 0x01c0003f, 0x0000003f, 0x00000001, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x0c004143, 0x05c0003f, 0x00000fff, 0x00000001, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00002220, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000001, 0x00000002, 0x00000001, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000400, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07280202, 0x00000000, 0x00000000, 0x00000503, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x20100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x65746e49, 0x2952286c, 0x6f655820, 0x2952286e, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x55504320, 0x20202020, 0x20202020, 0x58202020, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x32383435, 0x20402020, 0x30322e33, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x18008040, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003026, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Intel(R) Xeon(R) CPU X5482 @ 3.20GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Intel_Xeon_X5482_3_20GHz[] = +{ + MFO(0x00000000, "IA32_P5_MC_ADDR", Ia32P5McAddr), /* value=0x610010 */ + MFX(0x00000001, "IA32_P5_MC_TYPE", Ia32P5McType, Ia32P5McType, 0, 0, UINT64_MAX), /* value=0x0 */ + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, 0, UINT64_C(0xffffffffffff0000)), /* value=0x40 */ + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x1358`d28c2c60 */ + MFV(0x00000017, "IA32_PLATFORM_ID", Ia32PlatformId, ReadOnly, UINT64_C(0x18000088e40822)), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffffffc0000006ff)), + MVX(0x00000021, "C2_UNK_0000_0021", 0, 0, UINT64_C(0xffffffffffffffc0)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, UINT32_C(0xc2383400), UINT64_C(0xffffffffdff7df00), 0), /* value=0xc2383400 */ + MVX(0x00000032, "P6_UNK_0000_0032", 0, UINT64_C(0xffffffff01fe0000), 0), + MVX(0x00000033, "TEST_CTL", 0, UINT64_C(0xffffffff7fffffff), 0), + MVO(0x00000039, "C2_UNK_0000_0039", 0x7), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + MVO(0x0000003f, "P6_UNK_0000_003f", 0), + RFN(0x00000040, 0x00000043, "MSR_LASTBRANCH_n_FROM_IP", IntelLastBranchToN, IntelLastBranchToN), + RFN(0x00000060, 0x00000063, "MSR_LASTBRANCH_n_TO_IP", IntelLastBranchFromN, IntelLastBranchFromN), + MFN(0x00000079, "IA32_BIOS_UPDT_TRIG", WriteOnly, IgnoreWrite), + MVX(0x0000008b, "BBL_CR_D3|BIOS_SIGN", UINT64_C(0x60b00000000), UINT32_MAX, 0), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + MFX(0x000000a8, "C2_EMTTM_CR_TABLES_0", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x612, UINT64_C(0xffffffffffff8000), 0), /* value=0x612 */ + MFX(0x000000a9, "C2_EMTTM_CR_TABLES_1", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x612, UINT64_C(0xffffffffffff8000), 0), /* value=0x612 */ + MFX(0x000000aa, "C2_EMTTM_CR_TABLES_2", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x612, UINT64_C(0xffffffffffff8000), 0), /* value=0x612 */ + MFX(0x000000ab, "C2_EMTTM_CR_TABLES_3", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x612, UINT64_C(0xffffffffffff8000), 0), /* value=0x612 */ + MFX(0x000000ac, "C2_EMTTM_CR_TABLES_4", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x612, UINT64_C(0xffffffffffff8000), 0), /* value=0x612 */ + MFX(0x000000ad, "C2_EMTTM_CR_TABLES_5", IntelCore2EmttmCrTablesN, IntelCore2EmttmCrTablesN, 0x612, ~(uint64_t)UINT32_MAX, 0), /* value=0x612 */ + RSN(0x000000c1, 0x000000c2, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x000000c7, "P6_UNK_0000_00c7", UINT64_C(0x2300000052000000)), + MFX(0x000000cd, "P6_MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0x806, 0, 0), + MVO(0x000000ce, "P6_UNK_0000_00ce", UINT64_C(0x1208227f7f0710)), + MVO(0x000000cf, "C2_UNK_0000_00cf", 0), + MVO(0x000000e0, "C2_UNK_0000_00e0", 0x18820f0), + MVO(0x000000e1, "C2_UNK_0000_00e1", UINT32_C(0xf0f00000)), + MFX(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl, IntelPkgCStConfigControl, 0, 0x404000, UINT64_C(0xfffffffffc001000)), /* value=0x202a01 */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, 0, UINT64_C(0xffffffffff800000)), /* value=0x0 */ + MVO(0x000000e5, 
"C2_UNK_0000_00e5", UINT32_C(0xd00208c8)), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x40`a0a41c60 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x3a`cc470b98 */ + MFX(0x000000ee, "C1_EXT_CONFIG", IntelCore1ExtConfig, IntelCore1ExtConfig, 0, UINT64_C(0xffffffffefc5ffff), 0), /* value=0x4000000`877d4b01 */ + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd08, 0, 0), /* value=0xd08 */ + MVX(0x00000116, "BBL_CR_ADDR", 0x3fc0, UINT64_C(0xffffff000000001f), 0), + MVX(0x00000118, "BBL_CR_DECC", 0xa7f99, UINT64_C(0xfffffffffff00000), 0), + MFN(0x0000011a, "BBL_CR_TRIG", WriteOnly, IgnoreWrite), + MVI(0x0000011b, "P6_UNK_0000_011b", 0), + MVX(0x0000011c, "C2_UNK_0000_011c", UINT32_C(0xe003b94d), UINT64_C(0xffffffff07c00000), 0), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, UINT32_C(0xbe702111), UINT64_C(0xfffffffffef3fe9f), 0), /* value=0xbe702111 */ + MVX(0x0000014e, "P6_UNK_0000_014e", 0x70375245, UINT64_C(0xffffffff00000080), 0), + MVI(0x0000014f, "P6_UNK_0000_014f", UINT32_C(0xffffba7f)), + MVX(0x00000151, "P6_UNK_0000_0151", 0x6b929082, ~(uint64_t)UINT32_MAX, 0), + MVX(0x0000015e, "C2_UNK_0000_015e", 0x6, 0, UINT64_C(0xfffffffffffffff0)), + MFX(0x0000015f, "C1_DTS_CAL_CTRL", IntelCore1DtsCalControl, IntelCore1DtsCalControl, 0, UINT64_C(0xffffffffffc0ffff), 0), /* value=0x822 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0xb */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0xffffff82`0dce9190 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0xffffff80`0d2ce720 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x806, 0, 0), /* value=0x806 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RSN(0x00000186, 0x00000187, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 0, UINT64_C(0xffffffff00200000)), + MVO(0x00000193, "C2_UNK_0000_0193", 0), + MVX(0x00000194, "CLOCK_FLEX_MAX", 0x14822, UINT64_C(0xfffffffffffea0c0), 0), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, ReadOnly, UINT64_C(0x822082206300622), 0, 0), /* value=0x8220822`06300622 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x822, 0, 0), /* Might bite. 
value=0x822 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, 0, UINT64_C(0xffffffffffffffe1)), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0x10, 0, UINT64_C(0xffffffffff0000e0)), /* value=0x10 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, UINT32_C(0x883c0000), UINT32_C(0xf87f017f), UINT64_C(0xffffffff0780fc00)), /* value=0x883c0000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x612, 0, 0), /* value=0x612 */ + MVX(0x0000019e, "P6_UNK_0000_019e", 0x2120000, UINT64_C(0xffffffffffff0000), 0), + MVI(0x0000019f, "P6_UNK_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, UINT64_C(0x4066a52489), UINT64_C(0x52600099f6), UINT64_C(0xffffff0019004000)), /* value=0x40`66a52489 */ + MVX(0x000001a1, "P6_UNK_0000_01a1", 0, UINT64_C(0xffff000000000000), 0), + MFX(0x000001a2, "I7_MSR_TEMPERATURE_TARGET", IntelI7TemperatureTarget, ReadOnly, 0x1400, 0, 0), /* value=0x1400 */ + MVX(0x000001aa, "P6_PIC_SENS_CFG", UINT32_C(0xfe7f042f), UINT64_C(0xffffffff7faf00af), 0), + MVX(0x000001bf, "C2_UNK_0000_01bf", 0x404, UINT64_C(0xffffffffffff0000), 0), + MFX(0x000001c9, "MSR_LASTBRANCH_TOS", IntelLastBranchTos, IntelLastBranchTos, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + MVX(0x000001d3, "P6_UNK_0000_01d3", 0x8000, UINT64_C(0xffffffffffff7fff), 0), + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffa03c)), /* value=0x1 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xffffff7f`8f47ca6b */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xffffff80`0d2b24c0 */ + MFN(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp, P6LastIntFromIp), /* value=0xffffff80`0d2ba20f */ + MFN(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp, P6LastIntToIp), /* value=0xffffff80`0d2ba200 */ + MVO(0x000001e0, "MSR_ROB_CR_BKUPTMPDR6", 0xff0), + MFX(0x000001f8, "IA32_PLATFORM_DCA_CAP", Ia32PlatformDcaCap, Ia32PlatformDcaCap, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + MFO(0x000001f9, "IA32_CPU_DCA_CAP", Ia32CpuDcaCap), /* value=0x1 */ + MFX(0x000001fa, "IA32_DCA_0_CAP", Ia32Dca0Cap, Ia32Dca0Cap, 0, UINT64_C(0xfffffffffefe17ff), 0), /* value=0xc01e489 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x80000000 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x3f`80000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x7fc00000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x3f`ffc00800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x6 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x30`00000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, 
UINT64_C(0xffffffc000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffffffc000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffffffc0000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RSN(0x00000309, 0x0000030b, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, 0, UINT64_C(0xffffff0000000000)), + MFX(0x00000345, "IA32_PERF_CAPABILITIES", Ia32PerfCapabilities, ReadOnly, 0x10c2, 0, 0), /* value=0x10c2 */ + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, 0, UINT64_C(0xfffffffffffff444)), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFN(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl), /* value=0xffffffff`ffffffff */ + MFO(0x00000390, "IA32_PERF_GLOBAL_OVF_CTRL", Ia32PerfGlobalOvfCtrl), /* value=0xffffffff`ffffffff */ + MFX(0x000003f1, "IA32_PEBS_ENABLE", Ia32PebsEnable, Ia32PebsEnable, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFN(0x00000478, "CPUID1_FEATURE_MASK", IntelCpuId1FeatureMaskEcdx, IntelCpuId1FeatureMaskEcdx), /* value=0xffffffff`ffffffff */ + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0x5a08000000000d), 0, 0), /* value=0x5a0800`0000000d */ + MFX(0x00000481, 
"IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x3f00000016), 0, 0), /* value=0x3f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0xf7f9fffe0401e172), 0, 0), /* value=0xf7f9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x3ffff00036dff), 0, 0), /* value=0x3ffff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0x3fff000011ff), 0, 0), /* value=0x3fff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x403c0, 0, 0), /* value=0x403c0 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x27ff, 0, 0), /* value=0x27ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2c, 0, 0), /* value=0x2c */ + MFX(0x0000048b, "IA32_VMX_PROCBASED_CTLS2", Ia32VmxProcBasedCtls2, ReadOnly, UINT64_C(0x4100000000), 0, 0), /* value=0x41`00000000 */ + MVX(0x000004f8, "C2_UNK_0000_04f8", 0, 0, 0), + MVX(0x000004f9, "C2_UNK_0000_04f9", 0, 0, 0), + MVX(0x000004fa, "C2_UNK_0000_04fa", 0, 0, 0), + MVX(0x000004fb, "C2_UNK_0000_04fb", 0, 0, 0), + MVX(0x000004fc, "C2_UNK_0000_04fc", 0, 0, 0), + MVX(0x000004fd, "C2_UNK_0000_04fd", 0, 0, 0), + MVX(0x000004fe, "C2_UNK_0000_04fe", 0, 0, 0), + MVX(0x000004ff, "C2_UNK_0000_04ff", 0, 0, 0), + MVX(0x00000590, "C2_UNK_0000_0590", 0, 0, 0), + MVX(0x00000591, "C2_UNK_0000_0591", 0, ~(uint64_t)UINT32_MAX, 0), + MFX(0x000005a0, "C2_PECI_CTL", IntelCore2PeciControl, IntelCore2PeciControl, 0, UINT64_C(0xfffffffffffffffe), 0), /* value=0x1 */ + MVI(0x000005a1, "C2_UNK_0000_05a1", 0x1), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xfffffffffffff2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x1b0008`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffff80`0d2ce6c0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0x0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x4700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x0 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffffff82`0dcfd000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7fff`7c7511e0 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Intel(R) Xeon(R) CPU X5482 @ 3.20GHz. 
+ */ +static CPUMDBENTRY const g_Entry_Intel_Xeon_X5482_3_20GHz = +{ + /*.pszName = */ "Intel Xeon X5482 3.20GHz", + /*.pszFullName = */ "Intel(R) Xeon(R) CPU X5482 @ 3.20GHz", + /*.enmVendor = */ CPUMCPUVENDOR_INTEL, + /*.uFamily = */ 6, + /*.uModel = */ 23, + /*.uStepping = */ 6, + /*.enmMicroarch = */ kCpumMicroarch_Intel_Core2_Penryn, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_400MHZ, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 38, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Intel_Xeon_X5482_3_20GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Intel_Xeon_X5482_3_20GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_LAST_STD_LEAF, + /*.DefUnknownCpuId = */ { 0x07280202, 0x00000000, 0x00000000, 0x00000503 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Intel_Xeon_X5482_3_20GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Intel_Xeon_X5482_3_20GHz), +}; + +#endif /* !VBOX_CPUDB_Intel_Xeon_X5482_3_20GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/Makefile.kup b/src/VBox/VMM/VMMR3/cpus/Makefile.kup new file mode 100644 index 00000000..e69de29b diff --git a/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h b/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h new file mode 100644 index 00000000..1c509043 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/Quad_Core_AMD_Opteron_2384.h @@ -0,0 +1,270 @@ +/* $Id: Quad_Core_AMD_Opteron_2384.h $ */ +/** @file + * CPU database entry "Quad-Core AMD Opteron 2384". + * Generated at 2013-12-09T21:56:56Z by VBoxCpuReport v4.3.51r91133 on win.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_Quad_Core_AMD_Opteron_2384_h +#define VBOX_CPUDB_Quad_Core_AMD_Opteron_2384_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for Quad-Core AMD Opteron(tm) Processor 2384. 
+ */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_Quad_Core_AMD_Opteron_2384[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000005, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x00100f42, 0x06040800, 0x00802009, 0x178bfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x8000001b, 0x68747541, 0x444d4163, 0x69746e65, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00100f42, 0x00000d4f, 0x000037ff, 0xefd3fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x80000002, 0x00000000, 0x00000000, 0x64617551, 0x726f432d, 0x4d412065, 0x704f2044, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x6f726574, 0x6d74286e, 0x72502029, 0x7365636f, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x20726f73, 0x34383332, 0x00000000, 0x00000000, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0xff30ff10, 0xff30ff20, 0x40020140, 0x40020140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x20800000, 0x42004200, 0x02008140, 0x0030b140, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000001f9, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003030, 0x00000000, 0x00002003, 0x00000000, 0 }, + { 0x80000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000a, 0x00000000, 0x00000000, 0x00000001, 0x00000040, 0x00000000, 0x0000000f, 0 }, + { 0x8000000b, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000d, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000010, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000011, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000013, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000014, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000015, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000016, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000017, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000018, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000019, 0x00000000, 0x00000000, 0xf0300000, 0x60100000, 0x00000000, 0x00000000, 0 }, + { 0x8000001a, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x8000001b, 0x00000000, 0x00000000, 0x0000001f, 0x00000000, 0x00000000, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for Quad-Core AMD Opteron(tm) Processor 2384. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_Quad_Core_AMD_Opteron_2384[] = +{ + MAL(0x00000000, "IA32_P5_MC_ADDR", 0x00000402), + MAL(0x00000001, "IA32_P5_MC_TYPE", 0x00000401), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0xbe`410ca9b6 */ + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0, UINT64_C(0xffff0000000006ff)), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, ReadOnly, 0, 0, 0), /* value=0x0 */ + MVO(0x0000008b, "BBL_CR_D3|BIOS_SIGN", 0x1000086), + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0x508, 0, 0), /* value=0x508 */ + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0x106, 0, 0), /* value=0x106 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + MFX(0x0000017b, "IA32_MCG_CTL", Ia32McgCtl, Ia32McgCtl, 0, UINT64_C(0xffffffffffffffc0), 0), /* value=0x3f */ + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, UINT64_C(0xffffffffffffff80), 0x40), /* value=0x0 */ + MFO(0x000001db, "P6_LAST_BRANCH_FROM_IP", P6LastBranchFromIp), /* value=0xfffff800`0245dd94 */ + MFO(0x000001dc, "P6_LAST_BRANCH_TO_IP", P6LastBranchToIp), /* value=0xfffff800`0245e910 */ + MFO(0x000001dd, "P6_LAST_INT_FROM_IP", P6LastIntFromIp), /* value=0x753d3416 */ + MFO(0x000001de, "P6_LAST_INT_TO_IP", P6LastIntToIp), /* value=0x753ea130 */ + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xffff000000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`80000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xffff000000000ff8)), /* value=0x80000006 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`c0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xffff000000000ff8)), /* value=0xc0000006 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xffff0000000007ff)), /* value=0xffff`f8000800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, 
UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xffff000000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RFN(0x00000400, 0x00000417, "IA32_MCi_CTL_STATUS_ADDR_MISC", Ia32McCtlStatusAddrMiscN, Ia32McCtlStatusAddrMiscN), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0x4d01, 0xfe, UINT64_C(0xffffffffffff8200)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xfffff800`0245dd00 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xfffff800`0245da80 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x14700 */ + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0xfffe0000 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xfffffa60`01b8a000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x7ff`fffde000 */ + MFX(0xc0000103, "AMD64_TSC_AUX", Amd64TscAux, Amd64TscAux, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RSN(0xc0000408, 0xc000040a, "AMD_10H_MC4_MISCn", AmdFam10hMc4MiscN, AmdFam10hMc4MiscN, 0, UINT64_C(0xff00f000ffffffff), 0), + RVI(0xc000040b, 0xc000040f, "AMD_10H_MC4_MISCn", 0), + RSN(0xc0010000, 0xc0010003, "AMD_K8_PERF_CTL_n", AmdK8PerfCtlN, AmdK8PerfCtlN, 0x0, UINT64_C(0xfffffcf000200000), 0), + RSN(0xc0010004, 0xc0010007, "AMD_K8_PERF_CTR_n", AmdK8PerfCtrN, AmdK8PerfCtrN, 0x0, UINT64_C(0xffff000000000000), 0), + MFX(0xc0010010, "AMD_K8_SYS_CFG", AmdK8SysCfg, AmdK8SysCfg, 0x760600, UINT64_C(0xffffffffff80f8ff), 0), /* value=0x760600 */ + MFX(0xc0010015, "AMD_K8_HW_CFG", AmdK8HwCr, AmdK8HwCr, 0x1000030, UINT64_C(0xffffffff00000020), 0), /* 
value=0x1000030 */ + MFW(0xc0010016, "AMD_K8_IORR_BASE_0", AmdK8IorrBaseN, AmdK8IorrBaseN, UINT64_C(0xffff000000000fe7)), /* value=0x1`b8210000 */ + MFW(0xc0010017, "AMD_K8_IORR_MASK_0", AmdK8IorrMaskN, AmdK8IorrMaskN, UINT64_C(0xffff0000000007ff)), /* value=0x0 */ + MFX(0xc0010018, "AMD_K8_IORR_BASE_1", AmdK8IorrBaseN, AmdK8IorrBaseN, 0x1, UINT64_C(0xffff000000000fe7), 0), /* value=0x0 */ + MFX(0xc0010019, "AMD_K8_IORR_MASK_1", AmdK8IorrMaskN, AmdK8IorrMaskN, 0x1, UINT64_C(0xffff0000000007ff), 0), /* value=0x0 */ + MFW(0xc001001a, "AMD_K8_TOP_MEM", AmdK8TopOfMemN, AmdK8TopOfMemN, UINT64_C(0xffff0000007fffff)), /* value=0xc8000000 */ + MFX(0xc001001d, "AMD_K8_TOP_MEM2", AmdK8TopOfMemN, AmdK8TopOfMemN, 0x1, UINT64_C(0xffff0000007fffff), 0), /* value=0x2`38000000 */ + MFN(0xc001001f, "AMD_K8_NB_CFG1", AmdK8NbCfg1, AmdK8NbCfg1), /* value=0x400000`00000008 */ + MFN(0xc0010020, "AMD_K8_PATCH_LOADER", WriteOnly, AmdK8PatchLoader), + MFX(0xc0010022, "AMD_K8_MC_XCPT_REDIR", AmdK8McXcptRedir, AmdK8McXcptRedir, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RFN(0xc0010030, 0xc0010035, "AMD_K8_CPU_NAME_n", AmdK8CpuNameN, AmdK8CpuNameN), + MFX(0xc001003e, "AMD_K8_HTC", AmdK8HwThermalCtrl, AmdK8HwThermalCtrl, 0x327f0004, UINT64_C(0xffffffffc0008838), 0), /* value=0x327f0004 */ + MFX(0xc001003f, "AMD_K8_STC", AmdK8SwThermalCtrl, AmdK8SwThermalCtrl, 0, UINT64_C(0xffffffffc00088c0), 0), /* value=0x30000000 */ + MVO(0xc0010043, "AMD_K8_THERMTRIP_STATUS", 0x1830), + MFX(0xc0010044, "AMD_K8_MC_CTL_MASK_0", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x0, UINT64_C(0xffffffffffffff00), 0), /* value=0x80 */ + MFX(0xc0010045, "AMD_K8_MC_CTL_MASK_1", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x1, ~(uint64_t)UINT32_MAX, 0), /* value=0x80 */ + MFX(0xc0010046, "AMD_K8_MC_CTL_MASK_2", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x2, UINT64_C(0xfffffffffffff000), 0), /* value=0x200 */ + MFX(0xc0010047, "AMD_K8_MC_CTL_MASK_3", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x3, UINT64_C(0xfffffffffffffffc), 0), /* value=0x0 */ + MFX(0xc0010048, "AMD_K8_MC_CTL_MASK_4", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x4, UINT64_C(0xffffffffc0000000), 0), /* value=0x780400 */ + MFX(0xc0010049, "AMD_K8_MC_CTL_MASK_5", AmdK8McCtlMaskN, AmdK8McCtlMaskN, 0x5, UINT64_C(0xfffffffffffffffe), 0), /* value=0x0 */ + RFN(0xc0010050, 0xc0010053, "AMD_K8_SMI_ON_IO_TRAP_n", AmdK8SmiOnIoTrapN, AmdK8SmiOnIoTrapN), + MFX(0xc0010054, "AMD_K8_SMI_ON_IO_TRAP_CTL_STS", AmdK8SmiOnIoTrapCtlSts, AmdK8SmiOnIoTrapCtlSts, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010055, "AMD_K8_INT_PENDING_MSG", AmdK8IntPendingMessage, AmdK8IntPendingMessage, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010056, "AMD_K8_SMI_TRIGGER_IO_CYCLE", AmdK8SmiTriggerIoCycle, AmdK8SmiTriggerIoCycle, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x200242e */ + MVX(0xc0010057, "AMD_10H_UNK_c001_0057", 0, 0, 0), + MFX(0xc0010058, "AMD_10H_MMIO_CFG_BASE_ADDR", AmdFam10hMmioCfgBaseAddr, AmdFam10hMmioCfgBaseAddr, 0, UINT64_C(0xffff0000000fffc0), 0), /* value=0xe0000021 */ + MFX(0xc0010059, "AMD_10H_TRAP_CTL?", AmdFam10hTrapCtlMaybe, AmdFam10hTrapCtlMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVX(0xc001005a, "AMD_10H_UNK_c001_005a", 0, 0, 0), + MVX(0xc001005b, "AMD_10H_UNK_c001_005b", 0, 0, 0), + MVX(0xc001005c, "AMD_10H_UNK_c001_005c", 0, 0, 0), + MVX(0xc001005d, "AMD_10H_UNK_c001_005d", 0, 0, 0), + MVO(0xc0010060, "AMD_K8_BIST_RESULT", 0), + MFX(0xc0010061, "AMD_10H_P_ST_CUR_LIM", AmdFam10hPStateCurLimit, ReadOnly, 0x30, 0, 0), /* value=0x30 */ + MFX(0xc0010062, "AMD_10H_P_ST_CTL", 
AmdFam10hPStateControl, AmdFam10hPStateControl, 0x1, 0, UINT64_C(0xfffffffffffffff8)), /* value=0x1 */ + MFX(0xc0010063, "AMD_10H_P_ST_STS", AmdFam10hPStateStatus, ReadOnly, 0x1, 0, 0), /* value=0x1 */ + MFX(0xc0010064, "AMD_10H_P_ST_0", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x800001e13000300b), 0, 0), /* value=0x800001e1`3000300b */ + MFX(0xc0010065, "AMD_10H_P_ST_1", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x800001c840004004), 0, 0), /* value=0x800001c8`40004004 */ + MFX(0xc0010066, "AMD_10H_P_ST_2", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x800001b64000404e), 0, 0), /* value=0x800001b6`4000404e */ + MFX(0xc0010067, "AMD_10H_P_ST_3", AmdFam10hPStateN, AmdFam10hPStateN, UINT64_C(0x8000019d40004040), 0, 0), /* value=0x8000019d`40004040 */ + MFX(0xc0010068, "AMD_10H_P_ST_4", AmdFam10hPStateN, AmdFam10hPStateN, 0, 0, 0), /* value=0x0 */ + MFX(0xc0010070, "AMD_10H_COFVID_CTL", AmdFam10hCofVidControl, AmdFam10hCofVidControl, 0x40014004, UINT64_C(0xffffffff01b80000), 0), /* value=0x40014004 */ + MFX(0xc0010071, "AMD_10H_COFVID_STS", AmdFam10hCofVidStatus, AmdFam10hCofVidStatus, UINT64_C(0x38b600c340014004), UINT64_MAX, 0), /* value=0x38b600c3`40014004 */ + MFX(0xc0010074, "AMD_10H_CPU_WD_TMR_CFG", AmdFam10hCpuWatchdogTimer, AmdFam10hCpuWatchdogTimer, 0, UINT64_C(0xffffffffffffff80), 0), /* value=0x0 */ + MFX(0xc0010111, "AMD_K8_SMM_BASE", AmdK8SmmBase, AmdK8SmmBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x98e00 */ + MFX(0xc0010112, "AMD_K8_SMM_ADDR", AmdK8SmmAddr, AmdK8SmmAddr, 0, UINT64_C(0xffff00000001ffff), 0), /* value=0x0 */ + MFX(0xc0010113, "AMD_K8_SMM_MASK", AmdK8SmmMask, AmdK8SmmMask, 0, UINT64_C(0xffff0000000188c0), 0), /* value=0x1 */ + MFX(0xc0010114, "AMD_K8_VM_CR", AmdK8VmCr, AmdK8VmCr, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xffffffe0)), /* value=0x8 */ + MFX(0xc0010115, "AMD_K8_IGNNE", AmdK8IgnNe, AmdK8IgnNe, 0, ~(uint64_t)UINT32_MAX, UINT32_C(0xfffffffe)), /* value=0x0 */ + MFX(0xc0010117, "AMD_K8_VM_HSAVE_PA", AmdK8VmHSavePa, AmdK8VmHSavePa, 0, 0, UINT64_C(0xffff000000000fff)), /* value=0x0 */ + MFN(0xc0010118, "AMD_10H_VM_LOCK_KEY", AmdFam10hVmLockKey, AmdFam10hVmLockKey), /* value=0x0 */ + MFN(0xc0010119, "AMD_10H_SSM_LOCK_KEY", AmdFam10hSmmLockKey, AmdFam10hSmmLockKey), /* value=0x0 */ + MFX(0xc001011a, "AMD_10H_LOCAL_SMI_STS", AmdFam10hLocalSmiStatus, AmdFam10hLocalSmiStatus, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc0010140, "AMD_10H_OSVW_ID_LEN", AmdFam10hOsVisWrkIdLength, AmdFam10hOsVisWrkIdLength, 0x1, 0, 0), /* value=0x1 */ + MFN(0xc0010141, "AMD_10H_OSVW_STS", AmdFam10hOsVisWrkStatus, AmdFam10hOsVisWrkStatus), /* value=0x0 */ + MFX(0xc0011000, "AMD_K7_MCODE_CTL", AmdK7MicrocodeCtl, AmdK7MicrocodeCtl, 0, ~(uint64_t)UINT32_MAX, 0x204), /* value=0x0 */ + MFX(0xc0011001, "AMD_K7_APIC_CLUSTER_ID", AmdK7ClusterIdMaybe, AmdK7ClusterIdMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFN(0xc0011004, "AMD_K8_CPUID_CTL_STD01", AmdK8CpuIdCtlStd01hEdcx, AmdK8CpuIdCtlStd01hEdcx), /* value=0x802009`178bfbff */ + MFN(0xc0011005, "AMD_K8_CPUID_CTL_EXT01", AmdK8CpuIdCtlExt01hEdcx, AmdK8CpuIdCtlExt01hEdcx), /* value=0x37ff`efd3fbff */ + MFX(0xc0011006, "AMD_K7_DEBUG_STS?", AmdK7DebugStatusMaybe, AmdK7DebugStatusMaybe, 0, UINT64_C(0xffffffff00000080), 0), /* value=0x0 */ + MFN(0xc0011007, "AMD_K7_BH_TRACE_BASE?", AmdK7BHTraceBaseMaybe, AmdK7BHTraceBaseMaybe), /* value=0x0 */ + MFN(0xc0011008, "AMD_K7_BH_TRACE_PTR?", AmdK7BHTracePtrMaybe, AmdK7BHTracePtrMaybe), /* value=0x0 */ + MFN(0xc0011009, "AMD_K7_BH_TRACE_LIM?", AmdK7BHTraceLimitMaybe, 
AmdK7BHTraceLimitMaybe), /* value=0x0 */ + MFX(0xc001100a, "AMD_K7_HDT_CFG?", AmdK7HardwareDebugToolCfgMaybe, AmdK7HardwareDebugToolCfgMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0xc001100b, "AMD_K7_FAST_FLUSH_COUNT?", AmdK7FastFlushCountMaybe, AmdK7FastFlushCountMaybe, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x7c0 */ + MFX(0xc001100c, "AMD_K7_NODE_ID", AmdK7NodeId, AmdK7NodeId, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MVX(0xc001100d, "AMD_K8_LOGICAL_CPUS_NUM?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100e, "AMD_K8_WRMSR_BP?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001100f, "AMD_K8_WRMSR_BP_MASK?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc0011010, "AMD_K8_BH_TRACE_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0xc0011011, "AMD_K8_BH_TRACE_USRD?", 0), /* value=0x0 */ + MVI(0xc0011012, "AMD_K7_UNK_c001_1012", UINT32_MAX), + MVI(0xc0011013, "AMD_K7_UNK_c001_1013", UINT64_MAX), + MVX(0xc0011014, "AMD_K8_XCPT_BP_RIP?", 0, 0, 0), + MVX(0xc0011015, "AMD_K8_XCPT_BP_RIP_MASK?", 0, 0, 0), + MVX(0xc0011016, "AMD_K8_COND_HDT_VAL?", 0, 0, 0), + MVX(0xc0011017, "AMD_K8_COND_HDT_VAL_MASK?", 0, 0, 0), + MVX(0xc0011018, "AMD_K8_XCPT_BP_CTL?", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0xc001101d, "AMD_K8_NB_BIST?", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0xc001101e, "AMD_K8_THERMTRIP_2?", 0x1830), /* Villain? */ + MVX(0xc001101f, "AMD_K8_NB_CFG?", UINT64_C(0x40000000000008), 0, 0), + MFX(0xc0011020, "AMD_K7_LS_CFG", AmdK7LoadStoreCfg, AmdK7LoadStoreCfg, 0, UINT64_C(0xfffe012000000000), 0), /* value=0x10`00001000 */ + MFW(0xc0011021, "AMD_K7_IC_CFG", AmdK7InstrCacheCfg, AmdK7InstrCacheCfg, ~(uint64_t)UINT32_MAX), /* value=0x0 */ + MFX(0xc0011022, "AMD_K7_DC_CFG", AmdK7DataCacheCfg, AmdK7DataCacheCfg, 0, UINT64_C(0xffc0000000000000), 0), /* value=0x1c94`49000000 */ + MFN(0xc0011023, "AMD_K7_BU_CFG", AmdK7BusUnitCfg, AmdK7BusUnitCfg), /* Villain? 
value=0x10200020 */ + MFX(0xc0011024, "AMD_K7_DEBUG_CTL_2?", AmdK7DebugCtl2Maybe, AmdK7DebugCtl2Maybe, 0, UINT64_C(0xffffffffffffff00), 0), /* value=0x0 */ + MFN(0xc0011025, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMatchMaybe, AmdK7Dr0DataMatchMaybe), /* value=0x0 */ + MFN(0xc0011026, "AMD_K7_DR0_DATA_MATCH?", AmdK7Dr0DataMaskMaybe, AmdK7Dr0DataMaskMaybe), /* value=0x0 */ + MFX(0xc0011027, "AMD_K7_DR0_ADDR_MASK", AmdK7DrXAddrMaskN, AmdK7DrXAddrMaskN, 0x0, UINT64_C(0xfffffffffffff000), 0), /* value=0x0 */ + MVX(0xc0011028, "AMD_10H_UNK_c001_1028", 0, UINT64_C(0xfffffffffffffff8), 0), + MVX(0xc0011029, "AMD_10H_UNK_c001_1029", 0, ~(uint64_t)UINT32_MAX, 0), + MFX(0xc001102a, "AMD_10H_BU_CFG2", AmdFam10hBusUnitCfg2, AmdFam10hBusUnitCfg2, 0, UINT64_C(0xfff00000c0000000), 0), /* value=0x40040`050000c0 */ + MFX(0xc0011030, "AMD_10H_IBS_FETCH_CTL", AmdFam10hIbsFetchCtl, AmdFam10hIbsFetchCtl, 0, UINT64_C(0xfdfcffff00000000), 0), /* value=0x0 */ + MFI(0xc0011031, "AMD_10H_IBS_FETCH_LIN_ADDR", AmdFam10hIbsFetchLinAddr), /* value=0xffffff1f`6ffffec0 */ + MFI(0xc0011032, "AMD_10H_IBS_FETCH_PHYS_ADDR", AmdFam10hIbsFetchPhysAddr), /* value=0xffffbecf`eff1fec0 */ + MFX(0xc0011033, "AMD_10H_IBS_OP_EXEC_CTL", AmdFam10hIbsOpExecCtl, AmdFam10hIbsOpExecCtl, 0, UINT64_C(0xfffffffffff00000), 0), /* value=0x0 */ + MFN(0xc0011034, "AMD_10H_IBS_OP_RIP", AmdFam10hIbsOpRip, AmdFam10hIbsOpRip), /* value=0xffffcf06`409f2d93 */ + MFI(0xc0011035, "AMD_10H_IBS_OP_DATA", AmdFam10hIbsOpData), /* value=0x3b`7701fe63 */ + MFX(0xc0011036, "AMD_10H_IBS_OP_DATA2", AmdFam10hIbsOpData2, AmdFam10hIbsOpData2, 0, UINT64_C(0xffffffffffffffc8), 0), /* value=0x0 */ + MFI(0xc0011037, "AMD_10H_IBS_OP_DATA3", AmdFam10hIbsOpData3), /* value=0x0 */ + MFX(0xc0011038, "AMD_10H_IBS_DC_LIN_ADDR", AmdFam10hIbsDcLinAddr, AmdFam10hIbsDcLinAddr, 0, UINT64_C(0x7fffffffffff), 0), /* value=0x0 */ + MFI(0xc0011039, "AMD_10H_IBS_DC_PHYS_ADDR", AmdFam10hIbsDcPhysAddr), /* value=0x0 */ + MFO(0xc001103a, "AMD_10H_IBS_CTL", AmdFam10hIbsCtl), /* value=0x100 */ +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for Quad-Core AMD Opteron(tm) Processor 2384. + */ +static CPUMDBENTRY const g_Entry_Quad_Core_AMD_Opteron_2384 = +{ + /*.pszName = */ "Quad-Core AMD Opteron 2384", + /*.pszFullName = */ "Quad-Core AMD Opteron(tm) Processor 2384", + /*.enmVendor = */ CPUMCPUVENDOR_AMD, + /*.uFamily = */ 16, + /*.uModel = */ 4, + /*.uStepping = */ 2, + /*.enmMicroarch = */ kCpumMicroarch_AMD_K10, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 48, + /*.fMxCsrMask = */ 0x2ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_Quad_Core_AMD_Opteron_2384), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_Quad_Core_AMD_Opteron_2384)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_Quad_Core_AMD_Opteron_2384)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_Quad_Core_AMD_Opteron_2384), +}; + +#endif /* !VBOX_CPUDB_Quad_Core_AMD_Opteron_2384_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h b/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h new file mode 100644 index 00000000..e54f76b4 --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/VIA_QuadCore_L4700_1_2_GHz.h @@ -0,0 +1,404 @@ +/* $Id: VIA_QuadCore_L4700_1_2_GHz.h $ */ +/** @file + * CPU database entry "VIA QuadCore L4700 1.2+ GHz". 
+ * Generated at 2013-12-20T14:40:07Z by VBoxCpuReport v4.3.53r91411 on linux.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VBOX_CPUDB_VIA_QuadCore_L4700_1_2_GHz_h +#define VBOX_CPUDB_VIA_QuadCore_L4700_1_2_GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for VIA QuadCore L4700 @ 1.2+ GHz. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_VIA_QuadCore_L4700_1_2_GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000a, 0x746e6543, 0x736c7561, 0x48727561, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000006fd, 0x06080800, 0x008863a9, 0xbfc9fbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x02b3b001, 0x00000000, 0x00000000, 0x2c04307d, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, 0x00000000, 0x1c000021, 0x03c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00022220, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x06280202, 0x00000000, 0x00000000, 0x00000503, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x20100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x49562020, 0x75512041, 0x6f436461, 0x4c206572, 0 }, + { 0x80000004, 0x00000000, 0x00000000, 0x30303734, 0x31204020, 0x202b322e, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x08800880, 0x40100140, 0x40100140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04008140, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003024, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000000, 0x00000000, 0x00000000, 0xc0000004, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000001, 0x00000000, 0x00000000, 0x000006fd, 0x00000000, 0x00000000, 0x1ec03dcc, 0 }, + { 0xc0000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000004, 0x00000000, 0x00000000, 0x000fffb7, 0x08000955, 0x08530954, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for VIA QuadCore L4700 @ 1.2+ GHz. 
+ */ +static CPUMMSRRANGE const g_aMsrRanges_VIA_QuadCore_L4700_1_2_GHz[] = +{ + RVI(0x00000000, 0x00000005, "ZERO_0000_0000_THRU_0000_0005", 0), + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0x40 */ + RVI(0x00000007, 0x0000000f, "ZERO_0000_0007_THRU_0000_000f", 0), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x965`912e15ac */ + RVI(0x00000011, 0x0000001a, "ZERO_0000_0011_THRU_0000_001a", 0), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0x600, UINT64_C(0xfffffff0000000ff)), + RVI(0x0000001c, 0x00000029, "ZERO_0000_001c_THRU_0000_0029", 0), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, 0x2580000, UINT64_MAX, 0), /* value=0x2580000 */ + RVI(0x0000002b, 0x00000039, "ZERO_0000_002b_THRU_0000_0039", 0), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + RVI(0x0000003b, 0x00000078, "ZERO_0000_003b_THRU_0000_0078", 0), + RVI(0x0000007a, 0x0000008a, "ZERO_0000_007a_THRU_0000_008a", 0), + MFN(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId), /* value=0xc`00000000 */ + RVI(0x0000008c, 0x0000009a, "ZERO_0000_008c_THRU_0000_009a", 0), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RVI(0x0000009c, 0x000000c0, "ZERO_0000_009c_THRU_0000_00c0", 0), + RSN(0x000000c1, 0x000000c3, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, UINT64_C(0xffffff0000000000), 0), /* XXX: The range ended earlier than expected! */ + RVI(0x000000c4, 0x000000cc, "ZERO_0000_00c4_THRU_0000_00cc", 0), + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0, 0, 0), + RVI(0x000000ce, 0x000000e1, "ZERO_0000_00ce_THRU_0000_00e1", 0), + MFI(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl), /* value=0x6a204 */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RVI(0x000000e5, 0x000000e6, "ZERO_0000_00e5_THRU_0000_00e6", 0), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x2f4 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x2f2 */ + RVI(0x000000e9, 0x000000fd, "ZERO_0000_00e9_THRU_0000_00fd", 0), + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd08, 0, 0), /* value=0xd08 */ + RVI(0x000000ff, 0x0000011d, "ZERO_0000_00ff_THRU_0000_011d", 0), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, 0, UINT64_MAX, 0), /* value=0x0 */ + RVI(0x0000011f, 0x00000173, "ZERO_0000_011f_THRU_0000_0173", 0), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0xffffffff`8166bfa0 */ + RVI(0x00000177, 0x00000178, "ZERO_0000_0177_THRU_0000_0178", 0), + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + RVI(0x0000017b, 0x00000185, "ZERO_0000_017b_THRU_0000_0185", 0), + RSN(0x00000186, 0x00000188, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, 
UINT64_C(0xfffffffff8280000), 0), /* XXX: The range ended earlier than expected! */ + RVI(0x00000189, 0x00000197, "ZERO_0000_0189_THRU_0000_0197", 0), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, Ia32PerfStatus, UINT64_C(0x853095408000955), UINT64_MAX, 0), /* value=0x8530954`08000955 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x954, 0, 0), /* Might bite. value=0x954 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, UINT64_C(0xffffffffffffffe1), 0), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, UINT64_C(0xffffffffff0000e0), 0), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, 0x8320000, UINT64_MAX, 0), /* value=0x8320000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x853, 0, 0), /* value=0x853 */ + RVI(0x0000019e, 0x0000019f, "ZERO_0000_019e_THRU_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x173c89, UINT64_C(0xffffffb87939c176), 0), /* value=0x173c89 */ + RVI(0x000001a1, 0x000001d8, "ZERO_0000_01a1_THRU_0000_01d8", 0), + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffe03c)), /* value=0x1 */ + RVI(0x000001da, 0x000001f1, "ZERO_0000_01da_THRU_0000_01f1", 0), + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0x0 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0x0 */ + RVI(0x000001f4, 0x000001ff, "ZERO_0000_01f4_THRU_0000_01ff", 0), + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`80000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x70000000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`f0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xd0000001 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff800800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, 
Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + RVI(0x00000210, 0x0000024f, "ZERO_0000_0210_THRU_0000_024f", 0), + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + RVI(0x00000251, 0x00000257, "ZERO_0000_0251_THRU_0000_0257", 0), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + RVI(0x0000025a, 0x00000267, "ZERO_0000_025a_THRU_0000_0267", 0), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + RVI(0x00000270, 0x00000276, "ZERO_0000_0270_THRU_0000_0276", 0), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + RVI(0x00000278, 0x000002fe, "ZERO_0000_0278_THRU_0000_02fe", 0), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RVI(0x00000300, 0x00000308, "ZERO_0000_0300_THRU_0000_0308", 0), + RSN(0x00000309, 0x0000030a, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, UINT64_C(0xffffff0000000000), 0), + MFX(0x0000030b, "IA32_FIXED_CTR2", Ia32FixedCtrN, Ia32FixedCtrN, 0x2, UINT64_C(0xfffff8020a068061), 0), /* value=0x2d4 */ + RVI(0x0000030c, 0x0000038c, "ZERO_0000_030c_THRU_0000_038c", 0), + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, UINT64_C(0xfffffffffffff444), 0), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFN(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl), /* value=0xffffffff`ffffffff */ + RVI(0x00000390, 0x0000047f, "ZERO_0000_0390_THRU_0000_047f", 0), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0x1a040000000007), 0, 0), /* value=0x1a0400`00000007 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x3f00000016), 0, 0), /* value=0x3f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0x77f9fffe0401e172), 0, 0), /* value=0x77f9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x3efff00036dff), 0, 0), /* value=0x3efff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0x1fff000011ff), 0, 0), /* value=0x1fff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x403c0, 0, 0), /* value=0x403c0 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* 
value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x27ff, 0, 0), /* value=0x27ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", Ia32VmxVmcsEnum, ReadOnly, 0x2c, 0, 0), /* value=0x2c */ + RVI(0x0000048b, 0x000005ff, "ZERO_0000_048b_THRU_0000_05ff", 0), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + RVI(0x00000601, 0x00001106, "ZERO_0000_0601_THRU_0000_1106", 0), + MVI(0x00001107, "VIA_UNK_0000_1107", 0x2), + RVI(0x00001108, 0x0000110e, "ZERO_0000_1108_THRU_0000_110e", 0), + MVI(0x0000110f, "VIA_UNK_0000_110f", 0x2), + RVI(0x00001110, 0x00001152, "ZERO_0000_1110_THRU_0000_1152", 0), + MVO(0x00001153, "VIA_UNK_0000_1153", 0), + RVI(0x00001154, 0x000011ff, "ZERO_0000_1154_THRU_0000_11ff", 0), + MVX(0x00001200, "VIA_UNK_0000_1200", UINT64_C(0x8863a9bfc9fbff), 0x40000, 0), + MVX(0x00001201, "VIA_UNK_0000_1201", UINT64_C(0x120100800), UINT64_C(0xfffffff000000000), 0), + MVX(0x00001202, "VIA_UNK_0000_1202", 0x3dcc, UINT64_C(0xffffffffffffc233), 0), + MVX(0x00001203, "VIA_UNK_0000_1203", 0x18, 0, 0), + MVX(0x00001204, "VIA_UNK_0000_1204", UINT64_C(0x6fd00000424), 0, 0), + MVX(0x00001205, "VIA_UNK_0000_1205", UINT64_C(0x9890000000001), 0, 0), + MVX(0x00001206, "VIA_ALT_VENDOR_EBX", 0, 0, 0), + MVX(0x00001207, "VIA_ALT_VENDOR_ECDX", 0, 0, 0), + MVX(0x00001208, "VIA_UNK_0000_1208", 0, 0, 0), + MVX(0x00001209, "VIA_UNK_0000_1209", 0, 0, 0), + MVX(0x0000120a, "VIA_UNK_0000_120a", 0, 0, 0), + MVX(0x0000120b, "VIA_UNK_0000_120b", 0, 0, 0), + MVX(0x0000120c, "VIA_UNK_0000_120c", 0, 0, 0), + MVX(0x0000120d, "VIA_UNK_0000_120d", 0, 0, 0), + MVI(0x0000120e, "VIA_UNK_0000_120e", UINT64_C(0x820007b100002080)), /* Villain? */ + MVX(0x0000120f, "VIA_UNK_0000_120f", UINT64_C(0x200000001a000000), 0x18000000, 0), + MVI(0x00001210, "ZERO_0000_1210", 0), + MVX(0x00001211, "VIA_UNK_0000_1211", 0, 0, 0), + MVX(0x00001212, "VIA_UNK_0000_1212", 0, 0, 0), + MVX(0x00001213, "VIA_UNK_0000_1213", 0, ~(uint64_t)UINT32_MAX, 0), + MVO(0x00001214, "VIA_UNK_0000_1214", UINT64_C(0x5dd89e10ffffffff)), + RVI(0x00001215, 0x0000121f, "ZERO_0000_1215_THRU_0000_121f", 0), + MVO(0x00001220, "VIA_UNK_0000_1220", 0), + MVO(0x00001221, "VIA_UNK_0000_1221", 0x4dd2e713), + RVI(0x00001222, 0x0000122f, "ZERO_0000_1222_THRU_0000_122f", 0), + MVX(0x00001230, "VIA_UNK_0000_1230", UINT64_C(0x5dd89e10ffffffff), UINT32_C(0xfffffd68), 0), + MVX(0x00001231, "VIA_UNK_0000_1231", UINT64_C(0x7f9110bdc740), 0x200, 0), + MVO(0x00001232, "VIA_UNK_0000_1232", UINT64_C(0x2603448430479888)), + MVI(0x00001233, "VIA_UNK_0000_1233", UINT64_C(0xb39acda158793c27)), /* Villain? */ + MVX(0x00001234, "VIA_UNK_0000_1234", 0, 0, 0), + MVX(0x00001235, "VIA_UNK_0000_1235", 0, 0, 0), + MVX(0x00001236, "VIA_UNK_0000_1236", UINT64_C(0x5dd89e10ffffffff), UINT32_C(0xfffffd68), 0), + MVX(0x00001237, "VIA_UNK_0000_1237", UINT32_C(0xffc00026), UINT64_C(0xffffffff06000001), 0), + MVO(0x00001238, "VIA_UNK_0000_1238", 0x2), + MVI(0x00001239, "VIA_UNK_0000_1239", 0), /* Villain? 
*/ + RVI(0x0000123a, 0x0000123f, "ZERO_0000_123a_THRU_0000_123f", 0), + MVO(0x00001240, "VIA_UNK_0000_1240", 0), + MVO(0x00001241, "VIA_UNK_0000_1241", UINT64_C(0x5dd89e10ffffffff)), + MVI(0x00001242, "ZERO_0000_1242", 0), + MVX(0x00001243, "VIA_UNK_0000_1243", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001244, "ZERO_0000_1244", 0), + MVX(0x00001245, "VIA_UNK_0000_1245", UINT64_C(0x3020400000000064), UINT64_C(0xf000000000000000), 0), + MVX(0x00001246, "VIA_UNK_0000_1246", UINT64_C(0x10000000000), 0, 0), + MVX(0x00001247, "VIA_UNK_0000_1247", 0, 0, 0), + MVX(0x00001248, "VIA_UNK_0000_1248", 0, 0, 0), + MVI(0x00001249, "VIA_UNK_0000_1249", 0), /* Villain? */ + MVI(0x0000124a, "VIA_UNK_0000_124a", 0), /* Villain? */ + RVI(0x0000124b, 0x00001300, "ZERO_0000_124b_THRU_0000_1300", 0), + MVX(0x00001301, "VIA_UNK_0000_1301", 0, 0, 0), + MVX(0x00001302, "VIA_UNK_0000_1302", 0, 0, 0), + MVX(0x00001303, "VIA_UNK_0000_1303", 0, 0, 0), + MVX(0x00001304, "VIA_UNK_0000_1304", 0, 0, 0), + MVX(0x00001305, "VIA_UNK_0000_1305", 0, 0, 0), + MVX(0x00001306, "VIA_UNK_0000_1306", 0, 0, 0), + MVX(0x00001307, "VIA_UNK_0000_1307", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00001308, "VIA_UNK_0000_1308", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001309, "VIA_UNK_0000_1309", 0, ~(uint64_t)UINT32_MAX, 0), + RVI(0x0000130a, 0x0000130c, "ZERO_0000_130a_THRU_0000_130c", 0), + MVX(0x0000130d, "VIA_UNK_0000_130d", 0, UINT64_C(0xffffffffffff0000), 0), + MVX(0x0000130e, "VIA_UNK_0000_130e", UINT64_MAX, 0, 0), + RVI(0x0000130f, 0x00001311, "ZERO_0000_130f_THRU_0000_1311", 0), + MVX(0x00001312, "VIA_UNK_0000_1312", 0, 0, 0), + RVI(0x00001313, 0x00001314, "ZERO_0000_1313_THRU_0000_1314", 0), + MVX(0x00001315, "VIA_UNK_0000_1315", 0, 0, 0), + MVI(0x00001316, "ZERO_0000_1316", 0), + MVX(0x00001317, "VIA_UNK_0000_1317", 0, 0, 0), + MVX(0x00001318, "VIA_UNK_0000_1318", 0, 0, 0), + MVI(0x00001319, "ZERO_0000_1319", 0), + MVX(0x0000131a, "VIA_UNK_0000_131a", 0, 0, 0), + MVX(0x0000131b, "VIA_UNK_0000_131b", 0x3c20954, 0, 0), + RVI(0x0000131c, 0x00001401, "ZERO_0000_131c_THRU_0000_1401", 0), + MVO(0x00001402, "VIA_UNK_0000_1402", 0x148c48), + MVX(0x00001403, "VIA_UNK_0000_1403", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001404, "VIA_UNK_0000_1404", 0), /* Villain? */ + MVI(0x00001405, "VIA_UNK_0000_1405", UINT32_C(0x80fffffc)), /* Villain? */ + MVX(0x00001406, "VIA_UNK_0000_1406", UINT32_C(0xc842c800), ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001407, "VIA_UNK_0000_1407", UINT32_C(0x880400c0), ~(uint64_t)UINT32_MAX, 0), + RVI(0x00001408, 0x0000140f, "ZERO_0000_1408_THRU_0000_140f", 0), + MVX(0x00001410, "VIA_UNK_0000_1410", 0xfa0, UINT64_C(0xfffffffffff00000), 0), + MVX(0x00001411, "VIA_UNK_0000_1411", 0xa5a, UINT64_C(0xfffffffffff00000), 0), + MVI(0x00001412, "VIA_UNK_0000_1412", 0x4090), + MVI(0x00001413, "VIA_UNK_0000_1413", 0), /* Villain? 
*/ + MVX(0x00001414, "VIA_UNK_0000_1414", 0x5a, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001415, "VIA_UNK_0000_1415", 0x5a, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001416, "VIA_UNK_0000_1416", 0x6e, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001417, "VIA_UNK_0000_1417", 0x32, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001418, "VIA_UNK_0000_1418", 0xa, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001419, "VIA_UNK_0000_1419", 0x14, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141a, "VIA_UNK_0000_141a", 0x28, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141b, "VIA_UNK_0000_141b", 0x3c, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141c, "VIA_UNK_0000_141c", 0x69, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141d, "VIA_UNK_0000_141d", 0x69, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141e, "VIA_UNK_0000_141e", 0x69, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141f, "VIA_UNK_0000_141f", 0x32, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001420, "VIA_UNK_0000_1420", 0x3, UINT64_C(0xffffffffffffc000), 0), + MVX(0x00001421, "VIA_UNK_0000_1421", 0x1f8, UINT64_C(0xfffffffffffc0000), 0), + MVX(0x00001422, "VIA_UNK_0000_1422", 0x1f4, UINT64_C(0xfffffffffffc0000), 0), + MVI(0x00001423, "VIA_UNK_0000_1423", 0xfffb7), + MVI(0x00001424, "VIA_UNK_0000_1424", 0x5b6), + MVI(0x00001425, "VIA_UNK_0000_1425", 0x65508), + MVI(0x00001426, "VIA_UNK_0000_1426", 0x843b), + MVX(0x00001427, "VIA_UNK_0000_1427", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001428, "VIA_UNK_0000_1428", 0x1ffffff, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001429, "VIA_UNK_0000_1429", 0, UINT64_C(0xfffffffffff00000), 0), + MVI(0x0000142a, "VIA_UNK_0000_142a", 0x1c85d), + MVO(0x0000142b, "VIA_UNK_0000_142b", 0xf7e), + MVI(0x0000142c, "VIA_UNK_0000_142c", 0x20080), /* Villain? */ + MVI(0x0000142d, "ZERO_0000_142d", 0), + MVI(0x0000142e, "VIA_UNK_0000_142e", 0x8000000), /* Villain? */ + MVX(0x0000142f, "VIA_UNK_0000_142f", UINT64_C(0xffe57bea2ff3fdff), 0, 0), + RVI(0x00001430, 0x00001433, "ZERO_0000_1430_THRU_0000_1433", 0), + MVX(0x00001434, "VIA_UNK_0000_1434", 0x853f0e0, UINT64_C(0xffffffff7e7b0000), 0), + MVI(0x00001435, "VIA_UNK_0000_1435", 0x8000838), /* Villain? */ + MVI(0x00001436, "VIA_UNK_0000_1436", 0x200004f), /* Villain? */ + MVX(0x00001437, "VIA_UNK_0000_1437", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001438, "VIA_UNK_0000_1438", 0x7004801c), /* Villain? 
*/ + MVI(0x00001439, "ZERO_0000_1439", 0), + MVX(0x0000143a, "VIA_UNK_0000_143a", 0x20000, ~(uint64_t)UINT32_MAX, 0), + MVI(0x0000143b, "ZERO_0000_143b", 0), + MVX(0x0000143c, "VIA_UNK_0000_143c", 0, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000143d, "VIA_UNK_0000_143d", 0, UINT64_C(0xfffffffffffffe00), 0), + RVI(0x0000143e, 0x0000143f, "ZERO_0000_143e_THRU_0000_143f", 0), + MVX(0x00001440, "VIA_UNK_0000_1440", UINT32_C(0x80e00954), ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001441, "VIA_UNK_0000_1441", 0xf00954, UINT64_C(0xffffffff00ff7f7f), 0), + MVX(0x00001442, "VIA_UNK_0000_1442", 0xf00954, UINT64_C(0xffffffff00ff7f7f), 0), + RVI(0x00001443, 0x00001448, "ZERO_0000_1443_THRU_0000_1448", 0), + MVI(0x00001449, "VIA_UNK_0000_1449", UINT64_C(0xfffff7e247)), + RVI(0x0000144a, 0x0000144f, "ZERO_0000_144a_THRU_0000_144f", 0), + MVX(0x00001450, "VIA_UNK_0000_1450", 0, UINT64_C(0xffffffffffffe000), 0), + MVX(0x00001451, "VIA_UNK_0000_1451", 0, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001452, "VIA_UNK_0000_1452", 0, UINT64_C(0xffffffffff000000), 0), + MVI(0x00001453, "VIA_UNK_0000_1453", 0x3fffffff), + RVI(0x00001454, 0x0000145f, "ZERO_0000_1454_THRU_0000_145f", 0), + MVX(0x00001460, "VIA_UNK_0000_1460", 0, UINT64_C(0xffffffffffffffc0), 0), + MVX(0x00001461, "VIA_UNK_0000_1461", 0x7b, UINT64_C(0xffffffffffffff00), 0), + MVX(0x00001462, "VIA_UNK_0000_1462", 0x76, UINT64_C(0xffffffffffffff00), 0), + MVI(0x00001463, "VIA_UNK_0000_1463", 0x4a), + MVI(0x00001464, "ZERO_0000_1464", 0), + MVI(0x00001465, "VIA_UNK_0000_1465", 0xc6), + MVI(0x00001466, "VIA_UNK_0000_1466", UINT64_C(0x800000053)), + RVI(0x00001467, 0x0000146f, "ZERO_0000_1467_THRU_0000_146f", 0), + MVX(0x00001470, "VIA_UNK_0000_1470", UINT64_C(0x5dd89e10ffffffff), UINT32_C(0xfffffd68), 0), + MVI(0x00001471, "VIA_UNK_0000_1471", 0x2a000000), + RVI(0x00001472, 0x0000147f, "ZERO_0000_1472_THRU_0000_147f", 0), + MVI(0x00001480, "VIA_UNK_0000_1480", 0x3907), + MVI(0x00001481, "VIA_UNK_0000_1481", 0x12c0), + MVI(0x00001482, "VIA_UNK_0000_1482", 0x320), + MVI(0x00001483, "VIA_UNK_0000_1483", 0x3), + MVI(0x00001484, "VIA_UNK_0000_1484", 0x1647), + MVI(0x00001485, "VIA_UNK_0000_1485", 0x3b7), + MVI(0x00001486, "VIA_UNK_0000_1486", 0x443), + RVI(0x00001487, 0x0000148f, "ZERO_0000_1487_THRU_0000_148f", 0), + MVX(0x00001490, "VIA_UNK_0000_1490", 0xf5, UINT64_C(0xffffffffffffc000), 0), + MVX(0x00001491, "VIA_UNK_0000_1491", 0x200, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001492, "VIA_UNK_0000_1492", 0, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001493, "VIA_UNK_0000_1493", 0x4, UINT64_C(0xffffffffffff0000), 0), + MVX(0x00001494, "VIA_UNK_0000_1494", 0x100, UINT64_C(0xffffffffffff0000), 0), + MVX(0x00001495, "VIA_UNK_0000_1495", 0x100, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001496, "VIA_UNK_0000_1496", 0x8, UINT64_C(0xffffffffffff0000), 0), + MVX(0x00001497, "VIA_UNK_0000_1497", 0, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001498, "VIA_UNK_0000_1498", 0xffffff, UINT64_C(0xfffffffffffffe3c), 0), + MVI(0x00001499, "VIA_UNK_0000_1499", 0x2c5), + MVI(0x0000149a, "VIA_UNK_0000_149a", 0x1c1), + MVI(0x0000149b, "VIA_UNK_0000_149b", 0x2c5a), + MVI(0x0000149c, "VIA_UNK_0000_149c", 0x1c8f), + RVI(0x0000149d, 0x0000149e, "ZERO_0000_149d_THRU_0000_149e", 0), + MVI(0x0000149f, "VIA_UNK_0000_149f", 0x1c9), + RVI(0x000014a0, 0x00001522, "ZERO_0000_14a0_THRU_0000_1522", 0), + MFN(0x00001523, "VIA_UNK_0000_1523", WriteOnly, IgnoreWrite), + RVI(0x00001524, 0x00003179, "ZERO_0000_1524_THRU_0000_3179", 0), + MVO(0x0000317a, "VIA_UNK_0000_317a", 
UINT64_C(0x139f29749595b8)), + MVO(0x0000317b, "VIA_UNK_0000_317b", UINT64_C(0x5dd89e10ffffffff)), + MVI(0x0000317c, "ZERO_0000_317c", 0), + MFN(0x0000317d, "VIA_UNK_0000_317d", WriteOnly, IgnoreWrite), + MFN(0x0000317e, "VIA_UNK_0000_317e", WriteOnly, IgnoreWrite), + MVI(0x0000317f, "VIA_UNK_0000_317f", 0), /* Villain? */ + RVI(0x00003180, 0x00003fff, "ZERO_0000_3180_THRU_0000_3fff", 0), + RVI(0x40000000, 0x40003fff, "ZERO_4000_0000_THRU_4000_3fff", 0), + RVI(0x80000000, 0x80000197, "ZERO_8000_0000_THRU_8000_0197", 0), + RVI(0x80000199, 0x80003fff, "ZERO_8000_0199_THRU_8000_3fff", 0), + RVI(0xc0000000, 0xc000007f, "ZERO_c000_0000_THRU_c000_007f", 0), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xffffffffffffd2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffffff`81669af0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xffffffff`8166c1d0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x3700 */ + RVI(0xc0000085, 0xc00000ff, "ZERO_c000_0085_THRU_c000_00ff", 0), + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x7f91`10bdc740 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffff8800`6fd80000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x0 */ + RVI(0xc0000104, 0xc0003fff, "ZERO_c000_0104_THRU_c000_3fff", 0), +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for VIA QuadCore L4700 @ 1.2+ GHz. + */ +static CPUMDBENTRY const g_Entry_VIA_QuadCore_L4700_1_2_GHz = +{ + /*.pszName = */ "VIA QuadCore L4700 1.2+ GHz", + /*.pszFullName = */ "VIA QuadCore L4700 @ 1.2+ GHz", + /*.enmVendor = */ CPUMCPUVENDOR_VIA, + /*.uFamily = */ 6, + /*.uModel = */ 15, + /*.uStepping = */ 13, + /*.enmMicroarch = */ kCpumMicroarch_VIA_Isaiah, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_267MHZ, /*??*/ + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 36, + /*.fMxCsrMask = */ 0xffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_VIA_QuadCore_L4700_1_2_GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_VIA_QuadCore_L4700_1_2_GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_VIA_QuadCore_L4700_1_2_GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_VIA_QuadCore_L4700_1_2_GHz), +}; + +#endif /* !VBOX_CPUDB_VIA_QuadCore_L4700_1_2_GHz_h */ + diff --git a/src/VBox/VMM/VMMR3/cpus/ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h b/src/VBox/VMM/VMMR3/cpus/ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h new file mode 100644 index 00000000..20d6c58c --- /dev/null +++ b/src/VBox/VMM/VMMR3/cpus/ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h @@ -0,0 +1,417 @@ +/* $Id: ZHAOXIN_KaiXian_KX_U5581_1_8GHz.h $ */ +/** @file + * CPU database entry "ZHAOXIN KaiXian KX-U5581 1.8GHz" + * Generated at 2019-01-15T08:37:25Z by VBoxCpuReport v5.2.22r126460 on linux.amd64. + */ + +/* + * Copyright (C) 2013-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +#ifndef VBOX_CPUDB_ZHAOXIN_KaiXian_KX_U5581_1_8GHz_h +#define VBOX_CPUDB_ZHAOXIN_KaiXian_KX_U5581_1_8GHz_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +#ifndef CPUM_DB_STANDALONE +/** + * CPUID leaves for ZHAOXIN KaiXian KX-U5581@1.8GHz. + */ +static CPUMCPUIDLEAF const g_aCpuIdLeaves_ZHAOXIN_KaiXian_KX_U5581_1_8GHz[] = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000d, 0x68532020, 0x20206961, 0x68676e61, 0 }, + { 0x00000001, 0x00000000, 0x00000000, 0x000107b5, 0x07080800, 0x7eda63eb, 0xbfcbfbff, 0 | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID | CPUMCPUIDLEAF_F_CONTAINS_APIC }, + { 0x00000002, 0x00000000, 0x00000000, 0x635af001, 0x00000000, 0x00000000, 0x000000ff, 0 }, + { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000004, 0x00000000, UINT32_MAX, 0x1c000121, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000001, UINT32_MAX, 0x1c000122, 0x01c0003f, 0x0000003f, 0x00000000, 0 }, + { 0x00000004, 0x00000002, UINT32_MAX, 0x1c00c143, 0x03c0003f, 0x00000fff, 0x00000003, 0 }, + { 0x00000004, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000005, 0x00000000, 0x00000000, 0x00000040, 0x00000040, 0x00000003, 0x00022220, 0 }, + { 0x00000006, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000007, 0x00000000, UINT32_MAX, 0x00000000, 0x000c258b, 0x00000000, 0x24000000, 0 }, + { 0x00000007, 0x00000001, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000a, 0x00000000, 0x00000000, 0x07300402, 0x00000000, 0x00000000, 0x00000603, 0 }, + { 0x0000000b, 0x00000000, UINT32_MAX, 0x00000000, 0x00000001, 0x00000100, 0x00000007, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000001, UINT32_MAX, 0x00000003, 0x00000008, 0x00000201, 0x00000007, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000b, 0x00000002, UINT32_MAX, 0x00000000, 0x00000000, 0x00000002, 0x00000007, 0 | CPUMCPUIDLEAF_F_INTEL_TOPOLOGY_SUBLEAVES | CPUMCPUIDLEAF_F_CONTAINS_APIC_ID }, + { 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000000, UINT32_MAX, 0x00000007, 0x00000340, 0x00000340, 0x00000000, 0 }, + { 0x0000000d, 0x00000001, UINT32_MAX, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000002, UINT32_MAX, 0x00000100, 0x00000240, 0x00000000, 0x00000000, 0 }, + { 0x0000000d, 0x00000003, UINT32_MAX, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000000, 0x00000000, 0x00000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0x80000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000121, 0x2c100800, 0 }, + { 0x80000002, 0x00000000, 0x00000000, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0 }, + { 0x80000003, 0x00000000, 0x00000000, 0x4f41485a, 0x204e4958, 0x5869614b, 0x206e6169, 0 }, + { 
0x80000004, 0x00000000, 0x00000000, 0x552d584b, 0x31383535, 0x382e3140, 0x007a4847, 0 }, + { 0x80000005, 0x00000000, 0x00000000, 0x04200420, 0x06600660, 0x20080140, 0x20080140, 0 }, + { 0x80000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x10008140, 0x00000000, 0 }, + { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100, 0 }, + { 0x80000008, 0x00000000, 0x00000000, 0x00003028, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000000, 0x00000000, 0x00000000, 0xc0000004, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000001, 0x00000000, 0x00000000, 0x000107b5, 0x00000000, 0x00000000, 0x1ec33dfc, 0 }, + { 0xc0000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0 }, + { 0xc0000004, 0x00000000, 0x00000000, 0x00000025, 0x18002463, 0x18502461, 0x00000000, 0 }, +}; +#endif /* !CPUM_DB_STANDALONE */ + + +#ifndef CPUM_DB_STANDALONE +/** + * MSR ranges for ZHAOXIN KaiXian KX-U5581@1.8GHz. + */ +static CPUMMSRRANGE const g_aMsrRanges_ZHAOXIN_KaiXian_KX_U5581_1_8GHz[] = +{ + RVI(0x00000000, 0x00000005, "ZERO_0000_0000_THRU_0000_0005", 0), + MFX(0x00000006, "IA32_MONITOR_FILTER_LINE_SIZE", Ia32MonitorFilterLineSize, Ia32MonitorFilterLineSize, 0, UINT64_C(0xffffffffffff0000), 0), /* value=0x40 */ + RVI(0x00000007, 0x0000000f, "ZERO_0000_0007_THRU_0000_000f", 0), + MFN(0x00000010, "IA32_TIME_STAMP_COUNTER", Ia32TimestampCounter, Ia32TimestampCounter), /* value=0x965`912e15ac */ + RVI(0x00000011, 0x0000001a, "ZERO_0000_0011_THRU_0000_001a", 0), + MFX(0x0000001b, "IA32_APIC_BASE", Ia32ApicBase, Ia32ApicBase, UINT32_C(0xfee00800), 0x600, UINT64_C(0xfffffff0000000ff)), + RVI(0x0000001c, 0x00000029, "ZERO_0000_001c_THRU_0000_0029", 0), + MFX(0x0000002a, "EBL_CR_POWERON", IntelEblCrPowerOn, IntelEblCrPowerOn, 0x2580000, UINT64_MAX, 0), /* value=0x2580000 */ + RVI(0x0000002b, 0x00000039, "ZERO_0000_002b_THRU_0000_0039", 0), + MFO(0x0000003a, "IA32_FEATURE_CONTROL", Ia32FeatureControl), /* value=0x5 */ + RVI(0x0000003b, 0x00000078, "ZERO_0000_003b_THRU_0000_0078", 0), + RVI(0x0000007a, 0x0000008a, "ZERO_0000_007a_THRU_0000_008a", 0), + MFN(0x0000008b, "BBL_CR_D3|BIOS_SIGN", Ia32BiosSignId, Ia32BiosSignId), /* value=0xc`00000000 */ + RVI(0x0000008c, 0x0000009a, "ZERO_0000_008c_THRU_0000_009a", 0), + MFO(0x0000009b, "IA32_SMM_MONITOR_CTL", Ia32SmmMonitorCtl), /* value=0x0 */ + RVI(0x0000009c, 0x000000c0, "ZERO_0000_009c_THRU_0000_00c0", 0), + RSN(0x000000c1, 0x000000c3, "IA32_PMCn", Ia32PmcN, Ia32PmcN, 0x0, UINT64_C(0xffffff0000000000), 0), /* XXX: The range ended earlier than expected! 
*/ + RVI(0x000000c4, 0x000000cc, "ZERO_0000_00c4_THRU_0000_00cc", 0), + MFX(0x000000cd, "MSR_FSB_FREQ", IntelP6FsbFrequency, ReadOnly, 0, 0, 0), + RVI(0x000000ce, 0x000000e1, "ZERO_0000_00ce_THRU_0000_00e1", 0), + MFI(0x000000e2, "MSR_PKG_CST_CONFIG_CONTROL", IntelPkgCStConfigControl), /* value=0x6a204 */ + MFX(0x000000e3, "C2_SMM_CST_MISC_INFO", IntelCore2SmmCStMiscInfo, IntelCore2SmmCStMiscInfo, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + MFX(0x000000e4, "MSR_PMG_IO_CAPTURE_BASE", IntelPmgIoCaptureBase, IntelPmgIoCaptureBase, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x0 */ + RVI(0x000000e5, 0x000000e6, "ZERO_0000_00e5_THRU_0000_00e6", 0), + MFN(0x000000e7, "IA32_MPERF", Ia32MPerf, Ia32MPerf), /* value=0x2f4 */ + MFN(0x000000e8, "IA32_APERF", Ia32APerf, Ia32APerf), /* value=0x2f2 */ + RVI(0x000000e9, 0x000000fd, "ZERO_0000_00e9_THRU_0000_00fd", 0), + MFX(0x000000fe, "IA32_MTRRCAP", Ia32MtrrCap, ReadOnly, 0xd08, 0, 0), /* value=0xd08 */ + RVI(0x000000ff, 0x0000011d, "ZERO_0000_00ff_THRU_0000_011d", 0), + MFX(0x0000011e, "BBL_CR_CTL3", IntelBblCrCtl3, IntelBblCrCtl3, 0, UINT64_MAX, 0), /* value=0x0 */ + RVI(0x0000011f, 0x00000173, "ZERO_0000_011f_THRU_0000_0173", 0), + MFX(0x00000174, "IA32_SYSENTER_CS", Ia32SysEnterCs, Ia32SysEnterCs, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x10 */ + MFN(0x00000175, "IA32_SYSENTER_ESP", Ia32SysEnterEsp, Ia32SysEnterEsp), /* value=0x0 */ + MFN(0x00000176, "IA32_SYSENTER_EIP", Ia32SysEnterEip, Ia32SysEnterEip), /* value=0xffffffff`8166bfa0 */ + RVI(0x00000177, 0x00000178, "ZERO_0000_0177_THRU_0000_0178", 0), + MFX(0x00000179, "IA32_MCG_CAP", Ia32McgCap, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFX(0x0000017a, "IA32_MCG_STATUS", Ia32McgStatus, Ia32McgStatus, 0, UINT64_C(0xfffffffffffffff8), 0), /* value=0x0 */ + RVI(0x0000017b, 0x00000185, "ZERO_0000_017b_THRU_0000_0185", 0), + RSN(0x00000186, 0x00000188, "IA32_PERFEVTSELn", Ia32PerfEvtSelN, Ia32PerfEvtSelN, 0x0, UINT64_C(0xfffffffff8280000), 0), /* XXX: The range ended earlier than expected! */ + RVI(0x00000189, 0x00000197, "ZERO_0000_0189_THRU_0000_0197", 0), + MFX(0x00000198, "IA32_PERF_STATUS", Ia32PerfStatus, Ia32PerfStatus, UINT64_C(0x853095408000955), UINT64_MAX, 0), /* value=0x8530954`08000955 */ + MFX(0x00000199, "IA32_PERF_CTL", Ia32PerfCtl, Ia32PerfCtl, 0x954, 0, 0), /* Might bite. 
value=0x954 */ + MFX(0x0000019a, "IA32_CLOCK_MODULATION", Ia32ClockModulation, Ia32ClockModulation, 0x2, UINT64_C(0xffffffffffffffe1), 0), /* value=0x2 */ + MFX(0x0000019b, "IA32_THERM_INTERRUPT", Ia32ThermInterrupt, Ia32ThermInterrupt, 0, UINT64_C(0xffffffffff0000e0), 0), /* value=0x0 */ + MFX(0x0000019c, "IA32_THERM_STATUS", Ia32ThermStatus, Ia32ThermStatus, 0x8320000, UINT64_MAX, 0), /* value=0x8320000 */ + MFX(0x0000019d, "IA32_THERM2_CTL", Ia32Therm2Ctl, ReadOnly, 0x853, 0, 0), /* value=0x853 */ + RVI(0x0000019e, 0x0000019f, "ZERO_0000_019e_THRU_0000_019f", 0), + MFX(0x000001a0, "IA32_MISC_ENABLE", Ia32MiscEnable, Ia32MiscEnable, 0x173c89, UINT64_C(0xffffffb87939c176), 0), /* value=0x173c89 */ + RVI(0x000001a1, 0x000001d8, "ZERO_0000_01a1_THRU_0000_01d8", 0), + MFX(0x000001d9, "IA32_DEBUGCTL", Ia32DebugCtl, Ia32DebugCtl, 0, 0, UINT64_C(0xffffffffffffe03c)), /* value=0x1 */ + RVI(0x000001da, 0x000001f1, "ZERO_0000_01da_THRU_0000_01f1", 0), + MFO(0x000001f2, "IA32_SMRR_PHYSBASE", Ia32SmrrPhysBase), /* value=0x0 */ + MFO(0x000001f3, "IA32_SMRR_PHYSMASK", Ia32SmrrPhysMask), /* value=0x0 */ + RVI(0x000001f4, 0x000001ff, "ZERO_0000_01f4_THRU_0000_01ff", 0), + MFX(0x00000200, "IA32_MTRR_PHYS_BASE0", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x0, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x6 */ + MFX(0x00000201, "IA32_MTRR_PHYS_MASK0", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x0, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`80000800 */ + MFX(0x00000202, "IA32_MTRR_PHYS_BASE1", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x1, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x70000000 */ + MFX(0x00000203, "IA32_MTRR_PHYS_MASK1", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x1, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`f0000800 */ + MFX(0x00000204, "IA32_MTRR_PHYS_BASE2", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x2, 0, UINT64_C(0xfffffff000000ff8)), /* value=0xd0000001 */ + MFX(0x00000205, "IA32_MTRR_PHYS_MASK2", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x2, 0, UINT64_C(0xfffffff0000007ff)), /* value=0xf`ff800800 */ + MFX(0x00000206, "IA32_MTRR_PHYS_BASE3", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x3, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000207, "IA32_MTRR_PHYS_MASK3", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x3, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x00000208, "IA32_MTRR_PHYS_BASE4", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x4, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x00000209, "IA32_MTRR_PHYS_MASK4", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x4, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020a, "IA32_MTRR_PHYS_BASE5", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x5, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020b, "IA32_MTRR_PHYS_MASK5", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x5, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020c, "IA32_MTRR_PHYS_BASE6", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x6, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020d, "IA32_MTRR_PHYS_MASK6", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x6, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + MFX(0x0000020e, "IA32_MTRR_PHYS_BASE7", Ia32MtrrPhysBaseN, Ia32MtrrPhysBaseN, 0x7, 0, UINT64_C(0xfffffff000000ff8)), /* value=0x0 */ + MFX(0x0000020f, "IA32_MTRR_PHYS_MASK7", Ia32MtrrPhysMaskN, Ia32MtrrPhysMaskN, 0x7, 0, UINT64_C(0xfffffff0000007ff)), /* value=0x0 */ + RVI(0x00000210, 0x0000024f, "ZERO_0000_0210_THRU_0000_024f", 0), + MFS(0x00000250, "IA32_MTRR_FIX64K_00000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix64K_00000), + 
RVI(0x00000251, 0x00000257, "ZERO_0000_0251_THRU_0000_0257", 0), + MFS(0x00000258, "IA32_MTRR_FIX16K_80000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_80000), + MFS(0x00000259, "IA32_MTRR_FIX16K_A0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix16K_A0000), + RVI(0x0000025a, 0x00000267, "ZERO_0000_025a_THRU_0000_0267", 0), + MFS(0x00000268, "IA32_MTRR_FIX4K_C0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C0000), + MFS(0x00000269, "IA32_MTRR_FIX4K_C8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_C8000), + MFS(0x0000026a, "IA32_MTRR_FIX4K_D0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D0000), + MFS(0x0000026b, "IA32_MTRR_FIX4K_D8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_D8000), + MFS(0x0000026c, "IA32_MTRR_FIX4K_E0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E0000), + MFS(0x0000026d, "IA32_MTRR_FIX4K_E8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_E8000), + MFS(0x0000026e, "IA32_MTRR_FIX4K_F0000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F0000), + MFS(0x0000026f, "IA32_MTRR_FIX4K_F8000", Ia32MtrrFixed, Ia32MtrrFixed, GuestMsrs.msr.MtrrFix4K_F8000), + RVI(0x00000270, 0x00000276, "ZERO_0000_0270_THRU_0000_0276", 0), + MFS(0x00000277, "IA32_PAT", Ia32Pat, Ia32Pat, Guest.msrPAT), + RVI(0x00000278, 0x000002fe, "ZERO_0000_0278_THRU_0000_02fe", 0), + MFZ(0x000002ff, "IA32_MTRR_DEF_TYPE", Ia32MtrrDefType, Ia32MtrrDefType, GuestMsrs.msr.MtrrDefType, 0, UINT64_C(0xfffffffffffff3f8)), + RVI(0x00000300, 0x00000308, "ZERO_0000_0300_THRU_0000_0308", 0), + RSN(0x00000309, 0x0000030a, "IA32_FIXED_CTRn", Ia32FixedCtrN, Ia32FixedCtrN, 0x0, UINT64_C(0xffffff0000000000), 0), + MFX(0x0000030b, "IA32_FIXED_CTR2", Ia32FixedCtrN, Ia32FixedCtrN, 0x2, UINT64_C(0xfffff8020a068061), 0), /* value=0x2d4 */ + RVI(0x0000030c, 0x0000038c, "ZERO_0000_030c_THRU_0000_038c", 0), + MFX(0x0000038d, "IA32_FIXED_CTR_CTRL", Ia32FixedCtrCtrl, Ia32FixedCtrCtrl, 0, UINT64_C(0xfffffffffffff444), 0), /* value=0x0 */ + MFX(0x0000038e, "IA32_PERF_GLOBAL_STATUS", Ia32PerfGlobalStatus, ReadOnly, 0, 0, 0), /* value=0x0 */ + MFN(0x0000038f, "IA32_PERF_GLOBAL_CTRL", Ia32PerfGlobalCtrl, Ia32PerfGlobalCtrl), /* value=0xffffffff`ffffffff */ + RVI(0x00000390, 0x0000047f, "ZERO_0000_0390_THRU_0000_047f", 0), + MFX(0x00000480, "IA32_VMX_BASIC", Ia32VmxBasic, ReadOnly, UINT64_C(0x1a040000000007), 0, 0), /* value=0x1a0400`00000007 */ + MFX(0x00000481, "IA32_VMX_PINBASED_CTLS", Ia32VmxPinbasedCtls, ReadOnly, UINT64_C(0x3f00000016), 0, 0), /* value=0x3f`00000016 */ + MFX(0x00000482, "IA32_VMX_PROCBASED_CTLS", Ia32VmxProcbasedCtls, ReadOnly, UINT64_C(0x77f9fffe0401e172), 0, 0), /* value=0x77f9fffe`0401e172 */ + MFX(0x00000483, "IA32_VMX_EXIT_CTLS", Ia32VmxExitCtls, ReadOnly, UINT64_C(0x3efff00036dff), 0, 0), /* value=0x3efff`00036dff */ + MFX(0x00000484, "IA32_VMX_ENTRY_CTLS", Ia32VmxEntryCtls, ReadOnly, UINT64_C(0x1fff000011ff), 0, 0), /* value=0x1fff`000011ff */ + MFX(0x00000485, "IA32_VMX_MISC", Ia32VmxMisc, ReadOnly, 0x403c0, 0, 0), /* value=0x403c0 */ + MFX(0x00000486, "IA32_VMX_CR0_FIXED0", Ia32VmxCr0Fixed0, ReadOnly, UINT32_C(0x80000021), 0, 0), /* value=0x80000021 */ + MFX(0x00000487, "IA32_VMX_CR0_FIXED1", Ia32VmxCr0Fixed1, ReadOnly, UINT32_MAX, 0, 0), /* value=0xffffffff */ + MFX(0x00000488, "IA32_VMX_CR4_FIXED0", Ia32VmxCr4Fixed0, ReadOnly, 0x2000, 0, 0), /* value=0x2000 */ + MFX(0x00000489, "IA32_VMX_CR4_FIXED1", Ia32VmxCr4Fixed1, ReadOnly, 0x27ff, 0, 0), /* value=0x27ff */ + MFX(0x0000048a, "IA32_VMX_VMCS_ENUM", 
Ia32VmxVmcsEnum, ReadOnly, 0x2c, 0, 0), /* value=0x2c */ + RVI(0x0000048b, 0x000005ff, "ZERO_0000_048b_THRU_0000_05ff", 0), + MFN(0x00000600, "IA32_DS_AREA", Ia32DsArea, Ia32DsArea), /* value=0x0 */ + RVI(0x00000601, 0x00001106, "ZERO_0000_0601_THRU_0000_1106", 0), + MVI(0x00001107, "VIA_UNK_0000_1107", 0x2), + RVI(0x00001108, 0x0000110e, "ZERO_0000_1108_THRU_0000_110e", 0), + MVI(0x0000110f, "VIA_UNK_0000_110f", 0x2), + RVI(0x00001110, 0x00001152, "ZERO_0000_1110_THRU_0000_1152", 0), + MVO(0x00001153, "VIA_UNK_0000_1153", 0), + RVI(0x00001154, 0x000011ff, "ZERO_0000_1154_THRU_0000_11ff", 0), + MVX(0x00001200, "VIA_UNK_0000_1200", UINT64_C(0x8863a9bfc9fbff), 0x40000, 0), + MVX(0x00001201, "VIA_UNK_0000_1201", UINT64_C(0x120100800), UINT64_C(0xfffffff000000000), 0), + MVX(0x00001202, "VIA_UNK_0000_1202", 0x3dcc, UINT64_C(0xffffffffffffc233), 0), + MVX(0x00001203, "VIA_UNK_0000_1203", 0x18, 0, 0), + MVX(0x00001204, "VIA_UNK_0000_1204", UINT64_C(0x6fd00000424), 0, 0), + MVX(0x00001205, "VIA_UNK_0000_1205", UINT64_C(0x9890000000001), 0, 0), + MVX(0x00001206, "VIA_ALT_VENDOR_EBX", 0, 0, 0), + MVX(0x00001207, "VIA_ALT_VENDOR_ECDX", 0, 0, 0), + MVX(0x00001208, "VIA_UNK_0000_1208", 0, 0, 0), + MVX(0x00001209, "VIA_UNK_0000_1209", 0, 0, 0), + MVX(0x0000120a, "VIA_UNK_0000_120a", 0, 0, 0), + MVX(0x0000120b, "VIA_UNK_0000_120b", 0, 0, 0), + MVX(0x0000120c, "VIA_UNK_0000_120c", 0, 0, 0), + MVX(0x0000120d, "VIA_UNK_0000_120d", 0, 0, 0), + MVI(0x0000120e, "VIA_UNK_0000_120e", UINT64_C(0x820007b100002080)), /* Villain? */ + MVX(0x0000120f, "VIA_UNK_0000_120f", UINT64_C(0x200000001a000000), 0x18000000, 0), + MVI(0x00001210, "ZERO_0000_1210", 0), + MVX(0x00001211, "VIA_UNK_0000_1211", 0, 0, 0), + MVX(0x00001212, "VIA_UNK_0000_1212", 0, 0, 0), + MVX(0x00001213, "VIA_UNK_0000_1213", 0, ~(uint64_t)UINT32_MAX, 0), + MVO(0x00001214, "VIA_UNK_0000_1214", UINT64_C(0x5dd89e10ffffffff)), + RVI(0x00001215, 0x0000121f, "ZERO_0000_1215_THRU_0000_121f", 0), + MVO(0x00001220, "VIA_UNK_0000_1220", 0), + MVO(0x00001221, "VIA_UNK_0000_1221", 0x4dd2e713), + RVI(0x00001222, 0x0000122f, "ZERO_0000_1222_THRU_0000_122f", 0), + MVX(0x00001230, "VIA_UNK_0000_1230", UINT64_C(0x5dd89e10ffffffff), UINT32_C(0xfffffd68), 0), + MVX(0x00001231, "VIA_UNK_0000_1231", UINT64_C(0x7f9110bdc740), 0x200, 0), + MVO(0x00001232, "VIA_UNK_0000_1232", UINT64_C(0x2603448430479888)), + MVI(0x00001233, "VIA_UNK_0000_1233", UINT64_C(0xb39acda158793c27)), /* Villain? */ + MVX(0x00001234, "VIA_UNK_0000_1234", 0, 0, 0), + MVX(0x00001235, "VIA_UNK_0000_1235", 0, 0, 0), + MVX(0x00001236, "VIA_UNK_0000_1236", UINT64_C(0x5dd89e10ffffffff), UINT32_C(0xfffffd68), 0), + MVX(0x00001237, "VIA_UNK_0000_1237", UINT32_C(0xffc00026), UINT64_C(0xffffffff06000001), 0), + MVO(0x00001238, "VIA_UNK_0000_1238", 0x2), + MVI(0x00001239, "VIA_UNK_0000_1239", 0), /* Villain? */ + RVI(0x0000123a, 0x0000123f, "ZERO_0000_123a_THRU_0000_123f", 0), + MVO(0x00001240, "VIA_UNK_0000_1240", 0), + MVO(0x00001241, "VIA_UNK_0000_1241", UINT64_C(0x5dd89e10ffffffff)), + MVI(0x00001242, "ZERO_0000_1242", 0), + MVX(0x00001243, "VIA_UNK_0000_1243", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001244, "ZERO_0000_1244", 0), + MVX(0x00001245, "VIA_UNK_0000_1245", UINT64_C(0x3020400000000064), UINT64_C(0xf000000000000000), 0), + MVX(0x00001246, "VIA_UNK_0000_1246", UINT64_C(0x10000000000), 0, 0), + MVX(0x00001247, "VIA_UNK_0000_1247", 0, 0, 0), + MVX(0x00001248, "VIA_UNK_0000_1248", 0, 0, 0), + MVI(0x00001249, "VIA_UNK_0000_1249", 0), /* Villain? */ + MVI(0x0000124a, "VIA_UNK_0000_124a", 0), /* Villain? 
*/ + RVI(0x0000124b, 0x00001300, "ZERO_0000_124b_THRU_0000_1300", 0), + MVX(0x00001301, "VIA_UNK_0000_1301", 0, 0, 0), + MVX(0x00001302, "VIA_UNK_0000_1302", 0, 0, 0), + MVX(0x00001303, "VIA_UNK_0000_1303", 0, 0, 0), + MVX(0x00001304, "VIA_UNK_0000_1304", 0, 0, 0), + MVX(0x00001305, "VIA_UNK_0000_1305", 0, 0, 0), + MVX(0x00001306, "VIA_UNK_0000_1306", 0, 0, 0), + MVX(0x00001307, "VIA_UNK_0000_1307", 0, UINT64_C(0xffffff0000000000), 0), + MVX(0x00001308, "VIA_UNK_0000_1308", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001309, "VIA_UNK_0000_1309", 0, ~(uint64_t)UINT32_MAX, 0), + RVI(0x0000130a, 0x0000130c, "ZERO_0000_130a_THRU_0000_130c", 0), + MVX(0x0000130d, "VIA_UNK_0000_130d", 0, UINT64_C(0xffffffffffff0000), 0), + MVX(0x0000130e, "VIA_UNK_0000_130e", UINT64_MAX, 0, 0), + RVI(0x0000130f, 0x00001311, "ZERO_0000_130f_THRU_0000_1311", 0), + MVX(0x00001312, "VIA_UNK_0000_1312", 0, 0, 0), + RVI(0x00001313, 0x00001314, "ZERO_0000_1313_THRU_0000_1314", 0), + MVX(0x00001315, "VIA_UNK_0000_1315", 0, 0, 0), + MVI(0x00001316, "ZERO_0000_1316", 0), + MVX(0x00001317, "VIA_UNK_0000_1317", 0, 0, 0), + MVX(0x00001318, "VIA_UNK_0000_1318", 0, 0, 0), + MVI(0x00001319, "ZERO_0000_1319", 0), + MVX(0x0000131a, "VIA_UNK_0000_131a", 0, 0, 0), + MVX(0x0000131b, "VIA_UNK_0000_131b", 0x3c20954, 0, 0), + RVI(0x0000131c, 0x00001401, "ZERO_0000_131c_THRU_0000_1401", 0), + MVO(0x00001402, "VIA_UNK_0000_1402", 0x148c48), + MVX(0x00001403, "VIA_UNK_0000_1403", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001404, "VIA_UNK_0000_1404", 0), /* Villain? */ + MVI(0x00001405, "VIA_UNK_0000_1405", UINT32_C(0x80fffffc)), /* Villain? */ + MVX(0x00001406, "VIA_UNK_0000_1406", UINT32_C(0xc842c800), ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001407, "VIA_UNK_0000_1407", UINT32_C(0x880400c0), ~(uint64_t)UINT32_MAX, 0), + RVI(0x00001408, 0x0000140f, "ZERO_0000_1408_THRU_0000_140f", 0), + MVX(0x00001410, "VIA_UNK_0000_1410", 0xfa0, UINT64_C(0xfffffffffff00000), 0), + MVX(0x00001411, "VIA_UNK_0000_1411", 0xa5a, UINT64_C(0xfffffffffff00000), 0), + MVI(0x00001412, "VIA_UNK_0000_1412", 0x4090), + MVI(0x00001413, "VIA_UNK_0000_1413", 0), /* Villain? 
*/ + MVX(0x00001414, "VIA_UNK_0000_1414", 0x5a, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001415, "VIA_UNK_0000_1415", 0x5a, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001416, "VIA_UNK_0000_1416", 0x6e, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001417, "VIA_UNK_0000_1417", 0x32, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001418, "VIA_UNK_0000_1418", 0xa, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001419, "VIA_UNK_0000_1419", 0x14, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141a, "VIA_UNK_0000_141a", 0x28, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141b, "VIA_UNK_0000_141b", 0x3c, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141c, "VIA_UNK_0000_141c", 0x69, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141d, "VIA_UNK_0000_141d", 0x69, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141e, "VIA_UNK_0000_141e", 0x69, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000141f, "VIA_UNK_0000_141f", 0x32, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x00001420, "VIA_UNK_0000_1420", 0x3, UINT64_C(0xffffffffffffc000), 0), + MVX(0x00001421, "VIA_UNK_0000_1421", 0x1f8, UINT64_C(0xfffffffffffc0000), 0), + MVX(0x00001422, "VIA_UNK_0000_1422", 0x1f4, UINT64_C(0xfffffffffffc0000), 0), + MVI(0x00001423, "VIA_UNK_0000_1423", 0xfffb7), + MVI(0x00001424, "VIA_UNK_0000_1424", 0x5b6), + MVI(0x00001425, "VIA_UNK_0000_1425", 0x65508), + MVI(0x00001426, "VIA_UNK_0000_1426", 0x843b), + MVX(0x00001427, "VIA_UNK_0000_1427", 0, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001428, "VIA_UNK_0000_1428", 0x1ffffff, ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001429, "VIA_UNK_0000_1429", 0, UINT64_C(0xfffffffffff00000), 0), + MVI(0x0000142a, "VIA_UNK_0000_142a", 0x1c85d), + MVO(0x0000142b, "VIA_UNK_0000_142b", 0xf7e), + MVI(0x0000142c, "VIA_UNK_0000_142c", 0x20080), /* Villain? */ + MVI(0x0000142d, "ZERO_0000_142d", 0), + MVI(0x0000142e, "VIA_UNK_0000_142e", 0x8000000), /* Villain? */ + MVX(0x0000142f, "VIA_UNK_0000_142f", UINT64_C(0xffe57bea2ff3fdff), 0, 0), + RVI(0x00001430, 0x00001433, "ZERO_0000_1430_THRU_0000_1433", 0), + MVX(0x00001434, "VIA_UNK_0000_1434", 0x853f0e0, UINT64_C(0xffffffff7e7b0000), 0), + MVI(0x00001435, "VIA_UNK_0000_1435", 0x8000838), /* Villain? */ + MVI(0x00001436, "VIA_UNK_0000_1436", 0x200004f), /* Villain? */ + MVX(0x00001437, "VIA_UNK_0000_1437", 0, ~(uint64_t)UINT32_MAX, 0), + MVI(0x00001438, "VIA_UNK_0000_1438", 0x7004801c), /* Villain? 
*/ + MVI(0x00001439, "ZERO_0000_1439", 0), + MVX(0x0000143a, "VIA_UNK_0000_143a", 0x20000, ~(uint64_t)UINT32_MAX, 0), + MVI(0x0000143b, "ZERO_0000_143b", 0), + MVX(0x0000143c, "VIA_UNK_0000_143c", 0, UINT64_C(0xfffffffffffffe00), 0), + MVX(0x0000143d, "VIA_UNK_0000_143d", 0, UINT64_C(0xfffffffffffffe00), 0), + RVI(0x0000143e, 0x0000143f, "ZERO_0000_143e_THRU_0000_143f", 0), + MVX(0x00001440, "VIA_UNK_0000_1440", UINT32_C(0x80e00954), ~(uint64_t)UINT32_MAX, 0), + MVX(0x00001441, "VIA_UNK_0000_1441", 0xf00954, UINT64_C(0xffffffff00ff7f7f), 0), + MVX(0x00001442, "VIA_UNK_0000_1442", 0xf00954, UINT64_C(0xffffffff00ff7f7f), 0), + RVI(0x00001443, 0x00001448, "ZERO_0000_1443_THRU_0000_1448", 0), + MVI(0x00001449, "VIA_UNK_0000_1449", UINT64_C(0xfffff7e247)), + RVI(0x0000144a, 0x0000144f, "ZERO_0000_144a_THRU_0000_144f", 0), + MVX(0x00001450, "VIA_UNK_0000_1450", 0, UINT64_C(0xffffffffffffe000), 0), + MVX(0x00001451, "VIA_UNK_0000_1451", 0, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001452, "VIA_UNK_0000_1452", 0, UINT64_C(0xffffffffff000000), 0), + MVI(0x00001453, "VIA_UNK_0000_1453", 0x3fffffff), + RVI(0x00001454, 0x0000145f, "ZERO_0000_1454_THRU_0000_145f", 0), + MVX(0x00001460, "VIA_UNK_0000_1460", 0, UINT64_C(0xffffffffffffffc0), 0), + MVX(0x00001461, "VIA_UNK_0000_1461", 0x7b, UINT64_C(0xffffffffffffff00), 0), + MVX(0x00001462, "VIA_UNK_0000_1462", 0x76, UINT64_C(0xffffffffffffff00), 0), + MVI(0x00001463, "VIA_UNK_0000_1463", 0x4a), + MVI(0x00001464, "ZERO_0000_1464", 0), + MVI(0x00001465, "VIA_UNK_0000_1465", 0xc6), + MVI(0x00001466, "VIA_UNK_0000_1466", UINT64_C(0x800000053)), + RVI(0x00001467, 0x0000146f, "ZERO_0000_1467_THRU_0000_146f", 0), + MVX(0x00001470, "VIA_UNK_0000_1470", UINT64_C(0x5dd89e10ffffffff), UINT32_C(0xfffffd68), 0), + MVI(0x00001471, "VIA_UNK_0000_1471", 0x2a000000), + RVI(0x00001472, 0x0000147f, "ZERO_0000_1472_THRU_0000_147f", 0), + MVI(0x00001480, "VIA_UNK_0000_1480", 0x3907), + MVI(0x00001481, "VIA_UNK_0000_1481", 0x12c0), + MVI(0x00001482, "VIA_UNK_0000_1482", 0x320), + MVI(0x00001483, "VIA_UNK_0000_1483", 0x3), + MVI(0x00001484, "VIA_UNK_0000_1484", 0x1647), + MVI(0x00001485, "VIA_UNK_0000_1485", 0x3b7), + MVI(0x00001486, "VIA_UNK_0000_1486", 0x443), + RVI(0x00001487, 0x0000148f, "ZERO_0000_1487_THRU_0000_148f", 0), + MVX(0x00001490, "VIA_UNK_0000_1490", 0xf5, UINT64_C(0xffffffffffffc000), 0), + MVX(0x00001491, "VIA_UNK_0000_1491", 0x200, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001492, "VIA_UNK_0000_1492", 0, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001493, "VIA_UNK_0000_1493", 0x4, UINT64_C(0xffffffffffff0000), 0), + MVX(0x00001494, "VIA_UNK_0000_1494", 0x100, UINT64_C(0xffffffffffff0000), 0), + MVX(0x00001495, "VIA_UNK_0000_1495", 0x100, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001496, "VIA_UNK_0000_1496", 0x8, UINT64_C(0xffffffffffff0000), 0), + MVX(0x00001497, "VIA_UNK_0000_1497", 0, UINT64_C(0xffffffffff000000), 0), + MVX(0x00001498, "VIA_UNK_0000_1498", 0xffffff, UINT64_C(0xfffffffffffffe3c), 0), + MVI(0x00001499, "VIA_UNK_0000_1499", 0x2c5), + MVI(0x0000149a, "VIA_UNK_0000_149a", 0x1c1), + MVI(0x0000149b, "VIA_UNK_0000_149b", 0x2c5a), + MVI(0x0000149c, "VIA_UNK_0000_149c", 0x1c8f), + RVI(0x0000149d, 0x0000149e, "ZERO_0000_149d_THRU_0000_149e", 0), + MVI(0x0000149f, "VIA_UNK_0000_149f", 0x1c9), + RVI(0x000014a0, 0x00001522, "ZERO_0000_14a0_THRU_0000_1522", 0), + MFN(0x00001523, "VIA_UNK_0000_1523", WriteOnly, IgnoreWrite), + RVI(0x00001524, 0x00003179, "ZERO_0000_1524_THRU_0000_3179", 0), + MVO(0x0000317a, "VIA_UNK_0000_317a", 
UINT64_C(0x139f29749595b8)), + MVO(0x0000317b, "VIA_UNK_0000_317b", UINT64_C(0x5dd89e10ffffffff)), + MVI(0x0000317c, "ZERO_0000_317c", 0), + MFN(0x0000317d, "VIA_UNK_0000_317d", WriteOnly, IgnoreWrite), + MFN(0x0000317e, "VIA_UNK_0000_317e", WriteOnly, IgnoreWrite), + MVI(0x0000317f, "VIA_UNK_0000_317f", 0), /* Villain? */ + RVI(0x00003180, 0x00003fff, "ZERO_0000_3180_THRU_0000_3fff", 0), + RVI(0x40000000, 0x40003fff, "ZERO_4000_0000_THRU_4000_3fff", 0), + RVI(0x80000000, 0x80000197, "ZERO_8000_0000_THRU_8000_0197", 0), + RVI(0x80000199, 0x80003fff, "ZERO_8000_0199_THRU_8000_3fff", 0), + RVI(0xc0000000, 0xc000007f, "ZERO_c000_0000_THRU_c000_007f", 0), + MFX(0xc0000080, "AMD64_EFER", Amd64Efer, Amd64Efer, 0xd01, 0x400, UINT64_C(0xffffffffffffd2fe)), + MFN(0xc0000081, "AMD64_STAR", Amd64SyscallTarget, Amd64SyscallTarget), /* value=0x230010`00000000 */ + MFN(0xc0000082, "AMD64_STAR64", Amd64LongSyscallTarget, Amd64LongSyscallTarget), /* value=0xffffffff`81669af0 */ + MFN(0xc0000083, "AMD64_STARCOMPAT", Amd64CompSyscallTarget, Amd64CompSyscallTarget), /* value=0xffffffff`8166c1d0 */ + MFX(0xc0000084, "AMD64_SYSCALL_FLAG_MASK", Amd64SyscallFlagMask, Amd64SyscallFlagMask, 0, ~(uint64_t)UINT32_MAX, 0), /* value=0x3700 */ + RVI(0xc0000085, 0xc00000ff, "ZERO_c000_0085_THRU_c000_00ff", 0), + MFN(0xc0000100, "AMD64_FS_BASE", Amd64FsBase, Amd64FsBase), /* value=0x7f91`10bdc740 */ + MFN(0xc0000101, "AMD64_GS_BASE", Amd64GsBase, Amd64GsBase), /* value=0xffff8800`6fd80000 */ + MFN(0xc0000102, "AMD64_KERNEL_GS_BASE", Amd64KernelGsBase, Amd64KernelGsBase), /* value=0x0 */ + RVI(0xc0000104, 0xc0003fff, "ZERO_c000_0104_THRU_c000_3fff", 0), +}; +#endif /* !CPUM_DB_STANDALONE */ + + +/** + * Database entry for ZHAOXIN KaiXian KX-U5581@1.8GHz. + */ +static CPUMDBENTRY const g_Entry_ZHAOXIN_KaiXian_KX_U5581_1_8GHz = +{ + /*.pszName = */ "ZHAOXIN KaiXian KX-U5581 1.8GHz", + /*.pszFullName = */ "ZHAOXIN KaiXian KX-U5581@1.8GHz", + /*.enmVendor = */ CPUMCPUVENDOR_SHANGHAI, + /*.uFamily = */ 7, + /*.uModel = */ 11, + /*.uStepping = */ 5, + /*.enmMicroarch = */ kCpumMicroarch_Shanghai_Wudaokou, + /*.uScalableBusFreq = */ CPUM_SBUSFREQ_UNKNOWN, + /*.fFlags = */ 0, + /*.cMaxPhysAddrWidth= */ 40, + /*.fMxCsrMask = */ 0x0000ffff, + /*.paCpuIdLeaves = */ NULL_ALONE(g_aCpuIdLeaves_ZHAOXIN_KaiXian_KX_U5581_1_8GHz), + /*.cCpuIdLeaves = */ ZERO_ALONE(RT_ELEMENTS(g_aCpuIdLeaves_ZHAOXIN_KaiXian_KX_U5581_1_8GHz)), + /*.enmUnknownCpuId = */ CPUMUNKNOWNCPUID_DEFAULTS, + /*.DefUnknownCpuId = */ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + /*.fMsrMask = */ UINT32_MAX /** @todo */, + /*.cMsrRanges = */ ZERO_ALONE(RT_ELEMENTS(g_aMsrRanges_ZHAOXIN_KaiXian_KX_U5581_1_8GHz)), + /*.paMsrRanges = */ NULL_ALONE(g_aMsrRanges_ZHAOXIN_KaiXian_KX_U5581_1_8GHz), +}; + +#endif /* !VBOX_CPUDB_ZHAOXIN_KaiXian_KX_U5581_1_8GHz_h */ + -- cgit v1.2.3
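
For reference, the .uFamily/.uModel/.uStepping fields in the database entries above can be cross-checked against the leaf 0x00000001 EAX value recorded in the corresponding g_aCpuIdLeaves_* table. Below is a minimal, self-contained sketch of that decoding, assuming the usual Intel-style convention that the extended-family bits are folded in only for base family 0xF and the extended-model bits only for base families 0x6 and 0xF; decode_fms() and the sample array are illustrative names, not VirtualBox APIs.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Decode family/model/stepping from a CPUID leaf 0x00000001 EAX value. */
static void decode_fms(uint32_t uEax, unsigned *puFamily, unsigned *puModel, unsigned *puStepping)
{
    unsigned const uBaseFamily = (uEax >>  8) & 0xf;
    unsigned const uExtFamily  = (uEax >> 20) & 0xff;
    unsigned const uBaseModel  = (uEax >>  4) & 0xf;
    unsigned const uExtModel   = (uEax >> 16) & 0xf;

    /* Extended family counts only for base family 0xF; extended model only for 0x6 and 0xF. */
    *puFamily   = uBaseFamily == 0xf ? uBaseFamily + uExtFamily : uBaseFamily;
    *puModel    = (uBaseFamily == 0x6 || uBaseFamily == 0xf) ? (uExtModel << 4) | uBaseModel : uBaseModel;
    *puStepping = uEax & 0xf;
}

int main(void)
{
    /* Leaf 0x00000001 EAX values copied from the two CPUID tables in this patch. */
    static uint32_t const s_aSamples[] =
    {
        0x000006fd, /* VIA QuadCore L4700       -> expect family 6, model 15, stepping 13 */
        0x000107b5, /* ZHAOXIN KaiXian KX-U5581 -> expect family 7, model 11, stepping 5  */
    };
    for (size_t i = 0; i < sizeof(s_aSamples) / sizeof(s_aSamples[0]); i++)
    {
        unsigned uFamily, uModel, uStepping;
        decode_fms(s_aSamples[i], &uFamily, &uModel, &uStepping);
        printf("eax=%#010" PRIx32 " -> family=%u model=%u stepping=%u\n",
               s_aSamples[i], uFamily, uModel, uStepping);
    }
    return 0;
}

Compiled and run as-is, this should print family 6, model 15, stepping 13 for the VIA sample and family 7, model 11, stepping 5 for the ZHAOXIN sample, matching the fields in g_Entry_VIA_QuadCore_L4700_1_2_GHz and g_Entry_ZHAOXIN_KaiXian_KX_U5581_1_8GHz.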