author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 14:19:18 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 14:19:18 +0000
commit | 4035b1bfb1e5843a539a8b624d21952b756974d1 (patch)
tree | f1e9cd5bf548cbc57ff2fddfb2b4aa9ae95587e2 /src/VBox/VMM/include
parent | Initial commit. (diff)
Adding upstream version 6.1.22-dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/VMM/include')
37 files changed, 22478 insertions, 0 deletions
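Before the per-file hunks, one brief illustration may help when reading the first header: APICInternal.h (added below) stores the ISR, TMR and IRR as sparse 256-bit registers in which each group of 32 vectors occupies a 16-byte aligned dword. The following standalone sketch is illustrative only and not part of the commit; the two macros are copied verbatim from the header, while the example vector 0x41 is an arbitrary choice.

/* Illustrative sketch only -- not part of this commit.  The two macros are
 * copied verbatim from APICInternal.h below; the example vector 0x41 is an
 * arbitrary choice. */
#include <stdint.h>
#include <stdio.h>

#define XAPIC_REG256_VECTOR_OFF(a_Vector)   (((a_Vector) & UINT32_C(0xe0)) >> 1)
#define XAPIC_REG256_VECTOR_BIT(a_Vector)   ((a_Vector) & UINT32_C(0x1f))

int main(void)
{
    uint8_t uVector = 0x41;
    /* Each block of 32 vectors maps to one 32-bit register that occupies a
     * 16-byte slot, so vector 0x41 lands at byte offset 0x20, bit 1. */
    printf("vector %#x -> byte offset %#x, bit %u\n",
           uVector, XAPIC_REG256_VECTOR_OFF(uVector), XAPIC_REG256_VECTOR_BIT(uVector));
    return 0;
}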
diff --git a/src/VBox/VMM/include/APICInternal.h b/src/VBox/VMM/include/APICInternal.h new file mode 100644 index 00000000..7f9bf3b2 --- /dev/null +++ b/src/VBox/VMM/include/APICInternal.h @@ -0,0 +1,1426 @@ +/* $Id: APICInternal.h $ */ +/** @file + * APIC - Advanced Programmable Interrupt Controller. + */ + +/* + * Copyright (C) 2016-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_APICInternal_h +#define VMM_INCLUDED_SRC_include_APICInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/sup.h> +#include <VBox/vmm/pdmdev.h> /* before apic.h! */ +#include <VBox/vmm/apic.h> + +/** @defgroup grp_apic_int Internal + * @ingroup grp_apic + * @internal + * @{ + */ + +/** The APIC hardware version number for Pentium 4. */ +#define XAPIC_HARDWARE_VERSION_P4 UINT8_C(0x14) +/** Maximum number of LVT entries for Pentium 4. */ +#define XAPIC_MAX_LVT_ENTRIES_P4 UINT8_C(6) +/** Size of the APIC ID bits for Pentium 4. */ +#define XAPIC_APIC_ID_BIT_COUNT_P4 UINT8_C(8) + +/** The APIC hardware version number for Pentium 6. */ +#define XAPIC_HARDWARE_VERSION_P6 UINT8_C(0x10) +/** Maximum number of LVT entries for Pentium 6. */ +#define XAPIC_MAX_LVT_ENTRIES_P6 UINT8_C(4) +/** Size of the APIC ID bits for Pentium 6. */ +#define XAPIC_APIC_ID_BIT_COUNT_P6 UINT8_C(4) + +/** The APIC hardware version we are emulating. */ +#define XAPIC_HARDWARE_VERSION XAPIC_HARDWARE_VERSION_P4 + +#define VMCPU_TO_XAPICPAGE(a_pVCpu) ((PXAPICPAGE)(CTX_SUFF((a_pVCpu)->apic.s.pvApicPage))) +#define VMCPU_TO_CXAPICPAGE(a_pVCpu) ((PCXAPICPAGE)(CTX_SUFF((a_pVCpu)->apic.s.pvApicPage))) + +#define VMCPU_TO_X2APICPAGE(a_pVCpu) ((PX2APICPAGE)(CTX_SUFF((a_pVCpu)->apic.s.pvApicPage))) +#define VMCPU_TO_CX2APICPAGE(a_pVCpu) ((PCX2APICPAGE)(CTX_SUFF((a_pVCpu)->apic.s.pvApicPage))) + +#define VMCPU_TO_APICCPU(a_pVCpu) (&(a_pVCpu)->apic.s) +#define VM_TO_APIC(a_pVM) (&(a_pVM)->apic.s) +#define VM_TO_APICDEV(a_pVM) CTX_SUFF(VM_TO_APIC(a_pVM)->pApicDev) +#ifdef IN_RING3 +# define VMCPU_TO_DEVINS(a_pVCpu) ((a_pVCpu)->pVMR3->apic.s.pDevInsR3) +#elif defined(IN_RING0) +# define VMCPU_TO_DEVINS(a_pVCpu) ((a_pVCpu)->pGVM->apicr0.s.pDevInsR0) +#endif + +#define APICCPU_TO_XAPICPAGE(a_ApicCpu) ((PXAPICPAGE)(CTX_SUFF((a_ApicCpu)->pvApicPage))) +#define APICCPU_TO_CXAPICPAGE(a_ApicCpu) ((PCXAPICPAGE)(CTX_SUFF((a_ApicCpu)->pvApicPage))) + +/** Whether the APIC is in X2APIC mode or not. */ +#define XAPIC_IN_X2APIC_MODE(a_pVCpu) ( ( ((a_pVCpu)->apic.s.uApicBaseMsr) \ + & (MSR_IA32_APICBASE_EN | MSR_IA32_APICBASE_EXTD)) \ + == (MSR_IA32_APICBASE_EN | MSR_IA32_APICBASE_EXTD) ) + +/** Get an xAPIC page offset for an x2APIC MSR value. */ +#define X2APIC_GET_XAPIC_OFF(a_uMsr) ((((a_uMsr) - MSR_IA32_X2APIC_START) << 4) & UINT32_C(0xff0)) +/** Get an x2APIC MSR for an xAPIC page offset. */ +#define XAPIC_GET_X2APIC_MSR(a_offReg) ((((a_offReg) & UINT32_C(0xff0)) >> 4) | MSR_IA32_X2APIC_START) + +/** Illegal APIC vector value start. */ +#define XAPIC_ILLEGAL_VECTOR_START UINT8_C(0) +/** Illegal APIC vector value end (inclusive). 
*/ +#define XAPIC_ILLEGAL_VECTOR_END UINT8_C(15) +/** Reserved APIC vector value start. */ +#define XAPIC_RSVD_VECTOR_START UINT8_C(16) +/** Reserved APIC vector value end (inclusive). */ +#define XAPIC_RSVD_VECTOR_END UINT8_C(31) + +/** Vector offset in an APIC 256-bit sparse register. */ +#define XAPIC_REG256_VECTOR_OFF(a_Vector) (((a_Vector) & UINT32_C(0xe0)) >> 1) +/** Bit position at offset in an APIC 256-bit sparse register. */ +#define XAPIC_REG256_VECTOR_BIT(a_Vector) ((a_Vector) & UINT32_C(0x1f)) + +/** Maximum valid offset for a register (16-byte aligned, 4 byte wide access). */ +#define XAPIC_OFF_MAX_VALID (sizeof(XAPICPAGE) - 4 * sizeof(uint32_t)) + +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P6 +/** ESR - Send checksum error. */ +# define XAPIC_ESR_SEND_CHKSUM_ERROR RT_BIT(0) +/** ESR - Send accept error. */ +# define XAPIC_ESR_RECV_CHKSUM_ERROR RT_BIT(1) +/** ESR - Send accept error. */ +# define XAPIC_ESR_SEND_ACCEPT_ERROR RT_BIT(2) +/** ESR - Receive accept error. */ +# define XAPIC_ESR_RECV_ACCEPT_ERROR RT_BIT(3) +#endif +/** ESR - Redirectable IPI. */ +#define XAPIC_ESR_REDIRECTABLE_IPI RT_BIT(4) +/** ESR - Send accept error. */ +#define XAPIC_ESR_SEND_ILLEGAL_VECTOR RT_BIT(5) +/** ESR - Send accept error. */ +#define XAPIC_ESR_RECV_ILLEGAL_VECTOR RT_BIT(6) +/** ESR - Send accept error. */ +#define XAPIC_ESR_ILLEGAL_REG_ADDRESS RT_BIT(7) +/** ESR - Valid write-only bits. */ +#define XAPIC_ESR_WO_VALID UINT32_C(0x0) + +/** TPR - Valid bits. */ +#define XAPIC_TPR_VALID UINT32_C(0xff) +/** TPR - Task-priority class. */ +#define XAPIC_TPR_TP UINT32_C(0xf0) +/** TPR - Task-priority subclass. */ +#define XAPIC_TPR_TP_SUBCLASS UINT32_C(0x0f) +/** TPR - Gets the task-priority class. */ +#define XAPIC_TPR_GET_TP(a_Tpr) ((a_Tpr) & XAPIC_TPR_TP) +/** TPR - Gets the task-priority subclass. */ +#define XAPIC_TPR_GET_TP_SUBCLASS(a_Tpr) ((a_Tpr) & XAPIC_TPR_TP_SUBCLASS) + +/** PPR - Valid bits. */ +#define XAPIC_PPR_VALID UINT32_C(0xff) +/** PPR - Processor-priority class. */ +#define XAPIC_PPR_PP UINT32_C(0xf0) +/** PPR - Processor-priority subclass. */ +#define XAPIC_PPR_PP_SUBCLASS UINT32_C(0x0f) +/** PPR - Get the processor-priority class. */ +#define XAPIC_PPR_GET_PP(a_Ppr) ((a_Ppr) & XAPIC_PPR_PP) +/** PPR - Get the processor-priority subclass. */ +#define XAPIC_PPR_GET_PP_SUBCLASS(a_Ppr) ((a_Ppr) & XAPIC_PPR_PP_SUBCLASS) + +/** Timer mode - One-shot. */ +#define XAPIC_TIMER_MODE_ONESHOT UINT32_C(0) +/** Timer mode - Periodic. */ +#define XAPIC_TIMER_MODE_PERIODIC UINT32_C(1) +/** Timer mode - TSC deadline. */ +#define XAPIC_TIMER_MODE_TSC_DEADLINE UINT32_C(2) + +/** LVT - The vector. */ +#define XAPIC_LVT_VECTOR UINT32_C(0xff) +/** LVT - Gets the vector from an LVT entry. */ +#define XAPIC_LVT_GET_VECTOR(a_Lvt) ((a_Lvt) & XAPIC_LVT_VECTOR) +/** LVT - The mask. */ +#define XAPIC_LVT_MASK RT_BIT(16) +/** LVT - Is the LVT masked? */ +#define XAPIC_LVT_IS_MASKED(a_Lvt) RT_BOOL((a_Lvt) & XAPIC_LVT_MASK) +/** LVT - Timer mode. */ +#define XAPIC_LVT_TIMER_MODE RT_BIT(17) +/** LVT - Timer TSC-deadline timer mode. */ +#define XAPIC_LVT_TIMER_TSCDEADLINE RT_BIT(18) +/** LVT - Gets the timer mode. */ +#define XAPIC_LVT_GET_TIMER_MODE(a_Lvt) (XAPICTIMERMODE)(((a_Lvt) >> 17) & UINT32_C(3)) +/** LVT - Delivery mode. */ +#define XAPIC_LVT_DELIVERY_MODE (RT_BIT(8) | RT_BIT(9) | RT_BIT(10)) +/** LVT - Gets the delivery mode. */ +#define XAPIC_LVT_GET_DELIVERY_MODE(a_Lvt) (XAPICDELIVERYMODE)(((a_Lvt) >> 8) & UINT32_C(7)) +/** LVT - Delivery status. 
*/ +#define XAPIC_LVT_DELIVERY_STATUS RT_BIT(12) +/** LVT - Trigger mode. */ +#define XAPIC_LVT_TRIGGER_MODE RT_BIT(15) +/** LVT - Gets the trigger mode. */ +#define XAPIC_LVT_GET_TRIGGER_MODE(a_Lvt) (XAPICTRIGGERMODE)(((a_Lvt) >> 15) & UINT32_C(1)) +/** LVT - Remote IRR. */ +#define XAPIC_LVT_REMOTE_IRR RT_BIT(14) +/** LVT - Gets the Remote IRR. */ +#define XAPIC_LVT_GET_REMOTE_IRR(a_Lvt) (((a_Lvt) >> 14) & 1) +/** LVT - Interrupt Input Pin Polarity. */ +#define XAPIC_LVT_POLARITY RT_BIT(13) +/** LVT - Gets the Interrupt Input Pin Polarity. */ +#define XAPIC_LVT_GET_POLARITY(a_Lvt) (((a_Lvt) >> 13) & 1) +/** LVT - Valid bits common to all LVTs. */ +#define XAPIC_LVT_COMMON_VALID (XAPIC_LVT_VECTOR | XAPIC_LVT_DELIVERY_STATUS | XAPIC_LVT_MASK) +/** LVT CMCI - Valid bits. */ +#define XAPIC_LVT_CMCI_VALID (XAPIC_LVT_COMMON_VALID | XAPIC_LVT_DELIVERY_MODE) +/** LVT Timer - Valid bits. */ +#define XAPIC_LVT_TIMER_VALID (XAPIC_LVT_COMMON_VALID | XAPIC_LVT_TIMER_MODE | XAPIC_LVT_TIMER_TSCDEADLINE) +/** LVT Thermal - Valid bits. */ +#define XAPIC_LVT_THERMAL_VALID (XAPIC_LVT_COMMON_VALID | XAPIC_LVT_DELIVERY_MODE) +/** LVT Perf - Valid bits. */ +#define XAPIC_LVT_PERF_VALID (XAPIC_LVT_COMMON_VALID | XAPIC_LVT_DELIVERY_MODE) +/** LVT LINTx - Valid bits. */ +#define XAPIC_LVT_LINT_VALID ( XAPIC_LVT_COMMON_VALID | XAPIC_LVT_DELIVERY_MODE | XAPIC_LVT_DELIVERY_STATUS \ + | XAPIC_LVT_POLARITY | XAPIC_LVT_REMOTE_IRR | XAPIC_LVT_TRIGGER_MODE) +/** LVT Error - Valid bits. */ +#define XAPIC_LVT_ERROR_VALID (XAPIC_LVT_COMMON_VALID) + +/** SVR - The vector. */ +#define XAPIC_SVR_VECTOR UINT32_C(0xff) +/** SVR - APIC Software enable. */ +#define XAPIC_SVR_SOFTWARE_ENABLE RT_BIT(8) +/** SVR - Supress EOI broadcast. */ +#define XAPIC_SVR_SUPRESS_EOI_BROADCAST RT_BIT(12) +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 +/** SVR - Valid bits. */ +# define XAPIC_SVR_VALID (XAPIC_SVR_VECTOR | XAPIC_SVR_SOFTWARE_ENABLE) +#else +# error "Implement Pentium and P6 family APIC architectures" +#endif + +/** DFR - Valid bits. */ +#define XAPIC_DFR_VALID UINT32_C(0xf0000000) +/** DFR - Reserved bits that must always remain set. */ +#define XAPIC_DFR_RSVD_MB1 UINT32_C(0x0fffffff) +/** DFR - The model. */ +#define XAPIC_DFR_MODEL UINT32_C(0xf) +/** DFR - Gets the destination model. */ +#define XAPIC_DFR_GET_MODEL(a_uReg) (((a_uReg) >> 28) & XAPIC_DFR_MODEL) + +/** LDR - Valid bits. */ +#define XAPIC_LDR_VALID UINT32_C(0xff000000) +/** LDR - Cluster ID mask (x2APIC). */ +#define X2APIC_LDR_CLUSTER_ID UINT32_C(0xffff0000) +/** LDR - Mask of the LDR cluster ID (x2APIC). */ +#define X2APIC_LDR_GET_CLUSTER_ID(a_uReg) ((a_uReg) & X2APIC_LDR_CLUSTER_ID) +/** LDR - Mask of the LDR logical ID (x2APIC). */ +#define X2APIC_LDR_LOGICAL_ID UINT32_C(0x0000ffff) + +/** LDR - Flat mode logical ID mask. */ +#define XAPIC_LDR_FLAT_LOGICAL_ID UINT32_C(0xff) +/** LDR - Clustered mode cluster ID mask. */ +#define XAPIC_LDR_CLUSTERED_CLUSTER_ID UINT32_C(0xf0) +/** LDR - Clustered mode logical ID mask. */ +#define XAPIC_LDR_CLUSTERED_LOGICAL_ID UINT32_C(0x0f) +/** LDR - Gets the clustered mode cluster ID. */ +#define XAPIC_LDR_CLUSTERED_GET_CLUSTER_ID(a_uReg) ((a_uReg) & XAPIC_LDR_CLUSTERED_CLUSTER_ID) + + +/** EOI - Valid write-only bits. */ +#define XAPIC_EOI_WO_VALID UINT32_C(0x0) +/** Timer ICR - Valid bits. */ +#define XAPIC_TIMER_ICR_VALID UINT32_C(0xffffffff) +/** Timer DCR - Valid bits. */ +#define XAPIC_TIMER_DCR_VALID (RT_BIT(0) | RT_BIT(1) | RT_BIT(3)) + +/** Self IPI - Valid bits. 
*/ +#define XAPIC_SELF_IPI_VALID UINT32_C(0xff) +/** Self IPI - The vector. */ +#define XAPIC_SELF_IPI_VECTOR UINT32_C(0xff) +/** Self IPI - Gets the vector. */ +#define XAPIC_SELF_IPI_GET_VECTOR(a_uReg) ((a_uReg) & XAPIC_SELF_IPI_VECTOR) + +/** ICR Low - The Vector. */ +#define XAPIC_ICR_LO_VECTOR UINT32_C(0xff) +/** ICR Low - Gets the vector. */ +#define XAPIC_ICR_LO_GET_VECTOR(a_uIcr) ((a_uIcr) & XAPIC_ICR_LO_VECTOR) +/** ICR Low - The delivery mode. */ +#define XAPIC_ICR_LO_DELIVERY_MODE (RT_BIT(8) | RT_BIT(9) | RT_BIT(10)) +/** ICR Low - The destination mode. */ +#define XAPIC_ICR_LO_DEST_MODE RT_BIT(11) +/** ICR Low - The delivery status. */ +#define XAPIC_ICR_LO_DELIVERY_STATUS RT_BIT(12) +/** ICR Low - The level. */ +#define XAPIC_ICR_LO_LEVEL RT_BIT(14) +/** ICR Low - The trigger mode. */ +#define XAPIC_ICR_TRIGGER_MODE RT_BIT(15) +/** ICR Low - The destination shorthand. */ +#define XAPIC_ICR_LO_DEST_SHORTHAND (RT_BIT(18) | RT_BIT(19)) +/** ICR Low - Valid write bits. */ +#define XAPIC_ICR_LO_WR_VALID ( XAPIC_ICR_LO_VECTOR | XAPIC_ICR_LO_DELIVERY_MODE | XAPIC_ICR_LO_DEST_MODE \ + | XAPIC_ICR_LO_LEVEL | XAPIC_ICR_TRIGGER_MODE | XAPIC_ICR_LO_DEST_SHORTHAND) + +/** ICR High - The destination field. */ +#define XAPIC_ICR_HI_DEST UINT32_C(0xff000000) +/** ICR High - Get the destination field. */ +#define XAPIC_ICR_HI_GET_DEST(a_u32IcrHi) (((a_u32IcrHi) >> 24) & XAPIC_ICR_HI_DEST) +/** ICR High - Valid write bits in xAPIC mode. */ +#define XAPIC_ICR_HI_WR_VALID XAPIC_ICR_HI_DEST + +/** APIC ID broadcast mask - x2APIC mode. */ +#define X2APIC_ID_BROADCAST_MASK UINT32_C(0xffffffff) +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 +/** APIC ID broadcast mask - xAPIC mode. */ +# define XAPIC_ID_BROADCAST_MASK UINT32_C(0xff) +#else +# error "Implement Pentium and P6 family APIC architectures" +#endif + +/** + * The xAPIC sparse 256-bit register. + */ +typedef union XAPIC256BITREG +{ + /** The sparse-bitmap view. */ + struct + { + uint32_t u32Reg; + uint32_t uReserved0[3]; + } u[8]; + /** The 32-bit view. */ + uint32_t au32[32]; +} XAPIC256BITREG; +/** Pointer to an xAPIC sparse bitmap register. */ +typedef XAPIC256BITREG *PXAPIC256BITREG; +/** Pointer to a const xAPIC sparse bitmap register. */ +typedef XAPIC256BITREG const *PCXAPIC256BITREG; +AssertCompileSize(XAPIC256BITREG, 128); + +/** + * The xAPIC memory layout as per Intel/AMD specs. + */ +typedef struct XAPICPAGE +{ + /* 0x00 - Reserved. */ + uint32_t uReserved0[8]; + /* 0x20 - APIC ID. */ + struct + { + uint8_t u8Reserved0[3]; + uint8_t u8ApicId; + uint32_t u32Reserved0[3]; + } id; + /* 0x30 - APIC version register. */ + union + { + struct + { +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 + uint8_t u8Version; +#else +# error "Implement Pentium and P6 family APIC architectures" +#endif + uint8_t uReserved0; + uint8_t u8MaxLvtEntry; + uint8_t fEoiBroadcastSupression : 1; + uint8_t u7Reserved1 : 7; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Version; + uint32_t u32Reserved0[3]; + } all; + } version; + /* 0x40 - Reserved. */ + uint32_t uReserved1[16]; + /* 0x80 - Task Priority Register (TPR). */ + struct + { + uint8_t u8Tpr; + uint8_t u8Reserved0[3]; + uint32_t u32Reserved0[3]; + } tpr; + /* 0x90 - Arbitration Priority Register (APR). */ + struct + { + uint8_t u8Apr; + uint8_t u8Reserved0[3]; + uint32_t u32Reserved0[3]; + } apr; + /* 0xA0 - Processor Priority Register (PPR). 
*/ + struct + { + uint8_t u8Ppr; + uint8_t u8Reserved0[3]; + uint32_t u32Reserved0[3]; + } ppr; + /* 0xB0 - End Of Interrupt Register (EOI). */ + struct + { + uint32_t u32Eoi; + uint32_t u32Reserved0[3]; + } eoi; + /* 0xC0 - Remote Read Register (RRD). */ + struct + { + uint32_t u32Rrd; + uint32_t u32Reserved0[3]; + } rrd; + /* 0xD0 - Logical Destination Register (LDR). */ + union + { + struct + { + uint8_t u8Reserved0[3]; + uint8_t u8LogicalApicId; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Ldr; + uint32_t u32Reserved0[3]; + } all; + } ldr; + /* 0xE0 - Destination Format Register (DFR). */ + union + { + struct + { + uint32_t u28ReservedMb1 : 28; /* MB1 */ + uint32_t u4Model : 4; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Dfr; + uint32_t u32Reserved0[3]; + } all; + } dfr; + /* 0xF0 - Spurious-Interrupt Vector Register (SVR). */ + union + { + struct + { + uint32_t u8SpuriousVector : 8; + uint32_t fApicSoftwareEnable : 1; + uint32_t u3Reserved0 : 3; + uint32_t fSupressEoiBroadcast : 1; + uint32_t u19Reserved1 : 19; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Svr; + uint32_t u32Reserved0[3]; + } all; + } svr; + /* 0x100 - In-service Register (ISR). */ + XAPIC256BITREG isr; + /* 0x180 - Trigger Mode Register (TMR). */ + XAPIC256BITREG tmr; + /* 0x200 - Interrupt Request Register (IRR). */ + XAPIC256BITREG irr; + /* 0x280 - Error Status Register (ESR). */ + union + { + struct + { +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 + uint32_t u4Reserved0 : 4; +#else +# error "Implement Pentium and P6 family APIC architectures" +#endif + uint32_t fRedirectableIpi : 1; + uint32_t fSendIllegalVector : 1; + uint32_t fRcvdIllegalVector : 1; + uint32_t fIllegalRegAddr : 1; + uint32_t u24Reserved1 : 24; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Errors; + uint32_t u32Reserved0[3]; + } all; + } esr; + /* 0x290 - Reserved. */ + uint32_t uReserved2[28]; + /* 0x300 - Interrupt Command Register (ICR) - Low. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1DestMode : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t fReserved0 : 1; + uint32_t u1Level : 1; + uint32_t u1TriggerMode : 1; + uint32_t u2Reserved1 : 2; + uint32_t u2DestShorthand : 2; + uint32_t u12Reserved2 : 12; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32IcrLo; + uint32_t u32Reserved0[3]; + } all; + } icr_lo; + /* 0x310 - Interrupt Comannd Register (ICR) - High. */ + union + { + struct + { + uint32_t u24Reserved0 : 24; + uint32_t u8Dest : 8; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32IcrHi; + uint32_t u32Reserved0[3]; + } all; + } icr_hi; + /* 0x320 - Local Vector Table (LVT) Timer Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u4Reserved0 : 4; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u2TimerMode : 2; + uint32_t u13Reserved2 : 13; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtTimer; + uint32_t u32Reserved0[3]; + } all; + } lvt_timer; + /* 0x330 - Local Vector Table (LVT) Thermal Sensor Register. 
*/ +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtThermal; + uint32_t u32Reserved0[3]; + } all; + } lvt_thermal; +#else +# error "Implement Pentium and P6 family APIC architectures" +#endif + /* 0x340 - Local Vector Table (LVT) Performance Monitor Counter (PMC) Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtPerf; + uint32_t u32Reserved0[3]; + } all; + } lvt_perf; + /* 0x350 - Local Vector Table (LVT) LINT0 Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u1IntrPolarity : 1; + uint32_t u1RemoteIrr : 1; + uint32_t u1TriggerMode : 1; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtLint0; + uint32_t u32Reserved0[3]; + } all; + } lvt_lint0; + /* 0x360 - Local Vector Table (LVT) LINT1 Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u1IntrPolarity : 1; + uint32_t u1RemoteIrr : 1; + uint32_t u1TriggerMode : 1; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtLint1; + uint32_t u32Reserved0[3]; + } all; + } lvt_lint1; + /* 0x370 - Local Vector Table (LVT) Error Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u4Reserved0 : 4; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtError; + uint32_t u32Reserved0[3]; + } all; + } lvt_error; + /* 0x380 - Timer Initial Counter Register. */ + struct + { + uint32_t u32InitialCount; + uint32_t u32Reserved0[3]; + } timer_icr; + /* 0x390 - Timer Current Counter Register. */ + struct + { + uint32_t u32CurrentCount; + uint32_t u32Reserved0[3]; + } timer_ccr; + /* 0x3A0 - Reserved. */ + uint32_t u32Reserved3[16]; + /* 0x3E0 - Timer Divide Configuration Register. */ + union + { + struct + { + uint32_t u2DivideValue0 : 2; + uint32_t u1Reserved0 : 1; + uint32_t u1DivideValue1 : 1; + uint32_t u28Reserved1 : 28; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32DivideValue; + uint32_t u32Reserved0[3]; + } all; + } timer_dcr; + /* 0x3F0 - Reserved. */ + uint8_t u8Reserved0[3088]; +} XAPICPAGE; +/** Pointer to a XAPICPAGE struct. */ +typedef XAPICPAGE *PXAPICPAGE; +/** Pointer to a const XAPICPAGE struct. 
*/ +typedef const XAPICPAGE *PCXAPICPAGE; +AssertCompileSize(XAPICPAGE, 4096); +AssertCompileMemberOffset(XAPICPAGE, id, XAPIC_OFF_ID); +AssertCompileMemberOffset(XAPICPAGE, version, XAPIC_OFF_VERSION); +AssertCompileMemberOffset(XAPICPAGE, tpr, XAPIC_OFF_TPR); +AssertCompileMemberOffset(XAPICPAGE, apr, XAPIC_OFF_APR); +AssertCompileMemberOffset(XAPICPAGE, ppr, XAPIC_OFF_PPR); +AssertCompileMemberOffset(XAPICPAGE, eoi, XAPIC_OFF_EOI); +AssertCompileMemberOffset(XAPICPAGE, rrd, XAPIC_OFF_RRD); +AssertCompileMemberOffset(XAPICPAGE, ldr, XAPIC_OFF_LDR); +AssertCompileMemberOffset(XAPICPAGE, dfr, XAPIC_OFF_DFR); +AssertCompileMemberOffset(XAPICPAGE, svr, XAPIC_OFF_SVR); +AssertCompileMemberOffset(XAPICPAGE, isr, XAPIC_OFF_ISR0); +AssertCompileMemberOffset(XAPICPAGE, tmr, XAPIC_OFF_TMR0); +AssertCompileMemberOffset(XAPICPAGE, irr, XAPIC_OFF_IRR0); +AssertCompileMemberOffset(XAPICPAGE, esr, XAPIC_OFF_ESR); +AssertCompileMemberOffset(XAPICPAGE, icr_lo, XAPIC_OFF_ICR_LO); +AssertCompileMemberOffset(XAPICPAGE, icr_hi, XAPIC_OFF_ICR_HI); +AssertCompileMemberOffset(XAPICPAGE, lvt_timer, XAPIC_OFF_LVT_TIMER); +AssertCompileMemberOffset(XAPICPAGE, lvt_thermal, XAPIC_OFF_LVT_THERMAL); +AssertCompileMemberOffset(XAPICPAGE, lvt_perf, XAPIC_OFF_LVT_PERF); +AssertCompileMemberOffset(XAPICPAGE, lvt_lint0, XAPIC_OFF_LVT_LINT0); +AssertCompileMemberOffset(XAPICPAGE, lvt_lint1, XAPIC_OFF_LVT_LINT1); +AssertCompileMemberOffset(XAPICPAGE, lvt_error, XAPIC_OFF_LVT_ERROR); +AssertCompileMemberOffset(XAPICPAGE, timer_icr, XAPIC_OFF_TIMER_ICR); +AssertCompileMemberOffset(XAPICPAGE, timer_ccr, XAPIC_OFF_TIMER_CCR); +AssertCompileMemberOffset(XAPICPAGE, timer_dcr, XAPIC_OFF_TIMER_DCR); + +/** + * The x2APIC memory layout as per Intel/AMD specs. + */ +typedef struct X2APICPAGE +{ + /* 0x00 - Reserved. */ + uint32_t uReserved0[8]; + /* 0x20 - APIC ID. */ + struct + { + uint32_t u32ApicId; + uint32_t u32Reserved0[3]; + } id; + /* 0x30 - APIC version register. */ + union + { + struct + { + uint8_t u8Version; + uint8_t u8Reserved0; + uint8_t u8MaxLvtEntry; + uint8_t fEoiBroadcastSupression : 1; + uint8_t u7Reserved1 : 7; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Version; + uint32_t u32Reserved2[3]; + } all; + } version; + /* 0x40 - Reserved. */ + uint32_t uReserved1[16]; + /* 0x80 - Task Priority Register (TPR). */ + struct + { + uint8_t u8Tpr; + uint8_t u8Reserved0[3]; + uint32_t u32Reserved0[3]; + } tpr; + /* 0x90 - Reserved. */ + uint32_t uReserved2[4]; + /* 0xA0 - Processor Priority Register (PPR). */ + struct + { + uint8_t u8Ppr; + uint8_t u8Reserved0[3]; + uint32_t u32Reserved0[3]; + } ppr; + /* 0xB0 - End Of Interrupt Register (EOI). */ + struct + { + uint32_t u32Eoi; + uint32_t u32Reserved0[3]; + } eoi; + /* 0xC0 - Remote Read Register (RRD). */ + struct + { + uint32_t u32Rrd; + uint32_t u32Reserved0[3]; + } rrd; + /* 0xD0 - Logical Destination Register (LDR). */ + struct + { + uint32_t u32LogicalApicId; + uint32_t u32Reserved1[3]; + } ldr; + /* 0xE0 - Reserved. */ + uint32_t uReserved3[4]; + /* 0xF0 - Spurious-Interrupt Vector Register (SVR). */ + union + { + struct + { + uint32_t u8SpuriousVector : 8; + uint32_t fApicSoftwareEnable : 1; + uint32_t u3Reserved0 : 3; + uint32_t fSupressEoiBroadcast : 1; + uint32_t u19Reserved1 : 19; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32Svr; + uint32_t uReserved0[3]; + } all; + } svr; + /* 0x100 - In-service Register (ISR). */ + XAPIC256BITREG isr; + /* 0x180 - Trigger Mode Register (TMR). 
*/ + XAPIC256BITREG tmr; + /* 0x200 - Interrupt Request Register (IRR). */ + XAPIC256BITREG irr; + /* 0x280 - Error Status Register (ESR). */ + union + { + struct + { +#if XAPIC_HARDWARE_VERSION == XAPIC_HARDWARE_VERSION_P4 + uint32_t u4Reserved0 : 4; +#else +# error "Implement Pentium and P6 family APIC architectures" +#endif + uint32_t fRedirectableIpi : 1; + uint32_t fSendIllegalVector : 1; + uint32_t fRcvdIllegalVector : 1; + uint32_t fIllegalRegAddr : 1; + uint32_t u24Reserved1 : 24; + uint32_t uReserved0[3]; + } u; + struct + { + uint32_t u32Errors; + uint32_t u32Reserved0[3]; + } all; + } esr; + /* 0x290 - Reserved. */ + uint32_t uReserved4[28]; + /* 0x300 - Interrupt Command Register (ICR) - Low. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1DestMode : 1; + uint32_t u2Reserved0 : 2; + uint32_t u1Level : 1; + uint32_t u1TriggerMode : 1; + uint32_t u2Reserved1 : 2; + uint32_t u2DestShorthand : 2; + uint32_t u12Reserved2 : 12; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32IcrLo; + uint32_t u32Reserved3[3]; + } all; + } icr_lo; + /* 0x310 - Interrupt Comannd Register (ICR) - High. */ + struct + { + uint32_t u32IcrHi; + uint32_t uReserved1[3]; + } icr_hi; + /* 0x320 - Local Vector Table (LVT) Timer Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u4Reserved0 : 4; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u2TimerMode : 2; + uint32_t u13Reserved2 : 13; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtTimer; + uint32_t u32Reserved0[3]; + } all; + } lvt_timer; + /* 0x330 - Local Vector Table (LVT) Thermal Sensor Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtThermal; + uint32_t uReserved0[3]; + } all; + } lvt_thermal; + /* 0x340 - Local Vector Table (LVT) Performance Monitor Counter (PMC) Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtPerf; + uint32_t u32Reserved0[3]; + } all; + } lvt_perf; + /* 0x350 - Local Vector Table (LVT) LINT0 Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u1IntrPolarity : 1; + uint32_t u1RemoteIrr : 1; + uint32_t u1TriggerMode : 1; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtLint0; + uint32_t u32Reserved0[3]; + } all; + } lvt_lint0; + /* 0x360 - Local Vector Table (LVT) LINT1 Register. */ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u3DeliveryMode : 3; + uint32_t u1Reserved0 : 1; + uint32_t u1DeliveryStatus : 1; + uint32_t u1IntrPolarity : 1; + uint32_t u1RemoteIrr : 1; + uint32_t u1TriggerMode : 1; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtLint1; + uint32_t u32Reserved0[3]; + } all; + } lvt_lint1; + /* 0x370 - Local Vector Table (LVT) Error Register. 
*/ + union + { + struct + { + uint32_t u8Vector : 8; + uint32_t u4Reserved0 : 4; + uint32_t u1DeliveryStatus : 1; + uint32_t u3Reserved1 : 3; + uint32_t u1Mask : 1; + uint32_t u15Reserved2 : 15; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32LvtError; + uint32_t u32Reserved0[3]; + } all; + } lvt_error; + /* 0x380 - Timer Initial Counter Register. */ + struct + { + uint32_t u32InitialCount; + uint32_t u32Reserved0[3]; + } timer_icr; + /* 0x390 - Timer Current Counter Register. */ + struct + { + uint32_t u32CurrentCount; + uint32_t u32Reserved0[3]; + } timer_ccr; + /* 0x3A0 - Reserved. */ + uint32_t uReserved5[16]; + /* 0x3E0 - Timer Divide Configuration Register. */ + union + { + struct + { + uint32_t u2DivideValue0 : 2; + uint32_t u1Reserved0 : 1; + uint32_t u1DivideValue1 : 1; + uint32_t u28Reserved1 : 28; + uint32_t u32Reserved0[3]; + } u; + struct + { + uint32_t u32DivideValue; + uint32_t u32Reserved0[3]; + } all; + } timer_dcr; + /* 0x3F0 - Self IPI Register. */ + struct + { + uint32_t u8Vector : 8; + uint32_t u24Reserved0 : 24; + uint32_t u32Reserved0[3]; + } self_ipi; + /* 0x400 - Reserved. */ + uint8_t u8Reserved0[3072]; +} X2APICPAGE; +/** Pointer to a X2APICPAGE struct. */ +typedef X2APICPAGE *PX2APICPAGE; +/** Pointer to a const X2APICPAGE struct. */ +typedef const X2APICPAGE *PCX2APICPAGE; +AssertCompileSize(X2APICPAGE, 4096); +AssertCompileSize(X2APICPAGE, sizeof(XAPICPAGE)); +AssertCompileMemberOffset(X2APICPAGE, id, XAPIC_OFF_ID); +AssertCompileMemberOffset(X2APICPAGE, version, XAPIC_OFF_VERSION); +AssertCompileMemberOffset(X2APICPAGE, tpr, XAPIC_OFF_TPR); +AssertCompileMemberOffset(X2APICPAGE, ppr, XAPIC_OFF_PPR); +AssertCompileMemberOffset(X2APICPAGE, eoi, XAPIC_OFF_EOI); +AssertCompileMemberOffset(X2APICPAGE, rrd, XAPIC_OFF_RRD); +AssertCompileMemberOffset(X2APICPAGE, ldr, XAPIC_OFF_LDR); +AssertCompileMemberOffset(X2APICPAGE, svr, XAPIC_OFF_SVR); +AssertCompileMemberOffset(X2APICPAGE, isr, XAPIC_OFF_ISR0); +AssertCompileMemberOffset(X2APICPAGE, tmr, XAPIC_OFF_TMR0); +AssertCompileMemberOffset(X2APICPAGE, irr, XAPIC_OFF_IRR0); +AssertCompileMemberOffset(X2APICPAGE, esr, XAPIC_OFF_ESR); +AssertCompileMemberOffset(X2APICPAGE, icr_lo, XAPIC_OFF_ICR_LO); +AssertCompileMemberOffset(X2APICPAGE, icr_hi, XAPIC_OFF_ICR_HI); +AssertCompileMemberOffset(X2APICPAGE, lvt_timer, XAPIC_OFF_LVT_TIMER); +AssertCompileMemberOffset(X2APICPAGE, lvt_thermal, XAPIC_OFF_LVT_THERMAL); +AssertCompileMemberOffset(X2APICPAGE, lvt_perf, XAPIC_OFF_LVT_PERF); +AssertCompileMemberOffset(X2APICPAGE, lvt_lint0, XAPIC_OFF_LVT_LINT0); +AssertCompileMemberOffset(X2APICPAGE, lvt_lint1, XAPIC_OFF_LVT_LINT1); +AssertCompileMemberOffset(X2APICPAGE, lvt_error, XAPIC_OFF_LVT_ERROR); +AssertCompileMemberOffset(X2APICPAGE, timer_icr, XAPIC_OFF_TIMER_ICR); +AssertCompileMemberOffset(X2APICPAGE, timer_ccr, XAPIC_OFF_TIMER_CCR); +AssertCompileMemberOffset(X2APICPAGE, timer_dcr, XAPIC_OFF_TIMER_DCR); +AssertCompileMemberOffset(X2APICPAGE, self_ipi, X2APIC_OFF_SELF_IPI); + +/** + * APIC MSR access error. + * @note The values must match the array indices in apicMsrAccessError(). + */ +typedef enum APICMSRACCESS +{ + /** MSR read while not in x2APIC. */ + APICMSRACCESS_INVALID_READ_MODE = 0, + /** MSR write while not in x2APIC. */ + APICMSRACCESS_INVALID_WRITE_MODE, + /** MSR read for a reserved/unknown/invalid MSR. */ + APICMSRACCESS_READ_RSVD_OR_UNKNOWN, + /** MSR write for a reserved/unknown/invalid MSR. */ + APICMSRACCESS_WRITE_RSVD_OR_UNKNOWN, + /** MSR read for a write-only MSR. 
*/ + APICMSRACCESS_READ_WRITE_ONLY, + /** MSR write for a read-only MSR. */ + APICMSRACCESS_WRITE_READ_ONLY, + /** MSR read to reserved bits. */ + APICMSRACCESS_READ_RSVD_BITS, + /** MSR write to reserved bits. */ + APICMSRACCESS_WRITE_RSVD_BITS, + /** MSR write with invalid value. */ + APICMSRACCESS_WRITE_INVALID, + /** MSR write disallowed due to incompatible config. */ + APICMSRACCESS_WRITE_DISALLOWED_CONFIG, + /** MSR read disallowed due to incompatible config. */ + APICMSRACCESS_READ_DISALLOWED_CONFIG, + /** Count of enum members (don't use). */ + APICMSRACCESS_COUNT +} APICMSRACCESS; + +/** @name xAPIC Destination Format Register bits. + * See Intel spec. 10.6.2.2 "Logical Destination Mode". + * @{ */ +typedef enum XAPICDESTFORMAT +{ + XAPICDESTFORMAT_FLAT = 0xf, + XAPICDESTFORMAT_CLUSTER = 0 +} XAPICDESTFORMAT; +/** @} */ + +/** @name xAPIC Timer Mode bits. + * See Intel spec. 10.5.1 "Local Vector Table". + * @{ */ +typedef enum XAPICTIMERMODE +{ + XAPICTIMERMODE_ONESHOT = XAPIC_TIMER_MODE_ONESHOT, + XAPICTIMERMODE_PERIODIC = XAPIC_TIMER_MODE_PERIODIC, + XAPICTIMERMODE_TSC_DEADLINE = XAPIC_TIMER_MODE_TSC_DEADLINE +} XAPICTIMERMODE; +/** @} */ + +/** @name xAPIC Interrupt Command Register bits. + * See Intel spec. 10.6.1 "Interrupt Command Register (ICR)". + * See Intel spec. 10.5.1 "Local Vector Table". + * @{ */ +/** + * xAPIC destination shorthand. + */ +typedef enum XAPICDESTSHORTHAND +{ + XAPICDESTSHORTHAND_NONE = 0, + XAPICDESTSHORTHAND_SELF, + XAPIDDESTSHORTHAND_ALL_INCL_SELF, + XAPICDESTSHORTHAND_ALL_EXCL_SELF +} XAPICDESTSHORTHAND; + +/** + * xAPIC INIT level de-assert delivery mode. + */ +typedef enum XAPICINITLEVEL +{ + XAPICINITLEVEL_DEASSERT = 0, + XAPICINITLEVEL_ASSERT +} XAPICLEVEL; + +/** + * xAPIC destination mode. + */ +typedef enum XAPICDESTMODE +{ + XAPICDESTMODE_PHYSICAL = 0, + XAPICDESTMODE_LOGICAL +} XAPICDESTMODE; + +/** + * xAPIC delivery mode type. + */ +typedef enum XAPICDELIVERYMODE +{ + XAPICDELIVERYMODE_FIXED = 0, + XAPICDELIVERYMODE_LOWEST_PRIO = 1, + XAPICDELIVERYMODE_SMI = 2, + XAPICDELIVERYMODE_NMI = 4, + XAPICDELIVERYMODE_INIT = 5, + XAPICDELIVERYMODE_STARTUP = 6, + XAPICDELIVERYMODE_EXTINT = 7 +} XAPICDELIVERYMODE; +/** @} */ + +/** @def APIC_CACHE_LINE_SIZE + * Padding (in bytes) for aligning data in different cache lines. Present + * generation x86 CPUs use 64-byte cache lines[1]. However, Intel NetBurst + * architecture supposedly uses 128-byte cache lines[2]. Since 128 is a + * multiple of 64, we use the larger one here. + * + * [1] - Intel spec "Table 11-1. Characteristics of the Caches, TLBs, Store + * Buffer, and Write Combining Buffer in Intel 64 and IA-32 Processors" + * [2] - Intel spec. 8.10.6.7 "Place Locks and Semaphores in Aligned, 128-Byte + * Blocks of Memory". + */ +#define APIC_CACHE_LINE_SIZE 128 + +/** + * APIC Pending-Interrupt Bitmap (PIB). + */ +typedef struct APICPIB +{ + uint64_t volatile au64VectorBitmap[4]; + uint32_t volatile fOutstandingNotification; + uint8_t au8Reserved[APIC_CACHE_LINE_SIZE - sizeof(uint32_t) - (sizeof(uint64_t) * 4)]; +} APICPIB; +AssertCompileMemberOffset(APICPIB, fOutstandingNotification, 256 / 8); +AssertCompileSize(APICPIB, APIC_CACHE_LINE_SIZE); +/** Pointer to a pending-interrupt bitmap. */ +typedef APICPIB *PAPICPIB; +/** Pointer to a const pending-interrupt bitmap. */ +typedef const APICPIB *PCAPICPIB; + +/** + * APIC PDM instance data (per-VM). + */ +typedef struct APICDEV +{ + /** The MMIO handle. */ + IOMMMIOHANDLE hMmio; +} APICDEV; +/** Pointer to an APIC device. 
*/ +typedef APICDEV *PAPICDEV; +/** Pointer to a const APIC device. */ +typedef APICDEV const *PCAPICDEV; + + +/** + * The APIC GVM instance data. + */ +typedef struct APICR0PERVM +{ + /** The ring-0 device instance. */ + PPDMDEVINSR0 pDevInsR0; +} APICR0PERVM; + + +/** + * APIC VM Instance data. + */ +typedef struct APIC +{ + /** The ring-3 device instance. */ + PPDMDEVINSR3 pDevInsR3; + + /** @name The APIC pending-interrupt bitmap (PIB). + * @{ */ + /** The host-context physical address of the PIB. */ + RTHCPHYS HCPhysApicPib; + /** The ring-0 memory object of the PIB. */ + RTR0MEMOBJ hMemObjApicPibR0; + /** The ring-3 mapping of the memory object of the PIB. */ + RTR0MEMOBJ hMapObjApicPibR0; + /** The APIC PIB virtual address - R0 ptr. */ + R0PTRTYPE(void *) pvApicPibR0; + /** The APIC PIB virtual address - R3 ptr. */ + R3PTRTYPE(void *) pvApicPibR3; + /** The size of the page in bytes. */ + uint32_t cbApicPib; + /** @} */ + + /** @name Other miscellaneous data. + * @{ */ + /** Whether full APIC register virtualization is enabled. */ + bool fVirtApicRegsEnabled; + /** Whether posted-interrupt processing is enabled. */ + bool fPostedIntrsEnabled; + /** Whether TSC-deadline timer mode is supported for the guest. */ + bool fSupportsTscDeadline; + /** Whether this VM has an IO-APIC. */ + bool fIoApicPresent; + /** Whether R0 is enabled or not (applies to MSR handling as well). */ + bool fR0Enabled; + /** Whether RC is enabled or not (applies to MSR handling as well). */ + bool fRCEnabled; + /** Whether Hyper-V x2APIC compatibility mode is enabled. */ + bool fHyperVCompatMode; + /** Alignment padding. */ + bool afAlignment[1]; + /** The max supported APIC mode from CFGM. */ + PDMAPICMODE enmMaxMode; + /** @} */ +} APIC; +/** Pointer to APIC VM instance data. */ +typedef APIC *PAPIC; +/** Pointer to const APIC VM instance data. */ +typedef APIC const *PCAPIC; +AssertCompileMemberAlignment(APIC, cbApicPib, 8); +AssertCompileSizeAlignment(APIC, 8); + +/** + * APIC VMCPU Instance data. + */ +typedef struct APICCPU +{ + /** @name The APIC page. + * @{ */ + /** The host-context physical address of the page. */ + RTHCPHYS HCPhysApicPage; + /** The ring-0 memory object of the page. */ + RTR0MEMOBJ hMemObjApicPageR0; + /** The ring-3 mapping of the memory object of the page. */ + RTR0MEMOBJ hMapObjApicPageR0; + /** The APIC page virtual address - R0 ptr. */ + R0PTRTYPE(void *) pvApicPageR0; + /** The APIC page virtual address - R3 ptr. */ + R3PTRTYPE(void *) pvApicPageR3; + /** The size of the page in bytes. */ + uint32_t cbApicPage; + /** @} */ + + /** @name Auxiliary state. + * @{ */ + /** The error status register's internal state. */ + uint32_t uEsrInternal; + /** The APIC base MSR.*/ + uint64_t volatile uApicBaseMsr; + /** @} */ + + /** @name The pending-interrupt bitmaps (PIB). + * @{ */ + /** The host-context physical address of the page. */ + RTHCPHYS HCPhysApicPib; + /** The APIC PIB virtual address - R0 ptr. */ + R0PTRTYPE(void *) pvApicPibR0; + /** The APIC PIB virtual address - R3 ptr. */ + R3PTRTYPE(void *) pvApicPibR3; + /** The APIC PIB for level-sensitive interrupts. */ + APICPIB ApicPibLevel; + /** @} */ + + /** @name Other miscellaneous data. + * @{ */ + /** Whether the LINT0 interrupt line is active. */ + bool volatile fActiveLint0; + /** Whether the LINT1 interrupt line is active. */ + bool volatile fActiveLint1; + /** Alignment padding. */ + uint8_t auAlignment2[6]; + /** The source tags corresponding to each interrupt vector (debugging). 
*/ + uint32_t auSrcTags[256]; + /** @} */ + + /** @name The APIC timer. + * @{ */ + /** The timer. */ + TMTIMERHANDLE hTimer; + /** The time stamp when the timer was initialized. + * @note Access protected by the timer critsect. */ + uint64_t u64TimerInitial; + /** Cache of timer initial count of the frequency hint to TM. */ + uint32_t uHintedTimerInitialCount; + /** Cache of timer shift of the frequency hint to TM. */ + uint32_t uHintedTimerShift; + /** The timer description. */ + char szTimerDesc[16]; + /** @} */ + + /** @name Log Max counters + * @{ */ + uint32_t cLogMaxAccessError; + uint32_t cLogMaxSetApicBaseAddr; + uint32_t cLogMaxGetApicBaseAddr; + uint32_t uAlignment4; + /** @} */ + +#ifdef VBOX_WITH_STATISTICS + /** @name APIC statistics. + * @{ */ + /** Number of MMIO reads in RZ. */ + STAMCOUNTER StatMmioReadRZ; + /** Number of MMIO reads in R3. */ + STAMCOUNTER StatMmioReadR3; + + /** Number of MMIO writes in RZ. */ + STAMCOUNTER StatMmioWriteRZ; + /** Number of MMIO writes in R3. */ + STAMCOUNTER StatMmioWriteR3; + + /** Number of MSR reads in RZ. */ + STAMCOUNTER StatMsrReadRZ; + /** Number of MSR reads in R3. */ + STAMCOUNTER StatMsrReadR3; + + /** Number of MSR writes in RZ. */ + STAMCOUNTER StatMsrWriteRZ; + /** Number of MSR writes in R3. */ + STAMCOUNTER StatMsrWriteR3; + + /** Profiling of APICUpdatePendingInterrupts(). */ + STAMPROFILE StatUpdatePendingIntrs; + /** Profiling of apicPostInterrupt(). */ + STAMPROFILE StatPostIntr; + /** Number of times an interrupt is already pending in + * apicPostInterrupts().*/ + STAMCOUNTER StatPostIntrAlreadyPending; + /** Number of times the timer callback is invoked. */ + STAMCOUNTER StatTimerCallback; + /** Number of times the TPR is written. */ + STAMCOUNTER StatTprWrite; + /** Number of times the TPR is read. */ + STAMCOUNTER StatTprRead; + /** Number of times the EOI is written. */ + STAMCOUNTER StatEoiWrite; + /** Number of times TPR masks an interrupt in apicGetInterrupt(). */ + STAMCOUNTER StatMaskedByTpr; + /** Number of times PPR masks an interrupt in apicGetInterrupt(). */ + STAMCOUNTER StatMaskedByPpr; + /** Number of times the timer ICR is written. */ + STAMCOUNTER StatTimerIcrWrite; + /** Number of times the ICR Lo (send IPI) is written. */ + STAMCOUNTER StatIcrLoWrite; + /** Number of times the ICR Hi is written. */ + STAMCOUNTER StatIcrHiWrite; + /** Number of times the full ICR (x2APIC send IPI) is written. */ + STAMCOUNTER StatIcrFullWrite; + /** Number of times the APIC-ID MSR is read. */ + STAMCOUNTER StatIdMsrRead; + /** @} */ +#endif +} APICCPU; +/** Pointer to APIC VMCPU instance data. */ +typedef APICCPU *PAPICCPU; +/** Pointer to a const APIC VMCPU instance data. */ +typedef APICCPU const *PCAPICCPU; +AssertCompileMemberAlignment(APICCPU, uApicBaseMsr, 8); + +/** + * APIC operating modes as returned by apicGetMode(). + * + * The values match hardware states. + * See Intel spec. 10.12.1 "Detecting and Enabling x2APIC Mode". + */ +typedef enum APICMODE +{ + APICMODE_DISABLED = 0, + APICMODE_INVALID, + APICMODE_XAPIC, + APICMODE_X2APIC +} APICMODE; + +/** + * Gets the timer shift value. + * + * @returns The timer shift value. + * @param pXApicPage The xAPIC page. + */ +DECLINLINE(uint8_t) apicGetTimerShift(PCXAPICPAGE pXApicPage) +{ + /* See Intel spec. 10.5.4 "APIC Timer". 
*/ + uint32_t uShift = pXApicPage->timer_dcr.u.u2DivideValue0 | (pXApicPage->timer_dcr.u.u1DivideValue1 << 2); + return (uShift + 1) & 7; +} + + +const char *apicGetModeName(APICMODE enmMode); +const char *apicGetDestFormatName(XAPICDESTFORMAT enmDestFormat); +const char *apicGetDeliveryModeName(XAPICDELIVERYMODE enmDeliveryMode); +const char *apicGetDestModeName(XAPICDESTMODE enmDestMode); +const char *apicGetTriggerModeName(XAPICTRIGGERMODE enmTriggerMode); +const char *apicGetDestShorthandName(XAPICDESTSHORTHAND enmDestShorthand); +const char *apicGetTimerModeName(XAPICTIMERMODE enmTimerMode); +void apicHintTimerFreq(PPDMDEVINS pDevIns, PAPICCPU pApicCpu, uint32_t uInitialCount, uint8_t uTimerShift); +APICMODE apicGetMode(uint64_t uApicBaseMsr); + +DECLCALLBACK(VBOXSTRICTRC) apicReadMmio(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb); +DECLCALLBACK(VBOXSTRICTRC) apicWriteMmio(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb); + +bool apicPostInterrupt(PVMCPUCC pVCpu, uint8_t uVector, XAPICTRIGGERMODE enmTriggerMode, uint32_t uSrcTag); +void apicStartTimer(PVMCPUCC pVCpu, uint32_t uInitialCount); +void apicClearInterruptFF(PVMCPUCC pVCpu, PDMAPICIRQ enmType); +void apicInitIpi(PVMCPUCC pVCpu); +void apicResetCpu(PVMCPUCC pVCpu, bool fResetApicBaseMsr); + +DECLCALLBACK(int) apicR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg); +DECLCALLBACK(int) apicR3Destruct(PPDMDEVINS pDevIns); +DECLCALLBACK(void) apicR3Relocate(PPDMDEVINS pDevIns, RTGCINTPTR offDelta); +DECLCALLBACK(void) apicR3Reset(PPDMDEVINS pDevIns); +DECLCALLBACK(int) apicR3InitComplete(PPDMDEVINS pDevIns); + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_APICInternal_h */ + diff --git a/src/VBox/VMM/include/CFGMInternal.h b/src/VBox/VMM/include/CFGMInternal.h new file mode 100644 index 00000000..054082e9 --- /dev/null +++ b/src/VBox/VMM/include/CFGMInternal.h @@ -0,0 +1,134 @@ +/* $Id: CFGMInternal.h $ */ +/** @file + * CFGM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_CFGMInternal_h +#define VMM_INCLUDED_SRC_include_CFGMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> + + +/** @defgroup grp_cfgm_int Internals. + * @ingroup grp_cfgm + * @{ + */ + + +/** + * Configuration manager propertype value. + */ +typedef union CFGMVALUE +{ + /** Integer value. */ + struct CFGMVALUE_INTEGER + { + /** The integer represented as 64-bit unsigned. */ + uint64_t u64; + } Integer; + + /** String value. (UTF-8 of course) */ + struct CFGMVALUE_STRING + { + /** Length of string. (In bytes, including the terminator.) */ + size_t cb; + /** Pointer to the string. */ + char *psz; + } String; + + /** Byte string value. */ + struct CFGMVALUE_BYTES + { + /** Length of byte string. (in bytes) */ + size_t cb; + /** Pointer to the byte string. */ + uint8_t *pau8; + } Bytes; +} CFGMVALUE; +/** Pointer to configuration manager property value. 
*/ +typedef CFGMVALUE *PCFGMVALUE; + + +/** + * Configuration manager tree node. + */ +typedef struct CFGMLEAF +{ + /** Pointer to the next leaf. */ + PCFGMLEAF pNext; + /** Pointer to the previous leaf. */ + PCFGMLEAF pPrev; + + /** Property type. */ + CFGMVALUETYPE enmType; + /** Property value. */ + CFGMVALUE Value; + + /** Name length. (exclusive) */ + size_t cchName; + /** Name. */ + char szName[1]; +} CFGMLEAF; + + +/** + * Configuration manager tree node. + */ +typedef struct CFGMNODE +{ + /** Pointer to the next node (on this level). */ + PCFGMNODE pNext; + /** Pointer to the previous node (on this level). */ + PCFGMNODE pPrev; + /** Pointer Parent node. */ + PCFGMNODE pParent; + /** Pointer to first child node. */ + PCFGMNODE pFirstChild; + /** Pointer to first property leaf. */ + PCFGMLEAF pFirstLeaf; + + /** Pointer to the VM owning this node. */ + PVM pVM; + + /** The root of a 'restricted' subtree, i.e. the parent is + * invisible to non-trusted users. + */ + bool fRestrictedRoot; + + /** Name length. (exclusive) */ + size_t cchName; + /** Name. */ + char szName[1]; +} CFGMNODE; + + + +/** + * CFGM VM Instance data. + * Changes to this must checked against the padding of the cfgm union in VM! + */ +typedef struct CFGM +{ + /** Pointer to root node. */ + R3PTRTYPE(PCFGMNODE) pRoot; +} CFGM; + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_CFGMInternal_h */ diff --git a/src/VBox/VMM/include/CPUMInternal.h b/src/VBox/VMM/include/CPUMInternal.h new file mode 100644 index 00000000..2ed78863 --- /dev/null +++ b/src/VBox/VMM/include/CPUMInternal.h @@ -0,0 +1,534 @@ +/* $Id: CPUMInternal.h $ */ +/** @file + * CPUM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_CPUMInternal_h +#define VMM_INCLUDED_SRC_include_CPUMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef VBOX_FOR_DTRACE_LIB +# include <VBox/cdefs.h> +# include <VBox/types.h> +# include <VBox/vmm/stam.h> +# include <iprt/x86.h> +# include <VBox/vmm/pgm.h> +#else +# pragma D depends_on library x86.d +# pragma D depends_on library cpumctx.d +# pragma D depends_on library cpum.d + +/* Some fudging. */ +typedef uint64_t STAMCOUNTER; +#endif + + + + +/** @defgroup grp_cpum_int Internals + * @ingroup grp_cpum + * @internal + * @{ + */ + +/** Flags and types for CPUM fault handlers + * @{ */ +/** Type: Load DS */ +#define CPUM_HANDLER_DS 1 +/** Type: Load ES */ +#define CPUM_HANDLER_ES 2 +/** Type: Load FS */ +#define CPUM_HANDLER_FS 3 +/** Type: Load GS */ +#define CPUM_HANDLER_GS 4 +/** Type: IRET */ +#define CPUM_HANDLER_IRET 5 +/** Type mask. */ +#define CPUM_HANDLER_TYPEMASK 0xff +/** If set EBP points to the CPUMCTXCORE that's being used. */ +#define CPUM_HANDLER_CTXCORE_IN_EBP RT_BIT(31) +/** @} */ + + +/** Use flags (CPUM::fUseFlags). + * (Don't forget to sync this with CPUMInternal.mac !) + * @note Part of saved state. + * @{ */ +/** Indicates that we've saved the host FPU, SSE, whatever state and that it + * needs to be restored. 
*/ +#define CPUM_USED_FPU_HOST RT_BIT(0) +/** Indicates that we've loaded the guest FPU, SSE, whatever state and that it + * needs to be saved. */ +#define CPUM_USED_FPU_GUEST RT_BIT(10) +/** Used the guest FPU, SSE or such stuff since last we were in REM. + * REM syncing is clearing this, lazy FPU is setting it. */ +#define CPUM_USED_FPU_SINCE_REM RT_BIT(1) +/** The XMM state was manually restored. (AMD only) */ +#define CPUM_USED_MANUAL_XMM_RESTORE RT_BIT(2) + +/** Host OS is using SYSENTER and we must NULL the CS. */ +#define CPUM_USE_SYSENTER RT_BIT(3) +/** Host OS is using SYSENTER and we must NULL the CS. */ +#define CPUM_USE_SYSCALL RT_BIT(4) + +/** Debug registers are used by host and that DR7 and DR6 must be saved and + * disabled when switching to raw-mode. */ +#define CPUM_USE_DEBUG_REGS_HOST RT_BIT(5) +/** Records that we've saved the host DRx registers. + * In ring-0 this means all (DR0-7), while in raw-mode context this means DR0-3 + * since DR6 and DR7 are covered by CPUM_USE_DEBUG_REGS_HOST. */ +#define CPUM_USED_DEBUG_REGS_HOST RT_BIT(6) +/** Set to indicate that we should save host DR0-7 and load the hypervisor debug + * registers in the raw-mode world switchers. (See CPUMRecalcHyperDRx.) */ +#define CPUM_USE_DEBUG_REGS_HYPER RT_BIT(7) +/** Used in ring-0 to indicate that we have loaded the hypervisor debug + * registers. */ +#define CPUM_USED_DEBUG_REGS_HYPER RT_BIT(8) +/** Used in ring-0 to indicate that we have loaded the guest debug + * registers (DR0-3 and maybe DR6) for direct use by the guest. + * DR7 (and AMD-V DR6) are handled via the VMCB. */ +#define CPUM_USED_DEBUG_REGS_GUEST RT_BIT(9) + +/** Sync the FPU state on next entry (32->64 switcher only). */ +#define CPUM_SYNC_FPU_STATE RT_BIT(16) +/** Sync the debug state on next entry (32->64 switcher only). */ +#define CPUM_SYNC_DEBUG_REGS_GUEST RT_BIT(17) +/** Sync the debug state on next entry (32->64 switcher only). + * Almost the same as CPUM_USE_DEBUG_REGS_HYPER in the raw-mode switchers. */ +#define CPUM_SYNC_DEBUG_REGS_HYPER RT_BIT(18) +/** Host CPU requires fxsave/fxrstor leaky bit handling. */ +#define CPUM_USE_FFXSR_LEAKY RT_BIT(19) +/** Set if the VM supports long-mode. */ +#define CPUM_USE_SUPPORTS_LONGMODE RT_BIT(20) +/** @} */ + + +/** @name CPUM Saved State Version. + * @{ */ +/** The current saved state version. */ +#define CPUM_SAVED_STATE_VERSION CPUM_SAVED_STATE_VERSION_HWVIRT_VMX_IEM +/** The saved state version including VMX hardware virtualization state (IEM only + * execution). */ +#define CPUM_SAVED_STATE_VERSION_HWVIRT_VMX_IEM 19 +/** The saved state version including SVM hardware virtualization state. */ +#define CPUM_SAVED_STATE_VERSION_HWVIRT_SVM 18 +/** The saved state version including XSAVE state. */ +#define CPUM_SAVED_STATE_VERSION_XSAVE 17 +/** The saved state version with good CPUID leaf count. */ +#define CPUM_SAVED_STATE_VERSION_GOOD_CPUID_COUNT 16 +/** CPUID changes with explode forgetting to update the leaf count on + * restore, resulting in garbage being saved restoring+saving old states). */ +#define CPUM_SAVED_STATE_VERSION_BAD_CPUID_COUNT 15 +/** The saved state version before the CPUIDs changes. */ +#define CPUM_SAVED_STATE_VERSION_PUT_STRUCT 14 +/** The saved state version before using SSMR3PutStruct. */ +#define CPUM_SAVED_STATE_VERSION_MEM 13 +/** The saved state version before introducing the MSR size field. 
*/ +#define CPUM_SAVED_STATE_VERSION_NO_MSR_SIZE 12 +/** The saved state version of 3.2, 3.1 and 3.3 trunk before the hidden + * selector register change (CPUM_CHANGED_HIDDEN_SEL_REGS_INVALID). */ +#define CPUM_SAVED_STATE_VERSION_VER3_2 11 +/** The saved state version of 3.0 and 3.1 trunk before the teleportation + * changes. */ +#define CPUM_SAVED_STATE_VERSION_VER3_0 10 +/** The saved state version for the 2.1 trunk before the MSR changes. */ +#define CPUM_SAVED_STATE_VERSION_VER2_1_NOMSR 9 +/** The saved state version of 2.0, used for backwards compatibility. */ +#define CPUM_SAVED_STATE_VERSION_VER2_0 8 +/** The saved state version of 1.6, used for backwards compatibility. */ +#define CPUM_SAVED_STATE_VERSION_VER1_6 6 +/** @} */ + + +/** + * CPU info + */ +typedef struct CPUMINFO +{ + /** The number of MSR ranges (CPUMMSRRANGE) in the array pointed to below. */ + uint32_t cMsrRanges; + /** Mask applied to ECX before looking up the MSR for a RDMSR/WRMSR + * instruction. Older hardware has been observed to ignore higher bits. */ + uint32_t fMsrMask; + + /** MXCSR mask. */ + uint32_t fMxCsrMask; + + /** The number of CPUID leaves (CPUMCPUIDLEAF) in the array pointed to below. */ + uint32_t cCpuIdLeaves; + /** The index of the first extended CPUID leaf in the array. + * Set to cCpuIdLeaves if none present. */ + uint32_t iFirstExtCpuIdLeaf; + /** How to handle unknown CPUID leaves. */ + CPUMUNKNOWNCPUID enmUnknownCpuIdMethod; + /** For use with CPUMUNKNOWNCPUID_DEFAULTS (DB & VM), + * CPUMUNKNOWNCPUID_LAST_STD_LEAF (VM) and CPUMUNKNOWNCPUID_LAST_STD_LEAF_WITH_ECX (VM). */ + CPUMCPUID DefCpuId; + + /** Scalable bus frequency used for reporting other frequencies. */ + uint64_t uScalableBusFreq; + + /** Pointer to the MSR ranges (ring-0 pointer). */ + R0PTRTYPE(PCPUMMSRRANGE) paMsrRangesR0; + /** Pointer to the CPUID leaves (ring-0 pointer). */ + R0PTRTYPE(PCPUMCPUIDLEAF) paCpuIdLeavesR0; + + /** Pointer to the MSR ranges (ring-3 pointer). */ + R3PTRTYPE(PCPUMMSRRANGE) paMsrRangesR3; + /** Pointer to the CPUID leaves (ring-3 pointer). */ + R3PTRTYPE(PCPUMCPUIDLEAF) paCpuIdLeavesR3; +} CPUMINFO; +/** Pointer to a CPU info structure. */ +typedef CPUMINFO *PCPUMINFO; +/** Pointer to a const CPU info structure. */ +typedef CPUMINFO const *CPCPUMINFO; + + +/** + * The saved host CPU state. + */ +typedef struct CPUMHOSTCTX +{ + /** General purpose register, selectors, flags and more + * @{ */ + /** General purpose register ++ + * { */ + /*uint64_t rax; - scratch*/ + uint64_t rbx; + /*uint64_t rcx; - scratch*/ + /*uint64_t rdx; - scratch*/ + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rsp; + /*uint64_t r8; - scratch*/ + /*uint64_t r9; - scratch*/ + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + /*uint64_t rip; - scratch*/ + uint64_t rflags; + /** @} */ + + /** Selector registers + * @{ */ + RTSEL ss; + RTSEL ssPadding; + RTSEL gs; + RTSEL gsPadding; + RTSEL fs; + RTSEL fsPadding; + RTSEL es; + RTSEL esPadding; + RTSEL ds; + RTSEL dsPadding; + RTSEL cs; + RTSEL csPadding; + /** @} */ + + /** Control registers. + * @{ */ + /** The CR0 FPU state in HM mode. */ + uint64_t cr0; + /*uint64_t cr2; - scratch*/ + uint64_t cr3; + uint64_t cr4; + uint64_t cr8; + /** @} */ + + /** Debug registers. + * @{ */ + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + /** @} */ + + /** Global Descriptor Table register. */ + X86XDTR64 gdtr; + uint16_t gdtrPadding; + /** Interrupt Descriptor Table register. 
*/ + X86XDTR64 idtr; + uint16_t idtrPadding; + /** The task register. */ + RTSEL ldtr; + RTSEL ldtrPadding; + /** The task register. */ + RTSEL tr; + RTSEL trPadding; + + /** MSRs + * @{ */ + CPUMSYSENTER SysEnter; + uint64_t FSbase; + uint64_t GSbase; + uint64_t efer; + /** @} */ + + /* padding to get 64byte aligned size */ + uint8_t auPadding[8]; + +#if HC_ARCH_BITS != 64 +# error HC_ARCH_BITS not defined or unsupported +#endif + + /** Pointer to the FPU/SSE/AVX/XXXX state ring-0 mapping. */ + R0PTRTYPE(PX86XSAVEAREA) pXStateR0; + /** Pointer to the FPU/SSE/AVX/XXXX state ring-3 mapping. */ + R3PTRTYPE(PX86XSAVEAREA) pXStateR3; + /** The XCR0 register. */ + uint64_t xcr0; + /** The mask to pass to XSAVE/XRSTOR in EDX:EAX. If zero we use + * FXSAVE/FXRSTOR (since bit 0 will always be set, we only need to test it). */ + uint64_t fXStateMask; +} CPUMHOSTCTX; +#ifndef VBOX_FOR_DTRACE_LIB +AssertCompileSizeAlignment(CPUMHOSTCTX, 64); +#endif +/** Pointer to the saved host CPU state. */ +typedef CPUMHOSTCTX *PCPUMHOSTCTX; + + +/** + * The hypervisor context CPU state (just DRx left now). + */ +typedef struct CPUMHYPERCTX +{ + /** Debug registers. + * @remarks DR4 and DR5 should not be used since they are aliases for + * DR6 and DR7 respectively on both AMD and Intel CPUs. + * @remarks DR8-15 are currently not supported by AMD or Intel, so + * neither do we. + */ + uint64_t dr[8]; + /** @todo eliminiate the rest. */ + uint64_t cr3; + uint64_t au64Padding[7]; +} CPUMHYPERCTX; +#ifndef VBOX_FOR_DTRACE_LIB +AssertCompileSizeAlignment(CPUMHYPERCTX, 64); +#endif +/** Pointer to the hypervisor context CPU state. */ +typedef CPUMHYPERCTX *PCPUMHYPERCTX; + + +/** + * CPUM Data (part of VM) + */ +typedef struct CPUM +{ + /** Use flags. + * These flags indicates which CPU features the host uses. + */ + uint32_t fHostUseFlags; + + /** CR4 mask */ + struct + { + uint32_t AndMask; /**< @todo Move these to the per-CPU structure and fix the switchers. Saves a register! */ + uint32_t OrMask; + } CR4; + + /** The (more) portable CPUID level. */ + uint8_t u8PortableCpuIdLevel; + /** Indicates that a state restore is pending. + * This is used to verify load order dependencies (PGM). */ + bool fPendingRestore; + uint8_t abPadding0[2]; + + /** XSAVE/XRTOR components we can expose to the guest mask. */ + uint64_t fXStateGuestMask; + /** XSAVE/XRSTOR host mask. Only state components in this mask can be exposed + * to the guest. This is 0 if no XSAVE/XRSTOR bits can be exposed. */ + uint64_t fXStateHostMask; + + /** The host MXCSR mask (determined at init). */ + uint32_t fHostMxCsrMask; + /** Nested VMX: Whether to expose VMX-preemption timer to the guest. */ + bool fNestedVmxPreemptTimer; + uint8_t abPadding1[3]; + + /** Align to 64-byte boundary. */ + uint8_t abPadding2[20+4]; + + /** Host CPU feature information. + * Externaly visible via the VM structure, aligned on 64-byte boundrary. */ + CPUMFEATURES HostFeatures; + /** Guest CPU feature information. + * Externaly visible via that VM structure, aligned with HostFeatures. */ + CPUMFEATURES GuestFeatures; + /** Guest CPU info. */ + CPUMINFO GuestInfo; + + /** The standard set of CpuId leaves. */ + CPUMCPUID aGuestCpuIdPatmStd[6]; + /** The extended set of CpuId leaves. */ + CPUMCPUID aGuestCpuIdPatmExt[10]; + /** The centaur set of CpuId leaves. */ + CPUMCPUID aGuestCpuIdPatmCentaur[4]; + + /** @name MSR statistics. 
+ * @{ */ + STAMCOUNTER cMsrWrites; + STAMCOUNTER cMsrWritesToIgnoredBits; + STAMCOUNTER cMsrWritesRaiseGp; + STAMCOUNTER cMsrWritesUnknown; + STAMCOUNTER cMsrReads; + STAMCOUNTER cMsrReadsRaiseGp; + STAMCOUNTER cMsrReadsUnknown; + /** @} */ +} CPUM; +#ifndef VBOX_FOR_DTRACE_LIB +AssertCompileMemberOffset(CPUM, HostFeatures, 64); +AssertCompileMemberOffset(CPUM, GuestFeatures, 112); +#endif +/** Pointer to the CPUM instance data residing in the shared VM structure. */ +typedef CPUM *PCPUM; + +/** + * CPUM Data (part of VMCPU) + */ +typedef struct CPUMCPU +{ + /** + * Guest context. + * Aligned on a 64-byte boundary. + */ + CPUMCTX Guest; + + /** + * Guest context - misc MSRs + * Aligned on a 64-byte boundary. + */ + CPUMCTXMSRS GuestMsrs; + + /** Nested VMX: VMX-preemption timer - R0 ptr. */ + PTMTIMERR0 pNestedVmxPreemptTimerR0; + /** Nested VMX: VMX-preemption timer - R3 ptr. */ + PTMTIMERR3 pNestedVmxPreemptTimerR3; + + /** Use flags. + * These flags indicates both what is to be used and what has been used. + */ + uint32_t fUseFlags; + + /** Changed flags. + * These flags indicates to REM (and others) which important guest + * registers which has been changed since last time the flags were cleared. + * See the CPUM_CHANGED_* defines for what we keep track of. + */ + uint32_t fChanged; + + /** Temporary storage for the return code of the function called in the + * 32-64 switcher. */ + uint32_t u32RetCode; + +#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI + /** Used by the world switcher code to store which vectors needs restoring on + * the way back. */ + uint32_t fApicDisVectors; + /** The address of the APIC mapping, NULL if no APIC. + * Call CPUMR0SetLApic to update this before doing a world switch. */ + RTHCPTR pvApicBase; + /** Set if the CPU has the X2APIC mode enabled. + * Call CPUMR0SetLApic to update this before doing a world switch. */ + bool fX2Apic; +#else + uint8_t abPadding3[4 + sizeof(RTHCPTR) + 1]; +#endif + + /** Have we entered the recompiler? */ + bool fRemEntered; + /** Whether the X86_CPUID_FEATURE_EDX_APIC and X86_CPUID_AMD_FEATURE_EDX_APIC + * (?) bits are visible or not. (The APIC is responsible for setting this + * when loading state, so we won't save it.) */ + bool fCpuIdApicFeatureVisible; + + /** Align the next member on a 64-byte boundary. */ + uint8_t abPadding2[64 - (16 + 12 + 4 + 8 + 1 + 2)]; + + /** Saved host context. Only valid while inside RC or HM contexts. + * Must be aligned on a 64-byte boundary. */ + CPUMHOSTCTX Host; + /** Old hypervisor context, only used for combined DRx values now. + * Must be aligned on a 64-byte boundary. */ + CPUMHYPERCTX Hyper; + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + uint8_t aMagic[56]; + uint64_t uMagic; +#endif +} CPUMCPU; +/** Pointer to the CPUMCPU instance data residing in the shared VMCPU structure. 
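 *
 * @remarks The fChanged member above is normally updated through the
 *          CPUMSetChangedFlags accessor rather than directly; a minimal
 *          sketch, with pVCpu being the cross context virtual CPU pointer:
 * @code
 *     CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
 * @endcode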
*/ +typedef CPUMCPU *PCPUMCPU; + +#ifndef VBOX_FOR_DTRACE_LIB +RT_C_DECLS_BEGIN + +PCPUMCPUIDLEAF cpumCpuIdGetLeaf(PVM pVM, uint32_t uLeaf); +PCPUMCPUIDLEAF cpumCpuIdGetLeafEx(PVM pVM, uint32_t uLeaf, uint32_t uSubLeaf, bool *pfExactSubLeafHit); + +# ifdef IN_RING3 +int cpumR3DbgInit(PVM pVM); +int cpumR3CpuIdExplodeFeatures(PCCPUMCPUIDLEAF paLeaves, uint32_t cLeaves, PCCPUMMSRS pMsrs, PCPUMFEATURES pFeatures); +int cpumR3InitCpuIdAndMsrs(PVM pVM, PCCPUMMSRS pHostMsrs); +void cpumR3InitVmxGuestFeaturesAndMsrs(PVM pVM, PCVMXMSRS pHostVmxMsrs, PVMXMSRS pGuestVmxMsrs); +void cpumR3SaveCpuId(PVM pVM, PSSMHANDLE pSSM); +int cpumR3LoadCpuId(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, PCCPUMMSRS pGuestMsrs); +int cpumR3LoadCpuIdPre32(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion); +DECLCALLBACK(void) cpumR3CpuIdInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); + +int cpumR3DbGetCpuInfo(const char *pszName, PCPUMINFO pInfo); +int cpumR3MsrRangesInsert(PVM pVM, PCPUMMSRRANGE *ppaMsrRanges, uint32_t *pcMsrRanges, PCCPUMMSRRANGE pNewRange); +int cpumR3MsrReconcileWithCpuId(PVM pVM); +int cpumR3MsrApplyFudge(PVM pVM); +int cpumR3MsrRegStats(PVM pVM); +int cpumR3MsrStrictInitChecks(void); +PCPUMMSRRANGE cpumLookupMsrRange(PVM pVM, uint32_t idMsr); +# endif + +# ifdef IN_RC +DECLASM(int) cpumHandleLazyFPUAsm(PCPUMCPU pCPUM); +# endif + +# ifdef IN_RING0 +DECLASM(int) cpumR0SaveHostRestoreGuestFPUState(PCPUMCPU pCPUM); +DECLASM(void) cpumR0SaveGuestRestoreHostFPUState(PCPUMCPU pCPUM); +# if ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) +DECLASM(void) cpumR0RestoreHostFPUState(PCPUMCPU pCPUM); +# endif +# endif + +# if defined(IN_RC) || defined(IN_RING0) +DECLASM(int) cpumRZSaveHostFPUState(PCPUMCPU pCPUM); +DECLASM(void) cpumRZSaveGuestFpuState(PCPUMCPU pCPUM, bool fLeaveFpuAccessible); +DECLASM(void) cpumRZSaveGuestSseRegisters(PCPUMCPU pCPUM); +DECLASM(void) cpumRZSaveGuestAvxRegisters(PCPUMCPU pCPUM); +# endif + +RT_C_DECLS_END +#endif /* !VBOX_FOR_DTRACE_LIB */ + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_CPUMInternal_h */ + diff --git a/src/VBox/VMM/include/CPUMInternal.mac b/src/VBox/VMM/include/CPUMInternal.mac new file mode 100644 index 00000000..4b571409 --- /dev/null +++ b/src/VBox/VMM/include/CPUMInternal.mac @@ -0,0 +1,709 @@ +; $Id: CPUMInternal.mac $ +;; @file +; CPUM - Internal header file (asm). +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +%include "VBox/asmdefs.mac" +%include "VBox/vmm/cpum.mac" + +;; Check sanity. +%ifdef VBOX_WITH_KERNEL_USING_XMM + %ifndef IN_RING0 + %error "What? We've got code assuming VBOX_WITH_KERNEL_USING_XMM is only defined in ring-0!" 
+ %endif +%endif + +;; For numeric expressions +%ifdef RT_ARCH_AMD64 + %define CPUM_IS_AMD64 1 +%else + %define CPUM_IS_AMD64 0 +%endif + + +;; +; CPU info +struc CPUMINFO + .cMsrRanges resd 1 ; uint32_t + .fMsrMask resd 1 ; uint32_t + .fMxCsrMask resd 1 ; uint32_t + .cCpuIdLeaves resd 1 ; uint32_t + .iFirstExtCpuIdLeaf resd 1 ; uint32_t + .enmUnknownCpuIdMethod resd 1 ; CPUMUNKNOWNCPUID + .DefCpuId resb CPUMCPUID_size ; CPUMCPUID + .uScalableBusFreq resq 1 ; uint64_t + .paMsrRangesR0 RTR0PTR_RES 1 ; R0PTRTYPE(PCPUMMSRRANGE) + .paCpuIdLeavesR0 RTR0PTR_RES 1 ; R0PTRTYPE(PCPUMCPUIDLEAF) + .paMsrRangesR3 RTR3PTR_RES 1 ; R3PTRTYPE(PCPUMMSRRANGE) + .paCpuIdLeavesR3 RTR3PTR_RES 1 ; R3PTRTYPE(PCPUMCPUIDLEAF) +endstruc + + +%define CPUM_USED_FPU_HOST RT_BIT(0) +%define CPUM_USED_FPU_GUEST RT_BIT(10) +%define CPUM_USED_FPU_SINCE_REM RT_BIT(1) +%define CPUM_USED_MANUAL_XMM_RESTORE RT_BIT(2) +%define CPUM_USE_SYSENTER RT_BIT(3) +%define CPUM_USE_SYSCALL RT_BIT(4) +%define CPUM_USE_DEBUG_REGS_HOST RT_BIT(5) +%define CPUM_USED_DEBUG_REGS_HOST RT_BIT(6) +%define CPUM_USE_DEBUG_REGS_HYPER RT_BIT(7) +%define CPUM_USED_DEBUG_REGS_HYPER RT_BIT(8) +%define CPUM_USED_DEBUG_REGS_GUEST RT_BIT(9) +%define CPUM_SYNC_FPU_STATE RT_BIT(16) +%define CPUM_SYNC_DEBUG_REGS_GUEST RT_BIT(17) +%define CPUM_SYNC_DEBUG_REGS_HYPER RT_BIT(18) +%define CPUM_USE_FFXSR_LEAKY RT_BIT(19) +%define CPUM_USE_SUPPORTS_LONGMODE RT_BIT(20) + +%define CPUM_HANDLER_DS 1 +%define CPUM_HANDLER_ES 2 +%define CPUM_HANDLER_FS 3 +%define CPUM_HANDLER_GS 4 +%define CPUM_HANDLER_IRET 5 +%define CPUM_HANDLER_TYPEMASK 0ffh +%define CPUM_HANDLER_CTXCORE_IN_EBP RT_BIT(31) + + +struc CPUM + ;... + .fHostUseFlags resd 1 + + ; CR4 masks + .CR4.AndMask resd 1 + .CR4.OrMask resd 1 + .u8PortableCpuIdLevel resb 1 + .fPendingRestore resb 1 + + alignb 8 + .fXStateGuestMask resq 1 + .fXStateHostMask resq 1 + + alignb 64 + .HostFeatures resb 48 + .GuestFeatures resb 48 + .GuestInfo resb RTHCPTR_CB*4 + 4*12 + + ; Patch manager saved state compatability CPUID leaf arrays + .aGuestCpuIdPatmStd resb 16*6 + .aGuestCpuIdPatmExt resb 16*10 + .aGuestCpuIdPatmCentaur resb 16*4 + + alignb 8 + .cMsrWrites resq 1 + .cMsrWritesToIgnoredBits resq 1 + .cMsrWritesRaiseGp resq 1 + .cMsrWritesUnknown resq 1 + .cMsrReads resq 1 + .cMsrReadsRaiseGp resq 1 + .cMsrReadsUnknown resq 1 +endstruc + +struc CPUMCPU + ; + ; Guest context state + ; (Identical to the .Hyper chunk below and to CPUMCTX in cpum.mac.) 
+ ; + .Guest resq 0 + .Guest.eax resq 1 + .Guest.ecx resq 1 + .Guest.edx resq 1 + .Guest.ebx resq 1 + .Guest.esp resq 1 + .Guest.ebp resq 1 + .Guest.esi resq 1 + .Guest.edi resq 1 + .Guest.r8 resq 1 + .Guest.r9 resq 1 + .Guest.r10 resq 1 + .Guest.r11 resq 1 + .Guest.r12 resq 1 + .Guest.r13 resq 1 + .Guest.r14 resq 1 + .Guest.r15 resq 1 + .Guest.es.Sel resw 1 + .Guest.es.PaddingSel resw 1 + .Guest.es.ValidSel resw 1 + .Guest.es.fFlags resw 1 + .Guest.es.u64Base resq 1 + .Guest.es.u32Limit resd 1 + .Guest.es.Attr resd 1 + .Guest.cs.Sel resw 1 + .Guest.cs.PaddingSel resw 1 + .Guest.cs.ValidSel resw 1 + .Guest.cs.fFlags resw 1 + .Guest.cs.u64Base resq 1 + .Guest.cs.u32Limit resd 1 + .Guest.cs.Attr resd 1 + .Guest.ss.Sel resw 1 + .Guest.ss.PaddingSel resw 1 + .Guest.ss.ValidSel resw 1 + .Guest.ss.fFlags resw 1 + .Guest.ss.u64Base resq 1 + .Guest.ss.u32Limit resd 1 + .Guest.ss.Attr resd 1 + .Guest.ds.Sel resw 1 + .Guest.ds.PaddingSel resw 1 + .Guest.ds.ValidSel resw 1 + .Guest.ds.fFlags resw 1 + .Guest.ds.u64Base resq 1 + .Guest.ds.u32Limit resd 1 + .Guest.ds.Attr resd 1 + .Guest.fs.Sel resw 1 + .Guest.fs.PaddingSel resw 1 + .Guest.fs.ValidSel resw 1 + .Guest.fs.fFlags resw 1 + .Guest.fs.u64Base resq 1 + .Guest.fs.u32Limit resd 1 + .Guest.fs.Attr resd 1 + .Guest.gs.Sel resw 1 + .Guest.gs.PaddingSel resw 1 + .Guest.gs.ValidSel resw 1 + .Guest.gs.fFlags resw 1 + .Guest.gs.u64Base resq 1 + .Guest.gs.u32Limit resd 1 + .Guest.gs.Attr resd 1 + .Guest.eip resq 1 + .Guest.eflags resq 1 + .Guest.cr0 resq 1 + .Guest.cr2 resq 1 + .Guest.cr3 resq 1 + .Guest.cr4 resq 1 + .Guest.dr resq 8 + .Guest.gdtrPadding resw 3 + .Guest.gdtr resw 0 + .Guest.gdtr.cbGdt resw 1 + .Guest.gdtr.pGdt resq 1 + .Guest.idtrPadding resw 3 + .Guest.idtr resw 0 + .Guest.idtr.cbIdt resw 1 + .Guest.idtr.pIdt resq 1 + .Guest.ldtr.Sel resw 1 + .Guest.ldtr.PaddingSel resw 1 + .Guest.ldtr.ValidSel resw 1 + .Guest.ldtr.fFlags resw 1 + .Guest.ldtr.u64Base resq 1 + .Guest.ldtr.u32Limit resd 1 + .Guest.ldtr.Attr resd 1 + .Guest.tr.Sel resw 1 + .Guest.tr.PaddingSel resw 1 + .Guest.tr.ValidSel resw 1 + .Guest.tr.fFlags resw 1 + .Guest.tr.u64Base resq 1 + .Guest.tr.u32Limit resd 1 + .Guest.tr.Attr resd 1 + .Guest.SysEnter.cs resb 8 + .Guest.SysEnter.eip resb 8 + .Guest.SysEnter.esp resb 8 + .Guest.msrEFER resb 8 + .Guest.msrSTAR resb 8 + .Guest.msrPAT resb 8 + .Guest.msrLSTAR resb 8 + .Guest.msrCSTAR resb 8 + .Guest.msrSFMASK resb 8 + .Guest.msrKERNELGSBASE resb 8 + .Guest.uMsrPadding0 resb 8 + alignb 8 + .Guest.aXcr resq 2 + .Guest.fXStateMask resq 1 + .Guest.pXStateR0 RTR0PTR_RES 1 + alignb 8 + .Guest.pXStateR3 RTR3PTR_RES 1 + alignb 8 + .Guest.aoffXState resw 64 + .Guest.fWorldSwitcher resd 1 + alignb 8 + .Guest.fExtrn resq 1 + alignb 8 + .Guest.hwvirt.svm.uMsrHSavePa resq 1 + .Guest.hwvirt.svm.GCPhysVmcb resq 1 + .Guest.hwvirt.svm.pVmcbR0 RTR0PTR_RES 1 + alignb 8 + .Guest.hwvirt.svm.pVmcbR3 RTR3PTR_RES 1 + alignb 8 + .Guest.hwvirt.svm.HostState resb 184 + .Guest.hwvirt.svm.uPrevPauseTick resq 1 + .Guest.hwvirt.svm.cPauseFilter resw 1 + .Guest.hwvirt.svm.cPauseFilterThreshold resw 1 + .Guest.hwvirt.svm.fInterceptEvents resb 1 + alignb 8 + .Guest.hwvirt.svm.pvMsrBitmapR0 RTR0PTR_RES 1 + alignb 8 + .Guest.hwvirt.svm.pvMsrBitmapR3 RTR3PTR_RES 1 + alignb 8 + .Guest.hwvirt.svm.pvIoBitmapR0 RTR0PTR_RES 1 + alignb 8 + .Guest.hwvirt.svm.pvIoBitmapR3 RTR3PTR_RES 1 + alignb 8 + .Guest.hwvirt.svm.HCPhysVmcb RTHCPHYS_RES 1 + .Guest.hwvirt.svm.abPadding0 resb 272 + .Guest.hwvirt.enmHwvirt resd 1 + .Guest.hwvirt.fGif resb 1 + alignb 8 + 
.Guest.hwvirt.fLocalForcedActions resd 1 + alignb 64 + + .GuestMsrs resq 0 + .GuestMsrs.au64 resq 64 + + ; + ; Other stuff. + ; + .pNestedVmxPreemptTimerR0 RTR0PTR_RES 1 + .pNestedVmxPreemptTimerR3 RTR3PTR_RES 1 + + .fUseFlags resd 1 + .fChanged resd 1 + .u32RetCode resd 1 + +%ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI + .fApicDisVectors resd 1 + .pvApicBase RTR0PTR_RES 1 + .fX2Apic resb 1 +%else + .abPadding3 resb (4 + RTR0PTR_CB + 1) +%endif + + .fRemEntered resb 1 + .fCpuIdApicFeatureVisible resb 1 + + .abPadding2 resb (64 - (RTR0PTR_CB + RTR3PTR_CB + 12 + 4 + RTR0PTR_CB + 1 + 2)) + + ; + ; Host context state + ; + alignb 64 + .Host resb 0 + ;.Host.rax resq 1 - scratch + .Host.rbx resq 1 + ;.Host.rcx resq 1 - scratch + ;.Host.rdx resq 1 - scratch + .Host.rdi resq 1 + .Host.rsi resq 1 + .Host.rbp resq 1 + .Host.rsp resq 1 + ;.Host.r8 resq 1 - scratch + ;.Host.r9 resq 1 - scratch + .Host.r10 resq 1 + .Host.r11 resq 1 + .Host.r12 resq 1 + .Host.r13 resq 1 + .Host.r14 resq 1 + .Host.r15 resq 1 + ;.Host.rip resd 1 - scratch + .Host.rflags resq 1 + .Host.ss resw 1 + .Host.ssPadding resw 1 + .Host.gs resw 1 + .Host.gsPadding resw 1 + .Host.fs resw 1 + .Host.fsPadding resw 1 + .Host.es resw 1 + .Host.esPadding resw 1 + .Host.ds resw 1 + .Host.dsPadding resw 1 + .Host.cs resw 1 + .Host.csPadding resw 1 + + .Host.cr0Fpu: + .Host.cr0 resq 1 + ;.Host.cr2 resq 1 - scratch + .Host.cr3 resq 1 + .Host.cr4 resq 1 + .Host.cr8 resq 1 + + .Host.dr0 resq 1 + .Host.dr1 resq 1 + .Host.dr2 resq 1 + .Host.dr3 resq 1 + .Host.dr6 resq 1 + .Host.dr7 resq 1 + + .Host.gdtr resb 10 ; GDT limit + linear address + .Host.gdtrPadding resw 1 + .Host.idtr resb 10 ; IDT limit + linear address + .Host.idtrPadding resw 1 + .Host.ldtr resw 1 + .Host.ldtrPadding resw 1 + .Host.tr resw 1 + .Host.trPadding resw 1 + + .Host.SysEnter.cs resq 1 + .Host.SysEnter.eip resq 1 + .Host.SysEnter.esp resq 1 + .Host.FSbase resq 1 + .Host.GSbase resq 1 + .Host.efer resq 1 + .Host.auPadding resb 4 + alignb RTR0PTR_CB + .Host.pXStateR0 RTR0PTR_RES 1 + .Host.pXStateR3 RTR3PTR_RES 1 + alignb 8 + .Host.xcr0 resq 1 + .Host.fXStateMask resq 1 + + ; + ; Hypervisor Context. + ; + alignb 64 + .Hyper resq 0 + .Hyper.dr resq 8 + .Hyper.cr3 resq 1 + alignb 64 + +%ifdef VBOX_WITH_CRASHDUMP_MAGIC + .aMagic resb 56 + .uMagic resq 1 +%endif +endstruc + + + +%if 0 ; Currently not used anywhere. +;; +; Macro for FXSAVE/FXRSTOR leaky behaviour on AMD CPUs, see cpumR3CheckLeakyFpu(). +; +; Cleans the FPU state, if necessary, before restoring the FPU. +; +; This macro ASSUMES CR0.TS is not set! +; +; @param xDX Pointer to CPUMCPU. +; @uses xAX, EFLAGS +; +; Changes here should also be reflected in CPUMRCA.asm's copy! +; +%macro CLEANFPU 0 + test dword [xDX + CPUMCPU.fUseFlags], CPUM_USE_FFXSR_LEAKY + jz .nothing_to_clean + + xor eax, eax + fnstsw ax ; FSW -> AX. + test eax, RT_BIT(7) ; If FSW.ES (bit 7) is set, clear it to not cause FPU exceptions + ; while clearing & loading the FPU bits in 'clean_fpu' below. + jz .clean_fpu + fnclex + +.clean_fpu: + ffree st7 ; Clear FPU stack register(7)'s tag entry to prevent overflow if a wraparound occurs. + ; for the upcoming push (load) + fild dword [g_r32_Zero xWrtRIP] ; Explicit FPU load to overwrite FIP, FOP, FDP registers in the FPU. +.nothing_to_clean: +%endmacro +%endif ; Unused. + + +;; +; Makes sure we don't trap (#NM) accessing the FPU. 
+; +; In ring-0 this is a bit of work since we may have try convince the host kernel +; to do the work for us, also, we must report any CR0 changes back to HMR0VMX +; via the VINF_CPUM_HOST_CR0_MODIFIED status code. +; +; If we end up clearing CR0.TS/EM ourselves in ring-0, we'll save the original +; value in CPUMCPU.Host.cr0Fpu. If we don't, we'll store zero there. (See also +; CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET.) +; +; In raw-mode we will always have to clear TS and it will be recalculated +; elsewhere and thus needs no saving. +; +; @param %1 Register to return the return status code in. +; @param %2 Temporary scratch register. +; @param %3 Ring-0 only, register pointing to the CPUMCPU structure +; of the EMT we're on. +; @uses EFLAGS, CR0, %1, %2 +; +%macro CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC 3 + ; + ; ring-0 - slightly complicated (than old raw-mode). + ; + xor %1, %1 ; 0 / VINF_SUCCESS. Wishing for no CR0 changes. + mov [%3 + CPUMCPU.Host.cr0Fpu], %1 + + mov %2, cr0 + test %2, X86_CR0_TS | X86_CR0_EM ; Make sure its safe to access the FPU state. + jz %%no_cr0_change + + %ifdef VMM_R0_TOUCH_FPU + ; Touch the state and check that the kernel updated CR0 for us. + movdqa xmm0, xmm0 + mov %2, cr0 + test %2, X86_CR0_TS | X86_CR0_EM + jz %%cr0_changed + %endif + + ; Save CR0 and clear them flags ourselves. + mov [%3 + CPUMCPU.Host.cr0Fpu], %2 + and %2, ~(X86_CR0_TS | X86_CR0_EM) + mov cr0, %2 + +%%cr0_changed: + mov %1, VINF_CPUM_HOST_CR0_MODIFIED +%%no_cr0_change: +%endmacro + + +;; +; Restore CR0 if CR0.TS or CR0.EM were non-zero in the original state. +; +; @param %1 The original state to restore (or zero). +; +%macro CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET 1 + test %1, X86_CR0_TS | X86_CR0_EM + jz %%skip_cr0_restore + mov cr0, %1 +%%skip_cr0_restore: +%endmacro + + +;; +; Saves the host state. +; +; @uses rax, rdx +; @param pCpumCpu Define for the register containing the CPUMCPU pointer. +; @param pXState Define for the register containing the extended state pointer. +; +%macro CPUMR0_SAVE_HOST 0 + ; + ; Load a couple of registers we'll use later in all branches. + ; + %ifdef IN_RING0 + mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0] + %else + %error "Unsupported context!" + %endif + mov eax, [pCpumCpu + CPUMCPU.Host.fXStateMask] + + ; + ; XSAVE or FXSAVE? + ; + or eax, eax + jz %%host_fxsave + + ; XSAVE + mov edx, [pCpumCpu + CPUMCPU.Host.fXStateMask + 4] + %ifdef RT_ARCH_AMD64 + o64 xsave [pXState] + %else + xsave [pXState] + %endif + jmp %%host_done + + ; FXSAVE +%%host_fxsave: + %ifdef RT_ARCH_AMD64 + o64 fxsave [pXState] ; Use explicit REX prefix. See @bugref{6398}. + %else + fxsave [pXState] + %endif + +%%host_done: +%endmacro ; CPUMR0_SAVE_HOST + + +;; +; Loads the host state. +; +; @uses rax, rdx +; @param pCpumCpu Define for the register containing the CPUMCPU pointer. +; @param pXState Define for the register containing the extended state pointer. +; +%macro CPUMR0_LOAD_HOST 0 + ; + ; Load a couple of registers we'll use later in all branches. + ; + %ifdef IN_RING0 + mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0] + %else + %error "Unsupported context!" + %endif + mov eax, [pCpumCpu + CPUMCPU.Host.fXStateMask] + + ; + ; XRSTOR or FXRSTOR? + ; + or eax, eax + jz %%host_fxrstor + + ; XRSTOR + mov edx, [pCpumCpu + CPUMCPU.Host.fXStateMask + 4] + %ifdef RT_ARCH_AMD64 + o64 xrstor [pXState] + %else + xrstor [pXState] + %endif + jmp %%host_done + + ; FXRSTOR +%%host_fxrstor: + %ifdef RT_ARCH_AMD64 + o64 fxrstor [pXState] ; Use explicit REX prefix. See @bugref{6398}. 
+ %else + fxrstor [pXState] + %endif + +%%host_done: +%endmacro ; CPUMR0_LOAD_HOST + + + +;; Macro for XSAVE/FXSAVE for the guest FPU but tries to figure out whether to +; save the 32-bit FPU state or 64-bit FPU state. +; +; @param %1 Pointer to CPUMCPU. +; @param %2 Pointer to XState. +; @param %3 Force AMD64 +; @param %4 The instruction to use (xsave or fxsave) +; @uses xAX, xDX, EFLAGS, 20h of stack. +; +%macro SAVE_32_OR_64_FPU 4 +%if CPUM_IS_AMD64 || %3 + ; Save the guest FPU (32-bit or 64-bit), preserves existing broken state. See @bugref{7138}. + test dword [pCpumCpu + CPUMCPU.fUseFlags], CPUM_USE_SUPPORTS_LONGMODE + jnz short %%save_long_mode_guest +%endif + %4 [pXState] +%if CPUM_IS_AMD64 || %3 + jmp %%save_done_32bit_cs_ds + +%%save_long_mode_guest: + o64 %4 [pXState] + + xor edx, edx + cmp dword [pXState + X86FXSTATE.FPUCS], 0 + jne short %%save_done + + sub rsp, 20h ; Only need 1ch bytes but keep stack aligned otherwise we #GP(0). + fnstenv [rsp] + movzx eax, word [rsp + 10h] + mov [pXState + X86FXSTATE.FPUCS], eax + movzx eax, word [rsp + 18h] + add rsp, 20h + mov [pXState + X86FXSTATE.FPUDS], eax +%endif +%%save_done_32bit_cs_ds: + mov edx, X86_FXSTATE_RSVD_32BIT_MAGIC +%%save_done: + mov dword [pXState + X86_OFF_FXSTATE_RSVD], edx +%endmacro ; SAVE_32_OR_64_FPU + + +;; +; Save the guest state. +; +; @uses rax, rdx +; @param pCpumCpu Define for the register containing the CPUMCPU pointer. +; @param pXState Define for the register containing the extended state pointer. +; +%macro CPUMR0_SAVE_GUEST 0 + ; + ; Load a couple of registers we'll use later in all branches. + ; + %ifdef IN_RING0 + mov pXState, [pCpumCpu + CPUMCPU.Guest.pXStateR0] + %else + %error "Unsupported context!" + %endif + mov eax, [pCpumCpu + CPUMCPU.Guest.fXStateMask] + + ; + ; XSAVE or FXSAVE? + ; + or eax, eax + jz %%guest_fxsave + + ; XSAVE + mov edx, [pCpumCpu + CPUMCPU.Guest.fXStateMask + 4] + %ifdef VBOX_WITH_KERNEL_USING_XMM + and eax, ~CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS ; Already saved in HMR0A.asm. + %endif + SAVE_32_OR_64_FPU pCpumCpu, pXState, 0, xsave + jmp %%guest_done + + ; FXSAVE +%%guest_fxsave: + SAVE_32_OR_64_FPU pCpumCpu, pXState, 0, fxsave + +%%guest_done: +%endmacro ; CPUMR0_SAVE_GUEST + + +;; +; Wrapper for selecting 32-bit or 64-bit XRSTOR/FXRSTOR according to what SAVE_32_OR_64_FPU did. +; +; @param %1 Pointer to CPUMCPU. +; @param %2 Pointer to XState. +; @param %3 Force AMD64. +; @param %4 The instruction to use (xrstor or fxrstor). +; @uses xAX, xDX, EFLAGS +; +%macro RESTORE_32_OR_64_FPU 4 +%if CPUM_IS_AMD64 || %3 + ; Restore the guest FPU (32-bit or 64-bit), preserves existing broken state. See @bugref{7138}. + test dword [pCpumCpu + CPUMCPU.fUseFlags], CPUM_USE_SUPPORTS_LONGMODE + jz %%restore_32bit_fpu + cmp dword [pXState + X86_OFF_FXSTATE_RSVD], X86_FXSTATE_RSVD_32BIT_MAGIC + jne short %%restore_64bit_fpu +%%restore_32bit_fpu: +%endif + %4 [pXState] +%if CPUM_IS_AMD64 || %3 + ; TODO: Restore XMM8-XMM15! + jmp short %%restore_fpu_done +%%restore_64bit_fpu: + o64 %4 [pXState] +%%restore_fpu_done: +%endif +%endmacro ; RESTORE_32_OR_64_FPU + + +;; +; Loads the guest state. +; +; @uses rax, rdx +; @param pCpumCpu Define for the register containing the CPUMCPU pointer. +; @param pXState Define for the register containing the extended state pointer. +; +%macro CPUMR0_LOAD_GUEST 0 + ; + ; Load a couple of registers we'll use later in all branches. + ; + %ifdef IN_RING0 + mov pXState, [pCpumCpu + CPUMCPU.Guest.pXStateR0] + %else + %error "Unsupported context!" 
+ %endif + mov eax, [pCpumCpu + CPUMCPU.Guest.fXStateMask] + + ; + ; XRSTOR or FXRSTOR? + ; + or eax, eax + jz %%guest_fxrstor + + ; XRSTOR + mov edx, [pCpumCpu + CPUMCPU.Guest.fXStateMask + 4] + %ifdef VBOX_WITH_KERNEL_USING_XMM + and eax, ~CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS ; Will be loaded by HMR0A.asm. + %endif + RESTORE_32_OR_64_FPU pCpumCpu, pXState, 0, xrstor + jmp %%guest_done + + ; FXRSTOR +%%guest_fxrstor: + RESTORE_32_OR_64_FPU pCpumCpu, pXState, 0, fxrstor + +%%guest_done: +%endmacro ; CPUMR0_LOAD_GUEST + diff --git a/src/VBox/VMM/include/DBGFInternal.h b/src/VBox/VMM/include/DBGFInternal.h new file mode 100644 index 00000000..1cf2dc4d --- /dev/null +++ b/src/VBox/VMM/include/DBGFInternal.h @@ -0,0 +1,605 @@ +/* $Id: DBGFInternal.h $ */ +/** @file + * DBGF - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_DBGFInternal_h +#define VMM_INCLUDED_SRC_include_DBGFInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#ifdef IN_RING3 +# include <VBox/dis.h> +#endif +#include <VBox/types.h> +#include <iprt/semaphore.h> +#include <iprt/critsect.h> +#include <iprt/string.h> +#include <iprt/avl.h> +#include <iprt/dbg.h> +#include <VBox/vmm/dbgf.h> + + + +/** @defgroup grp_dbgf_int Internals + * @ingroup grp_dbgf + * @internal + * @{ + */ + + +/** VMM Debugger Command. */ +typedef enum DBGFCMD +{ + /** No command. + * This is assigned to the field by the emulation thread after + * a command has been completed. */ + DBGFCMD_NO_COMMAND = 0, + /** Halt the VM. */ + DBGFCMD_HALT, + /** Resume execution. */ + DBGFCMD_GO, + /** Single step execution - stepping into calls. */ + DBGFCMD_SINGLE_STEP, + /** Detaches the debugger. + * Disabling all breakpoints, watch points and the like. */ + DBGFCMD_DETACH_DEBUGGER, + /** Detached the debugger. + * The isn't a command as such, it's just that it's necessary for the + * detaching protocol to be racefree. */ + DBGFCMD_DETACHED_DEBUGGER +} DBGFCMD; + +/** + * VMM Debugger Command. + */ +typedef union DBGFCMDDATA +{ + uint32_t uDummy; +} DBGFCMDDATA; +/** Pointer to DBGF Command Data. */ +typedef DBGFCMDDATA *PDBGFCMDDATA; + +/** + * Info type. + */ +typedef enum DBGFINFOTYPE +{ + /** Invalid. */ + DBGFINFOTYPE_INVALID = 0, + /** Device owner. */ + DBGFINFOTYPE_DEV, + /** Driver owner. */ + DBGFINFOTYPE_DRV, + /** Internal owner. */ + DBGFINFOTYPE_INT, + /** External owner. */ + DBGFINFOTYPE_EXT, + /** Device owner. */ + DBGFINFOTYPE_DEV_ARGV, + /** Driver owner. */ + DBGFINFOTYPE_DRV_ARGV, + /** USB device owner. */ + DBGFINFOTYPE_USB_ARGV, + /** Internal owner, argv. */ + DBGFINFOTYPE_INT_ARGV, + /** External owner. */ + DBGFINFOTYPE_EXT_ARGV +} DBGFINFOTYPE; + + +/** Pointer to info structure. */ +typedef struct DBGFINFO *PDBGFINFO; + +#ifdef IN_RING3 +/** + * Info structure. + */ +typedef struct DBGFINFO +{ + /** The flags. */ + uint32_t fFlags; + /** Owner type. */ + DBGFINFOTYPE enmType; + /** Per type data. 
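 *
 * A small ring-3 sketch of how an entry of the internal type comes into being;
 * the "sample" name and dbgfR3SampleInfo callback are made up for illustration:
 * @code
 *     rc = DBGFR3InfoRegisterInternal(pVM, "sample", "Sample info handler.", dbgfR3SampleInfo); // callback is hypothetical
 * @endcode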
*/ + union + { + /** DBGFINFOTYPE_DEV */ + struct + { + /** Device info handler function. */ + PFNDBGFHANDLERDEV pfnHandler; + /** The device instance. */ + PPDMDEVINS pDevIns; + } Dev; + + /** DBGFINFOTYPE_DRV */ + struct + { + /** Driver info handler function. */ + PFNDBGFHANDLERDRV pfnHandler; + /** The driver instance. */ + PPDMDRVINS pDrvIns; + } Drv; + + /** DBGFINFOTYPE_INT */ + struct + { + /** Internal info handler function. */ + PFNDBGFHANDLERINT pfnHandler; + } Int; + + /** DBGFINFOTYPE_EXT */ + struct + { + /** External info handler function. */ + PFNDBGFHANDLEREXT pfnHandler; + /** The user argument. */ + void *pvUser; + } Ext; + + /** DBGFINFOTYPE_DEV_ARGV */ + struct + { + /** Device info handler function. */ + PFNDBGFINFOARGVDEV pfnHandler; + /** The device instance. */ + PPDMDEVINS pDevIns; + } DevArgv; + + /** DBGFINFOTYPE_DRV_ARGV */ + struct + { + /** Driver info handler function. */ + PFNDBGFINFOARGVDRV pfnHandler; + /** The driver instance. */ + PPDMDRVINS pDrvIns; + } DrvArgv; + + /** DBGFINFOTYPE_USB_ARGV */ + struct + { + /** Driver info handler function. */ + PFNDBGFINFOARGVUSB pfnHandler; + /** The driver instance. */ + PPDMUSBINS pUsbIns; + } UsbArgv; + + /** DBGFINFOTYPE_INT_ARGV */ + struct + { + /** Internal info handler function. */ + PFNDBGFINFOARGVINT pfnHandler; + } IntArgv; + + /** DBGFINFOTYPE_EXT_ARGV */ + struct + { + /** External info handler function. */ + PFNDBGFINFOARGVEXT pfnHandler; + /** The user argument. */ + void *pvUser; + } ExtArgv; + } u; + + /** Pointer to the description. */ + const char *pszDesc; + /** Pointer to the next info structure. */ + PDBGFINFO pNext; + /** The identifier name length. */ + size_t cchName; + /** The identifier name. (Extends 'beyond' the struct as usual.) */ + char szName[1]; +} DBGFINFO; +#endif /* IN_RING3 */ + + +#ifdef IN_RING3 +/** + * Guest OS digger instance. + */ +typedef struct DBGFOS +{ + /** Pointer to the registration record. */ + PCDBGFOSREG pReg; + /** Pointer to the next OS we've registered. */ + struct DBGFOS *pNext; + /** List of EMT interface wrappers. */ + struct DBGFOSEMTWRAPPER *pWrapperHead; + /** The instance data (variable size). */ + uint8_t abData[16]; +} DBGFOS; +#endif +/** Pointer to guest OS digger instance. */ +typedef struct DBGFOS *PDBGFOS; +/** Pointer to const guest OS digger instance. */ +typedef struct DBGFOS const *PCDBGFOS; + + +/** + * Breakpoint search optimization. + */ +typedef struct DBGFBPSEARCHOPT +{ + /** Where to start searching for hits. + * (First enabled is #DBGF::aBreakpoints[iStartSearch]). */ + uint32_t volatile iStartSearch; + /** The number of aBreakpoints entries to search. + * (Last enabled is #DBGF::aBreakpoints[iStartSearch + cToSearch - 1]) */ + uint32_t volatile cToSearch; +} DBGFBPSEARCHOPT; +/** Pointer to a breakpoint search optimziation structure. */ +typedef DBGFBPSEARCHOPT *PDBGFBPSEARCHOPT; + + + +/** + * DBGF Data (part of VM) + */ +typedef struct DBGF +{ + /** Bitmap of enabled hardware interrupt breakpoints. */ + uint32_t bmHardIntBreakpoints[256 / 32]; + /** Bitmap of enabled software interrupt breakpoints. */ + uint32_t bmSoftIntBreakpoints[256 / 32]; + /** Bitmap of selected events. + * This includes non-selectable events too for simplicity, we maintain the + * state for some of these, as it may come in handy. */ + uint64_t bmSelectedEvents[(DBGFEVENT_END + 63) / 64]; + + /** Enabled hardware interrupt breakpoints. */ + uint32_t cHardIntBreakpoints; + /** Enabled software interrupt breakpoints. 
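 *
 * A sketch of how the interrupt breakpoint bitmaps above are typically
 * consulted; u8Vector is a made-up local holding the interrupt vector:
 * @code
 *     bool fArmed = ASMBitTest(&pVM->dbgf.s.bmSoftIntBreakpoints[0], u8Vector); // u8Vector: illustrative only
 * @endcode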
*/ + uint32_t cSoftIntBreakpoints; + + /** The number of selected events. */ + uint32_t cSelectedEvents; + + /** The number of enabled hardware breakpoints. */ + uint8_t cEnabledHwBreakpoints; + /** The number of enabled hardware I/O breakpoints. */ + uint8_t cEnabledHwIoBreakpoints; + /** The number of enabled INT3 breakpoints. */ + uint8_t cEnabledInt3Breakpoints; + uint8_t abPadding; /**< Unused padding space up for grabs. */ + uint32_t uPadding; + + /** Debugger Attached flag. + * Set if a debugger is attached, elsewise it's clear. + */ + bool volatile fAttached; + + /** Stopped in the Hypervisor. + * Set if we're stopped on a trace, breakpoint or assertion inside + * the hypervisor and have to restrict the available operations. + */ + bool volatile fStoppedInHyper; + + /** + * Ping-Pong construct where the Ping side is the VMM and the Pong side + * the Debugger. + */ + RTPINGPONG PingPong; + RTHCUINTPTR uPtrPadding; /**< Alignment padding. */ + + /** The Event to the debugger. + * The VMM will ping the debugger when the event is ready. The event is + * either a response to a command or to a break/watch point issued + * previously. + */ + DBGFEVENT DbgEvent; + + /** The Command to the VMM. + * Operated in an atomic fashion since the VMM will poll on this. + * This means that a the command data must be written before this member + * is set. The VMM will reset this member to the no-command state + * when it have processed it. + */ + DBGFCMD volatile enmVMMCmd; + /** The Command data. + * Not all commands take data. */ + DBGFCMDDATA VMMCmdData; + + /** Stepping filtering. */ + struct + { + /** The CPU doing the stepping. + * Set to NIL_VMCPUID when filtering is inactive */ + VMCPUID idCpu; + /** The specified flags. */ + uint32_t fFlags; + /** The effective PC address to stop at, if given. */ + RTGCPTR AddrPc; + /** The lowest effective stack address to stop at. + * Together with cbStackPop, this forms a range of effective stack pointer + * addresses that we stop for. */ + RTGCPTR AddrStackPop; + /** The size of the stack stop area starting at AddrStackPop. */ + RTGCPTR cbStackPop; + /** Maximum number of steps. */ + uint32_t cMaxSteps; + + /** Number of steps made thus far. */ + uint32_t cSteps; + /** Current call counting balance for step-over handling. */ + uint32_t uCallDepth; + + uint32_t u32Padding; /**< Alignment padding. */ + + } SteppingFilter; + + uint32_t u32Padding[2]; /**< Alignment padding. */ + + /** Array of hardware breakpoints. (0..3) + * This is shared among all the CPUs because life is much simpler that way. */ + DBGFBP aHwBreakpoints[4]; + /** Array of int 3 and REM breakpoints. (4..) + * @remark This is currently a fixed size array for reasons of simplicity. */ + DBGFBP aBreakpoints[32]; + + /** MMIO breakpoint search optimizations. */ + DBGFBPSEARCHOPT Mmio; + /** I/O port breakpoint search optimizations. */ + DBGFBPSEARCHOPT PortIo; + /** INT3 breakpoint search optimizations. */ + DBGFBPSEARCHOPT Int3; + + /** + * Bug check data. + * @note This will not be reset on reset. + */ + struct + { + /** The ID of the CPU reporting it. */ + VMCPUID idCpu; + /** The event associated with the bug check (gives source). + * This is set to DBGFEVENT_END if no BSOD data here. */ + DBGFEVENTTYPE enmEvent; + /** The total reset count at the time (VMGetResetCount). */ + uint32_t uResetNo; + /** Explicit padding. */ + uint32_t uPadding; + /** When it was reported (TMVirtualGet). */ + uint64_t uTimestamp; + /** The bug check number. 
+ * @note This is really just 32-bit wide, see KeBugCheckEx. */ + uint64_t uBugCheck; + /** The bug check parameters. */ + uint64_t auParameters[4]; + } BugCheck; +} DBGF; +AssertCompileMemberAlignment(DBGF, DbgEvent, 8); +AssertCompileMemberAlignment(DBGF, aHwBreakpoints, 8); +AssertCompileMemberAlignment(DBGF, bmHardIntBreakpoints, 8); +/** Pointer to DBGF Data. */ +typedef DBGF *PDBGF; + + +/** + * Event state (for DBGFCPU::aEvents). + */ +typedef enum DBGFEVENTSTATE +{ + /** Invalid event stack entry. */ + DBGFEVENTSTATE_INVALID = 0, + /** The current event stack entry. */ + DBGFEVENTSTATE_CURRENT, + /** Event that should be ignored but hasn't yet actually been ignored. */ + DBGFEVENTSTATE_IGNORE, + /** Event that has been ignored but may be restored to IGNORE should another + * debug event fire before the instruction is completed. */ + DBGFEVENTSTATE_RESTORABLE, + /** End of valid events. */ + DBGFEVENTSTATE_END, + /** Make sure we've got a 32-bit type. */ + DBGFEVENTSTATE_32BIT_HACK = 0x7fffffff +} DBGFEVENTSTATE; + + +/** Converts a DBGFCPU pointer into a VM pointer. */ +#define DBGFCPU_2_VM(pDbgfCpu) ((PVM)((uint8_t *)(pDbgfCpu) + (pDbgfCpu)->offVM)) + +/** + * The per CPU data for DBGF. + */ +typedef struct DBGFCPU +{ + /** The offset into the VM structure. + * @see DBGFCPU_2_VM(). */ + uint32_t offVM; + + /** Current active breakpoint (id). + * This is ~0U if not active. It is set when a execution engine + * encounters a breakpoint and returns VINF_EM_DBG_BREAKPOINT. This is + * currently not used for REM breakpoints because of the lazy coupling + * between VBox and REM. + * + * @todo drop this in favor of aEvents! */ + uint32_t iActiveBp; + /** Set if we're singlestepping in raw mode. + * This is checked and cleared in the \#DB handler. */ + bool fSingleSteppingRaw; + + /** Alignment padding. */ + bool afPadding[3]; + + /** The number of events on the stack (aEvents). + * The pending event is the last one (aEvents[cEvents - 1]), but only when + * enmState is DBGFEVENTSTATE_CURRENT. */ + uint32_t cEvents; + /** Events - current, ignoring and ignored. + * + * We maintain a stack of events in order to try avoid ending up in an infinit + * loop when resuming after an event fired. There are cases where we may end + * generating additional events before the instruction can be executed + * successfully. Like for instance an XCHG on MMIO with separate read and write + * breakpoints, or a MOVSB instruction working on breakpointed MMIO as both + * source and destination. + * + * So, when resuming after dropping into the debugger for an event, we convert + * the DBGFEVENTSTATE_CURRENT event into a DBGFEVENTSTATE_IGNORE event, leaving + * cEvents unchanged. If the event is reported again, we will ignore it and + * tell the reporter to continue executing. The event change to the + * DBGFEVENTSTATE_RESTORABLE state. + * + * Currently, the event reporter has to figure out that it is a nested event and + * tell DBGF to restore DBGFEVENTSTATE_RESTORABLE events (and keep + * DBGFEVENTSTATE_IGNORE, should they happen out of order for some weird + * reason). + */ + struct + { + /** The event details. */ + DBGFEVENT Event; + /** The RIP at which this happend (for validating ignoring). */ + uint64_t rip; + /** The event state. */ + DBGFEVENTSTATE enmState; + /** Alignment padding. */ + uint32_t u32Alignment; + } aEvents[3]; +} DBGFCPU; +AssertCompileMemberAlignment(DBGFCPU, aEvents, 8); +AssertCompileMemberSizeAlignment(DBGFCPU, aEvents[0], 8); +/** Pointer to DBGFCPU data. 
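 *
 * @remarks The offVM member is what makes the DBGFCPU_2_VM() macro above work;
 *          trivially, for any valid DBGFCPU pointer pDbgfCpu:
 * @code
 *     PVM pVM = DBGFCPU_2_VM(pDbgfCpu);
 * @endcode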
*/ +typedef DBGFCPU *PDBGFCPU; + +struct DBGFOSEMTWRAPPER; + +/** + * The DBGF data kept in the UVM. + */ +typedef struct DBGFUSERPERVM +{ + /** The address space database lock. */ + RTSEMRW hAsDbLock; + /** The address space handle database. (Protected by hAsDbLock.) */ + R3PTRTYPE(AVLPVTREE) AsHandleTree; + /** The address space process id database. (Protected by hAsDbLock.) */ + R3PTRTYPE(AVLU32TREE) AsPidTree; + /** The address space name database. (Protected by hAsDbLock.) */ + R3PTRTYPE(RTSTRSPACE) AsNameSpace; + /** Special address space aliases. (Protected by hAsDbLock.) */ + RTDBGAS volatile ahAsAliases[DBGF_AS_COUNT]; + /** For lazily populating the aliased address spaces. */ + bool volatile afAsAliasPopuplated[DBGF_AS_COUNT]; + /** Alignment padding. */ + bool afAlignment1[2]; + /** Debug configuration. */ + R3PTRTYPE(RTDBGCFG) hDbgCfg; + + /** The register database lock. */ + RTSEMRW hRegDbLock; + /** String space for looking up registers. (Protected by hRegDbLock.) */ + R3PTRTYPE(RTSTRSPACE) RegSpace; + /** String space holding the register sets. (Protected by hRegDbLock.) */ + R3PTRTYPE(RTSTRSPACE) RegSetSpace; + /** The number of registers (aliases, sub-fields and the special CPU + * register aliases (eg AH) are not counted). */ + uint32_t cRegs; + /** For early initialization by . */ + bool volatile fRegDbInitialized; + /** Alignment padding. */ + bool afAlignment2[3]; + + /** Critical section protecting the Guest OS Digger data, the info handlers + * and the plugins. These share to give the best possible plugin unload + * race protection. */ + RTCRITSECTRW CritSect; + /** Head of the LIFO of loaded DBGF plugins. */ + R3PTRTYPE(struct DBGFPLUGIN *) pPlugInHead; + /** The current Guest OS digger. */ + R3PTRTYPE(PDBGFOS) pCurOS; + /** The head of the Guest OS digger instances. */ + R3PTRTYPE(PDBGFOS) pOSHead; + /** List of registered info handlers. */ + R3PTRTYPE(PDBGFINFO) pInfoFirst; + + /** The type database lock. */ + RTSEMRW hTypeDbLock; + /** String space for looking up types. (Protected by hTypeDbLock.) */ + R3PTRTYPE(RTSTRSPACE) TypeSpace; + /** For early initialization by . */ + bool volatile fTypeDbInitialized; + /** Alignment padding. */ + bool afAlignment3[3]; + +} DBGFUSERPERVM; +typedef DBGFUSERPERVM *PDBGFUSERPERVM; +typedef DBGFUSERPERVM const *PCDBGFUSERPERVM; + +/** + * The per-CPU DBGF data kept in the UVM. + */ +typedef struct DBGFUSERPERVMCPU +{ + /** The guest register set for this CPU. Can be NULL. */ + R3PTRTYPE(struct DBGFREGSET *) pGuestRegSet; + /** The hypervisor register set for this CPU. Can be NULL. 
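 *
 * @remarks These per-CPU register sets back the public named-register API; a
 *          small ring-3 sketch (CPU 0 and the "rip" register picked only for
 *          illustration):
 * @code
 *     uint64_t uValue = 0;
 *     int rc = DBGFR3RegNmQueryU64(pUVM, 0, "rip", &uValue);
 * @endcode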
*/ + R3PTRTYPE(struct DBGFREGSET *) pHyperRegSet; +} DBGFUSERPERVMCPU; + + +#ifdef IN_RING3 +int dbgfR3AsInit(PUVM pUVM); +void dbgfR3AsTerm(PUVM pUVM); +void dbgfR3AsRelocate(PUVM pUVM, RTGCUINTPTR offDelta); +int dbgfR3BpInit(PVM pVM); +int dbgfR3InfoInit(PUVM pUVM); +int dbgfR3InfoTerm(PUVM pUVM); +int dbgfR3OSInit(PUVM pUVM); +void dbgfR3OSTermPart1(PUVM pUVM); +void dbgfR3OSTermPart2(PUVM pUVM); +int dbgfR3OSStackUnwindAssist(PUVM pUVM, VMCPUID idCpu, PDBGFSTACKFRAME pFrame, PRTDBGUNWINDSTATE pState, + PCCPUMCTX pInitialCtx, RTDBGAS hAs, uint64_t *puScratch); +int dbgfR3RegInit(PUVM pUVM); +void dbgfR3RegTerm(PUVM pUVM); +int dbgfR3TraceInit(PVM pVM); +void dbgfR3TraceRelocate(PVM pVM); +void dbgfR3TraceTerm(PVM pVM); +DECLHIDDEN(int) dbgfR3TypeInit(PUVM pUVM); +DECLHIDDEN(void) dbgfR3TypeTerm(PUVM pUVM); +int dbgfR3PlugInInit(PUVM pUVM); +void dbgfR3PlugInTerm(PUVM pUVM); +int dbgfR3BugCheckInit(PVM pVM); + +/** + * DBGF disassembler state (substate of DISSTATE). + */ +typedef struct DBGFDISSTATE +{ + /** Pointer to the current instruction. */ + PCDISOPCODE pCurInstr; + /** Size of the instruction in bytes. */ + uint32_t cbInstr; + /** Parameters. */ + DISOPPARAM Param1; + DISOPPARAM Param2; + DISOPPARAM Param3; + DISOPPARAM Param4; +} DBGFDISSTATE; +/** Pointer to a DBGF disassembler state. */ +typedef DBGFDISSTATE *PDBGFDISSTATE; + +DECLHIDDEN(int) dbgfR3DisasInstrStateEx(PUVM pUVM, VMCPUID idCpu, PDBGFADDRESS pAddr, uint32_t fFlags, + char *pszOutput, uint32_t cbOutput, PDBGFDISSTATE pDisState); + +#endif /* IN_RING3 */ + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_DBGFInternal_h */ diff --git a/src/VBox/VMM/include/EMHandleRCTmpl.h b/src/VBox/VMM/include/EMHandleRCTmpl.h new file mode 100644 index 00000000..758dfad4 --- /dev/null +++ b/src/VBox/VMM/include/EMHandleRCTmpl.h @@ -0,0 +1,261 @@ +/* $Id: EMHandleRCTmpl.h $ */ +/** @file + * EM - emR3[Raw|Hm|Nem]HandleRC template. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_EMHandleRCTmpl_h +#define VMM_INCLUDED_SRC_include_EMHandleRCTmpl_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#if defined(EMHANDLERC_WITH_PATM) + defined(EMHANDLERC_WITH_HM) + defined(EMHANDLERC_WITH_NEM) != 1 +# error "Exactly one of these must be defined: EMHANDLERC_WITH_PATM, EMHANDLERC_WITH_HM, EMHANDLERC_WITH_NEM" +#endif + + +/** + * Process a subset of the raw-mode, HM and NEM return codes. + * + * Since we have to share this with raw-mode single stepping, this inline + * function has been created to avoid code duplication. + * + * @returns VINF_SUCCESS if it's ok to continue raw mode. + * @returns VBox status code to return to the EM main loop. + * + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param rc The return code. 
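 *
 * @remarks This header is a code template rather than an ordinary include:
 *          each execution engine defines exactly one EMHANDLERC_WITH_* macro
 *          and then includes it, roughly like this (placement illustrative):
 * @code
 *     #define EMHANDLERC_WITH_HM
 *     #include "EMHandleRCTmpl.h"
 * @endcode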
+ */ +#if defined(EMHANDLERC_WITH_HM) || defined(DOXYGEN_RUNNING) +int emR3HmHandleRC(PVM pVM, PVMCPU pVCpu, int rc) +#elif defined(EMHANDLERC_WITH_NEM) +int emR3NemHandleRC(PVM pVM, PVMCPU pVCpu, int rc) +#endif +{ + switch (rc) + { + /* + * Common & simple ones. + */ + case VINF_SUCCESS: + break; + case VINF_EM_RESCHEDULE_RAW: + case VINF_EM_RESCHEDULE_HM: + case VINF_EM_RAW_INTERRUPT: + case VINF_EM_RAW_TO_R3: + case VINF_EM_RAW_TIMER_PENDING: + case VINF_EM_PENDING_REQUEST: + rc = VINF_SUCCESS; + break; + +#ifndef EMHANDLERC_WITH_NEM + /* + * Conflict or out of page tables. + * + * VM_FF_PGM_SYNC_CR3 is set by the hypervisor and all we need to + * do here is to execute the pending forced actions. + */ + case VINF_PGM_SYNC_CR3: + AssertMsg(VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL), + ("VINF_PGM_SYNC_CR3 and no VMCPU_FF_PGM_SYNC_CR3*!\n")); + rc = VINF_SUCCESS; + break; + + /* + * PGM pool flush pending (guest SMP only). + */ + /** @todo jumping back and forth between ring 0 and 3 can burn a lot of cycles + * if the EMT thread that's supposed to handle the flush is currently not active + * (e.g. waiting to be scheduled) -> fix this properly! + * + * bird: Since the clearing is global and done via a rendezvous any CPU can do + * it. They would have to choose who to call VMMR3EmtRendezvous and send + * the rest to VMMR3EmtRendezvousFF ... Hmm ... that's not going to work + * all that well since the latter will race the setup done by the + * first. Guess that means we need some new magic in that area for + * handling this case. :/ + */ + case VINF_PGM_POOL_FLUSH_PENDING: + rc = VINF_SUCCESS; + break; + + /* + * Paging mode change. + */ + case VINF_PGM_CHANGE_MODE: + CPUM_ASSERT_NOT_EXTRN(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_EFER); + rc = PGMChangeMode(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr4, pVCpu->cpum.GstCtx.msrEFER); + if (rc == VINF_SUCCESS) + rc = VINF_EM_RESCHEDULE; + AssertMsg(RT_FAILURE(rc) || (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST), ("%Rrc\n", rc)); + break; +#endif /* !EMHANDLERC_WITH_NEM */ + + /* + * I/O Port access - emulate the instruction. + */ + case VINF_IOM_R3_IOPORT_READ: + case VINF_IOM_R3_IOPORT_WRITE: + case VINF_EM_RESUME_R3_HISTORY_EXEC: /* Resume EMHistoryExec after VMCPU_FF_IOM. */ + rc = emR3ExecuteIOInstruction(pVM, pVCpu); + break; + + /* + * Execute pending I/O Port access. + */ + case VINF_EM_PENDING_R3_IOPORT_WRITE: + rc = VBOXSTRICTRC_TODO(emR3ExecutePendingIoPortWrite(pVM, pVCpu)); + break; + case VINF_EM_PENDING_R3_IOPORT_READ: + rc = VBOXSTRICTRC_TODO(emR3ExecutePendingIoPortRead(pVM, pVCpu)); + break; + + /* + * Memory mapped I/O access - emulate the instruction. + */ + case VINF_IOM_R3_MMIO_READ: + case VINF_IOM_R3_MMIO_WRITE: + case VINF_IOM_R3_MMIO_READ_WRITE: + rc = emR3ExecuteInstruction(pVM, pVCpu, "MMIO"); + break; + + /* + * Machine specific register access - emulate the instruction. + */ + case VINF_CPUM_R3_MSR_READ: + case VINF_CPUM_R3_MSR_WRITE: + rc = emR3ExecuteInstruction(pVM, pVCpu, "MSR"); + break; + + /* + * GIM hypercall. + */ + case VINF_GIM_R3_HYPERCALL: + rc = emR3ExecuteInstruction(pVM, pVCpu, "Hypercall"); + break; + +#ifdef EMHANDLERC_WITH_HM + case VINF_EM_HM_PATCH_TPR_INSTR: + rc = HMR3PatchTprInstr(pVM, pVCpu); + break; +#endif + + case VINF_EM_RAW_GUEST_TRAP: + case VINF_EM_RAW_EMULATE_INSTR: + Assert(!TRPMHasTrap(pVCpu)); /* We're directly executing instructions below without respecting any pending traps! 
*/ + rc = emR3ExecuteInstruction(pVM, pVCpu, "EMUL: "); + break; + + case VINF_EM_RAW_INJECT_TRPM_EVENT: + CPUM_IMPORT_EXTRN_RET(pVCpu, IEM_CPUMCTX_EXTRN_XCPT_MASK); + rc = VBOXSTRICTRC_VAL(IEMInjectTrpmEvent(pVCpu)); + /* The following condition should be removed when IEM_IMPLEMENTS_TASKSWITCH becomes true. */ + if (rc == VERR_IEM_ASPECT_NOT_IMPLEMENTED) + rc = emR3ExecuteInstruction(pVM, pVCpu, "EVENT: "); + break; + + + /* + * Up a level. + */ + case VINF_EM_TERMINATE: + case VINF_EM_OFF: + case VINF_EM_RESET: + case VINF_EM_SUSPEND: + case VINF_EM_HALT: + case VINF_EM_RESUME: + case VINF_EM_NO_MEMORY: + case VINF_EM_RESCHEDULE: + case VINF_EM_RESCHEDULE_REM: + case VINF_EM_WAIT_SIPI: + break; + + /* + * Up a level and invoke the debugger. + */ + case VINF_EM_DBG_STEPPED: + case VINF_EM_DBG_BREAKPOINT: + case VINF_EM_DBG_STEP: + case VINF_EM_DBG_HYPER_BREAKPOINT: + case VINF_EM_DBG_HYPER_STEPPED: + case VINF_EM_DBG_HYPER_ASSERTION: + case VINF_EM_DBG_STOP: + case VINF_EM_DBG_EVENT: + break; + + /* + * Up a level, dump and debug. + */ + case VERR_TRPM_DONT_PANIC: + case VERR_TRPM_PANIC: + case VERR_VMM_RING0_ASSERTION: + case VINF_EM_TRIPLE_FAULT: + case VERR_VMM_HYPER_CR3_MISMATCH: + case VERR_VMM_RING3_CALL_DISABLED: + case VERR_IEM_INSTR_NOT_IMPLEMENTED: + case VERR_IEM_ASPECT_NOT_IMPLEMENTED: + case VERR_EM_GUEST_CPU_HANG: + break; + +#ifdef EMHANDLERC_WITH_HM + /* + * Up a level, after Hm have done some release logging. + */ + case VERR_VMX_INVALID_VMCS_FIELD: + case VERR_VMX_INVALID_VMCS_PTR: + case VERR_VMX_INVALID_VMXON_PTR: + case VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE: + case VERR_VMX_UNEXPECTED_EXCEPTION: + case VERR_VMX_UNEXPECTED_EXIT: + case VERR_VMX_INVALID_GUEST_STATE: + case VERR_VMX_UNABLE_TO_START_VM: + case VERR_SVM_UNKNOWN_EXIT: + case VERR_SVM_UNEXPECTED_EXIT: + case VERR_SVM_UNEXPECTED_PATCH_TYPE: + case VERR_SVM_UNEXPECTED_XCPT_EXIT: + HMR3CheckError(pVM, rc); + break; + + /* Up a level; fatal */ + case VERR_VMX_IN_VMX_ROOT_MODE: + case VERR_SVM_IN_USE: + case VERR_SVM_UNABLE_TO_START_VM: + break; +#endif + + /* + * These two should be handled via the force flag already, but just in + * case they end up here deal with it. + */ + case VINF_IOM_R3_IOPORT_COMMIT_WRITE: + case VINF_IOM_R3_MMIO_COMMIT_WRITE: + AssertFailed(); + rc = VBOXSTRICTRC_TODO(IOMR3ProcessForceFlag(pVM, pVCpu, rc)); + break; + + /* + * Anything which is not known to us means an internal error + * and the termination of the VM! + */ + default: + AssertMsgFailed(("Unknown GC return code: %Rra\n", rc)); + break; + } + return rc; +} + +#endif /* !VMM_INCLUDED_SRC_include_EMHandleRCTmpl_h */ + diff --git a/src/VBox/VMM/include/EMInternal.h b/src/VBox/VMM/include/EMInternal.h new file mode 100644 index 00000000..c4d6d0ac --- /dev/null +++ b/src/VBox/VMM/include/EMInternal.h @@ -0,0 +1,368 @@ +/* $Id: EMInternal.h $ */ +/** @file + * EM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VMM_INCLUDED_SRC_include_EMInternal_h +#define VMM_INCLUDED_SRC_include_EMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/em.h> +#include <VBox/vmm/stam.h> +#include <VBox/dis.h> +#include <VBox/vmm/pdmcritsect.h> +#include <iprt/avl.h> +#include <setjmp.h> + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_em_int Internal + * @ingroup grp_em + * @internal + * @{ + */ + +/** The saved state version. */ +#define EM_SAVED_STATE_VERSION 5 +#define EM_SAVED_STATE_VERSION_PRE_IEM 4 +#define EM_SAVED_STATE_VERSION_PRE_MWAIT 3 +#define EM_SAVED_STATE_VERSION_PRE_SMP 2 + + +/** @name MWait state flags. + * @{ + */ +/** MWait activated. */ +#define EMMWAIT_FLAG_ACTIVE RT_BIT(0) +/** MWait will continue when an interrupt is pending even when IF=0. */ +#define EMMWAIT_FLAG_BREAKIRQIF0 RT_BIT(1) +/** Monitor instruction was executed previously. */ +#define EMMWAIT_FLAG_MONITOR_ACTIVE RT_BIT(2) +/** @} */ + +/** EM time slice in ms; used for capping execution time. */ +#define EM_TIME_SLICE 100 + +/** + * Cli node structure + */ +typedef struct CLISTAT +{ + /** The key is the cli address. */ + AVLGCPTRNODECORE Core; +#if HC_ARCH_BITS == 32 && !defined(RT_OS_WINDOWS) + /** Padding. */ + uint32_t u32Padding; +#endif + /** Occurrences. */ + STAMCOUNTER Counter; +} CLISTAT, *PCLISTAT; +#ifdef IN_RING3 +AssertCompileMemberAlignment(CLISTAT, Counter, 8); +#endif + + +/** + * Excessive (used to be) EM statistics. + */ +typedef struct EMSTATS +{ +#if 1 /* rawmode only? */ + /** @name Privileged Instructions Ending Up In HC. + * @{ */ + STAMCOUNTER StatIoRestarted; + STAMCOUNTER StatIoIem; + STAMCOUNTER StatCli; + STAMCOUNTER StatSti; + STAMCOUNTER StatInvlpg; + STAMCOUNTER StatHlt; + STAMCOUNTER StatMovReadCR[DISCREG_CR4 + 1]; + STAMCOUNTER StatMovWriteCR[DISCREG_CR4 + 1]; + STAMCOUNTER StatMovDRx; + STAMCOUNTER StatIret; + STAMCOUNTER StatMovLgdt; + STAMCOUNTER StatMovLldt; + STAMCOUNTER StatMovLidt; + STAMCOUNTER StatMisc; + STAMCOUNTER StatSysEnter; + STAMCOUNTER StatSysExit; + STAMCOUNTER StatSysCall; + STAMCOUNTER StatSysRet; + /** @} */ +#endif +} EMSTATS; +/** Pointer to the excessive EM statistics. */ +typedef EMSTATS *PEMSTATS; + + +/** + * Exit history entry. + * + * @remarks We could perhaps trim this down a little bit by assuming uFlatPC + * only needs 48 bits (currently true but will change) and stuffing + * the flags+type in the available 16 bits made available. The + * timestamp could likewise be shortened to accomodate the index, or + * we might skip the index entirely. However, since we will have to + * deal with 56-bit wide PC address before long, there's not point. + * + * On the upside, there are unused bits in both uFlagsAndType and the + * idxSlot fields if needed for anything. + */ +typedef struct EMEXITENTRY +{ + /** The flat PC (CS:EIP/RIP) address of the exit. + * UINT64_MAX if not available. */ + uint64_t uFlatPC; + /** The EMEXIT_MAKE_FLAGS_AND_TYPE */ + uint32_t uFlagsAndType; + /** The index into the exit slot hash table. + * UINT32_MAX if too many collisions and not entered into it. */ + uint32_t idxSlot; + /** The TSC timestamp of the exit. + * This is 0 if not timestamped. */ + uint64_t uTimestamp; +} EMEXITENTRY; +/** Pointer to an exit history entry. */ +typedef EMEXITENTRY *PEMEXITENTRY; +/** Pointer to a const exit history entry. */ +typedef EMEXITENTRY const *PCEMEXITENTRY; + + +/** + * EM VM Instance data. + */ +typedef struct EM +{ + /** Whether IEM executes everything. 
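 *
 * @remarks Configuration driven; a sketch of how it would be read from CFGM
 *          during EM initialisation (the key name and the pCfgEM node are
 *          assumptions made for the example):
 * @code
 *     rc = CFGMR3QueryBoolDef(pCfgEM, "IemExecutesAll", &pVM->em.s.fIemExecutesAll, false); // key name assumed
 * @endcode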
*/ + bool fIemExecutesAll; + /** Whether a triple fault triggers a guru. */ + bool fGuruOnTripleFault; + /** Alignment padding. */ + bool afPadding[2]; + + /** Id of the VCPU that last executed code in the recompiler. */ + VMCPUID idLastRemCpu; +} EM; +/** Pointer to EM VM instance data. */ +typedef EM *PEM; + + +/** + * EM VMCPU Instance data. + */ +typedef struct EMCPU +{ + /** Execution Manager State. */ + EMSTATE volatile enmState; + + /** The state prior to the suspending of the VM. */ + EMSTATE enmPrevState; + + /** Set if hypercall instruction VMMCALL (AMD) & VMCALL (Intel) are enabled. + * GIM sets this and the execution managers queries it. Not saved, as GIM + * takes care of that bit too. */ + bool fHypercallEnabled; + + /** Explicit padding. */ + uint8_t abPadding0[3]; + + /** The number of instructions we've executed in IEM since switching to the + * EMSTATE_IEM_THEN_REM state. */ + uint32_t cIemThenRemInstructions; + + /** Inhibit interrupts for this instruction. Valid only when VM_FF_INHIBIT_INTERRUPTS is set. */ + RTGCUINTPTR GCPtrInhibitInterrupts; + + /** Start of the current time slice in ms. */ + uint64_t u64TimeSliceStart; + /** Start of the current time slice in thread execution time (ms). */ + uint64_t u64TimeSliceStartExec; + /** Current time slice value. */ + uint64_t u64TimeSliceExec; + + /** Pending ring-3 I/O port access (VINF_EM_PENDING_R3_IOPORT_READ / VINF_EM_PENDING_R3_IOPORT_WRITE). */ + struct + { + RTIOPORT uPort; /**< The I/O port number.*/ + uint8_t cbValue; /**< The value size in bytes. Zero when not pending. */ + uint8_t cbInstr; /**< The instruction length. */ + uint32_t uValue; /**< The value to write. */ + } PendingIoPortAccess; + + /** MWait halt state. */ + struct + { + uint32_t fWait; /**< Type of mwait; see EMMWAIT_FLAG_*. */ + uint32_t u32Padding; + RTGCPTR uMWaitRAX; /**< MWAIT hints. */ + RTGCPTR uMWaitRCX; /**< MWAIT extensions. */ + RTGCPTR uMonitorRAX; /**< Monitored address. */ + RTGCPTR uMonitorRCX; /**< Monitor extension. */ + RTGCPTR uMonitorRDX; /**< Monitor hint. */ + } MWait; + + /** Make sure the jmp_buf is at a 32-byte boundrary. */ + uint64_t au64Padding1[3]; + union + { + /** Padding used in the other rings. + * This must be larger than jmp_buf on any supported platform. */ + char achPaddingFatalLongJump[256]; +#ifdef IN_RING3 + /** Long buffer jump for fatal VM errors. + * It will jump to before the outer EM loop is entered. */ + jmp_buf FatalLongJump; +#endif + } u; + + /** For saving stack space, the disassembler state is allocated here instead of + * on the stack. */ + DISCPUSTATE DisState; + + /** @name Execution profiling. + * @{ */ + STAMPROFILE StatForcedActions; + STAMPROFILE StatHalted; + STAMPROFILEADV StatCapped; + STAMPROFILEADV StatHMEntry; + STAMPROFILE StatHMExec; + STAMPROFILE StatIEMEmu; + STAMPROFILE StatIEMThenREM; + STAMPROFILEADV StatNEMEntry; + STAMPROFILE StatNEMExec; + STAMPROFILE StatREMEmu; + STAMPROFILE StatREMExec; + STAMPROFILE StatREMSync; + STAMPROFILEADV StatREMTotal; + STAMPROFILE StatRAWExec; + STAMPROFILEADV StatRAWEntry; + STAMPROFILEADV StatRAWTail; + STAMPROFILEADV StatRAWTotal; + STAMPROFILEADV StatTotal; + /** @} */ + + /** R3: Profiling of emR3RawExecuteIOInstruction. */ + STAMPROFILE StatIOEmu; + /** R3: Profiling of emR3RawPrivileged. */ + STAMPROFILE StatPrivEmu; + /** R3: Number of times emR3HmExecute is called. */ + STAMCOUNTER StatHMExecuteCalled; + /** R3: Number of times emR3NEMExecute is called. */ + STAMCOUNTER StatNEMExecuteCalled; + + /** More statistics (R3). 
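 *
 * @remarks The profiling members in the group above are driven with the STAM
 *          profile macros; a sketch using StatHalted, where emR3DoTheWait is
 *          only a stand-in for whatever work is being timed:
 * @code
 *     STAM_PROFILE_START(&pVCpu->em.s.StatHalted, a);
 *     rc = emR3DoTheWait(pVM, pVCpu); // stand-in, not a real function
 *     STAM_PROFILE_STOP(&pVCpu->em.s.StatHalted, a);
 * @endcode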
*/ + R3PTRTYPE(PEMSTATS) pStatsR3; + /** More statistics (R0). */ + R0PTRTYPE(PEMSTATS) pStatsR0; + + /** Tree for keeping track of cli occurrences (debug only). */ + R3PTRTYPE(PAVLGCPTRNODECORE) pCliStatTree; + STAMCOUNTER StatTotalClis; + /** Align the next member at a 16-byte boundrary. */ + uint64_t au64Padding2[1]; + + /** Exit history table (6KB). */ + EMEXITENTRY aExitHistory[256]; + /** Where to store the next exit history entry. + * Since aExitHistory is 256 items longs, we'll just increment this and + * mask it when using it. That help the readers detect whether we've + * wrapped around or not. */ + uint64_t iNextExit; + + /** Index into aExitRecords set by EMHistoryExec when returning to ring-3. + * This is UINT16_MAX if not armed. */ + uint16_t volatile idxContinueExitRec; + /** Whether exit optimizations are enabled or not (in general). */ + bool fExitOptimizationEnabled : 1; + /** Whether exit optimizations are enabled for ring-0 (in general). */ + bool fExitOptimizationEnabledR0 : 1; + /** Whether exit optimizations are enabled for ring-0 when preemption is disabled. */ + bool fExitOptimizationEnabledR0PreemptDisabled : 1; + /** Explicit padding. */ + bool fPadding2; + /** Max number of instructions to execute. */ + uint16_t cHistoryExecMaxInstructions; + /** Min number of instructions to execute while probing. */ + uint16_t cHistoryProbeMinInstructions; + /** Max number of instructions to execute without an exit before giving up probe. */ + uint16_t cHistoryProbeMaxInstructionsWithoutExit; + uint16_t uPadding3; + /** Number of exit records in use. */ + uint32_t cExitRecordUsed; + /** Profiling the EMHistoryExec when executing (not probing). */ + STAMPROFILE StatHistoryExec; + /** Number of saved exits. */ + STAMCOUNTER StatHistoryExecSavedExits; + /** Number of instructions executed by EMHistoryExec. */ + STAMCOUNTER StatHistoryExecInstructions; + uint64_t uPadding4; + /** Number of instructions executed by EMHistoryExec when probing. */ + STAMCOUNTER StatHistoryProbeInstructions; + /** Number of times probing resulted in EMEXITACTION_NORMAL_PROBED. */ + STAMCOUNTER StatHistoryProbedNormal; + /** Number of times probing resulted in EMEXITACTION_EXEC_WITH_MAX. */ + STAMCOUNTER StatHistoryProbedExecWithMax; + /** Number of times probing resulted in ring-3 continuation. */ + STAMCOUNTER StatHistoryProbedToRing3; + /** Profiling the EMHistoryExec when probing.*/ + STAMPROFILE StatHistoryProbe; + /** Hit statistics for each lookup step. */ + STAMCOUNTER aStatHistoryRecHits[16]; + /** Type change statistics for each lookup step. */ + STAMCOUNTER aStatHistoryRecTypeChanged[16]; + /** Replacement statistics for each lookup step. */ + STAMCOUNTER aStatHistoryRecReplaced[16]; + /** New record statistics for each lookup step. */ + STAMCOUNTER aStatHistoryRecNew[16]; + + /** Exit records (32KB). (Aligned on 32 byte boundrary.) */ + EMEXITREC aExitRecords[1024]; +} EMCPU; +/** Pointer to EM VM instance data. 
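 *
 * @remarks Since aExitHistory holds 256 entries and iNextExit only ever grows,
 *          the current slot is found by masking rather than range checking; a
 *          sketch of the indexing scheme described above:
 * @code
 *     PEMEXITENTRY pEntry = &pVCpu->em.s.aExitHistory[(size_t)(pVCpu->em.s.iNextExit & 0xff)];
 * @endcode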
*/ +typedef EMCPU *PEMCPU; + +/** @} */ + +int emR3InitDbg(PVM pVM); + +int emR3HmExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone); +VBOXSTRICTRC emR3NemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone); +int emR3RawExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone); + +EMSTATE emR3Reschedule(PVM pVM, PVMCPU pVCpu); +int emR3ForcedActions(PVM pVM, PVMCPU pVCpu, int rc); +VBOXSTRICTRC emR3HighPriorityPostForcedActions(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc); + +int emR3RawResumeHyper(PVM pVM, PVMCPU pVCpu); +int emR3RawStep(PVM pVM, PVMCPU pVCpu); + +VBOXSTRICTRC emR3NemSingleInstruction(PVM pVM, PVMCPU pVCpu, uint32_t fFlags); + +int emR3SingleStepExecRem(PVM pVM, PVMCPU pVCpu, uint32_t cIterations); + +bool emR3IsExecutionAllowed(PVM pVM, PVMCPU pVCpu); + +VBOXSTRICTRC emR3ExecutePendingIoPortWrite(PVM pVM, PVMCPU pVCpu); +VBOXSTRICTRC emR3ExecutePendingIoPortRead(PVM pVM, PVMCPU pVCpu); + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_EMInternal_h */ + diff --git a/src/VBox/VMM/include/GIMHvInternal.h b/src/VBox/VMM/include/GIMHvInternal.h new file mode 100644 index 00000000..9dd86e54 --- /dev/null +++ b/src/VBox/VMM/include/GIMHvInternal.h @@ -0,0 +1,1375 @@ +/* $Id: GIMHvInternal.h $ */ +/** @file + * GIM - Hyper-V, Internal header file. + */ + +/* + * Copyright (C) 2014-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_GIMHvInternal_h +#define VMM_INCLUDED_SRC_include_GIMHvInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/vmm/gim.h> +#include <VBox/vmm/cpum.h> + +#include <iprt/net.h> + +/** @name Hyper-V base feature identification. + * Features based on current partition privileges (per-VM). + * @{ + */ +/** Virtual processor runtime MSR available. */ +#define GIM_HV_BASE_FEAT_VP_RUNTIME_MSR RT_BIT(0) +/** Partition reference counter MSR available. */ +#define GIM_HV_BASE_FEAT_PART_TIME_REF_COUNT_MSR RT_BIT(1) +/** Basic Synthetic Interrupt Controller MSRs available. */ +#define GIM_HV_BASE_FEAT_BASIC_SYNIC_MSRS RT_BIT(2) +/** Synthetic Timer MSRs available. */ +#define GIM_HV_BASE_FEAT_STIMER_MSRS RT_BIT(3) +/** APIC access MSRs (EOI, ICR, TPR) available. */ +#define GIM_HV_BASE_FEAT_APIC_ACCESS_MSRS RT_BIT(4) +/** Hypercall MSRs available. */ +#define GIM_HV_BASE_FEAT_HYPERCALL_MSRS RT_BIT(5) +/** Access to VCPU index MSR available. */ +#define GIM_HV_BASE_FEAT_VP_ID_MSR RT_BIT(6) +/** Virtual system reset MSR available. */ +#define GIM_HV_BASE_FEAT_VIRT_SYS_RESET_MSR RT_BIT(7) +/** Statistic pages MSRs available. */ +#define GIM_HV_BASE_FEAT_STAT_PAGES_MSR RT_BIT(8) +/** Paritition reference TSC MSR available. */ +#define GIM_HV_BASE_FEAT_PART_REF_TSC_MSR RT_BIT(9) +/** Virtual guest idle state MSR available. */ +#define GIM_HV_BASE_FEAT_GUEST_IDLE_STATE_MSR RT_BIT(10) +/** Timer frequency MSRs (TSC and APIC) available. */ +#define GIM_HV_BASE_FEAT_TIMER_FREQ_MSRS RT_BIT(11) +/** Debug MSRs available. */ +#define GIM_HV_BASE_FEAT_DEBUG_MSRS RT_BIT(12) +/** @} */ + +/** @name Hyper-V partition-creation feature identification. 
+ * Indicates flags specified during partition creation. + * @{ + */ +/** Create partitions. */ +#define GIM_HV_PART_FLAGS_CREATE_PART RT_BIT(0) +/** Access partition Id. */ +#define GIM_HV_PART_FLAGS_ACCESS_PART_ID RT_BIT(1) +/** Access memory pool. */ +#define GIM_HV_PART_FLAGS_ACCESS_MEMORY_POOL RT_BIT(2) +/** Adjust message buffers. */ +#define GIM_HV_PART_FLAGS_ADJUST_MSG_BUFFERS RT_BIT(3) +/** Post messages. */ +#define GIM_HV_PART_FLAGS_POST_MSGS RT_BIT(4) +/** Signal events. */ +#define GIM_HV_PART_FLAGS_SIGNAL_EVENTS RT_BIT(5) +/** Create port. */ +#define GIM_HV_PART_FLAGS_CREATE_PORT RT_BIT(6) +/** Connect port. */ +#define GIM_HV_PART_FLAGS_CONNECT_PORT RT_BIT(7) +/** Access statistics. */ +#define GIM_HV_PART_FLAGS_ACCESS_STATS RT_BIT(8) +/** Debugging.*/ +#define GIM_HV_PART_FLAGS_DEBUGGING RT_BIT(11) +/** CPU management. */ +#define GIM_HV_PART_FLAGS_CPU_MGMT RT_BIT(12) +/** CPU profiler. */ +#define GIM_HV_PART_FLAGS_CPU_PROFILER RT_BIT(13) +/** Enable expanded stack walking. */ +#define GIM_HV_PART_FLAGS_EXPANDED_STACK_WALK RT_BIT(14) +/** Access VSM. */ +#define GIM_HV_PART_FLAGS_ACCESS_VSM RT_BIT(16) +/** Access VP registers. */ +#define GIM_HV_PART_FLAGS_ACCESS_VP_REGS RT_BIT(17) +/** Enable extended hypercalls. */ +#define GIM_HV_PART_FLAGS_EXTENDED_HYPERCALLS RT_BIT(20) +/** Start virtual processor. */ +#define GIM_HV_PART_FLAGS_START_VP RT_BIT(21) +/** @} */ + +/** @name Hyper-V power management feature identification. + * @{ + */ +/** Maximum CPU power state C0. */ +#define GIM_HV_PM_MAX_CPU_POWER_STATE_C0 RT_BIT(0) +/** Maximum CPU power state C1. */ +#define GIM_HV_PM_MAX_CPU_POWER_STATE_C1 RT_BIT(1) +/** Maximum CPU power state C2. */ +#define GIM_HV_PM_MAX_CPU_POWER_STATE_C2 RT_BIT(2) +/** Maximum CPU power state C3. */ +#define GIM_HV_PM_MAX_CPU_POWER_STATE_C3 RT_BIT(3) +/** HPET is required to enter C3 power state. */ +#define GIM_HV_PM_HPET_REQD_FOR_C3 RT_BIT(4) +/** @} */ + +/** @name Hyper-V miscellaneous feature identification. + * Miscellaneous features available for the current partition. + * @{ + */ +/** MWAIT instruction available. */ +#define GIM_HV_MISC_FEAT_MWAIT RT_BIT(0) +/** Guest debugging support available. */ +#define GIM_HV_MISC_FEAT_GUEST_DEBUGGING RT_BIT(1) +/** Performance monitor support is available. */ +#define GIM_HV_MISC_FEAT_PERF_MON RT_BIT(2) +/** Support for physical CPU dynamic partitioning events. */ +#define GIM_HV_MISC_FEAT_PCPU_DYN_PART_EVENT RT_BIT(3) +/** Support for passing hypercall input parameter block via XMM registers. */ +#define GIM_HV_MISC_FEAT_XMM_HYPERCALL_INPUT RT_BIT(4) +/** Support for virtual guest idle state. */ +#define GIM_HV_MISC_FEAT_GUEST_IDLE_STATE RT_BIT(5) +/** Support for hypervisor sleep state. */ +#define GIM_HV_MISC_FEAT_HYPERVISOR_SLEEP_STATE RT_BIT(6) +/** Support for querying NUMA distances. */ +#define GIM_HV_MISC_FEAT_QUERY_NUMA_DISTANCE RT_BIT(7) +/** Support for determining timer frequencies. */ +#define GIM_HV_MISC_FEAT_TIMER_FREQ RT_BIT(8) +/** Support for injecting synthetic machine checks. */ +#define GIM_HV_MISC_FEAT_INJECT_SYNMC_XCPT RT_BIT(9) +/** Support for guest crash MSRs. */ +#define GIM_HV_MISC_FEAT_GUEST_CRASH_MSRS RT_BIT(10) +/** Support for debug MSRs. */ +#define GIM_HV_MISC_FEAT_DEBUG_MSRS RT_BIT(11) +/** Npiep1 Available */ /** @todo What the heck is this? */ +#define GIM_HV_MISC_FEAT_NPIEP1 RT_BIT(12) +/** Disable hypervisor available. */ +#define GIM_HV_MISC_FEAT_DISABLE_HYPERVISOR RT_BIT(13) +/** Extended GVA ranges for FlushVirtualAddressList available. 
*/ +#define GIM_HV_MISC_FEAT_EXT_GVA_RANGE_FOR_FLUSH_VA_LIST RT_BIT(14) +/** Support for returning hypercall output via XMM registers. */ +#define GIM_HV_MISC_FEAT_HYPERCALL_OUTPUT_XMM RT_BIT(15) +/** Synthetic interrupt source polling mode available. */ +#define GIM_HV_MISC_FEAT_SINT_POLLING_MODE RT_BIT(17) +/** Hypercall MSR lock available. */ +#define GIM_HV_MISC_FEAT_HYPERCALL_MSR_LOCK RT_BIT(18) +/** Use direct synthetic MSRs. */ +#define GIM_HV_MISC_FEAT_USE_DIRECT_SYNTH_MSRS RT_BIT(19) +/** @} */ + +/** @name Hyper-V implementation recommendations. + * Recommendations from the hypervisor for the guest for optimal performance. + * @{ + */ +/** Use hypercall for address space switches rather than MOV CR3. */ +#define GIM_HV_HINT_HYPERCALL_FOR_PROCESS_SWITCH RT_BIT(0) +/** Use hypercall for local TLB flushes rather than INVLPG/MOV CR3. */ +#define GIM_HV_HINT_HYPERCALL_FOR_TLB_FLUSH RT_BIT(1) +/** Use hypercall for inter-CPU TLB flushes rather than IPIs. */ +#define GIM_HV_HINT_HYPERCALL_FOR_TLB_SHOOTDOWN RT_BIT(2) +/** Use MSRs for APIC access (EOI, ICR, TPR) rather than MMIO. */ +#define GIM_HV_HINT_MSR_FOR_APIC_ACCESS RT_BIT(3) +/** Use hypervisor provided MSR for a system reset. */ +#define GIM_HV_HINT_MSR_FOR_SYS_RESET RT_BIT(4) +/** Relax timer-related checks (watchdogs/deadman timeouts) that rely on + * timely deliver of external interrupts. */ +#define GIM_HV_HINT_RELAX_TIME_CHECKS RT_BIT(5) +/** Recommend using DMA remapping. */ +#define GIM_HV_HINT_DMA_REMAPPING RT_BIT(6) +/** Recommend using interrupt remapping. */ +#define GIM_HV_HINT_INTERRUPT_REMAPPING RT_BIT(7) +/** Recommend using X2APIC MSRs rather than MMIO. */ +#define GIM_HV_HINT_X2APIC_MSRS RT_BIT(8) +/** Recommend deprecating Auto EOI (end of interrupt). */ +#define GIM_HV_HINT_DEPRECATE_AUTO_EOI RT_BIT(9) +/** Recommend using SyntheticClusterIpi hypercall. */ +#define GIM_HV_HINT_SYNTH_CLUSTER_IPI_HYPERCALL RT_BIT(10) +/** Recommend using newer ExProcessMasks interface. */ +#define GIM_HV_HINT_EX_PROC_MASKS_INTERFACE RT_BIT(11) +/** Indicate that Hyper-V is nested within a Hyper-V partition. */ +#define GIM_HV_HINT_NESTED_HYPERV RT_BIT(12) +/** Recommend using INT for MBEC system calls. */ +#define GIM_HV_HINT_INT_FOR_MBEC_SYSCALLS RT_BIT(13) +/** Recommend using enlightened VMCS interfacea and nested enlightenments. */ +#define GIM_HV_HINT_NESTED_ENLIGHTENED_VMCS_INTERFACE RT_BIT(14) +/** @} */ + + +/** @name Hyper-V implementation hardware features. + * Which hardware features are in use by the hypervisor. + * @{ + */ +/** APIC overlay is used. */ +#define GIM_HV_HOST_FEAT_AVIC RT_BIT(0) +/** MSR bitmaps is used. */ +#define GIM_HV_HOST_FEAT_MSR_BITMAP RT_BIT(1) +/** Architectural performance counter supported. */ +#define GIM_HV_HOST_FEAT_PERF_COUNTER RT_BIT(2) +/** Nested paging is used. */ +#define GIM_HV_HOST_FEAT_NESTED_PAGING RT_BIT(3) +/** DMA remapping is used. */ +#define GIM_HV_HOST_FEAT_DMA_REMAPPING RT_BIT(4) +/** Interrupt remapping is used. */ +#define GIM_HV_HOST_FEAT_INTERRUPT_REMAPPING RT_BIT(5) +/** Memory patrol scrubber is present. */ +#define GIM_HV_HOST_FEAT_MEM_PATROL_SCRUBBER RT_BIT(6) +/** DMA protection is in use. */ +#define GIM_HV_HOST_FEAT_DMA_PROT_IN_USE RT_BIT(7) +/** HPET is requested. */ +#define GIM_HV_HOST_FEAT_HPET_REQUESTED RT_BIT(8) +/** Synthetic timers are volatile. */ +#define GIM_HV_HOST_FEAT_STIMER_VOLATILE RT_BIT(9) +/** @} */ + + +/** @name Hyper-V MSRs. + * @{ + */ +/** Start of range 0. 
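These hint and host-feature values are plain bit masks, so composing the 32-bit word a Hyper-V CPUID leaf would report is a matter of OR-ing the relevant flags, and testing one back is a mask check. A hedged sketch follows; the particular combination and the gimHvDemo* helper names are invented for the example and are not what VirtualBox actually advertises.

#include <stdint.h>
#include <stdbool.h>

/* Invented combination, purely to show how the GIM_HV_HINT_XXX masks compose. */
static uint32_t gimHvDemoComposeHints(void)
{
    return GIM_HV_HINT_MSR_FOR_SYS_RESET
         | GIM_HV_HINT_RELAX_TIME_CHECKS
         | GIM_HV_HINT_X2APIC_MSRS;
}

/* Checking a single recommendation later is a simple mask test. */
static bool gimHvDemoPrefersX2ApicMsrs(uint32_t uHints)
{
    return RT_BOOL(uHints & GIM_HV_HINT_X2APIC_MSRS);
}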
*/ +#define MSR_GIM_HV_RANGE0_FIRST UINT32_C(0x40000000) +/** Guest OS identification (R/W) */ +#define MSR_GIM_HV_GUEST_OS_ID UINT32_C(0x40000000) +/** Enable hypercall interface (R/W) */ +#define MSR_GIM_HV_HYPERCALL UINT32_C(0x40000001) +/** Virtual processor's (VCPU) index (R) */ +#define MSR_GIM_HV_VP_INDEX UINT32_C(0x40000002) +/** Reset operation (R/W) */ +#define MSR_GIM_HV_RESET UINT32_C(0x40000003) +/** End of range 0. */ +#define MSR_GIM_HV_RANGE0_LAST MSR_GIM_HV_RESET + +/** Start of range 1. */ +#define MSR_GIM_HV_RANGE1_FIRST UINT32_C(0x40000010) +/** Virtual processor's (VCPU) runtime (R) */ +#define MSR_GIM_HV_VP_RUNTIME UINT32_C(0x40000010) +/** End of range 1. */ +#define MSR_GIM_HV_RANGE1_LAST MSR_GIM_HV_VP_RUNTIME + +/** Start of range 2. */ +#define MSR_GIM_HV_RANGE2_FIRST UINT32_C(0x40000020) +/** Per-VM reference counter (R) */ +#define MSR_GIM_HV_TIME_REF_COUNT UINT32_C(0x40000020) +/** Per-VM TSC page (R/W) */ +#define MSR_GIM_HV_REF_TSC UINT32_C(0x40000021) +/** Frequency of TSC in Hz as reported by the hypervisor (R) */ +#define MSR_GIM_HV_TSC_FREQ UINT32_C(0x40000022) +/** Frequency of LAPIC in Hz as reported by the hypervisor (R) */ +#define MSR_GIM_HV_APIC_FREQ UINT32_C(0x40000023) +/** End of range 2. */ +#define MSR_GIM_HV_RANGE2_LAST MSR_GIM_HV_APIC_FREQ + +/** Start of range 3. */ +#define MSR_GIM_HV_RANGE3_FIRST UINT32_C(0x40000070) +/** Access to APIC EOI (End-Of-Interrupt) register (W) */ +#define MSR_GIM_HV_EOI UINT32_C(0x40000070) +/** Access to APIC ICR (Interrupt Command) register (R/W) */ +#define MSR_GIM_HV_ICR UINT32_C(0x40000071) +/** Access to APIC TPR (Task Priority) register (R/W) */ +#define MSR_GIM_HV_TPR UINT32_C(0x40000072) +/** Enables lazy EOI processing (R/W) */ +#define MSR_GIM_HV_APIC_ASSIST_PAGE UINT32_C(0x40000073) +/** End of range 3. */ +#define MSR_GIM_HV_RANGE3_LAST MSR_GIM_HV_APIC_ASSIST_PAGE + +/** Start of range 4. */ +#define MSR_GIM_HV_RANGE4_FIRST UINT32_C(0x40000080) +/** Control behaviour of synthetic interrupt controller (R/W) */ +#define MSR_GIM_HV_SCONTROL UINT32_C(0x40000080) +/** Synthetic interrupt controller version (R) */ +#define MSR_GIM_HV_SVERSION UINT32_C(0x40000081) +/** Base address of synthetic interrupt event flag (R/W) */ +#define MSR_GIM_HV_SIEFP UINT32_C(0x40000082) +/** Base address of synthetic interrupt message page (R/W) */ +#define MSR_GIM_HV_SIMP UINT32_C(0x40000083) +/** End-Of-Message in synthetic interrupt parameter page (W) */ +#define MSR_GIM_HV_EOM UINT32_C(0x40000084) +/** End of range 4. */ +#define MSR_GIM_HV_RANGE4_LAST MSR_GIM_HV_EOM + +/** Start of range 5. 
*/ +#define MSR_GIM_HV_RANGE5_FIRST UINT32_C(0x40000090) +/** Configures synthetic interrupt source 0 (R/W) */ +#define MSR_GIM_HV_SINT0 UINT32_C(0x40000090) +/** Configures synthetic interrupt source 1 (R/W) */ +#define MSR_GIM_HV_SINT1 UINT32_C(0x40000091) +/** Configures synthetic interrupt source 2 (R/W) */ +#define MSR_GIM_HV_SINT2 UINT32_C(0x40000092) +/** Configures synthetic interrupt source 3 (R/W) */ +#define MSR_GIM_HV_SINT3 UINT32_C(0x40000093) +/** Configures synthetic interrupt source 4 (R/W) */ +#define MSR_GIM_HV_SINT4 UINT32_C(0x40000094) +/** Configures synthetic interrupt source 5 (R/W) */ +#define MSR_GIM_HV_SINT5 UINT32_C(0x40000095) +/** Configures synthetic interrupt source 6 (R/W) */ +#define MSR_GIM_HV_SINT6 UINT32_C(0x40000096) +/** Configures synthetic interrupt source 7 (R/W) */ +#define MSR_GIM_HV_SINT7 UINT32_C(0x40000097) +/** Configures synthetic interrupt source 8 (R/W) */ +#define MSR_GIM_HV_SINT8 UINT32_C(0x40000098) +/** Configures synthetic interrupt source 9 (R/W) */ +#define MSR_GIM_HV_SINT9 UINT32_C(0x40000099) +/** Configures synthetic interrupt source 10 (R/W) */ +#define MSR_GIM_HV_SINT10 UINT32_C(0x4000009A) +/** Configures synthetic interrupt source 11 (R/W) */ +#define MSR_GIM_HV_SINT11 UINT32_C(0x4000009B) +/** Configures synthetic interrupt source 12 (R/W) */ +#define MSR_GIM_HV_SINT12 UINT32_C(0x4000009C) +/** Configures synthetic interrupt source 13 (R/W) */ +#define MSR_GIM_HV_SINT13 UINT32_C(0x4000009D) +/** Configures synthetic interrupt source 14 (R/W) */ +#define MSR_GIM_HV_SINT14 UINT32_C(0x4000009E) +/** Configures synthetic interrupt source 15 (R/W) */ +#define MSR_GIM_HV_SINT15 UINT32_C(0x4000009F) +/** End of range 5. */ +#define MSR_GIM_HV_RANGE5_LAST MSR_GIM_HV_SINT15 + +/** Start of range 6. */ +#define MSR_GIM_HV_RANGE6_FIRST UINT32_C(0x400000B0) +/** Configures register for synthetic timer 0 (R/W) */ +#define MSR_GIM_HV_STIMER0_CONFIG UINT32_C(0x400000B0) +/** Expiration time or period for synthetic timer 0 (R/W) */ +#define MSR_GIM_HV_STIMER0_COUNT UINT32_C(0x400000B1) +/** Configures register for synthetic timer 1 (R/W) */ +#define MSR_GIM_HV_STIMER1_CONFIG UINT32_C(0x400000B2) +/** Expiration time or period for synthetic timer 1 (R/W) */ +#define MSR_GIM_HV_STIMER1_COUNT UINT32_C(0x400000B3) +/** Configures register for synthetic timer 2 (R/W) */ +#define MSR_GIM_HV_STIMER2_CONFIG UINT32_C(0x400000B4) +/** Expiration time or period for synthetic timer 2 (R/W) */ +#define MSR_GIM_HV_STIMER2_COUNT UINT32_C(0x400000B5) +/** Configures register for synthetic timer 3 (R/W) */ +#define MSR_GIM_HV_STIMER3_CONFIG UINT32_C(0x400000B6) +/** Expiration time or period for synthetic timer 3 (R/W) */ +#define MSR_GIM_HV_STIMER3_COUNT UINT32_C(0x400000B7) +/** End of range 6. */ +#define MSR_GIM_HV_RANGE6_LAST MSR_GIM_HV_STIMER3_COUNT + +/** Start of range 7. */ +#define MSR_GIM_HV_RANGE7_FIRST UINT32_C(0x400000C1) +/** Trigger to transition to power state C1 (R) */ +#define MSR_GIM_HV_POWER_STATE_TRIGGER_C1 UINT32_C(0x400000C1) +/** Trigger to transition to power state C2 (R) */ +#define MSR_GIM_HV_POWER_STATE_TRIGGER_C2 UINT32_C(0x400000C2) +/** Trigger to transition to power state C3 (R) */ +#define MSR_GIM_HV_POWER_STATE_TRIGGER_C3 UINT32_C(0x400000C3) +/** End of range 7. */ +#define MSR_GIM_HV_RANGE7_LAST MSR_GIM_HV_POWER_STATE_TRIGGER_C3 + +/** Start of range 8. 
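Because the sixteen SINTx MSRs are contiguous and the four synthetic timers use interleaved config/count pairs, the MSR number for a given index can be computed rather than switch-cased. The helpers below only illustrate that arithmetic against the layout defined above; the gimHvDemo* names are invented and not part of this header.

#include <stdint.h>

/* MSR number for synthetic interrupt source iSint (0..15). */
static uint32_t gimHvDemoSintMsr(uint8_t iSint)
{
    return MSR_GIM_HV_SINT0 + iSint;                          /* 0x40000090 + n */
}

/* MSR numbers for synthetic timer iTimer (0..3): config/count come in pairs. */
static uint32_t gimHvDemoStimerConfigMsr(uint8_t iTimer)
{
    return MSR_GIM_HV_STIMER0_CONFIG + (uint32_t)iTimer * 2;  /* 0x400000B0, B2, B4, B6 */
}

static uint32_t gimHvDemoStimerCountMsr(uint8_t iTimer)
{
    return MSR_GIM_HV_STIMER0_COUNT + (uint32_t)iTimer * 2;   /* 0x400000B1, B3, B5, B7 */
}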
*/ +#define MSR_GIM_HV_RANGE8_FIRST UINT32_C(0x400000D1) +/** Configure the recipe for power state transitions to C1 (R/W) */ +#define MSR_GIM_HV_POWER_STATE_CONFIG_C1 UINT32_C(0x400000D1) +/** Configure the recipe for power state transitions to C2 (R/W) */ +#define MSR_GIM_HV_POWER_STATE_CONFIG_C2 UINT32_C(0x400000D2) +/** Configure the recipe for power state transitions to C3 (R/W) */ +#define MSR_GIM_HV_POWER_STATE_CONFIG_C3 UINT32_C(0x400000D3) +/** End of range 8. */ +#define MSR_GIM_HV_RANGE8_LAST MSR_GIM_HV_POWER_STATE_CONFIG_C3 + +/** Start of range 9. */ +#define MSR_GIM_HV_RANGE9_FIRST UINT32_C(0x400000E0) +/** Map the guest's retail partition stats page (R/W) */ +#define MSR_GIM_HV_STATS_PART_RETAIL_PAGE UINT32_C(0x400000E0) +/** Map the guest's internal partition stats page (R/W) */ +#define MSR_GIM_HV_STATS_PART_INTERNAL_PAGE UINT32_C(0x400000E1) +/** Map the guest's retail VP stats page (R/W) */ +#define MSR_GIM_HV_STATS_VP_RETAIL_PAGE UINT32_C(0x400000E2) +/** Map the guest's internal VP stats page (R/W) */ +#define MSR_GIM_HV_STATS_VP_INTERNAL_PAGE UINT32_C(0x400000E3) +/** End of range 9. */ +#define MSR_GIM_HV_RANGE9_LAST MSR_GIM_HV_STATS_VP_INTERNAL_PAGE + +/** Start of range 10. */ +#define MSR_GIM_HV_RANGE10_FIRST UINT32_C(0x400000F0) +/** Trigger the guest's transition to idle power state (R) */ +#define MSR_GIM_HV_GUEST_IDLE UINT32_C(0x400000F0) +/** Synthetic debug control. */ +#define MSR_GIM_HV_SYNTH_DEBUG_CONTROL UINT32_C(0x400000F1) +/** Synthetic debug status. */ +#define MSR_GIM_HV_SYNTH_DEBUG_STATUS UINT32_C(0x400000F2) +/** Synthetic debug send buffer. */ +#define MSR_GIM_HV_SYNTH_DEBUG_SEND_BUFFER UINT32_C(0x400000F3) +/** Synthetic debug receive buffer. */ +#define MSR_GIM_HV_SYNTH_DEBUG_RECEIVE_BUFFER UINT32_C(0x400000F4) +/** Synthetic debug pending buffer. */ +#define MSR_GIM_HV_SYNTH_DEBUG_PENDING_BUFFER UINT32_C(0x400000F5) +/** End of range 10. */ +#define MSR_GIM_HV_RANGE10_LAST MSR_GIM_HV_SYNTH_DEBUG_PENDING_BUFFER + +/** Start of range 11. */ +#define MSR_GIM_HV_RANGE11_FIRST UINT32_C(0x400000FF) +/** Undocumented debug options MSR. */ +#define MSR_GIM_HV_DEBUG_OPTIONS_MSR UINT32_C(0x400000FF) +/** End of range 11. */ +#define MSR_GIM_HV_RANGE11_LAST MSR_GIM_HV_DEBUG_OPTIONS_MSR + +/** Start of range 12. */ +#define MSR_GIM_HV_RANGE12_FIRST UINT32_C(0x40000100) +/** Guest crash MSR 0. */ +#define MSR_GIM_HV_CRASH_P0 UINT32_C(0x40000100) +/** Guest crash MSR 1. */ +#define MSR_GIM_HV_CRASH_P1 UINT32_C(0x40000101) +/** Guest crash MSR 2. */ +#define MSR_GIM_HV_CRASH_P2 UINT32_C(0x40000102) +/** Guest crash MSR 3. */ +#define MSR_GIM_HV_CRASH_P3 UINT32_C(0x40000103) +/** Guest crash MSR 4. */ +#define MSR_GIM_HV_CRASH_P4 UINT32_C(0x40000104) +/** Guest crash control. */ +#define MSR_GIM_HV_CRASH_CTL UINT32_C(0x40000105) +/** End of range 12. 
*/ +#define MSR_GIM_HV_RANGE12_LAST MSR_GIM_HV_CRASH_CTL +/** @} */ + +AssertCompile(MSR_GIM_HV_RANGE0_FIRST <= MSR_GIM_HV_RANGE0_LAST); +AssertCompile(MSR_GIM_HV_RANGE1_FIRST <= MSR_GIM_HV_RANGE1_LAST); +AssertCompile(MSR_GIM_HV_RANGE2_FIRST <= MSR_GIM_HV_RANGE2_LAST); +AssertCompile(MSR_GIM_HV_RANGE3_FIRST <= MSR_GIM_HV_RANGE3_LAST); +AssertCompile(MSR_GIM_HV_RANGE4_FIRST <= MSR_GIM_HV_RANGE4_LAST); +AssertCompile(MSR_GIM_HV_RANGE5_FIRST <= MSR_GIM_HV_RANGE5_LAST); +AssertCompile(MSR_GIM_HV_RANGE6_FIRST <= MSR_GIM_HV_RANGE6_LAST); +AssertCompile(MSR_GIM_HV_RANGE7_FIRST <= MSR_GIM_HV_RANGE7_LAST); +AssertCompile(MSR_GIM_HV_RANGE8_FIRST <= MSR_GIM_HV_RANGE8_LAST); +AssertCompile(MSR_GIM_HV_RANGE9_FIRST <= MSR_GIM_HV_RANGE9_LAST); +AssertCompile(MSR_GIM_HV_RANGE10_FIRST <= MSR_GIM_HV_RANGE10_LAST); +AssertCompile(MSR_GIM_HV_RANGE11_FIRST <= MSR_GIM_HV_RANGE11_LAST); + +/** @name Hyper-V MSR - Reset (MSR_GIM_HV_RESET). + * @{ + */ +/** The reset enable mask. */ +#define MSR_GIM_HV_RESET_ENABLE RT_BIT_64(0) +/** Whether the reset MSR is enabled. */ +#define MSR_GIM_HV_RESET_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_RESET_ENABLE) +/** @} */ + +/** @name Hyper-V MSR - Hypercall (MSR_GIM_HV_HYPERCALL). + * @{ + */ +/** Guest-physical page frame number of the hypercall-page. */ +#define MSR_GIM_HV_HYPERCALL_GUEST_PFN(a) ((a) >> 12) +/** The hypercall enable mask. */ +#define MSR_GIM_HV_HYPERCALL_PAGE_ENABLE RT_BIT_64(0) +/** Whether the hypercall-page is enabled or not. */ +#define MSR_GIM_HV_HYPERCALL_PAGE_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_HYPERCALL_PAGE_ENABLE) +/** @} */ + +/** @name Hyper-V MSR - Reference TSC (MSR_GIM_HV_REF_TSC). + * @{ + */ +/** Guest-physical page frame number of the TSC-page. */ +#define MSR_GIM_HV_REF_TSC_GUEST_PFN(a) ((a) >> 12) +/** The TSC-page enable mask. */ +#define MSR_GIM_HV_REF_TSC_ENABLE RT_BIT_64(0) +/** Whether the TSC-page is enabled or not. */ +#define MSR_GIM_HV_REF_TSC_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_REF_TSC_ENABLE) +/** @} */ + +/** @name Hyper-V MSR - Guest crash control (MSR_GIM_HV_CRASH_CTL). + * @{ + */ +/** The Crash Control notify mask. */ +#define MSR_GIM_HV_CRASH_CTL_NOTIFY RT_BIT_64(63) +/** @} */ + +/** @name Hyper-V MSR - Guest OS ID (MSR_GIM_HV_GUEST_OS_ID). + * @{ + */ +/** An open-source operating system. */ +#define MSR_GIM_HV_GUEST_OS_ID_IS_OPENSOURCE(a) RT_BOOL((a) & RT_BIT_64(63)) +/** Vendor ID. */ +#define MSR_GIM_HV_GUEST_OS_ID_VENDOR(a) (uint32_t)(((a) >> 48) & 0xfff) +/** Guest OS variant, depending on the vendor ID. */ +#define MSR_GIM_HV_GUEST_OS_ID_OS_VARIANT(a) (uint32_t)(((a) >> 40) & 0xff) +/** Guest OS major version. */ +#define MSR_GIM_HV_GUEST_OS_ID_MAJOR_VERSION(a) (uint32_t)(((a) >> 32) & 0xff) +/** Guest OS minor version. */ +#define MSR_GIM_HV_GUEST_OS_ID_MINOR_VERSION(a) (uint32_t)(((a) >> 24) & 0xff) +/** Guest OS service version (e.g. service pack number in case of Windows). */ +#define MSR_GIM_HV_GUEST_OS_ID_SERVICE_VERSION(a) (uint32_t)(((a) >> 16) & 0xff) +/** Guest OS build number. */ +#define MSR_GIM_HV_GUEST_OS_ID_BUILD(a) (uint32_t)((a) & 0xffff) +/** @} */ + +/** @name Hyper-V MSR - APIC-assist page (MSR_GIM_HV_APIC_ASSIST_PAGE). + * @{ + */ +/** Guest-physical page frame number of the APIC-assist page. */ +#define MSR_GIM_HV_APICASSIST_GUEST_PFN(a) ((a) >> 12) +/** The APIC-assist page enable mask. */ +#define MSR_GIM_HV_APICASSIST_PAGE_ENABLE RT_BIT_64(0) +/** Whether the APIC-assist page is enabled or not. 
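The overlay-page MSRs above (hypercall page, reference TSC page, APIC-assist page) share one pattern: bit 0 enables the page and the upper bits carry a guest page frame number, so the guest-physical address is the PFN shifted left by the page shift (GIM_HV_PAGE_SHIFT, defined further down in this header). Below is a hedged sketch of decoding such a write, plus splitting a guest OS ID value into its fields; the gimHvDemo* helpers are invented for the example.

#include <stdint.h>
#include <stdbool.h>

/* Decode a hypercall MSR write: returns false if the page is disabled,
 * otherwise yields the guest-physical address of the hypercall page. */
static bool gimHvDemoDecodeHypercallMsr(uint64_t uRawValue, uint64_t *pGCPhysPage)
{
    if (!MSR_GIM_HV_HYPERCALL_PAGE_IS_ENABLED(uRawValue))
        return false;
    *pGCPhysPage = MSR_GIM_HV_HYPERCALL_GUEST_PFN(uRawValue) << GIM_HV_PAGE_SHIFT;
    return true;
}

/* Split a guest OS identity MSR value into the fields documented above. */
static void gimHvDemoDumpGuestOsId(uint64_t uGuestOsId, uint32_t *puMajor,
                                   uint32_t *puMinor, uint32_t *puBuild)
{
    *puMajor = MSR_GIM_HV_GUEST_OS_ID_MAJOR_VERSION(uGuestOsId);
    *puMinor = MSR_GIM_HV_GUEST_OS_ID_MINOR_VERSION(uGuestOsId);
    *puBuild = MSR_GIM_HV_GUEST_OS_ID_BUILD(uGuestOsId);
}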
*/ +#define MSR_GIM_HV_APICASSIST_PAGE_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_APICASSIST_PAGE_ENABLE) +/** @} */ + +/** @name Hyper-V MSR - Synthetic Interrupt Event Flags page + * (MSR_GIM_HV_SIEFP). + * @{ + */ +/** Guest-physical page frame number of the APIC-assist page. */ +#define MSR_GIM_HV_SIEF_GUEST_PFN(a) ((a) >> 12) +/** The SIEF enable mask. */ +#define MSR_GIM_HV_SIEF_PAGE_ENABLE RT_BIT_64(0) +/** Whether the SIEF page is enabled or not. */ +#define MSR_GIM_HV_SIEF_PAGE_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_SIEF_PAGE_ENABLE) +/** @} */ + +/** @name Hyper-V MSR - Synthetic Interrupt Control (MSR_GIM_HV_CONTROL). + * @{ + */ +/** The SControl enable mask. */ +#define MSR_GIM_HV_SCONTROL_ENABLE RT_BIT_64(0) +/** Whether SControl is enabled or not. */ +#define MSR_GIM_HV_SCONTROL_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_SCONTROL_ENABLE) +/** @} */ + +/** @name Hyper-V MSR - Synthetic Timer Config (MSR_GIM_HV_STIMER_CONFIG). + * @{ + */ +/** The Stimer enable mask. */ +#define MSR_GIM_HV_STIMER_ENABLE RT_BIT_64(0) +/** Whether Stimer is enabled or not. */ +#define MSR_GIM_HV_STIMER_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_STIMER_ENABLE) +/** The Stimer periodic mask. */ +#define MSR_GIM_HV_STIMER_PERIODIC RT_BIT_64(1) +/** Whether Stimer is enabled or not. */ +#define MSR_GIM_HV_STIMER_IS_PERIODIC(a) RT_BOOL((a) & MSR_GIM_HV_STIMER_PERIODIC) +/** The Stimer lazy mask. */ +#define MSR_GIM_HV_STIMER_LAZY RT_BIT_64(2) +/** Whether Stimer is enabled or not. */ +#define MSR_GIM_HV_STIMER_IS_LAZY(a) RT_BOOL((a) & MSR_GIM_HV_STIMER_LAZY) +/** The Stimer auto-enable mask. */ +#define MSR_GIM_HV_STIMER_AUTO_ENABLE RT_BIT_64(3) +/** Whether Stimer is enabled or not. */ +#define MSR_GIM_HV_STIMER_IS_AUTO_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_STIMER_AUTO_ENABLE) +/** The Stimer SINTx mask (bits 16:19). */ +#define MSR_GIM_HV_STIMER_SINTX UINT64_C(0xf0000) +/** Gets the Stimer synthetic interrupt source. */ +#define MSR_GIM_HV_STIMER_GET_SINTX(a) (((a) >> 16) & 0xf) +/** The Stimer valid read/write mask. */ +#define MSR_GIM_HV_STIMER_RW_VALID ( MSR_GIM_HV_STIMER_ENABLE | MSR_GIM_HV_STIMER_PERIODIC \ + | MSR_GIM_HV_STIMER_LAZY | MSR_GIM_HV_STIMER_AUTO_ENABLE \ + | MSR_GIM_HV_STIMER_SINTX) +/** @} */ + +/** + * Hyper-V APIC-assist (HV_REFERENCE_TSC_PAGE) structure placed in the TSC + * reference page. + */ +typedef struct GIMHVAPICASSIST +{ + uint32_t fNoEoiRequired : 1; + uint32_t u31Reserved0 : 31; +} GIMHVAPICASSIST; +/** Pointer to Hyper-V reference TSC. */ +typedef GIMHVAPICASSIST *PGIMHVAPICASSIST; +/** Pointer to a const Hyper-V reference TSC. */ +typedef GIMHVAPICASSIST const *PCGIMHVAPICASSIST; +AssertCompileSize(GIMHVAPICASSIST, 4); + +/** + * Hypercall parameter type. + */ +typedef enum GIMHVHYPERCALLPARAM +{ + GIMHVHYPERCALLPARAM_IN = 0, + GIMHVHYPERCALLPARAM_OUT +} GIMHVHYPERCALLPARAM; + + +/** @name Hyper-V hypercall op codes. + * @{ + */ +/** Post message to hypervisor or VMs. */ +#define GIM_HV_HYPERCALL_OP_POST_MESSAGE 0x5C +/** Post debug data to hypervisor. */ +#define GIM_HV_HYPERCALL_OP_POST_DEBUG_DATA 0x69 +/** Retreive debug data from hypervisor. */ +#define GIM_HV_HYPERCALL_OP_RETREIVE_DEBUG_DATA 0x6A +/** Reset debug session. */ +#define GIM_HV_HYPERCALL_OP_RESET_DEBUG_SESSION 0x6B +/** @} */ + +/** @name Hyper-V extended hypercall op codes. + * @{ + */ +/** Query extended hypercall capabilities. */ +#define GIM_HV_EXT_HYPERCALL_OP_QUERY_CAP 0x8001 +/** Query guest physical address range that has zero'd filled memory. 
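A write to one of the STIMERx_CONFIG MSRs can be taken apart with the masks above: check the valid-bits mask, then pull out the periodic flag and the SINTx routing. The sketch below shows only that decomposition, under the assumption that bits outside the valid mask are rejected; it is not the actual VirtualBox handling and the gimHvDemo* name is invented.

#include <stdint.h>
#include <stdbool.h>

static bool gimHvDemoParseStimerConfig(uint64_t uValue, bool *pfPeriodic, uint8_t *pidxSint)
{
    if (uValue & ~MSR_GIM_HV_STIMER_RW_VALID)       /* assumption: reserved bits rejected */
        return false;
    if (!MSR_GIM_HV_STIMER_IS_ENABLED(uValue))
        return false;                               /* timer stays disarmed */
    *pfPeriodic = MSR_GIM_HV_STIMER_IS_PERIODIC(uValue);
    *pidxSint   = (uint8_t)MSR_GIM_HV_STIMER_GET_SINTX(uValue);
    return true;
}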
*/ +#define GIM_HV_EXT_HYPERCALL_OP_GET_BOOT_ZEROED_MEM 0x8002 +/** @} */ + + +/** @name Hyper-V Extended hypercall - HvExtCallQueryCapabilities. + * @{ + */ +/** Boot time zeroed pages. */ +#define GIM_HV_EXT_HYPERCALL_CAP_ZERO_MEM RT_BIT_64(0) +/** Whether boot time zeroed pages capability is enabled. */ +#define GIM_HV_EXT_HYPERCALL_CAP_IS_ZERO_MEM_ENABLED(a) RT_BOOL((a) & GIM_HV_EXT_HYPERCALL_CAP_ZERO_MEM) +/** @} */ + + +/** @name Hyper-V hypercall inputs. + * @{ + */ +/** The hypercall call operation code. */ +#define GIM_HV_HYPERCALL_IN_CALL_CODE(a) ((a) & UINT64_C(0xffff)) +/** Whether it's a fast (register based) hypercall or not (memory-based). */ +#define GIM_HV_HYPERCALL_IN_IS_FAST(a) RT_BOOL((a) & RT_BIT_64(16)) +/** Total number of reps for a rep hypercall. */ +#define GIM_HV_HYPERCALL_IN_REP_COUNT(a) (((a) << 32) & UINT64_C(0xfff)) +/** Rep start index for a rep hypercall. */ +#define GIM_HV_HYPERCALL_IN_REP_START_IDX(a) (((a) << 48) & UINT64_C(0xfff)) +/** Reserved bits range 1. */ +#define GIM_HV_HYPERCALL_IN_RSVD_1(a) (((a) << 17) & UINT64_C(0x7fff)) +/** Reserved bits range 2. */ +#define GIM_HV_HYPERCALL_IN_RSVD_2(a) (((a) << 44) & UINT64_C(0xf)) +/** Reserved bits range 3. */ +#define GIM_HV_HYPERCALL_IN_RSVD_3(a) (((a) << 60) & UINT64_C(0x7)) +/** @} */ + + +/** @name Hyper-V hypercall status codes. + * @{ + */ +/** Success. */ +#define GIM_HV_STATUS_SUCCESS 0x00 +/** Unrecognized hypercall. */ +#define GIM_HV_STATUS_INVALID_HYPERCALL_CODE 0x02 +/** Invalid hypercall input (rep count, rsvd bits). */ +#define GIM_HV_STATUS_INVALID_HYPERCALL_INPUT 0x03 +/** Hypercall guest-physical address not 8-byte aligned or crosses page boundary. */ +#define GIM_HV_STATUS_INVALID_ALIGNMENT 0x04 +/** Invalid hypercall parameters. */ +#define GIM_HV_STATUS_INVALID_PARAMETER 0x05 +/** Access denied. */ +#define GIM_HV_STATUS_ACCESS_DENIED 0x06 +/** The partition state not valid for specified op. */ +#define GIM_HV_STATUS_INVALID_PARTITION_STATE 0x07 +/** The hypercall operation could not be performed. */ +#define GIM_HV_STATUS_OPERATION_DENIED 0x08 +/** Specified partition property ID not recognized. */ +#define GIM_HV_STATUS_UNKNOWN_PROPERTY 0x09 +/** Specified partition property value not within range. */ +#define GIM_HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0x0a +/** Insufficient memory for performing the hypercall. */ +#define GIM_HV_STATUS_INSUFFICIENT_MEMORY 0x0b +/** Maximum partition depth has been exceeded for the partition hierarchy. */ +#define GIM_HV_STATUS_PARTITION_TOO_DEEP 0x0c +/** The specified partition ID is not valid. */ +#define GIM_HV_STATUS_INVALID_PARTITION_ID 0x0d +/** The specified virtual processor index in invalid. */ +#define GIM_HV_STATUS_INVALID_VP_INDEX 0x0e +/** The specified port ID is not unique or doesn't exist. */ +#define GIM_HV_STATUS_INVALID_PORT_ID 0x11 +/** The specified connection ID is not unique or doesn't exist. */ +#define GIM_HV_STATUS_INVALID_CONNECTION_ID 0x12 +/** The target port doesn't have sufficient buffers for the caller to post a message. */ +#define GIM_HV_STATUS_INSUFFICIENT_BUFFERS 0x13 +/** External interrupt not acknowledged.*/ +#define GIM_HV_STATUS_NOT_ACKNOWLEDGED 0x14 +/** External interrupt acknowledged. */ +#define GIM_HV_STATUS_ACKNOWLEDGED 0x16 +/** Invalid state due to misordering Hv[Save|Restore]PartitionState. */ +#define GIM_HV_STATUS_INVALID_SAVE_RESTORE_STATE 0x17 +/** Operation not perform due to a required feature of SynIc was disabled. 
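As a companion to the hypercall input macros above, here is a small extraction sketch written directly against the bit layout given in the Hyper-V TLFS (call code in bits 15:0, fast flag in bit 16, rep count in bits 43:32, rep start index in bits 59:48). The GIMHVDEMOHYPERCALLIN type and the helper name are invented for the example.

#include <stdint.h>
#include <stdbool.h>

typedef struct GIMHVDEMOHYPERCALLIN
{
    uint16_t uCallCode;     /* bits 15:0  */
    bool     fFast;         /* bit  16    */
    uint16_t cReps;         /* bits 43:32 */
    uint16_t iRepStart;     /* bits 59:48 */
} GIMHVDEMOHYPERCALLIN;

static void gimHvDemoParseHypercallIn(uint64_t uHyperIn, GIMHVDEMOHYPERCALLIN *pOut)
{
    pOut->uCallCode = (uint16_t)(uHyperIn & UINT64_C(0xffff));
    pOut->fFast     = RT_BOOL(uHyperIn & RT_BIT_64(16));
    pOut->cReps     = (uint16_t)((uHyperIn >> 32) & UINT64_C(0xfff));
    pOut->iRepStart = (uint16_t)((uHyperIn >> 48) & UINT64_C(0xfff));
}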
*/ +#define GIM_HV_STATUS_INVALID_SYNIC_STATE 0x18 +/** Object or value already in use. */ +#define GIM_HV_STATUS_OBJECT_IN_USE 0x19 +/** Invalid proximity domain information. */ +#define GIM_HV_STATUS_INVALID_PROXIMITY_DOMAIN_INFO 0x1A +/** Attempt to retrieve data failed. */ +#define GIM_HV_STATUS_NO_DATA 0x1B +/** Debug connection has not recieved any new data since the last time. */ +#define GIM_HV_STATUS_INACTIVE 0x1C +/** A resource is unavailable for allocation. */ +#define GIM_HV_STATUS_NO_RESOURCES 0x1D +/** A hypervisor feature is not available to the caller. */ +#define GIM_HV_STATUS_FEATURE_UNAVAILABLE 0x1E +/** The debug packet returned is partial due to an I/O error. */ +#define GIM_HV_STATUS_PARTIAL_PACKET 0x1F +/** Processor feature SSE3 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_SSE3_NOT_SUPPORTED 0x20 +/** Processor feature LAHSAHF unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_LAHSAHF_NOT_SUPPORTED 0x21 +/** Processor feature SSSE3 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_SSSE3_NOT_SUPPORTED 0x22 +/** Processor feature SSE4.1 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_SSE4_1_NOT_SUPPORTED 0x23 +/** Processor feature SSE4.2 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_SSE4_2_NOT_SUPPORTED 0x24 +/** Processor feature SSE4A unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_SSE4A_NOT_SUPPORTED 0x25 +/** Processor feature XOP unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XOP_NOT_SUPPORTED 0x26 +/** Processor feature POPCNT unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_POPCNT_NOT_SUPPORTED 0x27 +/** Processor feature CMPXCHG16B unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_CMPXCHG16B_NOT_SUPPORTED 0x28 +/** Processor feature ALTMOVCR8 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_ALTMOVCR8_NOT_SUPPORTED 0x29 +/** Processor feature LZCNT unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_LZCNT_NOT_SUPPORTED 0x2A +/** Processor feature misaligned SSE unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_MISALIGNED_SSE_NOT_SUPPORTED 0x2B +/** Processor feature MMX extensions unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_MMX_EXT_NOT_SUPPORTED 0x2C +/** Processor feature 3DNow! unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_3DNOW_NOT_SUPPORTED 0x2D +/** Processor feature Extended 3DNow! unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_EXTENDED_3DNOW_NOT_SUPPORTED 0x2E +/** Processor feature 1GB large page unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_PAGE_1GB_NOT_SUPPORTED 0x2F +/** Processor cache line flush size incompatible. */ +#define GIM_HV_STATUS_PROC_CACHE_LINE_FLUSH_SIZE_INCOMPATIBLE 0x30 +/** Processor feature XSAVE unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVE_NOT_SUPPORTED 0x31 +/** Processor feature XSAVEOPT unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVEOPT_NOT_SUPPORTED 0x32 +/** The specified buffer was too small for all requested data. */ +#define GIM_HV_STATUS_INSUFFICIENT_BUFFER 0x33 +/** Processor feature XSAVEOPT unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVE_AVX_NOT_SUPPORTED 0x34 +/** Processor feature XSAVEOPT unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVE_FEAT_NOT_SUPPORTED 0x35 /** Huh, isn't this same as 0x31? */ +/** Processor feature XSAVEOPT unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_PAGE_XSAVE_SAVE_AREA_INCOMPATIBLE 0x36 +/** Processor architecture unsupoorted. */ +#define GIM_HV_STATUS_INCOMPATIBLE_PROCESSOR 0x37 +/** Max. domains for platform I/O remapping reached. */ +#define GIM_HV_STATUS_INSUFFICIENT_DEVICE_DOMAINS 0x38 +/** Processor feature AES unsupported. 
*/ +#define GIM_HV_STATUS_PROC_FEAT_AES_NOT_SUPPORTED 0x39 +/** Processor feature PCMULQDQ unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_PCMULQDQ_NOT_SUPPORTED 0x3A +/** Processor feature XSAVE features unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVE_FEATURES_INCOMPATIBLE 0x3B +/** Generic CPUID validation error. */ +#define GIM_HV_STATUS_CPUID_FEAT_VALIDATION_ERROR 0x3C +/** XSAVE CPUID validation error. */ +#define GIM_HV_STATUS_CPUID_XSAVE_FEAT_VALIDATION_ERROR 0x3D +/** Processor startup timed out. */ +#define GIM_HV_STATUS_PROCESSOR_STARTUP_TIMEOUT 0x3E +/** SMX enabled by the BIOS. */ +#define GIM_HV_STATUS_SMX_ENABLED 0x3F +/** Processor feature PCID unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_PCID_NOT_SUPPORTED 0x40 +/** Invalid LP index. */ +#define GIM_HV_STATUS_INVALID_LP_INDEX 0x41 +/** Processor feature PCID unsupported. */ +#define GIM_HV_STATUS_FEAT_FMA4_NOT_SUPPORTED 0x42 +/** Processor feature PCID unsupported. */ +#define GIM_HV_STATUS_FEAT_F16C_NOT_SUPPORTED 0x43 +/** Processor feature PCID unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_RDRAND_NOT_SUPPORTED 0x44 +/** Processor feature RDWRFSGS unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_RDWRFSGS_NOT_SUPPORTED 0x45 +/** Processor feature SMEP unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_SMEP_NOT_SUPPORTED 0x46 +/** Processor feature enhanced fast string unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_ENHANCED_FAST_STRING_NOT_SUPPORTED 0x47 +/** Processor feature MOVBE unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_MOVBE_NOT_SUPPORTED 0x48 +/** Processor feature BMI1 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_BMI1_NOT_SUPPORTED 0x49 +/** Processor feature BMI2 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_BMI2_NOT_SUPPORTED 0x4A +/** Processor feature HLE unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_HLE_NOT_SUPPORTED 0x4B +/** Processor feature RTM unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_RTM_NOT_SUPPORTED 0x4C +/** Processor feature XSAVE FMA unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVE_FMA_NOT_SUPPORTED 0x4D +/** Processor feature XSAVE AVX2 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_XSAVE_AVX2_NOT_SUPPORTED 0x4E +/** Processor feature NPIEP1 unsupported. */ +#define GIM_HV_STATUS_PROC_FEAT_NPIEP1_NOT_SUPPORTED 0x4F +/** @} */ + + +/** @name Hyper-V MSR - Debug control (MSR_GIM_HV_SYNTH_DEBUG_CONTROL). + * @{ + */ +/** Perform debug write. */ +#define MSR_GIM_HV_SYNTH_DEBUG_CONTROL_IS_WRITE(a) RT_BOOL((a) & RT_BIT_64(0)) +/** Perform debug read. */ +#define MSR_GIM_HV_SYNTH_DEBUG_CONTROL_IS_READ(a) RT_BOOL((a) & RT_BIT_64(1)) +/** Returns length of the debug write buffer. */ +#define MSR_GIM_HV_SYNTH_DEBUG_CONTROL_W_LEN(a) (((a) & UINT64_C(0xffff0000)) >> 16) +/** @} */ + + +/** @name Hyper-V MSR - Debug status (MSR_GIM_HV_SYNTH_DEBUG_STATUS). + * @{ + */ +/** Debug send buffer operation success. */ +#define MSR_GIM_HV_SYNTH_DEBUG_STATUS_W_SUCCESS RT_BIT_64(0) +/** Debug receive buffer operation success. */ +#define MSR_GIM_HV_SYNTH_DEBUG_STATUS_R_SUCCESS RT_BIT_64(2) +/** Debug connection was reset. */ +#define MSR_GIM_HV_SYNTH_DEBUG_STATUS_CONN_RESET RT_BIT_64(3) +/** @} */ + + +/** @name Hyper-V MSR - synthetic interrupt (MSR_GIM_HV_SINTx). + * @{ + */ +/** The interrupt masked mask. */ +#define MSR_GIM_HV_SINT_MASKED RT_BIT_64(16) +/** Whether the interrupt source is masked. */ +#define MSR_GIM_HV_SINT_IS_MASKED(a) RT_BOOL((a) & MSR_GIM_HV_SINT_MASKED) +/** Gets the interrupt vector. 
*/ +#define MSR_GIM_HV_SINT_GET_VECTOR(a) ((a) & UINT64_C(0xff)) +/** The AutoEoi mask. */ +#define MSR_GIM_HV_SINT_AUTOEOI RT_BIT_64(17) +/** Gets whether AutoEoi is enabled for the synthetic interrupt. */ +#define MSR_GIM_HV_SINT_IS_AUTOEOI(a) RT_BOOL((a) & MSR_GIM_HV_SINT_AUTOEOI) +/** @} */ + + +/** @name Hyper-V MSR - synthetic interrupt message page (MSR_GIM_HV_SIMP). + * @{ + */ +/** The SIMP enable mask. */ +#define MSR_GIM_HV_SIMP_ENABLE RT_BIT_64(0) +/** Whether the SIMP is enabled. */ +#define MSR_GIM_HV_SIMP_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_HV_SIMP_ENABLE) +/** The SIMP guest-physical address. */ +#define MSR_GIM_HV_SIMP_GPA(a) ((a) & UINT64_C(0xfffffffffffff000)) +/** @} */ + + +/** @name Hyper-V hypercall debug options. + * @{ */ +/** Maximum debug data payload size in bytes. */ +#define GIM_HV_DEBUG_MAX_DATA_SIZE 4088 + +/** The undocumented bit for MSR_GIM_HV_DEBUG_OPTIONS_MSR that makes it all + * work. */ +#define GIM_HV_DEBUG_OPTIONS_USE_HYPERCALLS RT_BIT(2) + +/** Guest will perform the HvPostDebugData hypercall until completion. */ +#define GIM_HV_DEBUG_POST_LOOP RT_BIT_32(0) +/** Mask of valid HvPostDebugData options. */ +#define GIM_HV_DEBUG_POST_OPTIONS_MASK RT_BIT_32(0) + +/** Guest will perform the HvRetrieveDebugData hypercall until completion. */ +#define GIM_HV_DEBUG_RETREIVE_LOOP RT_BIT_32(0) +/** Guest checks if any global debug session is active. */ +#define GIM_HV_DEBUG_RETREIVE_TEST_ACTIVITY RT_BIT_32(1) +/** Mask of valid HvRetrieveDebugData options. */ +#define GIM_HV_DEBUG_RETREIVE_OPTIONS_MASK RT_BIT_32(0) | RT_BIT_32(1) + +/** Guest requests purging of incoming debug data. */ +#define GIM_HV_DEBUG_PURGE_INCOMING_DATA RT_BIT_32(0) +/** Guest requests purging of outgoing debug data. */ +#define GIM_HV_DEBUG_PURGE_OUTGOING_DATA RT_BIT_32(1) +/** @} */ + + +/** @name VMBus. + * These are just arbitrary definitions made up by Microsoft without + * any publicly available specification behind it. + * @{ */ +/** VMBus connection ID. */ +#define GIM_HV_VMBUS_MSG_CONNECTION_ID 1 +/** VMBus synthetic interrupt source (see VMBUS_MESSAGE_SINT in linux + * sources). */ +#define GIM_HV_VMBUS_MSG_SINT 2 +/** @} */ + +/** @name SynIC. + * Synthetic Interrupt Controller definitions. + * @{ */ +/** SynIC version register. */ +#define GIM_HV_SVERSION 1 +/** Number of synthetic interrupt sources (warning, fixed in saved-states!). */ +#define GIM_HV_SINT_COUNT 16 +/** Lowest valid vector for synthetic interrupt. */ +#define GIM_HV_SINT_VECTOR_VALID_MIN 16 +/** Highest valid vector for synthetic interrupt. */ +#define GIM_HV_SINT_VECTOR_VALID_MAX 255 +/** Number of synthetic timers. */ +#define GIM_HV_STIMER_COUNT 4 +/** @} */ + +/** @name Hyper-V synthetic interrupt message type. 
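Before a synthetic interrupt source can be delivered, its SINTx MSR decides the outcome: a masked source is dropped, the programmed vector must lie in the valid range (vectors below GIM_HV_SINT_VECTOR_VALID_MIN are reserved), and the auto-EOI bit tells the caller whether an explicit EOI will follow. The sketch below is illustrative only and not the actual VirtualBox logic; the gimHvDemo* name is invented.

#include <stdint.h>
#include <stdbool.h>

static bool gimHvDemoCanDeliverSint(uint64_t uSintMsr, uint8_t *puVector, bool *pfAutoEoi)
{
    if (MSR_GIM_HV_SINT_IS_MASKED(uSintMsr))
        return false;                                    /* source is masked, drop it */
    uint8_t const uVector = (uint8_t)MSR_GIM_HV_SINT_GET_VECTOR(uSintMsr);
    if (uVector < GIM_HV_SINT_VECTOR_VALID_MIN)
        return false;                                    /* vectors 0..15 are reserved */
    *puVector  = uVector;
    *pfAutoEoi = MSR_GIM_HV_SINT_IS_AUTOEOI(uSintMsr);
    return true;
}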
+ * See 14.8.2 "SynIC Message Types" + * @{ + */ +typedef enum GIMHVMSGTYPE +{ + GIMHVMSGTYPE_NONE = 0, /* Common messages */ + GIMHVMSGTYPE_VMBUS = 1, /* Guest messages */ + GIMHVMSGTYPE_UNMAPPEDGPA = 0x80000000, /* Hypervisor messages */ + GIMHVMSGTYPE_GPAINTERCEPT = 0x80000001, + GIMHVMSGTYPE_TIMEREXPIRED = 0x80000010, + GIMHVMSGTYPE_INVALIDVPREGVAL = 0x80000020, + GIMHVMSGTYPE_UNRECOVERABLEXCPT = 0x80000021, + GIMHVMSGTYPE_UNSUPPORTEDFEAT = 0x80000022, + GIMHVMSGTYPE_APICEOI = 0x80000030, + GIMHVMSGTYPE_X64LEGACYFPERROR = 0x80000031, + GIMHVMSGTYPE_EVENTLOGBUFSCOMPLETE = 0x80000040, + GIMHVMSGTYPE_X64IOPORTINTERCEPT = 0x80010000, + GIMHVMSGTYPE_X64MSRINTERCEPT = 0x80010001, + GIMHVMSGTYPE_X64CPUIDINTERCEPT = 0x80010002, + GIMHVMSGTYPE_X64XCPTINTERCEPT = 0x80010003 +} GIMHVMSGTYPE; +AssertCompileSize(GIMHVMSGTYPE, 4); +/** @} */ + + +/** @name Hyper-V synthetic interrupt message format. + * @{ */ +#define GIM_HV_MSG_SIZE 256 +#define GIM_HV_MSG_MAX_PAYLOAD_SIZE 240 +#define GIM_HV_MSG_MAX_PAYLOAD_UNITS 30 + +/** + * Synthetic interrupt message flags. + */ +typedef union GIMHVMSGFLAGS +{ + struct + { + uint8_t u1Pending : 1; + uint8_t u7Reserved : 7; + } n; + uint8_t u; +} GIMHVMSGFLAGS; +AssertCompileSize(GIMHVMSGFLAGS, sizeof(uint8_t)); + +/** + * Synthetic interrupt message header. + * + * @remarks The layout of this structure differs from + * the Hyper-V spec. Aug 8, 2013 v4.0a. Layout + * in accordance w/ VMBus client expectations. + */ +typedef struct GIMHVMSGHDR +{ + GIMHVMSGTYPE enmMessageType; + uint8_t cbPayload; + GIMHVMSGFLAGS MessageFlags; + uint16_t uRsvd; + union + { + uint64_t uOriginatorId; + uint64_t uPartitionId; + uint64_t uPortId; + } msgid; +} GIMHVMSGHDR; +/** Pointer to a synthetic interrupt message header. */ +typedef GIMHVMSGHDR *PGIMHVMSGHDR; +AssertCompileMemberOffset(GIMHVMSGHDR, cbPayload, 4); +AssertCompileMemberOffset(GIMHVMSGHDR, MessageFlags, 5); +AssertCompileMemberOffset(GIMHVMSGHDR, msgid, 8); +AssertCompileSize(GIMHVMSGHDR, GIM_HV_MSG_SIZE - GIM_HV_MSG_MAX_PAYLOAD_SIZE); + +/** + * Synthetic interrupt message. + */ +typedef struct GIMHVMSG +{ + GIMHVMSGHDR MsgHdr; + uint64_t aPayload[GIM_HV_MSG_MAX_PAYLOAD_UNITS]; +} GIMHVMSG; +/** Pointer to a synthetic interrupt message. */ +typedef GIMHVMSG *PGIMHVMSG; +AssertCompileSize(GIMHVMSG, GIM_HV_MSG_SIZE); +/** @} */ + + +/** @name Hyper-V hypercall parameters. + * @{ */ +/** + * HvPostMessage hypercall input. + */ +typedef struct GIMHVPOSTMESSAGEIN +{ + uint32_t uConnectionId; + uint32_t uPadding; + GIMHVMSGTYPE enmMessageType; + uint32_t cbPayload; +} GIMHVPOSTMESSAGEIN; +/** Pointer to a HvPostMessage input struct. */ +typedef GIMHVPOSTMESSAGEIN *PGIMHVPOSTMESSAGEIN; +AssertCompileSize(GIMHVPOSTMESSAGEIN, 16); + +/** + * HvResetDebugData hypercall input. + */ +typedef struct GIMHVDEBUGRESETIN +{ + uint32_t fFlags; + uint32_t uPadding; +} GIMHVDEBUGRESETIN; +/** Pointer to a HvResetDebugData input struct. */ +typedef GIMHVDEBUGRESETIN *PGIMHVDEBUGRESETIN; +AssertCompileSize(GIMHVDEBUGRESETIN, 8); + +/** + * HvPostDebugData hypercall input. + */ +typedef struct GIMHVDEBUGPOSTIN +{ + uint32_t cbWrite; + uint32_t fFlags; +} GIMHVDEBUGPOSTIN; +/** Pointer to a HvPostDebugData input struct. */ +typedef GIMHVDEBUGPOSTIN *PGIMHVDEBUGPOSTIN; +AssertCompileSize(GIMHVDEBUGPOSTIN, 8); + +/** + * HvPostDebugData hypercall output. + */ +typedef struct GIMHVDEBUGPOSTOUT +{ + uint32_t cbPending; + uint32_t uPadding; +} GIMHVDEBUGPOSTOUT; +/** Pointer to a HvPostDebugData output struct. 
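Tying the HvPostMessage input and the message layout together: the payload must fit the fixed 240-byte limit before it can be copied into a 256-byte GIMHVMSG slot of the SIM page. Below is a hedged sketch of building such a message; field handling follows the structures above, the helper name is invented, and flags and port IDs are simply left at zero.

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

static bool gimHvDemoBuildMessage(PGIMHVMSG pMsg, GIMHVMSGTYPE enmType,
                                  const void *pvPayload, uint32_t cbPayload)
{
    if (cbPayload > GIM_HV_MSG_MAX_PAYLOAD_SIZE)
        return false;                               /* larger payloads are rejected */
    memset(pMsg, 0, sizeof(*pMsg));                 /* clears flags, reserved field and port id */
    pMsg->MsgHdr.enmMessageType = enmType;
    pMsg->MsgHdr.cbPayload      = (uint8_t)cbPayload;
    memcpy(pMsg->aPayload, pvPayload, cbPayload);
    return true;
}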
*/ +typedef GIMHVDEBUGPOSTOUT *PGIMHVDEBUGPOSTOUT; +AssertCompileSize(GIMHVDEBUGPOSTOUT, 8); + +/** + * HvRetrieveDebugData hypercall input. + */ +typedef struct GIMHVDEBUGRETRIEVEIN +{ + uint32_t cbRead; + uint32_t fFlags; + uint64_t u64Timeout; +} GIMHVDEBUGRETRIEVEIN; +/** Pointer to a HvRetrieveDebugData input struct. */ +typedef GIMHVDEBUGRETRIEVEIN *PGIMHVDEBUGRETRIEVEIN; +AssertCompileSize(GIMHVDEBUGRETRIEVEIN, 16); + +/** + * HvRetriveDebugData hypercall output. + */ +typedef struct GIMHVDEBUGRETRIEVEOUT +{ + uint32_t cbRead; + uint32_t cbRemaining; +} GIMHVDEBUGRETRIEVEOUT; +/** Pointer to a HvRetrieveDebugData output struct. */ +typedef GIMHVDEBUGRETRIEVEOUT *PGIMHVDEBUGRETRIEVEOUT; +AssertCompileSize(GIMHVDEBUGRETRIEVEOUT, 8); + +/** + * HvExtCallQueryCapabilities hypercall output. + */ +typedef struct GIMHVEXTQUERYCAP +{ + uint64_t fCapabilities; +} GIMHVEXTQUERYCAP; +/** Pointer to a HvExtCallQueryCapabilities output struct. */ +typedef GIMHVEXTQUERYCAP *PGIMHVEXTQUERYCAP; +AssertCompileSize(GIMHVEXTQUERYCAP, 8); + +/** + * HvExtCallGetBootZeroedMemory hypercall output. + */ +typedef struct GIMHVEXTGETBOOTZEROMEM +{ + RTGCPHYS GCPhysStart; + uint64_t cPages; +} GIMHVEXTGETBOOTZEROMEM; +/** Pointer to a HvExtCallGetBootZeroedMemory output struct. */ +typedef GIMHVEXTGETBOOTZEROMEM *PGIMHVEXTGETBOOTZEROMEM; +AssertCompileSize(GIMHVEXTGETBOOTZEROMEM, 16); +/** @} */ + + +/** Hyper-V page size. */ +#define GIM_HV_PAGE_SIZE 4096 +/** Hyper-V page shift. */ +#define GIM_HV_PAGE_SHIFT 12 + +/** Microsoft Hyper-V vendor signature. */ +#define GIM_HV_VENDOR_MICROSOFT "Microsoft Hv" + +/** + * MMIO2 region indices. + */ +/** The hypercall page region. */ +#define GIM_HV_HYPERCALL_PAGE_REGION_IDX UINT8_C(0) +/** The TSC page region. */ +#define GIM_HV_REF_TSC_PAGE_REGION_IDX UINT8_C(1) +/** The maximum region index (must be <= UINT8_MAX). */ +#define GIM_HV_REGION_IDX_MAX GIM_HV_REF_TSC_PAGE_REGION_IDX + +/** + * Hyper-V TSC (HV_REFERENCE_TSC_PAGE) structure placed in the TSC reference + * page. + */ +typedef struct GIMHVREFTSC +{ + uint32_t u32TscSequence; + uint32_t uReserved0; + uint64_t u64TscScale; + int64_t i64TscOffset; +} GIMHVTSCPAGE; +/** Pointer to Hyper-V reference TSC. */ +typedef GIMHVREFTSC *PGIMHVREFTSC; +/** Pointer to a const Hyper-V reference TSC. */ +typedef GIMHVREFTSC const *PCGIMHVREFTSC; + +/** + * Type of the next reply to be sent to the debug connection of the guest. + * + * @remarks This is saved as part of saved-state, so don't re-order or + * alter the size! + */ +typedef enum GIMHVDEBUGREPLY +{ + /** Send UDP packet. */ + GIMHVDEBUGREPLY_UDP = 0, + /** Send DHCP offer for DHCP discover. */ + GIMHVDEBUGREPLY_DHCP_OFFER, + /** DHCP offer sent. */ + GIMHVDEBUGREPLY_DHCP_OFFER_SENT, + /** Send DHCP acknowledgement for DHCP request. */ + GIMHVDEBUGREPLY_DHCP_ACK, + /** DHCP acknowledgement sent. */ + GIMHVDEBUGREPLY_DHCP_ACK_SENT, + /** Sent ARP reply. */ + GIMHVDEBUGREPLY_ARP_REPLY, + /** ARP reply sent. */ + GIMHVDEBUGREPLY_ARP_REPLY_SENT, + /** Customary 32-bit type hack. */ + GIMHVDEBUGREPLY_32BIT_HACK = 0x7fffffff +} GIMHVDEBUGREPLY; +AssertCompileSize(GIMHVDEBUGREPLY, sizeof(uint32_t)); + +/** + * GIM Hyper-V VM instance data. + * Changes to this must checked against the padding of the gim union in VM! + */ +typedef struct GIMHV +{ + /** @name Primary MSRs. + * @{ */ + /** Guest OS identity MSR. */ + uint64_t u64GuestOsIdMsr; + /** Hypercall MSR. */ + uint64_t u64HypercallMsr; + /** Reference TSC page MSR. 
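For reference, this is how a guest consumes the GIMHVREFTSC/GIMHVTSCPAGE contents according to the Hyper-V TLFS: the partition reference time (in 100ns units) is ((TSC * u64TscScale) >> 64) + i64TscOffset, re-read while u32TscSequence is changing. The sketch below is illustrative guest-side code, not part of this header; it assumes IPRT's ASMReadTSC() and a compiler that provides unsigned __int128, and the gimHvDemo* name is invented.

#include <stdint.h>
#include <iprt/asm-amd64-x86.h>                     /* ASMReadTSC */

static uint64_t gimHvDemoReadRefTime(GIMHVTSCPAGE const volatile *pRefTsc)
{
    uint32_t uSeq;
    uint64_t uRefTime;
    do
    {
        uSeq = pRefTsc->u32TscSequence;
        uint64_t const uTsc = ASMReadTSC();
        uRefTime = (uint64_t)(((unsigned __int128)uTsc * pRefTsc->u64TscScale) >> 64)
                 + (uint64_t)pRefTsc->i64TscOffset;
    } while (pRefTsc->u32TscSequence != uSeq);      /* retry if the page was updated meanwhile */
    return uRefTime;
}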
*/ + uint64_t u64TscPageMsr; + /** @} */ + + /** @name CPUID features. + * @{ */ + /** Basic features. */ + uint32_t uBaseFeat; + /** Partition flags. */ + uint32_t uPartFlags; + /** Power management. */ + uint32_t uPowMgmtFeat; + /** Miscellaneous. */ + uint32_t uMiscFeat; + /** Hypervisor hints to the guest. */ + uint32_t uHyperHints; + /** Hypervisor capabilities. */ + uint32_t uHyperCaps; + /** @} */ + + /** @name Guest Crash MSRs. + * @{ + */ + /** Guest crash control MSR. */ + uint64_t uCrashCtlMsr; + /** Guest crash parameter 0 MSR. */ + uint64_t uCrashP0Msr; + /** Guest crash parameter 1 MSR. */ + uint64_t uCrashP1Msr; + /** Guest crash parameter 2 MSR. */ + uint64_t uCrashP2Msr; + /** Guest crash parameter 3 MSR. */ + uint64_t uCrashP3Msr; + /** Guest crash parameter 4 MSR. */ + uint64_t uCrashP4Msr; + /** @} */ + + /** @name Time management. + * @{ */ + /** Per-VM R0 Spinlock for protecting EMT writes to the TSC page. */ + RTSPINLOCK hSpinlockR0; + /** The TSC frequency (in HZ) reported to the guest. */ + uint64_t cTscTicksPerSecond; + /** @} */ + + /** @name Hypercalls. + * @{ */ + /** Guest address of the hypercall input parameter page. */ + RTGCPHYS GCPhysHypercallIn; + /** Guest address of the hypercall output parameter page. */ + RTGCPHYS GCPhysHypercallOut; + /** Pointer to the hypercall input parameter page - R3. */ + R3PTRTYPE(uint8_t *) pbHypercallIn; + /** Pointer to the hypercall output parameter page - R3. */ + R3PTRTYPE(uint8_t *) pbHypercallOut; + /** @} */ + + /** @name Guest debugging. + * @{ */ + /** Whether we're posing as the Microsoft vendor. */ + bool fIsVendorMsHv; + /** Whether we're posing as the Microsoft virtualization service. */ + bool fIsInterfaceVs; + /** Whether debugging support is enabled. */ + bool fDbgEnabled; + /** Whether we should suggest a hypercall-based debug interface to the guest. */ + bool fDbgHypercallInterface; + bool afAlignment0[4]; + /** The action to take while sending replies. */ + GIMHVDEBUGREPLY enmDbgReply; + /** The IP address chosen by/assigned to the guest. */ + RTNETADDRIPV4 DbgGuestIp4Addr; + /** Transaction ID for the BOOTP+DHCP sequence. */ + uint32_t uDbgBootpXId; + /** The source UDP port used by the guest while sending debug packets. */ + uint16_t uUdpGuestSrcPort; + /** The destination UDP port used by the guest while sending debug packets. */ + uint16_t uUdpGuestDstPort; + /** Debug send buffer MSR. */ + uint64_t uDbgSendBufferMsr; + /** Debug receive buffer MSR. */ + uint64_t uDbgRecvBufferMsr; + /** Debug pending buffer MSR. */ + uint64_t uDbgPendingBufferMsr; + /** Debug status MSR. */ + uint64_t uDbgStatusMsr; + /** Intermediate debug I/O buffer. */ + R3PTRTYPE(void *) pvDbgBuffer; + R3PTRTYPE(void *) pvAlignment0; + /** @} */ + + /** Array of MMIO2 regions. */ + GIMMMIO2REGION aMmio2Regions[GIM_HV_REGION_IDX_MAX + 1]; +} GIMHV; +/** Pointer to per-VM GIM Hyper-V instance data. */ +typedef GIMHV *PGIMHV; +/** Pointer to const per-VM GIM Hyper-V instance data. */ +typedef GIMHV const *PCGIMHV; +AssertCompileMemberAlignment(GIMHV, aMmio2Regions, 8); +AssertCompileMemberAlignment(GIMHV, hSpinlockR0, sizeof(uintptr_t)); + +/** + * Hyper-V per-VCPU synthetic timer. + */ +typedef struct GIMHVSTIMER +{ + /** Synthetic timer object - R0 ptr. */ + PTMTIMERR0 pTimerR0; + /** Synthetic timer object - R3 ptr. */ + PTMTIMERR3 pTimerR3; + /** Virtual CPU ID this timer belongs to (for reverse mapping). */ + VMCPUID idCpu; + /** The index of this timer in the auStimers array (for reverse mapping). 
*/ + uint32_t idxStimer; + /** Synthetic timer config MSR. */ + uint64_t uStimerConfigMsr; + /** Synthetic timer count MSR. */ + uint64_t uStimerCountMsr; + /** Timer description. */ + char szTimerDesc[24]; + +} GIMHVSTIMER; +/** Pointer to per-VCPU Hyper-V synthetic timer. */ +typedef GIMHVSTIMER *PGIMHVSTIMER; +/** Pointer to a const per-VCPU Hyper-V synthetic timer. */ +typedef GIMHVSTIMER const *PCGIMHVSTIMER; +AssertCompileSizeAlignment(GIMHVSTIMER, 8); + +/** + * Hyper-V VCPU instance data. + * Changes to this must checked against the padding of the gim union in VMCPU! + */ +typedef struct GIMHVCPU +{ + /** @name Synthetic interrupt MSRs. + * @{ */ + /** Synthetic interrupt message page MSR. */ + uint64_t uSimpMsr; + /** Interrupt source MSRs. */ + uint64_t auSintMsrs[GIM_HV_SINT_COUNT]; + /** Synethtic interrupt events flag page MSR. */ + uint64_t uSiefpMsr; + /** APIC-assist page MSR. */ + uint64_t uApicAssistPageMsr; + /** Synthetic interrupt control MSR. */ + uint64_t uSControlMsr; + /** Synthetic timers. */ + GIMHVSTIMER aStimers[GIM_HV_STIMER_COUNT]; + /** @} */ + + /** @name Statistics. + * @{ */ + STAMCOUNTER aStatStimerFired[GIM_HV_STIMER_COUNT]; + /** @} */ +} GIMHVCPU; +/** Pointer to per-VCPU GIM Hyper-V instance data. */ +typedef GIMHVCPU *PGIMHVCPU; +/** Pointer to const per-VCPU GIM Hyper-V instance data. */ +typedef GIMHVCPU const *PCGIMHVCPU; + + +RT_C_DECLS_BEGIN + +#ifdef IN_RING0 +VMMR0_INT_DECL(int) gimR0HvInitVM(PVMCC pVM); +VMMR0_INT_DECL(int) gimR0HvTermVM(PVMCC pVM); +VMMR0_INT_DECL(int) gimR0HvUpdateParavirtTsc(PVMCC pVM, uint64_t u64Offset); +#endif /* IN_RING0 */ + +#ifdef IN_RING3 +VMMR3_INT_DECL(int) gimR3HvInit(PVM pVM, PCFGMNODE pGimCfg); +VMMR3_INT_DECL(int) gimR3HvInitCompleted(PVM pVM); +VMMR3_INT_DECL(int) gimR3HvTerm(PVM pVM); +VMMR3_INT_DECL(void) gimR3HvRelocate(PVM pVM, RTGCINTPTR offDelta); +VMMR3_INT_DECL(void) gimR3HvReset(PVM pVM); +VMMR3_INT_DECL(int) gimR3HvSave(PVM pVM, PSSMHANDLE pSSM); +VMMR3_INT_DECL(int) gimR3HvLoad(PVM pVM, PSSMHANDLE pSSM); +VMMR3_INT_DECL(int) gimR3HvLoadDone(PVM pVM, PSSMHANDLE pSSM); +VMMR3_INT_DECL(int) gimR3HvGetDebugSetup(PVM pVM, PGIMDEBUGSETUP pDbgSetup); + +VMMR3_INT_DECL(int) gimR3HvDisableSiefPage(PVMCPU pVCpu); +VMMR3_INT_DECL(int) gimR3HvEnableSiefPage(PVMCPU pVCpu, RTGCPHYS GCPhysSiefPage); +VMMR3_INT_DECL(int) gimR3HvEnableSimPage(PVMCPU pVCpu, RTGCPHYS GCPhysSimPage); +VMMR3_INT_DECL(int) gimR3HvDisableSimPage(PVMCPU pVCpu); +VMMR3_INT_DECL(int) gimR3HvDisableApicAssistPage(PVMCPU pVCpu); +VMMR3_INT_DECL(int) gimR3HvEnableApicAssistPage(PVMCPU pVCpu, RTGCPHYS GCPhysTscPage); +VMMR3_INT_DECL(int) gimR3HvDisableTscPage(PVM pVM); +VMMR3_INT_DECL(int) gimR3HvEnableTscPage(PVM pVM, RTGCPHYS GCPhysTscPage, bool fUseThisTscSeq, uint32_t uTscSeq); +VMMR3_INT_DECL(int) gimR3HvDisableHypercallPage(PVM pVM); +VMMR3_INT_DECL(int) gimR3HvEnableHypercallPage(PVM pVM, RTGCPHYS GCPhysHypercallPage); + +VMMR3_INT_DECL(int) gimR3HvHypercallPostDebugData(PVM pVM, int *prcHv); +VMMR3_INT_DECL(int) gimR3HvHypercallRetrieveDebugData(PVM pVM, int *prcHv); +VMMR3_INT_DECL(int) gimR3HvDebugWrite(PVM pVM, void *pvData, uint32_t cbWrite, uint32_t *pcbWritten, bool fUdpPkt); +VMMR3_INT_DECL(int) gimR3HvDebugRead(PVM pVM, void *pvBuf, uint32_t cbBuf, uint32_t cbRead, uint32_t *pcbRead, + uint32_t cMsTimeout, bool fUdpPkt); +VMMR3_INT_DECL(int) gimR3HvHypercallExtQueryCap(PVM pVM, int *prcHv); +VMMR3_INT_DECL(int) gimR3HvHypercallExtGetBootZeroedMem(PVM pVM, int *prcHv); + +#endif /* IN_RING3 */ + +VMM_INT_DECL(PGIMMMIO2REGION) 
gimHvGetMmio2Regions(PVM pVM, uint32_t *pcRegions); +VMM_INT_DECL(bool) gimHvIsParavirtTscEnabled(PVM pVM); +VMM_INT_DECL(bool) gimHvAreHypercallsEnabled(PCVM pVM); +VMM_INT_DECL(bool) gimHvShouldTrapXcptUD(PVMCPU pVCpu); +VMM_INT_DECL(VBOXSTRICTRC) gimHvXcptUD(PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISCPUSTATE pDis, uint8_t *pcbInstr); +VMM_INT_DECL(VBOXSTRICTRC) gimHvHypercall(PVMCPUCC pVCpu, PCPUMCTX pCtx); +VMM_INT_DECL(VBOXSTRICTRC) gimHvHypercallEx(PVMCPUCC pVCpu, PCPUMCTX pCtx, unsigned uDisOpcode, uint8_t cbInstr); +VMM_INT_DECL(VBOXSTRICTRC) gimHvReadMsr(PVMCPUCC pVCpu, uint32_t idMsr, PCCPUMMSRRANGE pRange, uint64_t *puValue); +VMM_INT_DECL(VBOXSTRICTRC) gimHvWriteMsr(PVMCPUCC pVCpu, uint32_t idMsr, PCCPUMMSRRANGE pRange, uint64_t uRawValue); + +VMM_INT_DECL(void) gimHvStartStimer(PVMCPUCC pVCpu, PCGIMHVSTIMER pHvStimer); + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_GIMHvInternal_h */ + diff --git a/src/VBox/VMM/include/GIMInternal.h b/src/VBox/VMM/include/GIMInternal.h new file mode 100644 index 00000000..cdd4fa1a --- /dev/null +++ b/src/VBox/VMM/include/GIMInternal.h @@ -0,0 +1,123 @@ +/* $Id: GIMInternal.h $ */ +/** @file + * GIM - Internal header file. + */ + +/* + * Copyright (C) 2014-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_GIMInternal_h +#define VMM_INCLUDED_SRC_include_GIMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/vmm/gim.h> +#include <VBox/vmm/pgm.h> +#include "GIMHvInternal.h" +#include "GIMKvmInternal.h" +#include "GIMMinimalInternal.h" + +RT_C_DECLS_BEGIN + +/** @defgroup grp_gim_int Internal + * @ingroup grp_gim + * @internal + * @{ + */ + +/** The saved state version. */ +#define GIM_SAVED_STATE_VERSION 1 + +/** + * GIM VM Instance data. + */ +typedef struct GIM +{ + /** The provider that is active for this VM. */ + GIMPROVIDERID enmProviderId; + /** The interface implementation version. */ + uint32_t u32Version; + + /** Physical access handler type for semi-read-only MMIO2 memory. Lazy creation. */ + PGMPHYSHANDLERTYPE hSemiReadOnlyMmio2Handler; + /** Alignment padding. */ + uint32_t u32Padding; + + /** Pointer to the GIM device - R3 ptr. */ + R3PTRTYPE(PPDMDEVINS) pDevInsR3; + /** The debug struct - R3 ptr. */ + R3PTRTYPE(PGIMDEBUG) pDbgR3; + + /** The provider specific data. */ + union + { + GIMHV Hv; + GIMKVM Kvm; + } u; + + /** Number of hypercalls initiated. */ + STAMCOUNTER StatHypercalls; + /** Debug packets sent. */ + STAMCOUNTER StatDbgXmit; + /** Debug bytes sent. */ + STAMCOUNTER StatDbgXmitBytes; + /** Debug packets received. */ + STAMCOUNTER StatDbgRecv; + /** Debug bytes received. */ + STAMCOUNTER StatDbgRecvBytes; +} GIM; +/** Pointer to GIM VM instance data. */ +typedef GIM *PGIM; + +/** + * GIM VMCPU Instance data. + */ +typedef struct GIMCPU +{ + union + { + GIMKVMCPU KvmCpu; + GIMHVCPU HvCpu; + } u; +} GIMCPU; +/** Pointer to GIM VMCPU instance data. */ +typedef GIMCPU *PGIMCPU; + +/** + * Callback when a debug buffer read has completed and before signalling the next + * read. 
+ * + * @param pVM The cross context VM structure. + */ +typedef DECLCALLBACK(void) FNGIMDEBUGBUFREADCOMPLETED(PVM pVM); +/** Pointer to GIM debug buffer read completion callback. */ +typedef FNGIMDEBUGBUFREADCOMPLETED *PFNGIMDEBUGBUFREADCOMPLETED; + +#ifdef IN_RING3 +#if 0 +VMMR3_INT_DECL(int) gimR3Mmio2Unmap(PVM pVM, PGIMMMIO2REGION pRegion); +VMMR3_INT_DECL(int) gimR3Mmio2Map(PVM pVM, PGIMMMIO2REGION pRegion, RTGCPHYS GCPhysRegion); +VMMR3_INT_DECL(int) gimR3Mmio2HandlerPhysicalRegister(PVM pVM, PGIMMMIO2REGION pRegion); +VMMR3_INT_DECL(int) gimR3Mmio2HandlerPhysicalDeregister(PVM pVM, PGIMMMIO2REGION pRegion); +#endif + +VMMR3_INT_DECL(int) gimR3DebugRead(PVM pVM, void *pvRead, size_t *pcbRead, PFNGIMDEBUGBUFREADCOMPLETED pfnReadComplete); +VMMR3_INT_DECL(int) gimR3DebugWrite(PVM pVM, void *pvWrite, size_t *pcbWrite); +#endif /* IN_RING3 */ + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_GIMInternal_h */ + diff --git a/src/VBox/VMM/include/GIMKvmInternal.h b/src/VBox/VMM/include/GIMKvmInternal.h new file mode 100644 index 00000000..4d68b1d2 --- /dev/null +++ b/src/VBox/VMM/include/GIMKvmInternal.h @@ -0,0 +1,272 @@ +/* $Id: GIMKvmInternal.h $ */ +/** @file + * GIM - KVM, Internal header file. + */ + +/* + * Copyright (C) 2015-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_GIMKvmInternal_h +#define VMM_INCLUDED_SRC_include_GIMKvmInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/vmm/gim.h> +#include <VBox/vmm/cpum.h> + + +/** @name KVM base features. + * @{ + */ +/** Old, deprecated clock source available. */ +#define GIM_KVM_BASE_FEAT_CLOCK_OLD RT_BIT(0) +/** No need for artifical delays on IO operations. */ +#define GIM_KVM_BASE_FEAT_NOP_IO_DELAY RT_BIT(1) +/** MMU op supported (deprecated, unused). */ +#define GIM_KVM_BASE_FEAT_MMU_OP RT_BIT(2) +/** Clock source available. */ +#define GIM_KVM_BASE_FEAT_CLOCK RT_BIT(3) +/** Asynchronous page faults supported. */ +#define GIM_KVM_BASE_FEAT_ASYNC_PF RT_BIT(4) +/** Steal time (VCPU not executing guest code time in ns) available. */ +#define GIM_KVM_BASE_FEAT_STEAL_TIME RT_BIT(5) +/** Paravirtualized EOI (end-of-interrupt) supported. */ +#define GIM_KVM_BASE_FEAT_PV_EOI RT_BIT(6) +/** Paravirtualized spinlock (unhalting VCPU) supported. */ +#define GIM_KVM_BASE_FEAT_PV_UNHALT RT_BIT(7) +/** The TSC is stable (fixed rate, monotonic). */ +#define GIM_KVM_BASE_FEAT_TSC_STABLE RT_BIT(24) +/** @} */ + + +/** @name KVM MSRs. + * @{ + */ +/** Start of range 0. */ +#define MSR_GIM_KVM_RANGE0_FIRST UINT32_C(0x11) +/** Old, deprecated wall clock. */ +#define MSR_GIM_KVM_WALL_CLOCK_OLD UINT32_C(0x11) +/** Old, deprecated System time. */ +#define MSR_GIM_KVM_SYSTEM_TIME_OLD UINT32_C(0x12) +/** End of range 0. */ +#define MSR_GIM_KVM_RANGE0_LAST MSR_GIM_KVM_SYSTEM_TIME_OLD + +/** Start of range 1. */ +#define MSR_GIM_KVM_RANGE1_FIRST UINT32_C(0x4b564d00) +/** Wall clock. */ +#define MSR_GIM_KVM_WALL_CLOCK UINT32_C(0x4b564d00) +/** System time. 
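 *
 * A write to this MSR (or to the legacy MSR_GIM_KVM_SYSTEM_TIME_OLD above) carries
 * an enable bit and the guest-physical address of a GIMKVMSYSTEMTIME structure;
 * a sketch of the decoding using the MSR_GIM_KVM_SYSTEM_TIME_XXX macros defined
 * further down (the variable names are assumptions):
 * @code{.c}
 *     bool     const fEnabled = MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(uRawValue);
 *     RTGCPHYS const GCPhys   = MSR_GIM_KVM_SYSTEM_TIME_GUEST_GPA(uRawValue);
 * @endcode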
*/ +#define MSR_GIM_KVM_SYSTEM_TIME UINT32_C(0x4b564d01) +/** Asynchronous page fault. */ +#define MSR_GIM_KVM_ASYNC_PF UINT32_C(0x4b564d02) +/** Steal time. */ +#define MSR_GIM_KVM_STEAL_TIME UINT32_C(0x4b564d03) +/** Paravirtualized EOI (end-of-interrupt). */ +#define MSR_GIM_KVM_EOI UINT32_C(0x4b564d04) +/** End of range 1. */ +#define MSR_GIM_KVM_RANGE1_LAST MSR_GIM_KVM_EOI + +AssertCompile(MSR_GIM_KVM_RANGE0_FIRST <= MSR_GIM_KVM_RANGE0_LAST); +AssertCompile(MSR_GIM_KVM_RANGE1_FIRST <= MSR_GIM_KVM_RANGE1_LAST); +/** @} */ + +/** KVM page size. */ +#define GIM_KVM_PAGE_SIZE 0x1000 + +/** + * MMIO2 region indices. + */ +/** The system time page(s) region. */ +#define GIM_KVM_SYSTEM_TIME_PAGE_REGION_IDX UINT8_C(0) +/** The steal time page(s) region. */ +#define GIM_KVM_STEAL_TIME_PAGE_REGION_IDX UINT8_C(1) +/** The maximum region index (must be <= UINT8_MAX). */ +#define GIM_KVM_REGION_IDX_MAX GIM_KVM_STEAL_TIME_PAGE_REGION_IDX + +/** + * KVM system-time structure (GIM_KVM_SYSTEM_TIME_FLAGS_XXX) flags. + * See "Documentation/virtual/kvm/api.txt". + */ +/** The TSC is stable (monotonic). */ +#define GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE RT_BIT(0) +/** The guest VCPU has been paused by the hypervisor. */ +#define GIM_KVM_SYSTEM_TIME_FLAGS_GUEST_PAUSED RT_BIT(1) +/** */ + +/** @name KVM MSR - System time (MSR_GIM_KVM_SYSTEM_TIME and + * MSR_GIM_KVM_SYSTEM_TIME_OLD). + * @{ + */ +/** The system-time enable bit. */ +#define MSR_GIM_KVM_SYSTEM_TIME_ENABLE_BIT RT_BIT_64(0) +/** Whether the system-time struct. is enabled or not. */ +#define MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(a) RT_BOOL((a) & MSR_GIM_KVM_SYSTEM_TIME_ENABLE_BIT) +/** Guest-physical address of the system-time struct. */ +#define MSR_GIM_KVM_SYSTEM_TIME_GUEST_GPA(a) ((a) & ~MSR_GIM_KVM_SYSTEM_TIME_ENABLE_BIT) +/** @} */ + +/** @name KVM MSR - Wall clock (MSR_GIM_KVM_WALL_CLOCK and + * MSR_GIM_KVM_WALL_CLOCK_OLD). + * @{ + */ +/** Guest-physical address of the wall-clock struct. */ +#define MSR_GIM_KVM_WALL_CLOCK_GUEST_GPA(a) (a) +/** @} */ + + +/** @name KVM Hypercall operations. + * @{ */ +#define KVM_HYPERCALL_OP_VAPIC_POLL_IRQ 1 +#define KVM_HYPERCALL_OP_MMU 2 +#define KVM_HYPERCALL_OP_FEATURES 3 +#define KVM_HYPERCALL_OP_KICK_CPU 5 +/** @} */ + +/** @name KVM Hypercall return values. + * @{ */ +/* Return values for hypercalls */ +#define KVM_HYPERCALL_RET_SUCCESS 0 +#define KVM_HYPERCALL_RET_ENOSYS (uint64_t)(-1000) +#define KVM_HYPERCALL_RET_EFAULT (uint64_t)(-14) +#define KVM_HYPERCALL_RET_E2BIG (uint64_t)(-7) +#define KVM_HYPERCALL_RET_EPERM (uint64_t)(-1) +/** @} */ + +/** + * KVM per-VCPU system-time structure. + */ +typedef struct GIMKVMSYSTEMTIME +{ + /** Version (sequence number). */ + uint32_t u32Version; + /** Alignment padding. */ + uint32_t u32Padding0; + /** TSC time stamp. */ + uint64_t u64Tsc; + /** System time in nanoseconds. */ + uint64_t u64NanoTS; + /** TSC to system time scale factor. */ + uint32_t u32TscScale; + /** TSC frequency shift. */ + int8_t i8TscShift; + /** Clock source (GIM_KVM_SYSTEM_TIME_FLAGS_XXX) flags. */ + uint8_t fFlags; + /** Alignment padding. */ + uint8_t abPadding0[2]; +} GIMKVMSYSTEMTIME; +/** Pointer to KVM system-time struct. */ +typedef GIMKVMSYSTEMTIME *PGIMKVMSYSTEMTIME; +/** Pointer to a const KVM system-time struct. */ +typedef GIMKVMSYSTEMTIME const *PCGIMKVMSYSTEMTIME; +AssertCompileSize(GIMKVMSYSTEMTIME, 32); + + +/** + * KVM per-VM wall-clock structure. + */ +typedef struct GIMKVMWALLCLOCK +{ + /** Version (sequence number). 
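 *
 * In the KVM/pvclock convention both this structure and GIMKVMSYSTEMTIME above are
 * protected by such a sequence number: it is odd while the provider is updating the
 * fields, so a guest-side reader retries until it observes the same even value
 * before and after reading. A guest-side sketch of reading the system-time
 * structure, shown here purely for context (pSysTime is an assumption; a real
 * reader also widens the multiply to 128 bits):
 * @code{.c}
 *     uint32_t uVersion;
 *     uint64_t uNanoTS;
 *     do
 *     {
 *         uVersion = pSysTime->u32Version;
 *         uint64_t uDelta = ASMReadTSC() - pSysTime->u64Tsc;
 *         if (pSysTime->i8TscShift >= 0)
 *             uDelta <<= pSysTime->i8TscShift;
 *         else
 *             uDelta >>= -pSysTime->i8TscShift;
 *         uNanoTS = pSysTime->u64NanoTS + ((uDelta * pSysTime->u32TscScale) >> 32);
 *     } while ((uVersion & 1) || uVersion != pSysTime->u32Version);
 * @endcode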
*/ + uint32_t u32Version; + /** Number of seconds since boot. */ + uint32_t u32Sec; + /** Number of nanoseconds since boot. */ + uint32_t u32Nano; +} GIMKVMWALLCLOCK; +/** Pointer to KVM wall-clock struct. */ +typedef GIMKVMWALLCLOCK *PGIMKVMWALLCLOCK; +/** Pointer to a const KVM wall-clock struct. */ +typedef GIMKVMWALLCLOCK const *PCGIMKVMWALLCLOCK; +AssertCompileSize(GIMKVMWALLCLOCK, 12); + + +/** + * GIM KVM VM instance data. + * Changes to this must checked against the padding of the gim union in VM! + */ +typedef struct GIMKVM +{ + /** Wall-clock MSR. */ + uint64_t u64WallClockMsr; + /** CPUID features: Basic. */ + uint32_t uBaseFeat; + /** Whether GIM needs to trap \#UD exceptions. */ + bool fTrapXcptUD; + /** Disassembler opcode of hypercall instruction native for this host CPU. */ + uint16_t uOpcodeNative; + /** Native hypercall opcode bytes. Use for replacing. */ + uint8_t abOpcodeNative[3]; + /** Alignment padding. */ + uint8_t abPadding[5]; + /** The TSC frequency (in HZ) reported to the guest. */ + uint64_t cTscTicksPerSecond; +} GIMKVM; +/** Pointer to per-VM GIM KVM instance data. */ +typedef GIMKVM *PGIMKVM; +/** Pointer to const per-VM GIM KVM instance data. */ +typedef GIMKVM const *PCGIMKVM; + +/** + * GIM KVMV VCPU instance data. + * Changes to this must checked against the padding of the gim union in VMCPU! + */ +typedef struct GIMKVMCPU +{ + /** System-time MSR. */ + uint64_t u64SystemTimeMsr; + /** The guest-physical address of the system-time struct. */ + RTGCPHYS GCPhysSystemTime; + /** The version (sequence number) of the system-time struct. */ + uint32_t u32SystemTimeVersion; + /** The guest TSC value while enabling the system-time MSR. */ + uint64_t uTsc; + /** The guest virtual time while enabling the system-time MSR. */ + uint64_t uVirtNanoTS; + /** The flags of the system-time struct. */ + uint8_t fSystemTimeFlags; +} GIMKVMCPU; +/** Pointer to per-VCPU GIM KVM instance data. */ +typedef GIMKVMCPU *PGIMKVMCPU; +/** Pointer to const per-VCPU GIM KVM instance data. 
*/ +typedef GIMKVMCPU const *PCGIMKVMCPU; + + +RT_C_DECLS_BEGIN + +#ifdef IN_RING3 +VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM); +VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM); +VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM); +VMMR3_INT_DECL(void) gimR3KvmRelocate(PVM pVM, RTGCINTPTR offDelta); +VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM); +VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM); +VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM); + +VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM); +VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVM pVM, PVMCPU pVCpu, uint64_t uMsrSystemTime); +VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysSysTime); +#endif /* IN_RING3 */ + +VMM_INT_DECL(bool) gimKvmIsParavirtTscEnabled(PVMCC pVM); +VMM_INT_DECL(bool) gimKvmAreHypercallsEnabled(PVMCPU pVCpu); +VMM_INT_DECL(VBOXSTRICTRC) gimKvmHypercall(PVMCPUCC pVCpu, PCPUMCTX pCtx); +VMM_INT_DECL(VBOXSTRICTRC) gimKvmReadMsr(PVMCPUCC pVCpu, uint32_t idMsr, PCCPUMMSRRANGE pRange, uint64_t *puValue); +VMM_INT_DECL(VBOXSTRICTRC) gimKvmWriteMsr(PVMCPUCC pVCpu, uint32_t idMsr, PCCPUMMSRRANGE pRange, uint64_t uRawValue); +VMM_INT_DECL(bool) gimKvmShouldTrapXcptUD(PVM pVM); +VMM_INT_DECL(VBOXSTRICTRC) gimKvmXcptUD(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISCPUSTATE pDis, uint8_t *pcbInstr); +VMM_INT_DECL(VBOXSTRICTRC) gimKvmHypercallEx(PVMCPUCC pVCpu, PCPUMCTX pCtx, unsigned uDisOpcode, uint8_t cbInstr); + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_GIMKvmInternal_h */ + diff --git a/src/VBox/VMM/include/GIMMinimalInternal.h b/src/VBox/VMM/include/GIMMinimalInternal.h new file mode 100644 index 00000000..91ed3a84 --- /dev/null +++ b/src/VBox/VMM/include/GIMMinimalInternal.h @@ -0,0 +1,38 @@ +/* $Id: GIMMinimalInternal.h $ */ +/** @file + * GIM - Minimal, Internal header file. + */ + +/* + * Copyright (C) 2014-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_GIMMinimalInternal_h +#define VMM_INCLUDED_SRC_include_GIMMinimalInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <iprt/cdefs.h> +#include <VBox/types.h> + +RT_C_DECLS_BEGIN + +#ifdef IN_RING3 +VMMR3_INT_DECL(int) gimR3MinimalInit(PVM pVM); +VMMR3_INT_DECL(int) gimR3MinimalInitCompleted(PVM pVM); +VMMR3_INT_DECL(void) gimR3MinimalRelocate(PVM pVM, RTGCINTPTR offDelta); +#endif /* IN_RING3 */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_GIMMinimalInternal_h */ + diff --git a/src/VBox/VMM/include/HMInternal.h b/src/VBox/VMM/include/HMInternal.h new file mode 100644 index 00000000..450b0baf --- /dev/null +++ b/src/VBox/VMM/include/HMInternal.h @@ -0,0 +1,1239 @@ +/* $Id: HMInternal.h $ */ +/** @file + * HM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_HMInternal_h +#define VMM_INCLUDED_SRC_include_HMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/stam.h> +#include <VBox/dis.h> +#include <VBox/vmm/hm.h> +#include <VBox/vmm/hm_vmx.h> +#include <VBox/vmm/hm_svm.h> +#include <VBox/vmm/pgm.h> +#include <VBox/vmm/cpum.h> +#include <VBox/vmm/trpm.h> +#include <iprt/memobj.h> +#include <iprt/cpuset.h> +#include <iprt/mp.h> +#include <iprt/avl.h> +#include <iprt/string.h> + +#if HC_ARCH_BITS == 32 +# error "32-bit hosts are no longer supported. Go back to 6.0 or earlier!" +#endif + +/** @def HM_PROFILE_EXIT_DISPATCH + * Enables profiling of the VM exit handler dispatching. */ +#if 0 || defined(DOXYGEN_RUNNING) +# define HM_PROFILE_EXIT_DISPATCH +#endif + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_hm_int Internal + * @ingroup grp_hm + * @internal + * @{ + */ + +/** @name HM_CHANGED_XXX + * HM CPU-context changed flags. + * + * These flags are used to keep track of which registers and state has been + * modified since they were imported back into the guest-CPU context. + * + * @{ + */ +#define HM_CHANGED_HOST_CONTEXT UINT64_C(0x0000000000000001) +#define HM_CHANGED_GUEST_RIP UINT64_C(0x0000000000000004) +#define HM_CHANGED_GUEST_RFLAGS UINT64_C(0x0000000000000008) + +#define HM_CHANGED_GUEST_RAX UINT64_C(0x0000000000000010) +#define HM_CHANGED_GUEST_RCX UINT64_C(0x0000000000000020) +#define HM_CHANGED_GUEST_RDX UINT64_C(0x0000000000000040) +#define HM_CHANGED_GUEST_RBX UINT64_C(0x0000000000000080) +#define HM_CHANGED_GUEST_RSP UINT64_C(0x0000000000000100) +#define HM_CHANGED_GUEST_RBP UINT64_C(0x0000000000000200) +#define HM_CHANGED_GUEST_RSI UINT64_C(0x0000000000000400) +#define HM_CHANGED_GUEST_RDI UINT64_C(0x0000000000000800) +#define HM_CHANGED_GUEST_R8_R15 UINT64_C(0x0000000000001000) +#define HM_CHANGED_GUEST_GPRS_MASK UINT64_C(0x0000000000001ff0) + +#define HM_CHANGED_GUEST_ES UINT64_C(0x0000000000002000) +#define HM_CHANGED_GUEST_CS UINT64_C(0x0000000000004000) +#define HM_CHANGED_GUEST_SS UINT64_C(0x0000000000008000) +#define HM_CHANGED_GUEST_DS UINT64_C(0x0000000000010000) +#define HM_CHANGED_GUEST_FS UINT64_C(0x0000000000020000) +#define HM_CHANGED_GUEST_GS UINT64_C(0x0000000000040000) +#define HM_CHANGED_GUEST_SREG_MASK UINT64_C(0x000000000007e000) + +#define HM_CHANGED_GUEST_GDTR UINT64_C(0x0000000000080000) +#define HM_CHANGED_GUEST_IDTR UINT64_C(0x0000000000100000) +#define HM_CHANGED_GUEST_LDTR UINT64_C(0x0000000000200000) +#define HM_CHANGED_GUEST_TR UINT64_C(0x0000000000400000) +#define HM_CHANGED_GUEST_TABLE_MASK UINT64_C(0x0000000000780000) + +#define HM_CHANGED_GUEST_CR0 UINT64_C(0x0000000000800000) +#define HM_CHANGED_GUEST_CR2 UINT64_C(0x0000000001000000) +#define HM_CHANGED_GUEST_CR3 UINT64_C(0x0000000002000000) +#define HM_CHANGED_GUEST_CR4 UINT64_C(0x0000000004000000) +#define HM_CHANGED_GUEST_CR_MASK UINT64_C(0x0000000007800000) + +#define HM_CHANGED_GUEST_APIC_TPR UINT64_C(0x0000000008000000) +#define HM_CHANGED_GUEST_EFER_MSR UINT64_C(0x0000000010000000) + +#define HM_CHANGED_GUEST_DR0_DR3 UINT64_C(0x0000000020000000) 
+#define HM_CHANGED_GUEST_DR6 UINT64_C(0x0000000040000000) +#define HM_CHANGED_GUEST_DR7 UINT64_C(0x0000000080000000) +#define HM_CHANGED_GUEST_DR_MASK UINT64_C(0x00000000e0000000) + +#define HM_CHANGED_GUEST_X87 UINT64_C(0x0000000100000000) +#define HM_CHANGED_GUEST_SSE_AVX UINT64_C(0x0000000200000000) +#define HM_CHANGED_GUEST_OTHER_XSAVE UINT64_C(0x0000000400000000) +#define HM_CHANGED_GUEST_XCRx UINT64_C(0x0000000800000000) + +#define HM_CHANGED_GUEST_KERNEL_GS_BASE UINT64_C(0x0000001000000000) +#define HM_CHANGED_GUEST_SYSCALL_MSRS UINT64_C(0x0000002000000000) +#define HM_CHANGED_GUEST_SYSENTER_CS_MSR UINT64_C(0x0000004000000000) +#define HM_CHANGED_GUEST_SYSENTER_EIP_MSR UINT64_C(0x0000008000000000) +#define HM_CHANGED_GUEST_SYSENTER_ESP_MSR UINT64_C(0x0000010000000000) +#define HM_CHANGED_GUEST_SYSENTER_MSR_MASK UINT64_C(0x000001c000000000) +#define HM_CHANGED_GUEST_TSC_AUX UINT64_C(0x0000020000000000) +#define HM_CHANGED_GUEST_OTHER_MSRS UINT64_C(0x0000040000000000) +#define HM_CHANGED_GUEST_ALL_MSRS ( HM_CHANGED_GUEST_EFER \ + | HM_CHANGED_GUEST_KERNEL_GS_BASE \ + | HM_CHANGED_GUEST_SYSCALL_MSRS \ + | HM_CHANGED_GUEST_SYSENTER_MSR_MASK \ + | HM_CHANGED_GUEST_TSC_AUX \ + | HM_CHANGED_GUEST_OTHER_MSRS) + +#define HM_CHANGED_GUEST_HWVIRT UINT64_C(0x0000080000000000) +#define HM_CHANGED_GUEST_MASK UINT64_C(0x00000ffffffffffc) + +#define HM_CHANGED_KEEPER_STATE_MASK UINT64_C(0xffff000000000000) + +#define HM_CHANGED_VMX_XCPT_INTERCEPTS UINT64_C(0x0001000000000000) +#define HM_CHANGED_VMX_GUEST_AUTO_MSRS UINT64_C(0x0002000000000000) +#define HM_CHANGED_VMX_GUEST_LAZY_MSRS UINT64_C(0x0004000000000000) +#define HM_CHANGED_VMX_ENTRY_EXIT_CTLS UINT64_C(0x0008000000000000) +#define HM_CHANGED_VMX_MASK UINT64_C(0x000f000000000000) +#define HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE ( HM_CHANGED_GUEST_DR_MASK \ + | HM_CHANGED_VMX_GUEST_LAZY_MSRS) + +#define HM_CHANGED_SVM_XCPT_INTERCEPTS UINT64_C(0x0001000000000000) +#define HM_CHANGED_SVM_MASK UINT64_C(0x0001000000000000) +#define HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE HM_CHANGED_GUEST_DR_MASK + +#define HM_CHANGED_ALL_GUEST ( HM_CHANGED_GUEST_MASK \ + | HM_CHANGED_KEEPER_STATE_MASK) + +/** Mask of what state might have changed when IEM raised an exception. + * This is a based on IEM_CPUMCTX_EXTRN_XCPT_MASK. */ +#define HM_CHANGED_RAISED_XCPT_MASK ( HM_CHANGED_GUEST_GPRS_MASK \ + | HM_CHANGED_GUEST_RIP \ + | HM_CHANGED_GUEST_RFLAGS \ + | HM_CHANGED_GUEST_SS \ + | HM_CHANGED_GUEST_CS \ + | HM_CHANGED_GUEST_CR0 \ + | HM_CHANGED_GUEST_CR3 \ + | HM_CHANGED_GUEST_CR4 \ + | HM_CHANGED_GUEST_APIC_TPR \ + | HM_CHANGED_GUEST_EFER_MSR \ + | HM_CHANGED_GUEST_DR7 \ + | HM_CHANGED_GUEST_CR2 \ + | HM_CHANGED_GUEST_SREG_MASK \ + | HM_CHANGED_GUEST_TABLE_MASK) + +#ifdef VBOX_WITH_NESTED_HWVIRT_SVM +/** Mask of what state might have changed when \#VMEXIT is emulated. */ +# define HM_CHANGED_SVM_VMEXIT_MASK ( HM_CHANGED_GUEST_RSP \ + | HM_CHANGED_GUEST_RAX \ + | HM_CHANGED_GUEST_RIP \ + | HM_CHANGED_GUEST_RFLAGS \ + | HM_CHANGED_GUEST_CS \ + | HM_CHANGED_GUEST_SS \ + | HM_CHANGED_GUEST_DS \ + | HM_CHANGED_GUEST_ES \ + | HM_CHANGED_GUEST_GDTR \ + | HM_CHANGED_GUEST_IDTR \ + | HM_CHANGED_GUEST_CR_MASK \ + | HM_CHANGED_GUEST_EFER_MSR \ + | HM_CHANGED_GUEST_DR6 \ + | HM_CHANGED_GUEST_DR7 \ + | HM_CHANGED_GUEST_OTHER_MSRS \ + | HM_CHANGED_GUEST_HWVIRT \ + | HM_CHANGED_SVM_MASK \ + | HM_CHANGED_GUEST_APIC_TPR) + +/** Mask of what state might have changed when VMRUN is emulated. 
*/ +# define HM_CHANGED_SVM_VMRUN_MASK HM_CHANGED_SVM_VMEXIT_MASK +#endif +#ifdef VBOX_WITH_NESTED_HWVIRT_VMX +/** Mask of what state might have changed when VM-exit is emulated. + * + * This is currently unused, but keeping it here in case we can get away a bit more + * fine-grained state handling. + * + * @note Update IEM_CPUMCTX_EXTRN_VMX_VMEXIT_MASK when this changes. */ +# define HM_CHANGED_VMX_VMEXIT_MASK ( HM_CHANGED_GUEST_CR0 | HM_CHANGED_GUEST_CR3 | HM_CHANGED_GUEST_CR4 \ + | HM_CHANGED_GUEST_DR7 | HM_CHANGED_GUEST_DR6 \ + | HM_CHANGED_GUEST_EFER_MSR \ + | HM_CHANGED_GUEST_SYSENTER_MSR_MASK \ + | HM_CHANGED_GUEST_OTHER_MSRS /* for PAT MSR */ \ + | HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS \ + | HM_CHANGED_GUEST_SREG_MASK \ + | HM_CHANGED_GUEST_TR \ + | HM_CHANGED_GUEST_LDTR | HM_CHANGED_GUEST_GDTR | HM_CHANGED_GUEST_IDTR \ + | HM_CHANGED_GUEST_HWVIRT ) +#endif +/** @} */ + +/** Maximum number of exit reason statistics counters. */ +#define MAX_EXITREASON_STAT 0x100 +#define MASK_EXITREASON_STAT 0xff +#define MASK_INJECT_IRQ_STAT 0xff + +/** Size for the EPT identity page table (1024 4 MB pages to cover the entire address space). */ +#define HM_EPT_IDENTITY_PG_TABLE_SIZE PAGE_SIZE +/** Size of the TSS structure + 2 pages for the IO bitmap + end byte. */ +#define HM_VTX_TSS_SIZE (sizeof(VBOXTSS) + 2 * PAGE_SIZE + 1) +/** Total guest mapped memory needed. */ +#define HM_VTX_TOTAL_DEVHEAP_MEM (HM_EPT_IDENTITY_PG_TABLE_SIZE + HM_VTX_TSS_SIZE) + + +/** @name Macros for enabling and disabling preemption. + * These are really just for hiding the RTTHREADPREEMPTSTATE and asserting that + * preemption has already been disabled when there is no context hook. + * @{ */ +#ifdef VBOX_STRICT +# define HM_DISABLE_PREEMPT(a_pVCpu) \ + RTTHREADPREEMPTSTATE PreemptStateInternal = RTTHREADPREEMPTSTATE_INITIALIZER; \ + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD) || VMMR0ThreadCtxHookIsEnabled((a_pVCpu))); \ + RTThreadPreemptDisable(&PreemptStateInternal) +#else +# define HM_DISABLE_PREEMPT(a_pVCpu) \ + RTTHREADPREEMPTSTATE PreemptStateInternal = RTTHREADPREEMPTSTATE_INITIALIZER; \ + RTThreadPreemptDisable(&PreemptStateInternal) +#endif /* VBOX_STRICT */ +#define HM_RESTORE_PREEMPT() do { RTThreadPreemptRestore(&PreemptStateInternal); } while(0) +/** @} */ + + +/** @name HM saved state versions. + * @{ + */ +#define HM_SAVED_STATE_VERSION HM_SAVED_STATE_VERSION_SVM_NESTED_HWVIRT +#define HM_SAVED_STATE_VERSION_SVM_NESTED_HWVIRT 6 +#define HM_SAVED_STATE_VERSION_TPR_PATCHING 5 +#define HM_SAVED_STATE_VERSION_NO_TPR_PATCHING 4 +#define HM_SAVED_STATE_VERSION_2_0_X 3 +/** @} */ + + +/** + * HM physical (host) CPU information. + */ +typedef struct HMPHYSCPU +{ + /** The CPU ID. */ + RTCPUID idCpu; + /** The VM_HSAVE_AREA (AMD-V) / VMXON region (Intel) memory backing. */ + RTR0MEMOBJ hMemObj; + /** The physical address of the first page in hMemObj (it's a + * physcially contigous allocation if it spans multiple pages). */ + RTHCPHYS HCPhysMemObj; + /** The address of the memory (for pfnEnable). */ + void *pvMemObj; + /** Current ASID (AMD-V) / VPID (Intel). */ + uint32_t uCurrentAsid; + /** TLB flush count. */ + uint32_t cTlbFlushes; + /** Whether to flush each new ASID/VPID before use. */ + bool fFlushAsidBeforeUse; + /** Configured for VT-x or AMD-V. */ + bool fConfigured; + /** Set if the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE hack is active. */ + bool fIgnoreAMDVInUseError; + /** Whether CR4.VMXE was already enabled prior to us enabling it. 
*/ + bool fVmxeAlreadyEnabled; + /** In use by our code. (for power suspend) */ + bool volatile fInUse; +#ifdef VBOX_WITH_NESTED_HWVIRT_SVM + /** Nested-guest union (put data common to SVM/VMX outside the union). */ + union + { + /** Nested-guest SVM data. */ + struct + { + /** The active nested-guest MSR permission bitmap memory backing. */ + RTR0MEMOBJ hNstGstMsrpm; + /** The physical address of the first page in hNstGstMsrpm (physcially + * contiguous allocation). */ + RTHCPHYS HCPhysNstGstMsrpm; + /** The address of the active nested-guest MSRPM. */ + void *pvNstGstMsrpm; + } svm; + /** @todo Nested-VMX. */ + } n; +#endif +} HMPHYSCPU; +/** Pointer to HMPHYSCPU struct. */ +typedef HMPHYSCPU *PHMPHYSCPU; +/** Pointer to a const HMPHYSCPU struct. */ +typedef const HMPHYSCPU *PCHMPHYSCPU; + +/** + * TPR-instruction type. + */ +typedef enum +{ + HMTPRINSTR_INVALID, + HMTPRINSTR_READ, + HMTPRINSTR_READ_SHR4, + HMTPRINSTR_WRITE_REG, + HMTPRINSTR_WRITE_IMM, + HMTPRINSTR_JUMP_REPLACEMENT, + /** The usual 32-bit paranoia. */ + HMTPRINSTR_32BIT_HACK = 0x7fffffff +} HMTPRINSTR; + +/** + * TPR patch information. + */ +typedef struct +{ + /** The key is the address of patched instruction. (32 bits GC ptr) */ + AVLOU32NODECORE Core; + /** Original opcode. */ + uint8_t aOpcode[16]; + /** Instruction size. */ + uint32_t cbOp; + /** Replacement opcode. */ + uint8_t aNewOpcode[16]; + /** Replacement instruction size. */ + uint32_t cbNewOp; + /** Instruction type. */ + HMTPRINSTR enmType; + /** Source operand. */ + uint32_t uSrcOperand; + /** Destination operand. */ + uint32_t uDstOperand; + /** Number of times the instruction caused a fault. */ + uint32_t cFaults; + /** Patch address of the jump replacement. */ + RTGCPTR32 pJumpTarget; +} HMTPRPATCH; +/** Pointer to HMTPRPATCH. */ +typedef HMTPRPATCH *PHMTPRPATCH; +/** Pointer to a const HMTPRPATCH. */ +typedef const HMTPRPATCH *PCHMTPRPATCH; + + +/** + * Makes a HMEXITSTAT::uKey value from a program counter and an exit code. + * + * @returns 64-bit key + * @param a_uPC The RIP + CS.BASE value of the exit. + * @param a_uExit The exit code. + * @todo Add CPL? + */ +#define HMEXITSTAT_MAKE_KEY(a_uPC, a_uExit) (((a_uPC) & UINT64_C(0x0000ffffffffffff)) | (uint64_t)(a_uExit) << 48) + +typedef struct HMEXITINFO +{ + /** See HMEXITSTAT_MAKE_KEY(). */ + uint64_t uKey; + /** Number of recent hits (depreciates with time). */ + uint32_t volatile cHits; + /** The age + lock. */ + uint16_t volatile uAge; + /** Action or action table index. */ + uint16_t iAction; +} HMEXITINFO; +AssertCompileSize(HMEXITINFO, 16); /* Lots of these guys, so don't add any unnecessary stuff! */ + +typedef struct HMEXITHISTORY +{ + /** The exit timestamp. */ + uint64_t uTscExit; + /** The index of the corresponding HMEXITINFO entry. + * UINT32_MAX if none (too many collisions, race, whatever). */ + uint32_t iExitInfo; + /** Figure out later, needed for padding now. */ + uint32_t uSomeClueOrSomething; +} HMEXITHISTORY; + +/** + * Switcher function, HC to the special 64-bit RC. + * + * @param pVM The cross context VM structure. + * @param offCpumVCpu Offset from pVM->cpum to pVM->aCpus[idCpu].cpum. + * @returns Return code indicating the action to take. + */ +typedef DECLCALLBACK(int) FNHMSWITCHERHC(PVM pVM, uint32_t offCpumVCpu); +/** Pointer to switcher function. */ +typedef FNHMSWITCHERHC *PFNHMSWITCHERHC; + +/** @def HM_UNION_NM + * For compilers (like DTrace) that does not grok nameless unions, we have a + * little hack to make them palatable. 
+ */ +/** @def HM_STRUCT_NM + * For compilers (like DTrace) that does not grok nameless structs (it is + * non-standard C++), we have a little hack to make them palatable. + */ +#ifdef VBOX_FOR_DTRACE_LIB +# define HM_UNION_NM(a_Nm) a_Nm +# define HM_STRUCT_NM(a_Nm) a_Nm +#elif defined(IPRT_WITHOUT_NAMED_UNIONS_AND_STRUCTS) +# define HM_UNION_NM(a_Nm) a_Nm +# define HM_STRUCT_NM(a_Nm) a_Nm +#else +# define HM_UNION_NM(a_Nm) +# define HM_STRUCT_NM(a_Nm) +#endif + +/** + * HM event. + * + * VT-x and AMD-V common event injection structure. + */ +typedef struct HMEVENT +{ + /** Whether the event is pending. */ + uint32_t fPending; + /** The error-code associated with the event. */ + uint32_t u32ErrCode; + /** The length of the instruction in bytes (only relevant for software + * interrupts or software exceptions). */ + uint32_t cbInstr; + /** Alignment. */ + uint32_t u32Padding; + /** The encoded event (VM-entry interruption-information for VT-x or EVENTINJ + * for SVM). */ + uint64_t u64IntInfo; + /** Guest virtual address if this is a page-fault event. */ + RTGCUINTPTR GCPtrFaultAddress; +} HMEVENT; +/** Pointer to a HMEVENT struct. */ +typedef HMEVENT *PHMEVENT; +/** Pointer to a const HMEVENT struct. */ +typedef const HMEVENT *PCHMEVENT; +AssertCompileSizeAlignment(HMEVENT, 8); + +/** + * HM VM Instance data. + * Changes to this must checked against the padding of the hm union in VM! + */ +typedef struct HM +{ + /** Set if nested paging is enabled. */ + bool fNestedPaging; + /** Set when we've initialized VMX or SVM. */ + bool fInitialized; + /** Set if nested paging is allowed. */ + bool fAllowNestedPaging; + /** Set if large pages are enabled (requires nested paging). */ + bool fLargePages; + /** Set if we can support 64-bit guests or not. */ + bool fAllow64BitGuests; + /** Set when TPR patching is allowed. */ + bool fTprPatchingAllowed; + /** Set when we initialize VT-x or AMD-V once for all CPUs. */ + bool fGlobalInit; + /** Set when TPR patching is active. */ + bool fTPRPatchingActive; + /** Set when the debug facility has breakpoints/events enabled that requires + * us to use the debug execution loop in ring-0. */ + bool fUseDebugLoop; + /** Set if hardware APIC virtualization is enabled. */ + bool fVirtApicRegs; + /** Set if posted interrupt processing is enabled. */ + bool fPostedIntrs; + /** Set if indirect branch prediction barrier on VM exit. */ + bool fIbpbOnVmExit; + /** Set if indirect branch prediction barrier on VM entry. */ + bool fIbpbOnVmEntry; + /** Set if level 1 data cache should be flushed on VM entry. */ + bool fL1dFlushOnVmEntry; + /** Set if level 1 data cache should be flushed on EMT scheduling. */ + bool fL1dFlushOnSched; + /** Set if host manages speculation control settings. */ + bool fSpecCtrlByHost; + /** Set if MDS related buffers should be cleared on VM entry. */ + bool fMdsClearOnVmEntry; + /** Set if MDS related buffers should be cleared on EMT scheduling. */ + bool fMdsClearOnSched; + /** Alignment padding. */ + bool afPaddingMinus1[6]; + + /** Maximum ASID allowed. */ + uint32_t uMaxAsid; + /** The maximum number of resumes loops allowed in ring-0 (safety precaution). + * This number is set much higher when RTThreadPreemptIsPending is reliable. */ + uint32_t cMaxResumeLoops; + + /** Host kernel flags that HM might need to know (SUPKERNELFEATURES_XXX). */ + uint32_t fHostKernelFeatures; + + /** Size of the guest patch memory block. */ + uint32_t cbGuestPatchMem; + /** Guest allocated memory for patching purposes. 
*/ + RTGCPTR pGuestPatchMem; + /** Current free pointer inside the patch block. */ + RTGCPTR pFreeGuestPatchMem; + + struct + { + /** Set by the ring-0 side of HM to indicate VMX is supported by the + * CPU. */ + bool fSupported; + /** Set when we've enabled VMX. */ + bool fEnabled; + /** Set if VPID is supported. */ + bool fVpid; + /** Set if VT-x VPID is allowed. */ + bool fAllowVpid; + /** Set if unrestricted guest execution is in use (real and protected mode + * without paging). */ + bool fUnrestrictedGuest; + /** Set if unrestricted guest execution is allowed to be used. */ + bool fAllowUnrestricted; + /** Set if the preemption timer is in use or not. */ + bool fUsePreemptTimer; + /** The shift mask employed by the VMX-Preemption timer. */ + uint8_t cPreemptTimerShift; + + /** Virtual address of the APIC-access page. */ + R0PTRTYPE(uint8_t *) pbApicAccess; + /** Pointer to the VMREAD bitmap. */ + R0PTRTYPE(void *) pvVmreadBitmap; + /** Pointer to the VMWRITE bitmap. */ + R0PTRTYPE(void *) pvVmwriteBitmap; + + /** Pointer to the shadow VMCS read-only fields array. */ + R0PTRTYPE(uint32_t *) paShadowVmcsRoFields; + /** Pointer to the shadow VMCS read/write fields array. */ + R0PTRTYPE(uint32_t *) paShadowVmcsFields; + /** Number of elements in the shadow VMCS read-only fields array. */ + uint32_t cShadowVmcsRoFields; + /** Number of elements in the shadow VMCS read-write fields array. */ + uint32_t cShadowVmcsFields; + + /** Tagged-TLB flush type. */ + VMXTLBFLUSHTYPE enmTlbFlushType; + /** Flush type to use for INVEPT. */ + VMXTLBFLUSHEPT enmTlbFlushEpt; + /** Flush type to use for INVVPID. */ + VMXTLBFLUSHVPID enmTlbFlushVpid; + + /** Pause-loop exiting (PLE) gap in ticks. */ + uint32_t cPleGapTicks; + /** Pause-loop exiting (PLE) window in ticks. */ + uint32_t cPleWindowTicks; + uint32_t u32Alignment0; + + /** Host CR4 value (set by ring-0 VMX init) */ + uint64_t u64HostCr4; + /** Host SMM monitor control (set by ring-0 VMX init) */ + uint64_t u64HostSmmMonitorCtl; + /** Host EFER value (set by ring-0 VMX init) */ + uint64_t u64HostMsrEfer; + /** Whether the CPU supports VMCS fields for swapping EFER. */ + bool fSupportsVmcsEfer; + /** Whether to use VMCS shadowing. */ + bool fUseVmcsShadowing; + /** Set if Last Branch Record (LBR) is enabled. */ + bool fLbr; + uint8_t u8Alignment2[5]; + + /** The first valid host LBR branch-from-IP stack range. */ + uint32_t idLbrFromIpMsrFirst; + /** The last valid host LBR branch-from-IP stack range. */ + uint32_t idLbrFromIpMsrLast; + + /** The first valid host LBR branch-to-IP stack range. */ + uint32_t idLbrToIpMsrFirst; + /** The last valid host LBR branch-to-IP stack range. */ + uint32_t idLbrToIpMsrLast; + + /** The host LBR TOS (top-of-stack) MSR id. */ + uint32_t idLbrTosMsr; + /** Padding. */ + uint32_t u32Alignment1; + + /** VMX MSR values. */ + VMXMSRS Msrs; + + /** Host-physical address for a failing VMXON instruction. */ + RTHCPHYS HCPhysVmxEnableError; + /** Host-physical address of the APIC-access page. */ + RTHCPHYS HCPhysApicAccess; + /** Host-physical address of the VMREAD bitmap. */ + RTHCPHYS HCPhysVmreadBitmap; + /** Host-physical address of the VMWRITE bitmap. */ + RTHCPHYS HCPhysVmwriteBitmap; +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + /** Host-physical address of the crash-dump scratch area. */ + RTHCPHYS HCPhysScratch; +#endif + +#ifdef VBOX_WITH_CRASHDUMP_MAGIC + /** Pointer to the crash-dump scratch bitmap. */ + R0PTRTYPE(uint8_t *) pbScratch; +#endif + /** Virtual address of the TSS page used for real mode emulation. 
*/ + R3PTRTYPE(PVBOXTSS) pRealModeTSS; + /** Virtual address of the identity page table used for real mode and protected + * mode without paging emulation in EPT mode. */ + R3PTRTYPE(PX86PD) pNonPagingModeEPTPageTable; + + /** Ring-0 memory object for per-VM VMX structures. */ + RTR0MEMOBJ hMemObj; + } vmx; + + struct + { + /** Set by the ring-0 side of HM to indicate SVM is supported by the + * CPU. */ + bool fSupported; + /** Set when we've enabled SVM. */ + bool fEnabled; + /** Set if erratum 170 affects the AMD cpu. */ + bool fAlwaysFlushTLB; + /** Set when the hack to ignore VERR_SVM_IN_USE is active. */ + bool fIgnoreInUseError; + /** Whether to use virtualized VMSAVE/VMLOAD feature. */ + bool fVirtVmsaveVmload; + /** Whether to use virtual GIF feature. */ + bool fVGif; + /** Whether to use LBR virtualization feature. */ + bool fLbrVirt; + uint8_t u8Alignment0[1]; + + /** Physical address of the IO bitmap (12kb). */ + RTHCPHYS HCPhysIOBitmap; + /** R0 memory object for the IO bitmap (12kb). */ + RTR0MEMOBJ hMemObjIOBitmap; + /** Virtual address of the IO bitmap. */ + R0PTRTYPE(void *) pvIOBitmap; + + /* HWCR MSR (for diagnostics) */ + uint64_t u64MsrHwcr; + + /** SVM revision. */ + uint32_t u32Rev; + /** SVM feature bits from cpuid 0x8000000a */ + uint32_t u32Features; + + /** Pause filter counter. */ + uint16_t cPauseFilter; + /** Pause filter treshold in ticks. */ + uint16_t cPauseFilterThresholdTicks; + uint32_t u32Alignment0; + } svm; + + /** + * AVL tree with all patches (active or disabled) sorted by guest instruction + * address. + */ + AVLOU32TREE PatchTree; + uint32_t cPatches; + HMTPRPATCH aPatches[64]; + + /** Last recorded error code during HM ring-0 init. */ + int32_t rcInit; + + /** HMR0Init was run */ + bool fHMR0Init; + bool u8Alignment1[3]; + + STAMCOUNTER StatTprPatchSuccess; + STAMCOUNTER StatTprPatchFailure; + STAMCOUNTER StatTprReplaceSuccessCr8; + STAMCOUNTER StatTprReplaceSuccessVmc; + STAMCOUNTER StatTprReplaceFailure; +} HM; +/** Pointer to HM VM instance data. */ +typedef HM *PHM; +AssertCompileMemberAlignment(HM, StatTprPatchSuccess, 8); +AssertCompileMemberAlignment(HM, vmx, 8); +AssertCompileMemberAlignment(HM, svm, 8); + + +/** + * VMX StartVM function. + * + * @returns VBox status code (no informational stuff). + * @param fResume Whether to use VMRESUME (true) or VMLAUNCH (false). + * @param pCtx The CPU register context. + * @param pvUnused Unused argument. + * @param pVM Pointer to the cross context VM structure. + * @param pVCpu Pointer to the cross context per-CPU structure. + */ +typedef DECLCALLBACK(int) FNHMVMXSTARTVM(RTHCUINT fResume, PCPUMCTX pCtx, void *pvUnused, PVMCC pVM, PVMCPUCC pVCpu); +/** Pointer to a VMX StartVM function. */ +typedef R0PTRTYPE(FNHMVMXSTARTVM *) PFNHMVMXSTARTVM; + +/** SVM VMRun function. */ +typedef DECLCALLBACK(int) FNHMSVMVMRUN(RTHCPHYS pVmcbHostPhys, RTHCPHYS pVmcbPhys, PCPUMCTX pCtx, PVMCC pVM, PVMCPUCC pVCpu); +/** Pointer to a SVM VMRun function. */ +typedef R0PTRTYPE(FNHMSVMVMRUN *) PFNHMSVMVMRUN; + +/** + * VMX VMCS information. + * + * This structure provides information maintained for and during the executing of a + * guest (or nested-guest) VMCS (VM control structure) using hardware-assisted VMX. + * + * Note! The members here are ordered and aligned based on estimated frequency of + * usage and grouped to fit within a cache line in hot code paths. Even subtle + * changes here have a noticeable effect in the bootsector benchmarks. Modify with + * care. 
+ */ +typedef struct VMXVMCSINFO +{ + /** @name Auxiliary information. + * @{ */ + /** Ring-0 pointer to the hardware-assisted VMX execution function. */ + PFNHMVMXSTARTVM pfnStartVM; + /** Host-physical address of the EPTP. */ + RTHCPHYS HCPhysEPTP; + /** The VMCS launch state, see VMX_V_VMCS_LAUNCH_STATE_XXX. */ + uint32_t fVmcsState; + /** The VMCS launch state of the shadow VMCS, see VMX_V_VMCS_LAUNCH_STATE_XXX. */ + uint32_t fShadowVmcsState; + /** The host CPU for which its state has been exported to this VMCS. */ + RTCPUID idHostCpuState; + /** The host CPU on which we last executed this VMCS. */ + RTCPUID idHostCpuExec; + /** Number of guest MSRs in the VM-entry MSR-load area. */ + uint32_t cEntryMsrLoad; + /** Number of guest MSRs in the VM-exit MSR-store area. */ + uint32_t cExitMsrStore; + /** Number of host MSRs in the VM-exit MSR-load area. */ + uint32_t cExitMsrLoad; + /** @} */ + + /** @name Cache of execution related VMCS fields. + * @{ */ + /** Pin-based VM-execution controls. */ + uint32_t u32PinCtls; + /** Processor-based VM-execution controls. */ + uint32_t u32ProcCtls; + /** Secondary processor-based VM-execution controls. */ + uint32_t u32ProcCtls2; + /** VM-entry controls. */ + uint32_t u32EntryCtls; + /** VM-exit controls. */ + uint32_t u32ExitCtls; + /** Exception bitmap. */ + uint32_t u32XcptBitmap; + /** Page-fault exception error-code mask. */ + uint32_t u32XcptPFMask; + /** Page-fault exception error-code match. */ + uint32_t u32XcptPFMatch; + /** Padding. */ + uint32_t u32Alignment0; + /** TSC offset. */ + uint64_t u64TscOffset; + /** VMCS link pointer. */ + uint64_t u64VmcsLinkPtr; + /** CR0 guest/host mask. */ + uint64_t u64Cr0Mask; + /** CR4 guest/host mask. */ + uint64_t u64Cr4Mask; + /** @} */ + + /** @name Host-virtual address of VMCS and related data structures. + * @{ */ + /** The VMCS. */ + R0PTRTYPE(void *) pvVmcs; + /** The shadow VMCS. */ + R0PTRTYPE(void *) pvShadowVmcs; + /** The virtual-APIC page. */ + R0PTRTYPE(uint8_t *) pbVirtApic; + /** The MSR bitmap. */ + R0PTRTYPE(void *) pvMsrBitmap; + /** The VM-entry MSR-load area. */ + R0PTRTYPE(void *) pvGuestMsrLoad; + /** The VM-exit MSR-store area. */ + R0PTRTYPE(void *) pvGuestMsrStore; + /** The VM-exit MSR-load area. */ + R0PTRTYPE(void *) pvHostMsrLoad; + /** @} */ + + /** @name Real-mode emulation state. + * @{ */ + /** Set if guest was executing in real mode (extra checks). */ + bool fWasInRealMode; + /** Set if the guest switched to 64-bit mode on a 32-bit host. */ + bool fSwitchedTo64on32Obsolete; + /** Padding. */ + bool afPadding0[6]; + struct + { + X86DESCATTR AttrCS; + X86DESCATTR AttrDS; + X86DESCATTR AttrES; + X86DESCATTR AttrFS; + X86DESCATTR AttrGS; + X86DESCATTR AttrSS; + X86EFLAGS Eflags; + bool fRealOnV86Active; + bool afPadding1[3]; + } RealMode; + /** @} */ + + /** @name Host-physical address of VMCS and related data structures. + * @{ */ + /** The VMCS. */ + RTHCPHYS HCPhysVmcs; + /** The shadow VMCS. */ + RTHCPHYS HCPhysShadowVmcs; + /** The virtual APIC page. */ + RTHCPHYS HCPhysVirtApic; + /** The MSR bitmap. */ + RTHCPHYS HCPhysMsrBitmap; + /** The VM-entry MSR-load area. */ + RTHCPHYS HCPhysGuestMsrLoad; + /** The VM-exit MSR-store area. */ + RTHCPHYS HCPhysGuestMsrStore; + /** The VM-exit MSR-load area. */ + RTHCPHYS HCPhysHostMsrLoad; + /** @} */ + + /** @name R0-memory objects address for VMCS and related data structures. + * @{ */ + /** R0-memory object for VMCS and related data structures. */ + RTR0MEMOBJ hMemObj; + /** @} */ + + /** @name LBR MSR data. 
+ * @{ */ + /** List of LastBranch-From-IP MSRs. */ + uint64_t au64LbrFromIpMsr[32]; + /** List of LastBranch-To-IP MSRs. */ + uint64_t au64LbrToIpMsr[32]; + /** The MSR containing the index to the most recent branch record. */ + uint64_t u64LbrTosMsr; + /** @} */ +} VMXVMCSINFO; +/** Pointer to a VMXVMCSINFO struct. */ +typedef VMXVMCSINFO *PVMXVMCSINFO; +/** Pointer to a const VMXVMCSINFO struct. */ +typedef const VMXVMCSINFO *PCVMXVMCSINFO; +AssertCompileSizeAlignment(VMXVMCSINFO, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pfnStartVM, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, u32PinCtls, 4); +AssertCompileMemberAlignment(VMXVMCSINFO, u64VmcsLinkPtr, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pvVmcs, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pvShadowVmcs, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pbVirtApic, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pvMsrBitmap, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pvGuestMsrLoad, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pvGuestMsrStore, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, pvHostMsrLoad, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, HCPhysVmcs, 8); +AssertCompileMemberAlignment(VMXVMCSINFO, hMemObj, 8); + +/** + * HM VMCPU Instance data. + * + * Note! If you change members of this struct, make sure to check if the + * assembly counterpart in HMInternal.mac needs to be updated as well. + * + * Note! The members here are ordered and aligned based on estimated frequency of + * usage and grouped to fit within a cache line in hot code paths. Even subtle + * changes here have a noticeable effect in the bootsector benchmarks. Modify with + * care. + */ +typedef struct HMCPU +{ + /** Set when the TLB has been checked until we return from the world switch. */ + bool volatile fCheckedTLBFlush; + /** Set when we're using VT-x or AMD-V at that moment. */ + bool fActive; + /** Whether we've completed the inner HM leave function. */ + bool fLeaveDone; + /** Whether we're using the hyper DR7 or guest DR7. */ + bool fUsingHyperDR7; + + /** Set if we need to flush the TLB during the world switch. */ + bool fForceTLBFlush; + /** Whether we should use the debug loop because of single stepping or special + * debug breakpoints / events are armed. */ + bool fUseDebugLoop; + /** Whether we are currently executing in the debug loop. + * Mainly for assertions. */ + bool fUsingDebugLoop; + /** Set if we using the debug loop and wish to intercept RDTSC. */ + bool fDebugWantRdTscExit; + + /** Set if XCR0 needs to be saved/restored when entering/exiting guest code + * execution. */ + bool fLoadSaveGuestXcr0; + /** Whether \#UD needs to be intercepted (required by certain GIM providers). */ + bool fGIMTrapXcptUD; + /** Whether \#GP needs to be intercept for mesa driver workaround. */ + bool fTrapXcptGpForLovelyMesaDrv; + /** Whether we're executing a single instruction. */ + bool fSingleInstruction; + + /** Set if we need to clear the trap flag because of single stepping. */ + bool fClearTrapFlag; + bool afAlignment0[3]; + + /** World switch exit counter. */ + uint32_t volatile cWorldSwitchExits; + /** The last CPU we were executing code on (NIL_RTCPUID for the first time). */ + RTCPUID idLastCpu; + /** TLB flush count. */ + uint32_t cTlbFlushes; + /** Current ASID in use by the VM. */ + uint32_t uCurrentAsid; + /** An additional error code used for some gurus. */ + uint32_t u32HMError; + /** The last exit-to-ring-3 reason. */ + int32_t rcLastExitToR3; + /** CPU-context changed flags (see HM_CHANGED_xxx). 
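 *
 * Intended usage of the HM_CHANGED_XXX flags with this field, as a sketch (the
 * pVCpu->hm.s. accessor path is an assumption, not something this header mandates):
 * @code{.c}
 *     // After emulating something that modified RIP and RFLAGS:
 *     pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS;
 *
 *     // Before the next VM-entry, only re-export what is marked as changed:
 *     if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR_MASK)
 *     {
 *         // ... write CR0/CR2/CR3/CR4 into the VMCS/VMCB ...
 *         pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_CR_MASK;
 *     }
 * @endcode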
*/ + uint64_t fCtxChanged; + + union /* no tag! */ + { + /** VT-x data. */ + struct + { + /** @name Guest information. + * @{ */ + /** Guest VMCS information. */ + VMXVMCSINFO VmcsInfo; + /** Nested-guest VMCS information. */ + VMXVMCSINFO VmcsInfoNstGst; + /** Whether the nested-guest VMCS was the last current VMCS. */ + bool fSwitchedToNstGstVmcs; + /** Whether the static guest VMCS controls has been merged with the + * nested-guest VMCS controls. */ + bool fMergedNstGstCtls; + /** Whether the nested-guest VMCS has been copied to the shadow VMCS. */ + bool fCopiedNstGstToShadowVmcs; + /** Whether flushing the TLB is required due to switching to/from the + * nested-guest. */ + bool fSwitchedNstGstFlushTlb; + /** Alignment. */ + bool afAlignment0[4]; + /** Cached guest APIC-base MSR for identifying when to map the APIC-access page. */ + uint64_t u64GstMsrApicBase; + /** @} */ + + /** @name Host information. + * @{ */ + /** Host LSTAR MSR to restore lazily while leaving VT-x. */ + uint64_t u64HostMsrLStar; + /** Host STAR MSR to restore lazily while leaving VT-x. */ + uint64_t u64HostMsrStar; + /** Host SF_MASK MSR to restore lazily while leaving VT-x. */ + uint64_t u64HostMsrSfMask; + /** Host KernelGS-Base MSR to restore lazily while leaving VT-x. */ + uint64_t u64HostMsrKernelGsBase; + /** The mask of lazy MSRs swap/restore state, see VMX_LAZY_MSRS_XXX. */ + uint32_t fLazyMsrs; + /** Whether the host MSR values are up-to-date in the auto-load/store MSR area. */ + bool fUpdatedHostAutoMsrs; + /** Alignment. */ + uint8_t au8Alignment0[3]; + /** Which host-state bits to restore before being preempted. */ + uint32_t fRestoreHostFlags; + /** Alignment. */ + uint32_t u32Alignment0; + /** The host-state restoration structure. */ + VMXRESTOREHOST RestoreHost; + /** @} */ + + /** @name Error reporting and diagnostics. + * @{ */ + /** VT-x error-reporting (mainly for ring-3 propagation). */ + struct + { + RTCPUID idCurrentCpu; + RTCPUID idEnteredCpu; + RTHCPHYS HCPhysCurrentVmcs; + uint32_t u32VmcsRev; + uint32_t u32InstrError; + uint32_t u32ExitReason; + uint32_t u32GuestIntrState; + } LastError; + /** @} */ + } vmx; + + /** SVM data. */ + struct + { + /** Ring 0 handlers for VT-x. */ + PFNHMSVMVMRUN pfnVMRun; + + /** Physical address of the host VMCB which holds additional host-state. */ + RTHCPHYS HCPhysVmcbHost; + /** R0 memory object for the host VMCB which holds additional host-state. */ + RTR0MEMOBJ hMemObjVmcbHost; + /** Padding. */ + R0PTRTYPE(void *) pvPadding; + + /** Physical address of the guest VMCB. */ + RTHCPHYS HCPhysVmcb; + /** R0 memory object for the guest VMCB. */ + RTR0MEMOBJ hMemObjVmcb; + /** Pointer to the guest VMCB. */ + R0PTRTYPE(PSVMVMCB) pVmcb; + + /** Physical address of the MSR bitmap (8 KB). */ + RTHCPHYS HCPhysMsrBitmap; + /** R0 memory object for the MSR bitmap (8 KB). */ + RTR0MEMOBJ hMemObjMsrBitmap; + /** Pointer to the MSR bitmap. */ + R0PTRTYPE(void *) pvMsrBitmap; + + /** Whether VTPR with V_INTR_MASKING set is in effect, indicating + * we should check if the VTPR changed on every VM-exit. */ + bool fSyncVTpr; + uint8_t au8Alignment0[7]; + + /** Host's TSC_AUX MSR (used when RDTSCP doesn't cause VM-exits). */ + uint64_t u64HostTscAux; + + /** Cache of the nested-guest's VMCB fields that we modify in order to run the + * nested-guest using AMD-V. This will be restored on \#VMEXIT. */ + SVMNESTEDVMCBCACHE NstGstVmcbCache; + } svm; + } HM_UNION_NM(u); + + /** Event injection state. */ + HMEVENT Event; + + /** The CPU ID of the CPU currently owning the VMCS. 
Set in + * HMR0Enter and cleared in HMR0Leave. */ + RTCPUID idEnteredCpu; + + /** Current shadow paging mode for updating CR4. */ + PGMMODE enmShadowMode; + + /** The PAE PDPEs used with Nested Paging (only valid when + * VMCPU_FF_HM_UPDATE_PAE_PDPES is set). */ + X86PDPE aPdpes[4]; + + /** For saving stack space, the disassembler state is allocated here instead of + * on the stack. */ + DISCPUSTATE DisState; + + STAMPROFILEADV StatEntry; + STAMPROFILEADV StatPreExit; + STAMPROFILEADV StatExitHandling; + STAMPROFILEADV StatExitIO; + STAMPROFILEADV StatExitMovCRx; + STAMPROFILEADV StatExitXcptNmi; + STAMPROFILEADV StatExitVmentry; + STAMPROFILEADV StatImportGuestState; + STAMPROFILEADV StatExportGuestState; + STAMPROFILEADV StatLoadGuestFpuState; + STAMPROFILEADV StatInGC; + STAMPROFILEADV StatPoke; + STAMPROFILEADV StatSpinPoke; + STAMPROFILEADV StatSpinPokeFailed; + + STAMCOUNTER StatInjectInterrupt; + STAMCOUNTER StatInjectXcpt; + STAMCOUNTER StatInjectReflect; + STAMCOUNTER StatInjectConvertDF; + STAMCOUNTER StatInjectInterpret; + STAMCOUNTER StatInjectReflectNPF; + + STAMCOUNTER StatExitAll; + STAMCOUNTER StatNestedExitAll; + STAMCOUNTER StatExitShadowNM; + STAMCOUNTER StatExitGuestNM; + STAMCOUNTER StatExitShadowPF; /**< Misleading, currently used for MMIO \#PFs as well. */ + STAMCOUNTER StatExitShadowPFEM; + STAMCOUNTER StatExitGuestPF; + STAMCOUNTER StatExitGuestUD; + STAMCOUNTER StatExitGuestSS; + STAMCOUNTER StatExitGuestNP; + STAMCOUNTER StatExitGuestTS; + STAMCOUNTER StatExitGuestOF; + STAMCOUNTER StatExitGuestGP; + STAMCOUNTER StatExitGuestDE; + STAMCOUNTER StatExitGuestDF; + STAMCOUNTER StatExitGuestBR; + STAMCOUNTER StatExitGuestAC; + STAMCOUNTER StatExitGuestDB; + STAMCOUNTER StatExitGuestMF; + STAMCOUNTER StatExitGuestBP; + STAMCOUNTER StatExitGuestXF; + STAMCOUNTER StatExitGuestXcpUnk; + STAMCOUNTER StatExitDRxWrite; + STAMCOUNTER StatExitDRxRead; + STAMCOUNTER StatExitCR0Read; + STAMCOUNTER StatExitCR2Read; + STAMCOUNTER StatExitCR3Read; + STAMCOUNTER StatExitCR4Read; + STAMCOUNTER StatExitCR8Read; + STAMCOUNTER StatExitCR0Write; + STAMCOUNTER StatExitCR2Write; + STAMCOUNTER StatExitCR3Write; + STAMCOUNTER StatExitCR4Write; + STAMCOUNTER StatExitCR8Write; + STAMCOUNTER StatExitRdmsr; + STAMCOUNTER StatExitWrmsr; + STAMCOUNTER StatExitClts; + STAMCOUNTER StatExitXdtrAccess; + STAMCOUNTER StatExitLmsw; + STAMCOUNTER StatExitIOWrite; + STAMCOUNTER StatExitIORead; + STAMCOUNTER StatExitIOStringWrite; + STAMCOUNTER StatExitIOStringRead; + STAMCOUNTER StatExitIntWindow; + STAMCOUNTER StatExitExtInt; + STAMCOUNTER StatExitHostNmiInGC; + STAMCOUNTER StatExitHostNmiInGCIpi; + STAMCOUNTER StatExitPreemptTimer; + STAMCOUNTER StatExitTprBelowThreshold; + STAMCOUNTER StatExitTaskSwitch; + STAMCOUNTER StatExitApicAccess; + STAMCOUNTER StatExitReasonNpf; + + STAMCOUNTER StatNestedExitReasonNpf; + + STAMCOUNTER StatFlushPage; + STAMCOUNTER StatFlushPageManual; + STAMCOUNTER StatFlushPhysPageManual; + STAMCOUNTER StatFlushTlb; + STAMCOUNTER StatFlushTlbNstGst; + STAMCOUNTER StatFlushTlbManual; + STAMCOUNTER StatFlushTlbWorldSwitch; + STAMCOUNTER StatNoFlushTlbWorldSwitch; + STAMCOUNTER StatFlushEntire; + STAMCOUNTER StatFlushAsid; + STAMCOUNTER StatFlushNestedPaging; + STAMCOUNTER StatFlushTlbInvlpgVirt; + STAMCOUNTER StatFlushTlbInvlpgPhys; + STAMCOUNTER StatTlbShootdown; + STAMCOUNTER StatTlbShootdownFlush; + + STAMCOUNTER StatSwitchPendingHostIrq; + STAMCOUNTER StatSwitchTprMaskedIrq; + STAMCOUNTER StatSwitchGuestIrq; + STAMCOUNTER StatSwitchHmToR3FF; + STAMCOUNTER StatSwitchVmReq; + 
STAMCOUNTER StatSwitchPgmPoolFlush; + STAMCOUNTER StatSwitchDma; + STAMCOUNTER StatSwitchExitToR3; + STAMCOUNTER StatSwitchLongJmpToR3; + STAMCOUNTER StatSwitchMaxResumeLoops; + STAMCOUNTER StatSwitchHltToR3; + STAMCOUNTER StatSwitchApicAccessToR3; + STAMCOUNTER StatSwitchPreempt; + STAMCOUNTER StatSwitchNstGstVmexit; + + STAMCOUNTER StatTscParavirt; + STAMCOUNTER StatTscOffset; + STAMCOUNTER StatTscIntercept; + + STAMCOUNTER StatDRxArmed; + STAMCOUNTER StatDRxContextSwitch; + STAMCOUNTER StatDRxIoCheck; + + STAMCOUNTER StatExportMinimal; + STAMCOUNTER StatExportFull; + STAMCOUNTER StatLoadGuestFpu; + STAMCOUNTER StatExportHostState; + + STAMCOUNTER StatVmxCheckBadRmSelBase; + STAMCOUNTER StatVmxCheckBadRmSelLimit; + STAMCOUNTER StatVmxCheckBadRmSelAttr; + STAMCOUNTER StatVmxCheckBadV86SelBase; + STAMCOUNTER StatVmxCheckBadV86SelLimit; + STAMCOUNTER StatVmxCheckBadV86SelAttr; + STAMCOUNTER StatVmxCheckRmOk; + STAMCOUNTER StatVmxCheckBadSel; + STAMCOUNTER StatVmxCheckBadRpl; + STAMCOUNTER StatVmxCheckPmOk; + +#ifdef VBOX_WITH_STATISTICS + R3PTRTYPE(PSTAMCOUNTER) paStatExitReason; + R0PTRTYPE(PSTAMCOUNTER) paStatExitReasonR0; + R3PTRTYPE(PSTAMCOUNTER) paStatInjectedIrqs; + R0PTRTYPE(PSTAMCOUNTER) paStatInjectedIrqsR0; + R3PTRTYPE(PSTAMCOUNTER) paStatInjectedXcpts; + R0PTRTYPE(PSTAMCOUNTER) paStatInjectedXcptsR0; + R3PTRTYPE(PSTAMCOUNTER) paStatNestedExitReason; + R0PTRTYPE(PSTAMCOUNTER) paStatNestedExitReasonR0; +#endif +#ifdef HM_PROFILE_EXIT_DISPATCH + STAMPROFILEADV StatExitDispatch; +#endif +} HMCPU; +/** Pointer to HM VMCPU instance data. */ +typedef HMCPU *PHMCPU; +AssertCompileMemberAlignment(HMCPU, fCheckedTLBFlush, 4); +AssertCompileMemberAlignment(HMCPU, fForceTLBFlush, 4); +AssertCompileMemberAlignment(HMCPU, cWorldSwitchExits, 4); +AssertCompileMemberAlignment(HMCPU, fCtxChanged, 8); +AssertCompileMemberAlignment(HMCPU, HM_UNION_NM(u.) vmx, 8); +AssertCompileMemberAlignment(HMCPU, HM_UNION_NM(u.) vmx.VmcsInfo, 8); +AssertCompileMemberAlignment(HMCPU, HM_UNION_NM(u.) vmx.VmcsInfoNstGst, 8); +AssertCompileMemberAlignment(HMCPU, HM_UNION_NM(u.) vmx.RestoreHost, 8); +AssertCompileMemberAlignment(HMCPU, HM_UNION_NM(u.) 
svm, 8); +AssertCompileMemberAlignment(HMCPU, Event, 8); + +#ifdef IN_RING0 +VMMR0_INT_DECL(PHMPHYSCPU) hmR0GetCurrentCpu(void); +VMMR0_INT_DECL(int) hmR0EnterCpu(PVMCPUCC pVCpu); + +# ifdef VBOX_STRICT +# define HM_DUMP_REG_FLAGS_GPRS RT_BIT(0) +# define HM_DUMP_REG_FLAGS_FPU RT_BIT(1) +# define HM_DUMP_REG_FLAGS_MSRS RT_BIT(2) +# define HM_DUMP_REG_FLAGS_ALL (HM_DUMP_REG_FLAGS_GPRS | HM_DUMP_REG_FLAGS_FPU | HM_DUMP_REG_FLAGS_MSRS) + +VMMR0_INT_DECL(void) hmR0DumpRegs(PVMCPUCC pVCpu, uint32_t fFlags); +VMMR0_INT_DECL(void) hmR0DumpDescriptor(PCX86DESCHC pDesc, RTSEL Sel, const char *pszMsg); +# endif + +# ifdef VBOX_WITH_KERNEL_USING_XMM +DECLASM(int) hmR0VMXStartVMWrapXMM(RTHCUINT fResume, PCPUMCTX pCtx, void *pvUnused, PVMCC pVM, PVMCPUCC pVCpu, + PFNHMVMXSTARTVM pfnStartVM); +DECLASM(int) hmR0SVMRunWrapXMM(RTHCPHYS pVmcbHostPhys, RTHCPHYS pVmcbPhys, PCPUMCTX pCtx, PVMCC pVM, PVMCPUCC pVCpu, + PFNHMSVMVMRUN pfnVMRun); +# endif +DECLASM(void) hmR0MdsClear(void); +#endif /* IN_RING0 */ + +VMM_INT_DECL(int) hmEmulateSvmMovTpr(PVMCC pVM, PVMCPUCC pVCpu); + +VMM_INT_DECL(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPU pVCpu); + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_HMInternal_h */ + diff --git a/src/VBox/VMM/include/HMInternal.mac b/src/VBox/VMM/include/HMInternal.mac new file mode 100644 index 00000000..84ca1d6c --- /dev/null +++ b/src/VBox/VMM/include/HMInternal.mac @@ -0,0 +1,45 @@ +;$Id: HMInternal.mac $ +;; @file +; HM - Internal header file. +; + +; +; Copyright (C) 2006-2020 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +struc HMCPU + .fCheckedTLBFlush resb 1 + .fActive resb 1 + .fLeaveDone resb 1 + .fUsingHyperDR7 resb 1 + .fForceTLBFlush resb 1 + .fUseDebugLoop resb 1 + .fUsingDebugLoop resb 1 + .fDebugWantRdTscExit resb 1 + + .fLoadSaveGuestXcr0 resb 1 + .fGIMTrapXcptUD resb 1 + .fTrapXcptGpForLovelyMesaDrv resb 1 + .fSingleInstruction resb 1 + .fClearTrapFlag resb 1 + alignb 8 + + .cWorldSwitchExits resd 1 + .idLastCpu resd 1 + .cTlbFlushes resd 1 + .uCurrentAsid resd 1 + .u32HMError resd 1 + .rcLastExitToR3 resd 1 + .fCtxChanged resq 1 + + ; incomplete to save unnecessary pain... +endstruc + diff --git a/src/VBox/VMM/include/IEMInternal.h b/src/VBox/VMM/include/IEMInternal.h new file mode 100644 index 00000000..fc622d7f --- /dev/null +++ b/src/VBox/VMM/include/IEMInternal.h @@ -0,0 +1,1902 @@ +/* $Id: IEMInternal.h $ */ +/** @file + * IEM - Internal header file. + */ + +/* + * Copyright (C) 2011-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VMM_INCLUDED_SRC_include_IEMInternal_h +#define VMM_INCLUDED_SRC_include_IEMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/vmm/cpum.h> +#include <VBox/vmm/iem.h> +#include <VBox/vmm/pgm.h> +#include <VBox/vmm/stam.h> +#include <VBox/param.h> + +#include <setjmp.h> + + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_iem_int Internals + * @ingroup grp_iem + * @internal + * @{ + */ + +/** For expanding symbol in slickedit and other products tagging and + * crossreferencing IEM symbols. */ +#ifndef IEM_STATIC +# define IEM_STATIC static +#endif + +/** @def IEM_WITH_3DNOW + * Includes the 3DNow decoding. */ +#define IEM_WITH_3DNOW + +/** @def IEM_WITH_THREE_0F_38 + * Includes the three byte opcode map for instrs starting with 0x0f 0x38. */ +#define IEM_WITH_THREE_0F_38 + +/** @def IEM_WITH_THREE_0F_3A + * Includes the three byte opcode map for instrs starting with 0x0f 0x38. */ +#define IEM_WITH_THREE_0F_3A + +/** @def IEM_WITH_VEX + * Includes the VEX decoding. */ +#define IEM_WITH_VEX + +/** @def IEM_CFG_TARGET_CPU + * The minimum target CPU for the IEM emulation (IEMTARGETCPU_XXX value). + * + * By default we allow this to be configured by the user via the + * CPUM/GuestCpuName config string, but this comes at a slight cost during + * decoding. So, for applications of this code where there is no need to + * be dynamic wrt target CPU, just modify this define. + */ +#if !defined(IEM_CFG_TARGET_CPU) || defined(DOXYGEN_RUNNING) +# define IEM_CFG_TARGET_CPU IEMTARGETCPU_DYNAMIC +#endif + + +//#define IEM_WITH_CODE_TLB// - work in progress + + +#if !defined(IN_TSTVMSTRUCT) && !defined(DOXYGEN_RUNNING) +/** Instruction statistics. */ +typedef struct IEMINSTRSTATS +{ +# define IEM_DO_INSTR_STAT(a_Name, a_szDesc) uint32_t a_Name; +# include "IEMInstructionStatisticsTmpl.h" +# undef IEM_DO_INSTR_STAT +} IEMINSTRSTATS; +#else +struct IEMINSTRSTATS; +typedef struct IEMINSTRSTATS IEMINSTRSTATS; +#endif +/** Pointer to IEM instruction statistics. */ +typedef IEMINSTRSTATS *PIEMINSTRSTATS; + +/** Finish and move to types.h */ +typedef union +{ + uint32_t u32; +} RTFLOAT32U; +typedef RTFLOAT32U *PRTFLOAT32U; +typedef RTFLOAT32U const *PCRTFLOAT32U; + + +/** + * Extended operand mode that includes a representation of 8-bit. + * + * This is used for packing down modes when invoking some C instruction + * implementations. + */ +typedef enum IEMMODEX +{ + IEMMODEX_16BIT = IEMMODE_16BIT, + IEMMODEX_32BIT = IEMMODE_32BIT, + IEMMODEX_64BIT = IEMMODE_64BIT, + IEMMODEX_8BIT +} IEMMODEX; +AssertCompileSize(IEMMODEX, 4); + + +/** + * Branch types. + */ +typedef enum IEMBRANCH +{ + IEMBRANCH_JUMP = 1, + IEMBRANCH_CALL, + IEMBRANCH_TRAP, + IEMBRANCH_SOFTWARE_INT, + IEMBRANCH_HARDWARE_INT +} IEMBRANCH; +AssertCompileSize(IEMBRANCH, 4); + + +/** + * INT instruction types. + */ +typedef enum IEMINT +{ + /** INT n instruction (opcode 0xcd imm). */ + IEMINT_INTN = 0, + /** Single byte INT3 instruction (opcode 0xcc). */ + IEMINT_INT3 = IEM_XCPT_FLAGS_BP_INSTR, + /** Single byte INTO instruction (opcode 0xce). */ + IEMINT_INTO = IEM_XCPT_FLAGS_OF_INSTR, + /** Single byte INT1 (ICEBP) instruction (opcode 0xf1). */ + IEMINT_INT1 = IEM_XCPT_FLAGS_ICEBP_INSTR +} IEMINT; +AssertCompileSize(IEMINT, 4); + + +/** + * A FPU result. + */ +typedef struct IEMFPURESULT +{ + /** The output value. */ + RTFLOAT80U r80Result; + /** The output status. */ + uint16_t FSW; +} IEMFPURESULT; +AssertCompileMemberOffset(IEMFPURESULT, FSW, 10); +/** Pointer to a FPU result. 
*/ +typedef IEMFPURESULT *PIEMFPURESULT; +/** Pointer to a const FPU result. */ +typedef IEMFPURESULT const *PCIEMFPURESULT; + + +/** + * A FPU result consisting of two output values and FSW. + */ +typedef struct IEMFPURESULTTWO +{ + /** The first output value. */ + RTFLOAT80U r80Result1; + /** The output status. */ + uint16_t FSW; + /** The second output value. */ + RTFLOAT80U r80Result2; +} IEMFPURESULTTWO; +AssertCompileMemberOffset(IEMFPURESULTTWO, FSW, 10); +AssertCompileMemberOffset(IEMFPURESULTTWO, r80Result2, 12); +/** Pointer to a FPU result consisting of two output values and FSW. */ +typedef IEMFPURESULTTWO *PIEMFPURESULTTWO; +/** Pointer to a const FPU result consisting of two output values and FSW. */ +typedef IEMFPURESULTTWO const *PCIEMFPURESULTTWO; + + +/** + * IEM TLB entry. + * + * Lookup assembly: + * @code{.asm} + ; Calculate tag. + mov rax, [VA] + shl rax, 16 + shr rax, 16 + X86_PAGE_SHIFT + or rax, [uTlbRevision] + + ; Do indexing. + movzx ecx, al + lea rcx, [pTlbEntries + rcx] + + ; Check tag. + cmp [rcx + IEMTLBENTRY.uTag], rax + jne .TlbMiss + + ; Check access. + movsx rax, ACCESS_FLAGS | MAPPING_R3_NOT_VALID | 0xffffff00 + and rax, [rcx + IEMTLBENTRY.fFlagsAndPhysRev] + cmp rax, [uTlbPhysRev] + jne .TlbMiss + + ; Calc address and we're done. + mov eax, X86_PAGE_OFFSET_MASK + and eax, [VA] + or rax, [rcx + IEMTLBENTRY.pMappingR3] + %ifdef VBOX_WITH_STATISTICS + inc qword [cTlbHits] + %endif + jmp .Done + + .TlbMiss: + mov r8d, ACCESS_FLAGS + mov rdx, [VA] + mov rcx, [pVCpu] + call iemTlbTypeMiss + .Done: + + @endcode + * + */ +typedef struct IEMTLBENTRY +{ + /** The TLB entry tag. + * Bits 35 thru 0 are made up of the virtual address shifted right 12 bits. + * Bits 63 thru 36 are made up of the TLB revision (zero means invalid). + * + * The TLB lookup code uses the current TLB revision, which won't ever be zero, + * enabling an extremely cheap TLB invalidation most of the time. When the TLB + * revision wraps around though, the tags needs to be zeroed. + * + * @note Try use SHRD instruction? After seeing + * https://gmplib.org/~tege/x86-timing.pdf, maybe not. + */ + uint64_t uTag; + /** Access flags and physical TLB revision. + * + * - Bit 0 - page tables - not executable (X86_PTE_PAE_NX). + * - Bit 1 - page tables - not writable (complemented X86_PTE_RW). + * - Bit 2 - page tables - not user (complemented X86_PTE_US). + * - Bit 3 - pgm phys/virt - not directly writable. + * - Bit 4 - pgm phys page - not directly readable. + * - Bit 5 - currently unused. + * - Bit 6 - page tables - not dirty (complemented X86_PTE_D). + * - Bit 7 - tlb entry - pMappingR3 member not valid. + * - Bits 63 thru 8 are used for the physical TLB revision number. + * + * We're using complemented bit meanings here because it makes it easy to check + * whether special action is required. For instance a user mode write access + * would do a "TEST fFlags, (X86_PTE_RW | X86_PTE_US | X86_PTE_D)" and a + * non-zero result would mean special handling needed because either it wasn't + * writable, or it wasn't user, or the page wasn't dirty. A user mode read + * access would do "TEST fFlags, X86_PTE_US"; and a kernel mode read wouldn't + * need to check any PTE flag. + */ + uint64_t fFlagsAndPhysRev; + /** The guest physical page address. */ + uint64_t GCPhys; + /** Pointer to the ring-3 mapping (possibly also valid in ring-0). 
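 *
 * (Editorial note, not upstream text: on a TLB hit the guest byte pointer is
 * formed exactly as in the assembly sketch above, i.e. roughly
 * @code
 *      uint8_t *pbGuestByte = &pTlbe->pbMappingR3[GCPtr & X86_PAGE_OFFSET_MASK];
 * @endcode
 * where GCPtr is the looked-up guest virtual address and pTlbe the TLB entry.)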
 */
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+    R3PTRTYPE(uint8_t *)    pbMappingR3;
+#else
+    R3R0PTRTYPE(uint8_t *)  pbMappingR3;
+#endif
+#if HC_ARCH_BITS == 32
+    uint32_t                u32Padding1;
+#endif
+} IEMTLBENTRY;
+AssertCompileSize(IEMTLBENTRY, 32);
+/** Pointer to an IEM TLB entry. */
+typedef IEMTLBENTRY *PIEMTLBENTRY;
+
+/** @name IEMTLBE_F_XXX - TLB entry flags (IEMTLBENTRY::fFlagsAndPhysRev)
+ * @{ */
+#define IEMTLBE_F_PT_NO_EXEC        RT_BIT_64(0) /**< Page tables: Not executable. */
+#define IEMTLBE_F_PT_NO_WRITE       RT_BIT_64(1) /**< Page tables: Not writable. */
+#define IEMTLBE_F_PT_NO_USER        RT_BIT_64(2) /**< Page tables: Not user accessible (supervisor only). */
+#define IEMTLBE_F_PG_NO_WRITE       RT_BIT_64(3) /**< Phys page: Not writable (access handler, ROM, whatever). */
+#define IEMTLBE_F_PG_NO_READ        RT_BIT_64(4) /**< Phys page: Not readable (MMIO / access handler, ROM). */
+#define IEMTLBE_F_PT_NO_DIRTY       RT_BIT_64(5) /**< Page tables: Not dirty (needs to be made dirty on write). */
+#define IEMTLBE_F_NO_MAPPINGR3      RT_BIT_64(6) /**< TLB entry: The IEMTLBENTRY::pMappingR3 member is invalid. */
+#define IEMTLBE_F_PHYS_REV          UINT64_C(0xffffffffffffff00) /**< Physical revision mask. */
+/** @} */
+
+
+/**
+ * An IEM TLB.
+ *
+ * We've got two of these, one for data and one for instructions.
+ */
+typedef struct IEMTLB
+{
+    /** The TLB entries.
+     * We've chosen 256 because that way we can obtain the result directly from an
+     * 8-bit register without an additional AND instruction. */
+    IEMTLBENTRY         aEntries[256];
+    /** The TLB revision.
+     * This is actually only 28 bits wide (see IEMTLBENTRY::uTag) and is incremented
+     * by adding RT_BIT_64(36) to it. When it wraps around and becomes zero, all
+     * the tags in the TLB must be zeroed and the revision set to RT_BIT_64(36).
+     * (The revision zero indicates an invalid TLB entry.)
+     *
+     * The initial value is chosen to cause an early wraparound. */
+    uint64_t            uTlbRevision;
+    /** The TLB physical address revision - shadow of PGM variable.
+     *
+     * This is actually only 56 bits wide (see IEMTLBENTRY::fFlagsAndPhysRev) and is
+     * incremented by adding RT_BIT_64(8). When it wraps around and becomes zero,
+     * a rendezvous is called and each CPU wipes the IEMTLBENTRY::pMappingR3 as well
+     * as IEMTLBENTRY::fFlagsAndPhysRev bits 63 thru 8, 4, and 3.
+     *
+     * The initial value is chosen to cause an early wraparound. */
+    uint64_t volatile   uTlbPhysRev;
+
+    /* Statistics: */
+
+    /** TLB hits (VBOX_WITH_STATISTICS only). */
+    uint64_t            cTlbHits;
+    /** TLB misses. */
+    uint32_t            cTlbMisses;
+    /** Slow read path. */
+    uint32_t            cTlbSlowReadPath;
+#if 0
+    /** TLB misses because of tag mismatch. */
+    uint32_t            cTlbMissesTag;
+    /** TLB misses because of virtual access violation. */
+    uint32_t            cTlbMissesVirtAccess;
+    /** TLB misses because of dirty bit. */
+    uint32_t            cTlbMissesDirty;
+    /** TLB misses because of MMIO. */
+    uint32_t            cTlbMissesMmio;
+    /** TLB misses because of write access handlers. */
+    uint32_t            cTlbMissesWriteHandler;
+    /** TLB misses because no r3(/r0) mapping. */
+    uint32_t            cTlbMissesMapping;
+#endif
+    /** Alignment padding. */
+    uint32_t            au32Padding[3+5];
+} IEMTLB;
+AssertCompileSizeAlignment(IEMTLB, 64);
+/** IEMTLB::uTlbRevision increment. */
+#define IEMTLB_REVISION_INCR    RT_BIT_64(36)
+/** IEMTLB::uTlbPhysRev increment. */
+#define IEMTLB_PHYS_REV_INCR    RT_BIT_64(8)
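/*
 * Editorial sketch, not part of the upstream header: the tag/index calculation
 * documented for IEMTLBENTRY::uTag above, written out in C.  GCPtr and pTlb are
 * placeholder names for the guest virtual address and the IEMTLB being queried.
 */
#if 0 /* illustration only */
uint64_t const     uTag  = ((GCPtr << 16) >> (16 + X86_PAGE_SHIFT)) | pTlb->uTlbRevision;
PIEMTLBENTRY const pTlbe = &pTlb->aEntries[(uint8_t)uTag]; /* 256 entries: the low 8 bits index directly. */
if (pTlbe->uTag == uTag)
{ /* probable hit; fFlagsAndPhysRev must still be checked against uTlbPhysRev. */ }
#endif
+
+
+/**
+ * The per-CPU IEM state.
+ */
+typedef struct IEMCPU
+{
+    /** Info status code that needs to be propagated to the IEM caller.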
+ * This cannot be passed internally, as it would complicate all success + * checks within the interpreter making the code larger and almost impossible + * to get right. Instead, we'll store status codes to pass on here. Each + * source of these codes will perform appropriate sanity checks. */ + int32_t rcPassUp; /* 0x00 */ + + /** The current CPU execution mode (CS). */ + IEMMODE enmCpuMode; /* 0x04 */ + /** The CPL. */ + uint8_t uCpl; /* 0x05 */ + + /** Whether to bypass access handlers or not. */ + bool fBypassHandlers; /* 0x06 */ + bool fUnusedWasInPatchCode; /* 0x07 */ + + /** @name Decoder state. + * @{ */ +#ifdef IEM_WITH_CODE_TLB + /** The offset of the next instruction byte. */ + uint32_t offInstrNextByte; /* 0x08 */ + /** The number of bytes available at pbInstrBuf for the current instruction. + * This takes the max opcode length into account so that doesn't need to be + * checked separately. */ + uint32_t cbInstrBuf; /* 0x0c */ + /** Pointer to the page containing RIP, user specified buffer or abOpcode. + * This can be NULL if the page isn't mappable for some reason, in which + * case we'll do fallback stuff. + * + * If we're executing an instruction from a user specified buffer, + * IEMExecOneWithPrefetchedByPC and friends, this is not necessarily a page + * aligned pointer but pointer to the user data. + * + * For instructions crossing pages, this will start on the first page and be + * advanced to the next page by the time we've decoded the instruction. This + * therefore precludes stuff like <tt>pbInstrBuf[offInstrNextByte + cbInstrBuf - cbCurInstr]</tt> + */ + uint8_t const *pbInstrBuf; /* 0x10 */ +# if ARCH_BITS == 32 + uint32_t uInstrBufHigh; /** The high dword of the host context pbInstrBuf member. */ +# endif + /** The program counter corresponding to pbInstrBuf. + * This is set to a non-canonical address when we need to invalidate it. */ + uint64_t uInstrBufPc; /* 0x18 */ + /** The number of bytes available at pbInstrBuf in total (for IEMExecLots). + * This takes the CS segment limit into account. */ + uint16_t cbInstrBufTotal; /* 0x20 */ + /** Offset into pbInstrBuf of the first byte of the current instruction. + * Can be negative to efficiently handle cross page instructions. */ + int16_t offCurInstrStart; /* 0x22 */ + + /** The prefix mask (IEM_OP_PRF_XXX). */ + uint32_t fPrefixes; /* 0x24 */ + /** The extra REX ModR/M register field bit (REX.R << 3). */ + uint8_t uRexReg; /* 0x28 */ + /** The extra REX ModR/M r/m field, SIB base and opcode reg bit + * (REX.B << 3). */ + uint8_t uRexB; /* 0x29 */ + /** The extra REX SIB index field bit (REX.X << 3). */ + uint8_t uRexIndex; /* 0x2a */ + + /** The effective segment register (X86_SREG_XXX). */ + uint8_t iEffSeg; /* 0x2b */ + + /** The offset of the ModR/M byte relative to the start of the instruction. */ + uint8_t offModRm; /* 0x2c */ +#else + /** The size of what has currently been fetched into abOpcode. */ + uint8_t cbOpcode; /* 0x08 */ + /** The current offset into abOpcode. */ + uint8_t offOpcode; /* 0x09 */ + /** The offset of the ModR/M byte relative to the start of the instruction. */ + uint8_t offModRm; /* 0x0a */ + + /** The effective segment register (X86_SREG_XXX). */ + uint8_t iEffSeg; /* 0x0b */ + + /** The prefix mask (IEM_OP_PRF_XXX). */ + uint32_t fPrefixes; /* 0x0c */ + /** The extra REX ModR/M register field bit (REX.R << 3). */ + uint8_t uRexReg; /* 0x10 */ + /** The extra REX ModR/M r/m field, SIB base and opcode reg bit + * (REX.B << 3). 
*/ + uint8_t uRexB; /* 0x11 */ + /** The extra REX SIB index field bit (REX.X << 3). */ + uint8_t uRexIndex; /* 0x12 */ + +#endif + + /** The effective operand mode. */ + IEMMODE enmEffOpSize; /* 0x2d, 0x13 */ + /** The default addressing mode. */ + IEMMODE enmDefAddrMode; /* 0x2e, 0x14 */ + /** The effective addressing mode. */ + IEMMODE enmEffAddrMode; /* 0x2f, 0x15 */ + /** The default operand mode. */ + IEMMODE enmDefOpSize; /* 0x30, 0x16 */ + + /** Prefix index (VEX.pp) for two byte and three byte tables. */ + uint8_t idxPrefix; /* 0x31, 0x17 */ + /** 3rd VEX/EVEX/XOP register. + * Please use IEM_GET_EFFECTIVE_VVVV to access. */ + uint8_t uVex3rdReg; /* 0x32, 0x18 */ + /** The VEX/EVEX/XOP length field. */ + uint8_t uVexLength; /* 0x33, 0x19 */ + /** Additional EVEX stuff. */ + uint8_t fEvexStuff; /* 0x34, 0x1a */ + + /** Explicit alignment padding. */ + uint8_t abAlignment2a[1]; /* 0x35, 0x1b */ + /** The FPU opcode (FOP). */ + uint16_t uFpuOpcode; /* 0x36, 0x1c */ +#ifndef IEM_WITH_CODE_TLB + /** Explicit alignment padding. */ + uint8_t abAlignment2b[2]; /* 0x1e */ +#endif + + /** The opcode bytes. */ + uint8_t abOpcode[15]; /* 0x48, 0x20 */ + /** Explicit alignment padding. */ +#ifdef IEM_WITH_CODE_TLB + uint8_t abAlignment2c[0x48 - 0x47]; /* 0x37 */ +#else + uint8_t abAlignment2c[0x48 - 0x2f]; /* 0x2f */ +#endif + /** @} */ + + + /** The flags of the current exception / interrupt. */ + uint32_t fCurXcpt; /* 0x48, 0x48 */ + /** The current exception / interrupt. */ + uint8_t uCurXcpt; + /** Exception / interrupt recursion depth. */ + int8_t cXcptRecursions; + + /** The number of active guest memory mappings. */ + uint8_t cActiveMappings; + /** The next unused mapping index. */ + uint8_t iNextMapping; + /** Records for tracking guest memory mappings. */ + struct + { + /** The address of the mapped bytes. */ + void *pv; + /** The access flags (IEM_ACCESS_XXX). + * IEM_ACCESS_INVALID if the entry is unused. */ + uint32_t fAccess; +#if HC_ARCH_BITS == 64 + uint32_t u32Alignment4; /**< Alignment padding. */ +#endif + } aMemMappings[3]; + + /** Locking records for the mapped memory. */ + union + { + PGMPAGEMAPLOCK Lock; + uint64_t au64Padding[2]; + } aMemMappingLocks[3]; + + /** Bounce buffer info. + * This runs in parallel to aMemMappings. */ + struct + { + /** The physical address of the first byte. */ + RTGCPHYS GCPhysFirst; + /** The physical address of the second page. */ + RTGCPHYS GCPhysSecond; + /** The number of bytes in the first page. */ + uint16_t cbFirst; + /** The number of bytes in the second page. */ + uint16_t cbSecond; + /** Whether it's unassigned memory. */ + bool fUnassigned; + /** Explicit alignment padding. */ + bool afAlignment5[3]; + } aMemBbMappings[3]; + + /** Bounce buffer storage. + * This runs in parallel to aMemMappings and aMemBbMappings. */ + struct + { + uint8_t ab[512]; + } aBounceBuffers[3]; + + + /** Pointer set jump buffer - ring-3 context. */ + R3PTRTYPE(jmp_buf *) pJmpBufR3; + /** Pointer set jump buffer - ring-0 context. */ + R0PTRTYPE(jmp_buf *) pJmpBufR0; + + /** @todo Should move this near @a fCurXcpt later. */ + /** The CR2 for the current exception / interrupt. */ + uint64_t uCurXcptCr2; + /** The error code for the current exception / interrupt. */ + uint32_t uCurXcptErr; + /** The VMX APIC-access page handler type. */ + PGMPHYSHANDLERTYPE hVmxApicAccessPage; + + /** @name Statistics + * @{ */ + /** The number of instructions we've executed. */ + uint32_t cInstructions; + /** The number of potential exits. 
*/ + uint32_t cPotentialExits; + /** The number of bytes data or stack written (mostly for IEMExecOneEx). + * This may contain uncommitted writes. */ + uint32_t cbWritten; + /** Counts the VERR_IEM_INSTR_NOT_IMPLEMENTED returns. */ + uint32_t cRetInstrNotImplemented; + /** Counts the VERR_IEM_ASPECT_NOT_IMPLEMENTED returns. */ + uint32_t cRetAspectNotImplemented; + /** Counts informational statuses returned (other than VINF_SUCCESS). */ + uint32_t cRetInfStatuses; + /** Counts other error statuses returned. */ + uint32_t cRetErrStatuses; + /** Number of times rcPassUp has been used. */ + uint32_t cRetPassUpStatus; + /** Number of times RZ left with instruction commit pending for ring-3. */ + uint32_t cPendingCommit; + /** Number of long jumps. */ + uint32_t cLongJumps; + /** @} */ + + /** @name Target CPU information. + * @{ */ +#if IEM_CFG_TARGET_CPU == IEMTARGETCPU_DYNAMIC + /** The target CPU. */ + uint32_t uTargetCpu; +#else + uint32_t u32TargetCpuPadding; +#endif + /** The CPU vendor. */ + CPUMCPUVENDOR enmCpuVendor; + /** @} */ + + /** @name Host CPU information. + * @{ */ + /** The CPU vendor. */ + CPUMCPUVENDOR enmHostCpuVendor; + /** @} */ + + /** Counts RDMSR \#GP(0) LogRel(). */ + uint8_t cLogRelRdMsr; + /** Counts WRMSR \#GP(0) LogRel(). */ + uint8_t cLogRelWrMsr; + /** Alignment padding. */ + uint8_t abAlignment8[50]; + + /** Data TLB. + * @remarks Must be 64-byte aligned. */ + IEMTLB DataTlb; + /** Instruction TLB. + * @remarks Must be 64-byte aligned. */ + IEMTLB CodeTlb; + + /** Pointer to instruction statistics for ring-0 context. */ + R0PTRTYPE(PIEMINSTRSTATS) pStatsR0; + /** Ring-3 pointer to instruction statistics for non-ring-3 code. */ + R3PTRTYPE(PIEMINSTRSTATS) pStatsCCR3; + /** Pointer to instruction statistics for ring-3 context. */ + R3PTRTYPE(PIEMINSTRSTATS) pStatsR3; +} IEMCPU; +AssertCompileMemberOffset(IEMCPU, fCurXcpt, 0x48); +AssertCompileMemberAlignment(IEMCPU, DataTlb, 64); +AssertCompileMemberAlignment(IEMCPU, CodeTlb, 64); +/** Pointer to the per-CPU IEM state. */ +typedef IEMCPU *PIEMCPU; +/** Pointer to the const per-CPU IEM state. */ +typedef IEMCPU const *PCIEMCPU; + + +/** @def IEM_GET_CTX + * Gets the guest CPU context for the calling EMT. + * @returns PCPUMCTX + * @param a_pVCpu The cross context virtual CPU structure of the calling thread. + */ +#define IEM_GET_CTX(a_pVCpu) (&(a_pVCpu)->cpum.GstCtx) + +/** @def IEM_CTX_ASSERT + * Asserts that the @a a_fExtrnMbz is present in the CPU context. + * @param a_pVCpu The cross context virtual CPU structure of the calling thread. + * @param a_fExtrnMbz The mask of CPUMCTX_EXTRN_XXX flags that must be zero. + */ +#define IEM_CTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \ + ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", (a_pVCpu)->cpum.GstCtx.fExtrn, \ + (a_fExtrnMbz))) + +/** @def IEM_CTX_IMPORT_RET + * Makes sure the CPU context bits given by @a a_fExtrnImport are imported. + * + * Will call the keep to import the bits as needed. + * + * Returns on import failure. + * + * @param a_pVCpu The cross context virtual CPU structure of the calling thread. + * @param a_fExtrnImport The mask of CPUMCTX_EXTRN_XXX flags to import. 
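 *
 * (Editorial illustration, not upstream text: a status-returning caller that
 * needs CR0 up to date might start out as
 * @code
 *      IEM_CTX_IMPORT_RET(pVCpu, CPUMCTX_EXTRN_CR0);
 *      uint64_t const uGuestCr0 = pVCpu->cpum.GstCtx.cr0;
 * @endcode
 * assuming CPUMCTX_EXTRN_CR0 is the relevant CPUMCTX_EXTRN_XXX flag here.)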
+ */
+#define IEM_CTX_IMPORT_RET(a_pVCpu, a_fExtrnImport) \
+    do { \
+        if (!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnImport))) \
+        { /* likely */ } \
+        else \
+        { \
+            int rcCtxImport = CPUMImportGuestStateOnDemand(a_pVCpu, a_fExtrnImport); \
+            AssertRCReturn(rcCtxImport, rcCtxImport); \
+        } \
+    } while (0)
+
+/** @def IEM_CTX_IMPORT_NORET
+ * Makes sure the CPU context bits given by @a a_fExtrnImport are imported.
+ *
+ * Will call CPUM to import the bits as needed.
+ *
+ * @param   a_pVCpu         The cross context virtual CPU structure of the calling thread.
+ * @param   a_fExtrnImport  The mask of CPUMCTX_EXTRN_XXX flags to import.
+ */
+#define IEM_CTX_IMPORT_NORET(a_pVCpu, a_fExtrnImport) \
+    do { \
+        if (!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnImport))) \
+        { /* likely */ } \
+        else \
+        { \
+            int rcCtxImport = CPUMImportGuestStateOnDemand(a_pVCpu, a_fExtrnImport); \
+            AssertLogRelRC(rcCtxImport); \
+        } \
+    } while (0)
+
+/** @def IEM_CTX_IMPORT_JMP
+ * Makes sure the CPU context bits given by @a a_fExtrnImport are imported.
+ *
+ * Will call CPUM to import the bits as needed.
+ *
+ * Jumps on import failure.
+ *
+ * @param   a_pVCpu         The cross context virtual CPU structure of the calling thread.
+ * @param   a_fExtrnImport  The mask of CPUMCTX_EXTRN_XXX flags to import.
+ */
+#define IEM_CTX_IMPORT_JMP(a_pVCpu, a_fExtrnImport) \
+    do { \
+        if (!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnImport))) \
+        { /* likely */ } \
+        else \
+        { \
+            int rcCtxImport = CPUMImportGuestStateOnDemand(a_pVCpu, a_fExtrnImport); \
+            AssertRCStmt(rcCtxImport, longjmp(*pVCpu->iem.s.CTX_SUFF(pJmpBuf), rcCtxImport)); \
+        } \
+    } while (0)
+
+
+
+/** Gets the current IEMTARGETCPU value.
+ * @returns IEMTARGETCPU value.
+ * @param   a_pVCpu The cross context virtual CPU structure of the calling thread.
+ */
+#if IEM_CFG_TARGET_CPU != IEMTARGETCPU_DYNAMIC
+# define IEM_GET_TARGET_CPU(a_pVCpu)    (IEM_CFG_TARGET_CPU)
+#else
+# define IEM_GET_TARGET_CPU(a_pVCpu)    ((a_pVCpu)->iem.s.uTargetCpu)
+#endif
+
+/** @def IEM_GET_INSTR_LEN
+ * Gets the instruction length. */
+#ifdef IEM_WITH_CODE_TLB
+# define IEM_GET_INSTR_LEN(a_pVCpu)     ((a_pVCpu)->iem.s.offInstrNextByte - (uint32_t)(int32_t)(a_pVCpu)->iem.s.offCurInstrStart)
+#else
+# define IEM_GET_INSTR_LEN(a_pVCpu)     ((a_pVCpu)->iem.s.offOpcode)
+#endif
+
+
+/** @name IEM_ACCESS_XXX - Access details.
+ * @{ */
+#define IEM_ACCESS_INVALID              UINT32_C(0x000000ff)
+#define IEM_ACCESS_TYPE_READ            UINT32_C(0x00000001)
+#define IEM_ACCESS_TYPE_WRITE           UINT32_C(0x00000002)
+#define IEM_ACCESS_TYPE_EXEC            UINT32_C(0x00000004)
+#define IEM_ACCESS_TYPE_MASK            UINT32_C(0x00000007)
+#define IEM_ACCESS_WHAT_CODE            UINT32_C(0x00000010)
+#define IEM_ACCESS_WHAT_DATA            UINT32_C(0x00000020)
+#define IEM_ACCESS_WHAT_STACK           UINT32_C(0x00000030)
+#define IEM_ACCESS_WHAT_SYS             UINT32_C(0x00000040)
+#define IEM_ACCESS_WHAT_MASK            UINT32_C(0x00000070)
+/** The writes are partial, so initialize the bounce buffer with the
+ * original RAM content. */
+#define IEM_ACCESS_PARTIAL_WRITE        UINT32_C(0x00000100)
+/** Used in aMemMappings to indicate that the entry is bounce buffered. */
+#define IEM_ACCESS_BOUNCE_BUFFERED      UINT32_C(0x00000200)
+/** Bounce buffer with ring-3 write pending, first page. */
+#define IEM_ACCESS_PENDING_R3_WRITE_1ST UINT32_C(0x00000400)
+/** Bounce buffer with ring-3 write pending, second page. */
+#define IEM_ACCESS_PENDING_R3_WRITE_2ND UINT32_C(0x00000800)
+/** Valid bit mask. */
+#define IEM_ACCESS_VALID_MASK           UINT32_C(0x00000fff)
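/*
 * Editorial sketch, not part of the upstream header: an access word combines
 * exactly one IEM_ACCESS_WHAT_XXX value with one or more IEM_ACCESS_TYPE_XXX
 * bits (see the aliases right below) and is queried by simple masking.
 */
#if 0 /* illustration only */
uint32_t const fAccess  = IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_STACK;
bool     const fIsWrite = RT_BOOL(fAccess & IEM_ACCESS_TYPE_WRITE);
bool     const fIsStack = (fAccess & IEM_ACCESS_WHAT_MASK) == IEM_ACCESS_WHAT_STACK;
#endif
+/** Read+write data alias. */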
+#define IEM_ACCESS_DATA_RW              (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_DATA)
+/** Write data alias. */
+#define IEM_ACCESS_DATA_W               (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_DATA)
+/** Read data alias. */
+#define IEM_ACCESS_DATA_R               (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_WHAT_DATA)
+/** Instruction fetch alias. */
+#define IEM_ACCESS_INSTRUCTION          (IEM_ACCESS_TYPE_EXEC  | IEM_ACCESS_WHAT_CODE)
+/** Stack write alias. */
+#define IEM_ACCESS_STACK_W              (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_STACK)
+/** Stack read alias. */
+#define IEM_ACCESS_STACK_R              (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_WHAT_STACK)
+/** Stack read+write alias. */
+#define IEM_ACCESS_STACK_RW             (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_STACK)
+/** Read system table alias. */
+#define IEM_ACCESS_SYS_R                (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_WHAT_SYS)
+/** Read+write system table alias. */
+#define IEM_ACCESS_SYS_RW               (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_SYS)
+/** @} */
+
+/** @name Prefix constants (IEMCPU::fPrefixes)
+ * @{ */
+#define IEM_OP_PRF_SEG_CS               RT_BIT_32(0)  /**< CS segment prefix (0x2e). */
+#define IEM_OP_PRF_SEG_SS               RT_BIT_32(1)  /**< SS segment prefix (0x36). */
+#define IEM_OP_PRF_SEG_DS               RT_BIT_32(2)  /**< DS segment prefix (0x3e). */
+#define IEM_OP_PRF_SEG_ES               RT_BIT_32(3)  /**< ES segment prefix (0x26). */
+#define IEM_OP_PRF_SEG_FS               RT_BIT_32(4)  /**< FS segment prefix (0x64). */
+#define IEM_OP_PRF_SEG_GS               RT_BIT_32(5)  /**< GS segment prefix (0x65). */
+#define IEM_OP_PRF_SEG_MASK             UINT32_C(0x3f)
+
+#define IEM_OP_PRF_SIZE_OP              RT_BIT_32(8)  /**< Operand size prefix (0x66). */
+#define IEM_OP_PRF_SIZE_REX_W           RT_BIT_32(9)  /**< REX.W prefix (0x48-0x4f). */
+#define IEM_OP_PRF_SIZE_ADDR            RT_BIT_32(10) /**< Address size prefix (0x67). */
+
+#define IEM_OP_PRF_LOCK                 RT_BIT_32(16) /**< Lock prefix (0xf0). */
+#define IEM_OP_PRF_REPNZ                RT_BIT_32(17) /**< Repeat-not-zero prefix (0xf2). */
+#define IEM_OP_PRF_REPZ                 RT_BIT_32(18) /**< Repeat-if-zero prefix (0xf3). */
+
+#define IEM_OP_PRF_REX                  RT_BIT_32(24) /**< Any REX prefix (0x40-0x4f). */
+#define IEM_OP_PRF_REX_R                RT_BIT_32(25) /**< REX.R prefix (0x44,0x45,0x46,0x47,0x4c,0x4d,0x4e,0x4f). */
+#define IEM_OP_PRF_REX_B                RT_BIT_32(26) /**< REX.B prefix (0x41,0x43,0x45,0x47,0x49,0x4b,0x4d,0x4f). */
+#define IEM_OP_PRF_REX_X                RT_BIT_32(27) /**< REX.X prefix (0x42,0x43,0x46,0x47,0x4a,0x4b,0x4e,0x4f). */
+/** Mask with all the REX prefix flags.
+ * This is generally for use when needing to undo the REX prefixes when they
+ * are followed by legacy prefixes and therefore do not immediately precede
+ * the first opcode byte.
+ * For testing whether any REX prefix is present, use IEM_OP_PRF_REX instead. */
+#define IEM_OP_PRF_REX_MASK  (IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W)
+
+#define IEM_OP_PRF_VEX                  RT_BIT_32(28) /**< Indicates VEX prefix. */
+#define IEM_OP_PRF_EVEX                 RT_BIT_32(29) /**< Indicates EVEX prefix. */
+#define IEM_OP_PRF_XOP                  RT_BIT_32(30) /**< Indicates XOP prefix. */
+/** @} */
+
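/*
 * Editorial sketch, not part of the upstream decoder: how a REX prefix byte
 * (0x40..0x4f) could be folded into the prefix state described above.  bRex is
 * a placeholder for the fetched prefix byte.
 */
#if 0 /* illustration only */
pVCpu->iem.s.fPrefixes |= IEM_OP_PRF_REX | ((bRex & RT_BIT_32(3)) ? IEM_OP_PRF_SIZE_REX_W : 0);
pVCpu->iem.s.uRexReg    = ((bRex >> 2) & 1) << 3;   /* REX.R << 3 */
pVCpu->iem.s.uRexIndex  = ((bRex >> 1) & 1) << 3;   /* REX.X << 3 */
pVCpu->iem.s.uRexB      = ( bRex       & 1) << 3;   /* REX.B << 3 */
#endif
+/** @name IEMOPFORM_XXX - Opcode forms
+ * @note These are ORed together with IEMOPHINT_XXX.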
+ * @{ */ +/** ModR/M: reg, r/m */ +#define IEMOPFORM_RM 0 +/** ModR/M: reg, r/m (register) */ +#define IEMOPFORM_RM_REG (IEMOPFORM_RM | IEMOPFORM_MOD3) +/** ModR/M: reg, r/m (memory) */ +#define IEMOPFORM_RM_MEM (IEMOPFORM_RM | IEMOPFORM_NOT_MOD3) +/** ModR/M: r/m, reg */ +#define IEMOPFORM_MR 1 +/** ModR/M: r/m (register), reg */ +#define IEMOPFORM_MR_REG (IEMOPFORM_MR | IEMOPFORM_MOD3) +/** ModR/M: r/m (memory), reg */ +#define IEMOPFORM_MR_MEM (IEMOPFORM_MR | IEMOPFORM_NOT_MOD3) +/** ModR/M: r/m only */ +#define IEMOPFORM_M 2 +/** ModR/M: r/m only (register). */ +#define IEMOPFORM_M_REG (IEMOPFORM_M | IEMOPFORM_MOD3) +/** ModR/M: r/m only (memory). */ +#define IEMOPFORM_M_MEM (IEMOPFORM_M | IEMOPFORM_NOT_MOD3) +/** ModR/M: reg only */ +#define IEMOPFORM_R 3 + +/** VEX+ModR/M: reg, r/m */ +#define IEMOPFORM_VEX_RM 4 +/** VEX+ModR/M: reg, r/m (register) */ +#define IEMOPFORM_VEX_RM_REG (IEMOPFORM_VEX_RM | IEMOPFORM_MOD3) +/** VEX+ModR/M: reg, r/m (memory) */ +#define IEMOPFORM_VEX_RM_MEM (IEMOPFORM_VEX_RM | IEMOPFORM_NOT_MOD3) +/** VEX+ModR/M: r/m, reg */ +#define IEMOPFORM_VEX_MR 5 +/** VEX+ModR/M: r/m (register), reg */ +#define IEMOPFORM_VEX_MR_REG (IEMOPFORM_VEX_MR | IEMOPFORM_MOD3) +/** VEX+ModR/M: r/m (memory), reg */ +#define IEMOPFORM_VEX_MR_MEM (IEMOPFORM_VEX_MR | IEMOPFORM_NOT_MOD3) +/** VEX+ModR/M: r/m only */ +#define IEMOPFORM_VEX_M 6 +/** VEX+ModR/M: r/m only (register). */ +#define IEMOPFORM_VEX_M_REG (IEMOPFORM_VEX_M | IEMOPFORM_MOD3) +/** VEX+ModR/M: r/m only (memory). */ +#define IEMOPFORM_VEX_M_MEM (IEMOPFORM_VEX_M | IEMOPFORM_NOT_MOD3) +/** VEX+ModR/M: reg only */ +#define IEMOPFORM_VEX_R 7 +/** VEX+ModR/M: reg, vvvv, r/m */ +#define IEMOPFORM_VEX_RVM 8 +/** VEX+ModR/M: reg, vvvv, r/m (register). */ +#define IEMOPFORM_VEX_RVM_REG (IEMOPFORM_VEX_RVM | IEMOPFORM_MOD3) +/** VEX+ModR/M: reg, vvvv, r/m (memory). */ +#define IEMOPFORM_VEX_RVM_MEM (IEMOPFORM_VEX_RVM | IEMOPFORM_NOT_MOD3) +/** VEX+ModR/M: r/m, vvvv, reg */ +#define IEMOPFORM_VEX_MVR 9 +/** VEX+ModR/M: r/m, vvvv, reg (register) */ +#define IEMOPFORM_VEX_MVR_REG (IEMOPFORM_VEX_MVR | IEMOPFORM_MOD3) +/** VEX+ModR/M: r/m, vvvv, reg (memory) */ +#define IEMOPFORM_VEX_MVR_MEM (IEMOPFORM_VEX_MVR | IEMOPFORM_NOT_MOD3) + +/** Fixed register instruction, no R/M. */ +#define IEMOPFORM_FIXED 16 + +/** The r/m is a register. */ +#define IEMOPFORM_MOD3 RT_BIT_32(8) +/** The r/m is a memory access. */ +#define IEMOPFORM_NOT_MOD3 RT_BIT_32(9) +/** @} */ + +/** @name IEMOPHINT_XXX - Additional Opcode Hints + * @note These are ORed together with IEMOPFORM_XXX. + * @{ */ +/** Ignores the operand size prefix (66h). */ +#define IEMOPHINT_IGNORES_OZ_PFX RT_BIT_32(10) +/** Ignores REX.W (aka WIG). */ +#define IEMOPHINT_IGNORES_REXW RT_BIT_32(11) +/** Both the operand size prefixes (66h + REX.W) are ignored. */ +#define IEMOPHINT_IGNORES_OP_SIZES (IEMOPHINT_IGNORES_OZ_PFX | IEMOPHINT_IGNORES_REXW) +/** Allowed with the lock prefix. */ +#define IEMOPHINT_LOCK_ALLOWED RT_BIT_32(11) +/** The VEX.L value is ignored (aka LIG). */ +#define IEMOPHINT_VEX_L_IGNORED RT_BIT_32(12) +/** The VEX.L value must be zero (i.e. 128-bit width only). */ +#define IEMOPHINT_VEX_L_ZERO RT_BIT_32(13) + +/** Hint to IEMAllInstructionPython.py that this macro should be skipped. */ +#define IEMOPHINT_SKIP_PYTHON RT_BIT_32(31) +/** @} */ + +/** + * Possible hardware task switch sources. + */ +typedef enum IEMTASKSWITCH +{ + /** Task switch caused by an interrupt/exception. */ + IEMTASKSWITCH_INT_XCPT = 1, + /** Task switch caused by a far CALL. 
*/ + IEMTASKSWITCH_CALL, + /** Task switch caused by a far JMP. */ + IEMTASKSWITCH_JUMP, + /** Task switch caused by an IRET. */ + IEMTASKSWITCH_IRET +} IEMTASKSWITCH; +AssertCompileSize(IEMTASKSWITCH, 4); + +/** + * Possible CrX load (write) sources. + */ +typedef enum IEMACCESSCRX +{ + /** CrX access caused by 'mov crX' instruction. */ + IEMACCESSCRX_MOV_CRX, + /** CrX (CR0) write caused by 'lmsw' instruction. */ + IEMACCESSCRX_LMSW, + /** CrX (CR0) write caused by 'clts' instruction. */ + IEMACCESSCRX_CLTS, + /** CrX (CR0) read caused by 'smsw' instruction. */ + IEMACCESSCRX_SMSW +} IEMACCESSCRX; + +# ifdef VBOX_WITH_NESTED_HWVIRT_VMX +PGM_ALL_CB2_PROTO(FNPGMPHYSHANDLER) iemVmxApicAccessPageHandler; +# endif + +/** + * Indicates to the verifier that the given flag set is undefined. + * + * Can be invoked again to add more flags. + * + * This is a NOOP if the verifier isn't compiled in. + * + * @note We're temporarily keeping this until code is converted to new + * disassembler style opcode handling. + */ +#define IEMOP_VERIFICATION_UNDEFINED_EFLAGS(a_fEfl) do { } while (0) + + +/** @def IEM_DECL_IMPL_TYPE + * For typedef'ing an instruction implementation function. + * + * @param a_RetType The return type. + * @param a_Name The name of the type. + * @param a_ArgList The argument list enclosed in parentheses. + */ + +/** @def IEM_DECL_IMPL_DEF + * For defining an instruction implementation function. + * + * @param a_RetType The return type. + * @param a_Name The name of the type. + * @param a_ArgList The argument list enclosed in parentheses. + */ + +#if defined(__GNUC__) && defined(RT_ARCH_X86) +# define IEM_DECL_IMPL_TYPE(a_RetType, a_Name, a_ArgList) \ + __attribute__((__fastcall__)) a_RetType (a_Name) a_ArgList +# define IEM_DECL_IMPL_DEF(a_RetType, a_Name, a_ArgList) \ + __attribute__((__fastcall__, __nothrow__)) a_RetType a_Name a_ArgList + +#elif defined(_MSC_VER) && defined(RT_ARCH_X86) +# define IEM_DECL_IMPL_TYPE(a_RetType, a_Name, a_ArgList) \ + a_RetType (__fastcall a_Name) a_ArgList +# define IEM_DECL_IMPL_DEF(a_RetType, a_Name, a_ArgList) \ + a_RetType __fastcall a_Name a_ArgList + +#else +# define IEM_DECL_IMPL_TYPE(a_RetType, a_Name, a_ArgList) \ + a_RetType (VBOXCALL a_Name) a_ArgList +# define IEM_DECL_IMPL_DEF(a_RetType, a_Name, a_ArgList) \ + a_RetType VBOXCALL a_Name a_ArgList + +#endif + +/** @name Arithmetic assignment operations on bytes (binary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU8, (uint8_t *pu8Dst, uint8_t u8Src, uint32_t *pEFlags)); +typedef FNIEMAIMPLBINU8 *PFNIEMAIMPLBINU8; +FNIEMAIMPLBINU8 iemAImpl_add_u8, iemAImpl_add_u8_locked; +FNIEMAIMPLBINU8 iemAImpl_adc_u8, iemAImpl_adc_u8_locked; +FNIEMAIMPLBINU8 iemAImpl_sub_u8, iemAImpl_sub_u8_locked; +FNIEMAIMPLBINU8 iemAImpl_sbb_u8, iemAImpl_sbb_u8_locked; +FNIEMAIMPLBINU8 iemAImpl_or_u8, iemAImpl_or_u8_locked; +FNIEMAIMPLBINU8 iemAImpl_xor_u8, iemAImpl_xor_u8_locked; +FNIEMAIMPLBINU8 iemAImpl_and_u8, iemAImpl_and_u8_locked; +/** @} */ + +/** @name Arithmetic assignment operations on words (binary). 
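 *
 * (Editorial illustration, not upstream text: all of these helpers follow the
 * FNIEMAIMPLBINU16 shape, updating the destination in place together with the
 * EFLAGS word passed by pointer, e.g.
 * @code
 *      uint32_t fEFlags = 0;   // would be seeded from the guest EFLAGS
 *      iemAImpl_add_u16(&u16Dst, u16Src, &fEFlags);
 * @endcode
 * with u16Dst and u16Src standing in for the decoded operands.)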
+ * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU16, (uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pEFlags)); +typedef FNIEMAIMPLBINU16 *PFNIEMAIMPLBINU16; +FNIEMAIMPLBINU16 iemAImpl_add_u16, iemAImpl_add_u16_locked; +FNIEMAIMPLBINU16 iemAImpl_adc_u16, iemAImpl_adc_u16_locked; +FNIEMAIMPLBINU16 iemAImpl_sub_u16, iemAImpl_sub_u16_locked; +FNIEMAIMPLBINU16 iemAImpl_sbb_u16, iemAImpl_sbb_u16_locked; +FNIEMAIMPLBINU16 iemAImpl_or_u16, iemAImpl_or_u16_locked; +FNIEMAIMPLBINU16 iemAImpl_xor_u16, iemAImpl_xor_u16_locked; +FNIEMAIMPLBINU16 iemAImpl_and_u16, iemAImpl_and_u16_locked; +/** @} */ + +/** @name Arithmetic assignment operations on double words (binary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU32, (uint32_t *pu32Dst, uint32_t u32Src, uint32_t *pEFlags)); +typedef FNIEMAIMPLBINU32 *PFNIEMAIMPLBINU32; +FNIEMAIMPLBINU32 iemAImpl_add_u32, iemAImpl_add_u32_locked; +FNIEMAIMPLBINU32 iemAImpl_adc_u32, iemAImpl_adc_u32_locked; +FNIEMAIMPLBINU32 iemAImpl_sub_u32, iemAImpl_sub_u32_locked; +FNIEMAIMPLBINU32 iemAImpl_sbb_u32, iemAImpl_sbb_u32_locked; +FNIEMAIMPLBINU32 iemAImpl_or_u32, iemAImpl_or_u32_locked; +FNIEMAIMPLBINU32 iemAImpl_xor_u32, iemAImpl_xor_u32_locked; +FNIEMAIMPLBINU32 iemAImpl_and_u32, iemAImpl_and_u32_locked; +/** @} */ + +/** @name Arithmetic assignment operations on quad words (binary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU64, (uint64_t *pu64Dst, uint64_t u64Src, uint32_t *pEFlags)); +typedef FNIEMAIMPLBINU64 *PFNIEMAIMPLBINU64; +FNIEMAIMPLBINU64 iemAImpl_add_u64, iemAImpl_add_u64_locked; +FNIEMAIMPLBINU64 iemAImpl_adc_u64, iemAImpl_adc_u64_locked; +FNIEMAIMPLBINU64 iemAImpl_sub_u64, iemAImpl_sub_u64_locked; +FNIEMAIMPLBINU64 iemAImpl_sbb_u64, iemAImpl_sbb_u64_locked; +FNIEMAIMPLBINU64 iemAImpl_or_u64, iemAImpl_or_u64_locked; +FNIEMAIMPLBINU64 iemAImpl_xor_u64, iemAImpl_xor_u64_locked; +FNIEMAIMPLBINU64 iemAImpl_and_u64, iemAImpl_and_u64_locked; +/** @} */ + +/** @name Compare operations (thrown in with the binary ops). + * @{ */ +FNIEMAIMPLBINU8 iemAImpl_cmp_u8; +FNIEMAIMPLBINU16 iemAImpl_cmp_u16; +FNIEMAIMPLBINU32 iemAImpl_cmp_u32; +FNIEMAIMPLBINU64 iemAImpl_cmp_u64; +/** @} */ + +/** @name Test operations (thrown in with the binary ops). + * @{ */ +FNIEMAIMPLBINU8 iemAImpl_test_u8; +FNIEMAIMPLBINU16 iemAImpl_test_u16; +FNIEMAIMPLBINU32 iemAImpl_test_u32; +FNIEMAIMPLBINU64 iemAImpl_test_u64; +/** @} */ + +/** @name Bit operations operations (thrown in with the binary ops). + * @{ */ +FNIEMAIMPLBINU16 iemAImpl_bt_u16, iemAImpl_bt_u16_locked; +FNIEMAIMPLBINU32 iemAImpl_bt_u32, iemAImpl_bt_u32_locked; +FNIEMAIMPLBINU64 iemAImpl_bt_u64, iemAImpl_bt_u64_locked; +FNIEMAIMPLBINU16 iemAImpl_btc_u16, iemAImpl_btc_u16_locked; +FNIEMAIMPLBINU32 iemAImpl_btc_u32, iemAImpl_btc_u32_locked; +FNIEMAIMPLBINU64 iemAImpl_btc_u64, iemAImpl_btc_u64_locked; +FNIEMAIMPLBINU16 iemAImpl_btr_u16, iemAImpl_btr_u16_locked; +FNIEMAIMPLBINU32 iemAImpl_btr_u32, iemAImpl_btr_u32_locked; +FNIEMAIMPLBINU64 iemAImpl_btr_u64, iemAImpl_btr_u64_locked; +FNIEMAIMPLBINU16 iemAImpl_bts_u16, iemAImpl_bts_u16_locked; +FNIEMAIMPLBINU32 iemAImpl_bts_u32, iemAImpl_bts_u32_locked; +FNIEMAIMPLBINU64 iemAImpl_bts_u64, iemAImpl_bts_u64_locked; +/** @} */ + +/** @name Exchange memory with register operations. 
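 *
 * (Editorial note, not upstream text: each helper swaps the two operands it is
 * given; a plain C reference for the 16-bit variant would be roughly
 * @code
 *      uint16_t const uTmp = *pu16Mem; *pu16Mem = *pu16Reg; *pu16Reg = uTmp;
 * @endcode
 * with any required atomicity supplied by the actual implementations.)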
+ * @{ */ +IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8, (uint8_t *pu8Mem, uint8_t *pu8Reg)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16,(uint16_t *pu16Mem, uint16_t *pu16Reg)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32,(uint32_t *pu32Mem, uint32_t *pu32Reg)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64,(uint64_t *pu64Mem, uint64_t *pu64Reg)); +/** @} */ + +/** @name Exchange and add operations. + * @{ */ +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8, (uint8_t *pu8Dst, uint8_t *pu8Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16,(uint16_t *pu16Dst, uint16_t *pu16Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32,(uint32_t *pu32Dst, uint32_t *pu32Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64,(uint64_t *pu64Dst, uint64_t *pu64Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u8_locked, (uint8_t *pu8Dst, uint8_t *pu8Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u16_locked,(uint16_t *pu16Dst, uint16_t *pu16Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u32_locked,(uint32_t *pu32Dst, uint32_t *pu32Reg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u64_locked,(uint64_t *pu64Dst, uint64_t *pu64Reg, uint32_t *pEFlags)); +/** @} */ + +/** @name Compare and exchange. + * @{ */ +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags)); +#ifdef RT_ARCH_X86 +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags)); +#else +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags)); +#endif +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, + uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, + uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx, + uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx, + uint32_t *pEFlags)); +IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, + PRTUINT128U pu128RbxRcx, uint32_t *pEFlags)); +/** @} */ + +/** @name Memory ordering + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEMFENCE,(void)); +typedef FNIEMAIMPLMEMFENCE *PFNIEMAIMPLMEMFENCE; +IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void)); +IEM_DECL_IMPL_DEF(void, 
iemAImpl_sfence,(void)); +IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void)); +IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void)); +/** @} */ + +/** @name Double precision shifts + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTDBLU16,(uint16_t *pu16Dst, uint16_t u16Src, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTDBLU16 *PFNIEMAIMPLSHIFTDBLU16; +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTDBLU32,(uint32_t *pu32Dst, uint32_t u32Src, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTDBLU32 *PFNIEMAIMPLSHIFTDBLU32; +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTDBLU64,(uint64_t *pu64Dst, uint64_t u64Src, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTDBLU64 *PFNIEMAIMPLSHIFTDBLU64; +FNIEMAIMPLSHIFTDBLU16 iemAImpl_shld_u16; +FNIEMAIMPLSHIFTDBLU32 iemAImpl_shld_u32; +FNIEMAIMPLSHIFTDBLU64 iemAImpl_shld_u64; +FNIEMAIMPLSHIFTDBLU16 iemAImpl_shrd_u16; +FNIEMAIMPLSHIFTDBLU32 iemAImpl_shrd_u32; +FNIEMAIMPLSHIFTDBLU64 iemAImpl_shrd_u64; +/** @} */ + + +/** @name Bit search operations (thrown in with the binary ops). + * @{ */ +FNIEMAIMPLBINU16 iemAImpl_bsf_u16; +FNIEMAIMPLBINU32 iemAImpl_bsf_u32; +FNIEMAIMPLBINU64 iemAImpl_bsf_u64; +FNIEMAIMPLBINU16 iemAImpl_bsr_u16; +FNIEMAIMPLBINU32 iemAImpl_bsr_u32; +FNIEMAIMPLBINU64 iemAImpl_bsr_u64; +/** @} */ + +/** @name Signed multiplication operations (thrown in with the binary ops). + * @{ */ +FNIEMAIMPLBINU16 iemAImpl_imul_two_u16; +FNIEMAIMPLBINU32 iemAImpl_imul_two_u32; +FNIEMAIMPLBINU64 iemAImpl_imul_two_u64; +/** @} */ + +/** @name Arithmetic assignment operations on bytes (unary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU8, (uint8_t *pu8Dst, uint32_t *pEFlags)); +typedef FNIEMAIMPLUNARYU8 *PFNIEMAIMPLUNARYU8; +FNIEMAIMPLUNARYU8 iemAImpl_inc_u8, iemAImpl_inc_u8_locked; +FNIEMAIMPLUNARYU8 iemAImpl_dec_u8, iemAImpl_dec_u8_locked; +FNIEMAIMPLUNARYU8 iemAImpl_not_u8, iemAImpl_not_u8_locked; +FNIEMAIMPLUNARYU8 iemAImpl_neg_u8, iemAImpl_neg_u8_locked; +/** @} */ + +/** @name Arithmetic assignment operations on words (unary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU16, (uint16_t *pu16Dst, uint32_t *pEFlags)); +typedef FNIEMAIMPLUNARYU16 *PFNIEMAIMPLUNARYU16; +FNIEMAIMPLUNARYU16 iemAImpl_inc_u16, iemAImpl_inc_u16_locked; +FNIEMAIMPLUNARYU16 iemAImpl_dec_u16, iemAImpl_dec_u16_locked; +FNIEMAIMPLUNARYU16 iemAImpl_not_u16, iemAImpl_not_u16_locked; +FNIEMAIMPLUNARYU16 iemAImpl_neg_u16, iemAImpl_neg_u16_locked; +/** @} */ + +/** @name Arithmetic assignment operations on double words (unary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU32, (uint32_t *pu32Dst, uint32_t *pEFlags)); +typedef FNIEMAIMPLUNARYU32 *PFNIEMAIMPLUNARYU32; +FNIEMAIMPLUNARYU32 iemAImpl_inc_u32, iemAImpl_inc_u32_locked; +FNIEMAIMPLUNARYU32 iemAImpl_dec_u32, iemAImpl_dec_u32_locked; +FNIEMAIMPLUNARYU32 iemAImpl_not_u32, iemAImpl_not_u32_locked; +FNIEMAIMPLUNARYU32 iemAImpl_neg_u32, iemAImpl_neg_u32_locked; +/** @} */ + +/** @name Arithmetic assignment operations on quad words (unary). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU64, (uint64_t *pu64Dst, uint32_t *pEFlags)); +typedef FNIEMAIMPLUNARYU64 *PFNIEMAIMPLUNARYU64; +FNIEMAIMPLUNARYU64 iemAImpl_inc_u64, iemAImpl_inc_u64_locked; +FNIEMAIMPLUNARYU64 iemAImpl_dec_u64, iemAImpl_dec_u64_locked; +FNIEMAIMPLUNARYU64 iemAImpl_not_u64, iemAImpl_not_u64_locked; +FNIEMAIMPLUNARYU64 iemAImpl_neg_u64, iemAImpl_neg_u64_locked; +/** @} */ + + +/** @name Shift operations on bytes (Group 2). 
+ * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU8,(uint8_t *pu8Dst, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTU8 *PFNIEMAIMPLSHIFTU8; +FNIEMAIMPLSHIFTU8 iemAImpl_rol_u8; +FNIEMAIMPLSHIFTU8 iemAImpl_ror_u8; +FNIEMAIMPLSHIFTU8 iemAImpl_rcl_u8; +FNIEMAIMPLSHIFTU8 iemAImpl_rcr_u8; +FNIEMAIMPLSHIFTU8 iemAImpl_shl_u8; +FNIEMAIMPLSHIFTU8 iemAImpl_shr_u8; +FNIEMAIMPLSHIFTU8 iemAImpl_sar_u8; +/** @} */ + +/** @name Shift operations on words (Group 2). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU16,(uint16_t *pu16Dst, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTU16 *PFNIEMAIMPLSHIFTU16; +FNIEMAIMPLSHIFTU16 iemAImpl_rol_u16; +FNIEMAIMPLSHIFTU16 iemAImpl_ror_u16; +FNIEMAIMPLSHIFTU16 iemAImpl_rcl_u16; +FNIEMAIMPLSHIFTU16 iemAImpl_rcr_u16; +FNIEMAIMPLSHIFTU16 iemAImpl_shl_u16; +FNIEMAIMPLSHIFTU16 iemAImpl_shr_u16; +FNIEMAIMPLSHIFTU16 iemAImpl_sar_u16; +/** @} */ + +/** @name Shift operations on double words (Group 2). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU32,(uint32_t *pu32Dst, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTU32 *PFNIEMAIMPLSHIFTU32; +FNIEMAIMPLSHIFTU32 iemAImpl_rol_u32; +FNIEMAIMPLSHIFTU32 iemAImpl_ror_u32; +FNIEMAIMPLSHIFTU32 iemAImpl_rcl_u32; +FNIEMAIMPLSHIFTU32 iemAImpl_rcr_u32; +FNIEMAIMPLSHIFTU32 iemAImpl_shl_u32; +FNIEMAIMPLSHIFTU32 iemAImpl_shr_u32; +FNIEMAIMPLSHIFTU32 iemAImpl_sar_u32; +/** @} */ + +/** @name Shift operations on words (Group 2). + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU64,(uint64_t *pu64Dst, uint8_t cShift, uint32_t *pEFlags)); +typedef FNIEMAIMPLSHIFTU64 *PFNIEMAIMPLSHIFTU64; +FNIEMAIMPLSHIFTU64 iemAImpl_rol_u64; +FNIEMAIMPLSHIFTU64 iemAImpl_ror_u64; +FNIEMAIMPLSHIFTU64 iemAImpl_rcl_u64; +FNIEMAIMPLSHIFTU64 iemAImpl_rcr_u64; +FNIEMAIMPLSHIFTU64 iemAImpl_shl_u64; +FNIEMAIMPLSHIFTU64 iemAImpl_shr_u64; +FNIEMAIMPLSHIFTU64 iemAImpl_sar_u64; +/** @} */ + +/** @name Multiplication and division operations. + * @{ */ +typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU8,(uint16_t *pu16AX, uint8_t u8FactorDivisor, uint32_t *pEFlags)); +typedef FNIEMAIMPLMULDIVU8 *PFNIEMAIMPLMULDIVU8; +FNIEMAIMPLMULDIVU8 iemAImpl_mul_u8, iemAImpl_imul_u8; +FNIEMAIMPLMULDIVU8 iemAImpl_div_u8, iemAImpl_idiv_u8; + +typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU16,(uint16_t *pu16AX, uint16_t *pu16DX, uint16_t u16FactorDivisor, uint32_t *pEFlags)); +typedef FNIEMAIMPLMULDIVU16 *PFNIEMAIMPLMULDIVU16; +FNIEMAIMPLMULDIVU16 iemAImpl_mul_u16, iemAImpl_imul_u16; +FNIEMAIMPLMULDIVU16 iemAImpl_div_u16, iemAImpl_idiv_u16; + +typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU32,(uint32_t *pu32EAX, uint32_t *pu32EDX, uint32_t u32FactorDivisor, uint32_t *pEFlags)); +typedef FNIEMAIMPLMULDIVU32 *PFNIEMAIMPLMULDIVU32; +FNIEMAIMPLMULDIVU32 iemAImpl_mul_u32, iemAImpl_imul_u32; +FNIEMAIMPLMULDIVU32 iemAImpl_div_u32, iemAImpl_idiv_u32; + +typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64FactorDivisor, uint32_t *pEFlags)); +typedef FNIEMAIMPLMULDIVU64 *PFNIEMAIMPLMULDIVU64; +FNIEMAIMPLMULDIVU64 iemAImpl_mul_u64, iemAImpl_imul_u64; +FNIEMAIMPLMULDIVU64 iemAImpl_div_u64, iemAImpl_idiv_u64; +/** @} */ + +/** @name Byte Swap. + * @{ */ +IEM_DECL_IMPL_TYPE(void, iemAImpl_bswap_u16,(uint32_t *pu32Dst)); /* Yes, 32-bit register access. */ +IEM_DECL_IMPL_TYPE(void, iemAImpl_bswap_u32,(uint32_t *pu32Dst)); +IEM_DECL_IMPL_TYPE(void, iemAImpl_bswap_u64,(uint64_t *pu64Dst)); +/** @} */ + +/** @name Misc. 
+ * @{ */ +FNIEMAIMPLBINU16 iemAImpl_arpl; +/** @} */ + + +/** @name FPU operations taking a 32-bit float argument + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR32FSW,(PCX86FXSTATE pFpuState, uint16_t *pFSW, + PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2)); +typedef FNIEMAIMPLFPUR32FSW *PFNIEMAIMPLFPUR32FSW; + +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, + PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2)); +typedef FNIEMAIMPLFPUR32 *PFNIEMAIMPLFPUR32; + +FNIEMAIMPLFPUR32FSW iemAImpl_fcom_r80_by_r32; +FNIEMAIMPLFPUR32 iemAImpl_fadd_r80_by_r32; +FNIEMAIMPLFPUR32 iemAImpl_fmul_r80_by_r32; +FNIEMAIMPLFPUR32 iemAImpl_fsub_r80_by_r32; +FNIEMAIMPLFPUR32 iemAImpl_fsubr_r80_by_r32; +FNIEMAIMPLFPUR32 iemAImpl_fdiv_r80_by_r32; +FNIEMAIMPLFPUR32 iemAImpl_fdivr_r80_by_r32; + +IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r32_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + PRTFLOAT32U pr32Val, PCRTFLOAT80U pr80Val)); +/** @} */ + +/** @name FPU operations taking a 64-bit float argument + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, + PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2)); +typedef FNIEMAIMPLFPUR64 *PFNIEMAIMPLFPUR64; + +FNIEMAIMPLFPUR64 iemAImpl_fadd_r80_by_r64; +FNIEMAIMPLFPUR64 iemAImpl_fmul_r80_by_r64; +FNIEMAIMPLFPUR64 iemAImpl_fsub_r80_by_r64; +FNIEMAIMPLFPUR64 iemAImpl_fsubr_r80_by_r64; +FNIEMAIMPLFPUR64 iemAImpl_fdiv_r80_by_r64; +FNIEMAIMPLFPUR64 iemAImpl_fdivr_r80_by_r64; + +IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pFSW, + PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r64_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + PRTFLOAT64U pr32Val, PCRTFLOAT80U pr80Val)); +/** @} */ + +/** @name FPU operations taking a 80-bit float argument + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, + PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2)); +typedef FNIEMAIMPLFPUR80 *PFNIEMAIMPLFPUR80; +FNIEMAIMPLFPUR80 iemAImpl_fadd_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fmul_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fsub_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fsubr_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fdiv_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fdivr_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fprem_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fprem1_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fscale_r80_by_r80; + +FNIEMAIMPLFPUR80 iemAImpl_fpatan_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fyl2x_r80_by_r80; +FNIEMAIMPLFPUR80 iemAImpl_fyl2xp1_r80_by_r80; + +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80FSW,(PCX86FXSTATE pFpuState, uint16_t *pFSW, + PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2)); +typedef FNIEMAIMPLFPUR80FSW *PFNIEMAIMPLFPUR80FSW; +FNIEMAIMPLFPUR80FSW iemAImpl_fcom_r80_by_r80; +FNIEMAIMPLFPUR80FSW iemAImpl_fucom_r80_by_r80; + +typedef IEM_DECL_IMPL_TYPE(uint32_t, FNIEMAIMPLFPUR80EFL,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, + PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2)); +typedef FNIEMAIMPLFPUR80EFL *PFNIEMAIMPLFPUR80EFL; +FNIEMAIMPLFPUR80EFL iemAImpl_fcomi_r80_by_r80; +FNIEMAIMPLFPUR80EFL iemAImpl_fucomi_r80_by_r80; + +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80UNARY,(PCX86FXSTATE pFpuState, 
PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val)); +typedef FNIEMAIMPLFPUR80UNARY *PFNIEMAIMPLFPUR80UNARY; +FNIEMAIMPLFPUR80UNARY iemAImpl_fabs_r80; +FNIEMAIMPLFPUR80UNARY iemAImpl_fchs_r80; +FNIEMAIMPLFPUR80UNARY iemAImpl_f2xm1_r80; +FNIEMAIMPLFPUR80UNARY iemAImpl_fsqrt_r80; +FNIEMAIMPLFPUR80UNARY iemAImpl_frndint_r80; +FNIEMAIMPLFPUR80UNARY iemAImpl_fsin_r80; +FNIEMAIMPLFPUR80UNARY iemAImpl_fcos_r80; + +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80UNARYFSW,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val)); +typedef FNIEMAIMPLFPUR80UNARYFSW *PFNIEMAIMPLFPUR80UNARYFSW; +FNIEMAIMPLFPUR80UNARYFSW iemAImpl_ftst_r80; +FNIEMAIMPLFPUR80UNARYFSW iemAImpl_fxam_r80; + +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80LDCONST,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes)); +typedef FNIEMAIMPLFPUR80LDCONST *PFNIEMAIMPLFPUR80LDCONST; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fld1; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fldl2t; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fldl2e; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fldpi; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fldlg2; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fldln2; +FNIEMAIMPLFPUR80LDCONST iemAImpl_fldz; + +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUR80UNARYTWO,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, + PCRTFLOAT80U pr80Val)); +typedef FNIEMAIMPLFPUR80UNARYTWO *PFNIEMAIMPLFPUR80UNARYTWO; +FNIEMAIMPLFPUR80UNARYTWO iemAImpl_fptan_r80_r80; +FNIEMAIMPLFPUR80UNARYTWO iemAImpl_fxtract_r80_r80; +FNIEMAIMPLFPUR80UNARYTWO iemAImpl_fsincos_r80_r80; + +IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src)); + +/** @} */ + +/** @name FPU operations taking a 16-bit signed integer argument + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUI16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, + PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2)); +typedef FNIEMAIMPLFPUI16 *PFNIEMAIMPLFPUI16; + +FNIEMAIMPLFPUI16 iemAImpl_fiadd_r80_by_i16; +FNIEMAIMPLFPUI16 iemAImpl_fimul_r80_by_i16; +FNIEMAIMPLFPUI16 iemAImpl_fisub_r80_by_i16; +FNIEMAIMPLFPUI16 iemAImpl_fisubr_r80_by_i16; +FNIEMAIMPLFPUI16 iemAImpl_fidiv_r80_by_i16; +FNIEMAIMPLFPUI16 iemAImpl_fidivr_r80_by_i16; + +IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, + PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2)); + +IEM_DECL_IMPL_DEF(void, iemAImpl_fild_i16_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, int16_t const *pi16Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + int16_t *pi16Val, PCRTFLOAT80U pr80Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fistt_r80_to_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + int16_t *pi16Val, PCRTFLOAT80U pr80Val)); +/** @} */ + +/** @name FPU operations taking a 32-bit signed integer argument + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUI32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, + PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2)); +typedef FNIEMAIMPLFPUI32 *PFNIEMAIMPLFPUI32; + +FNIEMAIMPLFPUI32 iemAImpl_fiadd_r80_by_i32; +FNIEMAIMPLFPUI32 iemAImpl_fimul_r80_by_i32; +FNIEMAIMPLFPUI32 iemAImpl_fisub_r80_by_i32; +FNIEMAIMPLFPUI32 iemAImpl_fisubr_r80_by_i32; +FNIEMAIMPLFPUI32 iemAImpl_fidiv_r80_by_i32; +FNIEMAIMPLFPUI32 iemAImpl_fidivr_r80_by_i32; + +IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, + PCRTFLOAT80U 
pr80Val1, int32_t const *pi32Val2)); + +IEM_DECL_IMPL_DEF(void, iemAImpl_fild_i32_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, int32_t const *pi32Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + int32_t *pi32Val, PCRTFLOAT80U pr80Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fistt_r80_to_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + int32_t *pi32Val, PCRTFLOAT80U pr80Val)); +/** @} */ + +/** @name FPU operations taking a 64-bit signed integer argument + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUI64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, + PCRTFLOAT80U pr80Val1, int64_t const *pi64Val2)); +typedef FNIEMAIMPLFPUI64 *PFNIEMAIMPLFPUI64; + +FNIEMAIMPLFPUI64 iemAImpl_fiadd_r80_by_i64; +FNIEMAIMPLFPUI64 iemAImpl_fimul_r80_by_i64; +FNIEMAIMPLFPUI64 iemAImpl_fisub_r80_by_i64; +FNIEMAIMPLFPUI64 iemAImpl_fisubr_r80_by_i64; +FNIEMAIMPLFPUI64 iemAImpl_fidiv_r80_by_i64; +FNIEMAIMPLFPUI64 iemAImpl_fidivr_r80_by_i64; + +IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i64,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, + PCRTFLOAT80U pr80Val1, int64_t const *pi64Val2)); + +IEM_DECL_IMPL_DEF(void, iemAImpl_fild_i64_to_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, int64_t const *pi64Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + int64_t *pi64Val, PCRTFLOAT80U pr80Val)); +IEM_DECL_IMPL_DEF(void, iemAImpl_fistt_r80_to_i64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, + int64_t *pi32Val, PCRTFLOAT80U pr80Val)); +/** @} */ + + +/** Temporary type representing a 256-bit vector register. */ +typedef struct {uint64_t au64[4]; } IEMVMM256; +/** Temporary type pointing to a 256-bit vector register. */ +typedef IEMVMM256 *PIEMVMM256; +/** Temporary type pointing to a const 256-bit vector register. */ +typedef IEMVMM256 *PCIEMVMM256; + + +/** @name Media (SSE/MMX/AVX) operations: full1 + full2 -> full1. + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF2U64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src)); +typedef FNIEMAIMPLMEDIAF2U64 *PFNIEMAIMPLMEDIAF2U64; +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF2U128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src)); +typedef FNIEMAIMPLMEDIAF2U128 *PFNIEMAIMPLMEDIAF2U128; +FNIEMAIMPLMEDIAF2U64 iemAImpl_pxor_u64, iemAImpl_pcmpeqb_u64, iemAImpl_pcmpeqw_u64, iemAImpl_pcmpeqd_u64; +FNIEMAIMPLMEDIAF2U128 iemAImpl_pxor_u128, iemAImpl_pcmpeqb_u128, iemAImpl_pcmpeqw_u128, iemAImpl_pcmpeqd_u128; +/** @} */ + +/** @name Media (SSE/MMX/AVX) operations: lowhalf1 + lowhalf1 -> full1. + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1L1U64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src)); +typedef FNIEMAIMPLMEDIAF1L1U64 *PFNIEMAIMPLMEDIAF1L1U64; +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1L1U128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src)); +typedef FNIEMAIMPLMEDIAF1L1U128 *PFNIEMAIMPLMEDIAF1L1U128; +FNIEMAIMPLMEDIAF1L1U64 iemAImpl_punpcklbw_u64, iemAImpl_punpcklwd_u64, iemAImpl_punpckldq_u64; +FNIEMAIMPLMEDIAF1L1U128 iemAImpl_punpcklbw_u128, iemAImpl_punpcklwd_u128, iemAImpl_punpckldq_u128, iemAImpl_punpcklqdq_u128; +/** @} */ + +/** @name Media (SSE/MMX/AVX) operations: hihalf1 + hihalf2 -> full1. 
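 *
 * (Editorial note, not upstream text: for the 64-bit forms these interleave the
 * high-order halves of the two inputs; a scalar reference for punpckhbw would
 * be roughly
 * @code
 *      uint8_t const *pbD = (uint8_t const *)pu64Dst, *pbS = (uint8_t const *)pu64Src;
 *      uint8_t const abRes[8] = { pbD[4], pbS[4], pbD[5], pbS[5], pbD[6], pbS[6], pbD[7], pbS[7] };
 * @endcode
 * with abRes then written back to *pu64Dst.)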
+ * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1H1U64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src)); +typedef FNIEMAIMPLMEDIAF2U64 *PFNIEMAIMPLMEDIAF1H1U64; +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAF1H1U128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src)); +typedef FNIEMAIMPLMEDIAF2U128 *PFNIEMAIMPLMEDIAF1H1U128; +FNIEMAIMPLMEDIAF1H1U64 iemAImpl_punpckhbw_u64, iemAImpl_punpckhwd_u64, iemAImpl_punpckhdq_u64; +FNIEMAIMPLMEDIAF1H1U128 iemAImpl_punpckhbw_u128, iemAImpl_punpckhwd_u128, iemAImpl_punpckhdq_u128, iemAImpl_punpckhqdq_u128; +/** @} */ + +/** @name Media (SSE/MMX/AVX) operation: Packed Shuffle Stuff (evil) + * @{ */ +typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLMEDIAPSHUF,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, + PCRTUINT128U pu128Src, uint8_t bEvil)); +typedef FNIEMAIMPLMEDIAPSHUF *PFNIEMAIMPLMEDIAPSHUF; +FNIEMAIMPLMEDIAPSHUF iemAImpl_pshufhw, iemAImpl_pshuflw, iemAImpl_pshufd; +IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil)); +/** @} */ + +/** @name Media (SSE/MMX/AVX) operation: Move Byte Mask + * @{ */ +IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src)); +IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src)); +/** @} */ + +/** @name Media (SSE/MMX/AVX) operation: Sort this later + * @{ */ +IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc)); +IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc)); +IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc)); + +IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc)); +IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc)); +IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc)); +IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc)); + +/** @} */ + + +/** @name Function tables. + * @{ + */ + +/** + * Function table for a binary operator providing implementation based on + * operand size. + */ +typedef struct IEMOPBINSIZES +{ + PFNIEMAIMPLBINU8 pfnNormalU8, pfnLockedU8; + PFNIEMAIMPLBINU16 pfnNormalU16, pfnLockedU16; + PFNIEMAIMPLBINU32 pfnNormalU32, pfnLockedU32; + PFNIEMAIMPLBINU64 pfnNormalU64, pfnLockedU64; +} IEMOPBINSIZES; +/** Pointer to a binary operator function table. */ +typedef IEMOPBINSIZES const *PCIEMOPBINSIZES; + + +/** + * Function table for a unary operator providing implementation based on + * operand size. + */ +typedef struct IEMOPUNARYSIZES +{ + PFNIEMAIMPLUNARYU8 pfnNormalU8, pfnLockedU8; + PFNIEMAIMPLUNARYU16 pfnNormalU16, pfnLockedU16; + PFNIEMAIMPLUNARYU32 pfnNormalU32, pfnLockedU32; + PFNIEMAIMPLUNARYU64 pfnNormalU64, pfnLockedU64; +} IEMOPUNARYSIZES; +/** Pointer to a unary operator function table. */ +typedef IEMOPUNARYSIZES const *PCIEMOPUNARYSIZES; + + +/** + * Function table for a shift operator providing implementation based on + * operand size. 
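 *
 * (Editorial illustration, not upstream text: such a table is typically a
 * static const instance wired up to the helpers declared above, e.g.
 * @code
 *      static const IEMOPSHIFTSIZES s_iemAImpl_shl =
 *      { iemAImpl_shl_u8, iemAImpl_shl_u16, iemAImpl_shl_u32, iemAImpl_shl_u64 };
 * @endcode
 * from which the decoder picks the pfnNormalUxx member matching the effective
 * operand size; the instance name is made up.)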
+ */ +typedef struct IEMOPSHIFTSIZES +{ + PFNIEMAIMPLSHIFTU8 pfnNormalU8; + PFNIEMAIMPLSHIFTU16 pfnNormalU16; + PFNIEMAIMPLSHIFTU32 pfnNormalU32; + PFNIEMAIMPLSHIFTU64 pfnNormalU64; +} IEMOPSHIFTSIZES; +/** Pointer to a shift operator function table. */ +typedef IEMOPSHIFTSIZES const *PCIEMOPSHIFTSIZES; + + +/** + * Function table for a multiplication or division operation. + */ +typedef struct IEMOPMULDIVSIZES +{ + PFNIEMAIMPLMULDIVU8 pfnU8; + PFNIEMAIMPLMULDIVU16 pfnU16; + PFNIEMAIMPLMULDIVU32 pfnU32; + PFNIEMAIMPLMULDIVU64 pfnU64; +} IEMOPMULDIVSIZES; +/** Pointer to a multiplication or division operation function table. */ +typedef IEMOPMULDIVSIZES const *PCIEMOPMULDIVSIZES; + + +/** + * Function table for a double precision shift operator providing implementation + * based on operand size. + */ +typedef struct IEMOPSHIFTDBLSIZES +{ + PFNIEMAIMPLSHIFTDBLU16 pfnNormalU16; + PFNIEMAIMPLSHIFTDBLU32 pfnNormalU32; + PFNIEMAIMPLSHIFTDBLU64 pfnNormalU64; +} IEMOPSHIFTDBLSIZES; +/** Pointer to a double precision shift function table. */ +typedef IEMOPSHIFTDBLSIZES const *PCIEMOPSHIFTDBLSIZES; + + +/** + * Function table for media instruction taking two full sized media registers, + * optionally the 2nd being a memory reference (only modifying the first op.) + */ +typedef struct IEMOPMEDIAF2 +{ + PFNIEMAIMPLMEDIAF2U64 pfnU64; + PFNIEMAIMPLMEDIAF2U128 pfnU128; +} IEMOPMEDIAF2; +/** Pointer to a media operation function table for full sized ops. */ +typedef IEMOPMEDIAF2 const *PCIEMOPMEDIAF2; + +/** + * Function table for media instruction taking taking one full and one lower + * half media register. + */ +typedef struct IEMOPMEDIAF1L1 +{ + PFNIEMAIMPLMEDIAF1L1U64 pfnU64; + PFNIEMAIMPLMEDIAF1L1U128 pfnU128; +} IEMOPMEDIAF1L1; +/** Pointer to a media operation function table for lowhalf+lowhalf -> full. */ +typedef IEMOPMEDIAF1L1 const *PCIEMOPMEDIAF1L1; + +/** + * Function table for media instruction taking taking one full and one high half + * media register. + */ +typedef struct IEMOPMEDIAF1H1 +{ + PFNIEMAIMPLMEDIAF1H1U64 pfnU64; + PFNIEMAIMPLMEDIAF1H1U128 pfnU128; +} IEMOPMEDIAF1H1; +/** Pointer to a media operation function table for hihalf+hihalf -> full. */ +typedef IEMOPMEDIAF1H1 const *PCIEMOPMEDIAF1H1; + + +/** @} */ + + +/** @name C instruction implementations for anything slightly complicated. + * @{ */ + +/** + * For typedef'ing or declaring a C instruction implementation function taking + * no extra arguments. + * + * @param a_Name The name of the type. + */ +# define IEM_CIMPL_DECL_TYPE_0(a_Name) \ + IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr)) +/** + * For defining a C instruction implementation function taking no extra + * arguments. + * + * @param a_Name The name of the function + */ +# define IEM_CIMPL_DEF_0(a_Name) \ + IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr)) +/** + * For calling a C instruction implementation function taking no extra + * arguments. + * + * This special call macro adds default arguments to the call and allow us to + * change these later. + * + * @param a_fn The name of the function. + */ +# define IEM_CIMPL_CALL_0(a_fn) a_fn(pVCpu, cbInstr) + +/** + * For typedef'ing or declaring a C instruction implementation function taking + * one extra argument. + * + * @param a_Name The name of the type. + * @param a_Type0 The argument type. + * @param a_Arg0 The argument name. 
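+ *
+ * Minimal illustrative use of the one-argument variants together; the
+ * function name and argument are made up for the example:
+ * @code
+ *      IEM_CIMPL_DEF_1(iemCImpl_example, uint8_t, iEffSeg)
+ *      {
+ *          RT_NOREF(iEffSeg);
+ *          return VINF_SUCCESS;
+ *      }
+ *      // ... and, from code where pVCpu and cbInstr are in scope:
+ *      return IEM_CIMPL_CALL_1(iemCImpl_example, X86_SREG_DS);
+ * @endcode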
+ */ +# define IEM_CIMPL_DECL_TYPE_1(a_Name, a_Type0, a_Arg0) \ + IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0)) +/** + * For defining a C instruction implementation function taking one extra + * argument. + * + * @param a_Name The name of the function + * @param a_Type0 The argument type. + * @param a_Arg0 The argument name. + */ +# define IEM_CIMPL_DEF_1(a_Name, a_Type0, a_Arg0) \ + IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0)) +/** + * For calling a C instruction implementation function taking one extra + * argument. + * + * This special call macro adds default arguments to the call and allow us to + * change these later. + * + * @param a_fn The name of the function. + * @param a0 The name of the 1st argument. + */ +# define IEM_CIMPL_CALL_1(a_fn, a0) a_fn(pVCpu, cbInstr, (a0)) + +/** + * For typedef'ing or declaring a C instruction implementation function taking + * two extra arguments. + * + * @param a_Name The name of the type. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + */ +# define IEM_CIMPL_DECL_TYPE_2(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1) \ + IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1)) +/** + * For defining a C instruction implementation function taking two extra + * arguments. + * + * @param a_Name The name of the function. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + */ +# define IEM_CIMPL_DEF_2(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1) \ + IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1)) +/** + * For calling a C instruction implementation function taking two extra + * arguments. + * + * This special call macro adds default arguments to the call and allow us to + * change these later. + * + * @param a_fn The name of the function. + * @param a0 The name of the 1st argument. + * @param a1 The name of the 2nd argument. + */ +# define IEM_CIMPL_CALL_2(a_fn, a0, a1) a_fn(pVCpu, cbInstr, (a0), (a1)) + +/** + * For typedef'ing or declaring a C instruction implementation function taking + * three extra arguments. + * + * @param a_Name The name of the type. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + * @param a_Type2 The type of the 3rd argument. + * @param a_Arg2 The name of the 3rd argument. + */ +# define IEM_CIMPL_DECL_TYPE_3(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2) \ + IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2)) +/** + * For defining a C instruction implementation function taking three extra + * arguments. + * + * @param a_Name The name of the function. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + * @param a_Type2 The type of the 3rd argument. + * @param a_Arg2 The name of the 3rd argument. 
+ */ +# define IEM_CIMPL_DEF_3(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2) \ + IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2)) +/** + * For calling a C instruction implementation function taking three extra + * arguments. + * + * This special call macro adds default arguments to the call and allow us to + * change these later. + * + * @param a_fn The name of the function. + * @param a0 The name of the 1st argument. + * @param a1 The name of the 2nd argument. + * @param a2 The name of the 3rd argument. + */ +# define IEM_CIMPL_CALL_3(a_fn, a0, a1, a2) a_fn(pVCpu, cbInstr, (a0), (a1), (a2)) + + +/** + * For typedef'ing or declaring a C instruction implementation function taking + * four extra arguments. + * + * @param a_Name The name of the type. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + * @param a_Type2 The type of the 3rd argument. + * @param a_Arg2 The name of the 3rd argument. + * @param a_Type3 The type of the 4th argument. + * @param a_Arg3 The name of the 4th argument. + */ +# define IEM_CIMPL_DECL_TYPE_4(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2, a_Type3, a_Arg3) \ + IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2, a_Type3 a_Arg3)) +/** + * For defining a C instruction implementation function taking four extra + * arguments. + * + * @param a_Name The name of the function. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + * @param a_Type2 The type of the 3rd argument. + * @param a_Arg2 The name of the 3rd argument. + * @param a_Type3 The type of the 4th argument. + * @param a_Arg3 The name of the 4th argument. + */ +# define IEM_CIMPL_DEF_4(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2, a_Type3, a_Arg3) \ + IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1, \ + a_Type2 a_Arg2, a_Type3 a_Arg3)) +/** + * For calling a C instruction implementation function taking four extra + * arguments. + * + * This special call macro adds default arguments to the call and allow us to + * change these later. + * + * @param a_fn The name of the function. + * @param a0 The name of the 1st argument. + * @param a1 The name of the 2nd argument. + * @param a2 The name of the 3rd argument. + * @param a3 The name of the 4th argument. + */ +# define IEM_CIMPL_CALL_4(a_fn, a0, a1, a2, a3) a_fn(pVCpu, cbInstr, (a0), (a1), (a2), (a3)) + + +/** + * For typedef'ing or declaring a C instruction implementation function taking + * five extra arguments. + * + * @param a_Name The name of the type. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + * @param a_Type2 The type of the 3rd argument. + * @param a_Arg2 The name of the 3rd argument. + * @param a_Type3 The type of the 4th argument. + * @param a_Arg3 The name of the 4th argument. + * @param a_Type4 The type of the 5th argument. + * @param a_Arg4 The name of the 5th argument. 
+ */ +# define IEM_CIMPL_DECL_TYPE_5(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2, a_Type3, a_Arg3, a_Type4, a_Arg4) \ + IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, \ + a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2, \ + a_Type3 a_Arg3, a_Type4 a_Arg4)) +/** + * For defining a C instruction implementation function taking five extra + * arguments. + * + * @param a_Name The name of the function. + * @param a_Type0 The type of the 1st argument + * @param a_Arg0 The name of the 1st argument. + * @param a_Type1 The type of the 2nd argument. + * @param a_Arg1 The name of the 2nd argument. + * @param a_Type2 The type of the 3rd argument. + * @param a_Arg2 The name of the 3rd argument. + * @param a_Type3 The type of the 4th argument. + * @param a_Arg3 The name of the 4th argument. + * @param a_Type4 The type of the 5th argument. + * @param a_Arg4 The name of the 5th argument. + */ +# define IEM_CIMPL_DEF_5(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2, a_Type3, a_Arg3, a_Type4, a_Arg4) \ + IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PVMCPUCC pVCpu, uint8_t cbInstr, \ + a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2, \ + a_Type3 a_Arg3, a_Type4 a_Arg4)) +/** + * For calling a C instruction implementation function taking five extra + * arguments. + * + * This special call macro adds default arguments to the call and allow us to + * change these later. + * + * @param a_fn The name of the function. + * @param a0 The name of the 1st argument. + * @param a1 The name of the 2nd argument. + * @param a2 The name of the 3rd argument. + * @param a3 The name of the 4th argument. + * @param a4 The name of the 5th argument. + */ +# define IEM_CIMPL_CALL_5(a_fn, a0, a1, a2, a3, a4) a_fn(pVCpu, cbInstr, (a0), (a1), (a2), (a3), (a4)) + +/** @} */ + + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_IEMInternal_h */ + diff --git a/src/VBox/VMM/include/IOMInline.h b/src/VBox/VMM/include/IOMInline.h new file mode 100644 index 00000000..a0a9cd6e --- /dev/null +++ b/src/VBox/VMM/include/IOMInline.h @@ -0,0 +1,260 @@ +/* $Id: IOMInline.h $ */ +/** @file + * IOM - Inlined functions. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_IOMInline_h +#define VMM_INCLUDED_SRC_include_IOMInline_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <iprt/errcore.h> + +/** @addtogroup grp_iom_int Internals + * @internal + * @{ + */ + + +/** + * Gets the I/O port entry for the specified I/O port in the current context. + * + * @returns Pointer to I/O port entry. + * @returns NULL if no port registered. + * + * @param pVM The cross context VM structure. + * @param uPort The I/O port to lookup. + * @param poffPort Where to return the port offset relative to the + * start of the I/O port range. + * @param pidxLastHint Pointer to IOMCPU::idxIoPortLastRead or + * IOMCPU::idxIoPortLastWrite. 
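+ *
+ * Illustrative lookup sketch; the real callers additionally take
+ * IOM_LOCK_SHARED, update statistics and defer to ring-3 when needed, and
+ * uPort, u32Value and cbValue are assumed to come from the caller:
+ * @code
+ *      RTIOPORT offPort;
+ *      CTX_SUFF(PIOMIOPORTENTRY) pRegEntry = iomIoPortGetEntry(pVM, uPort, &offPort,
+ *                                                              &pVCpu->iom.s.idxIoPortLastRead);
+ *      if (pRegEntry && pRegEntry->pfnInCallback)
+ *          rcStrict = pRegEntry->pfnInCallback(pRegEntry->pDevIns, pRegEntry->pvUser,
+ *                                              offPort, &u32Value, cbValue);
+ * @endcode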
+ * + * @note In ring-0 it is possible to get an uninitialized entry (pDevIns is + * NULL, cPorts is 0), in which case there should be ring-3 handlers + * for the entry. Use IOMIOPORTENTRYR0::idxSelf to get the ring-3 + * entry. + * + * @note This code is almost identical to iomMmioGetEntry, so keep in sync. + */ +DECLINLINE(CTX_SUFF(PIOMIOPORTENTRY)) iomIoPortGetEntry(PVMCC pVM, RTIOPORT uPort, PRTIOPORT poffPort, uint16_t *pidxLastHint) +{ + Assert(IOM_IS_SHARED_LOCK_OWNER(pVM)); + +#ifdef IN_RING0 + uint32_t iEnd = RT_MIN(pVM->iom.s.cIoPortLookupEntries, pVM->iomr0.s.cIoPortAlloc); + PCIOMIOPORTLOOKUPENTRY paLookup = pVM->iomr0.s.paIoPortLookup; +#else + uint32_t iEnd = pVM->iom.s.cIoPortLookupEntries; + PCIOMIOPORTLOOKUPENTRY paLookup = pVM->iom.s.paIoPortLookup; +#endif + if (iEnd > 0) + { + uint32_t iFirst = 0; + uint32_t i = *pidxLastHint; + if (i < iEnd) + { /* likely */ } + else + i = iEnd / 2; + for (;;) + { + PCIOMIOPORTLOOKUPENTRY pCur = &paLookup[i]; + if (pCur->uFirstPort > uPort) + { + if (i > iFirst) + iEnd = i; + else + break; + } + else if (pCur->uLastPort < uPort) + { + i += 1; + if (i < iEnd) + iFirst = i; + else + break; + } + else + { + *pidxLastHint = (uint16_t)i; + *poffPort = uPort - pCur->uFirstPort; + + /* + * Translate the 'idx' member into a pointer. + */ + size_t const idx = pCur->idx; +#ifdef IN_RING0 + AssertMsg(idx < pVM->iom.s.cIoPortRegs && idx < pVM->iomr0.s.cIoPortAlloc, + ("%#zx vs %#x/%x (port %#x)\n", idx, pVM->iom.s.cIoPortRegs, pVM->iomr0.s.cIoPortMax, uPort)); + if (idx < pVM->iomr0.s.cIoPortAlloc) + return &pVM->iomr0.s.paIoPortRegs[idx]; +#else + if (idx < pVM->iom.s.cIoPortRegs) + return &pVM->iom.s.paIoPortRegs[idx]; + AssertMsgFailed(("%#zx vs %#x (port %#x)\n", idx, pVM->iom.s.cIoPortRegs, uPort)); +#endif + break; + } + + i = iFirst + (iEnd - iFirst) / 2; + } + } + *poffPort = 0; + return NULL; +} + + +#ifdef VBOX_WITH_STATISTICS +/** + * Gets the statistics entry for an I/O port. + * + * @returns Pointer to stats. Instead of NULL, a pointer to IoPortDummyStats is + * returned, so the caller does not need to check for NULL. + * + * @param pVM The cross context VM structure. + * @param pRegEntry The I/O port entry to get stats for. + * @param offPort The offset of the port relative to the start of the + * registration entry. + */ +DECLINLINE(PIOMIOPORTSTATSENTRY) iomIoPortGetStats(PVMCC pVM, CTX_SUFF(PIOMIOPORTENTRY) pRegEntry, uint16_t offPort) +{ + size_t idxStats = pRegEntry->idxStats; + idxStats += offPort; +# ifdef IN_RING0 + if (idxStats < pVM->iomr0.s.cIoPortStatsAllocation) + return &pVM->iomr0.s.paIoPortStats[idxStats]; +# else + if (idxStats < pVM->iom.s.cIoPortStats) + return &pVM->iom.s.paIoPortStats[idxStats]; +# endif + return &pVM->iom.s.IoPortDummyStats; +} +#endif + + +/** + * Gets the MMIO region entry for the specified address in the current context. + * + * @returns Pointer to MMIO region entry. + * @returns NULL if no MMIO region registered for the given address. + * + * @param pVM The cross context VM structure. + * @param GCPhys The address to lookup. + * @param poffRegion Where to return the byte offset into the MMIO + * region that corresponds to @a GCPhys. + * @param pidxLastHint Pointer to IOMCPU::idxMmioLastRead, + * IOMCPU::idxMmioLastWrite, or similar. + * + * @note In ring-0 it is possible to get an uninitialized entry (pDevIns is + * NULL, cbRegion is 0), in which case there should be ring-3 handlers + * for the entry. Use IOMMMIOENTRYR0::idxSelf to get the ring-3 entry. 
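+ *
+ * Illustrative lookup sketch (locking, statistics and the actual dispatch to
+ * pfnReadCallback / pfnWriteCallback are omitted):
+ * @code
+ *      RTGCPHYS offRegion;
+ *      CTX_SUFF(PIOMMMIOENTRY) pRegEntry = iomMmioGetEntry(pVM, GCPhys, &offRegion,
+ *                                                          &pVCpu->iom.s.idxMmioLastPhysHandler);
+ *      // On success offRegion is the byte offset of GCPhys into the region.
+ * @endcode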
+ * + * @note This code is almost identical to iomIoPortGetEntry, so keep in sync. + */ +DECLINLINE(CTX_SUFF(PIOMMMIOENTRY)) iomMmioGetEntry(PVMCC pVM, RTGCPHYS GCPhys, PRTGCPHYS poffRegion, uint16_t *pidxLastHint) +{ + Assert(IOM_IS_SHARED_LOCK_OWNER(pVM)); + +#ifdef IN_RING0 + uint32_t iEnd = RT_MIN(pVM->iom.s.cMmioLookupEntries, pVM->iomr0.s.cMmioAlloc); + PCIOMMMIOLOOKUPENTRY paLookup = pVM->iomr0.s.paMmioLookup; +#else + uint32_t iEnd = pVM->iom.s.cMmioLookupEntries; + PCIOMMMIOLOOKUPENTRY paLookup = pVM->iom.s.paMmioLookup; +#endif + if (iEnd > 0) + { + uint32_t iFirst = 0; + uint32_t i = *pidxLastHint; + if (i < iEnd) + { /* likely */ } + else + i = iEnd / 2; + for (;;) + { + PCIOMMMIOLOOKUPENTRY pCur = &paLookup[i]; + if (pCur->GCPhysFirst > GCPhys) + { + if (i > iFirst) + iEnd = i; + else + break; + } + else if (pCur->GCPhysLast < GCPhys) + { + i += 1; + if (i < iEnd) + iFirst = i; + else + break; + } + else + { + *pidxLastHint = (uint16_t)i; + *poffRegion = GCPhys - pCur->GCPhysFirst; + + /* + * Translate the 'idx' member into a pointer. + */ + size_t const idx = pCur->idx; +#ifdef IN_RING0 + AssertMsg(idx < pVM->iom.s.cMmioRegs && idx < pVM->iomr0.s.cMmioAlloc, + ("%#zx vs %#x/%x (GCPhys=%RGp)\n", idx, pVM->iom.s.cMmioRegs, pVM->iomr0.s.cMmioMax, GCPhys)); + if (idx < pVM->iomr0.s.cMmioAlloc) + return &pVM->iomr0.s.paMmioRegs[idx]; +#else + if (idx < pVM->iom.s.cMmioRegs) + return &pVM->iom.s.paMmioRegs[idx]; + AssertMsgFailed(("%#zx vs %#x (GCPhys=%RGp)\n", idx, pVM->iom.s.cMmioRegs, GCPhys)); +#endif + break; + } + + i = iFirst + (iEnd - iFirst) / 2; + } + } + *poffRegion = 0; + return NULL; +} + + +#ifdef VBOX_WITH_STATISTICS +/** + * Gets the statistics entry for an MMIO region. + * + * @returns Pointer to stats. Instead of NULL, a pointer to MmioDummyStats is + * returned, so the caller does not need to check for NULL. + * + * @param pVM The cross context VM structure. + * @param pRegEntry The I/O port entry to get stats for. + */ +DECLINLINE(PIOMMMIOSTATSENTRY) iomMmioGetStats(PVMCC pVM, CTX_SUFF(PIOMMMIOENTRY) pRegEntry) +{ + size_t idxStats = pRegEntry->idxStats; +# ifdef IN_RING0 + if (idxStats < pVM->iomr0.s.cMmioStatsAllocation) + return &pVM->iomr0.s.paMmioStats[idxStats]; +# else + if (idxStats < pVM->iom.s.cMmioStats) + return &pVM->iom.s.paMmioStats[idxStats]; +# endif + return &pVM->iom.s.MmioDummyStats; +} +#endif + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_IOMInline_h */ + diff --git a/src/VBox/VMM/include/IOMInternal.h b/src/VBox/VMM/include/IOMInternal.h new file mode 100644 index 00000000..6154a9de --- /dev/null +++ b/src/VBox/VMM/include/IOMInternal.h @@ -0,0 +1,610 @@ +/* $Id: IOMInternal.h $ */ +/** @file + * IOM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VMM_INCLUDED_SRC_include_IOMInternal_h +#define VMM_INCLUDED_SRC_include_IOMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#define IOM_WITH_CRIT_SECT_RW + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/iom.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/pgm.h> +#include <VBox/vmm/pdmcritsect.h> +#ifdef IOM_WITH_CRIT_SECT_RW +# include <VBox/vmm/pdmcritsectrw.h> +#endif +#include <VBox/param.h> +#include <iprt/assert.h> +#include <iprt/avl.h> + + + +/** @defgroup grp_iom_int Internals + * @ingroup grp_iom + * @internal + * @{ + */ + +/** + * I/O port lookup table entry. + */ +typedef struct IOMIOPORTLOOKUPENTRY +{ + /** The first port in the range. */ + RTIOPORT uFirstPort; + /** The last port in the range (inclusive). */ + RTIOPORT uLastPort; + /** The registration handle/index. */ + uint16_t idx; +} IOMIOPORTLOOKUPENTRY; +/** Pointer to an I/O port lookup table entry. */ +typedef IOMIOPORTLOOKUPENTRY *PIOMIOPORTLOOKUPENTRY; +/** Pointer to a const I/O port lookup table entry. */ +typedef IOMIOPORTLOOKUPENTRY const *PCIOMIOPORTLOOKUPENTRY; + +/** + * Ring-0 I/O port handle table entry. + */ +typedef struct IOMIOPORTENTRYR0 +{ + /** Pointer to user argument. */ + RTR0PTR pvUser; + /** Pointer to the associated device instance, NULL if entry not used. */ + R0PTRTYPE(PPDMDEVINS) pDevIns; + /** Pointer to OUT callback function. */ + R0PTRTYPE(PFNIOMIOPORTNEWOUT) pfnOutCallback; + /** Pointer to IN callback function. */ + R0PTRTYPE(PFNIOMIOPORTNEWIN) pfnInCallback; + /** Pointer to string OUT callback function. */ + R0PTRTYPE(PFNIOMIOPORTNEWOUTSTRING) pfnOutStrCallback; + /** Pointer to string IN callback function. */ + R0PTRTYPE(PFNIOMIOPORTNEWINSTRING) pfnInStrCallback; + /** The entry of the first statistics entry, UINT16_MAX if no stats. */ + uint16_t idxStats; + /** The number of ports covered by this entry, 0 if entry not used. */ + RTIOPORT cPorts; + /** Same as the handle index. */ + uint16_t idxSelf; + /** IOM_IOPORT_F_XXX (copied from ring-3). */ + uint16_t fFlags; +} IOMIOPORTENTRYR0; +/** Pointer to a ring-0 I/O port handle table entry. */ +typedef IOMIOPORTENTRYR0 *PIOMIOPORTENTRYR0; +/** Pointer to a const ring-0 I/O port handle table entry. */ +typedef IOMIOPORTENTRYR0 const *PCIOMIOPORTENTRYR0; + +/** + * Ring-3 I/O port handle table entry. + */ +typedef struct IOMIOPORTENTRYR3 +{ + /** Pointer to user argument. */ + RTR3PTR pvUser; + /** Pointer to the associated device instance. */ + R3PTRTYPE(PPDMDEVINS) pDevIns; + /** Pointer to OUT callback function. */ + R3PTRTYPE(PFNIOMIOPORTNEWOUT) pfnOutCallback; + /** Pointer to IN callback function. */ + R3PTRTYPE(PFNIOMIOPORTNEWIN) pfnInCallback; + /** Pointer to string OUT callback function. */ + R3PTRTYPE(PFNIOMIOPORTNEWOUTSTRING) pfnOutStrCallback; + /** Pointer to string IN callback function. */ + R3PTRTYPE(PFNIOMIOPORTNEWINSTRING) pfnInStrCallback; + /** Description / Name. For easing debugging. */ + R3PTRTYPE(const char *) pszDesc; + /** Extended port description table, optional. */ + R3PTRTYPE(PCIOMIOPORTDESC) paExtDescs; + /** PCI device the registration is associated with. */ + R3PTRTYPE(PPDMPCIDEV) pPciDev; + /** The PCI device region (high 16-bit word) and subregion (low word), + * UINT32_MAX if not applicable. */ + uint32_t iPciRegion; + /** The number of ports covered by this entry. */ + RTIOPORT cPorts; + /** The current port mapping (duplicates lookup table). */ + RTIOPORT uPort; + /** The entry of the first statistics entry, UINT16_MAX if no stats. 
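+ * The statistics for a given port in the range live at this index plus the
+ * port offset, e.g. (illustrative):
+ * @code
+ *      PIOMIOPORTSTATSENTRY pStats = iomIoPortGetStats(pVM, pRegEntry, offPort);
+ * @endcode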
*/ + uint16_t idxStats; + /** Set if mapped, clear if not. + * Only updated when critsect is held exclusively. */ + bool fMapped; + /** Set if there is an ring-0 entry too. */ + bool fRing0; + /** Set if there is an raw-mode entry too. */ + bool fRawMode; + /** IOM_IOPORT_F_XXX */ + uint8_t fFlags; + /** Same as the handle index. */ + uint16_t idxSelf; +} IOMIOPORTENTRYR3; +AssertCompileSize(IOMIOPORTENTRYR3, 9 * sizeof(RTR3PTR) + 16); +/** Pointer to a ring-3 I/O port handle table entry. */ +typedef IOMIOPORTENTRYR3 *PIOMIOPORTENTRYR3; +/** Pointer to a const ring-3 I/O port handle table entry. */ +typedef IOMIOPORTENTRYR3 const *PCIOMIOPORTENTRYR3; + +/** + * I/O port statistics entry (one I/O port). + */ +typedef struct IOMIOPORTSTATSENTRY +{ + /** All accesses (only updated for the first port in a range). */ + STAMCOUNTER Total; + + /** Number of INs to this port from R3. */ + STAMCOUNTER InR3; + /** Profiling IN handler overhead in R3. */ + STAMPROFILE ProfInR3; + /** Number of OUTs to this port from R3. */ + STAMCOUNTER OutR3; + /** Profiling OUT handler overhead in R3. */ + STAMPROFILE ProfOutR3; + + /** Number of INs to this port from R0/RC. */ + STAMCOUNTER InRZ; + /** Profiling IN handler overhead in R0/RC. */ + STAMPROFILE ProfInRZ; + /** Number of INs to this port from R0/RC which was serviced in R3. */ + STAMCOUNTER InRZToR3; + + /** Number of OUTs to this port from R0/RC. */ + STAMCOUNTER OutRZ; + /** Profiling OUT handler overhead in R0/RC. */ + STAMPROFILE ProfOutRZ; + /** Number of OUTs to this port from R0/RC which was serviced in R3. */ + STAMCOUNTER OutRZToR3; +} IOMIOPORTSTATSENTRY; +/** Pointer to I/O port statistics entry. */ +typedef IOMIOPORTSTATSENTRY *PIOMIOPORTSTATSENTRY; + + + +/** + * MMIO lookup table entry. + */ +typedef struct IOMMMIOLOOKUPENTRY +{ + /** The first port in the range. */ + RTGCPHYS GCPhysFirst; + /** The last port in the range (inclusive). */ + RTGCPHYS GCPhysLast; + /** The registration handle/index. + * @todo bake this into the lower/upper bits of GCPhysFirst & GCPhysLast. */ + uint16_t idx; + uint16_t abPadding[3]; +} IOMMMIOLOOKUPENTRY; +/** Pointer to an MMIO lookup table entry. */ +typedef IOMMMIOLOOKUPENTRY *PIOMMMIOLOOKUPENTRY; +/** Pointer to a const MMIO lookup table entry. */ +typedef IOMMMIOLOOKUPENTRY const *PCIOMMMIOLOOKUPENTRY; + +/** + * Ring-0 MMIO handle table entry. + */ +typedef struct IOMMMIOENTRYR0 +{ + /** The number of bytes covered by this entry, 0 if entry not used. */ + RTGCPHYS cbRegion; + /** Pointer to user argument. */ + RTR0PTR pvUser; + /** Pointer to the associated device instance, NULL if entry not used. */ + R0PTRTYPE(PPDMDEVINS) pDevIns; + /** Pointer to the write callback function. */ + R0PTRTYPE(PFNIOMMMIONEWWRITE) pfnWriteCallback; + /** Pointer to the read callback function. */ + R0PTRTYPE(PFNIOMMMIONEWREAD) pfnReadCallback; + /** Pointer to the fill callback function. */ + R0PTRTYPE(PFNIOMMMIONEWFILL) pfnFillCallback; + /** The entry of the first statistics entry, UINT16_MAX if no stats. + * @note For simplicity, this is always copied from ring-3 for all entries at + * the end of VM creation. */ + uint16_t idxStats; + /** Same as the handle index. */ + uint16_t idxSelf; + /** IOM_MMIO_F_XXX (copied from ring-3). */ + uint32_t fFlags; +} IOMMMIOENTRYR0; +/** Pointer to a ring-0 MMIO handle table entry. */ +typedef IOMMMIOENTRYR0 *PIOMMMIOENTRYR0; +/** Pointer to a const ring-0 MMIO handle table entry. */ +typedef IOMMMIOENTRYR0 const *PCIOMMMIOENTRYR0; + +/** + * Ring-3 MMIO handle table entry. 
+ */ +typedef struct IOMMMIOENTRYR3 +{ + /** The number of bytes covered by this entry. */ + RTGCPHYS cbRegion; + /** The current mapping address (duplicates lookup table). + * This is set to NIL_RTGCPHYS if not mapped (exclusive lock + atomic). */ + RTGCPHYS volatile GCPhysMapping; + /** Pointer to user argument. */ + RTR3PTR pvUser; + /** Pointer to the associated device instance. */ + R3PTRTYPE(PPDMDEVINS) pDevIns; + /** Pointer to the write callback function. */ + R3PTRTYPE(PFNIOMMMIONEWWRITE) pfnWriteCallback; + /** Pointer to the read callback function. */ + R3PTRTYPE(PFNIOMMMIONEWREAD) pfnReadCallback; + /** Pointer to the fill callback function. */ + R3PTRTYPE(PFNIOMMMIONEWFILL) pfnFillCallback; + /** Description / Name. For easing debugging. */ + R3PTRTYPE(const char *) pszDesc; + /** PCI device the registration is associated with. */ + R3PTRTYPE(PPDMPCIDEV) pPciDev; + /** The PCI device region (high 16-bit word) and subregion (low word), + * UINT32_MAX if not applicable. */ + uint32_t iPciRegion; + /** IOM_MMIO_F_XXX */ + uint32_t fFlags; + /** The entry of the first statistics entry, UINT16_MAX if no stats. */ + uint16_t idxStats; + /** Set if mapped, clear if not. + * Only updated when critsect is held exclusively. + * @todo remove as GCPhysMapping != NIL_RTGCPHYS serves the same purpose. */ + bool volatile fMapped; + /** Set if there is an ring-0 entry too. */ + bool fRing0; + /** Set if there is an raw-mode entry too. */ + bool fRawMode; + uint8_t bPadding; + /** Same as the handle index. */ + uint16_t idxSelf; +} IOMMMIOENTRYR3; +AssertCompileSize(IOMMMIOENTRYR3, sizeof(RTGCPHYS) * 2 + 7 * sizeof(RTR3PTR) + 16); +/** Pointer to a ring-3 MMIO handle table entry. */ +typedef IOMMMIOENTRYR3 *PIOMMMIOENTRYR3; +/** Pointer to a const ring-3 MMIO handle table entry. */ +typedef IOMMMIOENTRYR3 const *PCIOMMMIOENTRYR3; + +/** + * MMIO statistics entry (one MMIO). + */ +typedef struct IOMMMIOSTATSENTRY +{ + /** Counting and profiling reads in R0/RC. */ + STAMPROFILE ProfReadRZ; + /** Number of successful read accesses. */ + STAMCOUNTER Reads; + /** Number of reads to this address from R0/RC which was serviced in R3. */ + STAMCOUNTER ReadRZToR3; + /** Number of complicated reads. */ + STAMCOUNTER ComplicatedReads; + /** Number of reads of 0xff or 0x00. */ + STAMCOUNTER FFor00Reads; + /** Profiling read handler overhead in R3. */ + STAMPROFILE ProfReadR3; + + /** Counting and profiling writes in R0/RC. */ + STAMPROFILE ProfWriteRZ; + /** Number of successful read accesses. */ + STAMCOUNTER Writes; + /** Number of writes to this address from R0/RC which was serviced in R3. */ + STAMCOUNTER WriteRZToR3; + /** Number of writes to this address from R0/RC which was committed in R3. */ + STAMCOUNTER CommitRZToR3; + /** Number of complicated writes. */ + STAMCOUNTER ComplicatedWrites; + /** Profiling write handler overhead in R3. */ + STAMPROFILE ProfWriteR3; +} IOMMMIOSTATSENTRY; +/** Pointer to MMIO statistics entry. */ +typedef IOMMMIOSTATSENTRY *PIOMMMIOSTATSENTRY; + + +/** + * IOM per virtual CPU instance data. + */ +typedef struct IOMCPU +{ + /** For saving stack space, the disassembler state is allocated here instead of + * on the stack. */ + DISCPUSTATE DisState; + + /** + * Pending I/O port write commit (VINF_IOM_R3_IOPORT_COMMIT_WRITE). + * + * This is a converted VINF_IOM_R3_IOPORT_WRITE handler return that lets the + * execution engine commit the instruction and then return to ring-3 to complete + * the I/O port write there. 
This avoids having to decode the instruction again + * in ring-3. + */ + struct + { + /** The value size (0 if not pending). */ + uint16_t cbValue; + /** The I/O port. */ + RTIOPORT IOPort; + /** The value. */ + uint32_t u32Value; + } PendingIOPortWrite; + + /** + * Pending MMIO write commit (VINF_IOM_R3_MMIO_COMMIT_WRITE). + * + * This is a converted VINF_IOM_R3_MMIO_WRITE handler return that lets the + * execution engine commit the instruction, stop any more REPs, and return to + * ring-3 to complete the MMIO write there. This avoids the tedious decoding of + * the instruction again once we're in ring-3; more importantly, it allows us to + * correctly deal with read-modify-write instructions like XCHG, OR, and XOR. + */ + struct + { + /** Guest physical MMIO address. */ + RTGCPHYS GCPhys; + /** The number of bytes to write (0 if nothing pending). */ + uint32_t cbValue; + /** MMIO region registration index hint. */ + uint32_t idxMmioRegionHint; + /** The value to write. */ + uint8_t abValue[128]; + } PendingMmioWrite; + + /** @name Caching of I/O Port and MMIO ranges and statistics. + * (Saves quite some time in rep outs/ins instruction emulation.) + * @{ */ + /** I/O port registration index for the last read operation. */ + uint16_t idxIoPortLastRead; + /** I/O port registration index for the last write operation. */ + uint16_t idxIoPortLastWrite; + /** I/O port registration index for the last read string operation. */ + uint16_t idxIoPortLastReadStr; + /** I/O port registration index for the last write string operation. */ + uint16_t idxIoPortLastWriteStr; + + /** MMIO region registration index for the last IOMR3MmioPhysHandler call. + * @note pretty static as only used by APIC on AMD-V. */ + uint16_t idxMmioLastPhysHandler; + uint16_t au16Padding[3]; + /** @} */ +} IOMCPU; +/** Pointer to IOM per virtual CPU instance data. */ +typedef IOMCPU *PIOMCPU; + + +/** + * IOM Data (part of VM) + */ +typedef struct IOM +{ + /** @name I/O ports + * @note The updating of these variables is done exclusively from EMT(0). + * @{ */ + /** Number of I/O port registrations. */ + uint32_t cIoPortRegs; + /** The size of the paIoPortRegs allocation (in entries). */ + uint32_t cIoPortAlloc; + /** I/O port registration table for ring-3. + * There is a parallel table in ring-0, IOMR0PERVM::paIoPortRegs. */ + R3PTRTYPE(PIOMIOPORTENTRYR3) paIoPortRegs; + /** Number of entries in the lookup table. */ + uint32_t cIoPortLookupEntries; + uint32_t u32Padding1; + /** I/O port lookup table. */ + R3PTRTYPE(PIOMIOPORTLOOKUPENTRY) paIoPortLookup; + + /** The number of valid entries in paIoPortStats. */ + uint32_t cIoPortStats; + /** The size of the paIoPortStats allocation (in entries). */ + uint32_t cIoPortStatsAllocation; + /** I/O port statistics table. */ + R3PTRTYPE(PIOMIOPORTSTATSENTRY) paIoPortStats; + /** Dummy stats entry so we don't need to check for NULL pointers so much. */ + IOMIOPORTSTATSENTRY IoPortDummyStats; + /** @} */ + + /** @name MMIO ports + * @note The updating of these variables is done exclusively from EMT(0). + * @{ */ + /** MMIO physical access handler type, new style. */ + PGMPHYSHANDLERTYPE hNewMmioHandlerType; + /** Number of MMIO registrations. */ + uint32_t cMmioRegs; + /** The size of the paMmioRegs allocation (in entries). */ + uint32_t cMmioAlloc; + /** MMIO registration table for ring-3. + * There is a parallel table in ring-0, IOMR0PERVM::paMmioRegs. */ + R3PTRTYPE(PIOMMMIOENTRYR3) paMmioRegs; + /** Number of entries in the lookup table. */ + uint32_t cMmioLookupEntries; + uint32_t u32Padding2; + /** MMIO lookup table. 
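+ * Kept ordered by address so that iomMmioGetEntry() can binary search it.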
*/ + R3PTRTYPE(PIOMMMIOLOOKUPENTRY) paMmioLookup; + + /** The number of valid entries in paioPortStats. */ + uint32_t cMmioStats; + /** The size of the paMmioStats allocation (in entries). */ + uint32_t cMmioStatsAllocation; + /** MMIO lookup table. */ + R3PTRTYPE(PIOMMMIOSTATSENTRY) paMmioStats; + /** Dummy stats entry so we don't need to check for NULL pointers so much. */ + IOMMMIOSTATSENTRY MmioDummyStats; + /** @} */ + + + /** Lock serializing EMT access to IOM. */ +#ifdef IOM_WITH_CRIT_SECT_RW + PDMCRITSECTRW CritSect; +#else + PDMCRITSECT CritSect; +#endif + + /** @name I/O Port statistics. + * @{ */ + STAMCOUNTER StatIoPortIn; + STAMCOUNTER StatIoPortOut; + STAMCOUNTER StatIoPortInS; + STAMCOUNTER StatIoPortOutS; + STAMCOUNTER StatIoPortCommits; + /** @} */ + + /** @name MMIO statistics. + * @{ */ + STAMPROFILE StatMmioPfHandler; + STAMPROFILE StatMmioPhysHandler; + STAMCOUNTER StatMmioHandlerR3; + STAMCOUNTER StatMmioHandlerR0; + STAMCOUNTER StatMmioReadsR0ToR3; + STAMCOUNTER StatMmioWritesR0ToR3; + STAMCOUNTER StatMmioCommitsR0ToR3; + STAMCOUNTER StatMmioCommitsDirect; + STAMCOUNTER StatMmioCommitsPgm; + STAMCOUNTER StatMmioStaleMappings; + STAMCOUNTER StatMmioDevLockContentionR0; + /** @} */ +} IOM; +/** Pointer to IOM instance data. */ +typedef IOM *PIOM; + + +/** + * IOM data kept in the ring-0 GVM. + */ +typedef struct IOMR0PERVM +{ + /** @name I/O ports + * @{ */ + /** The higest ring-0 I/O port registration plus one. */ + uint32_t cIoPortMax; + /** The size of the paIoPortRegs allocation (in entries). */ + uint32_t cIoPortAlloc; + /** I/O port registration table for ring-0. + * There is a parallel table for ring-3, paIoPortRing3Regs. */ + R0PTRTYPE(PIOMIOPORTENTRYR0) paIoPortRegs; + /** I/O port lookup table. */ + R0PTRTYPE(PIOMIOPORTLOOKUPENTRY) paIoPortLookup; + /** I/O port registration table for ring-3. + * Also mapped to ring-3 as IOM::paIoPortRegs. */ + R0PTRTYPE(PIOMIOPORTENTRYR3) paIoPortRing3Regs; + /** Handle to the allocation backing both the ring-0 and ring-3 registration + * tables as well as the lookup table. */ + RTR0MEMOBJ hIoPortMemObj; + /** Handle to the ring-3 mapping of the lookup and ring-3 registration table. */ + RTR0MEMOBJ hIoPortMapObj; +#ifdef VBOX_WITH_STATISTICS + /** The size of the paIoPortStats allocation (in entries). */ + uint32_t cIoPortStatsAllocation; + /** Prevents paIoPortStats from growing, set by IOMR0IoPortSyncStatisticsIndices(). */ + bool fIoPortStatsFrozen; + /** I/O port lookup table. */ + R0PTRTYPE(PIOMIOPORTSTATSENTRY) paIoPortStats; + /** Handle to the allocation backing the I/O port statistics. */ + RTR0MEMOBJ hIoPortStatsMemObj; + /** Handle to the ring-3 mapping of the I/O port statistics. */ + RTR0MEMOBJ hIoPortStatsMapObj; +#endif + /** @} */ + + /** @name MMIO + * @{ */ + /** The higest ring-0 MMIO registration plus one. */ + uint32_t cMmioMax; + /** The size of the paMmioRegs allocation (in entries). */ + uint32_t cMmioAlloc; + /** MMIO registration table for ring-0. + * There is a parallel table for ring-3, paMmioRing3Regs. */ + R0PTRTYPE(PIOMMMIOENTRYR0) paMmioRegs; + /** MMIO lookup table. */ + R0PTRTYPE(PIOMMMIOLOOKUPENTRY) paMmioLookup; + /** MMIO registration table for ring-3. + * Also mapped to ring-3 as IOM::paMmioRegs. */ + R0PTRTYPE(PIOMMMIOENTRYR3) paMmioRing3Regs; + /** Handle to the allocation backing both the ring-0 and ring-3 registration + * tables as well as the lookup table. */ + RTR0MEMOBJ hMmioMemObj; + /** Handle to the ring-3 mapping of the lookup and ring-3 registration table. 
*/ + RTR0MEMOBJ hMmioMapObj; +#ifdef VBOX_WITH_STATISTICS + /** The size of the paMmioStats allocation (in entries). */ + uint32_t cMmioStatsAllocation; + /* Prevents paMmioStats from growing, set by IOMR0MmioSyncStatisticsIndices(). */ + bool fMmioStatsFrozen; + /** MMIO lookup table. */ + R0PTRTYPE(PIOMMMIOSTATSENTRY) paMmioStats; + /** Handle to the allocation backing the MMIO statistics. */ + RTR0MEMOBJ hMmioStatsMemObj; + /** Handle to the ring-3 mapping of the MMIO statistics. */ + RTR0MEMOBJ hMmioStatsMapObj; +#endif + /** @} */ + +} IOMR0PERVM; + + +RT_C_DECLS_BEGIN + +#ifdef IN_RING3 +DECLCALLBACK(void) iomR3IoPortInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +void iomR3IoPortRegStats(PVM pVM, PIOMIOPORTENTRYR3 pRegEntry); +DECLCALLBACK(void) iomR3MmioInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +void iomR3MmioRegStats(PVM pVM, PIOMMMIOENTRYR3 pRegEntry); +VBOXSTRICTRC iomR3MmioCommitWorker(PVM pVM, PVMCPU pVCpu, PIOMMMIOENTRYR3 pRegEntry, RTGCPHYS offRegion); /* IOMAllMmioNew.cpp */ +#endif /* IN_RING3 */ +#ifdef IN_RING0 +void iomR0IoPortCleanupVM(PGVM pGVM); +void iomR0IoPortInitPerVMData(PGVM pGVM); +void iomR0MmioCleanupVM(PGVM pGVM); +void iomR0MmioInitPerVMData(PGVM pGVM); +#endif + +#ifndef IN_RING3 +DECLEXPORT(FNPGMRZPHYSPFHANDLER) iomMmioPfHandlerNew; +#endif +PGM_ALL_CB2_PROTO(FNPGMPHYSHANDLER) iomMmioHandlerNew; + +/* IOM locking helpers. */ +#ifdef IOM_WITH_CRIT_SECT_RW +# define IOM_LOCK_EXCL(a_pVM) PDMCritSectRwEnterExcl(&(a_pVM)->iom.s.CritSect, VERR_SEM_BUSY) +# define IOM_UNLOCK_EXCL(a_pVM) do { PDMCritSectRwLeaveExcl(&(a_pVM)->iom.s.CritSect); } while (0) +# if 0 /* (in case needed for debugging) */ +# define IOM_LOCK_SHARED_EX(a_pVM, a_rcBusy) PDMCritSectRwEnterExcl(&(a_pVM)->iom.s.CritSect, (a_rcBusy)) +# define IOM_UNLOCK_SHARED(a_pVM) do { PDMCritSectRwLeaveExcl(&(a_pVM)->iom.s.CritSect); } while (0) +# define IOM_IS_SHARED_LOCK_OWNER(a_pVM) PDMCritSectRwIsWriteOwner(&(a_pVM)->iom.s.CritSect) +# else +# define IOM_LOCK_SHARED_EX(a_pVM, a_rcBusy) PDMCritSectRwEnterShared(&(a_pVM)->iom.s.CritSect, (a_rcBusy)) +# define IOM_UNLOCK_SHARED(a_pVM) do { PDMCritSectRwLeaveShared(&(a_pVM)->iom.s.CritSect); } while (0) +# define IOM_IS_SHARED_LOCK_OWNER(a_pVM) PDMCritSectRwIsReadOwner(&(a_pVM)->iom.s.CritSect, true) +# endif +# define IOM_IS_EXCL_LOCK_OWNER(a_pVM) PDMCritSectRwIsWriteOwner(&(a_pVM)->iom.s.CritSect) +#else +# define IOM_LOCK_EXCL(a_pVM) PDMCritSectEnter(&(a_pVM)->iom.s.CritSect, VERR_SEM_BUSY) +# define IOM_UNLOCK_EXCL(a_pVM) do { PDMCritSectLeave(&(a_pVM)->iom.s.CritSect); } while (0) +# define IOM_LOCK_SHARED_EX(a_pVM, a_rcBusy) PDMCritSectEnter(&(a_pVM)->iom.s.CritSect, (a_rcBusy)) +# define IOM_UNLOCK_SHARED(a_pVM) do { PDMCritSectLeave(&(a_pVM)->iom.s.CritSect); } while (0) +# define IOM_IS_SHARED_LOCK_OWNER(a_pVM) PDMCritSectIsOwner(&(a_pVM)->iom.s.CritSect) +# define IOM_IS_EXCL_LOCK_OWNER(a_pVM) PDMCritSectIsOwner(&(a_pVM)->iom.s.CritSect) +#endif +#define IOM_LOCK_SHARED(a_pVM) IOM_LOCK_SHARED_EX(a_pVM, VERR_SEM_BUSY) + + +RT_C_DECLS_END + + +#ifdef IN_RING3 + +#endif + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_IOMInternal_h */ + diff --git a/src/VBox/VMM/include/MMInternal.h b/src/VBox/VMM/include/MMInternal.h new file mode 100644 index 00000000..e29cb156 --- /dev/null +++ b/src/VBox/VMM/include/MMInternal.h @@ -0,0 +1,661 @@ +/* $Id: MMInternal.h $ */ +/** @file + * MM - Internal header file. 
+ */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_MMInternal_h +#define VMM_INCLUDED_SRC_include_MMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/sup.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/pdmcritsect.h> +#include <iprt/assert.h> +#include <iprt/avl.h> +#include <iprt/critsect.h> + + + +/** @defgroup grp_mm_int Internals + * @ingroup grp_mm + * @internal + * @{ + */ + + +/** @name MMR3Heap - VM Ring-3 Heap Internals + * @{ + */ + +/** @def MMR3HEAP_SIZE_ALIGNMENT + * The allocation size alignment of the MMR3Heap. + */ +#define MMR3HEAP_SIZE_ALIGNMENT 16 + +/** @def MMR3HEAP_WITH_STATISTICS + * Enable MMR3Heap statistics. + */ +#if !defined(MMR3HEAP_WITH_STATISTICS) && defined(VBOX_WITH_STATISTICS) +# define MMR3HEAP_WITH_STATISTICS +#endif + +/** + * Heap statistics record. + * There is one global and one per allocation tag. + */ +typedef struct MMHEAPSTAT +{ + /** Core avl node, key is the tag. */ + AVLULNODECORE Core; + /** Pointer to the heap the memory belongs to. */ + struct MMHEAP *pHeap; +#ifdef MMR3HEAP_WITH_STATISTICS + /** Number of bytes currently allocated. */ + size_t cbCurAllocated; + /** Number of allocation. */ + uint64_t cAllocations; + /** Number of reallocations. */ + uint64_t cReallocations; + /** Number of frees. */ + uint64_t cFrees; + /** Failures. */ + uint64_t cFailures; + /** Number of bytes allocated (sum). */ + uint64_t cbAllocated; + /** Number of bytes freed. */ + uint64_t cbFreed; +#endif +} MMHEAPSTAT; +#if defined(MMR3HEAP_WITH_STATISTICS) && defined(IN_RING3) +AssertCompileMemberAlignment(MMHEAPSTAT, cAllocations, 8); +AssertCompileSizeAlignment(MMHEAPSTAT, 8); +#endif +/** Pointer to heap statistics record. */ +typedef MMHEAPSTAT *PMMHEAPSTAT; + + + + +/** + * Additional heap block header for relating allocations to the VM. + */ +typedef struct MMHEAPHDR +{ + /** Pointer to the next record. */ + struct MMHEAPHDR *pNext; + /** Pointer to the previous record. */ + struct MMHEAPHDR *pPrev; + /** Pointer to the heap statistics record. + * (Where the a PVM can be found.) */ + PMMHEAPSTAT pStat; + /** Size of the allocation (including this header). */ + size_t cbSize; +} MMHEAPHDR; +/** Pointer to MM heap header. */ +typedef MMHEAPHDR *PMMHEAPHDR; + + +/** MM Heap structure. */ +typedef struct MMHEAP +{ + /** Lock protecting the heap. */ + RTCRITSECT Lock; + /** Heap block list head. */ + PMMHEAPHDR pHead; + /** Heap block list tail. */ + PMMHEAPHDR pTail; + /** Heap per tag statistics tree. */ + PAVLULNODECORE pStatTree; + /** The VM handle. */ + PUVM pUVM; + /** Heap global statistics. */ + MMHEAPSTAT Stat; +} MMHEAP; +/** Pointer to MM Heap structure. */ +typedef MMHEAP *PMMHEAP; + +/** @} */ + + +/** @name MMUkHeap - VM User-kernel Heap Internals + * @{ + */ + +/** @def MMUKHEAP_SIZE_ALIGNMENT + * The allocation size alignment of the MMR3UkHeap. 
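+ * Allocation sizes are rounded up to this granularity, i.e. roughly
+ * (illustrative): cbAligned = RT_ALIGN_Z(cb, MMUKHEAP_SIZE_ALIGNMENT).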
+ */ +#define MMUKHEAP_SIZE_ALIGNMENT 16 + +/** @def MMUKHEAP_WITH_STATISTICS + * Enable MMUkHeap statistics. + */ +#if !defined(MMUKHEAP_WITH_STATISTICS) && defined(VBOX_WITH_STATISTICS) +# define MMUKHEAP_WITH_STATISTICS +#endif + + +/** + * Heap statistics record. + * There is one global and one per allocation tag. + */ +typedef struct MMUKHEAPSTAT +{ + /** Core avl node, key is the tag. */ + AVLULNODECORE Core; + /** Number of allocation. */ + uint64_t cAllocations; + /** Number of reallocations. */ + uint64_t cReallocations; + /** Number of frees. */ + uint64_t cFrees; + /** Failures. */ + uint64_t cFailures; + /** Number of bytes allocated (sum). */ + uint64_t cbAllocated; + /** Number of bytes freed. */ + uint64_t cbFreed; + /** Number of bytes currently allocated. */ + size_t cbCurAllocated; +} MMUKHEAPSTAT; +#ifdef IN_RING3 +AssertCompileMemberAlignment(MMUKHEAPSTAT, cAllocations, 8); +#endif +/** Pointer to heap statistics record. */ +typedef MMUKHEAPSTAT *PMMUKHEAPSTAT; + +/** + * Sub heap tracking record. + */ +typedef struct MMUKHEAPSUB +{ + /** Pointer to the next sub-heap. */ + struct MMUKHEAPSUB *pNext; + /** The base address of the sub-heap. */ + void *pv; + /** The size of the sub-heap. */ + size_t cb; + /** The handle of the simple block pointer. */ + RTHEAPSIMPLE hSimple; + /** The ring-0 address corresponding to MMUKHEAPSUB::pv. */ + RTR0PTR pvR0; +} MMUKHEAPSUB; +/** Pointer to a sub-heap tracking record. */ +typedef MMUKHEAPSUB *PMMUKHEAPSUB; + + +/** MM User-kernel Heap structure. */ +typedef struct MMUKHEAP +{ + /** Lock protecting the heap. */ + RTCRITSECT Lock; + /** Head of the sub-heap LIFO. */ + PMMUKHEAPSUB pSubHeapHead; + /** Heap per tag statistics tree. */ + PAVLULNODECORE pStatTree; + /** The VM handle. */ + PUVM pUVM; +#if HC_ARCH_BITS == 32 + /** Aligning the statistics on an 8 byte boundary (for uint64_t and STAM). */ + void *pvAlignment; +#endif + /** Heap global statistics. */ + MMUKHEAPSTAT Stat; +} MMUKHEAP; +#ifdef IN_RING3 +AssertCompileMemberAlignment(MMUKHEAP, Stat, 8); +#endif +/** Pointer to MM Heap structure. */ +typedef MMUKHEAP *PMMUKHEAP; + +/** @} */ + + + +/** @name Hypervisor Heap Internals + * @{ + */ + +/** @def MMHYPER_HEAP_FREE_DELAY + * If defined, it indicates the number of frees that should be delayed. + */ +#if defined(DOXYGEN_RUNNING) +# define MMHYPER_HEAP_FREE_DELAY 64 +#endif + +/** @def MMHYPER_HEAP_FREE_POISON + * If defined, it indicates that freed memory should be poisoned + * with the value it has. + */ +#if defined(VBOX_STRICT) || defined(DOXYGEN_RUNNING) +# define MMHYPER_HEAP_FREE_POISON 0xcb +#endif + +/** @def MMHYPER_HEAP_STRICT + * Enables a bunch of assertions in the heap code. */ +#if defined(VBOX_STRICT) || defined(DOXYGEN_RUNNING) +# define MMHYPER_HEAP_STRICT 1 +# if 0 || defined(DOXYGEN_RUNNING) +/** @def MMHYPER_HEAP_STRICT_FENCE + * Enables tail fence. */ +# define MMHYPER_HEAP_STRICT_FENCE +/** @def MMHYPER_HEAP_STRICT_FENCE_SIZE + * The fence size in bytes. */ +# define MMHYPER_HEAP_STRICT_FENCE_SIZE 256 +/** @def MMHYPER_HEAP_STRICT_FENCE_U32 + * The fence filler. */ +# define MMHYPER_HEAP_STRICT_FENCE_U32 UINT32_C(0xdeadbeef) +# endif +#endif + +/** + * Hypervisor heap statistics record. + * There is one global and one per allocation tag. + */ +typedef struct MMHYPERSTAT +{ + /** Core avl node, key is the tag. + * @todo The type is wrong! Get your lazy a$$ over and create that offsetted uint32_t version we need here! */ + AVLOGCPHYSNODECORE Core; + /** Aligning the 64-bit fields on a 64-bit line. 
*/ + uint32_t u32Padding0; + /** Indicator for whether these statistics are registered with STAM or not. */ + bool fRegistered; + /** Number of allocation. */ + uint64_t cAllocations; + /** Number of frees. */ + uint64_t cFrees; + /** Failures. */ + uint64_t cFailures; + /** Number of bytes allocated (sum). */ + uint64_t cbAllocated; + /** Number of bytes freed (sum). */ + uint64_t cbFreed; + /** Number of bytes currently allocated. */ + uint32_t cbCurAllocated; + /** Max number of bytes allocated. */ + uint32_t cbMaxAllocated; +} MMHYPERSTAT; +AssertCompileMemberAlignment(MMHYPERSTAT, cAllocations, 8); +/** Pointer to hypervisor heap statistics record. */ +typedef MMHYPERSTAT *PMMHYPERSTAT; + +/** + * Hypervisor heap chunk. + */ +typedef struct MMHYPERCHUNK +{ + /** Previous block in the list of all blocks. + * This is relative to the start of the heap. */ + uint32_t offNext; + /** Offset to the previous block relative to this one. */ + int32_t offPrev; + /** The statistics record this allocation belongs to (self relative). */ + int32_t offStat; + /** Offset to the heap block (self relative). */ + int32_t offHeap; +} MMHYPERCHUNK; +/** Pointer to a hypervisor heap chunk. */ +typedef MMHYPERCHUNK *PMMHYPERCHUNK; + + +/** + * Hypervisor heap chunk. + */ +typedef struct MMHYPERCHUNKFREE +{ + /** Main list. */ + MMHYPERCHUNK core; + /** Offset of the next chunk in the list of free nodes. */ + uint32_t offNext; + /** Offset of the previous chunk in the list of free nodes. */ + int32_t offPrev; + /** Size of the block. */ + uint32_t cb; +} MMHYPERCHUNKFREE; +/** Pointer to a free hypervisor heap chunk. */ +typedef MMHYPERCHUNKFREE *PMMHYPERCHUNKFREE; + + +/** + * The hypervisor heap. + */ +typedef struct MMHYPERHEAP +{ + /** The typical magic (MMHYPERHEAP_MAGIC). */ + uint32_t u32Magic; + /** The heap size. (This structure is not included!) */ + uint32_t cbHeap; + /** Lock protecting the heap. */ + PDMCRITSECT Lock; + /** The HC ring-3 address of the heap. */ + R3PTRTYPE(uint8_t *) pbHeapR3; + /** The HC ring-3 address of the shared VM structure. */ + PVMR3 pVMR3; + /** The HC ring-0 address of the heap. */ + R0PTRTYPE(uint8_t *) pbHeapR0; + /** The HC ring-0 address of the shared VM structure. */ + PVMR0 pVMR0; + /** The RC address of the heap. */ + RCPTRTYPE(uint8_t *) pbHeapRC; + /** The RC address of the shared VM structure. */ + PVMRC pVMRC; + /** The amount of free memory in the heap. */ + uint32_t cbFree; + /** Offset of the first free chunk in the heap. + * The offset is relative to the start of the heap. */ + uint32_t offFreeHead; + /** Offset of the last free chunk in the heap. + * The offset is relative to the start of the heap. */ + uint32_t offFreeTail; + /** Offset of the first page aligned block in the heap. + * The offset is equal to cbHeap initially. */ + uint32_t offPageAligned; + /** Tree of hypervisor heap statistics. */ + AVLOGCPHYSTREE HyperHeapStatTree; +#ifdef MMHYPER_HEAP_FREE_DELAY + /** Where to insert the next free. */ + uint32_t iDelayedFree; + /** Array of delayed frees. Circular. Offsets relative to this structure. */ + struct + { + /** The free caller address. */ + RTUINTPTR uCaller; + /** The offset of the freed chunk. */ + uint32_t offChunk; + } aDelayedFrees[MMHYPER_HEAP_FREE_DELAY]; +#else + /** Padding the structure to a 64-bit aligned size. */ + uint32_t u32Padding0; +#endif + /** The heap physical pages. */ + R3PTRTYPE(PSUPPAGE) paPages; +#if HC_ARCH_BITS == 32 + /** Padding the structure to a 64-bit aligned size. 
*/ + uint32_t u32Padding1; +#endif +} MMHYPERHEAP; +/** Pointer to the hypervisor heap. */ +typedef MMHYPERHEAP *PMMHYPERHEAP; + +/** Magic value for MMHYPERHEAP. (C. S. Lewis) */ +#define MMHYPERHEAP_MAGIC UINT32_C(0x18981129) + + +/** + * Hypervisor heap minimum alignment (16 bytes). + */ +#define MMHYPER_HEAP_ALIGN_MIN 16 + +/** + * The aligned size of the MMHYPERHEAP structure. + */ +#define MMYPERHEAP_HDR_SIZE RT_ALIGN_Z(sizeof(MMHYPERHEAP), MMHYPER_HEAP_ALIGN_MIN * 4) + +/** @name Hypervisor heap chunk flags. + * The flags are put in the first bits of the MMHYPERCHUNK::offPrev member. + * These bits aren't used anyway because of the chunk minimal alignment (16 bytes). + * @{ */ +/** The chunk is free. (The code ASSUMES this is 0!) */ +#define MMHYPERCHUNK_FLAGS_FREE 0x0 +/** The chunk is in use. */ +#define MMHYPERCHUNK_FLAGS_USED 0x1 +/** The type mask. */ +#define MMHYPERCHUNK_FLAGS_TYPE_MASK 0x1 +/** The flag mask */ +#define MMHYPERCHUNK_FLAGS_MASK 0x1 + +/** Checks if the chunk is free. */ +#define MMHYPERCHUNK_ISFREE(pChunk) ( (((pChunk)->offPrev) & MMHYPERCHUNK_FLAGS_TYPE_MASK) == MMHYPERCHUNK_FLAGS_FREE ) +/** Checks if the chunk is used. */ +#define MMHYPERCHUNK_ISUSED(pChunk) ( (((pChunk)->offPrev) & MMHYPERCHUNK_FLAGS_TYPE_MASK) == MMHYPERCHUNK_FLAGS_USED ) +/** Toggles FREE/USED flag of a chunk. */ +#define MMHYPERCHUNK_SET_TYPE(pChunk, type) do { (pChunk)->offPrev = ((pChunk)->offPrev & ~MMHYPERCHUNK_FLAGS_TYPE_MASK) | ((type) & MMHYPERCHUNK_FLAGS_TYPE_MASK); } while (0) + +/** Gets the prev offset without the flags. */ +#define MMHYPERCHUNK_GET_OFFPREV(pChunk) ((int32_t)((pChunk)->offPrev & ~MMHYPERCHUNK_FLAGS_MASK)) +/** Sets the prev offset without changing the flags. */ +#define MMHYPERCHUNK_SET_OFFPREV(pChunk, off) do { (pChunk)->offPrev = (off) | ((pChunk)->offPrev & MMHYPERCHUNK_FLAGS_MASK); } while (0) +#if 0 +/** Clears one or more flags. */ +#define MMHYPERCHUNK_FLAGS_OP_CLEAR(pChunk, fFlags) do { ((pChunk)->offPrev) &= ~((fFlags) & MMHYPERCHUNK_FLAGS_MASK); } while (0) +/** Sets one or more flags. */ +#define MMHYPERCHUNK_FLAGS_OP_SET(pChunk, fFlags) do { ((pChunk)->offPrev) |= ((fFlags) & MMHYPERCHUNK_FLAGS_MASK); } while (0) +/** Checks if one is set. */ +#define MMHYPERCHUNK_FLAGS_OP_ISSET(pChunk, fFlag) (!!(((pChunk)->offPrev) & ((fFlag) & MMHYPERCHUNK_FLAGS_MASK))) +#endif +/** @} */ + +/** @} */ + +/** + * Hypervisor memory mapping type. + */ +typedef enum MMLOOKUPHYPERTYPE +{ + /** Invalid record. This is used for record which are incomplete. */ + MMLOOKUPHYPERTYPE_INVALID = 0, + /** Mapping of locked memory. */ + MMLOOKUPHYPERTYPE_LOCKED, + /** Mapping of contiguous HC physical memory. */ + MMLOOKUPHYPERTYPE_HCPHYS, + /** Mapping of contiguous GC physical memory. */ + MMLOOKUPHYPERTYPE_GCPHYS, + /** Mapping of MMIO2 memory. */ + MMLOOKUPHYPERTYPE_MMIO2, + /** Dynamic mapping area (MMR3HyperReserve). + * A conversion will require to check what's in the page table for the pages. */ + MMLOOKUPHYPERTYPE_DYNAMIC +} MMLOOKUPHYPERTYPE; + +/** + * Lookup record for the hypervisor memory area. + */ +typedef struct MMLOOKUPHYPER +{ + /** Byte offset from the start of this record to the next. + * If the value is NIL_OFFSET the chain is terminated. */ + int32_t offNext; + /** Offset into the hypervisor memory area. */ + uint32_t off; + /** Size of this part. */ + uint32_t cb; + /** Locking type. */ + MMLOOKUPHYPERTYPE enmType; + /** Type specific data */ + union + { + /** Locked memory. */ + struct + { + /** Host context ring-3 pointer. 
*/ + R3PTRTYPE(void *) pvR3; + /** Host context ring-0 pointer. Optional. */ + RTR0PTR pvR0; + /** Pointer to an array containing the physical address of each page. */ + R3PTRTYPE(PRTHCPHYS) paHCPhysPages; + } Locked; + + /** Contiguous physical memory. */ + struct + { + /** Host context ring-3 pointer. */ + R3PTRTYPE(void *) pvR3; + /** Host context ring-0 pointer. Optional. */ + RTR0PTR pvR0; + /** HC physical address corresponding to pvR3/pvR0. */ + RTHCPHYS HCPhys; + } HCPhys; + + /** Contiguous guest physical memory. */ + struct + { + /** The memory address (Guest Context). */ + RTGCPHYS GCPhys; + } GCPhys; + + /** MMIO2 memory. */ + struct + { + /** The device instance owning the MMIO2 region. */ + PPDMDEVINSR3 pDevIns; + /** The sub-device number. */ + uint32_t iSubDev; + /** The region number. */ + uint32_t iRegion; +#if HC_ARCH_BITS == 32 + /** Alignment padding. */ + uint32_t uPadding; +#endif + /** The offset into the MMIO2 region. */ + RTGCPHYS off; + } MMIO2; + } u; + /** Description. */ + R3PTRTYPE(const char *) pszDesc; +} MMLOOKUPHYPER; +/** Pointer to a hypervisor memory lookup record. */ +typedef MMLOOKUPHYPER *PMMLOOKUPHYPER; + + +/** + * Converts a MM pointer into a VM pointer. + * @returns Pointer to the VM structure the MM is part of. + * @param pMM Pointer to MM instance data. + */ +#define MM2VM(pMM) ( (PVM)((uint8_t *)pMM - pMM->offVM) ) + + +/** + * MM Data (part of VM) + */ +typedef struct MM +{ + /** Offset to the VM structure. + * See MM2VM(). */ + RTINT offVM; + + /** Set if MMR3InitPaging has been called. */ + bool fDoneMMR3InitPaging; + /** Set if PGM has been initialized and we can safely call PGMR3Map(). */ + bool fPGMInitialized; +#if GC_ARCH_BITS == 64 || HC_ARCH_BITS == 64 + uint32_t u32Padding1; /**< alignment padding. */ +#endif + + /** Lookup list for the Hypervisor Memory Area. + * The offset is relative to the start of the heap. + * Use pHyperHeapR3, pHyperHeapR0 or pHypeRHeapRC to calculate the address. + */ + RTUINT offLookupHyper; + + /** The offset of the next static mapping in the Hypervisor Memory Area. */ + RTUINT offHyperNextStatic; + /** The size of the HMA. + * Starts at 12MB and will be fixed late in the init process. */ + RTUINT cbHyperArea; + + /** Guest address of the Hypervisor Memory Area. + * @remarks It's still a bit open whether this should be change to RTRCPTR or + * remain a RTGCPTR. */ + RTGCPTR pvHyperAreaGC; + + /** The hypervisor heap (GC Ptr). */ + RCPTRTYPE(PMMHYPERHEAP) pHyperHeapRC; +#if HC_ARCH_BITS == 64 && GC_ARCH_BITS == 64 + uint32_t u32Padding2; +#endif + + /** The hypervisor heap (R0 Ptr). */ + R0PTRTYPE(PMMHYPERHEAP) pHyperHeapR0; + + /** The hypervisor heap (R3 Ptr). */ + R3PTRTYPE(PMMHYPERHEAP) pHyperHeapR3; + + /** Pointer to the dummy page. + * The dummy page is a paranoia thingy used for instance for pure MMIO RAM ranges + * to make sure any bugs will not harm whatever the system stores in the first + * physical page. */ + R3PTRTYPE(void *) pvDummyPage; + /** Physical address of the dummy page. */ + RTHCPHYS HCPhysDummyPage; + + /** Size of the base RAM in bytes. (The CFGM RamSize value.) */ + uint64_t cbRamBase; + /** Number of bytes of RAM above 4GB, starting at address 4GB. */ + uint64_t cbRamAbove4GB; + /** Size of the below 4GB RAM hole. */ + uint32_t cbRamHole; + /** Number of bytes of RAM below 4GB, starting at address 0. */ + uint32_t cbRamBelow4GB; + /** The number of base RAM pages that PGM has reserved (GMM). 
+ * @remarks Shadow ROMs will be counted twice (RAM+ROM), so it won't be 1:1 with + * what the guest sees. */ + uint64_t cBasePages; + /** The number of handy pages that PGM has reserved (GMM). + * These are kept out of cBasePages and thus out of the saved state. */ + uint32_t cHandyPages; + /** The number of shadow pages PGM has reserved (GMM). */ + uint32_t cShadowPages; + /** The number of fixed pages we've reserved (GMM). */ + uint32_t cFixedPages; + /** Padding. */ + uint32_t u32Padding0; +} MM; +/** Pointer to MM Data (part of VM). */ +typedef MM *PMM; + + +/** + * MM data kept in the UVM. + */ +typedef struct MMUSERPERVM +{ + /** Pointer to the MM R3 Heap. */ + R3PTRTYPE(PMMHEAP) pHeap; + /** Pointer to the MM Uk Heap. */ + R3PTRTYPE(PMMUKHEAP) pUkHeap; +} MMUSERPERVM; +/** Pointer to the MM data kept in the UVM. */ +typedef MMUSERPERVM *PMMUSERPERVM; + + +RT_C_DECLS_BEGIN + + +int mmR3UpdateReservation(PVM pVM); + +int mmR3HeapCreateU(PUVM pUVM, PMMHEAP *ppHeap); +void mmR3HeapDestroy(PMMHEAP pHeap); + +void mmR3UkHeapDestroy(PMMUKHEAP pHeap); +int mmR3UkHeapCreateU(PUVM pUVM, PMMUKHEAP *ppHeap); + + +int mmR3HyperInit(PVM pVM); +int mmR3HyperTerm(PVM pVM); +int mmR3HyperInitPaging(PVM pVM); + +const char *mmGetTagName(MMTAG enmTag); + +RT_C_DECLS_END + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_MMInternal_h */ + diff --git a/src/VBox/VMM/include/NEMInternal.h b/src/VBox/VMM/include/NEMInternal.h new file mode 100644 index 00000000..523a39eb --- /dev/null +++ b/src/VBox/VMM/include/NEMInternal.h @@ -0,0 +1,453 @@ +/* $Id: NEMInternal.h $ */ +/** @file + * NEM - Internal header file. + */ + +/* + * Copyright (C) 2018-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_NEMInternal_h +#define VMM_INCLUDED_SRC_include_NEMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/nem.h> +#include <VBox/vmm/cpum.h> /* For CPUMCPUVENDOR. */ +#include <VBox/vmm/stam.h> +#include <VBox/vmm/vmapi.h> +#ifdef RT_OS_WINDOWS +#include <iprt/nt/hyperv.h> +#include <iprt/critsect.h> +#endif + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_nem_int Internal + * @ingroup grp_nem + * @internal + * @{ + */ + + +#ifdef RT_OS_WINDOWS +/* + * Windows: Code configuration. + */ +# define NEM_WIN_USE_HYPERCALLS_FOR_PAGES +//# define NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS /**< Applies to ring-3 code only. Useful for testing VID API. */ +//# define NEM_WIN_USE_OUR_OWN_RUN_API /**< Applies to ring-3 code only. Useful for testing VID API. */ +//# define NEM_WIN_WITH_RING0_RUNLOOP /**< Enables the ring-0 runloop. */ +//# define NEM_WIN_USE_RING0_RUNLOOP_BY_DEFAULT /**< For quickly testing ring-3 API without messing with CFGM. 
*/ +# if defined(NEM_WIN_USE_OUR_OWN_RUN_API) && !defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS) +# error "NEM_WIN_USE_OUR_OWN_RUN_API requires NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS" +# endif +# if defined(NEM_WIN_USE_OUR_OWN_RUN_API) && !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) +# error "NEM_WIN_USE_OUR_OWN_RUN_API requires NEM_WIN_USE_HYPERCALLS_FOR_PAGES" +# endif +# if defined(NEM_WIN_WITH_RING0_RUNLOOP) && !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) +# error "NEM_WIN_WITH_RING0_RUNLOOP requires NEM_WIN_USE_HYPERCALLS_FOR_PAGES" +# endif + +/** + * Windows VID I/O control information. + */ +typedef struct NEMWINIOCTL +{ + /** The I/O control function number. */ + uint32_t uFunction; + uint32_t cbInput; + uint32_t cbOutput; +} NEMWINIOCTL; + +/** @name Windows: Our two-bit physical page state for PGMPAGE + * @{ */ +# define NEM_WIN_PAGE_STATE_NOT_SET 0 +# define NEM_WIN_PAGE_STATE_UNMAPPED 1 +# define NEM_WIN_PAGE_STATE_READABLE 2 +# define NEM_WIN_PAGE_STATE_WRITABLE 3 +/** @} */ + +/** Windows: Checks if a_GCPhys is subject to the limited A20 gate emulation. */ +# define NEM_WIN_IS_SUBJECT_TO_A20(a_GCPhys) ((RTGCPHYS)((a_GCPhys) - _1M) < (RTGCPHYS)_64K) +/** Windows: Checks if a_GCPhys is relevant to the limited A20 gate emulation. */ +# define NEM_WIN_IS_RELEVANT_TO_A20(a_GCPhys) \ + ( ((RTGCPHYS)((a_GCPhys) - _1M) < (RTGCPHYS)_64K) || ((RTGCPHYS)(a_GCPhys) < (RTGCPHYS)_64K) ) + +/** The CPUMCTX_EXTRN_XXX mask for IEM. */ +# define NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM ( IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT \ + | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI ) +/** The CPUMCTX_EXTRN_XXX mask for IEM when raising exceptions. */ +# define NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT (IEM_CPUMCTX_EXTRN_XCPT_MASK | NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM) + +/** @name Windows: Interrupt window flags (NEM_WIN_INTW_F_XXX). + * @{ */ +# define NEM_WIN_INTW_F_NMI UINT8_C(0x01) +# define NEM_WIN_INTW_F_REGULAR UINT8_C(0x02) +# define NEM_WIN_INTW_F_PRIO_MASK UINT8_C(0x3c) +# define NEM_WIN_INTW_F_PRIO_SHIFT 2 +/** @} */ + +#endif /* RT_OS_WINDOWS */ + + +/** Trick to make slickedit see the static functions in the template. */ +#ifndef IN_SLICKEDIT +# define NEM_TMPL_STATIC static +#else +# define NEM_TMPL_STATIC +#endif + + +/** + * Generic NEM exit type enumeration for use with EMHistoryAddExit. + * + * On windows we've got two different set of exit types and they are both jumping + * around the place value wise, so EM can use their values. + * + * @note We only have exit types for exits not covered by EM here. + */ +typedef enum NEMEXITTYPE +{ + /* windows: */ + NEMEXITTYPE_UNRECOVERABLE_EXCEPTION = 1, + NEMEXITTYPE_INVALID_VP_REGISTER_VALUE, + NEMEXITTYPE_INTTERRUPT_WINDOW, + NEMEXITTYPE_HALT, + NEMEXITTYPE_XCPT_UD, + NEMEXITTYPE_XCPT_DB, + NEMEXITTYPE_XCPT_BP, + NEMEXITTYPE_CANCELED, + NEMEXITTYPE_MEMORY_ACCESS +} NEMEXITTYPE; + + +/** + * NEM VM Instance data. + */ +typedef struct NEM +{ + /** NEM_MAGIC. */ + uint32_t u32Magic; + + /** Set if enabled. */ + bool fEnabled; + /** Set if long mode guests are allowed. */ + bool fAllow64BitGuests; +#ifdef RT_OS_WINDOWS + /** Set if we've created the EMTs. */ + bool fCreatedEmts : 1; + /** WHvRunVpExitReasonX64Cpuid is supported. */ + bool fExtendedMsrExit : 1; + /** WHvRunVpExitReasonX64MsrAccess is supported. */ + bool fExtendedCpuIdExit : 1; + /** WHvRunVpExitReasonException is supported. */ + bool fExtendedXcptExit : 1; + /** Set if we're using the ring-0 API to do the work. 
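/* Illustrative sketch (not part of the upstream header): the unsigned
 * subtraction in NEM_WIN_IS_SUBJECT_TO_A20 turns the check
 * "1MB <= GCPhys < 1MB + 64KB" into a single compare, because addresses below
 * 1MB wrap around to huge values. With _1M/_64K from iprt/cdefs.h: */
#if 0 /* example only */
AssertCompile(!NEM_WIN_IS_SUBJECT_TO_A20(_1M - 1));         /* 0x0FFFFF: just below the wrap area */
AssertCompile( NEM_WIN_IS_SUBJECT_TO_A20(_1M));             /* 0x100000: first byte affected by A20 */
AssertCompile( NEM_WIN_IS_SUBJECT_TO_A20(_1M + _64K - 1));  /* 0x10FFFF: last byte affected */
AssertCompile(!NEM_WIN_IS_SUBJECT_TO_A20(_1M + _64K));      /* 0x110000: no longer affected */
#endif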
*/ + bool fUseRing0Runloop : 1; + /** Set if we've started more than one CPU and cannot mess with A20. */ + bool fA20Fixed : 1; + /** Set if A20 is enabled. */ + bool fA20Enabled : 1; + /** The reported CPU vendor. */ + CPUMCPUVENDOR enmCpuVendor; + /** Cache line flush size as a power of two. */ + uint8_t cCacheLineFlushShift; + /** The result of WHvCapabilityCodeProcessorFeatures. */ + union + { + /** 64-bit view. */ + uint64_t u64; +# ifdef _WINHVAPIDEFS_H_ + /** Interpreed features. */ + WHV_PROCESSOR_FEATURES u; +# endif + } uCpuFeatures; + + /** The partition handle. */ +# ifdef _WINHVAPIDEFS_H_ + WHV_PARTITION_HANDLE +# else + RTHCUINTPTR +# endif + hPartition; + /** The device handle for the partition, for use with Vid APIs or direct I/O + * controls. */ + RTR3PTR hPartitionDevice; + /** The Hyper-V partition ID. */ + uint64_t idHvPartition; + + /** Number of currently mapped pages. */ + uint32_t volatile cMappedPages; + + /** Info about the VidGetHvPartitionId I/O control interface. */ + NEMWINIOCTL IoCtlGetHvPartitionId; + /** Info about the VidStartVirtualProcessor I/O control interface. */ + NEMWINIOCTL IoCtlStartVirtualProcessor; + /** Info about the VidStopVirtualProcessor I/O control interface. */ + NEMWINIOCTL IoCtlStopVirtualProcessor; + /** Info about the VidStopVirtualProcessor I/O control interface. */ + NEMWINIOCTL IoCtlMessageSlotHandleAndGetNext; + + /** Statistics updated by NEMR0UpdateStatistics. */ + struct + { + uint64_t cPagesAvailable; + uint64_t cPagesInUse; + } R0Stats; +#endif /* RT_OS_WINDOWS */ +} NEM; +/** Pointer to NEM VM instance data. */ +typedef NEM *PNEM; + +/** NEM::u32Magic value. */ +#define NEM_MAGIC UINT32_C(0x004d454e) +/** NEM::u32Magic value after termination. */ +#define NEM_MAGIC_DEAD UINT32_C(0xdead1111) + + +/** + * NEM VMCPU Instance data. + */ +typedef struct NEMCPU +{ + /** NEMCPU_MAGIC. */ + uint32_t u32Magic; + /** Whether \#UD needs to be intercepted and presented to GIM. */ + bool fGIMTrapXcptUD : 1; + /** Whether \#GP needs to be intercept for mesa driver workaround. */ + bool fTrapXcptGpForLovelyMesaDrv: 1; +#ifdef RT_OS_WINDOWS + /** The current state of the interrupt windows (NEM_WIN_INTW_F_XXX). */ + uint8_t fCurrentInterruptWindows; + /** The desired state of the interrupt windows (NEM_WIN_INTW_F_XXX). */ + uint8_t fDesiredInterruptWindows; + /** Last copy of HV_X64_VP_EXECUTION_STATE::InterruptShadow. */ + bool fLastInterruptShadow : 1; +# ifdef NEM_WIN_WITH_RING0_RUNLOOP + /** Pending VINF_NEM_FLUSH_TLB. */ + int32_t rcPending; +# else + uint32_t uPadding; +# endif + /** The VID_MSHAGN_F_XXX flags. + * Either VID_MSHAGN_F_HANDLE_MESSAGE | VID_MSHAGN_F_GET_NEXT_MESSAGE or zero. */ + uint32_t fHandleAndGetFlags; + /** What VidMessageSlotMap returns and is used for passing exit info. */ + RTR3PTR pvMsgSlotMapping; + /** The windows thread handle. */ + RTR3PTR hNativeThreadHandle; + /** Parameters for making Hyper-V hypercalls. */ + union + { + uint8_t ab[64]; + /** Arguments for NEMR0MapPages (HvCallMapGpaPages). */ + struct + { + RTGCPHYS GCPhysSrc; + RTGCPHYS GCPhysDst; /**< Same as GCPhysSrc except maybe when the A20 gate is disabled. */ + uint32_t cPages; + HV_MAP_GPA_FLAGS fFlags; + } MapPages; + /** Arguments for NEMR0UnmapPages (HvCallUnmapGpaPages). */ + struct + { + RTGCPHYS GCPhys; + uint32_t cPages; + } UnmapPages; + /** Result from NEMR0QueryCpuTick. */ + struct + { + uint64_t cTicks; + uint32_t uAux; + } QueryCpuTick; + /** Input and output for NEMR0DoExperiment. 
*/ + struct + { + uint32_t uItem; + bool fSuccess; + uint64_t uStatus; + uint64_t uLoValue; + uint64_t uHiValue; + } Experiment; + } Hypercall; + /** I/O control buffer, we always use this for I/O controls. */ + union + { + uint8_t ab[64]; + HV_PARTITION_ID idPartition; + HV_VP_INDEX idCpu; +# ifdef VID_MSHAGN_F_GET_NEXT_MESSAGE + VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT MsgSlotHandleAndGetNext; +# endif + } uIoCtlBuf; + + /** @name Statistics + * @{ */ + STAMCOUNTER StatExitPortIo; + STAMCOUNTER StatExitMemUnmapped; + STAMCOUNTER StatExitMemIntercept; + STAMCOUNTER StatExitHalt; + STAMCOUNTER StatExitInterruptWindow; + STAMCOUNTER StatExitCpuId; + STAMCOUNTER StatExitMsr; + STAMCOUNTER StatExitException; + STAMCOUNTER StatExitExceptionBp; + STAMCOUNTER StatExitExceptionDb; + STAMCOUNTER StatExitExceptionGp; + STAMCOUNTER StatExitExceptionGpMesa; + STAMCOUNTER StatExitExceptionUd; + STAMCOUNTER StatExitExceptionUdHandled; + STAMCOUNTER StatExitUnrecoverable; + STAMCOUNTER StatGetMsgTimeout; + STAMCOUNTER StatStopCpuSuccess; + STAMCOUNTER StatStopCpuPending; + STAMCOUNTER StatStopCpuPendingAlerts; + STAMCOUNTER StatStopCpuPendingOdd; + STAMCOUNTER StatCancelChangedState; + STAMCOUNTER StatCancelAlertedThread; + STAMCOUNTER StatBreakOnCancel; + STAMCOUNTER StatBreakOnFFPre; + STAMCOUNTER StatBreakOnFFPost; + STAMCOUNTER StatBreakOnStatus; + STAMCOUNTER StatImportOnDemand; + STAMCOUNTER StatImportOnReturn; + STAMCOUNTER StatImportOnReturnSkipped; + STAMCOUNTER StatQueryCpuTick; + /** @} */ +#endif /* RT_OS_WINDOWS */ +} NEMCPU; +/** Pointer to NEM VMCPU instance data. */ +typedef NEMCPU *PNEMCPU; + +/** NEMCPU::u32Magic value. */ +#define NEMCPU_MAGIC UINT32_C(0x4d454e20) +/** NEMCPU::u32Magic value after termination. */ +#define NEMCPU_MAGIC_DEAD UINT32_C(0xdead2222) + + +#ifdef IN_RING0 +# ifdef RT_OS_WINDOWS +/** + * Windows: Hypercall input/ouput page info. + */ +typedef struct NEMR0HYPERCALLDATA +{ + /** Host physical address of the hypercall input/output page. */ + RTHCPHYS HCPhysPage; + /** Pointer to the hypercall input/output page. */ + uint8_t *pbPage; + /** Handle to the memory object of the hypercall input/output page. */ + RTR0MEMOBJ hMemObj; +} NEMR0HYPERCALLDATA; +/** Pointer to a Windows hypercall input/output page info. */ +typedef NEMR0HYPERCALLDATA *PNEMR0HYPERCALLDATA; +# endif /* RT_OS_WINDOWS */ + +/** + * NEM GVMCPU instance data. + */ +typedef struct NEMR0PERVCPU +{ +# ifdef RT_OS_WINDOWS + /** Hypercall input/ouput page. */ + NEMR0HYPERCALLDATA HypercallData; + /** Delta to add to convert a ring-0 pointer to a ring-3 one. */ + uintptr_t offRing3ConversionDelta; +# else + uint32_t uDummy; +# endif +} NEMR0PERVCPU; + +/** + * NEM GVM instance data. + */ +typedef struct NEMR0PERVM +{ +# ifdef RT_OS_WINDOWS + /** The partition ID. */ + uint64_t idHvPartition; + /** I/O control context. */ + PSUPR0IOCTLCTX pIoCtlCtx; + /** Info about the VidGetHvPartitionId I/O control interface. */ + NEMWINIOCTL IoCtlGetHvPartitionId; + /** Info about the VidStartVirtualProcessor I/O control interface. */ + NEMWINIOCTL IoCtlStartVirtualProcessor; + /** Info about the VidStopVirtualProcessor I/O control interface. */ + NEMWINIOCTL IoCtlStopVirtualProcessor; + /** Info about the VidStopVirtualProcessor I/O control interface. */ + NEMWINIOCTL IoCtlMessageSlotHandleAndGetNext; + /** Whether we may use the ring-0 runloop or not. */ + bool fMayUseRing0Runloop; + + /** Hypercall input/ouput page for non-EMT. 
*/ + NEMR0HYPERCALLDATA HypercallData; + /** Critical section protecting use of HypercallData. */ + RTCRITSECT HypercallDataCritSect; + +# else + uint32_t uDummy; +# endif +} NEMR0PERVM; + +#endif /* IN_RING*/ + + +#ifdef IN_RING3 +int nemR3NativeInit(PVM pVM, bool fFallback, bool fForced); +int nemR3NativeInitAfterCPUM(PVM pVM); +int nemR3NativeInitCompleted(PVM pVM, VMINITCOMPLETED enmWhat); +int nemR3NativeTerm(PVM pVM); +void nemR3NativeReset(PVM pVM); +void nemR3NativeResetCpu(PVMCPU pVCpu, bool fInitIpi); +VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu); +bool nemR3NativeCanExecuteGuest(PVM pVM, PVMCPU pVCpu); +bool nemR3NativeSetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable); +void nemR3NativeNotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags); + +int nemR3NativeNotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb); +int nemR3NativeNotifyPhysMmioExMap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvMmio2); +int nemR3NativeNotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags); +int nemR3NativeNotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags); +int nemR3NativeNotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags); +void nemR3NativeNotifySetA20(PVMCPU pVCpu, bool fEnabled); +#endif + +void nemHCNativeNotifyHandlerPhysicalRegister(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb); +void nemHCNativeNotifyHandlerPhysicalDeregister(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, + int fRestoreAsRAM, bool fRestoreAsRAM2); +void nemHCNativeNotifyHandlerPhysicalModify(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, + RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fRestoreAsRAM); +int nemHCNativeNotifyPhysPageAllocated(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys, uint32_t fPageProt, + PGMPAGETYPE enmType, uint8_t *pu2State); +void nemHCNativeNotifyPhysPageProtChanged(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys, uint32_t fPageProt, + PGMPAGETYPE enmType, uint8_t *pu2State); +void nemHCNativeNotifyPhysPageChanged(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhysPrev, RTHCPHYS HCPhysNew, uint32_t fPageProt, + PGMPAGETYPE enmType, uint8_t *pu2State); + + +#ifdef RT_OS_WINDOWS +/** Maximum number of pages we can map in a single NEMR0MapPages call. */ +# define NEM_MAX_MAP_PAGES ((PAGE_SIZE - RT_UOFFSETOF(HV_INPUT_MAP_GPA_PAGES, PageList)) / sizeof(HV_SPA_PAGE_NUMBER)) +/** Maximum number of pages we can unmap in a single NEMR0UnmapPages call. */ +# define NEM_MAX_UNMAP_PAGES 4095 + +#endif +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_NEMInternal_h */ + diff --git a/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h b/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h new file mode 100644 index 00000000..bf3d2958 --- /dev/null +++ b/src/VBox/VMM/include/PDMAsyncCompletionFileInternal.h @@ -0,0 +1,566 @@ +/* $Id: PDMAsyncCompletionFileInternal.h $ */ +/** @file + * PDM Async I/O - Transport data asynchronous in R3 using EMT. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_PDMAsyncCompletionFileInternal_h +#define VMM_INCLUDED_SRC_include_PDMAsyncCompletionFileInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/vmm/cfgm.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/tm.h> +#include <iprt/types.h> +#include <iprt/file.h> +#include <iprt/thread.h> +#include <iprt/semaphore.h> +#include <iprt/critsect.h> +#include <iprt/avl.h> +#include <iprt/list.h> +#include <iprt/spinlock.h> +#include <iprt/memcache.h> + +#include "PDMAsyncCompletionInternal.h" + +/** @todo Revise the caching of tasks. We currently have four caches: + * Per endpoint task cache + * Per class cache + * Per endpoint task segment cache + * Per class task segment cache + * + * We could probably use the RT heap for this, or extend MMR3Heap (which uses RTMemAlloc + * instead of managing larger blocks) to make this global for the whole VM. + */ + +/** Enable for delay injection from the debugger. */ +#if 0 +# define PDM_ASYNC_COMPLETION_FILE_WITH_DELAY +#endif + +RT_C_DECLS_BEGIN + +/** + * A few forward declarations. + */ +typedef struct PDMASYNCCOMPLETIONENDPOINTFILE *PPDMASYNCCOMPLETIONENDPOINTFILE; +/** Pointer to a request segment. */ +typedef struct PDMACTASKFILE *PPDMACTASKFILE; +/** Pointer to the endpoint class data. */ +typedef struct PDMASYNCCOMPLETIONTASKFILE *PPDMASYNCCOMPLETIONTASKFILE; +/** Pointer to a cache LRU list. */ +typedef struct PDMACFILELRULIST *PPDMACFILELRULIST; +/** Pointer to the global cache structure. */ +typedef struct PDMACFILECACHEGLOBAL *PPDMACFILECACHEGLOBAL; +/** Pointer to a task segment. */ +typedef struct PDMACFILETASKSEG *PPDMACFILETASKSEG; + +/** + * Blocking event types. + */ +typedef enum PDMACEPFILEAIOMGRBLOCKINGEVENT +{ + /** Invalid type. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID = 0, + /** An endpoint is added to the manager. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT, + /** An endpoint is removed from the manager. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT, + /** An endpoint is about to be closed. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT, + /** The manager is requested to terminate. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN, + /** The manager is requested to suspend. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND, + /** The manager is requested to resume. */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME, + /** 32bit hack */ + PDMACEPFILEAIOMGRBLOCKINGEVENT_32BIT_HACK = 0x7fffffff +} PDMACEPFILEAIOMGRBLOCKINGEVENT; + +/** + * I/O manager type. + */ +typedef enum PDMACEPFILEMGRTYPE +{ + /** Simple aka failsafe. */ + PDMACEPFILEMGRTYPE_SIMPLE = 0, + /** Async I/O with host cache enabled. */ + PDMACEPFILEMGRTYPE_ASYNC, + /** 32bit hack */ + PDMACEPFILEMGRTYPE_32BIT_HACK = 0x7fffffff +} PDMACEPFILEMGRTYPE; +/** Pointer to an I/O manager type. */ +typedef PDMACEPFILEMGRTYPE *PPDMACEPFILEMGRTYPE; + +/** + * States of the I/O manager. + */ +typedef enum PDMACEPFILEMGRSTATE +{ + /** Invalid state. */ + PDMACEPFILEMGRSTATE_INVALID = 0, + /** Normal running state accepting new requests + * and processing them. + */ + PDMACEPFILEMGRSTATE_RUNNING, + /** Fault state - not accepting new tasks for endpoints but waiting for + * remaining ones to finish. + */ + PDMACEPFILEMGRSTATE_FAULT, + /** Suspending state - not accepting new tasks for endpoints but waiting + * for remaining ones to finish.
+ */ + PDMACEPFILEMGRSTATE_SUSPENDING, + /** Shutdown state - not accepting new tasks for endpoints but waiting + * for remaining ones to finish. + */ + PDMACEPFILEMGRSTATE_SHUTDOWN, + /** The I/O manager waits for all active requests to complete and doesn't queue + * new ones because it needs to grow to handle more requests. + */ + PDMACEPFILEMGRSTATE_GROWING, + /** 32bit hack */ + PDMACEPFILEMGRSTATE_32BIT_HACK = 0x7fffffff +} PDMACEPFILEMGRSTATE; + +/** + * State of a async I/O manager. + */ +typedef struct PDMACEPFILEMGR +{ + /** Next Aio manager in the list. */ + R3PTRTYPE(struct PDMACEPFILEMGR *) pNext; + /** Previous Aio manager in the list. */ + R3PTRTYPE(struct PDMACEPFILEMGR *) pPrev; + /** Manager type */ + PDMACEPFILEMGRTYPE enmMgrType; + /** Current state of the manager. */ + PDMACEPFILEMGRSTATE enmState; + /** Event semaphore the manager sleeps on when waiting for new requests. */ + RTSEMEVENT EventSem; + /** Flag whether the thread waits in the event semaphore. */ + volatile bool fWaitingEventSem; + /** Thread data */ + RTTHREAD Thread; + /** The async I/O context for this manager. */ + RTFILEAIOCTX hAioCtx; + /** Flag whether the I/O manager was woken up. */ + volatile bool fWokenUp; + /** List of endpoints assigned to this manager. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINTFILE) pEndpointsHead; + /** Number of endpoints assigned to the manager. */ + unsigned cEndpoints; + /** Number of requests active currently. */ + unsigned cRequestsActive; + /** Number of maximum requests active. */ + uint32_t cRequestsActiveMax; + /** Pointer to an array of free async I/O request handles. */ + RTFILEAIOREQ *pahReqsFree; + /** Index of the next free entry in the cache. */ + uint32_t iFreeEntry; + /** Size of the array. */ + unsigned cReqEntries; + /** Memory cache for file range locks. */ + RTMEMCACHE hMemCacheRangeLocks; + /** Number of milliseconds to wait until the bandwidth is refreshed for at least + * one endpoint and it is possible to process more requests. */ + RTMSINTERVAL msBwLimitExpired; + /** Critical section protecting the blocking event handling. */ + RTCRITSECT CritSectBlockingEvent; + /** Event semaphore for blocking external events. + * The caller waits on it until the async I/O manager + * finished processing the event. */ + RTSEMEVENT EventSemBlock; + /** Flag whether a blocking event is pending and needs + * processing by the I/O manager. */ + volatile bool fBlockingEventPending; + /** Blocking event type */ + volatile PDMACEPFILEAIOMGRBLOCKINGEVENT enmBlockingEvent; + /** Event type data */ + union + { + /** Add endpoint event. */ + struct + { + /** The endpoint to be added */ + volatile PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint; + } AddEndpoint; + /** Remove endpoint event. */ + struct + { + /** The endpoint to be removed */ + volatile PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint; + } RemoveEndpoint; + /** Close endpoint event. */ + struct + { + /** The endpoint to be closed */ + volatile PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint; + } CloseEndpoint; + } BlockingEventData; +} PDMACEPFILEMGR; +/** Pointer to a async I/O manager state. */ +typedef PDMACEPFILEMGR *PPDMACEPFILEMGR; +/** Pointer to a async I/O manager state pointer. */ +typedef PPDMACEPFILEMGR *PPPDMACEPFILEMGR; + +/** + * A file access range lock. + */ +typedef struct PDMACFILERANGELOCK +{ + /** AVL node in the locked range tree of the endpoint. */ + AVLRFOFFNODECORE Core; + /** How many tasks have locked this range. */ + uint32_t cRefs; + /** Flag whether this is a read or write lock. 
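/* Illustrative sketch (not part of the upstream header): the blocking event
 * handshake with an async I/O manager. The caller publishes the event, wakes
 * the manager thread and then sleeps on EventSemBlock until the manager has
 * processed it. Error handling is simplified and the helper name is made up. */
#if 0 /* example only */
static int pdmacFileAioMgrExampleWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
{
    int rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);  /* only one blocking event at a time */
    AssertRCReturn(rc, rc);

    pAioMgr->enmBlockingEvent = enmEvent;
    ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, true);

    rc = RTSemEventSignal(pAioMgr->EventSem);                   /* wake up the I/O manager thread */
    if (RT_SUCCESS(rc))
        rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT);

    RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
    return rc;
}
#endif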
*/ + bool fReadLock; + /** List of tasks which are waiting that the range gets unlocked. */ + PPDMACTASKFILE pWaitingTasksHead; + /** List of tasks which are waiting that the range gets unlocked. */ + PPDMACTASKFILE pWaitingTasksTail; +} PDMACFILERANGELOCK, *PPDMACFILERANGELOCK; + +/** + * Backend type for the endpoint. + */ +typedef enum PDMACFILEEPBACKEND +{ + /** Non buffered. */ + PDMACFILEEPBACKEND_NON_BUFFERED = 0, + /** Buffered (i.e host cache enabled) */ + PDMACFILEEPBACKEND_BUFFERED, + /** 32bit hack */ + PDMACFILEEPBACKEND_32BIT_HACK = 0x7fffffff +} PDMACFILEEPBACKEND; +/** Pointer to a backend type. */ +typedef PDMACFILEEPBACKEND *PPDMACFILEEPBACKEND; + +/** + * Global data for the file endpoint class. + */ +typedef struct PDMASYNCCOMPLETIONEPCLASSFILE +{ + /** Common data. */ + PDMASYNCCOMPLETIONEPCLASS Core; + /** Override I/O manager type - set to SIMPLE after failure. */ + PDMACEPFILEMGRTYPE enmMgrTypeOverride; + /** Default backend type for the endpoint. */ + PDMACFILEEPBACKEND enmEpBackendDefault; + RTCRITSECT CritSect; + /** Pointer to the head of the async I/O managers. */ + R3PTRTYPE(PPDMACEPFILEMGR) pAioMgrHead; + /** Number of async I/O managers currently running. */ + unsigned cAioMgrs; + /** Maximum number of segments to cache per endpoint */ + unsigned cTasksCacheMax; + /** Maximum number of simultaneous outstandingrequests. */ + uint32_t cReqsOutstandingMax; + /** Bitmask for checking the alignment of a buffer. */ + RTR3UINTPTR uBitmaskAlignment; + /** Flag whether the out of resources warning was printed already. */ + bool fOutOfResourcesWarningPrinted; +#ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY + /** Timer for delayed request completion. */ + PTMTIMERR3 pTimer; + /** Milliseconds until the next delay expires. */ + volatile uint64_t cMilliesNext; +#endif +} PDMASYNCCOMPLETIONEPCLASSFILE; +/** Pointer to the endpoint class data. */ +typedef PDMASYNCCOMPLETIONEPCLASSFILE *PPDMASYNCCOMPLETIONEPCLASSFILE; + +typedef enum PDMACEPFILEBLOCKINGEVENT +{ + /** The invalid event type */ + PDMACEPFILEBLOCKINGEVENT_INVALID = 0, + /** A task is about to be canceled */ + PDMACEPFILEBLOCKINGEVENT_CANCEL, + /** Usual 32bit hack */ + PDMACEPFILEBLOCKINGEVENT_32BIT_HACK = 0x7fffffff +} PDMACEPFILEBLOCKINGEVENT; + +/** + * States of the endpoint. + */ +typedef enum PDMASYNCCOMPLETIONENDPOINTFILESTATE +{ + /** Invalid state. */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE_INVALID = 0, + /** Normal running state accepting new requests + * and processing them. + */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE, + /** The endpoint is about to be closed - not accepting new tasks for endpoints but waiting for + * remaining ones to finish. + */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING, + /** Removing from current I/O manager state - not processing new tasks for endpoints but waiting + * for remaining ones to finish. + */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING, + /** The current endpoint will be migrated to another I/O manager. */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE_MIGRATING, + /** 32bit hack */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE_32BIT_HACK = 0x7fffffff +} PDMASYNCCOMPLETIONENDPOINTFILESTATE; + +typedef enum PDMACFILEREQTYPEDELAY +{ + PDMACFILEREQTYPEDELAY_ANY = 0, + PDMACFILEREQTYPEDELAY_READ, + PDMACFILEREQTYPEDELAY_WRITE, + PDMACFILEREQTYPEDELAY_FLUSH, + PDMACFILEREQTYPEDELAY_32BIT_HACK = 0x7fffffff +} PDMACFILEREQTYPEDELAY; + +/** + * Data for the file endpoint. + */ +typedef struct PDMASYNCCOMPLETIONENDPOINTFILE +{ + /** Common data. 
*/ + PDMASYNCCOMPLETIONENDPOINT Core; + /** Current state of the endpoint. */ + PDMASYNCCOMPLETIONENDPOINTFILESTATE enmState; + /** The backend to use for this endpoint. */ + PDMACFILEEPBACKEND enmBackendType; + /** async I/O manager this endpoint is assigned to. */ + R3PTRTYPE(volatile PPDMACEPFILEMGR) pAioMgr; + /** Flags for opening the file. */ + unsigned fFlags; + /** File handle. */ + RTFILE hFile; + /** Real size of the file. Only updated if data is appended. */ + volatile uint64_t cbFile; + /** List of new tasks. */ + R3PTRTYPE(volatile PPDMACTASKFILE) pTasksNewHead; + + /** Head of the small cache for allocated task segments for exclusive + * use by this endpoint. */ + R3PTRTYPE(volatile PPDMACTASKFILE) pTasksFreeHead; + /** Tail of the small cache for allocated task segments for exclusive + * use by this endpoint. */ + R3PTRTYPE(volatile PPDMACTASKFILE) pTasksFreeTail; + /** Number of elements in the cache. */ + volatile uint32_t cTasksCached; + + /** Flag whether a flush request is currently active */ + PPDMACTASKFILE pFlushReq; + +#ifdef VBOX_WITH_STATISTICS + /** Time spend in a read. */ + STAMPROFILEADV StatRead; + /** Time spend in a write. */ + STAMPROFILEADV StatWrite; +#endif + + /** Event semaphore for blocking external events. + * The caller waits on it until the async I/O manager + * finished processing the event. */ + RTSEMEVENT EventSemBlock; + /** Flag whether caching is enabled for this file. */ + bool fCaching; + /** Flag whether the file was opened readonly. */ + bool fReadonly; + /** Flag whether the host supports the async flush API. */ + bool fAsyncFlushSupported; +#ifdef VBOX_WITH_DEBUGGER + /** Status code to inject for the next complete read. */ + volatile int rcReqRead; + /** Status code to inject for the next complete write. */ + volatile int rcReqWrite; +#endif +#ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY + /** Request delay. */ + volatile uint32_t msDelay; + /** Request delay jitter. */ + volatile uint32_t msJitter; + /** Number of requests to delay. */ + volatile uint32_t cReqsDelay; + /** Task type to delay. */ + PDMACFILEREQTYPEDELAY enmTypeDelay; + /** The current task which gets delayed. */ + PPDMASYNCCOMPLETIONTASKFILE pDelayedHead; +#endif + /** Flag whether a blocking event is pending and needs + * processing by the I/O manager. */ + bool fBlockingEventPending; + /** Blocking event type */ + PDMACEPFILEBLOCKINGEVENT enmBlockingEvent; + + /** Additional data needed for the event types. */ + union + { + /** Cancelation event. */ + struct + { + /** The task to cancel. */ + PPDMACTASKFILE pTask; + } Cancel; + } BlockingEventData; + /** Data for exclusive use by the assigned async I/O manager. */ + struct + { + /** Pointer to the next endpoint assigned to the manager. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINTFILE) pEndpointNext; + /** Pointer to the previous endpoint assigned to the manager. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINTFILE) pEndpointPrev; + /** List of pending requests (not submitted due to usage restrictions + * or a pending flush request) */ + R3PTRTYPE(PPDMACTASKFILE) pReqsPendingHead; + /** Tail of pending requests. */ + R3PTRTYPE(PPDMACTASKFILE) pReqsPendingTail; + /** Tree of currently locked ranges. + * If a write task is enqueued the range gets locked and any other + * task writing to that range has to wait until the task completes. + */ + PAVLRFOFFTREE pTreeRangesLocked; + /** Number of requests with a range lock active. 
*/ + unsigned cLockedReqsActive; + /** Number of requests currently being processed for this endpoint + * (excluded flush requests). */ + unsigned cRequestsActive; + /** Number of requests processed during the last second. */ + unsigned cReqsPerSec; + /** Current number of processed requests for the current update period. */ + unsigned cReqsProcessed; + /** Flag whether the endpoint is about to be moved to another manager. */ + bool fMoving; + /** Destination I/O manager. */ + PPDMACEPFILEMGR pAioMgrDst; + } AioMgr; +} PDMASYNCCOMPLETIONENDPOINTFILE; +/** Pointer to the endpoint class data. */ +typedef PDMASYNCCOMPLETIONENDPOINTFILE *PPDMASYNCCOMPLETIONENDPOINTFILE; +#ifdef VBOX_WITH_STATISTICS +AssertCompileMemberAlignment(PDMASYNCCOMPLETIONENDPOINTFILE, StatRead, sizeof(uint64_t)); +#endif + +/** Request completion function */ +typedef DECLCALLBACK(void) FNPDMACTASKCOMPLETED(PPDMACTASKFILE pTask, void *pvUser, int rc); +/** Pointer to a request completion function. */ +typedef FNPDMACTASKCOMPLETED *PFNPDMACTASKCOMPLETED; + +/** + * Transfer type. + */ +typedef enum PDMACTASKFILETRANSFER +{ + /** Invalid. */ + PDMACTASKFILETRANSFER_INVALID = 0, + /** Read transfer. */ + PDMACTASKFILETRANSFER_READ, + /** Write transfer. */ + PDMACTASKFILETRANSFER_WRITE, + /** Flush transfer. */ + PDMACTASKFILETRANSFER_FLUSH +} PDMACTASKFILETRANSFER; + +/** + * Data of a request. + */ +typedef struct PDMACTASKFILE +{ + /** Pointer to the range lock we are waiting for */ + PPDMACFILERANGELOCK pRangeLock; + /** Next task in the list. (Depending on the state) */ + struct PDMACTASKFILE *pNext; + /** Endpoint */ + PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint; + /** Transfer type. */ + PDMACTASKFILETRANSFER enmTransferType; + /** Start offset */ + RTFOFF Off; + /** Amount of data transfered so far. */ + size_t cbTransfered; + /** Data segment. */ + RTSGSEG DataSeg; + /** When non-zero the segment uses a bounce buffer because the provided buffer + * doesn't meet host requirements. */ + size_t cbBounceBuffer; + /** Pointer to the used bounce buffer if any. */ + void *pvBounceBuffer; + /** Start offset in the bounce buffer to copy from. */ + uint32_t offBounceBuffer; + /** Flag whether this is a prefetch request. */ + bool fPrefetch; + /** Already prepared native I/O request. + * Used if the request is prepared already but + * was not queued because the host has not enough + * resources. */ + RTFILEAIOREQ hReq; + /** Completion function to call on completion. */ + PFNPDMACTASKCOMPLETED pfnCompleted; + /** User data */ + void *pvUser; +} PDMACTASKFILE; + +/** + * Per task data. + */ +typedef struct PDMASYNCCOMPLETIONTASKFILE +{ + /** Common data. */ + PDMASYNCCOMPLETIONTASK Core; + /** Number of bytes to transfer until this task completes. */ + volatile int32_t cbTransferLeft; + /** Flag whether the task completed. */ + volatile bool fCompleted; + /** Return code. */ + volatile int rc; +#ifdef PDM_ASYNC_COMPLETION_FILE_WITH_DELAY + volatile PPDMASYNCCOMPLETIONTASKFILE pDelayedNext; + /** Timestamp when the delay expires. 
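/* Illustrative sketch (not part of the upstream header): the usual completion
 * pattern for PDMASYNCCOMPLETIONTASKFILE - every finished child transfer
 * subtracts its byte count from cbTransferLeft, and the task is completed
 * exactly once when the counter reaches zero. The helper name is made up. */
#if 0 /* example only */
static void pdmacFileExampleChildTransferCompleted(PPDMASYNCCOMPLETIONTASKFILE pTaskFile, size_t cbXfer, int rcChild)
{
    if (RT_FAILURE(rcChild))
        ASMAtomicCmpXchgS32((volatile int32_t *)&pTaskFile->rc, rcChild, VINF_SUCCESS); /* keep the first error */

    int32_t const cbOld = ASMAtomicSubS32(&pTaskFile->cbTransferLeft, (int32_t)cbXfer); /* returns the old value */
    if (   cbOld == (int32_t)cbXfer                              /* this was the last outstanding piece */
        && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))     /* make sure we complete only once */
        pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, pTaskFile->rc, true /*fCallCompletionHandler*/);
}
#endif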
*/ + uint64_t tsDelayEnd; +#endif +} PDMASYNCCOMPLETIONTASKFILE; + +DECLCALLBACK(int) pdmacFileAioMgrFailsafe(RTTHREAD hThreadSelf, void *pvUser); +DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser); + +int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr); +void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr); + +int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr, PDMACEPFILEMGRTYPE enmMgrType); + +int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint); + +PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint); +PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint); +void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, + PPDMACTASKFILE pTask); + +int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask); + +int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode); +void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile); +int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile); +void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint); + +int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask, + RTFOFF off, PCRTSGSEG paSegments, size_t cSegments, + size_t cbRead); +int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask, + RTFOFF off, PCRTSGSEG paSegments, size_t cSegments, + size_t cbWrite); +int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint); + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_PDMAsyncCompletionFileInternal_h */ + diff --git a/src/VBox/VMM/include/PDMAsyncCompletionInternal.h b/src/VBox/VMM/include/PDMAsyncCompletionInternal.h new file mode 100644 index 00000000..2e2669bf --- /dev/null +++ b/src/VBox/VMM/include/PDMAsyncCompletionInternal.h @@ -0,0 +1,281 @@ +/* $Id: PDMAsyncCompletionInternal.h $ */ +/** @file + * PDM - Pluggable Device Manager, Async I/O Completion internal header. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_PDMAsyncCompletionInternal_h +#define VMM_INCLUDED_SRC_include_PDMAsyncCompletionInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <iprt/critsect.h> +#include <iprt/memcache.h> +#include <iprt/sg.h> +#include <VBox/types.h> +#include <VBox/vmm/cfgm.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/pdmasynccompletion.h> +#include "PDMInternal.h" + +RT_C_DECLS_BEGIN + + +/** + * PDM Async completion endpoint operations. + */ +typedef struct PDMASYNCCOMPLETIONEPCLASSOPS +{ + /** Version identifier. */ + uint32_t u32Version; + /** Name of the endpoint class. */ + const char *pszName; + /** Class type. */ + PDMASYNCCOMPLETIONEPCLASSTYPE enmClassType; + /** Size of the global endpoint class data in bytes. 
*/ + size_t cbEndpointClassGlobal; + /** Size of an endpoint in bytes. */ + size_t cbEndpoint; + /** size of a task in bytes. */ + size_t cbTask; + + /** + * Initializes the global data for a endpoint class. + * + * @returns VBox status code. + * @param pClassGlobals Pointer to the uninitialized globals data. + * @param pCfgNode Node for querying configuration data. + */ + DECLR3CALLBACKMEMBER(int, pfnInitialize, (PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode)); + + /** + * Frees all allocated resources which were allocated during init. + * + * @returns VBox status code. + * @param pClassGlobals Pointer to the globals data. + */ + DECLR3CALLBACKMEMBER(void, pfnTerminate, (PPDMASYNCCOMPLETIONEPCLASS pClassGlobals)); + + /** + * Initializes a given endpoint. + * + * @returns VBox status code. + * @param pEndpoint Pointer to the uninitialized endpoint. + * @param pszUri Pointer to the string containing the endpoint + * destination (filename, IP address, ...) + * @param fFlags Creation flags. + */ + DECLR3CALLBACKMEMBER(int, pfnEpInitialize, (PPDMASYNCCOMPLETIONENDPOINT pEndpoint, + const char *pszUri, uint32_t fFlags)); + + /** + * Closes a endpoint finishing all tasks. + * + * @returns VBox status code. + * @param pEndpoint Pointer to the endpoint to be closed. + */ + DECLR3CALLBACKMEMBER(int, pfnEpClose, (PPDMASYNCCOMPLETIONENDPOINT pEndpoint)); + + /** + * Initiates a read request from the given endpoint. + * + * @returns VBox status code. + * @param pTask Pointer to the task object associated with the request. + * @param pEndpoint Endpoint the request is for. + * @param off Where to start reading from. + * @param paSegments Scatter gather list to store the data in. + * @param cSegments Number of segments in the list. + * @param cbRead The overall number of bytes to read. + */ + DECLR3CALLBACKMEMBER(int, pfnEpRead, (PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, size_t cSegments, + size_t cbRead)); + + /** + * Initiates a write request to the given endpoint. + * + * @returns VBox status code. + * @param pTask Pointer to the task object associated with the request. + * @param pEndpoint Endpoint the request is for. + * @param off Where to start writing to. + * @param paSegments Scatter gather list to store the data in. + * @param cSegments Number of segments in the list. + * @param cbRead The overall number of bytes to write. + */ + DECLR3CALLBACKMEMBER(int, pfnEpWrite, (PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off, + PCRTSGSEG paSegments, size_t cSegments, + size_t cbWrite)); + + /** + * Initiates a flush request on the given endpoint. + * + * @returns VBox status code. + * @param pTask Pointer to the task object associated with the request. + * @param pEndpoint Endpoint the request is for. + */ + DECLR3CALLBACKMEMBER(int, pfnEpFlush, (PPDMASYNCCOMPLETIONTASK pTask, + PPDMASYNCCOMPLETIONENDPOINT pEndpoint)); + + /** + * Queries the size of the endpoint. Optional. + * + * @returns VBox status code. + * @param pEndpoint Endpoint the request is for. + * @param pcbSize Where to store the size of the endpoint. + */ + DECLR3CALLBACKMEMBER(int, pfnEpGetSize, (PPDMASYNCCOMPLETIONENDPOINT pEndpoint, + uint64_t *pcbSize)); + + /** + * Sets the size of the endpoint. Optional. + * This is a synchronous operation. + * + * + * @returns VBox status code. + * @param pEndpoint Endpoint the request is for. + * @param cbSize New size for the endpoint. 
+ */ + DECLR3CALLBACKMEMBER(int, pfnEpSetSize, (PPDMASYNCCOMPLETIONENDPOINT pEndpoint, + uint64_t cbSize)); + + /** Initialization safety marker. */ + uint32_t u32VersionEnd; +} PDMASYNCCOMPLETIONEPCLASSOPS; +/** Pointer to a async completion endpoint class operation table. */ +typedef PDMASYNCCOMPLETIONEPCLASSOPS *PPDMASYNCCOMPLETIONEPCLASSOPS; +/** Const pointer to a async completion endpoint class operation table. */ +typedef const PDMASYNCCOMPLETIONEPCLASSOPS *PCPDMASYNCCOMPLETIONEPCLASSOPS; + +/** Version for the endpoint class operations structure. */ +#define PDMAC_EPCLASS_OPS_VERSION 0x00000001 + +/** Pointer to a bandwidth control manager. */ +typedef struct PDMACBWMGR *PPDMACBWMGR; + +/** + * PDM Async completion endpoint class. + * Common data. + */ +typedef struct PDMASYNCCOMPLETIONEPCLASS +{ + /** Pointer to the VM. */ + PVM pVM; + /** Critical section protecting the lists below. */ + RTCRITSECT CritSect; + /** Number of endpoints in the list. */ + volatile unsigned cEndpoints; + /** Head of endpoints with this class. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINT) pEndpointsHead; + /** Head of the bandwidth managers for this class. */ + R3PTRTYPE(PPDMACBWMGR) pBwMgrsHead; + /** Pointer to the callback table. */ + R3PTRTYPE(PCPDMASYNCCOMPLETIONEPCLASSOPS) pEndpointOps; + /** Task cache. */ + RTMEMCACHE hMemCacheTasks; + /** Flag whether to gather advanced statistics about requests. */ + bool fGatherAdvancedStatistics; +} PDMASYNCCOMPLETIONEPCLASS; +/** Pointer to the PDM async completion endpoint class data. */ +typedef PDMASYNCCOMPLETIONEPCLASS *PPDMASYNCCOMPLETIONEPCLASS; + +/** + * A PDM Async completion endpoint. + * Common data. + */ +typedef struct PDMASYNCCOMPLETIONENDPOINT +{ + /** Next endpoint in the list. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINT) pNext; + /** Previous endpoint in the list. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINT) pPrev; + /** Pointer to the class this endpoint belongs to. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONEPCLASS) pEpClass; + /** Template associated with this endpoint. */ + PPDMASYNCCOMPLETIONTEMPLATE pTemplate; + /** Statistics ID for endpoints having a similar URI (same filename for example) + * to avoid assertions. */ + unsigned iStatId; + /** URI describing the endpoint */ + char *pszUri; + /** Pointer to the assigned bandwidth manager. */ + volatile PPDMACBWMGR pBwMgr; + /** Aligns following statistic counters on a 8 byte boundary. */ + uint32_t u32Alignment; + /** @name Request size statistics. + * @{ */ + STAMCOUNTER StatReqSizeSmaller512; + STAMCOUNTER StatReqSize512To1K; + STAMCOUNTER StatReqSize1KTo2K; + STAMCOUNTER StatReqSize2KTo4K; + STAMCOUNTER StatReqSize4KTo8K; + STAMCOUNTER StatReqSize8KTo16K; + STAMCOUNTER StatReqSize16KTo32K; + STAMCOUNTER StatReqSize32KTo64K; + STAMCOUNTER StatReqSize64KTo128K; + STAMCOUNTER StatReqSize128KTo256K; + STAMCOUNTER StatReqSize256KTo512K; + STAMCOUNTER StatReqSizeOver512K; + STAMCOUNTER StatReqsUnaligned512; + STAMCOUNTER StatReqsUnaligned4K; + STAMCOUNTER StatReqsUnaligned8K; + /** @} */ + /** @name Request completion time statistics. 
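/* Illustrative sketch (not part of the upstream header): how an endpoint class
 * describes itself to the async completion manager. The file class exports such
 * a table as g_PDMAsyncCompletionEndpointClassFile (extern declaration further
 * down); the callback names below are placeholders, and the real table may
 * differ in detail. */
#if 0 /* example only */
static const PDMASYNCCOMPLETIONEPCLASSOPS g_ExampleEpClassOps =
{
    PDMAC_EPCLASS_OPS_VERSION,                  /* u32Version */
    "Example",                                  /* pszName */
    PDMASYNCCOMPLETIONEPCLASSTYPE_FILE,         /* enmClassType */
    sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),      /* cbEndpointClassGlobal */
    sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),     /* cbEndpoint */
    sizeof(PDMASYNCCOMPLETIONTASKFILE),         /* cbTask */
    exampleEpClassInitialize,                   /* pfnInitialize */
    exampleEpClassTerminate,                    /* pfnTerminate */
    exampleEpInitialize,                        /* pfnEpInitialize */
    exampleEpClose,                             /* pfnEpClose */
    exampleEpRead,                              /* pfnEpRead */
    exampleEpWrite,                             /* pfnEpWrite */
    exampleEpFlush,                             /* pfnEpFlush */
    exampleEpGetSize,                           /* pfnEpGetSize */
    exampleEpSetSize,                           /* pfnEpSetSize */
    PDMAC_EPCLASS_OPS_VERSION                   /* u32VersionEnd */
};
#endif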
+ * @{ */ + STAMCOUNTER StatTaskRunTimesNs[10]; + STAMCOUNTER StatTaskRunTimesUs[10]; + STAMCOUNTER StatTaskRunTimesMs[10]; + STAMCOUNTER StatTaskRunTimesSec[10]; + STAMCOUNTER StatTaskRunOver100Sec; + STAMCOUNTER StatIoOpsPerSec; + STAMCOUNTER StatIoOpsStarted; + STAMCOUNTER StatIoOpsCompleted; + uint64_t tsIntervalStartMs; + uint64_t cIoOpsCompleted; + /** @} */ +} PDMASYNCCOMPLETIONENDPOINT; +AssertCompileMemberAlignment(PDMASYNCCOMPLETIONENDPOINT, StatReqSizeSmaller512, sizeof(uint64_t)); +AssertCompileMemberAlignment(PDMASYNCCOMPLETIONENDPOINT, StatTaskRunTimesNs, sizeof(uint64_t)); + +/** + * A PDM async completion task handle. + * Common data. + */ +typedef struct PDMASYNCCOMPLETIONTASK +{ + /** Next task in the list + * (for free and assigned tasks). */ + R3PTRTYPE(PPDMASYNCCOMPLETIONTASK) pNext; + /** Previous task in the list + * (for free and assigned tasks). */ + R3PTRTYPE(PPDMASYNCCOMPLETIONTASK) pPrev; + /** Endpoint this task is assigned to. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONENDPOINT) pEndpoint; + /** Opaque user data for this task. */ + void *pvUser; + /** Start timestamp. */ + uint64_t tsNsStart; +} PDMASYNCCOMPLETIONTASK; + +void pdmR3AsyncCompletionCompleteTask(PPDMASYNCCOMPLETIONTASK pTask, int rc, bool fCallCompletionHandler); +bool pdmacEpIsTransferAllowed(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint32_t cbTransfer, RTMSINTERVAL *pmsWhenNext); + +RT_C_DECLS_END + +extern const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile; + +#endif /* !VMM_INCLUDED_SRC_include_PDMAsyncCompletionInternal_h */ + diff --git a/src/VBox/VMM/include/PDMBlkCacheInternal.h b/src/VBox/VMM/include/PDMBlkCacheInternal.h new file mode 100644 index 00000000..6dcbbbfd --- /dev/null +++ b/src/VBox/VMM/include/PDMBlkCacheInternal.h @@ -0,0 +1,334 @@ +/* $Id: PDMBlkCacheInternal.h $ */ +/** @file + * PDM Block Cache. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_PDMBlkCacheInternal_h +#define VMM_INCLUDED_SRC_include_PDMBlkCacheInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/vmm/cfgm.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/tm.h> +#include <VBox/vmm/pdmblkcache.h> +#include <iprt/types.h> +#include <iprt/file.h> +#include <iprt/thread.h> +#include <iprt/semaphore.h> +#include <iprt/critsect.h> +#include <iprt/avl.h> +#include <iprt/list.h> +#include <iprt/spinlock.h> +#include <iprt/memcache.h> + +RT_C_DECLS_BEGIN + +/** + * A few forward declarations. + */ +/** Pointer to a cache LRU list. */ +typedef struct PDMBLKLRULIST *PPDMBLKLRULIST; +/** Pointer to the global cache structure. */ +typedef struct PDMBLKCACHEGLOBAL *PPDMBLKCACHEGLOBAL; +/** Pointer to a cache entry waiter structure. */ +typedef struct PDMBLKCACHEWAITER *PPDMBLKCACHEWAITER; + +/** + * A cache entry + */ +typedef struct PDMBLKCACHEENTRY +{ + /** The AVL entry data. */ + AVLRU64NODECORE Core; + /** Pointer to the previous element. Used in one of the LRU lists.*/ + struct PDMBLKCACHEENTRY *pPrev; + /** Pointer to the next element. 
Used in one of the LRU lists.*/ + struct PDMBLKCACHEENTRY *pNext; + /** Pointer to the list the entry is in. */ + PPDMBLKLRULIST pList; + /** Cache the entry belongs to. */ + PPDMBLKCACHE pBlkCache; + /** Flags for this entry. Combinations of PDMACFILECACHE_* \#defines */ + volatile uint32_t fFlags; + /** Reference counter. Prevents eviction of the entry if > 0. */ + volatile uint32_t cRefs; + /** Size of the entry. */ + uint32_t cbData; + /** Pointer to the memory containing the data. */ + uint8_t *pbData; + /** Head of list of tasks waiting for this one to finish. */ + PPDMBLKCACHEWAITER pWaitingHead; + /** Tail of list of tasks waiting for this one to finish. */ + PPDMBLKCACHEWAITER pWaitingTail; + /** Node for dirty but not yet committed entries list per endpoint. */ + RTLISTNODE NodeNotCommitted; +} PDMBLKCACHEENTRY, *PPDMBLKCACHEENTRY; +/** I/O is still in progress for this entry. This entry is not evictable. */ +#define PDMBLKCACHE_ENTRY_IO_IN_PROGRESS RT_BIT(0) +/** Entry is locked and thus not evictable. */ +#define PDMBLKCACHE_ENTRY_LOCKED RT_BIT(1) +/** Entry is dirty */ +#define PDMBLKCACHE_ENTRY_IS_DIRTY RT_BIT(2) +/** Entry is not evictable. */ +#define PDMBLKCACHE_NOT_EVICTABLE (PDMBLKCACHE_ENTRY_LOCKED | PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY) + +/** + * LRU list data + */ +typedef struct PDMBLKLRULIST +{ + /** Head of the list. */ + PPDMBLKCACHEENTRY pHead; + /** Tail of the list. */ + PPDMBLKCACHEENTRY pTail; + /** Number of bytes cached in the list. */ + uint32_t cbCached; +} PDMBLKLRULIST; + +/** + * Global cache data. + */ +typedef struct PDMBLKCACHEGLOBAL +{ + /** Pointer to the owning VM instance. */ + PVM pVM; + /** Maximum size of the cache in bytes. */ + uint32_t cbMax; + /** Current size of the cache in bytes. */ + uint32_t cbCached; + /** Critical section protecting the cache. */ + RTCRITSECT CritSect; + /** Maximum number of bytes cached. */ + uint32_t cbRecentlyUsedInMax; + /** Maximum number of bytes in the paged out list .*/ + uint32_t cbRecentlyUsedOutMax; + /** Recently used cache entries list */ + PDMBLKLRULIST LruRecentlyUsedIn; + /** Scorecard cache entry list. */ + PDMBLKLRULIST LruRecentlyUsedOut; + /** List of frequently used cache entries */ + PDMBLKLRULIST LruFrequentlyUsed; + /** Commit timeout in milli seconds */ + uint32_t u32CommitTimeoutMs; + /** Number of dirty bytes needed to start a commit of the data to the disk. */ + uint32_t cbCommitDirtyThreshold; + /** Current number of dirty bytes in the cache. */ + volatile uint32_t cbDirty; + /** Flag whether the VM was suspended becaus of an I/O error. */ + volatile bool fIoErrorVmSuspended; + /** Flag whether a commit is currently in progress. */ + volatile bool fCommitInProgress; + /** Commit interval timer */ + PTMTIMERR3 pTimerCommit; + /** Number of endpoints using the cache. */ + uint32_t cRefs; + /** List of all users of this cache. */ + RTLISTANCHOR ListUsers; +#ifdef VBOX_WITH_STATISTICS + /** Hit counter. */ + STAMCOUNTER cHits; + /** Partial hit counter. */ + STAMCOUNTER cPartialHits; + /** Miss counter. */ + STAMCOUNTER cMisses; + /** Bytes read from cache. */ + STAMCOUNTER StatRead; + /** Bytes written to the cache. */ + STAMCOUNTER StatWritten; + /** Time spend to get an entry in the AVL tree. */ + STAMPROFILEADV StatTreeGet; + /** Time spend to insert an entry in the AVL tree. */ + STAMPROFILEADV StatTreeInsert; + /** Time spend to remove an entry in the AVL tree. */ + STAMPROFILEADV StatTreeRemove; + /** Number of times a buffer could be reused. 
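/* Illustrative note (not part of the upstream header): the three LRU lists above
 * form a 2Q-style replacement scheme. Roughly: entries seen once live on
 * LruRecentlyUsedIn; when squeezed out of that list an entry gives up its data
 * buffer and only its bookkeeping is parked on LruRecentlyUsedOut; an entry hit
 * again while parked there counts as frequently used and moves to
 * LruFrequentlyUsed, whose buffers are also the first candidates for reuse.
 * This is a simplified reading of the cache code, not an exact description. */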
*/ + STAMCOUNTER StatBuffersReused; +#endif +} PDMBLKCACHEGLOBAL; +#ifdef VBOX_WITH_STATISTICS +AssertCompileMemberAlignment(PDMBLKCACHEGLOBAL, cHits, sizeof(uint64_t)); +#endif + +/** + * Block cache type. + */ +typedef enum PDMBLKCACHETYPE +{ + /** Device . */ + PDMBLKCACHETYPE_DEV = 1, + /** Driver consumer. */ + PDMBLKCACHETYPE_DRV, + /** Internal consumer. */ + PDMBLKCACHETYPE_INTERNAL, + /** Usb consumer. */ + PDMBLKCACHETYPE_USB +} PDMBLKCACHETYPE; + +/** + * Per user cache data. + */ +typedef struct PDMBLKCACHE +{ + /** Pointer to the id for the cache. */ + char *pszId; + /** AVL tree managing cache entries. */ + PAVLRU64TREE pTree; + /** R/W semaphore protecting cached entries for this endpoint. */ + RTSEMRW SemRWEntries; + /** Pointer to the gobal cache data */ + PPDMBLKCACHEGLOBAL pCache; + /** Lock protecting the dirty entries list. */ + RTSPINLOCK LockList; + /** List of dirty but not committed entries for this endpoint. */ + RTLISTANCHOR ListDirtyNotCommitted; + /** Node of the cache user list. */ + RTLISTNODE NodeCacheUser; + /** Block cache type. */ + PDMBLKCACHETYPE enmType; + /** Type specific data. */ + union + { + /** PDMASYNCCOMPLETIONTEMPLATETYPE_DEV */ + struct + { + /** Pointer to the device instance owning the block cache. */ + R3PTRTYPE(PPDMDEVINS) pDevIns; + /** Complete callback to the user. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEDEV) pfnXferComplete; + /** I/O enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDEV) pfnXferEnqueue; + /** Discard enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV) pfnXferEnqueueDiscard; + } Dev; + /** PDMASYNCCOMPLETIONTEMPLATETYPE_DRV */ + struct + { + /** Pointer to the driver instance owning the block cache. */ + R3PTRTYPE(PPDMDRVINS) pDrvIns; + /** Complete callback to the user. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEDRV) pfnXferComplete; + /** I/O enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDRV) pfnXferEnqueue; + /** Discard enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV) pfnXferEnqueueDiscard; + } Drv; + /** PDMASYNCCOMPLETIONTEMPLATETYPE_INTERNAL */ + struct + { + /** Pointer to user data. */ + R3PTRTYPE(void *) pvUser; + /** Complete callback to the user. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEINT) pfnXferComplete; + /** I/O enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEINT) pfnXferEnqueue; + /** Discard enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDINT) pfnXferEnqueueDiscard; + } Int; + /** PDMASYNCCOMPLETIONTEMPLATETYPE_USB */ + struct + { + /** Pointer to the usb instance owning the template. */ + R3PTRTYPE(PPDMUSBINS) pUsbIns; + /** Complete callback to the user. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERCOMPLETEUSB) pfnXferComplete; + /** I/O enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEUSB) pfnXferEnqueue; + /** Discard enqueue callback. */ + R3PTRTYPE(PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB) pfnXferEnqueueDiscard; + } Usb; + } u; + +#ifdef VBOX_WITH_STATISTICS + +#if HC_ARCH_BITS == 64 + uint32_t u32Alignment; +#endif + /** Number of times a write was deferred because the cache entry was still in progress */ + STAMCOUNTER StatWriteDeferred; + /** Number appended cache entries. */ + STAMCOUNTER StatAppendedWrites; +#endif + + /** Flag whether the cache was suspended. */ + volatile bool fSuspended; + /** Number of outstanding I/O transfers. 
*/ + volatile uint32_t cIoXfersActive; + +} PDMBLKCACHE, *PPDMBLKCACHE; +#ifdef VBOX_WITH_STATISTICS +AssertCompileMemberAlignment(PDMBLKCACHE, StatWriteDeferred, sizeof(uint64_t)); +#endif + +/** + * I/O task. + */ +typedef struct PDMBLKCACHEREQ +{ + /** Opaque user data returned on completion. */ + void *pvUser; + /** Number of pending transfers (waiting for a cache entry and passed through). */ + volatile uint32_t cXfersPending; + /** Status code. */ + volatile int rcReq; +} PDMBLKCACHEREQ, *PPDMBLKCACHEREQ; + +/** + * I/O transfer from the cache to the underlying medium. + */ +typedef struct PDMBLKCACHEIOXFER +{ + /** Flag whether the I/O xfer updates a cache entry or updates the request directly. */ + bool fIoCache; + /** Type dependent data. */ + union + { + /** Pointer to the entry the transfer updates. */ + PPDMBLKCACHEENTRY pEntry; + /** Pointer to the request the transfer updates. */ + PPDMBLKCACHEREQ pReq; + }; + /** Transfer direction. */ + PDMBLKCACHEXFERDIR enmXferDir; + /** Segment used if a cache entry is updated. */ + RTSGSEG SgSeg; + /** S/G buffer. */ + RTSGBUF SgBuf; +} PDMBLKCACHEIOXFER; + +/** + * Cache waiter + */ +typedef struct PDMBLKCACHEWAITER +{ + /* Next waiter in the list. */ + struct PDMBLKCACHEWAITER *pNext; + /** S/G buffer holding or receiving data. */ + RTSGBUF SgBuf; + /** Offset into the cache entry to start the transfer. */ + uint32_t offCacheEntry; + /** How many bytes to transfer. */ + size_t cbTransfer; + /** Flag whether the task wants to read or write into the entry. */ + bool fWrite; + /** Task the waiter is for. */ + PPDMBLKCACHEREQ pReq; +} PDMBLKCACHEWAITER; + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_PDMBlkCacheInternal_h */ + diff --git a/src/VBox/VMM/include/PDMInline.h b/src/VBox/VMM/include/PDMInline.h new file mode 100644 index 00000000..abf1b6b8 --- /dev/null +++ b/src/VBox/VMM/include/PDMInline.h @@ -0,0 +1,42 @@ +/* $Id: PDMInline.h $ */ +/** @file + * PDM - Internal header file containing the inlined functions. + */ + +/* + * Copyright (C) 2012-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_PDMInline_h +#define VMM_INCLUDED_SRC_include_PDMInline_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +/** + * Calculates the next IRQ tag. + * + * @returns IRQ tag. + * @param pVM The cross context VM structure. + * @param idTracer The ID of the source device. + */ +DECLINLINE(uint32_t) pdmCalcIrqTag(PVM pVM, uint32_t idTracer) +{ + uint32_t uTag = (pVM->pdm.s.uIrqTag + 1) & 0x3ff; /* {0..1023} */ + if (!uTag) + uTag++; + pVM->pdm.s.uIrqTag = uTag |= (idTracer << 16); + return uTag; +} + +#endif /* !VMM_INCLUDED_SRC_include_PDMInline_h */ + diff --git a/src/VBox/VMM/include/PDMInternal.h b/src/VBox/VMM/include/PDMInternal.h new file mode 100644 index 00000000..25350b82 --- /dev/null +++ b/src/VBox/VMM/include/PDMInternal.h @@ -0,0 +1,1538 @@ +/* $Id: PDMInternal.h $ */ +/** @file + * PDM - Internal header file. 
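/* Illustrative note (not part of the upstream header): an IRQ tag produced by
 * pdmCalcIrqTag() above carries the tracing source ID in the upper bits and a
 * 10-bit wrapping sequence number (1..1023, never 0) in the low bits: */
#if 0 /* example only */
static void pdmExampleIrqTagLayout(PVM pVM)
{
    uint32_t const uTag      = pdmCalcIrqTag(pVM, 0x0042 /* idTracer */);
    uint32_t const idTracer  = uTag >> 16;      /* -> 0x0042 */
    uint32_t const uSequence = uTag & 0x3ff;    /* -> 1..1023 */
    NOREF(idTracer); NOREF(uSequence);
}
#endif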
+ */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_PDMInternal_h +#define VMM_INCLUDED_SRC_include_PDMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/types.h> +#include <VBox/param.h> +#include <VBox/vmm/cfgm.h> +#include <VBox/vmm/stam.h> +#include <VBox/vusb.h> +#include <VBox/vmm/pdmasynccompletion.h> +#ifdef VBOX_WITH_NETSHAPER +# include <VBox/vmm/pdmnetshaper.h> +#endif +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION +# include <VBox/vmm/pdmasynccompletion.h> +#endif +#include <VBox/vmm/pdmblkcache.h> +#include <VBox/vmm/pdmcommon.h> +#include <VBox/vmm/pdmtask.h> +#include <VBox/sup.h> +#include <iprt/assert.h> +#include <iprt/critsect.h> +#ifdef IN_RING3 +# include <iprt/thread.h> +#endif + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_pdm_int Internal + * @ingroup grp_pdm + * @internal + * @{ + */ + +/** @def PDM_WITH_R3R0_CRIT_SECT + * Enables or disabled ring-3/ring-0 critical sections. */ +#if defined(DOXYGEN_RUNNING) || 1 +# define PDM_WITH_R3R0_CRIT_SECT +#endif + +/** @def PDMCRITSECT_STRICT + * Enables/disables PDM critsect strictness like deadlock detection. */ +#if (defined(RT_LOCK_STRICT) && defined(IN_RING3) && !defined(PDMCRITSECT_STRICT)) \ + || defined(DOXYGEN_RUNNING) +# define PDMCRITSECT_STRICT +#endif + +/** @def PDMCRITSECT_STRICT + * Enables/disables PDM read/write critsect strictness like deadlock + * detection. */ +#if (defined(RT_LOCK_STRICT) && defined(IN_RING3) && !defined(PDMCRITSECTRW_STRICT)) \ + || defined(DOXYGEN_RUNNING) +# define PDMCRITSECTRW_STRICT +#endif + +/** The maximum device instance (total) size, ring-0/raw-mode capable devices. */ +#define PDM_MAX_DEVICE_INSTANCE_SIZE _4M +/** The maximum device instance (total) size, ring-3 only devices. */ +#define PDM_MAX_DEVICE_INSTANCE_SIZE_R3 _8M + + + +/******************************************************************************* +* Structures and Typedefs * +*******************************************************************************/ + +/** Pointer to a PDM Device. */ +typedef struct PDMDEV *PPDMDEV; +/** Pointer to a pointer to a PDM Device. */ +typedef PPDMDEV *PPPDMDEV; + +/** Pointer to a PDM USB Device. */ +typedef struct PDMUSB *PPDMUSB; +/** Pointer to a pointer to a PDM USB Device. */ +typedef PPDMUSB *PPPDMUSB; + +/** Pointer to a PDM Driver. */ +typedef struct PDMDRV *PPDMDRV; +/** Pointer to a pointer to a PDM Driver. */ +typedef PPDMDRV *PPPDMDRV; + +/** Pointer to a PDM Logical Unit. */ +typedef struct PDMLUN *PPDMLUN; +/** Pointer to a pointer to a PDM Logical Unit. */ +typedef PPDMLUN *PPPDMLUN; + +/** Pointer to a PDM PCI Bus instance. */ +typedef struct PDMPCIBUS *PPDMPCIBUS; +/** Pointer to a DMAC instance. */ +typedef struct PDMDMAC *PPDMDMAC; +/** Pointer to a RTC instance. */ +typedef struct PDMRTC *PPDMRTC; + +/** Pointer to an USB HUB registration record. */ +typedef struct PDMUSBHUB *PPDMUSBHUB; + +/** + * Supported asynchronous completion endpoint classes. 
+ */ +typedef enum PDMASYNCCOMPLETIONEPCLASSTYPE +{ + /** File class. */ + PDMASYNCCOMPLETIONEPCLASSTYPE_FILE = 0, + /** Number of supported classes. */ + PDMASYNCCOMPLETIONEPCLASSTYPE_MAX, + /** 32bit hack. */ + PDMASYNCCOMPLETIONEPCLASSTYPE_32BIT_HACK = 0x7fffffff +} PDMASYNCCOMPLETIONEPCLASSTYPE; + +/** + * Private device instance data, ring-3. + */ +typedef struct PDMDEVINSINTR3 +{ + /** Pointer to the next instance. + * (Head is pointed to by PDM::pDevInstances.) */ + R3PTRTYPE(PPDMDEVINS) pNextR3; + /** Pointer to the next per device instance. + * (Head is pointed to by PDMDEV::pInstances.) */ + R3PTRTYPE(PPDMDEVINS) pPerDeviceNextR3; + /** Pointer to device structure. */ + R3PTRTYPE(PPDMDEV) pDevR3; + /** Pointer to the list of logical units associated with the device. (FIFO) */ + R3PTRTYPE(PPDMLUN) pLunsR3; + /** Pointer to the asynchronous notification callback set while in + * FNPDMDEVSUSPEND or FNPDMDEVPOWEROFF. */ + R3PTRTYPE(PFNPDMDEVASYNCNOTIFY) pfnAsyncNotify; + /** Configuration handle to the instance node. */ + R3PTRTYPE(PCFGMNODE) pCfgHandle; + + /** R3 pointer to the VM this instance was created for. */ + PVMR3 pVMR3; + + /** Flags, see PDMDEVINSINT_FLAGS_XXX. */ + uint32_t fIntFlags; + /** The last IRQ tag (for tracing it thru clearing). */ + uint32_t uLastIrqTag; + /** The ring-0 device index (for making ring-0 calls). */ + uint32_t idxR0Device; +} PDMDEVINSINTR3; + + +/** + * Private device instance data, ring-0. + */ +typedef struct PDMDEVINSINTR0 +{ + /** Pointer to the VM this instance was created for. */ + R0PTRTYPE(PGVM) pGVM; + /** Pointer to device structure. */ + R0PTRTYPE(struct PDMDEVREGR0 const *) pRegR0; + /** The ring-0 module reference. */ + RTR0PTR hMod; + /** Pointer to the ring-0 mapping of the ring-3 internal data (for uLastIrqTag). */ + R0PTRTYPE(PDMDEVINSINTR3 *) pIntR3R0; + /** Pointer to the ring-0 mapping of the ring-3 instance (for idTracing). */ + R0PTRTYPE(struct PDMDEVINSR3 *) pInsR3R0; + /** The device instance memory. */ + RTR0MEMOBJ hMemObj; + /** The ring-3 mapping object. */ + RTR0MEMOBJ hMapObj; + /** Index into PDMR0PERVM::apDevInstances. */ + uint32_t idxR0Device; +} PDMDEVINSINTR0; + + +/** + * Private device instance data, raw-mode + */ +typedef struct PDMDEVINSINTRC +{ + /** Pointer to the VM this instance was created for. */ + RGPTRTYPE(PVM) pVMRC; +} PDMDEVINSINTRC; + + +/** + * Private device instance data. + */ +typedef struct PDMDEVINSINT +{ + /** Pointer to the next instance (HC Ptr). + * (Head is pointed to by PDM::pDevInstances.) */ + R3PTRTYPE(PPDMDEVINS) pNextR3; + /** Pointer to the next per device instance (HC Ptr). + * (Head is pointed to by PDMDEV::pInstances.) */ + R3PTRTYPE(PPDMDEVINS) pPerDeviceNextR3; + /** Pointer to device structure - HC Ptr. */ + R3PTRTYPE(PPDMDEV) pDevR3; + /** Pointer to the list of logical units associated with the device. (FIFO) */ + R3PTRTYPE(PPDMLUN) pLunsR3; + /** Pointer to the asynchronous notification callback set while in + * FNPDMDEVSUSPEND or FNPDMDEVPOWEROFF. */ + R3PTRTYPE(PFNPDMDEVASYNCNOTIFY) pfnAsyncNotify; + /** Configuration handle to the instance node. */ + R3PTRTYPE(PCFGMNODE) pCfgHandle; + + /** R3 pointer to the VM this instance was created for. */ + PVMR3 pVMR3; + + /** R0 pointer to the VM this instance was created for. */ + R0PTRTYPE(PVMCC) pVMR0; + + /** RC pointer to the VM this instance was created for. */ + PVMRC pVMRC; + + /** Flags, see PDMDEVINSINT_FLAGS_XXX. */ + uint32_t fIntFlags; + /** The last IRQ tag (for tracing it thru clearing). 
*/ + uint32_t uLastIrqTag; +} PDMDEVINSINT; + +/** @name PDMDEVINSINT::fIntFlags + * @{ */ +/** Used by pdmR3Load to mark device instances it found in the saved state. */ +#define PDMDEVINSINT_FLAGS_FOUND RT_BIT_32(0) +/** Indicates that the device hasn't been powered on or resumed. + * This is used by PDMR3PowerOn, PDMR3Resume, PDMR3Suspend and PDMR3PowerOff + * to make sure each device gets exactly one notification for each of those + * events. PDMR3Resume and PDMR3PowerOn also makes use of it to bail out on + * a failure (already resumed/powered-on devices are suspended). + * PDMR3PowerOff resets this flag once before going through the devices to make sure + * every device gets the power off notification even if it was suspended before with + * PDMR3Suspend. + */ +#define PDMDEVINSINT_FLAGS_SUSPENDED RT_BIT_32(1) +/** Indicates that the device has been reset already. Used by PDMR3Reset. */ +#define PDMDEVINSINT_FLAGS_RESET RT_BIT_32(2) +#define PDMDEVINSINT_FLAGS_R0_ENABLED RT_BIT_32(3) +#define PDMDEVINSINT_FLAGS_RC_ENABLED RT_BIT_32(4) +/** Set if we've called the ring-0 constructor. */ +#define PDMDEVINSINT_FLAGS_R0_CONTRUCT RT_BIT_32(5) +/** Set if using non-default critical section. */ +#define PDMDEVINSINT_FLAGS_CHANGED_CRITSECT RT_BIT_32(6) +/** @} */ + + +/** + * Private USB device instance data. + */ +typedef struct PDMUSBINSINT +{ + /** The UUID of this instance. */ + RTUUID Uuid; + /** Pointer to the next instance. + * (Head is pointed to by PDM::pUsbInstances.) */ + R3PTRTYPE(PPDMUSBINS) pNext; + /** Pointer to the next per USB device instance. + * (Head is pointed to by PDMUSB::pInstances.) */ + R3PTRTYPE(PPDMUSBINS) pPerDeviceNext; + + /** Pointer to device structure. */ + R3PTRTYPE(PPDMUSB) pUsbDev; + + /** Pointer to the VM this instance was created for. */ + PVMR3 pVM; + /** Pointer to the list of logical units associated with the device. (FIFO) */ + R3PTRTYPE(PPDMLUN) pLuns; + /** The per instance device configuration. */ + R3PTRTYPE(PCFGMNODE) pCfg; + /** Same as pCfg if the configuration should be deleted when detaching the device. */ + R3PTRTYPE(PCFGMNODE) pCfgDelete; + /** The global device configuration. */ + R3PTRTYPE(PCFGMNODE) pCfgGlobal; + + /** Pointer to the USB hub this device is attached to. + * This is NULL if the device isn't connected to any HUB. */ + R3PTRTYPE(PPDMUSBHUB) pHub; + /** The port number that we're connected to. */ + uint32_t iPort; + /** Indicates that the USB device hasn't been powered on or resumed. + * See PDMDEVINSINT_FLAGS_SUSPENDED. */ + bool fVMSuspended; + /** Indicates that the USB device has been reset. */ + bool fVMReset; + /** Pointer to the asynchronous notification callback set while in + * FNPDMDEVSUSPEND or FNPDMDEVPOWEROFF. */ + R3PTRTYPE(PFNPDMUSBASYNCNOTIFY) pfnAsyncNotify; +} PDMUSBINSINT; + + +/** + * Private driver instance data. + */ +typedef struct PDMDRVINSINT +{ + /** Pointer to the driver instance above. + * This is NULL for the topmost drive. */ + R3PTRTYPE(PPDMDRVINS) pUp; + /** Pointer to the driver instance below. + * This is NULL for the bottommost driver. */ + R3PTRTYPE(PPDMDRVINS) pDown; + /** Pointer to the logical unit this driver chained on. */ + R3PTRTYPE(PPDMLUN) pLun; + /** Pointer to driver structure from which this was instantiated. */ + R3PTRTYPE(PPDMDRV) pDrv; + /** Pointer to the VM this instance was created for, ring-3 context. */ + PVMR3 pVMR3; + /** Pointer to the VM this instance was created for, ring-0 context. 
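
PDMDEVINSINT_FLAGS_SUSPENDED above (and the matching fVMSuspended fields for USB devices and drivers) exists so that PDMR3Suspend/PDMR3PowerOn hand each instance exactly one notification, while PDMR3PowerOff clears the flag first so already-suspended instances still get powered off. A simplified standalone sketch of that bookkeeping, not the actual PDM instance walk:

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct DEVINST { struct DEVINST *pNext; bool fSuspended; } DEVINST;

    /* Suspend: notify only instances that have not been notified yet. */
    static void suspendAll(DEVINST *pHead, void (*pfnNotify)(DEVINST *))
    {
        for (DEVINST *p = pHead; p; p = p->pNext)
            if (!p->fSuspended)
            {
                p->fSuspended = true;       /* exactly one notification per instance */
                pfnNotify(p);
            }
    }

    /* Power off: clear the flag up front so even instances suspended earlier
     * still receive the power-off notification, then reuse the same walk. */
    static void powerOffAll(DEVINST *pHead, void (*pfnNotify)(DEVINST *))
    {
        for (DEVINST *p = pHead; p; p = p->pNext)
            p->fSuspended = false;
        suspendAll(pHead, pfnNotify);
    }
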
*/ + R0PTRTYPE(PVMCC) pVMR0; + /** Pointer to the VM this instance was created for, raw-mode context. */ + PVMRC pVMRC; + /** Flag indicating that the driver is being detached and destroyed. + * (Helps detect potential recursive detaching.) */ + bool fDetaching; + /** Indicates that the driver hasn't been powered on or resumed. + * See PDMDEVINSINT_FLAGS_SUSPENDED. */ + bool fVMSuspended; + /** Indicates that the driver has been reset already. */ + bool fVMReset; + /** Set if allocated on the hyper heap, false if on the ring-3 heap. */ + bool fHyperHeap; + /** Pointer to the asynchronous notification callback set while in + * PDMUSBREG::pfnVMSuspend or PDMUSBREG::pfnVMPowerOff. */ + R3PTRTYPE(PFNPDMDRVASYNCNOTIFY) pfnAsyncNotify; + /** Configuration handle to the instance node. */ + R3PTRTYPE(PCFGMNODE) pCfgHandle; + /** Pointer to the ring-0 request handler function. */ + PFNPDMDRVREQHANDLERR0 pfnReqHandlerR0; +} PDMDRVINSINT; + + +/** + * Private critical section data. + */ +typedef struct PDMCRITSECTINT +{ + /** The critical section core which is shared with IPRT. + * @note The semaphore is a SUPSEMEVENT. */ + RTCRITSECT Core; + /** Pointer to the next critical section. + * This chain is used for relocating pVMRC and device cleanup. */ + R3PTRTYPE(struct PDMCRITSECTINT *) pNext; + /** Owner identifier. + * This is pDevIns if the owner is a device. Similarly for a driver or service. + * PDMR3CritSectInit() sets this to point to the critsect itself. */ + RTR3PTR pvKey; + /** Pointer to the VM - R3Ptr. */ + PVMR3 pVMR3; + /** Pointer to the VM - R0Ptr. */ + R0PTRTYPE(PVMCC) pVMR0; + /** Pointer to the VM - GCPtr. */ + PVMRC pVMRC; + /** Set if this critical section is the automatically created default + * section of a device. */ + bool fAutomaticDefaultCritsect; + /** Set if the critical section is used by a timer or similar. + * See PDMR3DevGetCritSect. */ + bool fUsedByTimerOrSimilar; + /** Alignment padding. */ + bool afPadding[2]; + /** Support driver event semaphore that is scheduled to be signaled upon leaving + * the critical section. This is only for Ring-3 and Ring-0. */ + SUPSEMEVENT hEventToSignal; + /** The lock name. */ + R3PTRTYPE(const char *) pszName; + /** R0/RC lock contention. */ + STAMCOUNTER StatContentionRZLock; + /** R0/RC unlock contention. */ + STAMCOUNTER StatContentionRZUnlock; + /** R3 lock contention. */ + STAMCOUNTER StatContentionR3; + /** Profiling the time the section is locked. */ + STAMPROFILEADV StatLocked; +} PDMCRITSECTINT; +AssertCompileMemberAlignment(PDMCRITSECTINT, StatContentionRZLock, 8); +/** Pointer to private critical section data. */ +typedef PDMCRITSECTINT *PPDMCRITSECTINT; + +/** Indicates that the critical section is queued for unlock. + * PDMCritSectIsOwner and PDMCritSectIsOwned optimizations. */ +#define PDMCRITSECT_FLAGS_PENDING_UNLOCK RT_BIT_32(17) + + +/** + * Private critical section data. + */ +typedef struct PDMCRITSECTRWINT +{ + /** The read/write critical section core which is shared with IPRT. + * @note The semaphores are SUPSEMEVENT and SUPSEMEVENTMULTI. */ + RTCRITSECTRW Core; + + /** Pointer to the next critical section. + * This chain is used for relocating pVMRC and device cleanup. */ + R3PTRTYPE(struct PDMCRITSECTRWINT *) pNext; + /** Owner identifier. + * This is pDevIns if the owner is a device. Similarly for a driver or service. + * PDMR3CritSectInit() sets this to point to the critsect itself. */ + RTR3PTR pvKey; + /** Pointer to the VM - R3Ptr. */ + PVMR3 pVMR3; + /** Pointer to the VM - R0Ptr. 
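
PDMCRITSECTINT instances are kept on a single pNext chain and tagged with a pvKey owner (the device or driver instance, or the section itself), which is what pdmR3CritSectBothDeleteDevice/Driver rely on to clean up every section an owner created. A minimal sketch of such a delete-by-owner walk over a singly linked list (hypothetical types, no IPRT calls):

    #include <stdlib.h>

    typedef struct SECT { struct SECT *pNext; void *pvKey; /* owner identifier */ } SECT;

    /* Unlink and free every section whose pvKey matches the owner being destroyed. */
    static unsigned deleteByOwner(SECT **ppHead, void *pvKey)
    {
        unsigned  cDeleted = 0;
        SECT    **ppPrev   = ppHead;
        for (SECT *p = *ppHead; p; )
        {
            SECT *pNext = p->pNext;
            if (p->pvKey == pvKey)
            {
                *ppPrev = pNext;            /* unlink */
                free(p);
                cDeleted++;
            }
            else
                ppPrev = &p->pNext;
            p = pNext;
        }
        return cDeleted;
    }
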
*/ + R0PTRTYPE(PVMCC) pVMR0; + /** Pointer to the VM - GCPtr. */ + PVMRC pVMRC; +#if HC_ARCH_BITS == 64 + /** Alignment padding. */ + RTRCPTR RCPtrPadding; +#endif + /** The lock name. */ + R3PTRTYPE(const char *) pszName; + /** R0/RC write lock contention. */ + STAMCOUNTER StatContentionRZEnterExcl; + /** R0/RC write unlock contention. */ + STAMCOUNTER StatContentionRZLeaveExcl; + /** R0/RC read lock contention. */ + STAMCOUNTER StatContentionRZEnterShared; + /** R0/RC read unlock contention. */ + STAMCOUNTER StatContentionRZLeaveShared; + /** R0/RC writes. */ + STAMCOUNTER StatRZEnterExcl; + /** R0/RC reads. */ + STAMCOUNTER StatRZEnterShared; + /** R3 write lock contention. */ + STAMCOUNTER StatContentionR3EnterExcl; + /** R3 read lock contention. */ + STAMCOUNTER StatContentionR3EnterShared; + /** R3 writes. */ + STAMCOUNTER StatR3EnterExcl; + /** R3 reads. */ + STAMCOUNTER StatR3EnterShared; + /** Profiling the time the section is write locked. */ + STAMPROFILEADV StatWriteLocked; +} PDMCRITSECTRWINT; +AssertCompileMemberAlignment(PDMCRITSECTRWINT, StatContentionRZEnterExcl, 8); +AssertCompileMemberAlignment(PDMCRITSECTRWINT, Core.u64State, 8); +/** Pointer to private critical section data. */ +typedef PDMCRITSECTRWINT *PPDMCRITSECTRWINT; + + + +/** + * The usual device/driver/internal/external stuff. + */ +typedef enum +{ + /** The usual invalid entry. */ + PDMTHREADTYPE_INVALID = 0, + /** Device type. */ + PDMTHREADTYPE_DEVICE, + /** USB Device type. */ + PDMTHREADTYPE_USB, + /** Driver type. */ + PDMTHREADTYPE_DRIVER, + /** Internal type. */ + PDMTHREADTYPE_INTERNAL, + /** External type. */ + PDMTHREADTYPE_EXTERNAL, + /** The usual 32-bit hack. */ + PDMTHREADTYPE_32BIT_HACK = 0x7fffffff +} PDMTHREADTYPE; + + +/** + * The internal structure for the thread. + */ +typedef struct PDMTHREADINT +{ + /** The VM pointer. */ + PVMR3 pVM; + /** The event semaphore the thread blocks on when not running. */ + RTSEMEVENTMULTI BlockEvent; + /** The event semaphore the thread sleeps on while running. */ + RTSEMEVENTMULTI SleepEvent; + /** Pointer to the next thread. */ + R3PTRTYPE(struct PDMTHREAD *) pNext; + /** The thread type. */ + PDMTHREADTYPE enmType; +} PDMTHREADINT; + + + +/* Must be included after PDMDEVINSINT is defined. */ +#define PDMDEVINSINT_DECLARED +#define PDMUSBINSINT_DECLARED +#define PDMDRVINSINT_DECLARED +#define PDMCRITSECTINT_DECLARED +#define PDMCRITSECTRWINT_DECLARED +#define PDMTHREADINT_DECLARED +#ifdef ___VBox_pdm_h +# error "Invalid header PDM order. Include PDMInternal.h before VBox/vmm/pdm.h!" +#endif +RT_C_DECLS_END +#include <VBox/vmm/pdm.h> +RT_C_DECLS_BEGIN + +/** + * PDM Logical Unit. + * + * This typically the representation of a physical port on a + * device, like for instance the PS/2 keyboard port on the + * keyboard controller device. The LUNs are chained on the + * device they belong to (PDMDEVINSINT::pLunsR3). + */ +typedef struct PDMLUN +{ + /** The LUN - The Logical Unit Number. */ + RTUINT iLun; + /** Pointer to the next LUN. */ + PPDMLUN pNext; + /** Pointer to the top driver in the driver chain. */ + PPDMDRVINS pTop; + /** Pointer to the bottom driver in the driver chain. */ + PPDMDRVINS pBottom; + /** Pointer to the device instance which the LUN belongs to. + * Either this is set or pUsbIns is set. Both is never set at the same time. */ + PPDMDEVINS pDevIns; + /** Pointer to the USB device instance which the LUN belongs to. */ + PPDMUSBINS pUsbIns; + /** Pointer to the device base interface. */ + PPDMIBASE pBase; + /** Description of this LUN. 
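
A PDMLUN simply chains off the device it belongs to (PDMDEVINSINT::pLunsR3), so lookups such as pdmR3DevFindLun amount to walking a short singly linked list. Illustrative sketch with pared-down types:

    #include <stddef.h>

    typedef struct LUN { unsigned iLun; struct LUN *pNext; } LUN;

    /* Walk the per-device LUN list and return the matching unit, or NULL. */
    static LUN *findLun(LUN *pHead, unsigned iLun)
    {
        for (LUN *pLun = pHead; pLun; pLun = pLun->pNext)
            if (pLun->iLun == iLun)
                return pLun;
        return NULL;
    }
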
*/ + const char *pszDesc; +} PDMLUN; + + +/** + * PDM Device, ring-3. + */ +typedef struct PDMDEV +{ + /** Pointer to the next device (R3 Ptr). */ + R3PTRTYPE(PPDMDEV) pNext; + /** Device name length. (search optimization) */ + uint32_t cchName; + /** Registration structure. */ + R3PTRTYPE(const struct PDMDEVREGR3 *) pReg; + /** Number of instances. */ + uint32_t cInstances; + /** Pointer to chain of instances (R3 Ptr). */ + PPDMDEVINSR3 pInstances; + /** The search path for raw-mode context modules (';' as separator). */ + char *pszRCSearchPath; + /** The search path for ring-0 context modules (';' as separator). */ + char *pszR0SearchPath; +} PDMDEV; + + +#if 0 +/** + * PDM Device, ring-0. + */ +typedef struct PDMDEVR0 +{ + /** Pointer to the next device. */ + R0PTRTYPE(PPDMDEVR0) pNext; + /** Device name length. (search optimization) */ + uint32_t cchName; + /** Registration structure. */ + R3PTRTYPE(const struct PDMDEVREGR0 *) pReg; + /** Number of instances. */ + uint32_t cInstances; + /** Pointer to chain of instances. */ + PPDMDEVINSR0 pInstances; +} PDMDEVR0; +#endif + + +/** + * PDM USB Device. + */ +typedef struct PDMUSB +{ + /** Pointer to the next device (R3 Ptr). */ + R3PTRTYPE(PPDMUSB) pNext; + /** Device name length. (search optimization) */ + RTUINT cchName; + /** Registration structure. */ + R3PTRTYPE(const struct PDMUSBREG *) pReg; + /** Next instance number. */ + uint32_t iNextInstance; + /** Pointer to chain of instances (R3 Ptr). */ + R3PTRTYPE(PPDMUSBINS) pInstances; +} PDMUSB; + + +/** + * PDM Driver. + */ +typedef struct PDMDRV +{ + /** Pointer to the next device. */ + PPDMDRV pNext; + /** Registration structure. */ + const struct PDMDRVREG * pReg; + /** Current number of instances. */ + uint32_t cInstances; + /** The next instance number. */ + uint32_t iNextInstance; + /** The search path for raw-mode context modules (';' as separator). */ + char *pszRCSearchPath; + /** The search path for ring-0 context modules (';' as separator). */ + char *pszR0SearchPath; +} PDMDRV; + + +/** + * PDM registered PIC device. + */ +typedef struct PDMPIC +{ + /** Pointer to the PIC device instance - R3. */ + PPDMDEVINSR3 pDevInsR3; + /** @copydoc PDMPICREG::pfnSetIrq */ + DECLR3CALLBACKMEMBER(void, pfnSetIrqR3,(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)); + /** @copydoc PDMPICREG::pfnGetInterrupt */ + DECLR3CALLBACKMEMBER(int, pfnGetInterruptR3,(PPDMDEVINS pDevIns, uint32_t *puTagSrc)); + + /** Pointer to the PIC device instance - R0. */ + PPDMDEVINSR0 pDevInsR0; + /** @copydoc PDMPICREG::pfnSetIrq */ + DECLR0CALLBACKMEMBER(void, pfnSetIrqR0,(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)); + /** @copydoc PDMPICREG::pfnGetInterrupt */ + DECLR0CALLBACKMEMBER(int, pfnGetInterruptR0,(PPDMDEVINS pDevIns, uint32_t *puTagSrc)); + + /** Pointer to the PIC device instance - RC. */ + PPDMDEVINSRC pDevInsRC; + /** @copydoc PDMPICREG::pfnSetIrq */ + DECLRCCALLBACKMEMBER(void, pfnSetIrqRC,(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)); + /** @copydoc PDMPICREG::pfnGetInterrupt */ + DECLRCCALLBACKMEMBER(int, pfnGetInterruptRC,(PPDMDEVINS pDevIns, uint32_t *puTagSrc)); + /** Alignment padding. */ + RTRCPTR RCPtrPadding; +} PDMPIC; + + +/** + * PDM registered APIC device. + */ +typedef struct PDMAPIC +{ + /** Pointer to the APIC device instance - R3 Ptr. */ + PPDMDEVINSR3 pDevInsR3; + /** Pointer to the APIC device instance - R0 Ptr. */ + PPDMDEVINSR0 pDevInsR0; + /** Pointer to the APIC device instance - RC Ptr. 
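
Records like PDMPIC keep one device-instance pointer and one callback pointer per context (R3/R0/RC) so the same logical call works wherever the code happens to run; callers pick the member for the current context with a suffix-pasting macro (CTX_SUFF in the VBox headers). A two-context standalone sketch of the idea, with hypothetical names:

    #include <stdio.h>

    #ifdef IN_RING0
    # define MY_CTX_SUFF(a_Name) a_Name##R0
    #else
    # define MY_CTX_SUFF(a_Name) a_Name##R3     /* this sketch defaults to ring-3 */
    #endif

    typedef struct MYPIC
    {
        void (*pfnSetIrqR3)(int iIrq, int iLevel);
        void (*pfnSetIrqR0)(int iIrq, int iLevel);
    } MYPIC;

    static void setIrqR3(int iIrq, int iLevel) { printf("R3: irq %d -> %d\n", iIrq, iLevel); }

    int main(void)
    {
        MYPIC Pic = { setIrqR3, NULL };
        Pic.MY_CTX_SUFF(pfnSetIrq)(5, 1);       /* expands to Pic.pfnSetIrqR3(5, 1) here */
        return 0;
    }
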
*/ + PPDMDEVINSRC pDevInsRC; + uint8_t Alignment[4]; +} PDMAPIC; + + +/** + * PDM registered I/O APIC device. + */ +typedef struct PDMIOAPIC +{ + /** Pointer to the APIC device instance - R3 Ptr. */ + PPDMDEVINSR3 pDevInsR3; + /** @copydoc PDMIOAPICREG::pfnSetIrq */ + DECLR3CALLBACKMEMBER(void, pfnSetIrqR3,(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)); + /** @copydoc PDMIOAPICREG::pfnSendMsi */ + DECLR3CALLBACKMEMBER(void, pfnSendMsiR3,(PPDMDEVINS pDevIns, RTGCPHYS GCAddr, uint32_t uValue, uint32_t uTagSrc)); + /** @copydoc PDMIOAPICREG::pfnSetEoi */ + DECLR3CALLBACKMEMBER(VBOXSTRICTRC, pfnSetEoiR3,(PPDMDEVINS pDevIns, uint8_t u8Vector)); + + /** Pointer to the PIC device instance - R0. */ + PPDMDEVINSR0 pDevInsR0; + /** @copydoc PDMIOAPICREG::pfnSetIrq */ + DECLR0CALLBACKMEMBER(void, pfnSetIrqR0,(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)); + /** @copydoc PDMIOAPICREG::pfnSendMsi */ + DECLR0CALLBACKMEMBER(void, pfnSendMsiR0,(PPDMDEVINS pDevIns, RTGCPHYS GCAddr, uint32_t uValue, uint32_t uTagSrc)); + /** @copydoc PDMIOAPICREG::pfnSetEoi */ + DECLR0CALLBACKMEMBER(VBOXSTRICTRC, pfnSetEoiR0,(PPDMDEVINS pDevIns, uint8_t u8Vector)); + + /** Pointer to the APIC device instance - RC Ptr. */ + PPDMDEVINSRC pDevInsRC; + /** @copydoc PDMIOAPICREG::pfnSetIrq */ + DECLRCCALLBACKMEMBER(void, pfnSetIrqRC,(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)); + /** @copydoc PDMIOAPICREG::pfnSendMsi */ + DECLRCCALLBACKMEMBER(void, pfnSendMsiRC,(PPDMDEVINS pDevIns, RTGCPHYS GCAddr, uint32_t uValue, uint32_t uTagSrc)); + /** @copydoc PDMIOAPICREG::pfnSendMsi */ + DECLRCCALLBACKMEMBER(VBOXSTRICTRC, pfnSetEoiRC,(PPDMDEVINS pDevIns, uint8_t u8Vector)); +} PDMIOAPIC; + +/** Maximum number of PCI busses for a VM. */ +#define PDM_PCI_BUSSES_MAX 8 + + +#ifdef IN_RING3 +/** + * PDM registered firmware device. + */ +typedef struct PDMFW +{ + /** Pointer to the firmware device instance. */ + PPDMDEVINSR3 pDevIns; + /** Copy of the registration structure. */ + PDMFWREG Reg; +} PDMFW; +/** Pointer to a firmware instance. */ +typedef PDMFW *PPDMFW; +#endif + + +/** + * PDM PCI bus instance. + */ +typedef struct PDMPCIBUS +{ + /** PCI bus number. */ + uint32_t iBus; + uint32_t uPadding0; /**< Alignment padding.*/ + + /** Pointer to PCI bus device instance. 
*/ + PPDMDEVINSR3 pDevInsR3; + /** @copydoc PDMPCIBUSREGR3::pfnSetIrqR3 */ + DECLR3CALLBACKMEMBER(void, pfnSetIrqR3,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc)); + + /** @copydoc PDMPCIBUSREGR3::pfnRegisterR3 */ + DECLR3CALLBACKMEMBER(int, pfnRegister,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName)); + /** @copydoc PDMPCIBUSREGR3::pfnRegisterMsiR3 */ + DECLR3CALLBACKMEMBER(int, pfnRegisterMsi,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, PPDMMSIREG pMsiReg)); + /** @copydoc PDMPCIBUSREGR3::pfnIORegionRegisterR3 */ + DECLR3CALLBACKMEMBER(int, pfnIORegionRegister,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, + RTGCPHYS cbRegion, PCIADDRESSSPACE enmType, uint32_t fFlags, + uint64_t hHandle, PFNPCIIOREGIONMAP pfnCallback)); + /** @copydoc PDMPCIBUSREGR3::pfnInterceptConfigAccesses */ + DECLR3CALLBACKMEMBER(void, pfnInterceptConfigAccesses,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + PFNPCICONFIGREAD pfnRead, PFNPCICONFIGWRITE pfnWrite)); + /** @copydoc PDMPCIBUSREGR3::pfnConfigWrite */ + DECLR3CALLBACKMEMBER(VBOXSTRICTRC, pfnConfigWrite,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + uint32_t uAddress, unsigned cb, uint32_t u32Value)); + /** @copydoc PDMPCIBUSREGR3::pfnConfigRead */ + DECLR3CALLBACKMEMBER(VBOXSTRICTRC, pfnConfigRead,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + uint32_t uAddress, unsigned cb, uint32_t *pu32Value)); +} PDMPCIBUS; + + +/** + * Ring-0 PDM PCI bus instance data. + */ +typedef struct PDMPCIBUSR0 +{ + /** PCI bus number. */ + uint32_t iBus; + uint32_t uPadding0; /**< Alignment padding.*/ + /** Pointer to PCI bus device instance. */ + PPDMDEVINSR0 pDevInsR0; + /** @copydoc PDMPCIBUSREGR0::pfnSetIrq */ + DECLR0CALLBACKMEMBER(void, pfnSetIrqR0,(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc)); +} PDMPCIBUSR0; +/** Pointer to the ring-0 PCI bus data. */ +typedef PDMPCIBUSR0 *PPDMPCIBUSR0; + +#ifdef IN_RING3 +/** + * PDM registered DMAC (DMA Controller) device. + */ +typedef struct PDMDMAC +{ + /** Pointer to the DMAC device instance. */ + PPDMDEVINSR3 pDevIns; + /** Copy of the registration structure. */ + PDMDMACREG Reg; +} PDMDMAC; + + +/** + * PDM registered RTC (Real Time Clock) device. + */ +typedef struct PDMRTC +{ + /** Pointer to the RTC device instance. */ + PPDMDEVINSR3 pDevIns; + /** Copy of the registration structure. */ + PDMRTCREG Reg; +} PDMRTC; + +#endif /* IN_RING3 */ + +/** + * Module type. + */ +typedef enum PDMMODTYPE +{ + /** Raw-mode (RC) context module. */ + PDMMOD_TYPE_RC, + /** Ring-0 (host) context module. */ + PDMMOD_TYPE_R0, + /** Ring-3 (host) context module. */ + PDMMOD_TYPE_R3 +} PDMMODTYPE; + + +/** The module name length including the terminator. */ +#define PDMMOD_NAME_LEN 32 + +/** + * Loaded module instance. + */ +typedef struct PDMMOD +{ + /** Module name. This is used for referring to + * the module internally, sort of like a handle. */ + char szName[PDMMOD_NAME_LEN]; + /** Module type. */ + PDMMODTYPE eType; + /** Loader module handle. Not used for R0 modules. */ + RTLDRMOD hLdrMod; + /** Loaded address. + * This is the 'handle' for R0 modules. */ + RTUINTPTR ImageBase; + /** Old loaded address. + * This is used during relocation of GC modules. Not used for R0 modules. */ + RTUINTPTR OldImageBase; + /** Where the R3 HC bits are stored. + * This can be equal to ImageBase but doesn't have to. Not used for R0 modules. */ + void *pvBits; + + /** Pointer to next module. 
*/ + struct PDMMOD *pNext; + /** Module filename. */ + char szFilename[1]; +} PDMMOD; +/** Pointer to loaded module instance. */ +typedef PDMMOD *PPDMMOD; + + + +/** Extra space in the free array. */ +#define PDMQUEUE_FREE_SLACK 16 + +/** + * Queue type. + */ +typedef enum PDMQUEUETYPE +{ + /** Device consumer. */ + PDMQUEUETYPE_DEV = 1, + /** Driver consumer. */ + PDMQUEUETYPE_DRV, + /** Internal consumer. */ + PDMQUEUETYPE_INTERNAL, + /** External consumer. */ + PDMQUEUETYPE_EXTERNAL +} PDMQUEUETYPE; + +/** Pointer to a PDM Queue. */ +typedef struct PDMQUEUE *PPDMQUEUE; + +/** + * PDM Queue. + */ +typedef struct PDMQUEUE +{ + /** Pointer to the next queue in the list. */ + R3PTRTYPE(PPDMQUEUE) pNext; + /** Type specific data. */ + union + { + /** PDMQUEUETYPE_DEV */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMQUEUEDEV) pfnCallback; + /** Pointer to the device instance owning the queue. */ + R3PTRTYPE(PPDMDEVINS) pDevIns; + } Dev; + /** PDMQUEUETYPE_DRV */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMQUEUEDRV) pfnCallback; + /** Pointer to the driver instance owning the queue. */ + R3PTRTYPE(PPDMDRVINS) pDrvIns; + } Drv; + /** PDMQUEUETYPE_INTERNAL */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMQUEUEINT) pfnCallback; + } Int; + /** PDMQUEUETYPE_EXTERNAL */ + struct + { + /** Pointer to consumer function. */ + R3PTRTYPE(PFNPDMQUEUEEXT) pfnCallback; + /** Pointer to user argument. */ + R3PTRTYPE(void *) pvUser; + } Ext; + } u; + /** Queue type. */ + PDMQUEUETYPE enmType; + /** The interval between checking the queue for events. + * The realtime timer below is used to do the waiting. + * If 0, the queue will use the VM_FF_PDM_QUEUE forced action. */ + uint32_t cMilliesInterval; + /** Interval timer. Only used if cMilliesInterval is non-zero. */ + PTMTIMERR3 pTimer; + /** Pointer to the VM - R3. */ + PVMR3 pVMR3; + /** LIFO of pending items - R3. */ + R3PTRTYPE(PPDMQUEUEITEMCORE) volatile pPendingR3; + /** Pointer to the VM - R0. */ + PVMR0 pVMR0; + /** LIFO of pending items - R0. */ + R0PTRTYPE(PPDMQUEUEITEMCORE) volatile pPendingR0; + /** Pointer to the GC VM and indicator for GC enabled queue. + * If this is NULL, the queue cannot be used in GC. + */ + PVMRC pVMRC; + /** LIFO of pending items - GC. */ + RCPTRTYPE(PPDMQUEUEITEMCORE) volatile pPendingRC; + + /** Item size (bytes). */ + uint32_t cbItem; + /** Number of items in the queue. */ + uint32_t cItems; + /** Index to the free head (where we insert). */ + uint32_t volatile iFreeHead; + /** Index to the free tail (where we remove). */ + uint32_t volatile iFreeTail; + + /** Unique queue name. */ + R3PTRTYPE(const char *) pszName; +#if HC_ARCH_BITS == 32 + RTR3PTR Alignment1; +#endif + /** Stat: Times PDMQueueAlloc fails. */ + STAMCOUNTER StatAllocFailures; + /** Stat: PDMQueueInsert calls. */ + STAMCOUNTER StatInsert; + /** Stat: Queue flushes. */ + STAMCOUNTER StatFlush; + /** Stat: Queue flushes with pending items left over. */ + STAMCOUNTER StatFlushLeftovers; +#ifdef VBOX_WITH_STATISTICS + /** State: Profiling the flushing. */ + STAMPROFILE StatFlushPrf; + /** State: Pending items. */ + uint32_t volatile cStatPending; + uint32_t volatile cAlignment; +#endif + + /** Array of pointers to free items. Variable size. */ + struct PDMQUEUEFREEITEM + { + /** Pointer to the free item - HC Ptr. */ + R3PTRTYPE(PPDMQUEUEITEMCORE) volatile pItemR3; + /** Pointer to the free item - HC Ptr. 
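
Pending queue items are kept on per-context LIFOs (pPendingR3/R0/RC) that producers push onto without holding a lock; the usual implementation is a compare-and-swap loop on the head pointer. A standalone C11 sketch of such a push (not the actual PDMQueueInsert code):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct ITEM { struct ITEM *pNext; } ITEM;

    /* Lock-free LIFO push: link the new item in front of the current head and
     * retry if another producer got there first. */
    static void lifoPush(ITEM *_Atomic *ppHead, ITEM *pItem)
    {
        ITEM *pOld = atomic_load(ppHead);
        do
            pItem->pNext = pOld;
        while (!atomic_compare_exchange_weak(ppHead, &pOld, pItem));
    }

The consumer side can then claim the whole list with a single atomic exchange of the head against NULL and process the items, which is why a LIFO (rather than a FIFO) is sufficient here.
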
*/ + R0PTRTYPE(PPDMQUEUEITEMCORE) volatile pItemR0; + /** Pointer to the free item - GC Ptr. */ + RCPTRTYPE(PPDMQUEUEITEMCORE) volatile pItemRC; +#if HC_ARCH_BITS == 64 + RTRCPTR Alignment0; +#endif + } aFreeItems[1]; +} PDMQUEUE; + +/** @name PDM::fQueueFlushing + * @{ */ +/** Used to make sure only one EMT will flush the queues. + * Set when an EMT is flushing queues, clear otherwise. */ +#define PDM_QUEUE_FLUSH_FLAG_ACTIVE_BIT 0 +/** Indicating there are queues with items pending. + * This is make sure we don't miss inserts happening during flushing. The FF + * cannot be used for this since it has to be cleared immediately to prevent + * other EMTs from spinning. */ +#define PDM_QUEUE_FLUSH_FLAG_PENDING_BIT 1 +/** @} */ + + +/** @name PDM task structures. + * @{ */ + +/** + * A asynchronous user mode task. + */ +typedef struct PDMTASK +{ + /** Task owner type. */ + PDMTASKTYPE volatile enmType; + /** Queue flags. */ + uint32_t volatile fFlags; + /** User argument for the callback. */ + R3PTRTYPE(void *) volatile pvUser; + /** The callback (will be cast according to enmType before callout). */ + R3PTRTYPE(PFNRT) volatile pfnCallback; + /** The owner identifier. */ + R3PTRTYPE(void *) volatile pvOwner; + /** Task name. */ + R3PTRTYPE(const char *) pszName; + /** Number of times already triggered when PDMTaskTrigger was called. */ + uint32_t volatile cAlreadyTrigged; + /** Number of runs. */ + uint32_t cRuns; +} PDMTASK; +/** Pointer to a PDM task. */ +typedef PDMTASK *PPDMTASK; + +/** + * A task set. + * + * This is served by one task executor thread. + */ +typedef struct PDMTASKSET +{ + /** Magic value (PDMTASKSET_MAGIC). */ + uint32_t u32Magic; + /** Set if this task set works for ring-0 and raw-mode. */ + bool fRZEnabled; + /** Number of allocated taks. */ + uint8_t volatile cAllocated; + /** Base handle value for this set. */ + uint16_t uHandleBase; + /** The task executor thread. */ + R3PTRTYPE(RTTHREAD) hThread; + /** Event semaphore for waking up the thread when fRZEnabled is set. */ + SUPSEMEVENT hEventR0; + /** Event semaphore for waking up the thread when fRZEnabled is clear. */ + R3PTRTYPE(RTSEMEVENT) hEventR3; + /** The VM pointer. */ + PVM pVM; + /** Padding so fTriggered is in its own cacheline. */ + uint64_t au64Padding2[3]; + + /** Bitmask of triggered tasks. */ + uint64_t volatile fTriggered; + /** Shutdown thread indicator. */ + bool volatile fShutdown; + /** Padding. */ + bool volatile afPadding3[3]; + /** Task currently running, UINT32_MAX if idle. */ + uint32_t volatile idxRunning; + /** Padding so fTriggered and fShutdown are in their own cacheline. */ + uint64_t volatile au64Padding3[6]; + + /** The individual tasks. (Unallocated tasks have NULL pvOwner.) */ + PDMTASK aTasks[64]; +} PDMTASKSET; +AssertCompileMemberAlignment(PDMTASKSET, fTriggered, 64); +AssertCompileMemberAlignment(PDMTASKSET, aTasks, 64); +/** Magic value for PDMTASKSET::u32Magic. */ +#define PDMTASKSET_MAGIC UINT32_C(0x19320314) +/** Pointer to a task set. */ +typedef PDMTASKSET *PPDMTASKSET; + +/** @} */ + + +/** + * Queue device helper task operation. + */ +typedef enum PDMDEVHLPTASKOP +{ + /** The usual invalid 0 entry. */ + PDMDEVHLPTASKOP_INVALID = 0, + /** ISASetIrq */ + PDMDEVHLPTASKOP_ISA_SET_IRQ, + /** PCISetIrq */ + PDMDEVHLPTASKOP_PCI_SET_IRQ, + /** PCISetIrq */ + PDMDEVHLPTASKOP_IOAPIC_SET_IRQ, + /** The usual 32-bit hack. */ + PDMDEVHLPTASKOP_32BIT_HACK = 0x7fffffff +} PDMDEVHLPTASKOP; + +/** + * Queued Device Helper Task. 
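
A PDMTASKSET tracks its 64 tasks with the single fTriggered bitmask: triggering a task is an atomic OR, and the executor thread claims the whole mask in one exchange and runs each set bit. Standalone sketch using C11 atomics plus the GCC/Clang count-trailing-zeros builtin (assumed available; the real code also wakes the executor via the event semaphores shown above):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t g_fTriggered;

    /* Any thread: mark task i as pending. */
    static void taskTrigger(unsigned i)
    {
        atomic_fetch_or(&g_fTriggered, UINT64_C(1) << i);
    }

    /* Executor thread: claim all pending bits at once, then run each task. */
    static void taskRunPending(void (**papfnTasks)(void))
    {
        uint64_t fPending = atomic_exchange(&g_fTriggered, 0);
        while (fPending)
        {
            unsigned i = (unsigned)__builtin_ctzll(fPending);   /* index of lowest set bit */
            fPending &= fPending - 1;                           /* clear that bit */
            papfnTasks[i]();
        }
    }
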
+ */ +typedef struct PDMDEVHLPTASK +{ + /** The queue item core (don't touch). */ + PDMQUEUEITEMCORE Core; + /** Pointer to the device instance (R3 Ptr). */ + PPDMDEVINSR3 pDevInsR3; + /** This operation to perform. */ + PDMDEVHLPTASKOP enmOp; +#if HC_ARCH_BITS == 64 + uint32_t Alignment0; +#endif + /** Parameters to the operation. */ + union PDMDEVHLPTASKPARAMS + { + /** + * PDMDEVHLPTASKOP_ISA_SET_IRQ and PDMDEVHLPTASKOP_IOAPIC_SET_IRQ. + */ + struct PDMDEVHLPTASKISASETIRQ + { + /** The IRQ */ + int iIrq; + /** The new level. */ + int iLevel; + /** The IRQ tag and source. */ + uint32_t uTagSrc; + } IsaSetIRQ, IoApicSetIRQ; + + /** + * PDMDEVHLPTASKOP_PCI_SET_IRQ + */ + struct PDMDEVHLPTASKPCISETIRQ + { + /** Pointer to the PCI device (R3 Ptr). */ + R3PTRTYPE(PPDMPCIDEV) pPciDevR3; + /** The IRQ */ + int iIrq; + /** The new level. */ + int iLevel; + /** The IRQ tag and source. */ + uint32_t uTagSrc; + } PciSetIRQ; + + /** Expanding the structure. */ + uint64_t au64[3]; + } u; +} PDMDEVHLPTASK; +/** Pointer to a queued Device Helper Task. */ +typedef PDMDEVHLPTASK *PPDMDEVHLPTASK; +/** Pointer to a const queued Device Helper Task. */ +typedef const PDMDEVHLPTASK *PCPDMDEVHLPTASK; + + + +/** + * An USB hub registration record. + */ +typedef struct PDMUSBHUB +{ + /** The USB versions this hub support. + * Note that 1.1 hubs can take on 2.0 devices. */ + uint32_t fVersions; + /** The number of ports on the hub. */ + uint32_t cPorts; + /** The number of available ports (0..cPorts). */ + uint32_t cAvailablePorts; + /** The driver instance of the hub. */ + PPDMDRVINS pDrvIns; + /** Copy of the to the registration structure. */ + PDMUSBHUBREG Reg; + + /** Pointer to the next hub in the list. */ + struct PDMUSBHUB *pNext; +} PDMUSBHUB; + +/** Pointer to a const USB HUB registration record. */ +typedef const PDMUSBHUB *PCPDMUSBHUB; + +/** Pointer to a PDM Async I/O template. */ +typedef struct PDMASYNCCOMPLETIONTEMPLATE *PPDMASYNCCOMPLETIONTEMPLATE; + +/** Pointer to the main PDM Async completion endpoint class. */ +typedef struct PDMASYNCCOMPLETIONEPCLASS *PPDMASYNCCOMPLETIONEPCLASS; + +/** Pointer to the global block cache structure. */ +typedef struct PDMBLKCACHEGLOBAL *PPDMBLKCACHEGLOBAL; + +/** + * PDM VMCPU Instance data. + * Changes to this must checked against the padding of the pdm union in VMCPU! + */ +typedef struct PDMCPU +{ + /** The number of entries in the apQueuedCritSectsLeaves table that's currently + * in use. */ + uint32_t cQueuedCritSectLeaves; + uint32_t uPadding0; /**< Alignment padding.*/ + /** Critical sections queued in RC/R0 because of contention preventing leave to + * complete. (R3 Ptrs) + * We will return to Ring-3 ASAP, so this queue doesn't have to be very long. */ + R3PTRTYPE(PPDMCRITSECT) apQueuedCritSectLeaves[8]; + + /** The number of entries in the apQueuedCritSectRwExclLeaves table that's + * currently in use. */ + uint32_t cQueuedCritSectRwExclLeaves; + uint32_t uPadding1; /**< Alignment padding.*/ + /** Read/write critical sections queued in RC/R0 because of contention + * preventing exclusive leave to complete. (R3 Ptrs) + * We will return to Ring-3 ASAP, so this queue doesn't have to be very long. */ + R3PTRTYPE(PPDMCRITSECTRW) apQueuedCritSectRwExclLeaves[8]; + + /** The number of entries in the apQueuedCritSectsRwShrdLeaves table that's + * currently in use. 
*/ + uint32_t cQueuedCritSectRwShrdLeaves; + uint32_t uPadding2; /**< Alignment padding.*/ + /** Read/write critical sections queued in RC/R0 because of contention + * preventing shared leave to complete. (R3 Ptrs) + * We will return to Ring-3 ASAP, so this queue doesn't have to be very long. */ + R3PTRTYPE(PPDMCRITSECTRW) apQueuedCritSectRwShrdLeaves[8]; +} PDMCPU; + + +/** + * PDM VM Instance data. + * Changes to this must checked against the padding of the cfgm union in VM! + */ +typedef struct PDM +{ + /** The PDM lock. + * This is used to protect everything that deals with interrupts, i.e. + * the PIC, APIC, IOAPIC and PCI devices plus some PDM functions. */ + PDMCRITSECT CritSect; + /** The NOP critical section. + * This is a dummy critical section that will not do any thread + * serialization but instead let all threads enter immediately and + * concurrently. */ + PDMCRITSECT NopCritSect; + + /** The ring-0 capable task sets (max 128). */ + PDMTASKSET aTaskSets[2]; + /** Pointer to task sets (max 512). */ + R3PTRTYPE(PPDMTASKSET) apTaskSets[8]; + + /** PCI Buses. */ + PDMPCIBUS aPciBuses[PDM_PCI_BUSSES_MAX]; + /** The register PIC device. */ + PDMPIC Pic; + /** The registered APIC device. */ + PDMAPIC Apic; + /** The registered I/O APIC device. */ + PDMIOAPIC IoApic; + /** The registered HPET device. */ + PPDMDEVINSR3 pHpet; + + /** List of registered devices. (FIFO) */ + R3PTRTYPE(PPDMDEV) pDevs; + /** List of devices instances. (FIFO) */ + R3PTRTYPE(PPDMDEVINS) pDevInstances; + /** List of registered USB devices. (FIFO) */ + R3PTRTYPE(PPDMUSB) pUsbDevs; + /** List of USB devices instances. (FIFO) */ + R3PTRTYPE(PPDMUSBINS) pUsbInstances; + /** List of registered drivers. (FIFO) */ + R3PTRTYPE(PPDMDRV) pDrvs; + /** The registered firmware device (can be NULL). */ + R3PTRTYPE(PPDMFW) pFirmware; + /** The registered DMAC device. */ + R3PTRTYPE(PPDMDMAC) pDmac; + /** The registered RTC device. */ + R3PTRTYPE(PPDMRTC) pRtc; + /** The registered USB HUBs. (FIFO) */ + R3PTRTYPE(PPDMUSBHUB) pUsbHubs; + + /** @name Queues + * @{ */ + /** Queue in which devhlp tasks are queued for R3 execution - R3 Ptr. */ + R3PTRTYPE(PPDMQUEUE) pDevHlpQueueR3; + /** Queue in which devhlp tasks are queued for R3 execution - R0 Ptr. */ + R0PTRTYPE(PPDMQUEUE) pDevHlpQueueR0; + /** Queue in which devhlp tasks are queued for R3 execution - RC Ptr. */ + RCPTRTYPE(PPDMQUEUE) pDevHlpQueueRC; + /** Pointer to the queue which should be manually flushed - RC Ptr. + * Only touched by EMT. */ + RCPTRTYPE(struct PDMQUEUE *) pQueueFlushRC; + /** Pointer to the queue which should be manually flushed - R0 Ptr. + * Only touched by EMT. */ + R0PTRTYPE(struct PDMQUEUE *) pQueueFlushR0; + /** Bitmask controlling the queue flushing. + * See PDM_QUEUE_FLUSH_FLAG_ACTIVE and PDM_QUEUE_FLUSH_FLAG_PENDING. */ + uint32_t volatile fQueueFlushing; + /** @} */ + + /** The current IRQ tag (tracing purposes). */ + uint32_t volatile uIrqTag; + + /** Pending reset flags (PDMVMRESET_F_XXX). */ + uint32_t volatile fResetFlags; + + /** Set by pdmR3LoadExec for use in assertions. */ + bool fStateLoaded; + /** Alignment padding. */ + bool afPadding[3]; + + /** The tracing ID of the next device instance. + * + * @remarks We keep the device tracing ID seperate from the rest as these are + * then more likely to end up with the same ID from one run to + * another, making analysis somewhat easier. 
Drivers and USB devices + * are more volatile and can be changed at runtime, thus these are much + * less likely to remain stable, so just heap them all together. */ + uint32_t idTracingDev; + /** The tracing ID of the next driver instance, USB device instance or other + * PDM entity requiring an ID. */ + uint32_t idTracingOther; + + /** @name VMM device heap + * @{ */ + /** The heap size. */ + uint32_t cbVMMDevHeap; + /** Free space. */ + uint32_t cbVMMDevHeapLeft; + /** Pointer to the heap base (MMIO2 ring-3 mapping). NULL if not registered. */ + RTR3PTR pvVMMDevHeap; + /** Ring-3 mapping/unmapping notification callback for the user. */ + PFNPDMVMMDEVHEAPNOTIFY pfnVMMDevHeapNotify; + /** The current mapping. NIL_RTGCPHYS if not mapped or registered. */ + RTGCPHYS GCPhysVMMDevHeap; + /** @} */ + + /** Number of times a critical section leave request needed to be queued for ring-3 execution. */ + STAMCOUNTER StatQueuedCritSectLeaves; +} PDM; +AssertCompileMemberAlignment(PDM, CritSect, 8); +AssertCompileMemberAlignment(PDM, aTaskSets, 64); +AssertCompileMemberAlignment(PDM, StatQueuedCritSectLeaves, 8); +AssertCompileMemberAlignment(PDM, GCPhysVMMDevHeap, sizeof(RTGCPHYS)); +/** Pointer to PDM VM instance data. */ +typedef PDM *PPDM; + + +/** + * PDM data kept in the ring-0 GVM. + */ +typedef struct PDMR0PERVM +{ + /** PCI Buses, ring-0 data. */ + PDMPCIBUSR0 aPciBuses[PDM_PCI_BUSSES_MAX]; + /** Number of valid ring-0 device instances (apDevInstances). */ + uint32_t cDevInstances; + uint32_t u32Padding; + /** Pointer to ring-0 device instances. */ + R0PTRTYPE(struct PDMDEVINSR0 *) apDevInstances[190]; +} PDMR0PERVM; + + +/** + * PDM data kept in the UVM. + */ +typedef struct PDMUSERPERVM +{ + /** @todo move more stuff over here. */ + + /** Linked list of timer driven PDM queues. + * Currently serialized by PDM::CritSect. */ + R3PTRTYPE(struct PDMQUEUE *) pQueuesTimer; + /** Linked list of force action driven PDM queues. + * Currently serialized by PDM::CritSect. */ + R3PTRTYPE(struct PDMQUEUE *) pQueuesForced; + + /** Lock protecting the lists below it. */ + RTCRITSECT ListCritSect; + /** Pointer to list of loaded modules. */ + PPDMMOD pModules; + /** List of initialized critical sections. (LIFO) */ + R3PTRTYPE(PPDMCRITSECTINT) pCritSects; + /** List of initialized read/write critical sections. (LIFO) */ + R3PTRTYPE(PPDMCRITSECTRWINT) pRwCritSects; + /** Head of the PDM Thread list. (singly linked) */ + R3PTRTYPE(PPDMTHREAD) pThreads; + /** Tail of the PDM Thread list. (singly linked) */ + R3PTRTYPE(PPDMTHREAD) pThreadsTail; + + /** @name PDM Async Completion + * @{ */ + /** Pointer to the array of supported endpoint classes. */ + PPDMASYNCCOMPLETIONEPCLASS apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_MAX]; + /** Head of the templates. Singly linked, protected by ListCritSect. */ + R3PTRTYPE(PPDMASYNCCOMPLETIONTEMPLATE) pAsyncCompletionTemplates; + /** @} */ + + /** Global block cache data. */ + R3PTRTYPE(PPDMBLKCACHEGLOBAL) pBlkCacheGlobal; +#ifdef VBOX_WITH_NETSHAPER + /** Pointer to network shaper instance. */ + R3PTRTYPE(PPDMNETSHAPER) pNetShaper; +#endif /* VBOX_WITH_NETSHAPER */ + +} PDMUSERPERVM; +/** Pointer to the PDM data kept in the UVM. 
*/ +typedef PDMUSERPERVM *PPDMUSERPERVM; + + + +/******************************************************************************* +* Global Variables * +*******************************************************************************/ +#ifdef IN_RING3 +extern const PDMDRVHLPR3 g_pdmR3DrvHlp; +extern const PDMDEVHLPR3 g_pdmR3DevHlpTrusted; +extern const PDMDEVHLPR3 g_pdmR3DevHlpUnTrusted; +extern const PDMPICHLP g_pdmR3DevPicHlp; +extern const PDMIOAPICHLP g_pdmR3DevIoApicHlp; +extern const PDMFWHLPR3 g_pdmR3DevFirmwareHlp; +extern const PDMPCIHLPR3 g_pdmR3DevPciHlp; +extern const PDMDMACHLP g_pdmR3DevDmacHlp; +extern const PDMRTCHLP g_pdmR3DevRtcHlp; +extern const PDMHPETHLPR3 g_pdmR3DevHpetHlp; +extern const PDMPCIRAWHLPR3 g_pdmR3DevPciRawHlp; +#endif + + +/******************************************************************************* +* Defined Constants And Macros * +*******************************************************************************/ +/** @def PDMDEV_ASSERT_DEVINS + * Asserts the validity of the device instance. + */ +#ifdef VBOX_STRICT +# define PDMDEV_ASSERT_DEVINS(pDevIns) \ + do { \ + AssertPtr(pDevIns); \ + Assert(pDevIns->u32Version == PDM_DEVINS_VERSION); \ + Assert(pDevIns->CTX_SUFF(pvInstanceDataFor) == (void *)&pDevIns->achInstanceData[0]); \ + } while (0) +#else +# define PDMDEV_ASSERT_DEVINS(pDevIns) do { } while (0) +#endif + +/** @def PDMDRV_ASSERT_DRVINS + * Asserts the validity of the driver instance. + */ +#ifdef VBOX_STRICT +# define PDMDRV_ASSERT_DRVINS(pDrvIns) \ + do { \ + AssertPtr(pDrvIns); \ + Assert(pDrvIns->u32Version == PDM_DRVINS_VERSION); \ + Assert(pDrvIns->CTX_SUFF(pvInstanceData) == (void *)&pDrvIns->achInstanceData[0]); \ + } while (0) +#else +# define PDMDRV_ASSERT_DRVINS(pDrvIns) do { } while (0) +#endif + + +/******************************************************************************* +* Internal Functions * +*******************************************************************************/ +#ifdef IN_RING3 +bool pdmR3IsValidName(const char *pszName); + +int pdmR3CritSectBothInitStats(PVM pVM); +void pdmR3CritSectBothRelocate(PVM pVM); +int pdmR3CritSectBothDeleteDevice(PVM pVM, PPDMDEVINS pDevIns); +int pdmR3CritSectBothDeleteDriver(PVM pVM, PPDMDRVINS pDrvIns); +int pdmR3CritSectInitDevice( PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va); +int pdmR3CritSectInitDeviceAuto( PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...); +int pdmR3CritSectInitDriver( PVM pVM, PPDMDRVINS pDrvIns, PPDMCRITSECT pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...); +int pdmR3CritSectRwInitDevice( PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECTRW pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, va_list va); +int pdmR3CritSectRwInitDeviceAuto( PVM pVM, PPDMDEVINS pDevIns, PPDMCRITSECTRW pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...); +int pdmR3CritSectRwInitDriver( PVM pVM, PPDMDRVINS pDrvIns, PPDMCRITSECTRW pCritSect, RT_SRC_POS_DECL, + const char *pszNameFmt, ...); + +int pdmR3DevInit(PVM pVM); +int pdmR3DevInitComplete(PVM pVM); +PPDMDEV pdmR3DevLookup(PVM pVM, const char *pszName); +int pdmR3DevFindLun(PVM pVM, const char *pszDevice, unsigned iInstance, unsigned iLun, PPDMLUN *ppLun); +DECLCALLBACK(bool) pdmR3DevHlpQueueConsumer(PVM pVM, PPDMQUEUEITEMCORE pItem); + +int pdmR3UsbLoadModules(PVM pVM); +int pdmR3UsbInstantiateDevices(PVM pVM); +PPDMUSB pdmR3UsbLookup(PVM pVM, const char *pszName); +int pdmR3UsbRegisterHub(PVM pVM, 
PPDMDRVINS pDrvIns, uint32_t fVersions, uint32_t cPorts, PCPDMUSBHUBREG pUsbHubReg, PPCPDMUSBHUBHLP ppUsbHubHlp); +int pdmR3UsbVMInitComplete(PVM pVM); + +int pdmR3DrvInit(PVM pVM); +int pdmR3DrvInstantiate(PVM pVM, PCFGMNODE pNode, PPDMIBASE pBaseInterface, PPDMDRVINS pDrvAbove, + PPDMLUN pLun, PPDMIBASE *ppBaseInterface); +int pdmR3DrvDetach(PPDMDRVINS pDrvIns, uint32_t fFlags); +void pdmR3DrvDestroyChain(PPDMDRVINS pDrvIns, uint32_t fFlags); +PPDMDRV pdmR3DrvLookup(PVM pVM, const char *pszName); + +int pdmR3LdrInitU(PUVM pUVM); +void pdmR3LdrTermU(PUVM pUVM); +char *pdmR3FileR3(const char *pszFile, bool fShared); +int pdmR3LoadR3U(PUVM pUVM, const char *pszFilename, const char *pszName); + +void pdmR3QueueRelocate(PVM pVM, RTGCINTPTR offDelta); + +int pdmR3TaskInit(PVM pVM); +void pdmR3TaskTerm(PVM pVM); + +int pdmR3ThreadCreateDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADDEV pfnThread, + PFNPDMTHREADWAKEUPDEV pfnWakeup, size_t cbStack, RTTHREADTYPE enmType, const char *pszName); +int pdmR3ThreadCreateUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADUSB pfnThread, + PFNPDMTHREADWAKEUPUSB pfnWakeup, size_t cbStack, RTTHREADTYPE enmType, const char *pszName); +int pdmR3ThreadCreateDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMTHREAD ppThread, void *pvUser, PFNPDMTHREADDRV pfnThread, + PFNPDMTHREADWAKEUPDRV pfnWakeup, size_t cbStack, RTTHREADTYPE enmType, const char *pszName); +int pdmR3ThreadDestroyDevice(PVM pVM, PPDMDEVINS pDevIns); +int pdmR3ThreadDestroyUsb(PVM pVM, PPDMUSBINS pUsbIns); +int pdmR3ThreadDestroyDriver(PVM pVM, PPDMDRVINS pDrvIns); +void pdmR3ThreadDestroyAll(PVM pVM); +int pdmR3ThreadResumeAll(PVM pVM); +int pdmR3ThreadSuspendAll(PVM pVM); + +#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION +int pdmR3AsyncCompletionInit(PVM pVM); +int pdmR3AsyncCompletionTerm(PVM pVM); +void pdmR3AsyncCompletionResume(PVM pVM); +int pdmR3AsyncCompletionTemplateCreateDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, PFNPDMASYNCCOMPLETEDEV pfnCompleted, const char *pszDesc); +int pdmR3AsyncCompletionTemplateCreateDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, + PFNPDMASYNCCOMPLETEDRV pfnCompleted, void *pvTemplateUser, const char *pszDesc); +int pdmR3AsyncCompletionTemplateCreateUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMASYNCCOMPLETIONTEMPLATE ppTemplate, PFNPDMASYNCCOMPLETEUSB pfnCompleted, const char *pszDesc); +int pdmR3AsyncCompletionTemplateDestroyDevice(PVM pVM, PPDMDEVINS pDevIns); +int pdmR3AsyncCompletionTemplateDestroyDriver(PVM pVM, PPDMDRVINS pDrvIns); +int pdmR3AsyncCompletionTemplateDestroyUsb(PVM pVM, PPDMUSBINS pUsbIns); +#endif + +#ifdef VBOX_WITH_NETSHAPER +int pdmR3NetShaperInit(PVM pVM); +int pdmR3NetShaperTerm(PVM pVM); +#endif + +int pdmR3BlkCacheInit(PVM pVM); +void pdmR3BlkCacheTerm(PVM pVM); +int pdmR3BlkCacheResume(PVM pVM); + +#endif /* IN_RING3 */ + +void pdmLock(PVMCC pVM); +int pdmLockEx(PVMCC pVM, int rc); +void pdmUnlock(PVMCC pVM); + +#if defined(IN_RING3) || defined(IN_RING0) +void pdmCritSectRwLeaveSharedQueued(PPDMCRITSECTRW pThis); +void pdmCritSectRwLeaveExclQueued(PPDMCRITSECTRW pThis); +#endif + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_PDMInternal_h */ + diff --git a/src/VBox/VMM/include/PDMNetShaperInternal.h b/src/VBox/VMM/include/PDMNetShaperInternal.h new file mode 100644 index 00000000..3f5531dd --- /dev/null +++ b/src/VBox/VMM/include/PDMNetShaperInternal.h @@ -0,0 +1,54 @@ +/* $Id: PDMNetShaperInternal.h $ */ 
+/** @file + * PDM Network Shaper - Internal data structures and functions common for both R0 and R3 parts. + */ + +/* + * Copyright (C) 2011-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_PDMNetShaperInternal_h +#define VMM_INCLUDED_SRC_include_PDMNetShaperInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +/** + * Bandwidth group instance data + */ +typedef struct PDMNSBWGROUP +{ + /** Pointer to the next group in the list. */ + R3PTRTYPE(struct PDMNSBWGROUP *) pNextR3; + /** Pointer to the shared UVM structure. */ + R3PTRTYPE(struct PDMNETSHAPER *) pShaperR3; + /** Critical section protecting all members below. */ + PDMCRITSECT Lock; + /** Pointer to the first filter attached to this group. */ + R3PTRTYPE(struct PDMNSFILTER *) pFiltersHeadR3; + /** Bandwidth group name. */ + R3PTRTYPE(char *) pszNameR3; + /** Maximum number of bytes filters are allowed to transfer. */ + volatile uint64_t cbPerSecMax; + /** Number of bytes we are allowed to transfer in one burst. */ + volatile uint32_t cbBucket; + /** Number of bytes we were allowed to transfer at the last update. */ + volatile uint32_t cbTokensLast; + /** Timestamp of the last update */ + volatile uint64_t tsUpdatedLast; + /** Reference counter - How many filters are associated with this group. */ + volatile uint32_t cRefs; +} PDMNSBWGROUP; +/** Pointer to a bandwidth group. */ +typedef PDMNSBWGROUP *PPDMNSBWGROUP; + +#endif /* !VMM_INCLUDED_SRC_include_PDMNetShaperInternal_h */ + diff --git a/src/VBox/VMM/include/PGMGstDefs.h b/src/VBox/VMM/include/PGMGstDefs.h new file mode 100644 index 00000000..85c4cd52 --- /dev/null +++ b/src/VBox/VMM/include/PGMGstDefs.h @@ -0,0 +1,231 @@ +/* $Id: PGMGstDefs.h $ */ +/** @file + * VBox - Page Manager, Guest Paging Template - All context code. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
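
The PDMNSBWGROUP fields above (cbPerSecMax, cbBucket, cbTokensLast, tsUpdatedLast) describe a token bucket: tokens accumulate at the configured rate up to one burst, and each transfer spends tokens. A simplified standalone sketch of how such a group might meter a request (locking and overflow handling omitted; not the actual shaper code):

    #include <stdint.h>

    typedef struct BWGROUP
    {
        uint64_t cbPerSecMax;   /* sustained rate limit, bytes per second */
        uint32_t cbBucket;      /* burst size, bytes */
        uint32_t cbTokensLast;  /* tokens left at the last update */
        uint64_t tsUpdatedLast; /* timestamp of the last update, nanoseconds */
    } BWGROUP;

    /* Return how many of cbReq bytes may be transferred now and update the bucket. */
    static uint32_t bwGroupAllocate(BWGROUP *pGrp, uint64_t tsNow, uint32_t cbReq)
    {
        uint64_t cNsElapsed = tsNow - pGrp->tsUpdatedLast;
        uint64_t cbRefill   = cNsElapsed * pGrp->cbPerSecMax / UINT64_C(1000000000);
        uint64_t cbTokens   = pGrp->cbTokensLast + cbRefill;
        if (cbTokens > pGrp->cbBucket)
            cbTokens = pGrp->cbBucket;              /* never accumulate more than one burst */
        uint32_t cbAllowed  = cbReq <= cbTokens ? cbReq : (uint32_t)cbTokens;
        pGrp->cbTokensLast  = (uint32_t)(cbTokens - cbAllowed);
        pGrp->tsUpdatedLast = tsNow;
        return cbAllowed;
    }

Filters that get less than they asked for are parked on the group's filter list and retried once enough tokens have accumulated.
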
+ */ + + +/******************************************************************************* +* Defined Constants And Macros * +*******************************************************************************/ +#undef GSTPT +#undef PGSTPT +#undef GSTPTE +#undef PGSTPTE +#undef GSTPD +#undef PGSTPD +#undef GSTPDE +#undef PGSTPDE +#undef GSTPTWALK +#undef PGSTPTWALK +#undef PCGSTPTWALK +#undef GST_BIG_PAGE_SIZE +#undef GST_BIG_PAGE_OFFSET_MASK +#undef GST_PDE_PG_MASK +#undef GST_PDE_BIG_PG_MASK +#undef GST_PD_SHIFT +#undef GST_PD_MASK +#undef GST_PTE_PG_MASK +#undef GST_GET_PTE_SHW_FLAGS +#undef GST_PT_SHIFT +#undef GST_PT_MASK +#undef GST_TOTAL_PD_ENTRIES +#undef GST_CR3_PAGE_MASK +#undef GST_PDPE_ENTRIES +#undef GST_PDPT_SHIFT +#undef GST_PDPT_MASK +#undef GST_PDPE_PG_MASK +#undef GST_GET_PTE_GCPHYS +#undef GST_GET_PDE_GCPHYS +#undef GST_GET_BIG_PDE_GCPHYS +#undef GST_GET_PDE_SHW_FLAGS +#undef GST_GET_BIG_PDE_SHW_FLAGS +#undef GST_GET_BIG_PDE_SHW_FLAGS_4_PTE +#undef GST_IS_PTE_VALID +#undef GST_IS_PDE_VALID +#undef GST_IS_BIG_PDE_VALID +#undef GST_IS_PDPE_VALID +#undef GST_IS_BIG_PDPE_VALID +#undef GST_IS_PML4E_VALID +#undef GST_IS_PSE_ACTIVE +#undef GST_IS_NX_ACTIVE +#undef BTH_IS_NP_ACTIVE + +#if PGM_GST_TYPE == PGM_TYPE_REAL \ + || PGM_GST_TYPE == PGM_TYPE_PROT + +# if PGM_SHW_TYPE == PGM_TYPE_EPT +# define GSTPT X86PTPAE +# define PGSTPT PX86PTPAE +# define GSTPTE X86PTEPAE +# define PGSTPTE PX86PTEPAE +# define GSTPD X86PDPAE +# define PGSTPD PX86PDPAE +# define GSTPDE X86PDEPAE +# define PGSTPDE PX86PDEPAE +# define GST_PTE_PG_MASK X86_PTE_PAE_PG_MASK +# define GST_IS_NX_ACTIVE(pVCpu) (true && This_should_perhaps_not_be_used_in_this_context) +# define BTH_IS_NP_ACTIVE(pVM) (true) +# else +# if PGM_SHW_TYPE == PGM_TYPE_32BIT /* Same as shadow paging, but no PGMSHWPTEPAE. */ +# define GSTPT X86PT +# define PGSTPT PX86PT +# define GSTPTE X86PTE +# define PGSTPTE PX86PTE +# define GSTPD X86PD +# define PGSTPD PX86PD +# define GSTPDE X86PDE +# define PGSTPDE PX86PDE +# define GST_PTE_PG_MASK X86_PTE_PG_MASK +# else +# define GSTPT X86PTPAE +# define PGSTPT PX86PTPAE +# define GSTPTE X86PTEPAE +# define PGSTPTE PX86PTEPAE +# define GSTPD X86PDPAE +# define PGSTPD PX86PDPAE +# define GSTPDE X86PDEPAE +# define PGSTPDE PX86PDEPAE +# define GST_PTE_PG_MASK X86_PTE_PAE_PG_MASK +# endif +# define GST_IS_NX_ACTIVE(pVCpu) (pgmGstIsNoExecuteActive(pVCpu)) +# if PGM_GST_TYPE == PGM_TYPE_PROT /* (comment at top of PGMAllBth.h) */ +# define BTH_IS_NP_ACTIVE(pVM) (pVM->pgm.s.fNestedPaging) +# else +# define BTH_IS_NP_ACTIVE(pVM) (false) +# endif +# endif +# define GST_GET_PTE_GCPHYS(Pte) PGM_A20_APPLY(pVCpu, ((Pte).u & GST_PTE_PG_MASK)) +# define GST_GET_PDE_GCPHYS(Pde) (true && This_should_perhaps_not_be_used_in_this_context) //?? +# define GST_GET_BIG_PDE_GCPHYS(Pde) (true && This_should_perhaps_not_be_used_in_this_context) //?? +# define GST_GET_PTE_SHW_FLAGS(pVCpu, Pte) ((Pte).u & (X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G)) /**< @todo Could return P|RW|US|A|D here without consulting the PTE. */ +# define GST_GET_PDE_SHW_FLAGS(pVCpu, Pde) (true && This_should_perhaps_not_be_used_in_this_context) //?? +# define GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, Pde) (true && This_should_perhaps_not_be_used_in_this_context) //?? +# define GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, Pde) (true && This_should_perhaps_not_be_used_in_this_context) //?? 
+# define GST_IS_PTE_VALID(pVCpu, Pte) (true) +# define GST_IS_PDE_VALID(pVCpu, Pde) (true) +# define GST_IS_BIG_PDE_VALID(pVCpu, Pde) (true) +# define GST_IS_PDPE_VALID(pVCpu, Pdpe) (true) +# define GST_IS_BIG_PDPE_VALID(pVCpu, Pdpe) (true) +# define GST_IS_PML4E_VALID(pVCpu, Pml4e) (true) +# define GST_IS_PSE_ACTIVE(pVCpu) (false && This_should_not_be_used_in_this_context) + +#elif PGM_GST_TYPE == PGM_TYPE_32BIT +# define GSTPT X86PT +# define PGSTPT PX86PT +# define GSTPTE X86PTE +# define PGSTPTE PX86PTE +# define GSTPD X86PD +# define PGSTPD PX86PD +# define GSTPDE X86PDE +# define PGSTPDE PX86PDE +# define GSTPTWALK PGMPTWALKGST32BIT +# define PGSTPTWALK PPGMPTWALKGST32BIT +# define PCGSTPTWALK PCPGMPTWALKGST32BIT +# define GST_BIG_PAGE_SIZE X86_PAGE_4M_SIZE +# define GST_BIG_PAGE_OFFSET_MASK X86_PAGE_4M_OFFSET_MASK +# define GST_PDE_PG_MASK X86_PDE_PG_MASK +# define GST_PDE_BIG_PG_MASK X86_PDE4M_PG_MASK +# define GST_GET_PTE_GCPHYS(Pte) PGM_A20_APPLY(pVCpu, ((Pte).u & GST_PDE_PG_MASK)) +# define GST_GET_PDE_GCPHYS(Pde) PGM_A20_APPLY(pVCpu, ((Pde).u & GST_PDE_PG_MASK)) +# define GST_GET_BIG_PDE_GCPHYS(pVM, Pde) PGM_A20_APPLY(pVCpu, pgmGstGet4MBPhysPage((pVM), Pde)) +# define GST_GET_PDE_SHW_FLAGS(pVCpu, Pde) ((Pde).u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A)) +# define GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, Pde) \ + ( ((Pde).u & (X86_PDE4M_P | X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_A)) | PGM_PDFLAGS_BIG_PAGE ) +# define GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, Pde) \ + ((Pde).u & (X86_PDE4M_P | X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_A | X86_PDE4M_D | X86_PDE4M_G)) +# define GST_PD_SHIFT X86_PD_SHIFT +# define GST_PD_MASK X86_PD_MASK +# define GST_TOTAL_PD_ENTRIES X86_PG_ENTRIES +# define GST_PTE_PG_MASK X86_PTE_PG_MASK +# define GST_GET_PTE_SHW_FLAGS(pVCpu, Pte) ((Pte).u & (X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G)) +# define GST_PT_SHIFT X86_PT_SHIFT +# define GST_PT_MASK X86_PT_MASK +# define GST_CR3_PAGE_MASK X86_CR3_PAGE_MASK +# define GST_IS_PTE_VALID(pVCpu, Pte) (true) +# define GST_IS_PDE_VALID(pVCpu, Pde) (true) +# define GST_IS_BIG_PDE_VALID(pVCpu, Pde) (!( (Pde).u & (pVCpu)->pgm.s.fGst32BitMbzBigPdeMask )) +//# define GST_IS_PDPE_VALID(pVCpu, Pdpe) (false) +//# define GST_IS_BIG_PDPE_VALID(pVCpu, Pdpe) (false) +//# define GST_IS_PML4E_VALID(pVCpu, Pml4e) (false) +# define GST_IS_PSE_ACTIVE(pVCpu) pgmGst32BitIsPageSizeExtActive(pVCpu) +# define GST_IS_NX_ACTIVE(pVCpu) (false) +# define BTH_IS_NP_ACTIVE(pVM) (false) + +#elif PGM_GST_TYPE == PGM_TYPE_PAE \ + || PGM_GST_TYPE == PGM_TYPE_AMD64 +# define GSTPT X86PTPAE +# define PGSTPT PX86PTPAE +# define GSTPTE X86PTEPAE +# define PGSTPTE PX86PTEPAE +# define GSTPD X86PDPAE +# define PGSTPD PX86PDPAE +# define GSTPDE X86PDEPAE +# define PGSTPDE PX86PDEPAE +# define GST_BIG_PAGE_SIZE X86_PAGE_2M_SIZE +# define GST_BIG_PAGE_OFFSET_MASK X86_PAGE_2M_OFFSET_MASK +# define GST_PDE_PG_MASK X86_PDE_PAE_PG_MASK +# define GST_PDE_BIG_PG_MASK X86_PDE2M_PAE_PG_MASK +# define GST_GET_PTE_GCPHYS(Pte) PGM_A20_APPLY(pVCpu, ((Pte).u & GST_PTE_PG_MASK)) +# define GST_GET_PDE_GCPHYS(Pde) PGM_A20_APPLY(pVCpu, ((Pde).u & GST_PDE_PG_MASK)) +# define GST_GET_BIG_PDE_GCPHYS(pVM, Pde) PGM_A20_APPLY(pVCpu, ((Pde).u & GST_PDE_BIG_PG_MASK)) +# define GST_GET_PTE_SHW_FLAGS(pVCpu, Pte) ((Pte).u & (pVCpu)->pgm.s.fGst64ShadowedPteMask ) +# define GST_GET_PDE_SHW_FLAGS(pVCpu, Pde) ((Pde).u & (pVCpu)->pgm.s.fGst64ShadowedPdeMask ) +# define GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, Pde) ( ((Pde).u & 
(pVCpu)->pgm.s.fGst64ShadowedBigPdeMask ) | PGM_PDFLAGS_BIG_PAGE) +# define GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, Pde) ((Pde).u & (pVCpu)->pgm.s.fGst64ShadowedBigPde4PteMask ) + +# define GST_PD_SHIFT X86_PD_PAE_SHIFT +# define GST_PD_MASK X86_PD_PAE_MASK +# if PGM_GST_TYPE == PGM_TYPE_PAE +# define GSTPTWALK PGMPTWALKGSTPAE +# define PGSTPTWALK PPGMPTWALKGSTPAE +# define PCGSTPTWALK PCPGMPTWALKGSTPAE +# define GST_TOTAL_PD_ENTRIES (X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES) +# define GST_PDPE_ENTRIES X86_PG_PAE_PDPE_ENTRIES +# define GST_PDPE_PG_MASK X86_PDPE_PG_MASK +# define GST_PDPT_SHIFT X86_PDPT_SHIFT +# define GST_PDPT_MASK X86_PDPT_MASK_PAE +# define GST_PTE_PG_MASK X86_PTE_PAE_PG_MASK +# define GST_CR3_PAGE_MASK X86_CR3_PAE_PAGE_MASK +# define GST_IS_PTE_VALID(pVCpu, Pte) (!( (Pte).u & (pVCpu)->pgm.s.fGstPaeMbzPteMask )) +# define GST_IS_PDE_VALID(pVCpu, Pde) (!( (Pde).u & (pVCpu)->pgm.s.fGstPaeMbzPdeMask )) +# define GST_IS_BIG_PDE_VALID(pVCpu, Pde) (!( (Pde).u & (pVCpu)->pgm.s.fGstPaeMbzBigPdeMask )) +# define GST_IS_PDPE_VALID(pVCpu, Pdpe) (!( (Pdpe).u & (pVCpu)->pgm.s.fGstPaeMbzPdpeMask )) +//# define GST_IS_BIG_PDPE_VALID(pVCpu, Pdpe) (false) +//# define GST_IS_PML4E_VALID(pVCpu, Pml4e) (false) +# else +# define GSTPTWALK PGMPTWALKGSTAMD64 +# define PGSTPTWALK PPGMPTWALKGSTAMD64 +# define PCGSTPTWALK PCPGMPTWALKGSTAMD64 +# define GST_TOTAL_PD_ENTRIES (X86_PG_AMD64_ENTRIES * X86_PG_AMD64_PDPE_ENTRIES) +# define GST_PDPE_ENTRIES X86_PG_AMD64_PDPE_ENTRIES +# define GST_PDPT_SHIFT X86_PDPT_SHIFT +# define GST_PDPE_PG_MASK X86_PDPE_PG_MASK +# define GST_PDPT_MASK X86_PDPT_MASK_AMD64 +# define GST_PTE_PG_MASK X86_PTE_PAE_PG_MASK +# define GST_CR3_PAGE_MASK X86_CR3_AMD64_PAGE_MASK +# define GST_IS_PTE_VALID(pVCpu, Pte) (!( (Pte).u & (pVCpu)->pgm.s.fGstAmd64MbzPteMask )) +# define GST_IS_PDE_VALID(pVCpu, Pde) (!( (Pde).u & (pVCpu)->pgm.s.fGstAmd64MbzPdeMask )) +# define GST_IS_BIG_PDE_VALID(pVCpu, Pde) (!( (Pde).u & (pVCpu)->pgm.s.fGstAmd64MbzBigPdeMask )) +# define GST_IS_PDPE_VALID(pVCpu, Pdpe) (!( (Pdpe).u & (pVCpu)->pgm.s.fGstAmd64MbzPdpeMask )) +# define GST_IS_BIG_PDPE_VALID(pVCpu, Pdpe) (!( (Pdpe).u & (pVCpu)->pgm.s.fGstAmd64MbzBigPdpeMask )) +# define GST_IS_PML4E_VALID(pVCpu, Pml4e) (!( (Pml4e).u & (pVCpu)->pgm.s.fGstAmd64MbzPml4eMask )) +# endif +# define GST_PT_SHIFT X86_PT_PAE_SHIFT +# define GST_PT_MASK X86_PT_PAE_MASK +# define GST_IS_PSE_ACTIVE(pVCpu) (true) +# define GST_IS_NX_ACTIVE(pVCpu) (pgmGstIsNoExecuteActive(pVCpu)) +# define BTH_IS_NP_ACTIVE(pVM) (false) +#endif + diff --git a/src/VBox/VMM/include/PGMInline.h b/src/VBox/VMM/include/PGMInline.h new file mode 100644 index 00000000..4018ab0d --- /dev/null +++ b/src/VBox/VMM/include/PGMInline.h @@ -0,0 +1,1435 @@ +/* $Id: PGMInline.h $ */ +/** @file + * PGM - Inlined functions. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VMM_INCLUDED_SRC_include_PGMInline_h +#define VMM_INCLUDED_SRC_include_PGMInline_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/err.h> +#include <VBox/vmm/stam.h> +#include <VBox/param.h> +#include <VBox/vmm/vmm.h> +#include <VBox/vmm/mm.h> +#include <VBox/vmm/pdmcritsect.h> +#include <VBox/vmm/pdmapi.h> +#include <VBox/dis.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/log.h> +#include <VBox/vmm/gmm.h> +#include <VBox/vmm/hm.h> +#include <VBox/vmm/nem.h> +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/avl.h> +#include <iprt/critsect.h> +#include <iprt/sha.h> + + + +/** @addtogroup grp_pgm_int Internals + * @internal + * @{ + */ + +/** + * Gets the PGMRAMRANGE structure for a guest page. + * + * @returns Pointer to the RAM range on success. + * @returns NULL on a VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS condition. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + */ +DECLINLINE(PPGMRAMRANGE) pgmPhysGetRange(PVMCC pVM, RTGCPHYS GCPhys) +{ + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + if (!pRam || GCPhys - pRam->GCPhys >= pRam->cb) + return pgmPhysGetRangeSlow(pVM, GCPhys); + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,RamRangeTlbHits)); + return pRam; +} + + +/** + * Gets the PGMRAMRANGE structure for a guest page, if unassigned get the ram + * range above it. + * + * @returns Pointer to the RAM range on success. + * @returns NULL if the address is located after the last range. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + */ +DECLINLINE(PPGMRAMRANGE) pgmPhysGetRangeAtOrAbove(PVMCC pVM, RTGCPHYS GCPhys) +{ + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + if ( !pRam + || (GCPhys - pRam->GCPhys) >= pRam->cb) + return pgmPhysGetRangeAtOrAboveSlow(pVM, GCPhys); + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,RamRangeTlbHits)); + return pRam; +} + + +/** + * Gets the PGMPAGE structure for a guest page. + * + * @returns Pointer to the page on success. + * @returns NULL on a VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS condition. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + */ +DECLINLINE(PPGMPAGE) pgmPhysGetPage(PVMCC pVM, RTGCPHYS GCPhys) +{ + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + RTGCPHYS off; + if ( !pRam + || (off = GCPhys - pRam->GCPhys) >= pRam->cb) + return pgmPhysGetPageSlow(pVM, GCPhys); + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,RamRangeTlbHits)); + return &pRam->aPages[off >> PAGE_SHIFT]; +} + + +/** + * Gets the PGMPAGE structure for a guest page. + * + * Old Phys code: Will make sure the page is present. + * + * @returns VBox status code. + * @retval VINF_SUCCESS and a valid *ppPage on success. + * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if the address isn't valid. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + * @param ppPage Where to store the page pointer on success. 
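+ *
+ * @remarks Illustrative usage sketch only (not from the upstream sources);
+ *          assumes pVM and GCPhys are already in scope:
+ * @code
+ *      PPGMPAGE pPage;
+ *      int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
+ *      if (RT_SUCCESS(rc))
+ *          Log(("page state %u\n", PGM_PAGE_GET_STATE(pPage)));
+ * @endcode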
+ */ +DECLINLINE(int) pgmPhysGetPageEx(PVMCC pVM, RTGCPHYS GCPhys, PPPGMPAGE ppPage) +{ + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + RTGCPHYS off; + if ( !pRam + || (off = GCPhys - pRam->GCPhys) >= pRam->cb) + return pgmPhysGetPageExSlow(pVM, GCPhys, ppPage); + *ppPage = &pRam->aPages[off >> PAGE_SHIFT]; + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,RamRangeTlbHits)); + return VINF_SUCCESS; +} + + +/** + * Gets the PGMPAGE structure for a guest page. + * + * Old Phys code: Will make sure the page is present. + * + * @returns VBox status code. + * @retval VINF_SUCCESS and a valid *ppPage on success. + * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if the address isn't valid. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + * @param ppPage Where to store the page pointer on success. + * @param ppRamHint Where to read and store the ram list hint. + * The caller initializes this to NULL before the call. + */ +DECLINLINE(int) pgmPhysGetPageWithHintEx(PVMCC pVM, RTGCPHYS GCPhys, PPPGMPAGE ppPage, PPGMRAMRANGE *ppRamHint) +{ + RTGCPHYS off; + PPGMRAMRANGE pRam = *ppRamHint; + if ( !pRam + || RT_UNLIKELY((off = GCPhys - pRam->GCPhys) >= pRam->cb)) + { + pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + if ( !pRam + || (off = GCPhys - pRam->GCPhys) >= pRam->cb) + return pgmPhysGetPageAndRangeExSlow(pVM, GCPhys, ppPage, ppRamHint); + + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,RamRangeTlbHits)); + *ppRamHint = pRam; + } + *ppPage = &pRam->aPages[off >> PAGE_SHIFT]; + return VINF_SUCCESS; +} + + +/** + * Gets the PGMPAGE structure for a guest page together with the PGMRAMRANGE. + * + * @returns Pointer to the page on success. + * @returns NULL on a VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS condition. + * + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + * @param ppPage Where to store the pointer to the PGMPAGE structure. + * @param ppRam Where to store the pointer to the PGMRAMRANGE structure. + */ +DECLINLINE(int) pgmPhysGetPageAndRangeEx(PVMCC pVM, RTGCPHYS GCPhys, PPPGMPAGE ppPage, PPGMRAMRANGE *ppRam) +{ + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + RTGCPHYS off; + if ( !pRam + || (off = GCPhys - pRam->GCPhys) >= pRam->cb) + return pgmPhysGetPageAndRangeExSlow(pVM, GCPhys, ppPage, ppRam); + + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,RamRangeTlbHits)); + *ppRam = pRam; + *ppPage = &pRam->aPages[off >> PAGE_SHIFT]; + return VINF_SUCCESS; +} + + +/** + * Convert GC Phys to HC Phys. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address. + * @param pHCPhys Where to store the corresponding HC physical address. + * + * @deprecated Doesn't deal with zero, shared or write monitored pages. + * Avoid when writing new code! + */ +DECLINLINE(int) pgmRamGCPhys2HCPhys(PVMCC pVM, RTGCPHYS GCPhys, PRTHCPHYS pHCPhys) +{ + PPGMPAGE pPage; + int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage); + if (RT_FAILURE(rc)) + return rc; + *pHCPhys = PGM_PAGE_GET_HCPHYS(pPage) | (GCPhys & PAGE_OFFSET_MASK); + return VINF_SUCCESS; +} + +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + +/** + * Inlined version of the ring-0 version of the host page mapping code + * that optimizes access to pages already in the set. + * + * @returns VINF_SUCCESS. Will bail out to ring-3 on failure. 
+ * @param pVCpu The cross context virtual CPU structure. + * @param HCPhys The physical address of the page. + * @param ppv Where to store the mapping address. + * @param SRC_POS The source location of the caller. + */ +DECLINLINE(int) pgmRZDynMapHCPageInlined(PVMCPUCC pVCpu, RTHCPHYS HCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL) +{ + PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; + + STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPageInl, a); + Assert(!(HCPhys & PAGE_OFFSET_MASK)); + Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries)); + + unsigned iHash = PGMMAPSET_HASH(HCPhys); + unsigned iEntry = pSet->aiHashTable[iHash]; + if ( iEntry < pSet->cEntries + && pSet->aEntries[iEntry].HCPhys == HCPhys + && pSet->aEntries[iEntry].cInlinedRefs < UINT16_MAX - 1) + { + pSet->aEntries[iEntry].cInlinedRefs++; + *ppv = pSet->aEntries[iEntry].pvPage; + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPageInlHits); + } + else + { + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPageInlMisses); + pgmRZDynMapHCPageCommon(pSet, HCPhys, ppv RTLOG_COMMA_SRC_POS_ARGS); + } + + STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPageInl, a); + return VINF_SUCCESS; +} + + +/** + * Inlined version of the guest page mapping code that optimizes access to pages + * already in the set. + * + * @returns VBox status code, see pgmRZDynMapGCPageCommon for details. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhys The guest physical address of the page. + * @param ppv Where to store the mapping address. + * @param SRC_POS The source location of the caller. + */ +DECLINLINE(int) pgmRZDynMapGCPageV2Inlined(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL) +{ + STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInl, a); + AssertMsg(!(GCPhys & PAGE_OFFSET_MASK), ("%RGp\n", GCPhys)); + + /* + * Get the ram range. + */ + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + RTGCPHYS off; + if ( !pRam + || (off = GCPhys - pRam->GCPhys) >= pRam->cb + /** @todo || page state stuff */ + ) + { + /* This case is not counted into StatRZDynMapGCPageInl. */ + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlRamMisses); + return pgmRZDynMapGCPageCommon(pVM, pVCpu, GCPhys, ppv RTLOG_COMMA_SRC_POS_ARGS); + } + + RTHCPHYS HCPhys = PGM_PAGE_GET_HCPHYS(&pRam->aPages[off >> PAGE_SHIFT]); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlRamHits); + + /* + * pgmRZDynMapHCPageInlined with out stats. 
+ */ + PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; + Assert(!(HCPhys & PAGE_OFFSET_MASK)); + Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries)); + + unsigned iHash = PGMMAPSET_HASH(HCPhys); + unsigned iEntry = pSet->aiHashTable[iHash]; + if ( iEntry < pSet->cEntries + && pSet->aEntries[iEntry].HCPhys == HCPhys + && pSet->aEntries[iEntry].cInlinedRefs < UINT16_MAX - 1) + { + pSet->aEntries[iEntry].cInlinedRefs++; + *ppv = pSet->aEntries[iEntry].pvPage; + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlHits); + } + else + { + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlMisses); + pgmRZDynMapHCPageCommon(pSet, HCPhys, ppv RTLOG_COMMA_SRC_POS_ARGS); + } + + STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInl, a); + return VINF_SUCCESS; +} + + +/** + * Inlined version of the ring-0 version of guest page mapping that optimizes + * access to pages already in the set. + * + * @returns VBox status code, see pgmRZDynMapGCPageCommon for details. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhys The guest physical address of the page. + * @param ppv Where to store the mapping address. + * @param SRC_POS The source location of the caller. + */ +DECLINLINE(int) pgmRZDynMapGCPageInlined(PVMCPUCC pVCpu, RTGCPHYS GCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL) +{ + return pgmRZDynMapGCPageV2Inlined(pVCpu->CTX_SUFF(pVM), pVCpu, GCPhys, ppv RTLOG_COMMA_SRC_POS_ARGS); +} + + +/** + * Inlined version of the ring-0 version of the guest byte mapping code + * that optimizes access to pages already in the set. + * + * @returns VBox status code, see pgmRZDynMapGCPageCommon for details. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPhys The guest physical address of the page. + * @param ppv Where to store the mapping address. The offset is + * preserved. + * @param SRC_POS The source location of the caller. + */ +DECLINLINE(int) pgmRZDynMapGCPageOffInlined(PVMCPUCC pVCpu, RTGCPHYS GCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL) +{ + STAM_PROFILE_START(&pVCpu->pgm.s.StatRZDynMapGCPageInl, a); + + /* + * Get the ram range. + */ + PVMCC pVM = pVCpu->CTX_SUFF(pVM); + PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(apRamRangesTlb)[PGM_RAMRANGE_TLB_IDX(GCPhys)]; + RTGCPHYS off; + if ( !pRam + || (off = GCPhys - pRam->GCPhys) >= pRam->cb + /** @todo || page state stuff */ + ) + { + /* This case is not counted into StatRZDynMapGCPageInl. */ + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlRamMisses); + return pgmRZDynMapGCPageCommon(pVM, pVCpu, GCPhys, ppv RTLOG_COMMA_SRC_POS_ARGS); + } + + RTHCPHYS HCPhys = PGM_PAGE_GET_HCPHYS(&pRam->aPages[off >> PAGE_SHIFT]); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlRamHits); + + /* + * pgmRZDynMapHCPageInlined with out stats. 
+ */ + PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; + Assert(!(HCPhys & PAGE_OFFSET_MASK)); + Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries)); + + unsigned iHash = PGMMAPSET_HASH(HCPhys); + unsigned iEntry = pSet->aiHashTable[iHash]; + if ( iEntry < pSet->cEntries + && pSet->aEntries[iEntry].HCPhys == HCPhys + && pSet->aEntries[iEntry].cInlinedRefs < UINT16_MAX - 1) + { + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlHits); + pSet->aEntries[iEntry].cInlinedRefs++; + *ppv = (void *)((uintptr_t)pSet->aEntries[iEntry].pvPage | (PAGE_OFFSET_MASK & (uintptr_t)GCPhys)); + } + else + { + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInlMisses); + pgmRZDynMapHCPageCommon(pSet, HCPhys, ppv RTLOG_COMMA_SRC_POS_ARGS); + *ppv = (void *)((uintptr_t)*ppv | (PAGE_OFFSET_MASK & (uintptr_t)GCPhys)); + } + + STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapGCPageInl, a); + return VINF_SUCCESS; +} + + +/** + * Maps the page into current context (RC and maybe R0). + * + * @returns pointer to the mapping. + * @param pVM The cross context VM structure. + * @param pPage The page. + * @param SRC_POS The source location of the caller. + */ +DECLINLINE(void *) pgmPoolMapPageInlined(PVMCC pVM, PPGMPOOLPAGE pPage RTLOG_COMMA_SRC_POS_DECL) +{ + if (pPage->idx >= PGMPOOL_IDX_FIRST) + { + Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages); + void *pv; + pgmRZDynMapHCPageInlined(VMMGetCpu(pVM), pPage->Core.Key, &pv RTLOG_COMMA_SRC_POS_ARGS); + return pv; + } + AssertFatalMsgFailed(("pgmPoolMapPageInlined invalid page index %x\n", pPage->idx)); +} + + +/** + * Maps the page into current context (RC and maybe R0). + * + * @returns pointer to the mapping. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure. + * @param pPage The page. + * @param SRC_POS The source location of the caller. + */ +DECLINLINE(void *) pgmPoolMapPageV2Inlined(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOLPAGE pPage RTLOG_COMMA_SRC_POS_DECL) +{ + if (pPage->idx >= PGMPOOL_IDX_FIRST) + { + Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages); + void *pv; + Assert(pVCpu == VMMGetCpu(pVM)); RT_NOREF_PV(pVM); + pgmRZDynMapHCPageInlined(pVCpu, pPage->Core.Key, &pv RTLOG_COMMA_SRC_POS_ARGS); + return pv; + } + AssertFatalMsgFailed(("pgmPoolMapPageV2Inlined invalid page index %x\n", pPage->idx)); +} + +#endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */ + +/** + * Queries the Physical TLB entry for a physical guest page, + * attempting to load the TLB entry if necessary. + * + * @returns VBox status code. + * @retval VINF_SUCCESS on success + * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address. + * + * @param pVM The cross context VM structure. + * @param GCPhys The address of the guest page. + * @param ppTlbe Where to store the pointer to the TLB entry. + */ +DECLINLINE(int) pgmPhysPageQueryTlbe(PVMCC pVM, RTGCPHYS GCPhys, PPPGMPAGEMAPTLBE ppTlbe) +{ + int rc; + PPGMPAGEMAPTLBE pTlbe = &pVM->pgm.s.CTX_SUFF(PhysTlb).aEntries[PGM_PAGEMAPTLB_IDX(GCPhys)]; + if (pTlbe->GCPhys == (GCPhys & X86_PTE_PAE_PG_MASK)) + { + STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageMapTlbHits)); + rc = VINF_SUCCESS; + } + else + rc = pgmPhysPageLoadIntoTlb(pVM, GCPhys); + *ppTlbe = pTlbe; + return rc; +} + + +/** + * Queries the Physical TLB entry for a physical guest page, + * attempting to load the TLB entry if necessary. + * + * @returns VBox status code. 
+ * @retval VINF_SUCCESS on success
+ * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pPage Pointer to the PGMPAGE structure corresponding to
+ * GCPhys.
+ * @param GCPhys The address of the guest page.
+ * @param ppTlbe Where to store the pointer to the TLB entry.
+ */
+DECLINLINE(int) pgmPhysPageQueryTlbeWithPage(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, PPPGMPAGEMAPTLBE ppTlbe)
+{
+ int rc;
+ PPGMPAGEMAPTLBE pTlbe = &pVM->pgm.s.CTX_SUFF(PhysTlb).aEntries[PGM_PAGEMAPTLB_IDX(GCPhys)];
+ if (pTlbe->GCPhys == (GCPhys & X86_PTE_PAE_PG_MASK))
+ {
+ STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageMapTlbHits));
+ rc = VINF_SUCCESS;
+#if 0 //def VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
+# ifdef IN_RING3
+ if (pTlbe->pv == (void *)pVM->pgm.s.pvZeroPgR0)
+# else
+ if (pTlbe->pv == (void *)pVM->pgm.s.pvZeroPgR3)
+# endif
+ pTlbe->pv = pVM->pgm.s.CTX_SUFF(pvZeroPg);
+#endif
+ AssertPtr(pTlbe->pv);
+#if defined(IN_RING3) || (!defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_RAM_IN_KERNEL))
+ Assert(!pTlbe->pMap || RT_VALID_PTR(pTlbe->pMap->pv));
+#endif
+ }
+ else
+ rc = pgmPhysPageLoadIntoTlbWithPage(pVM, pPage, GCPhys);
+ *ppTlbe = pTlbe;
+ return rc;
+}
+
+
+/**
+ * Calculates NEM page protection flags.
+ */
+DECL_FORCE_INLINE(uint32_t) pgmPhysPageCalcNemProtection(PPGMPAGE pPage, PGMPAGETYPE enmType)
+{
+ /*
+ * Deal with potentially writable pages first.
+ */
+ if (PGMPAGETYPE_IS_RWX(enmType))
+ {
+ if (!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
+ {
+ if (PGM_PAGE_IS_ALLOCATED(pPage))
+ return NEM_PAGE_PROT_READ | NEM_PAGE_PROT_EXECUTE | NEM_PAGE_PROT_WRITE;
+ return NEM_PAGE_PROT_READ | NEM_PAGE_PROT_EXECUTE;
+ }
+ if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
+ return NEM_PAGE_PROT_READ | NEM_PAGE_PROT_EXECUTE;
+ }
+ /*
+ * Potentially readable & executable pages.
+ */
+ else if ( PGMPAGETYPE_IS_ROX(enmType)
+ && !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
+ return NEM_PAGE_PROT_READ | NEM_PAGE_PROT_EXECUTE;
+
+ /*
+ * The rest needs special access handling.
+ */
+ return NEM_PAGE_PROT_NONE;
+}
+
+
+/**
+ * Enables write monitoring for an allocated page.
+ *
+ * The caller is responsible for updating the shadow page tables.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pPage The page to write monitor.
+ * @param GCPhysPage The address of the page.
+ */
+DECLINLINE(void) pgmPhysPageWriteMonitor(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhysPage)
+{
+ Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
+ PGM_LOCK_ASSERT_OWNER(pVM);
+
+ PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_WRITE_MONITORED);
+ pVM->pgm.s.cMonitoredPages++;
+
+ /* Large pages must be disabled. */
+ if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
+ {
+ PPGMPAGE pFirstPage = pgmPhysGetPage(pVM, GCPhysPage & X86_PDE2M_PAE_PG_MASK);
+ AssertFatal(pFirstPage);
+ if (PGM_PAGE_GET_PDE_TYPE(pFirstPage) == PGM_PAGE_PDE_TYPE_PDE)
+ {
+ PGM_PAGE_SET_PDE_TYPE(pVM, pFirstPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
+ pVM->pgm.s.cLargePagesDisabled++;
+ }
+ else
+ Assert(PGM_PAGE_GET_PDE_TYPE(pFirstPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED);
+ }
+
+ /* Tell NEM. 
*/ + if (VM_IS_NEM_ENABLED(pVM)) + { + uint8_t u2State = PGM_PAGE_GET_NEM_STATE(pPage); + PGMPAGETYPE enmType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage); + NEMHCNotifyPhysPageProtChanged(pVM, GCPhysPage, PGM_PAGE_GET_HCPHYS(pPage), + pgmPhysPageCalcNemProtection(pPage, enmType), enmType, &u2State); + PGM_PAGE_SET_NEM_STATE(pPage, u2State); + } +} + + +/** + * Checks if the no-execute (NX) feature is active (EFER.NXE=1). + * + * Only used when the guest is in PAE or long mode. This is inlined so that we + * can perform consistency checks in debug builds. + * + * @returns true if it is, false if it isn't. + * @param pVCpu The cross context virtual CPU structure. + */ +DECL_FORCE_INLINE(bool) pgmGstIsNoExecuteActive(PVMCPUCC pVCpu) +{ + Assert(pVCpu->pgm.s.fNoExecuteEnabled == CPUMIsGuestNXEnabled(pVCpu)); + Assert(CPUMIsGuestInPAEMode(pVCpu) || CPUMIsGuestInLongMode(pVCpu)); + return pVCpu->pgm.s.fNoExecuteEnabled; +} + + +/** + * Checks if the page size extension (PSE) is currently enabled (CR4.PSE=1). + * + * Only used when the guest is in paged 32-bit mode. This is inlined so that + * we can perform consistency checks in debug builds. + * + * @returns true if it is, false if it isn't. + * @param pVCpu The cross context virtual CPU structure. + */ +DECL_FORCE_INLINE(bool) pgmGst32BitIsPageSizeExtActive(PVMCPUCC pVCpu) +{ + Assert(pVCpu->pgm.s.fGst32BitPageSizeExtension == CPUMIsGuestPageSizeExtEnabled(pVCpu)); + Assert(!CPUMIsGuestInPAEMode(pVCpu)); + Assert(!CPUMIsGuestInLongMode(pVCpu)); + return pVCpu->pgm.s.fGst32BitPageSizeExtension; +} + + +/** + * Calculated the guest physical address of the large (4 MB) page in 32 bits paging mode. + * Takes PSE-36 into account. + * + * @returns guest physical address + * @param pVM The cross context VM structure. + * @param Pde Guest Pde + */ +DECLINLINE(RTGCPHYS) pgmGstGet4MBPhysPage(PVMCC pVM, X86PDE Pde) +{ + RTGCPHYS GCPhys = Pde.u & X86_PDE4M_PG_MASK; + GCPhys |= (RTGCPHYS)Pde.b.u8PageNoHigh << 32; + + return GCPhys & pVM->pgm.s.GCPhys4MBPSEMask; +} + + +/** + * Gets the address the guest page directory (32-bit paging). + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param ppPd Where to return the mapping. This is always set. + */ +DECLINLINE(int) pgmGstGet32bitPDPtrEx(PVMCPUCC pVCpu, PX86PD *ppPd) +{ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + int rc = pgmRZDynMapGCPageInlined(pVCpu, pVCpu->pgm.s.GCPhysCR3, (void **)ppPd RTLOG_COMMA_SRC_POS); + if (RT_FAILURE(rc)) + { + *ppPd = NULL; + return rc; + } +#else + *ppPd = pVCpu->pgm.s.CTX_SUFF(pGst32BitPd); + if (RT_UNLIKELY(!*ppPd)) + return pgmGstLazyMap32BitPD(pVCpu, ppPd); +#endif + return VINF_SUCCESS; +} + + +/** + * Gets the address the guest page directory (32-bit paging). + * + * @returns Pointer to the page directory entry in question. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(PX86PD) pgmGstGet32bitPDPtr(PVMCPUCC pVCpu) +{ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PX86PD pGuestPD = NULL; + int rc = pgmRZDynMapGCPageInlined(pVCpu, pVCpu->pgm.s.GCPhysCR3, (void **)&pGuestPD RTLOG_COMMA_SRC_POS); + if (RT_FAILURE(rc)) + { + AssertMsg(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); + return NULL; + } +#else + PX86PD pGuestPD = pVCpu->pgm.s.CTX_SUFF(pGst32BitPd); + if (RT_UNLIKELY(!pGuestPD)) + { + int rc = pgmGstLazyMap32BitPD(pVCpu, &pGuestPD); + if (RT_FAILURE(rc)) + return NULL; + } +#endif + return pGuestPD; +} + + +/** + * Gets the guest page directory pointer table. 
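+ * (PAE mode: the 4-entry table that guest CR3 points to, whose entries in
+ * turn address the four page directories.)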
+ * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param ppPdpt Where to return the mapping. This is always set. + */ +DECLINLINE(int) pgmGstGetPaePDPTPtrEx(PVMCPUCC pVCpu, PX86PDPT *ppPdpt) +{ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + int rc = pgmRZDynMapGCPageOffInlined(pVCpu, pVCpu->pgm.s.GCPhysCR3, (void **)ppPdpt RTLOG_COMMA_SRC_POS); + if (RT_FAILURE(rc)) + { + *ppPdpt = NULL; + return rc; + } +#else + *ppPdpt = pVCpu->pgm.s.CTX_SUFF(pGstPaePdpt); + if (RT_UNLIKELY(!*ppPdpt)) + return pgmGstLazyMapPaePDPT(pVCpu, ppPdpt); +#endif + return VINF_SUCCESS; +} + + +/** + * Gets the guest page directory pointer table. + * + * @returns Pointer to the page directory in question. + * @returns NULL if the page directory is not present or on an invalid page. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(PX86PDPT) pgmGstGetPaePDPTPtr(PVMCPUCC pVCpu) +{ + PX86PDPT pGuestPdpt; + int rc = pgmGstGetPaePDPTPtrEx(pVCpu, &pGuestPdpt); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); NOREF(rc); + return pGuestPdpt; +} + + +/** + * Gets the guest page directory pointer table entry for the specified address. + * + * @returns Pointer to the page directory in question. + * @returns NULL if the page directory is not present or on an invalid page. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(PX86PDPE) pgmGstGetPaePDPEPtr(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + AssertGCPtr32(GCPtr); + +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PX86PDPT pGuestPDPT = NULL; + int rc = pgmRZDynMapGCPageOffInlined(pVCpu, pVCpu->pgm.s.GCPhysCR3, (void **)&pGuestPDPT RTLOG_COMMA_SRC_POS); + AssertRCReturn(rc, NULL); +#else + PX86PDPT pGuestPDPT = pVCpu->pgm.s.CTX_SUFF(pGstPaePdpt); + if (RT_UNLIKELY(!pGuestPDPT)) + { + int rc = pgmGstLazyMapPaePDPT(pVCpu, &pGuestPDPT); + if (RT_FAILURE(rc)) + return NULL; + } +#endif + return &pGuestPDPT->a[(uint32_t)GCPtr >> X86_PDPT_SHIFT]; +} + + +/** + * Gets the page directory entry for the specified address. + * + * @returns The page directory entry in question. + * @returns A non-present entry if the page directory is not present or on an invalid page. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param GCPtr The address. 
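+ *
+ * @remarks Walks the guest PDPT and PD; a not-present entry or a set
+ *          must-be-zero bit at either level yields an all-zero (non-present)
+ *          PDE rather than a failure code.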
+ */ +DECLINLINE(X86PDEPAE) pgmGstGetPaePDE(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + AssertGCPtr32(GCPtr); + PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu); + if (RT_LIKELY(pGuestPDPT)) + { + const unsigned iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT; + if ( pGuestPDPT->a[iPdpt].n.u1Present + && !(pGuestPDPT->a[iPdpt].u & pVCpu->pgm.s.fGstPaeMbzPdpeMask) ) + { + const unsigned iPD = (GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PX86PDPAE pGuestPD = NULL; + int rc = pgmRZDynMapGCPageInlined(pVCpu, + pGuestPDPT->a[iPdpt].u & X86_PDPE_PG_MASK, + (void **)&pGuestPD + RTLOG_COMMA_SRC_POS); + if (RT_SUCCESS(rc)) + return pGuestPD->a[iPD]; + AssertMsg(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); +#else + PX86PDPAE pGuestPD = pVCpu->pgm.s.CTX_SUFF(apGstPaePDs)[iPdpt]; + if ( !pGuestPD + || (pGuestPDPT->a[iPdpt].u & X86_PDPE_PG_MASK) != pVCpu->pgm.s.aGCPhysGstPaePDs[iPdpt]) + pgmGstLazyMapPaePD(pVCpu, iPdpt, &pGuestPD); + if (pGuestPD) + return pGuestPD->a[iPD]; +#endif + } + } + + X86PDEPAE ZeroPde = {0}; + return ZeroPde; +} + + +/** + * Gets the page directory pointer table entry for the specified address + * and returns the index into the page directory + * + * @returns Pointer to the page directory in question. + * @returns NULL if the page directory is not present or on an invalid page. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + * @param piPD Receives the index into the returned page directory + * @param pPdpe Receives the page directory pointer entry. Optional. + */ +DECLINLINE(PX86PDPAE) pgmGstGetPaePDPtr(PVMCPUCC pVCpu, RTGCPTR GCPtr, unsigned *piPD, PX86PDPE pPdpe) +{ + AssertGCPtr32(GCPtr); + + /* The PDPE. */ + PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu); + if (RT_UNLIKELY(!pGuestPDPT)) + return NULL; + const unsigned iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT; + if (pPdpe) + *pPdpe = pGuestPDPT->a[iPdpt]; + if (!pGuestPDPT->a[iPdpt].n.u1Present) + return NULL; + if (RT_UNLIKELY(pVCpu->pgm.s.fGstPaeMbzPdpeMask & pGuestPDPT->a[iPdpt].u)) + return NULL; + + /* The PDE. */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PX86PDPAE pGuestPD = NULL; + int rc = pgmRZDynMapGCPageInlined(pVCpu, + pGuestPDPT->a[iPdpt].u & X86_PDPE_PG_MASK, + (void **)&pGuestPD + RTLOG_COMMA_SRC_POS); + if (RT_FAILURE(rc)) + { + AssertMsg(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); + return NULL; + } +#else + PX86PDPAE pGuestPD = pVCpu->pgm.s.CTX_SUFF(apGstPaePDs)[iPdpt]; + if ( !pGuestPD + || (pGuestPDPT->a[iPdpt].u & X86_PDPE_PG_MASK) != pVCpu->pgm.s.aGCPhysGstPaePDs[iPdpt]) + pgmGstLazyMapPaePD(pVCpu, iPdpt, &pGuestPD); +#endif + + *piPD = (GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + return pGuestPD; +} + + +/** + * Gets the page map level-4 pointer for the guest. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure. + * @param ppPml4 Where to return the mapping. Always set. + */ +DECLINLINE(int) pgmGstGetLongModePML4PtrEx(PVMCPUCC pVCpu, PX86PML4 *ppPml4) +{ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + int rc = pgmRZDynMapGCPageInlined(pVCpu, pVCpu->pgm.s.GCPhysCR3, (void **)ppPml4 RTLOG_COMMA_SRC_POS); + if (RT_FAILURE(rc)) + { + *ppPml4 = NULL; + return rc; + } +#else + *ppPml4 = pVCpu->pgm.s.CTX_SUFF(pGstAmd64Pml4); + if (RT_UNLIKELY(!*ppPml4)) + return pgmGstLazyMapPml4(pVCpu, ppPml4); +#endif + return VINF_SUCCESS; +} + + +/** + * Gets the page map level-4 pointer for the guest. + * + * @returns Pointer to the PML4 page. 
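+ * @returns NULL if the PML4 could not be mapped (invalid guest physical address).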
+ * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(PX86PML4) pgmGstGetLongModePML4Ptr(PVMCPUCC pVCpu) +{ + PX86PML4 pGuestPml4; + int rc = pgmGstGetLongModePML4PtrEx(pVCpu, &pGuestPml4); + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); NOREF(rc); + return pGuestPml4; +} + + +/** + * Gets the pointer to a page map level-4 entry. + * + * @returns Pointer to the PML4 entry. + * @param pVCpu The cross context virtual CPU structure. + * @param iPml4 The index. + * @remarks Only used by AssertCR3. + */ +DECLINLINE(PX86PML4E) pgmGstGetLongModePML4EPtr(PVMCPUCC pVCpu, unsigned int iPml4) +{ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + PX86PML4 pGuestPml4; + int rc = pgmRZDynMapGCPageInlined(pVCpu, pVCpu->pgm.s.GCPhysCR3, (void **)&pGuestPml4 RTLOG_COMMA_SRC_POS); + AssertRCReturn(rc, NULL); +#else + PX86PML4 pGuestPml4 = pVCpu->pgm.s.CTX_SUFF(pGstAmd64Pml4); + if (RT_UNLIKELY(!pGuestPml4)) + { + int rc = pgmGstLazyMapPml4(pVCpu, &pGuestPml4); + AssertRCReturn(rc, NULL); + } +#endif + return &pGuestPml4->a[iPml4]; +} + + +/** + * Gets the page directory entry for the specified address. + * + * @returns The page directory entry in question. + * @returns A non-present entry if the page directory is not present or on an invalid page. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(X86PDEPAE) pgmGstGetLongModePDE(PVMCPUCC pVCpu, RTGCPTR64 GCPtr) +{ + /* + * Note! To keep things simple, ASSUME invalid physical addresses will + * cause X86_TRAP_PF_RSVD. This isn't a problem until we start + * supporting 52-bit wide physical guest addresses. + */ + PCX86PML4 pGuestPml4 = pgmGstGetLongModePML4Ptr(pVCpu); + const unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK; + if ( RT_LIKELY(pGuestPml4) + && pGuestPml4->a[iPml4].n.u1Present + && !(pGuestPml4->a[iPml4].u & pVCpu->pgm.s.fGstAmd64MbzPml4eMask) ) + { + PCX86PDPT pPdptTemp; + int rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pGuestPml4->a[iPml4].u & X86_PML4E_PG_MASK, &pPdptTemp); + if (RT_SUCCESS(rc)) + { + const unsigned iPdpt = (GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; + if ( pPdptTemp->a[iPdpt].n.u1Present + && !(pPdptTemp->a[iPdpt].u & pVCpu->pgm.s.fGstAmd64MbzPdpeMask) ) + { + PCX86PDPAE pPD; + rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPdptTemp->a[iPdpt].u & X86_PDPE_PG_MASK, &pPD); + if (RT_SUCCESS(rc)) + { + const unsigned iPD = (GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + return pPD->a[iPD]; + } + } + } + AssertMsg(RT_SUCCESS(rc) || rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); + } + + X86PDEPAE ZeroPde = {0}; + return ZeroPde; +} + + +/** + * Gets the GUEST page directory pointer for the specified address. + * + * @returns The page directory in question. + * @returns NULL if the page directory is not present or on an invalid page. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + * @param ppPml4e Page Map Level-4 Entry (out) + * @param pPdpe Page directory pointer table entry (out) + * @param piPD Receives the index into the returned page directory + */ +DECLINLINE(PX86PDPAE) pgmGstGetLongModePDPtr(PVMCPUCC pVCpu, RTGCPTR64 GCPtr, PX86PML4E *ppPml4e, PX86PDPE pPdpe, unsigned *piPD) +{ + /* The PMLE4. 
*/ + PX86PML4 pGuestPml4 = pgmGstGetLongModePML4Ptr(pVCpu); + if (RT_UNLIKELY(!pGuestPml4)) + return NULL; + const unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK; + PCX86PML4E pPml4e = *ppPml4e = &pGuestPml4->a[iPml4]; + if (!pPml4e->n.u1Present) + return NULL; + if (RT_UNLIKELY(pPml4e->u & pVCpu->pgm.s.fGstAmd64MbzPml4eMask)) + return NULL; + + /* The PDPE. */ + PCX86PDPT pPdptTemp; + int rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPml4e->u & X86_PML4E_PG_MASK, &pPdptTemp); + if (RT_FAILURE(rc)) + { + AssertMsg(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); + return NULL; + } + const unsigned iPdpt = (GCPtr >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; + *pPdpe = pPdptTemp->a[iPdpt]; + if (!pPdpe->n.u1Present) + return NULL; + if (RT_UNLIKELY(pPdpe->u & pVCpu->pgm.s.fGstAmd64MbzPdpeMask)) + return NULL; + + /* The PDE. */ + PX86PDPAE pPD; + rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPdptTemp->a[iPdpt].u & X86_PDPE_PG_MASK, &pPD); + if (RT_FAILURE(rc)) + { + AssertMsg(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc)); + return NULL; + } + + *piPD = (GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + return pPD; +} + + +/** + * Gets the shadow page directory, 32-bit. + * + * @returns Pointer to the shadow 32-bit PD. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(PX86PD) pgmShwGet32BitPDPtr(PVMCPUCC pVCpu) +{ + return (PX86PD)PGMPOOL_PAGE_2_PTR_V2(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)); +} + + +/** + * Gets the shadow page directory entry for the specified address, 32-bit. + * + * @returns Shadow 32-bit PDE. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(X86PDE) pgmShwGet32BitPDE(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + PX86PD pShwPde = pgmShwGet32BitPDPtr(pVCpu); + if (!pShwPde) + { + X86PDE ZeroPde = {0}; + return ZeroPde; + } + return pShwPde->a[(uint32_t)GCPtr >> X86_PD_SHIFT]; +} + + +/** + * Gets the pointer to the shadow page directory entry for the specified + * address, 32-bit. + * + * @returns Pointer to the shadow 32-bit PDE. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(PX86PDE) pgmShwGet32BitPDEPtr(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + PX86PD pPde = pgmShwGet32BitPDPtr(pVCpu); + AssertReturn(pPde, NULL); + return &pPde->a[(uint32_t)GCPtr >> X86_PD_SHIFT]; +} + + +/** + * Gets the shadow page pointer table, PAE. + * + * @returns Pointer to the shadow PAE PDPT. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(PX86PDPT) pgmShwGetPaePDPTPtr(PVMCPUCC pVCpu) +{ + return (PX86PDPT)PGMPOOL_PAGE_2_PTR_V2(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)); +} + + +/** + * Gets the shadow page directory for the specified address, PAE. + * + * @returns Pointer to the shadow PD. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(PX86PDPAE) pgmShwGetPaePDPtr(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + const unsigned iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT; + PX86PDPT pPdpt = pgmShwGetPaePDPTPtr(pVCpu); + + if (!pPdpt->a[iPdpt].n.u1Present) + return NULL; + + /* Fetch the pgm pool shadow descriptor. */ + PVMCC pVM = pVCpu->CTX_SUFF(pVM); + PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pVM->pgm.s.CTX_SUFF(pPool), pPdpt->a[iPdpt].u & X86_PDPE_PG_MASK); + AssertReturn(pShwPde, NULL); + + return (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde); +} + + +/** + * Gets the shadow page directory for the specified address, PAE. 
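+ * (Variant of the lookup above that takes an already resolved PDPT pointer
+ * instead of fetching it from the shadow CR3 page.)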
+ * + * @returns Pointer to the shadow PD. + * @param pVCpu The cross context virtual CPU structure. + * @param pPdpt Pointer to the page directory pointer table. + * @param GCPtr The address. + */ +DECLINLINE(PX86PDPAE) pgmShwGetPaePDPtr(PVMCPUCC pVCpu, PX86PDPT pPdpt, RTGCPTR GCPtr) +{ + const unsigned iPdpt = (uint32_t)GCPtr >> X86_PDPT_SHIFT; + + if (!pPdpt->a[iPdpt].n.u1Present) + return NULL; + + /* Fetch the pgm pool shadow descriptor. */ + PVMCC pVM = pVCpu->CTX_SUFF(pVM); + PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pVM->pgm.s.CTX_SUFF(pPool), pPdpt->a[iPdpt].u & X86_PDPE_PG_MASK); + AssertReturn(pShwPde, NULL); + + return (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde); +} + + +/** + * Gets the shadow page directory entry, PAE. + * + * @returns PDE. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(X86PDEPAE) pgmShwGetPaePDE(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + const unsigned iPd = (GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + + PX86PDPAE pShwPde = pgmShwGetPaePDPtr(pVCpu, GCPtr); + if (!pShwPde) + { + X86PDEPAE ZeroPde = {0}; + return ZeroPde; + } + return pShwPde->a[iPd]; +} + + +/** + * Gets the pointer to the shadow page directory entry for an address, PAE. + * + * @returns Pointer to the PDE. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + * @remarks Only used by AssertCR3. + */ +DECLINLINE(PX86PDEPAE) pgmShwGetPaePDEPtr(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + const unsigned iPd = (GCPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK; + + PX86PDPAE pPde = pgmShwGetPaePDPtr(pVCpu, GCPtr); + AssertReturn(pPde, NULL); + return &pPde->a[iPd]; +} + + +/** + * Gets the shadow page map level-4 pointer. + * + * @returns Pointer to the shadow PML4. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(PX86PML4) pgmShwGetLongModePML4Ptr(PVMCPUCC pVCpu) +{ + return (PX86PML4)PGMPOOL_PAGE_2_PTR_V2(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)); +} + + +/** + * Gets the shadow page map level-4 entry for the specified address. + * + * @returns The entry. + * @param pVCpu The cross context virtual CPU structure. + * @param GCPtr The address. + */ +DECLINLINE(X86PML4E) pgmShwGetLongModePML4E(PVMCPUCC pVCpu, RTGCPTR GCPtr) +{ + const unsigned iPml4 = ((RTGCUINTPTR64)GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK; + PX86PML4 pShwPml4 = pgmShwGetLongModePML4Ptr(pVCpu); + + if (!pShwPml4) + { + X86PML4E ZeroPml4e = {0}; + return ZeroPml4e; + } + return pShwPml4->a[iPml4]; +} + + +/** + * Gets the pointer to the specified shadow page map level-4 entry. + * + * @returns The entry. + * @param pVCpu The cross context virtual CPU structure. + * @param iPml4 The PML4 index. + */ +DECLINLINE(PX86PML4E) pgmShwGetLongModePML4EPtr(PVMCPUCC pVCpu, unsigned int iPml4) +{ + PX86PML4 pShwPml4 = pgmShwGetLongModePML4Ptr(pVCpu); + if (!pShwPml4) + return NULL; + return &pShwPml4->a[iPml4]; +} + + +/** + * Cached physical handler lookup. + * + * @returns Physical handler covering @a GCPhys. + * @param pVM The cross context VM structure. + * @param GCPhys The lookup address. 
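+ *
+ * @remarks Probes the one-entry pLastPhysHandler cache first and only falls
+ *          back to the AVL range lookup on a miss, updating the cache on
+ *          success. Illustrative use only (assumes pVM and GCPhys are in scope):
+ * @code
+ *      PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pVM, GCPhys);
+ *      if (pHandler)
+ *          Log(("handler %p covers %RGp\n", pHandler, GCPhys));
+ * @endcode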
+ */
+DECLINLINE(PPGMPHYSHANDLER) pgmHandlerPhysicalLookup(PVMCC pVM, RTGCPHYS GCPhys)
+{
+ PPGMPHYSHANDLER pHandler = pVM->pgm.s.CTX_SUFF(pLastPhysHandler);
+ if ( pHandler
+ && GCPhys >= pHandler->Core.Key
+ && GCPhys < pHandler->Core.KeyLast)
+ {
+ STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PhysHandlerLookupHits));
+ return pHandler;
+ }
+
+ STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PhysHandlerLookupMisses));
+ pHandler = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhys);
+ if (pHandler)
+ pVM->pgm.s.CTX_SUFF(pLastPhysHandler) = pHandler;
+ return pHandler;
+}
+
+
+/**
+ * Internal worker for finding an 'in-use' shadow page given by its index.
+ *
+ * @returns Pointer to the shadow page structure.
+ * @param pPool The pool.
+ * @param idx The pool page index.
+ */
+DECLINLINE(PPGMPOOLPAGE) pgmPoolGetPageByIdx(PPGMPOOL pPool, unsigned idx)
+{
+ AssertFatalMsg(idx >= PGMPOOL_IDX_FIRST && idx < pPool->cCurPages, ("idx=%d\n", idx));
+ return &pPool->aPages[idx];
+}
+
+
+/**
+ * Clears references to guest physical memory.
+ *
+ * @param pPool The pool.
+ * @param pPoolPage The pool page.
+ * @param pPhysPage The physical guest page tracking structure.
+ * @param iPte Shadow PTE index
+ */
+DECLINLINE(void) pgmTrackDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPoolPage, PPGMPAGE pPhysPage, uint16_t iPte)
+{
+ /*
+ * Just deal with the simple case here.
+ */
+#ifdef VBOX_STRICT
+ PVMCC pVM = pPool->CTX_SUFF(pVM); NOREF(pVM);
+#endif
+#ifdef LOG_ENABLED
+ const unsigned uOrg = PGM_PAGE_GET_TRACKING(pPhysPage);
+#endif
+ const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
+ if (cRefs == 1)
+ {
+ Assert(pPoolPage->idx == PGM_PAGE_GET_TD_IDX(pPhysPage));
+ Assert(iPte == PGM_PAGE_GET_PTE_INDEX(pPhysPage));
+ /* Invalidate the tracking data. */
+ PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
+ }
+ else
+ pgmPoolTrackPhysExtDerefGCPhys(pPool, pPoolPage, pPhysPage, iPte);
+ Log2(("pgmTrackDerefGCPhys: %x -> %x pPhysPage=%R[pgmpage]\n", uOrg, PGM_PAGE_GET_TRACKING(pPhysPage), pPhysPage ));
+}
+
+
+/**
+ * Moves the page to the head of the age list.
+ *
+ * This is done when the cached page is used in one way or another.
+ *
+ * @param pPool The pool.
+ * @param pPage The cached page.
+ */
+DECLINLINE(void) pgmPoolCacheUsed(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
+{
+ PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
+
+ /*
+ * Move to the head of the age list.
+ */
+ if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
+ {
+ /* unlink */
+ pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
+ if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
+ pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
+ else
+ pPool->iAgeTail = pPage->iAgePrev;
+
+ /* insert at head */
+ pPage->iAgePrev = NIL_PGMPOOL_IDX;
+ pPage->iAgeNext = pPool->iAgeHead;
+ Assert(pPage->iAgeNext != NIL_PGMPOOL_IDX); /* we would've already been head then */
+ pPool->iAgeHead = pPage->idx;
+ pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->idx;
+ }
+}
+
+
+/**
+ * Locks a page to prevent flushing (important for cr3 root pages or shadow pae pd pages).
+ *
+ * @param pPool The pool.
+ * @param pPage PGM pool page
+ */
+DECLINLINE(void) pgmPoolLockPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
+{
+ PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM)); NOREF(pPool);
+ ASMAtomicIncU32(&pPage->cLocked);
+}
+
+
+/**
+ * Unlocks a page to allow flushing again.
+ *
+ * @param pPool The pool.
+ * @param pPage PGM pool page + */ +DECLINLINE(void) pgmPoolUnlockPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage) +{ + PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM)); NOREF(pPool); + Assert(pPage->cLocked); + ASMAtomicDecU32(&pPage->cLocked); +} + + +/** + * Checks if the page is locked (e.g. the active CR3 or one of the four PDs of a PAE PDPT) + * + * @returns VBox status code. + * @param pPage PGM pool page + */ +DECLINLINE(bool) pgmPoolIsPageLocked(PPGMPOOLPAGE pPage) +{ + if (pPage->cLocked) + { + LogFlow(("pgmPoolIsPageLocked found root page %d\n", pPage->enmKind)); + if (pPage->cModifications) + pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */ + return true; + } + return false; +} + + +/** + * Check if the specified page is dirty (not write monitored) + * + * @return dirty or not + * @param pVM The cross context VM structure. + * @param GCPhys Guest physical address + */ +DECLINLINE(bool) pgmPoolIsDirtyPage(PVMCC pVM, RTGCPHYS GCPhys) +{ + PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); + PGM_LOCK_ASSERT_OWNER(pVM); + if (!pPool->cDirtyPages) + return false; + return pgmPoolIsDirtyPageSlow(pVM, GCPhys); +} + + +/** + * Tells if mappings are to be put into the shadow page table or not. + * + * @returns boolean result + * @param pVM The cross context VM structure. + */ +DECL_FORCE_INLINE(bool) pgmMapAreMappingsEnabled(PVMCC pVM) +{ +#ifdef PGM_WITHOUT_MAPPINGS + /* Only raw-mode has mappings. */ + Assert(!VM_IS_RAW_MODE_ENABLED(pVM)); NOREF(pVM); + return false; +#else + Assert(pVM->cCpus == 1 || !VM_IS_RAW_MODE_ENABLED(pVM)); + return VM_IS_RAW_MODE_ENABLED(pVM); +#endif +} + + +/** + * Checks if the mappings are floating and enabled. + * + * @returns true / false. + * @param pVM The cross context VM structure. + */ +DECL_FORCE_INLINE(bool) pgmMapAreMappingsFloating(PVMCC pVM) +{ +#ifdef PGM_WITHOUT_MAPPINGS + /* Only raw-mode has mappings. */ + Assert(!VM_IS_RAW_MODE_ENABLED(pVM)); NOREF(pVM); + return false; +#else + return !pVM->pgm.s.fMappingsFixed + && pgmMapAreMappingsEnabled(pVM); +#endif +} + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_PGMInline_h */ + diff --git a/src/VBox/VMM/include/PGMInternal.h b/src/VBox/VMM/include/PGMInternal.h new file mode 100644 index 00000000..1a757db6 --- /dev/null +++ b/src/VBox/VMM/include/PGMInternal.h @@ -0,0 +1,4073 @@ +/* $Id: PGMInternal.h $ */ +/** @file + * PGM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +#ifndef VMM_INCLUDED_SRC_include_PGMInternal_h +#define VMM_INCLUDED_SRC_include_PGMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/err.h> +#include <VBox/dbg.h> +#include <VBox/vmm/stam.h> +#include <VBox/param.h> +#include <VBox/vmm/vmm.h> +#include <VBox/vmm/mm.h> +#include <VBox/vmm/pdmcritsect.h> +#include <VBox/vmm/pdmapi.h> +#include <VBox/dis.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/log.h> +#include <VBox/vmm/gmm.h> +#include <VBox/vmm/hm.h> +#include <VBox/vmm/hm_vmx.h> +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/avl.h> +#include <iprt/critsect.h> +#include <iprt/list-off32.h> +#include <iprt/sha.h> + + + +/** @defgroup grp_pgm_int Internals + * @ingroup grp_pgm + * @internal + * @{ + */ + + +/** @name PGM Compile Time Config + * @{ + */ + +/** + * Indicates that there are no guest mappings in the shadow tables. + * + * Note! In ring-3 the macro is also used to exclude the managment of the + * intermediate context page tables. On 32-bit systems we use the intermediate + * context to support 64-bit guest execution. Thus, we cannot fully make it + * without mappings there even when VBOX_WITH_RAW_MODE is not defined. + * + * In raw-mode context there are by design always guest mappings (the code is + * executed from one), while in ring-0 there are none at all. Neither context + * manages the page tables for intermediate switcher context, that's all done in + * ring-3. + * + * Update 6.1: It is always defined now, in pgm.h + */ +#if defined(IN_RING0) \ + || ( !defined(VBOX_WITH_RAW_MODE) \ + && ( HC_ARCH_BITS != 32 \ + || !defined(VBOX_WITH_64_BITS_GUESTS) \ + ) \ + ) +# undef PGM_WITHOUT_MAPPINGS +# define PGM_WITHOUT_MAPPINGS +#endif + +/** + * Check and skip global PDEs for non-global flushes + */ +#define PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH + +/** + * Optimization for PAE page tables that are modified often + */ +//#if 0 /* disabled again while debugging */ +#define PGMPOOL_WITH_OPTIMIZED_DIRTY_PT +//#endif + +/** + * Large page support enabled only on 64 bits hosts; applies to nested paging only. + */ +#define PGM_WITH_LARGE_PAGES + +/** + * Enables optimizations for MMIO handlers that exploits X86_TRAP_PF_RSVD and + * VMX_EXIT_EPT_MISCONFIG. + */ +#define PGM_WITH_MMIO_OPTIMIZATIONS + +/** + * Sync N pages instead of a whole page table + */ +#define PGM_SYNC_N_PAGES + +/** + * Number of pages to sync during a page fault + * + * When PGMPOOL_WITH_GCPHYS_TRACKING is enabled using high values here + * causes a lot of unnecessary extents and also is slower than taking more \#PFs. + * + * Note that \#PFs are much more expensive in the VT-x/AMD-V case due to + * world switch overhead, so let's sync more. + */ +# ifdef IN_RING0 +/* Chose 32 based on the compile test in @bugref{4219}; 64 shows worse stats. + * 32 again shows better results than 16; slightly more overhead in the \#PF handler, + * but ~5% fewer faults. + */ +# define PGM_SYNC_NR_PAGES 32 +#else +# define PGM_SYNC_NR_PAGES 8 +#endif + +/** + * Number of PGMPhysRead/Write cache entries (must be <= sizeof(uint64_t)) + */ +#define PGM_MAX_PHYSCACHE_ENTRIES 64 +#define PGM_MAX_PHYSCACHE_ENTRIES_MASK (PGM_MAX_PHYSCACHE_ENTRIES-1) + + +/** @def PGMPOOL_CFG_MAX_GROW + * The maximum number of pages to add to the pool in one go. 
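+ * (With 4 KB pages, _2M >> PAGE_SHIFT works out to 512 pages per grow.)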
+ */ +#define PGMPOOL_CFG_MAX_GROW (_2M >> PAGE_SHIFT) + +/** @def VBOX_STRICT_PGM_HANDLER_VIRTUAL + * Enables some extra assertions for virtual handlers (mainly phys2virt related). + */ +#ifdef VBOX_STRICT +# define VBOX_STRICT_PGM_HANDLER_VIRTUAL +#endif + +/** @def VBOX_WITH_NEW_LAZY_PAGE_ALLOC + * Enables the experimental lazy page allocation code. */ +#ifdef DOXYGEN_RUNNING +# define VBOX_WITH_NEW_LAZY_PAGE_ALLOC +#endif + +/** @def VBOX_WITH_REAL_WRITE_MONITORED_PAGES + * Enables real write monitoring of pages, i.e. mapping them read-only and + * only making them writable when getting a write access \#PF. */ +#define VBOX_WITH_REAL_WRITE_MONITORED_PAGES + +/** @} */ + + +/** @name PDPT and PML4 flags. + * These are placed in the three bits available for system programs in + * the PDPT and PML4 entries. + * @{ */ +/** The entry is a permanent one and it's must always be present. + * Never free such an entry. */ +#define PGM_PLXFLAGS_PERMANENT RT_BIT_64(10) +/** Mapping (hypervisor allocated pagetable). */ +#define PGM_PLXFLAGS_MAPPING RT_BIT_64(11) +/** @} */ + +/** @name Page directory flags. + * These are placed in the three bits available for system programs in + * the page directory entries. + * @{ */ +/** Indicates the original entry was a big page. + * @remarks This is currently only used for statistics and can be recycled. */ +#define PGM_PDFLAGS_BIG_PAGE RT_BIT_64(9) +/** Mapping (hypervisor allocated pagetable). */ +#define PGM_PDFLAGS_MAPPING RT_BIT_64(10) +/** Made read-only to facilitate dirty bit tracking. */ +#define PGM_PDFLAGS_TRACK_DIRTY RT_BIT_64(11) +/** @} */ + +/** @name Page flags. + * These are placed in the three bits available for system programs in + * the page entries. + * @{ */ +/** Made read-only to facilitate dirty bit tracking. */ +#define PGM_PTFLAGS_TRACK_DIRTY RT_BIT_64(9) + +#ifndef PGM_PTFLAGS_CSAM_VALIDATED +/** Scanned and approved by CSAM (tm). + * NOTE: Must be identical to the one defined in CSAMInternal.h!! + * @todo Move PGM_PTFLAGS_* and PGM_PDFLAGS_* to VBox/vmm/pgm.h. */ +#define PGM_PTFLAGS_CSAM_VALIDATED RT_BIT_64(11) +#endif + +/** @} */ + +/** @name Defines used to indicate the shadow and guest paging in the templates. + * @{ */ +#define PGM_TYPE_REAL 1 +#define PGM_TYPE_PROT 2 +#define PGM_TYPE_32BIT 3 +#define PGM_TYPE_PAE 4 +#define PGM_TYPE_AMD64 5 +#define PGM_TYPE_NESTED_32BIT 6 +#define PGM_TYPE_NESTED_PAE 7 +#define PGM_TYPE_NESTED_AMD64 8 +#define PGM_TYPE_EPT 9 +#define PGM_TYPE_NONE 10 /**< Dummy shadow paging mode for NEM. */ +#define PGM_TYPE_END (PGM_TYPE_NONE + 1) +#define PGM_TYPE_FIRST_SHADOW PGM_TYPE_32BIT /**< The first type used by shadow paging. */ +/** @} */ + +/** Macro for checking if the guest is using paging. + * @param uGstType PGM_TYPE_* + * @param uShwType PGM_TYPE_* + * @remark ASSUMES certain order of the PGM_TYPE_* values. + */ +#define PGM_WITH_PAGING(uGstType, uShwType) \ + ( (uGstType) >= PGM_TYPE_32BIT \ + && (uShwType) < PGM_TYPE_NESTED_32BIT) + +/** Macro for checking if the guest supports the NX bit. + * @param uGstType PGM_TYPE_* + * @param uShwType PGM_TYPE_* + * @remark ASSUMES certain order of the PGM_TYPE_* values. + */ +#define PGM_WITH_NX(uGstType, uShwType) \ + ( (uGstType) >= PGM_TYPE_PAE \ + && (uShwType) < PGM_TYPE_NESTED_32BIT) + +/** Macro for checking for nested or EPT. 
+ * @param uType PGM_TYPE_* + */ +#define PGM_TYPE_IS_NESTED(uType) \ + ( (uType) == PGM_TYPE_NESTED_32BIT \ + || (uType) == PGM_TYPE_NESTED_PAE \ + || (uType) == PGM_TYPE_NESTED_AMD64) + +/** Macro for checking for nested or EPT. + * @param uType PGM_TYPE_* + */ +#define PGM_TYPE_IS_NESTED_OR_EPT(uType) \ + ( (uType) == PGM_TYPE_NESTED_32BIT \ + || (uType) == PGM_TYPE_NESTED_PAE \ + || (uType) == PGM_TYPE_NESTED_AMD64 \ + || (uType) == PGM_TYPE_EPT) + + + +/** @def PGM_HCPHYS_2_PTR + * Maps a HC physical page pool address to a virtual address. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param HCPhys The HC physical address to map to a virtual one. + * @param ppv Where to store the virtual address. No need to cast + * this. + * + * @remark There is no need to assert on the result. + */ +#define PGM_HCPHYS_2_PTR(pVM, pVCpu, HCPhys, ppv) pgmPoolHCPhys2Ptr(pVM, HCPhys, (void **)(ppv)) + +/** @def PGM_GCPHYS_2_PTR_V2 + * Maps a GC physical page address to a virtual address. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param GCPhys The GC physical address to map to a virtual one. + * @param ppv Where to store the virtual address. No need to cast this. + * + * @remark Use with care as we don't have so much dynamic mapping space in + * ring-0 on 32-bit darwin and in RC. + * @remark There is no need to assert on the result. + */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 +# define PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GCPhys, ppv) \ + pgmRZDynMapGCPageV2Inlined(pVM, pVCpu, GCPhys, (void **)(ppv) RTLOG_COMMA_SRC_POS) +#else +# define PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GCPhys, ppv) \ + pgmPhysGCPhys2R3Ptr(pVM, GCPhys, (PRTR3PTR)(ppv)) /** @todo this isn't asserting! */ +#endif + +/** @def PGM_GCPHYS_2_PTR + * Maps a GC physical page address to a virtual address. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address to map to a virtual one. + * @param ppv Where to store the virtual address. No need to cast this. + * + * @remark Use with care as we don't have so much dynamic mapping space in + * ring-0 on 32-bit darwin and in RC. + * @remark There is no need to assert on the result. + */ +#define PGM_GCPHYS_2_PTR(pVM, GCPhys, ppv) PGM_GCPHYS_2_PTR_V2(pVM, VMMGetCpu(pVM), GCPhys, ppv) + +/** @def PGM_GCPHYS_2_PTR_BY_VMCPU + * Maps a GC physical page address to a virtual address. + * + * @returns VBox status code. + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param GCPhys The GC physical address to map to a virtual one. + * @param ppv Where to store the virtual address. No need to cast this. + * + * @remark Use with care as we don't have so much dynamic mapping space in + * ring-0 on 32-bit darwin and in RC. + * @remark There is no need to assert on the result. + */ +#define PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, GCPhys, ppv) PGM_GCPHYS_2_PTR_V2((pVCpu)->CTX_SUFF(pVM), pVCpu, GCPhys, ppv) + +/** @def PGM_GCPHYS_2_PTR_EX + * Maps a unaligned GC physical page address to a virtual address. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param GCPhys The GC physical address to map to a virtual one. + * @param ppv Where to store the virtual address. No need to cast this. 
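+ *
+ * A minimal, purely illustrative use (pbByte and bValue are made-up locals):
+ * @code
+ *      uint8_t *pbByte;
+ *      int rc = PGM_GCPHYS_2_PTR_EX(pVM, GCPhys, &pbByte);
+ *      if (RT_SUCCESS(rc))
+ *          bValue = *pbByte;   // read the byte GCPhys points at
+ * @endcode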
+ * + * @remark Use with care as we don't have so much dynamic mapping space in + * ring-0 on 32-bit darwin and in RC. + * @remark There is no need to assert on the result. + */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 +# define PGM_GCPHYS_2_PTR_EX(pVM, GCPhys, ppv) \ + pgmRZDynMapGCPageOffInlined(VMMGetCpu(pVM), GCPhys, (void **)(ppv) RTLOG_COMMA_SRC_POS) +#else +# define PGM_GCPHYS_2_PTR_EX(pVM, GCPhys, ppv) \ + pgmPhysGCPhys2R3Ptr(pVM, GCPhys, (PRTR3PTR)(ppv)) /** @todo this isn't asserting! */ +#endif + +/** @def PGM_DYNMAP_UNUSED_HINT + * Hints to the dynamic mapping code in RC and R0/darwin that the specified page + * is no longer used. + * + * For best effect only apply this to the page that was mapped most recently. + * + * @param pVCpu The cross context virtual CPU structure of the calling EMT. + * @param pvPage The pool page. + */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 +# ifdef LOG_ENABLED +# define PGM_DYNMAP_UNUSED_HINT(pVCpu, pvPage) pgmRZDynMapUnusedHint(pVCpu, pvPage, RT_SRC_POS) +# else +# define PGM_DYNMAP_UNUSED_HINT(pVCpu, pvPage) pgmRZDynMapUnusedHint(pVCpu, pvPage) +# endif +#else +# define PGM_DYNMAP_UNUSED_HINT(pVCpu, pvPage) do {} while (0) +#endif + +/** @def PGM_DYNMAP_UNUSED_HINT_VM + * Hints to the dynamic mapping code in RC and R0/darwin that the specified page + * is no longer used. + * + * For best effect only apply this to the page that was mapped most recently. + * + * @param pVM The cross context VM structure. + * @param pvPage The pool page. + */ +#define PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvPage) PGM_DYNMAP_UNUSED_HINT(VMMGetCpu(pVM), pvPage) + + +/** @def PGM_INVL_PG + * Invalidates a page. + * + * @param pVCpu The cross context virtual CPU structure. + * @param GCVirt The virtual address of the page to invalidate. + */ +#ifdef IN_RING0 +# define PGM_INVL_PG(pVCpu, GCVirt) HMInvalidatePage(pVCpu, (RTGCPTR)(GCVirt)) +#elif defined(IN_RING3) +# define PGM_INVL_PG(pVCpu, GCVirt) HMInvalidatePage(pVCpu, (RTGCPTR)(GCVirt)) +#else +# error "Not IN_RING0 or IN_RING3!" +#endif + +/** @def PGM_INVL_PG_ALL_VCPU + * Invalidates a page on all VCPUs + * + * @param pVM The cross context VM structure. + * @param GCVirt The virtual address of the page to invalidate. + */ +#ifdef IN_RING0 +# define PGM_INVL_PG_ALL_VCPU(pVM, GCVirt) HMInvalidatePageOnAllVCpus(pVM, (RTGCPTR)(GCVirt)) +#else +# define PGM_INVL_PG_ALL_VCPU(pVM, GCVirt) HMInvalidatePageOnAllVCpus(pVM, (RTGCPTR)(GCVirt)) +#endif + +/** @def PGM_INVL_BIG_PG + * Invalidates a 4MB page directory entry. + * + * @param pVCpu The cross context virtual CPU structure. + * @param GCVirt The virtual address within the page directory to invalidate. + */ +#ifdef IN_RING0 +# define PGM_INVL_BIG_PG(pVCpu, GCVirt) HMFlushTlb(pVCpu) +#else +# define PGM_INVL_BIG_PG(pVCpu, GCVirt) HMFlushTlb(pVCpu) +#endif + +/** @def PGM_INVL_VCPU_TLBS() + * Invalidates the TLBs of the specified VCPU + * + * @param pVCpu The cross context virtual CPU structure. + */ +#ifdef IN_RING0 +# define PGM_INVL_VCPU_TLBS(pVCpu) HMFlushTlb(pVCpu) +#else +# define PGM_INVL_VCPU_TLBS(pVCpu) HMFlushTlb(pVCpu) +#endif + +/** @def PGM_INVL_ALL_VCPU_TLBS() + * Invalidates the TLBs of all VCPUs + * + * @param pVM The cross context VM structure. + */ +#ifdef IN_RING0 +# define PGM_INVL_ALL_VCPU_TLBS(pVM) HMFlushTlbOnAllVCpus(pVM) +#else +# define PGM_INVL_ALL_VCPU_TLBS(pVM) HMFlushTlbOnAllVCpus(pVM) +#endif + + +/** @name Safer Shadow PAE PT/PTE + * For helping avoid misinterpreting invalid PAE/AMD64 page table entries as + * present. 
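+ * Concretely, PGMSHWPTEPAE_IS_P below only considers an entry present when
+ * X86_PTE_P is set and none of the must-be-zero bits covered by
+ * X86_PTE_PAE_MBZ_MASK_NX are, so an invalid entry with reserved bits set is
+ * never mistaken for a valid mapping.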
+ * + * @{ + */ +#if 1 +/** + * For making sure that u1Present and X86_PTE_P checks doesn't mistake + * invalid entries for present. + * @sa X86PTEPAE. + */ +typedef union PGMSHWPTEPAE +{ + /** Unsigned integer view */ + X86PGPAEUINT uCareful; + /* Not other views. */ +} PGMSHWPTEPAE; + +# define PGMSHWPTEPAE_IS_P(Pte) ( ((Pte).uCareful & (X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == X86_PTE_P ) +# define PGMSHWPTEPAE_IS_RW(Pte) ( !!((Pte).uCareful & X86_PTE_RW)) +# define PGMSHWPTEPAE_IS_US(Pte) ( !!((Pte).uCareful & X86_PTE_US)) +# define PGMSHWPTEPAE_IS_A(Pte) ( !!((Pte).uCareful & X86_PTE_A)) +# define PGMSHWPTEPAE_IS_D(Pte) ( !!((Pte).uCareful & X86_PTE_D)) +# define PGMSHWPTEPAE_IS_TRACK_DIRTY(Pte) ( !!((Pte).uCareful & PGM_PTFLAGS_TRACK_DIRTY) ) +# define PGMSHWPTEPAE_IS_P_RW(Pte) ( ((Pte).uCareful & (X86_PTE_P | X86_PTE_RW | X86_PTE_PAE_MBZ_MASK_NX)) == (X86_PTE_P | X86_PTE_RW) ) +# define PGMSHWPTEPAE_GET_LOG(Pte) ( (Pte).uCareful ) +# define PGMSHWPTEPAE_GET_HCPHYS(Pte) ( (Pte).uCareful & X86_PTE_PAE_PG_MASK ) +# define PGMSHWPTEPAE_GET_U(Pte) ( (Pte).uCareful ) /**< Use with care. */ +# define PGMSHWPTEPAE_SET(Pte, uVal) do { (Pte).uCareful = (uVal); } while (0) +# define PGMSHWPTEPAE_SET2(Pte, Pte2) do { (Pte).uCareful = (Pte2).uCareful; } while (0) +# define PGMSHWPTEPAE_ATOMIC_SET(Pte, uVal) do { ASMAtomicWriteU64(&(Pte).uCareful, (uVal)); } while (0) +# define PGMSHWPTEPAE_ATOMIC_SET2(Pte, Pte2) do { ASMAtomicWriteU64(&(Pte).uCareful, (Pte2).uCareful); } while (0) +# define PGMSHWPTEPAE_SET_RO(Pte) do { (Pte).uCareful &= ~(X86PGPAEUINT)X86_PTE_RW; } while (0) +# define PGMSHWPTEPAE_SET_RW(Pte) do { (Pte).uCareful |= X86_PTE_RW; } while (0) + +/** + * For making sure that u1Present and X86_PTE_P checks doesn't mistake + * invalid entries for present. + * @sa X86PTPAE. + */ +typedef struct PGMSHWPTPAE +{ + PGMSHWPTEPAE a[X86_PG_PAE_ENTRIES]; +} PGMSHWPTPAE; + +#else +typedef X86PTEPAE PGMSHWPTEPAE; +typedef X86PTPAE PGMSHWPTPAE; +# define PGMSHWPTEPAE_IS_P(Pte) ( (Pte).n.u1Present ) +# define PGMSHWPTEPAE_IS_RW(Pte) ( (Pte).n.u1Write ) +# define PGMSHWPTEPAE_IS_US(Pte) ( (Pte).n.u1User ) +# define PGMSHWPTEPAE_IS_A(Pte) ( (Pte).n.u1Accessed ) +# define PGMSHWPTEPAE_IS_D(Pte) ( (Pte).n.u1Dirty ) +# define PGMSHWPTEPAE_IS_TRACK_DIRTY(Pte) ( !!((Pte).u & PGM_PTFLAGS_TRACK_DIRTY) ) +# define PGMSHWPTEPAE_IS_P_RW(Pte) ( ((Pte).u & (X86_PTE_P | X86_PTE_RW)) == (X86_PTE_P | X86_PTE_RW) ) +# define PGMSHWPTEPAE_GET_LOG(Pte) ( (Pte).u ) +# define PGMSHWPTEPAE_GET_HCPHYS(Pte) ( (Pte).u & X86_PTE_PAE_PG_MASK ) +# define PGMSHWPTEPAE_GET_U(Pte) ( (Pte).u ) /**< Use with care. */ +# define PGMSHWPTEPAE_SET(Pte, uVal) do { (Pte).u = (uVal); } while (0) +# define PGMSHWPTEPAE_SET2(Pte, Pte2) do { (Pte).u = (Pte2).u; } while (0) +# define PGMSHWPTEPAE_ATOMIC_SET(Pte, uVal) do { ASMAtomicWriteU64(&(Pte).u, (uVal)); } while (0) +# define PGMSHWPTEPAE_ATOMIC_SET2(Pte, Pte2) do { ASMAtomicWriteU64(&(Pte).u, (Pte2).u); } while (0) +# define PGMSHWPTEPAE_SET_RO(Pte) do { (Pte).u &= ~(X86PGPAEUINT)X86_PTE_RW; } while (0) +# define PGMSHWPTEPAE_SET_RW(Pte) do { (Pte).u |= X86_PTE_RW; } while (0) + +#endif + +/** Pointer to a shadow PAE PTE. */ +typedef PGMSHWPTEPAE *PPGMSHWPTEPAE; +/** Pointer to a const shadow PAE PTE. */ +typedef PGMSHWPTEPAE const *PCPGMSHWPTEPAE; + +/** Pointer to a shadow PAE page table. */ +typedef PGMSHWPTPAE *PPGMSHWPTPAE; +/** Pointer to a const shadow PAE page table. 
*/ +typedef PGMSHWPTPAE const *PCPGMSHWPTPAE; +/** @} */ + +#ifndef PGM_WITHOUT_MAPPINGS + +/** Size of the GCPtrConflict array in PGMMAPPING. + * @remarks Must be a power of two. */ +# define PGMMAPPING_CONFLICT_MAX 8 + +/** + * Structure for tracking GC Mappings. + * + * This structure is used by linked list in both GC and HC. + */ +typedef struct PGMMAPPING +{ + /** Pointer to next entry. */ + R3PTRTYPE(struct PGMMAPPING *) pNextR3; + /** Pointer to next entry. */ + R0PTRTYPE(struct PGMMAPPING *) pNextR0; + /** Indicate whether this entry is finalized. */ + bool fFinalized; + bool afPadding[7]; + /** Start Virtual address. */ + RTGCPTR GCPtr; + /** Last Virtual address (inclusive). */ + RTGCPTR GCPtrLast; + /** Range size (bytes). */ + RTGCPTR cb; + /** Pointer to relocation callback function. */ + R3PTRTYPE(PFNPGMRELOCATE) pfnRelocate; + /** User argument to the callback. */ + R3PTRTYPE(void *) pvUser; + /** Mapping description / name. For easing debugging. */ + R3PTRTYPE(const char *) pszDesc; + /** Last 8 addresses that caused conflicts. */ + RTGCPTR aGCPtrConflicts[PGMMAPPING_CONFLICT_MAX]; + /** Number of conflicts for this hypervisor mapping. */ + uint32_t cConflicts; + /** Number of page tables. */ + uint32_t cPTs; + + /** Array of page table mapping data. Each entry + * describes one page table. The array can be longer + * than the declared length. + */ + struct + { + /** The HC physical address of the page table. */ + RTHCPHYS HCPhysPT; + /** The HC physical address of the first PAE page table. */ + RTHCPHYS HCPhysPaePT0; + /** The HC physical address of the second PAE page table. */ + RTHCPHYS HCPhysPaePT1; + /** The HC virtual address of the 32-bit page table. */ + R3PTRTYPE(PX86PT) pPTR3; + /** The HC virtual address of the two PAE page table. (i.e 1024 entries instead of 512) */ + R3PTRTYPE(PPGMSHWPTPAE) paPaePTsR3; + /** The R0 virtual address of the 32-bit page table. */ + R0PTRTYPE(PX86PT) pPTR0; + /** The R0 virtual address of the two PAE page table. */ + R0PTRTYPE(PPGMSHWPTPAE) paPaePTsR0; + } aPTs[1]; +} PGMMAPPING; +/** Pointer to structure for tracking GC Mappings. */ +typedef struct PGMMAPPING *PPGMMAPPING; + +#endif /* !PGM_WITHOUT_MAPPINGS */ + + +/** + * Physical page access handler type registration. + */ +typedef struct PGMPHYSHANDLERTYPEINT +{ + /** Number of references. */ + uint32_t volatile cRefs; + /** Magic number (PGMPHYSHANDLERTYPEINT_MAGIC). */ + uint32_t u32Magic; + /** Link of handler types anchored in PGMTREES::HeadPhysHandlerTypes. */ + RTLISTOFF32NODE ListNode; + /** The kind of accesses we're handling. */ + PGMPHYSHANDLERKIND enmKind; + /** The PGM_PAGE_HNDL_PHYS_STATE_XXX value corresponding to enmKind. */ + uint32_t uState; + /** Pointer to R3 callback function. */ + R3PTRTYPE(PFNPGMPHYSHANDLER) pfnHandlerR3; + /** Pointer to R0 callback function. */ + R0PTRTYPE(PFNPGMPHYSHANDLER) pfnHandlerR0; + /** Pointer to R0 callback function for \#PFs. */ + R0PTRTYPE(PFNPGMRZPHYSPFHANDLER) pfnPfHandlerR0; + /** Description / Name. For easing debugging. */ + R3PTRTYPE(const char *) pszDesc; +} PGMPHYSHANDLERTYPEINT; +/** Pointer to a physical access handler type registration. */ +typedef PGMPHYSHANDLERTYPEINT *PPGMPHYSHANDLERTYPEINT; +/** Magic value for the physical handler callbacks (Robert A. Heinlein). */ +#define PGMPHYSHANDLERTYPEINT_MAGIC UINT32_C(0x19070707) +/** Magic value for the physical handler callbacks. */ +#define PGMPHYSHANDLERTYPEINT_MAGIC_DEAD UINT32_C(0x19880508) + +/** + * Converts a handle to a pointer. 
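+ * The handle is really an offset into the hypervisor heap (see
+ * PGMPHYSHANDLER::hType below), which is why the conversion goes through
+ * MMHyperHeapOffsetToPtr.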
+ * @returns PPGMPHYSHANDLERTYPEINT + * @param a_pVM The cross context VM structure. + * @param a_hType Physical access handler type handle. + */ +#define PGMPHYSHANDLERTYPEINT_FROM_HANDLE(a_pVM, a_hType) ((PPGMPHYSHANDLERTYPEINT)MMHyperHeapOffsetToPtr(a_pVM, a_hType)) + + +/** + * Physical page access handler structure. + * + * This is used to keep track of physical address ranges + * which are being monitored in some kind of way. + */ +typedef struct PGMPHYSHANDLER +{ + AVLROGCPHYSNODECORE Core; + /** Number of pages to update. */ + uint32_t cPages; + /** Set if we have pages that have been aliased. */ + uint32_t cAliasedPages; + /** Set if we have pages that have temporarily been disabled. */ + uint32_t cTmpOffPages; + /** Registered handler type handle (heap offset). */ + PGMPHYSHANDLERTYPE hType; + /** User argument for R3 handlers. */ + R3PTRTYPE(void *) pvUserR3; + /** User argument for R0 handlers. */ + R0PTRTYPE(void *) pvUserR0; + /** Description / Name. For easing debugging. */ + R3PTRTYPE(const char *) pszDesc; +#ifdef VBOX_WITH_STATISTICS + /** Profiling of this handler. */ + STAMPROFILE Stat; +#endif +} PGMPHYSHANDLER; +/** Pointer to a physical page access handler structure. */ +typedef PGMPHYSHANDLER *PPGMPHYSHANDLER; + +/** + * Gets the type record for a physical handler (no reference added). + * @returns PPGMPHYSHANDLERTYPEINT + * @param a_pVM The cross context VM structure. + * @param a_pPhysHandler Pointer to the physical handler structure + * (PGMPHYSHANDLER). + */ +#define PGMPHYSHANDLER_GET_TYPE(a_pVM, a_pPhysHandler) PGMPHYSHANDLERTYPEINT_FROM_HANDLE(a_pVM, (a_pPhysHandler)->hType) + + +/** + * A Physical Guest Page tracking structure. + * + * The format of this structure is complicated because we have to fit a lot + * of information into as few bits as possible. The format is also subject + * to change (there is one coming up soon). Which means that for we'll be + * using PGM_PAGE_GET_*, PGM_PAGE_IS_ and PGM_PAGE_SET_* macros for *all* + * accesses to the structure. + */ +typedef union PGMPAGE +{ + /** Structured view. */ + struct + { + /** 1:0 - The physical handler state (PGM_PAGE_HNDL_PHYS_STATE_*). */ + uint64_t u2HandlerPhysStateY : 2; + /** 3:2 - Paging structure needed to map the page + * (PGM_PAGE_PDE_TYPE_*). */ + uint64_t u2PDETypeY : 2; + /** 4 - Unused (was used by FTE for dirty tracking). */ + uint64_t fUnused1 : 1; + /** 5 - Flag indicating that a write monitored page was written to + * when set. */ + uint64_t fWrittenToY : 1; + /** 7:6 - Unused. */ + uint64_t u2Unused0 : 2; + /** 9:8 - Unused (was used by PGM_PAGE_HNDL_VIRT_STATE_*). */ + uint64_t u2Unused1 : 2; + /** 11:10 - NEM state bits. */ + uint64_t u2NemStateY : 2; + /** 12:48 - The host physical frame number (shift left to get the + * address). */ + uint64_t HCPhysFN : 36; + /** 50:48 - The page state. */ + uint64_t uStateY : 3; + /** 51:53 - The page type (PGMPAGETYPE). */ + uint64_t uTypeY : 3; + /** 63:54 - PTE index for usage tracking (page pool). */ + uint64_t u10PteIdx : 10; + + /** The GMM page ID. + * @remarks In the current implementation, MMIO2 and pages aliased to + * MMIO2 pages will be exploiting this field to calculate the + * ring-3 mapping address corresponding to the page. + * Later we may consider including MMIO2 management into GMM. */ + uint32_t idPage; + /** Usage tracking (page pool). */ + uint16_t u16TrackingY; + /** The number of read locks on this page. */ + uint8_t cReadLocksY; + /** The number of write locks on this page. 
*/ + uint8_t cWriteLocksY; + } s; + + /** 64-bit integer view. */ + uint64_t au64[2]; + /** 16-bit view. */ + uint32_t au32[4]; + /** 16-bit view. */ + uint16_t au16[8]; + /** 8-bit view. */ + uint8_t au8[16]; +} PGMPAGE; +AssertCompileSize(PGMPAGE, 16); +/** Pointer to a physical guest page. */ +typedef PGMPAGE *PPGMPAGE; +/** Pointer to a const physical guest page. */ +typedef const PGMPAGE *PCPGMPAGE; +/** Pointer to a physical guest page pointer. */ +typedef PPGMPAGE *PPPGMPAGE; + + +/** + * Clears the page structure. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_CLEAR(a_pPage) \ + do { \ + (a_pPage)->au64[0] = 0; \ + (a_pPage)->au64[1] = 0; \ + } while (0) + +/** + * Initializes the page structure. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_HCPhys The host physical address of the page. + * @param a_idPage The (GMM) page ID of the page. + * @param a_uType The page type (PGMPAGETYPE). + * @param a_uState The page state (PGM_PAGE_STATE_XXX). + */ +#define PGM_PAGE_INIT(a_pPage, a_HCPhys, a_idPage, a_uType, a_uState) \ + do { \ + RTHCPHYS SetHCPhysTmp = (a_HCPhys); \ + AssertFatal(!(SetHCPhysTmp & ~UINT64_C(0x0000fffffffff000))); \ + (a_pPage)->au64[0] = SetHCPhysTmp; \ + (a_pPage)->au64[1] = 0; \ + (a_pPage)->s.idPage = (a_idPage); \ + (a_pPage)->s.uStateY = (a_uState); \ + (a_pPage)->s.uTypeY = (a_uType); \ + } while (0) + +/** + * Initializes the page structure of a ZERO page. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_pVM The VM handle (for getting the zero page address). + * @param a_uType The page type (PGMPAGETYPE). + */ +#define PGM_PAGE_INIT_ZERO(a_pPage, a_pVM, a_uType) \ + PGM_PAGE_INIT((a_pPage), (a_pVM)->pgm.s.HCPhysZeroPg, NIL_GMM_PAGEID, (a_uType), PGM_PAGE_STATE_ZERO) + + +/** @name The Page state, PGMPAGE::uStateY. + * @{ */ +/** The zero page. + * This is a per-VM page that's never ever mapped writable. */ +#define PGM_PAGE_STATE_ZERO 0U +/** A allocated page. + * This is a per-VM page allocated from the page pool (or wherever + * we get MMIO2 pages from if the type is MMIO2). + */ +#define PGM_PAGE_STATE_ALLOCATED 1U +/** A allocated page that's being monitored for writes. + * The shadow page table mappings are read-only. When a write occurs, the + * fWrittenTo member is set, the page remapped as read-write and the state + * moved back to allocated. */ +#define PGM_PAGE_STATE_WRITE_MONITORED 2U +/** The page is shared, aka. copy-on-write. + * This is a page that's shared with other VMs. */ +#define PGM_PAGE_STATE_SHARED 3U +/** The page is ballooned, so no longer available for this VM. */ +#define PGM_PAGE_STATE_BALLOONED 4U +/** @} */ + + +/** Asserts lock ownership in some of the PGM_PAGE_XXX macros. */ +#if defined(VBOX_STRICT) && 0 /** @todo triggers in pgmRZDynMapGCPageV2Inlined */ +# define PGM_PAGE_ASSERT_LOCK(a_pVM) PGM_LOCK_ASSERT_OWNER(a_pVM) +#else +# define PGM_PAGE_ASSERT_LOCK(a_pVM) do { } while (0) +#endif + +/** + * Gets the page state. + * @returns page state (PGM_PAGE_STATE_*). + * @param a_pPage Pointer to the physical guest page tracking structure. + * + * @remarks See PGM_PAGE_GET_HCPHYS_NA for remarks about GCC and strict + * builds. 
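+ *
+ * A minimal, illustrative check (fNeedsAlloc is a made-up local):
+ * @code
+ *      if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ZERO)
+ *          fNeedsAlloc = true; // still backed by the per-VM zero page
+ * @endcode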
+ */ +#define PGM_PAGE_GET_STATE_NA(a_pPage) ( (a_pPage)->s.uStateY ) +#if defined(__GNUC__) && defined(VBOX_STRICT) +# define PGM_PAGE_GET_STATE(a_pPage) __extension__ ({ PGM_PAGE_ASSERT_LOCK(pVM); PGM_PAGE_GET_STATE_NA(a_pPage); }) +#else +# define PGM_PAGE_GET_STATE PGM_PAGE_GET_STATE_NA +#endif + +/** + * Sets the page state. + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_uState The new page state. + */ +#define PGM_PAGE_SET_STATE(a_pVM, a_pPage, a_uState) \ + do { (a_pPage)->s.uStateY = (a_uState); PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + + +/** + * Gets the host physical address of the guest page. + * @returns host physical address (RTHCPHYS). + * @param a_pPage Pointer to the physical guest page tracking structure. + * + * @remarks In strict builds on gcc platforms, this macro will make some ugly + * assumption about a valid pVM variable/parameter being in the + * current context. It will use this pVM variable to assert that the + * PGM lock is held. Use the PGM_PAGE_GET_HCPHYS_NA in contexts where + * pVM is not around. + */ +#if 0 +# define PGM_PAGE_GET_HCPHYS_NA(a_pPage) ( (a_pPage)->s.HCPhysFN << 12 ) +# define PGM_PAGE_GET_HCPHYS PGM_PAGE_GET_HCPHYS_NA +#else +# define PGM_PAGE_GET_HCPHYS_NA(a_pPage) ( (a_pPage)->au64[0] & UINT64_C(0x0000fffffffff000) ) +# if defined(__GNUC__) && defined(VBOX_STRICT) +# define PGM_PAGE_GET_HCPHYS(a_pPage) __extension__ ({ PGM_PAGE_ASSERT_LOCK(pVM); PGM_PAGE_GET_HCPHYS_NA(a_pPage); }) +# else +# define PGM_PAGE_GET_HCPHYS PGM_PAGE_GET_HCPHYS_NA +# endif +#endif + +/** + * Sets the host physical address of the guest page. + * + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_HCPhys The new host physical address. + */ +#define PGM_PAGE_SET_HCPHYS(a_pVM, a_pPage, a_HCPhys) \ + do { \ + RTHCPHYS const SetHCPhysTmp = (a_HCPhys); \ + AssertFatal(!(SetHCPhysTmp & ~UINT64_C(0x0000fffffffff000))); \ + (a_pPage)->s.HCPhysFN = SetHCPhysTmp >> 12; \ + PGM_PAGE_ASSERT_LOCK(a_pVM); \ + } while (0) + +/** + * Get the Page ID. + * @returns The Page ID; NIL_GMM_PAGEID if it's a ZERO page. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_PAGEID(a_pPage) ( (uint32_t)(a_pPage)->s.idPage ) + +/** + * Sets the Page ID. + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_idPage The new page ID. + */ +#define PGM_PAGE_SET_PAGEID(a_pVM, a_pPage, a_idPage) \ + do { \ + (a_pPage)->s.idPage = (a_idPage); \ + PGM_PAGE_ASSERT_LOCK(a_pVM); \ + } while (0) + +/** + * Get the Chunk ID. + * @returns The Chunk ID; NIL_GMM_CHUNKID if it's a ZERO page. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_CHUNKID(a_pPage) ( PGM_PAGE_GET_PAGEID(a_pPage) >> GMM_CHUNKID_SHIFT ) + +/** + * Get the index of the page within the allocation chunk. + * @returns The page index. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_PAGE_IN_CHUNK(a_pPage) ( PGM_PAGE_GET_PAGEID(a_pPage) & GMM_PAGEID_IDX_MASK ) + +/** + * Gets the page type. + * @returns The page type. + * @param a_pPage Pointer to the physical guest page tracking structure. 
+ * + * @remarks See PGM_PAGE_GET_HCPHYS_NA for remarks about GCC and strict + * builds. + */ +#define PGM_PAGE_GET_TYPE_NA(a_pPage) ( (a_pPage)->s.uTypeY ) +#if defined(__GNUC__) && defined(VBOX_STRICT) +# define PGM_PAGE_GET_TYPE(a_pPage) __extension__ ({ PGM_PAGE_ASSERT_LOCK(pVM); PGM_PAGE_GET_TYPE_NA(a_pPage); }) +#else +# define PGM_PAGE_GET_TYPE PGM_PAGE_GET_TYPE_NA +#endif + +/** + * Sets the page type. + * + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_enmType The new page type (PGMPAGETYPE). + */ +#define PGM_PAGE_SET_TYPE(a_pVM, a_pPage, a_enmType) \ + do { (a_pPage)->s.uTypeY = (a_enmType); PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + +/** + * Gets the page table index + * @returns The page table index. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_PTE_INDEX(a_pPage) ( (a_pPage)->s.u10PteIdx ) + +/** + * Sets the page table index. + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_iPte New page table index. + */ +#define PGM_PAGE_SET_PTE_INDEX(a_pVM, a_pPage, a_iPte) \ + do { (a_pPage)->s.u10PteIdx = (a_iPte); PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + +/** + * Checks if the page is marked for MMIO, no MMIO2 aliasing. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_MMIO(a_pPage) ( (a_pPage)->s.uTypeY == PGMPAGETYPE_MMIO ) + +/** + * Checks if the page is marked for MMIO, including both aliases. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_MMIO_OR_ALIAS(a_pPage) ( (a_pPage)->s.uTypeY == PGMPAGETYPE_MMIO \ + || (a_pPage)->s.uTypeY == PGMPAGETYPE_MMIO2_ALIAS_MMIO \ + || (a_pPage)->s.uTypeY == PGMPAGETYPE_SPECIAL_ALIAS_MMIO \ + ) + +/** + * Checks if the page is marked for MMIO, including special aliases. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_MMIO_OR_SPECIAL_ALIAS(a_pPage) ( (a_pPage)->s.uTypeY == PGMPAGETYPE_MMIO \ + || (a_pPage)->s.uTypeY == PGMPAGETYPE_SPECIAL_ALIAS_MMIO ) + +/** + * Checks if the page is a special aliased MMIO page. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_SPECIAL_ALIAS_MMIO(a_pPage) ( (a_pPage)->s.uTypeY == PGMPAGETYPE_SPECIAL_ALIAS_MMIO ) + +/** + * Checks if the page is backed by the ZERO page. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_ZERO(a_pPage) ( (a_pPage)->s.uStateY == PGM_PAGE_STATE_ZERO ) + +/** + * Checks if the page is backed by a SHARED page. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_SHARED(a_pPage) ( (a_pPage)->s.uStateY == PGM_PAGE_STATE_SHARED ) + +/** + * Checks if the page is ballooned. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_BALLOONED(a_pPage) ( (a_pPage)->s.uStateY == PGM_PAGE_STATE_BALLOONED ) + +/** + * Checks if the page is allocated. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. 
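+ * @remarks Zero, ballooned, shared and write monitored pages each have their
+ * own distinct state, so this tests specifically for PGM_PAGE_STATE_ALLOCATED.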
+ */ +#define PGM_PAGE_IS_ALLOCATED(a_pPage) ( (a_pPage)->s.uStateY == PGM_PAGE_STATE_ALLOCATED ) + +/** + * Marks the page as written to (for GMM change monitoring). + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_SET_WRITTEN_TO(a_pVM, a_pPage) \ + do { (a_pPage)->s.fWrittenToY = 1; PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + +/** + * Clears the written-to indicator. + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_CLEAR_WRITTEN_TO(a_pVM, a_pPage) \ + do { (a_pPage)->s.fWrittenToY = 0; PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + +/** + * Checks if the page was marked as written-to. + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_IS_WRITTEN_TO(a_pPage) ( (a_pPage)->s.fWrittenToY ) + + +/** @name PT usage values (PGMPAGE::u2PDEType). + * + * @{ */ +/** Either as a PT or PDE. */ +#define PGM_PAGE_PDE_TYPE_DONTCARE 0 +/** Must use a page table to map the range. */ +#define PGM_PAGE_PDE_TYPE_PT 1 +/** Can use a page directory entry to map the continuous range. */ +#define PGM_PAGE_PDE_TYPE_PDE 2 +/** Can use a page directory entry to map the continuous range - temporarily disabled (by page monitoring). */ +#define PGM_PAGE_PDE_TYPE_PDE_DISABLED 3 +/** @} */ + +/** + * Set the PDE type of the page + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_uType PGM_PAGE_PDE_TYPE_*. + */ +#define PGM_PAGE_SET_PDE_TYPE(a_pVM, a_pPage, a_uType) \ + do { (a_pPage)->s.u2PDETypeY = (a_uType); PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + +/** + * Checks if the page was marked being part of a large page + * @returns true/false. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_PDE_TYPE(a_pPage) ( (a_pPage)->s.u2PDETypeY ) + +/** @name Physical Access Handler State values (PGMPAGE::u2HandlerPhysStateY). + * + * @remarks The values are assigned in order of priority, so we can calculate + * the correct state for a page with different handlers installed. + * @{ */ +/** No handler installed. */ +#define PGM_PAGE_HNDL_PHYS_STATE_NONE 0 +/** Monitoring is temporarily disabled. */ +#define PGM_PAGE_HNDL_PHYS_STATE_DISABLED 1 +/** Write access is monitored. */ +#define PGM_PAGE_HNDL_PHYS_STATE_WRITE 2 +/** All access is monitored. */ +#define PGM_PAGE_HNDL_PHYS_STATE_ALL 3 +/** @} */ + +/** + * Gets the physical access handler state of a page. + * @returns PGM_PAGE_HNDL_PHYS_STATE_* value. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_HNDL_PHYS_STATE(a_pPage) ( (a_pPage)->s.u2HandlerPhysStateY ) + +/** + * Sets the physical access handler state of a page. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_uState The new state value. + */ +#define PGM_PAGE_SET_HNDL_PHYS_STATE(a_pPage, a_uState) \ + do { (a_pPage)->s.u2HandlerPhysStateY = (a_uState); } while (0) + +/** + * Checks if the page has any physical access handlers, including temporarily disabled ones. + * @returns true/false + * @param a_pPage Pointer to the physical guest page tracking structure. 
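+ * @remarks Since the PGM_PAGE_HNDL_PHYS_STATE_* values are assigned in order
+ * of priority, this boils down to comparing the state against
+ * PGM_PAGE_HNDL_PHYS_STATE_NONE.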
+ */ +#define PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(a_pPage) \ + ( PGM_PAGE_GET_HNDL_PHYS_STATE(a_pPage) != PGM_PAGE_HNDL_PHYS_STATE_NONE ) + +/** + * Checks if the page has any active physical access handlers. + * @returns true/false + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(a_pPage) \ + ( PGM_PAGE_GET_HNDL_PHYS_STATE(a_pPage) >= PGM_PAGE_HNDL_PHYS_STATE_WRITE ) + +/** + * Checks if the page has any access handlers, including temporarily disabled ones. + * @returns true/false + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_HAS_ANY_HANDLERS(a_pPage) \ + ( PGM_PAGE_GET_HNDL_PHYS_STATE(a_pPage) != PGM_PAGE_HNDL_PHYS_STATE_NONE ) + +/** + * Checks if the page has any active access handlers. + * @returns true/false + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_HAS_ACTIVE_HANDLERS(a_pPage) \ + (PGM_PAGE_GET_HNDL_PHYS_STATE(a_pPage) >= PGM_PAGE_HNDL_PHYS_STATE_WRITE ) + +/** + * Checks if the page has any active access handlers catching all accesses. + * @returns true/false + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(a_pPage) \ + ( PGM_PAGE_GET_HNDL_PHYS_STATE(a_pPage) == PGM_PAGE_HNDL_PHYS_STATE_ALL ) + + +/** @def PGM_PAGE_GET_TRACKING + * Gets the packed shadow page pool tracking data associated with a guest page. + * @returns uint16_t containing the data. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_TRACKING_NA(a_pPage) ( (a_pPage)->s.u16TrackingY ) +#if defined(__GNUC__) && defined(VBOX_STRICT) +# define PGM_PAGE_GET_TRACKING(a_pPage) __extension__ ({ PGM_PAGE_ASSERT_LOCK(pVM); PGM_PAGE_GET_TRACKING_NA(a_pPage); }) +#else +# define PGM_PAGE_GET_TRACKING PGM_PAGE_GET_TRACKING_NA +#endif + +/** @def PGM_PAGE_SET_TRACKING + * Sets the packed shadow page pool tracking data associated with a guest page. + * @param a_pVM The VM handle, only used for lock ownership assertions. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_u16TrackingData The tracking data to store. + */ +#define PGM_PAGE_SET_TRACKING(a_pVM, a_pPage, a_u16TrackingData) \ + do { (a_pPage)->s.u16TrackingY = (a_u16TrackingData); PGM_PAGE_ASSERT_LOCK(a_pVM); } while (0) + +/** @def PGM_PAGE_GET_TD_CREFS + * Gets the @a cRefs tracking data member. + * @returns cRefs. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_TD_CREFS(a_pPage) \ + ((PGM_PAGE_GET_TRACKING(a_pPage) >> PGMPOOL_TD_CREFS_SHIFT) & PGMPOOL_TD_CREFS_MASK) +#define PGM_PAGE_GET_TD_CREFS_NA(a_pPage) \ + ((PGM_PAGE_GET_TRACKING_NA(a_pPage) >> PGMPOOL_TD_CREFS_SHIFT) & PGMPOOL_TD_CREFS_MASK) + +/** @def PGM_PAGE_GET_TD_IDX + * Gets the @a idx tracking data member. + * @returns idx. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_TD_IDX(a_pPage) \ + ((PGM_PAGE_GET_TRACKING(a_pPage) >> PGMPOOL_TD_IDX_SHIFT) & PGMPOOL_TD_IDX_MASK) +#define PGM_PAGE_GET_TD_IDX_NA(a_pPage) \ + ((PGM_PAGE_GET_TRACKING_NA(a_pPage) >> PGMPOOL_TD_IDX_SHIFT) & PGMPOOL_TD_IDX_MASK) + + +/** Max number of locks on a page. */ +#define PGM_PAGE_MAX_LOCKS UINT8_C(254) + +/** Get the read lock count. + * @returns count. + * @param a_pPage Pointer to the physical guest page tracking structure. 
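+ * @remarks See PGM_PAGE_MAX_LOCKS above for the upper bound on this counter.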
+ */ +#define PGM_PAGE_GET_READ_LOCKS(a_pPage) ( (a_pPage)->s.cReadLocksY ) + +/** Get the write lock count. + * @returns count. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_WRITE_LOCKS(a_pPage) ( (a_pPage)->s.cWriteLocksY ) + +/** Decrement the read lock counter. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_DEC_READ_LOCKS(a_pPage) do { --(a_pPage)->s.cReadLocksY; } while (0) + +/** Decrement the write lock counter. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_DEC_WRITE_LOCKS(a_pPage) do { --(a_pPage)->s.cWriteLocksY; } while (0) + +/** Increment the read lock counter. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_INC_READ_LOCKS(a_pPage) do { ++(a_pPage)->s.cReadLocksY; } while (0) + +/** Increment the write lock counter. + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_INC_WRITE_LOCKS(a_pPage) do { ++(a_pPage)->s.cWriteLocksY; } while (0) + + +/** Gets the NEM state. + * @returns NEM state value (two bits). + * @param a_pPage Pointer to the physical guest page tracking structure. + */ +#define PGM_PAGE_GET_NEM_STATE(a_pPage) ((a_pPage)->s.u2NemStateY) + +/** Sets the NEM state. + * @param a_pPage Pointer to the physical guest page tracking structure. + * @param a_u2State The NEM state value (specific to NEM impl.). + */ +#define PGM_PAGE_SET_NEM_STATE(a_pPage, a_u2State) \ + do { Assert((a_u2State) < 4); (a_pPage)->s.u2NemStateY = (a_u2State); } while (0) + + +#if 0 +/** Enables sanity checking of write monitoring using CRC-32. */ +# define PGMLIVESAVERAMPAGE_WITH_CRC32 +#endif + +/** + * Per page live save tracking data. + */ +typedef struct PGMLIVESAVERAMPAGE +{ + /** Number of times it has been dirtied. */ + uint32_t cDirtied : 24; + /** Whether it is currently dirty. */ + uint32_t fDirty : 1; + /** Ignore the page. + * This is used for pages that has been MMIO, MMIO2 or ROM pages once. We will + * deal with these after pausing the VM and DevPCI have said it bit about + * remappings. */ + uint32_t fIgnore : 1; + /** Was a ZERO page last time around. */ + uint32_t fZero : 1; + /** Was a SHARED page last time around. */ + uint32_t fShared : 1; + /** Whether the page is/was write monitored in a previous pass. */ + uint32_t fWriteMonitored : 1; + /** Whether the page is/was write monitored earlier in this pass. */ + uint32_t fWriteMonitoredJustNow : 1; + /** Bits reserved for future use. */ + uint32_t u2Reserved : 2; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 + /** CRC-32 for the page. This is for internal consistency checks. */ + uint32_t u32Crc; +#endif +} PGMLIVESAVERAMPAGE; +#ifdef PGMLIVESAVERAMPAGE_WITH_CRC32 +AssertCompileSize(PGMLIVESAVERAMPAGE, 8); +#else +AssertCompileSize(PGMLIVESAVERAMPAGE, 4); +#endif +/** Pointer to the per page live save tracking data. */ +typedef PGMLIVESAVERAMPAGE *PPGMLIVESAVERAMPAGE; + +/** The max value of PGMLIVESAVERAMPAGE::cDirtied. */ +#define PGMLIVSAVEPAGE_MAX_DIRTIED 0x00fffff0 + + +/** + * RAM range for GC Phys to HC Phys conversion. + * + * Can be used for HC Virt to GC Phys and HC Virt to HC Phys + * conversions too, but we'll let MM handle that for now. + * + * This structure is used by linked lists in both GC and HC. + */ +typedef struct PGMRAMRANGE +{ + /** Start of the range. Page aligned. */ + RTGCPHYS GCPhys; + /** Size of the range. (Page aligned of course). 
 */
+ RTGCPHYS cb;
+ /** Pointer to the next RAM range - for R3. */
+ R3PTRTYPE(struct PGMRAMRANGE *) pNextR3;
+ /** Pointer to the next RAM range - for R0. */
+ R0PTRTYPE(struct PGMRAMRANGE *) pNextR0;
+ /** PGM_RAM_RANGE_FLAGS_* flags. */
+ uint32_t fFlags;
+ uint32_t fPadding1;
+ /** Last address in the range (inclusive). Page aligned (-1). */
+ RTGCPHYS GCPhysLast;
+ /** Start of the HC mapping of the range. This is only used for MMIO2. */
+ R3PTRTYPE(void *) pvR3;
+ /** Live save per page tracking data. */
+ R3PTRTYPE(PPGMLIVESAVERAMPAGE) paLSPages;
+ /** The range description. */
+ R3PTRTYPE(const char *) pszDesc;
+ /** Pointer to self - R0 pointer. */
+ R0PTRTYPE(struct PGMRAMRANGE *) pSelfR0;
+
+ /** Pointer to the left search tree node - ring-3 context. */
+ R3PTRTYPE(struct PGMRAMRANGE *) pLeftR3;
+ /** Pointer to the right search tree node - ring-3 context. */
+ R3PTRTYPE(struct PGMRAMRANGE *) pRightR3;
+ /** Pointer to the left search tree node - ring-0 context. */
+ R0PTRTYPE(struct PGMRAMRANGE *) pLeftR0;
+ /** Pointer to the right search tree node - ring-0 context. */
+ R0PTRTYPE(struct PGMRAMRANGE *) pRightR0;
+
+ /** Padding to make aPages aligned on sizeof(PGMPAGE). */
+#if HC_ARCH_BITS == 32
+ uint32_t au32Alignment2[HC_ARCH_BITS == 32 ? 2 : 0];
+#endif
+ /** Array of physical guest page tracking structures. */
+ PGMPAGE aPages[1];
+} PGMRAMRANGE;
+/** Pointer to RAM range for GC Phys to HC Phys conversion. */
+typedef PGMRAMRANGE *PPGMRAMRANGE;
+
+/** @name PGMRAMRANGE::fFlags
+ * @{ */
+/** The RAM range is floating around as an independent guest mapping. */
+#define PGM_RAM_RANGE_FLAGS_FLOATING RT_BIT(20)
+/** Ad hoc RAM range for a ROM mapping. */
+#define PGM_RAM_RANGE_FLAGS_AD_HOC_ROM RT_BIT(21)
+/** Ad hoc RAM range for an MMIO mapping. */
+#define PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO RT_BIT(22)
+/** Ad hoc RAM range for an MMIO2 or pre-registered MMIO mapping. */
+#define PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO_EX RT_BIT(23)
+/** @} */
+
+/** Tests if a RAM range is an ad hoc one or not.
+ * @returns true/false.
+ * @param pRam The RAM range.
+ */
+#define PGM_RAM_RANGE_IS_AD_HOC(pRam) \
+ (!!( (pRam)->fFlags & (PGM_RAM_RANGE_FLAGS_AD_HOC_ROM | PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO | PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO_EX) ) )
+
+/** The number of entries in the RAM range TLBs (there is one for each
+ * context). Must be a power of two. */
+#define PGM_RAMRANGE_TLB_ENTRIES 8
+
+/**
+ * Calculates the RAM range TLB index for the physical address.
+ *
+ * @returns RAM range TLB index.
+ * @param a_GCPhys The guest physical address.
+ */
+#define PGM_RAMRANGE_TLB_IDX(a_GCPhys) ( ((a_GCPhys) >> 20) & (PGM_RAMRANGE_TLB_ENTRIES - 1) )
+
+
+
+/**
+ * Per page tracking structure for a ROM image.
+ *
+ * A ROM image may have a shadow page, in which case we may have two pages
+ * backing it. This structure contains the PGMPAGE for both while
+ * PGMRAMRANGE has a copy of the active one. It is important that these
+ * aren't out of sync in any regard other than page pool tracking data.
+ */
+typedef struct PGMROMPAGE
+{
+ /** The page structure for the virgin ROM page. */
+ PGMPAGE Virgin;
+ /** The page structure for the shadow RAM page. */
+ PGMPAGE Shadow;
+ /** The current protection setting. */
+ PGMROMPROT enmProt;
+ /** Live save status information. Makes use of unused alignment space. */
+ struct
+ {
+ /** The previous protection value. */
+ uint8_t u8Prot;
+ /** Written to flag set by the handler. */
+ bool fWrittenTo;
+ /** Whether the shadow page is dirty or not. 
*/ + bool fDirty; + /** Whether it was dirtied in the recently. */ + bool fDirtiedRecently; + } LiveSave; +} PGMROMPAGE; +AssertCompileSizeAlignment(PGMROMPAGE, 8); +/** Pointer to a ROM page tracking structure. */ +typedef PGMROMPAGE *PPGMROMPAGE; + + +/** + * A registered ROM image. + * + * This is needed to keep track of ROM image since they generally intrude + * into a PGMRAMRANGE. It also keeps track of additional info like the + * two page sets (read-only virgin and read-write shadow), the current + * state of each page. + * + * Because access handlers cannot easily be executed in a different + * context, the ROM ranges needs to be accessible and in all contexts. + */ +typedef struct PGMROMRANGE +{ + /** Pointer to the next range - R3. */ + R3PTRTYPE(struct PGMROMRANGE *) pNextR3; + /** Pointer to the next range - R0. */ + R0PTRTYPE(struct PGMROMRANGE *) pNextR0; + /** Address of the range. */ + RTGCPHYS GCPhys; + /** Address of the last byte in the range. */ + RTGCPHYS GCPhysLast; + /** Size of the range. */ + RTGCPHYS cb; + /** The flags (PGMPHYS_ROM_FLAGS_*). */ + uint32_t fFlags; + /** The saved state range ID. */ + uint8_t idSavedState; + /** Alignment padding. */ + uint8_t au8Alignment[3]; + /** Alignment padding ensuring that aPages is sizeof(PGMROMPAGE) aligned. */ + uint32_t au32Alignemnt[HC_ARCH_BITS == 32 ? 5 : 1]; + /** The size bits pvOriginal points to. */ + uint32_t cbOriginal; + /** Pointer to the original bits when PGMPHYS_ROM_FLAGS_PERMANENT_BINARY was specified. + * This is used for strictness checks. */ + R3PTRTYPE(const void *) pvOriginal; + /** The ROM description. */ + R3PTRTYPE(const char *) pszDesc; + /** The per page tracking structures. */ + PGMROMPAGE aPages[1]; +} PGMROMRANGE; +/** Pointer to a ROM range. */ +typedef PGMROMRANGE *PPGMROMRANGE; + + +/** + * Live save per page data for an MMIO2 page. + * + * Not using PGMLIVESAVERAMPAGE here because we cannot use normal write monitoring + * of MMIO2 pages. The current approach is using some optimistic SHA-1 + + * CRC-32 for detecting changes as well as special handling of zero pages. This + * is a TEMPORARY measure which isn't perfect, but hopefully it is good enough + * for speeding things up. (We're using SHA-1 and not SHA-256 or SHA-512 + * because of speed (2.5x and 6x slower).) + * + * @todo Implement dirty MMIO2 page reporting that can be enabled during live + * save but normally is disabled. Since we can write monitor guest + * accesses on our own, we only need this for host accesses. Shouldn't be + * too difficult for DevVGA, VMMDev might be doable, the planned + * networking fun will be fun since it involves ring-0. + */ +typedef struct PGMLIVESAVEMMIO2PAGE +{ + /** Set if the page is considered dirty. */ + bool fDirty; + /** The number of scans this page has remained unchanged for. + * Only updated for dirty pages. */ + uint8_t cUnchangedScans; + /** Whether this page was zero at the last scan. */ + bool fZero; + /** Alignment padding. */ + bool fReserved; + /** CRC-32 for the first half of the page. + * This is used together with u32CrcH2 to quickly detect changes in the page + * during the non-final passes. */ + uint32_t u32CrcH1; + /** CRC-32 for the second half of the page. */ + uint32_t u32CrcH2; + /** SHA-1 for the saved page. + * This is used in the final pass to skip pages without changes. */ + uint8_t abSha1Saved[RTSHA1_HASH_SIZE]; +} PGMLIVESAVEMMIO2PAGE; +/** Pointer to a live save status data for an MMIO2 page. 
*/ +typedef PGMLIVESAVEMMIO2PAGE *PPGMLIVESAVEMMIO2PAGE; + +/** + * A registered MMIO2 (= Device RAM) range. + * + * There are a few reason why we need to keep track of these registrations. One + * of them is the deregistration & cleanup stuff, while another is that the + * PGMRAMRANGE associated with such a region may have to be removed from the ram + * range list. + * + * Overlapping with a RAM range has to be 100% or none at all. The pages in the + * existing RAM range must not be ROM nor MMIO. A guru meditation will be + * raised if a partial overlap or an overlap of ROM pages is encountered. On an + * overlap we will free all the existing RAM pages and put in the ram range + * pages instead. + */ +typedef struct PGMREGMMIO2RANGE +{ + /** The owner of the range. (a device) */ + PPDMDEVINSR3 pDevInsR3; + /** Pointer to the ring-3 mapping of the allocation. */ + RTR3PTR pvR3; +#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM) + /** Pointer to the ring-0 mapping of the allocation. */ + RTR0PTR pvR0; +#endif + /** Pointer to the next range - R3. */ + R3PTRTYPE(struct PGMREGMMIO2RANGE *) pNextR3; + /** Flags (PGMREGMMIO2RANGE_F_XXX). */ + uint16_t fFlags; + /** The sub device number (internal PCI config (CFGM) number). */ + uint8_t iSubDev; + /** The PCI region number. */ + uint8_t iRegion; + /** The saved state range ID. */ + uint8_t idSavedState; + /** MMIO2 range identifier, for page IDs (PGMPAGE::s.idPage). */ + uint8_t idMmio2; + /** Alignment padding for putting the ram range on a PGMPAGE alignment boundary. */ +#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM) + uint8_t abAlignment[HC_ARCH_BITS == 32 ? 6 + 4 : 2]; +#else + uint8_t abAlignment[HC_ARCH_BITS == 32 ? 6 + 8 : 2 + 8]; +#endif + /** The real size. + * This may be larger than indicated by RamRange.cb if the range has been + * reduced during saved state loading. */ + RTGCPHYS cbReal; + /** Pointer to the physical handler for MMIO. */ + R3PTRTYPE(PPGMPHYSHANDLER) pPhysHandlerR3; + /** Live save per page tracking data for MMIO2. */ + R3PTRTYPE(PPGMLIVESAVEMMIO2PAGE) paLSPages; + /** The associated RAM range. */ + PGMRAMRANGE RamRange; +} PGMREGMMIO2RANGE; +AssertCompileMemberAlignment(PGMREGMMIO2RANGE, RamRange, 16); +/** Pointer to a MMIO2 or pre-registered MMIO range. */ +typedef PGMREGMMIO2RANGE *PPGMREGMMIO2RANGE; + +/** @name PGMREGMMIO2RANGE_F_XXX - Registered MMIO2 range flags. + * @{ */ +/** Set if it's an MMIO2 range. + * @note Historical. For a while we did some of the MMIO this way too. */ +#define PGMREGMMIO2RANGE_F_MMIO2 UINT16_C(0x0001) +/** Set if this is the first chunk in the MMIO2 range. */ +#define PGMREGMMIO2RANGE_F_FIRST_CHUNK UINT16_C(0x0002) +/** Set if this is the last chunk in the MMIO2 range. */ +#define PGMREGMMIO2RANGE_F_LAST_CHUNK UINT16_C(0x0004) +/** Set if the whole range is mapped. */ +#define PGMREGMMIO2RANGE_F_MAPPED UINT16_C(0x0008) +/** Set if it's overlapping, clear if not. */ +#define PGMREGMMIO2RANGE_F_OVERLAPPING UINT16_C(0x0010) +/** @} */ + + +/** @name Internal MMIO2 constants. + * @{ */ +/** The maximum number of MMIO2 ranges. */ +#define PGM_MMIO2_MAX_RANGES 32 +/** The maximum number of pages in a MMIO2 range. */ +#define PGM_MMIO2_MAX_PAGE_COUNT UINT32_C(0x01000000) +/** Makes a MMIO2 page ID out of a MMIO2 range ID and page index number. */ +#define PGM_MMIO2_PAGEID_MAKE(a_idMmio2, a_iPage) ( ((uint32_t)(a_idMmio2) << 24) | (uint32_t)(a_iPage) ) +/** Gets the MMIO2 range ID from an MMIO2 page ID. 
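+ * E.g. PGM_MMIO2_PAGEID_MAKE(2, 0x123) yields the page ID 0x02000123, from
+ * which this recovers the range ID 2 and PGM_MMIO2_PAGEID_GET_IDX the page
+ * index 0x123.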
*/ +#define PGM_MMIO2_PAGEID_GET_MMIO2_ID(a_idPage) ( (uint8_t)((a_idPage) >> 24) ) +/** Gets the MMIO2 page index from an MMIO2 page ID. */ +#define PGM_MMIO2_PAGEID_GET_IDX(a_idPage) ( ((a_idPage) & UINT32_C(0x00ffffff)) ) +/** @} */ + + + +/** + * PGMPhysRead/Write cache entry + */ +typedef struct PGMPHYSCACHEENTRY +{ + /** R3 pointer to physical page. */ + R3PTRTYPE(uint8_t *) pbR3; + /** GC Physical address for cache entry */ + RTGCPHYS GCPhys; +#if HC_ARCH_BITS == 64 && GC_ARCH_BITS == 32 + RTGCPHYS u32Padding0; /**< alignment padding. */ +#endif +} PGMPHYSCACHEENTRY; + +/** + * PGMPhysRead/Write cache to reduce REM memory access overhead + */ +typedef struct PGMPHYSCACHE +{ + /** Bitmap of valid cache entries */ + uint64_t aEntries; + /** Cache entries */ + PGMPHYSCACHEENTRY Entry[PGM_MAX_PHYSCACHE_ENTRIES]; +} PGMPHYSCACHE; + + +/** @name Ring-3 page mapping TLBs + * @{ */ + +/** Pointer to an allocation chunk ring-3 mapping. */ +typedef struct PGMCHUNKR3MAP *PPGMCHUNKR3MAP; +/** Pointer to an allocation chunk ring-3 mapping pointer. */ +typedef PPGMCHUNKR3MAP *PPPGMCHUNKR3MAP; + +/** + * Ring-3 tracking structure for an allocation chunk ring-3 mapping. + * + * The primary tree (Core) uses the chunk id as key. + */ +typedef struct PGMCHUNKR3MAP +{ + /** The key is the chunk id. */ + AVLU32NODECORE Core; + /** The time (ChunkR3Map.iNow) this chunk was last used. Used for unmap + * selection. */ + uint32_t iLastUsed; + /** The current reference count. */ + uint32_t volatile cRefs; + /** The current permanent reference count. */ + uint32_t volatile cPermRefs; + /** The mapping address. */ + void *pv; +} PGMCHUNKR3MAP; + +/** + * Allocation chunk ring-3 mapping TLB entry. + */ +typedef struct PGMCHUNKR3MAPTLBE +{ + /** The chunk id. */ + uint32_t volatile idChunk; +#if HC_ARCH_BITS == 64 + uint32_t u32Padding; /**< alignment padding. */ +#endif + /** The chunk map. */ +#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE) || defined(VBOX_WITH_RAM_IN_KERNEL) + R3PTRTYPE(PPGMCHUNKR3MAP) volatile pChunk; +#else + R3R0PTRTYPE(PPGMCHUNKR3MAP) volatile pChunk; +#endif +} PGMCHUNKR3MAPTLBE; +/** Pointer to the an allocation chunk ring-3 mapping TLB entry. */ +typedef PGMCHUNKR3MAPTLBE *PPGMCHUNKR3MAPTLBE; + +/** The number of TLB entries in PGMCHUNKR3MAPTLB. + * @remark Must be a power of two value. */ +#define PGM_CHUNKR3MAPTLB_ENTRIES 64 + +/** + * Allocation chunk ring-3 mapping TLB. + * + * @remarks We use a TLB to speed up lookups by avoiding walking the AVL. + * At first glance this might look kinda odd since AVL trees are + * supposed to give the most optimal lookup times of all trees + * due to their balancing. However, take a tree with 1023 nodes + * in it, that's 10 levels, meaning that most searches has to go + * down 9 levels before they find what they want. This isn't fast + * compared to a TLB hit. There is the factor of cache misses, + * and of course the problem with trees and branch prediction. + * This is why we use TLBs in front of most of the trees. + * + * @todo Generalize this TLB + AVL stuff, shouldn't be all that + * difficult when we switch to the new inlined AVL trees (from kStuff). + */ +typedef struct PGMCHUNKR3MAPTLB +{ + /** The TLB entries. */ + PGMCHUNKR3MAPTLBE aEntries[PGM_CHUNKR3MAPTLB_ENTRIES]; +} PGMCHUNKR3MAPTLB; + +/** + * Calculates the index of a guest page in the Ring-3 Chunk TLB. + * @returns Chunk TLB index. + * @param idChunk The Chunk ID. 
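+ *
+ * With PGM_CHUNKR3MAPTLB_ENTRIES at 64 this is simply the low six bits of the
+ * chunk ID, so e.g. chunk IDs 5 and 69 both hash to slot 5.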
+ */ +#define PGM_CHUNKR3MAPTLB_IDX(idChunk) ( (idChunk) & (PGM_CHUNKR3MAPTLB_ENTRIES - 1) ) + + +/** + * Ring-3 guest page mapping TLB entry. + * @remarks used in ring-0 as well at the moment. + */ +typedef struct PGMPAGER3MAPTLBE +{ + /** Address of the page. */ + RTGCPHYS volatile GCPhys; + /** The guest page. */ +#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE) || defined(VBOX_WITH_RAM_IN_KERNEL) + R3PTRTYPE(PPGMPAGE) volatile pPage; +#else + R3R0PTRTYPE(PPGMPAGE) volatile pPage; +#endif + /** Pointer to the page mapping tracking structure, PGMCHUNKR3MAP. */ +#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE) || defined(VBOX_WITH_RAM_IN_KERNEL) + R3PTRTYPE(PPGMCHUNKR3MAP) volatile pMap; +#else + R3R0PTRTYPE(PPGMCHUNKR3MAP) volatile pMap; +#endif + /** The address */ +#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE) || defined(VBOX_WITH_RAM_IN_KERNEL) + R3PTRTYPE(void *) volatile pv; +#else + R3R0PTRTYPE(void *) volatile pv; +#endif +#if HC_ARCH_BITS == 32 + uint32_t u32Padding; /**< alignment padding. */ +#endif +} PGMPAGER3MAPTLBE; +/** Pointer to an entry in the HC physical TLB. */ +typedef PGMPAGER3MAPTLBE *PPGMPAGER3MAPTLBE; + + +/** The number of entries in the ring-3 guest page mapping TLB. + * @remarks The value must be a power of two. */ +#define PGM_PAGER3MAPTLB_ENTRIES 256 + +/** + * Ring-3 guest page mapping TLB. + * @remarks used in ring-0 as well at the moment. + */ +typedef struct PGMPAGER3MAPTLB +{ + /** The TLB entries. */ + PGMPAGER3MAPTLBE aEntries[PGM_PAGER3MAPTLB_ENTRIES]; +} PGMPAGER3MAPTLB; +/** Pointer to the ring-3 guest page mapping TLB. */ +typedef PGMPAGER3MAPTLB *PPGMPAGER3MAPTLB; + +/** + * Calculates the index of the TLB entry for the specified guest page. + * @returns Physical TLB index. + * @param GCPhys The guest physical address. + */ +#define PGM_PAGER3MAPTLB_IDX(GCPhys) ( ((GCPhys) >> PAGE_SHIFT) & (PGM_PAGER3MAPTLB_ENTRIES - 1) ) + +/** @} */ + +#if defined(VBOX_WITH_RAM_IN_KERNEL) || defined(DOXYGEN_RUNNING) +/** @name Ring-0 page mapping TLB + * @{ */ +/** + * Ring-0 guest page mapping TLB entry. + */ +typedef struct PGMPAGER0MAPTLBE +{ + /** Address of the page. */ + RTGCPHYS volatile GCPhys; + /** The guest page. */ + R0PTRTYPE(PPGMPAGE) volatile pPage; + /** The address */ + R0PTRTYPE(void *) volatile pv; +} PGMPAGER0MAPTLBE; +/** Pointer to an entry in the HC physical TLB. */ +typedef PGMPAGER0MAPTLBE *PPGMPAGER0MAPTLBE; + + +/** The number of entries in the ring-3 guest page mapping TLB. + * @remarks The value must be a power of two. */ +#define PGM_PAGER0MAPTLB_ENTRIES 256 + +/** + * Ring-3 guest page mapping TLB. + * @remarks used in ring-0 as well at the moment. + */ +typedef struct PGMPAGER0MAPTLB +{ + /** The TLB entries. */ + PGMPAGER0MAPTLBE aEntries[PGM_PAGER0MAPTLB_ENTRIES]; +} PGMPAGER0MAPTLB; +/** Pointer to the ring-3 guest page mapping TLB. */ +typedef PGMPAGER0MAPTLB *PPGMPAGER0MAPTLB; + +/** + * Calculates the index of the TLB entry for the specified guest page. + * @returns Physical TLB index. + * @param GCPhys The guest physical address. + */ +#define PGM_PAGER0MAPTLB_IDX(GCPhys) ( ((GCPhys) >> PAGE_SHIFT) & (PGM_PAGER0MAPTLB_ENTRIES - 1) ) +/** @} */ +#endif /* VBOX_WITH_RAM_IN_KERNEL || DOXYGEN_RUNNING */ + +/** + * Raw-mode context dynamic mapping cache entry. + * + * Because of raw-mode context being reloctable and all relocations are applied + * in ring-3, this has to be defined here and be RC specific. + * + * @sa PGMRZDYNMAPENTRY, PGMR0DYNMAPENTRY. + */ +typedef struct PGMRCDYNMAPENTRY +{ + /** The physical address of the currently mapped page. 
+ * This is duplicate for three reasons: cache locality, cache policy of the PT + * mappings and sanity checks. */ + RTHCPHYS HCPhys; + /** Pointer to the page. */ + RTRCPTR pvPage; + /** The number of references. */ + int32_t volatile cRefs; + /** PTE pointer union. */ + struct PGMRCDYNMAPENTRY_PPTE + { + /** PTE pointer, 32-bit legacy version. */ + RCPTRTYPE(PX86PTE) pLegacy; + /** PTE pointer, PAE version. */ + RCPTRTYPE(PX86PTEPAE) pPae; + } uPte; +} PGMRCDYNMAPENTRY; +/** Pointer to a dynamic mapping cache entry for the raw-mode context. */ +typedef PGMRCDYNMAPENTRY *PPGMRCDYNMAPENTRY; + + +/** + * Dynamic mapping cache for the raw-mode context. + * + * This is initialized during VMMRC init based upon the pbDynPageMapBaseGC and + * paDynPageMap* PGM members. However, it has to be defined in PGMInternal.h + * so that we can perform relocations from PGMR3Relocate. This has the + * consequence that we must have separate ring-0 and raw-mode context versions + * of this struct even if they share the basic elements. + * + * @sa PPGMRZDYNMAP, PGMR0DYNMAP. + */ +typedef struct PGMRCDYNMAP +{ + /** The usual magic number / eye catcher (PGMRZDYNMAP_MAGIC). */ + uint32_t u32Magic; + /** Array for tracking and managing the pages. */ + RCPTRTYPE(PPGMRCDYNMAPENTRY) paPages; + /** The cache size given as a number of pages. */ + uint32_t cPages; + /** The current load. + * This does not include guard pages. */ + uint32_t cLoad; + /** The max load ever. + * This is maintained to get trigger adding of more mapping space. */ + uint32_t cMaxLoad; + /** The number of guard pages. */ + uint32_t cGuardPages; + /** The number of users (protected by hInitLock). */ + uint32_t cUsers; +} PGMRCDYNMAP; +/** Pointer to the dynamic cache for the raw-mode context. */ +typedef PGMRCDYNMAP *PPGMRCDYNMAP; + + +/** + * Mapping cache usage set entry. + * + * @remarks 16-bit ints was chosen as the set is not expected to be used beyond + * the dynamic ring-0 and (to some extent) raw-mode context mapping + * cache. If it's extended to include ring-3, well, then something + * will have be changed here... + */ +typedef struct PGMMAPSETENTRY +{ + /** Pointer to the page. */ + RTR0PTR pvPage; + /** The mapping cache index. */ + uint16_t iPage; + /** The number of references. + * The max is UINT16_MAX - 1. */ + uint16_t cRefs; + /** The number inlined references. + * The max is UINT16_MAX - 1. */ + uint16_t cInlinedRefs; + /** Unreferences. */ + uint16_t cUnrefs; + +#if HC_ARCH_BITS == 32 + uint32_t u32Alignment1; +#endif + /** The physical address for this entry. */ + RTHCPHYS HCPhys; +} PGMMAPSETENTRY; +AssertCompileMemberOffset(PGMMAPSETENTRY, iPage, RT_MAX(sizeof(RTR0PTR), sizeof(RTRCPTR))); +AssertCompileMemberAlignment(PGMMAPSETENTRY, HCPhys, sizeof(RTHCPHYS)); +/** Pointer to a mapping cache usage set entry. */ +typedef PGMMAPSETENTRY *PPGMMAPSETENTRY; + +/** + * Mapping cache usage set. + * + * This is used in ring-0 and the raw-mode context to track dynamic mappings + * done during exits / traps. The set is + */ +typedef struct PGMMAPSET +{ + /** The number of occupied entries. + * This is PGMMAPSET_CLOSED if the set is closed and we're not supposed to do + * dynamic mappings. */ + uint32_t cEntries; + /** The start of the current subset. + * This is UINT32_MAX if no subset is currently open. */ + uint32_t iSubset; + /** The index of the current CPU, only valid if the set is open. */ + int32_t iCpu; + uint32_t alignment; + /** The entries. */ + PGMMAPSETENTRY aEntries[64]; + /** HCPhys -> iEntry fast lookup table. 
+ * Use PGMMAPSET_HASH for hashing.
+ * The entries may or may not be valid, check against cEntries. */
+ uint8_t aiHashTable[128];
+} PGMMAPSET;
+AssertCompileSizeAlignment(PGMMAPSET, 8);
+/** Pointer to the mapping cache set. */
+typedef PGMMAPSET *PPGMMAPSET;
+
+/** PGMMAPSET::cEntries value for a closed set. */
+#define PGMMAPSET_CLOSED UINT32_C(0xdeadc0fe)
+
+/** Hash function for aiHashTable. */
+#define PGMMAPSET_HASH(HCPhys) (((HCPhys) >> PAGE_SHIFT) & 127)
+
+
+/** @name Context neutral page mapper TLB.
+ *
+ * Hoping to avoid some code and bug duplication, parts of the GCxxx->CCPtr
+ * code are written in a kind of context neutral way. Time will show whether
+ * this actually makes sense or not...
+ *
+ * @todo this needs to be reconsidered and dropped/redone since the ring-0
+ * context ends up using a global mapping cache on some platforms
+ * (darwin).
+ *
+ * @{ */
+/** @typedef PPGMPAGEMAPTLB
+ * The page mapper TLB pointer type for the current context. */
+/** @typedef PPGMPAGEMAPTLBE
+ * The page mapper TLB entry pointer type for the current context. */
+/** @typedef PPPGMPAGEMAPTLBE
+ * The page mapper TLB entry pointer pointer type for the current context. */
+/** @def PGM_PAGEMAPTLB_ENTRIES
+ * The number of TLB entries in the page mapper TLB for the current context. */
+/** @def PGM_PAGEMAPTLB_IDX
+ * Calculate the TLB index for a guest physical address.
+ * @returns The TLB index.
+ * @param GCPhys The guest physical address. */
+/** @typedef PPGMPAGEMAP
+ * Pointer to a page mapper unit for current context. */
+/** @typedef PPPGMPAGEMAP
+ * Pointer to a page mapper unit pointer for current context. */
+#if defined(IN_RING0) && defined(VBOX_WITH_RAM_IN_KERNEL)
+typedef PPGMPAGER0MAPTLB PPGMPAGEMAPTLB;
+typedef PPGMPAGER0MAPTLBE PPGMPAGEMAPTLBE;
+typedef PPGMPAGER0MAPTLBE *PPPGMPAGEMAPTLBE;
+# define PGM_PAGEMAPTLB_ENTRIES PGM_PAGER0MAPTLB_ENTRIES
+# define PGM_PAGEMAPTLB_IDX(GCPhys) PGM_PAGER0MAPTLB_IDX(GCPhys)
+typedef struct PGMCHUNKR0MAP *PPGMPAGEMAP;
+typedef struct PGMCHUNKR0MAP **PPPGMPAGEMAP;
+#else
+typedef PPGMPAGER3MAPTLB PPGMPAGEMAPTLB;
+typedef PPGMPAGER3MAPTLBE PPGMPAGEMAPTLBE;
+typedef PPGMPAGER3MAPTLBE *PPPGMPAGEMAPTLBE;
+# define PGM_PAGEMAPTLB_ENTRIES PGM_PAGER3MAPTLB_ENTRIES
+# define PGM_PAGEMAPTLB_IDX(GCPhys) PGM_PAGER3MAPTLB_IDX(GCPhys)
+typedef PPGMCHUNKR3MAP PPGMPAGEMAP;
+typedef PPPGMCHUNKR3MAP PPPGMPAGEMAP;
+#endif
+/** @} */
+
+
+/** @name PGM Pool Indexes.
+ * Aka. the unique shadow page identifier.
+ * @{ */
+/** NIL page pool IDX. */
+#define NIL_PGMPOOL_IDX 0
+/** The first normal index. There used to be 5 fictive pages up front, now
+ * there is only the NIL page. */
+#define PGMPOOL_IDX_FIRST 1
+/** The last valid index. (inclusive, 14 bits) */
+#define PGMPOOL_IDX_LAST 0x3fff
+/** @} */
+
+/** The NIL index for the parent chain. */
+#define NIL_PGMPOOL_USER_INDEX ((uint16_t)0xffff)
+#define NIL_PGMPOOL_PRESENT_INDEX ((uint16_t)0xffff)
+
+/**
+ * Node in the chain linking a shadowed page to its parent (user).
+ */
+#pragma pack(1)
+typedef struct PGMPOOLUSER
+{
+ /** The index to the next item in the chain. NIL_PGMPOOL_USER_INDEX is no next. */
+ uint16_t iNext;
+ /** The user page index. */
+ uint16_t iUser;
+ /** Index into the user table. */
+ uint32_t iUserTable;
+} PGMPOOLUSER, *PPGMPOOLUSER;
+typedef const PGMPOOLUSER *PCPGMPOOLUSER;
+#pragma pack()
+
+
+/** The NIL index for the phys ext chain. */
+#define NIL_PGMPOOL_PHYSEXT_INDEX ((uint16_t)0xffff)
+/** The NIL pte index for a phys ext chain slot.
*/ +#define NIL_PGMPOOL_PHYSEXT_IDX_PTE ((uint16_t)0xffff) + +/** + * Node in the chain of physical cross reference extents. + * @todo Calling this an 'extent' is not quite right, find a better name. + * @todo find out the optimal size of the aidx array + */ +#pragma pack(1) +typedef struct PGMPOOLPHYSEXT +{ + /** The index to the next item in the chain. NIL_PGMPOOL_PHYSEXT_INDEX is no next. */ + uint16_t iNext; + /** Alignment. */ + uint16_t u16Align; + /** The user page index. */ + uint16_t aidx[3]; + /** The page table index or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown. */ + uint16_t apte[3]; +} PGMPOOLPHYSEXT, *PPGMPOOLPHYSEXT; +typedef const PGMPOOLPHYSEXT *PCPGMPOOLPHYSEXT; +#pragma pack() + + +/** + * The kind of page that's being shadowed. + */ +typedef enum PGMPOOLKIND +{ + /** The virtual invalid 0 entry. */ + PGMPOOLKIND_INVALID = 0, + /** The entry is free (=unused). */ + PGMPOOLKIND_FREE, + + /** Shw: 32-bit page table; Gst: no paging. */ + PGMPOOLKIND_32BIT_PT_FOR_PHYS, + /** Shw: 32-bit page table; Gst: 32-bit page table. */ + PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT, + /** Shw: 32-bit page table; Gst: 4MB page. */ + PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB, + /** Shw: PAE page table; Gst: no paging. */ + PGMPOOLKIND_PAE_PT_FOR_PHYS, + /** Shw: PAE page table; Gst: 32-bit page table. */ + PGMPOOLKIND_PAE_PT_FOR_32BIT_PT, + /** Shw: PAE page table; Gst: Half of a 4MB page. */ + PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB, + /** Shw: PAE page table; Gst: PAE page table. */ + PGMPOOLKIND_PAE_PT_FOR_PAE_PT, + /** Shw: PAE page table; Gst: 2MB page. */ + PGMPOOLKIND_PAE_PT_FOR_PAE_2MB, + + /** Shw: 32-bit page directory. Gst: 32-bit page directory. */ + PGMPOOLKIND_32BIT_PD, + /** Shw: 32-bit page directory. Gst: no paging. */ + PGMPOOLKIND_32BIT_PD_PHYS, + /** Shw: PAE page directory 0; Gst: 32-bit page directory. */ + PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD, + /** Shw: PAE page directory 1; Gst: 32-bit page directory. */ + PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD, + /** Shw: PAE page directory 2; Gst: 32-bit page directory. */ + PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD, + /** Shw: PAE page directory 3; Gst: 32-bit page directory. */ + PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD, + /** Shw: PAE page directory; Gst: PAE page directory. */ + PGMPOOLKIND_PAE_PD_FOR_PAE_PD, + /** Shw: PAE page directory; Gst: no paging. Note: +NP. */ + PGMPOOLKIND_PAE_PD_PHYS, + + /** Shw: PAE page directory pointer table (legacy, 4 entries); Gst 32 bits paging. */ + PGMPOOLKIND_PAE_PDPT_FOR_32BIT, + /** Shw: PAE page directory pointer table (legacy, 4 entries); Gst PAE PDPT. */ + PGMPOOLKIND_PAE_PDPT, + /** Shw: PAE page directory pointer table (legacy, 4 entries); Gst: no paging. */ + PGMPOOLKIND_PAE_PDPT_PHYS, + + /** Shw: 64-bit page directory pointer table; Gst: 64-bit page directory pointer table. */ + PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT, + /** Shw: 64-bit page directory pointer table; Gst: no paging. */ + PGMPOOLKIND_64BIT_PDPT_FOR_PHYS, + /** Shw: 64-bit page directory table; Gst: 64-bit page directory table. */ + PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD, + /** Shw: 64-bit page directory table; Gst: no paging. */ + PGMPOOLKIND_64BIT_PD_FOR_PHYS, /* 24 */ + + /** Shw: 64-bit PML4; Gst: 64-bit PML4. */ + PGMPOOLKIND_64BIT_PML4, + + /** Shw: EPT page directory pointer table; Gst: no paging. */ + PGMPOOLKIND_EPT_PDPT_FOR_PHYS, + /** Shw: EPT page directory table; Gst: no paging. */ + PGMPOOLKIND_EPT_PD_FOR_PHYS, + /** Shw: EPT page table; Gst: no paging. */ + PGMPOOLKIND_EPT_PT_FOR_PHYS, + + /** Shw: Root Nested paging table. 
*/ + PGMPOOLKIND_ROOT_NESTED, + + /** The last valid entry. */ + PGMPOOLKIND_LAST = PGMPOOLKIND_ROOT_NESTED +} PGMPOOLKIND; + +/** + * The access attributes of the page; only applies to big pages. + */ +typedef enum +{ + PGMPOOLACCESS_DONTCARE = 0, + PGMPOOLACCESS_USER_RW, + PGMPOOLACCESS_USER_R, + PGMPOOLACCESS_USER_RW_NX, + PGMPOOLACCESS_USER_R_NX, + PGMPOOLACCESS_SUPERVISOR_RW, + PGMPOOLACCESS_SUPERVISOR_R, + PGMPOOLACCESS_SUPERVISOR_RW_NX, + PGMPOOLACCESS_SUPERVISOR_R_NX +} PGMPOOLACCESS; + +/** + * The tracking data for a page in the pool. + */ +typedef struct PGMPOOLPAGE +{ + /** AVL node code with the (HC) physical address of this page. */ + AVLOHCPHYSNODECORE Core; + /** Pointer to the R3 mapping of the page. */ + R3PTRTYPE(void *) pvPageR3; + /** Pointer to the R0 mapping of the page. */ + R0PTRTYPE(void *) pvPageR0; + /** The guest physical address. */ + RTGCPHYS GCPhys; + /** The kind of page we're shadowing. (This is really a PGMPOOLKIND enum.) */ + uint8_t enmKind; + /** The subkind of page we're shadowing. (This is really a PGMPOOLACCESS enum.) */ + uint8_t enmAccess; + /** This supplements enmKind and enmAccess */ + bool fA20Enabled : 1; + + /** Used to indicate that the page is zeroed. */ + bool fZeroed : 1; + /** Used to indicate that a PT has non-global entries. */ + bool fSeenNonGlobal : 1; + /** Used to indicate that we're monitoring writes to the guest page. */ + bool fMonitored : 1; + /** Used to indicate that the page is in the cache (e.g. in the GCPhys hash). + * (All pages are in the age list.) */ + bool fCached : 1; + /** This is used by the R3 access handlers when invoked by an async thread. + * It's a hack required because of REMR3NotifyHandlerPhysicalDeregister. */ + bool volatile fReusedFlushPending : 1; + /** Used to mark the page as dirty (write monitoring is temporarily + * off). */ + bool fDirty : 1; + bool fPadding1 : 1; + bool fPadding2; + + /** The index of this page. */ + uint16_t idx; + /** The next entry in the list this page currently resides in. + * It's either in the free list or in the GCPhys hash. */ + uint16_t iNext; + /** Head of the user chain. NIL_PGMPOOL_USER_INDEX if not currently in use. */ + uint16_t iUserHead; + /** The number of present entries. */ + uint16_t cPresent; + /** The first entry in the table which is present. */ + uint16_t iFirstPresent; + /** The number of modifications to the monitored page. */ + uint16_t cModifications; + /** The next modified page. NIL_PGMPOOL_IDX if tail. */ + uint16_t iModifiedNext; + /** The previous modified page. NIL_PGMPOOL_IDX if head. */ + uint16_t iModifiedPrev; + /** The next page sharing access handler. NIL_PGMPOOL_IDX if tail. */ + uint16_t iMonitoredNext; + /** The previous page sharing access handler. NIL_PGMPOOL_IDX if head. */ + uint16_t iMonitoredPrev; + /** The next page in the age list. */ + uint16_t iAgeNext; + /** The previous page in the age list. */ + uint16_t iAgePrev; + /** Index into PGMPOOL::aDirtyPages if fDirty is set. */ + uint8_t idxDirtyEntry; + + /** @name Access handler statistics to determine whether the guest is + * (re)initializing a page table. + * @{ */ + RTGCPTR GCPtrLastAccessHandlerRip; + RTGCPTR GCPtrLastAccessHandlerFault; + uint64_t cLastAccessHandler; + /** @} */ + /** Used to indicate that this page can't be flushed. Important for cr3 root pages or shadow pae pd pages. */ + uint32_t volatile cLocked; +#if GC_ARCH_BITS == 64 + uint32_t u32Alignment3; +#endif +# ifdef VBOX_STRICT + RTGCPTR GCPtrDirtyFault; +# endif +} PGMPOOLPAGE; +/** Pointer to a pool page. 
*/ +typedef PGMPOOLPAGE *PPGMPOOLPAGE; +/** Pointer to a const pool page. */ +typedef PGMPOOLPAGE const *PCPGMPOOLPAGE; +/** Pointer to a pool page pointer. */ +typedef PGMPOOLPAGE **PPPGMPOOLPAGE; + + +/** The hash table size. */ +# define PGMPOOL_HASH_SIZE 0x40 +/** The hash function. */ +# define PGMPOOL_HASH(GCPhys) ( ((GCPhys) >> PAGE_SHIFT) & (PGMPOOL_HASH_SIZE - 1) ) + + +/** + * The shadow page pool instance data. + * + * It's all one big allocation made at init time, except for the + * pages that is. The user nodes follows immediately after the + * page structures. + */ +typedef struct PGMPOOL +{ + /** The VM handle - R3 Ptr. */ + PVMR3 pVMR3; + /** The VM handle - R0 Ptr. */ + R0PTRTYPE(PVMCC) pVMR0; + /** The max pool size. This includes the special IDs. */ + uint16_t cMaxPages; + /** The current pool size. */ + uint16_t cCurPages; + /** The head of the free page list. */ + uint16_t iFreeHead; + /* Padding. */ + uint16_t u16Padding; + /** Head of the chain of free user nodes. */ + uint16_t iUserFreeHead; + /** The number of user nodes we've allocated. */ + uint16_t cMaxUsers; + /** The number of present page table entries in the entire pool. */ + uint32_t cPresent; + /** Pointer to the array of user nodes - R3 pointer. */ + R3PTRTYPE(PPGMPOOLUSER) paUsersR3; + /** Pointer to the array of user nodes - R0 pointer. */ + R0PTRTYPE(PPGMPOOLUSER) paUsersR0; + /** Head of the chain of free phys ext nodes. */ + uint16_t iPhysExtFreeHead; + /** The number of user nodes we've allocated. */ + uint16_t cMaxPhysExts; + uint32_t u32Padding0b; + /** Pointer to the array of physical xref extent nodes - R3 pointer. */ + R3PTRTYPE(PPGMPOOLPHYSEXT) paPhysExtsR3; + /** Pointer to the array of physical xref extent nodes - R0 pointer. */ + R0PTRTYPE(PPGMPOOLPHYSEXT) paPhysExtsR0; + /** Hash table for GCPhys addresses. */ + uint16_t aiHash[PGMPOOL_HASH_SIZE]; + /** The head of the age list. */ + uint16_t iAgeHead; + /** The tail of the age list. */ + uint16_t iAgeTail; + /** Set if the cache is enabled. */ + bool fCacheEnabled; + /** Alignment padding. */ + bool afPadding1[3]; + /** Head of the list of modified pages. */ + uint16_t iModifiedHead; + /** The current number of modified pages. */ + uint16_t cModifiedPages; + /** Physical access handler type registration handle. */ + PGMPHYSHANDLERTYPE hAccessHandlerType; + /** Next available slot (in aDirtyPages). */ + uint32_t idxFreeDirtyPage; + /** Number of active dirty pages. */ + uint32_t cDirtyPages; + /** Array of current dirty pgm pool page indices. */ + uint16_t aidxDirtyPages[16]; + /** Array running in parallel to aidxDirtyPages with the page data. */ + struct + { + uint64_t aPage[512]; + } aDirtyPages[16]; + + /** The number of pages currently in use. */ + uint16_t cUsedPages; +#ifdef VBOX_WITH_STATISTICS + /** The high water mark for cUsedPages. */ + uint16_t cUsedPagesHigh; + uint32_t Alignment1; /**< Align the next member on a 64-bit boundary. */ + /** Profiling pgmPoolAlloc(). */ + STAMPROFILEADV StatAlloc; + /** Profiling pgmR3PoolClearDoIt(). */ + STAMPROFILE StatClearAll; + /** Profiling pgmR3PoolReset(). */ + STAMPROFILE StatR3Reset; + /** Profiling pgmPoolFlushPage(). */ + STAMPROFILE StatFlushPage; + /** Profiling pgmPoolFree(). */ + STAMPROFILE StatFree; + /** Counting explicit flushes by PGMPoolFlushPage(). */ + STAMCOUNTER StatForceFlushPage; + /** Counting explicit flushes of dirty pages by PGMPoolFlushPage(). */ + STAMCOUNTER StatForceFlushDirtyPage; + /** Counting flushes for reused pages. 
*/ + STAMCOUNTER StatForceFlushReused; + /** Profiling time spent zeroing pages. */ + STAMPROFILE StatZeroPage; + /** Profiling of pgmPoolTrackDeref. */ + STAMPROFILE StatTrackDeref; + /** Profiling pgmTrackFlushGCPhysPT. */ + STAMPROFILE StatTrackFlushGCPhysPT; + /** Profiling pgmTrackFlushGCPhysPTs. */ + STAMPROFILE StatTrackFlushGCPhysPTs; + /** Profiling pgmTrackFlushGCPhysPTsSlow. */ + STAMPROFILE StatTrackFlushGCPhysPTsSlow; + /** Number of times we've been out of user records. */ + STAMCOUNTER StatTrackFreeUpOneUser; + /** Nr of flushed entries. */ + STAMCOUNTER StatTrackFlushEntry; + /** Nr of updated entries. */ + STAMCOUNTER StatTrackFlushEntryKeep; + /** Profiling deref activity related tracking GC physical pages. */ + STAMPROFILE StatTrackDerefGCPhys; + /** Number of linear searches for a HCPhys in the ram ranges. */ + STAMCOUNTER StatTrackLinearRamSearches; + /** The number of failing pgmPoolTrackPhysExtAlloc calls. */ + STAMCOUNTER StamTrackPhysExtAllocFailures; + + /** Profiling the RC/R0 \#PF access handler. */ + STAMPROFILE StatMonitorPfRZ; + /** Profiling the RC/R0 access we've handled (except REP STOSD). */ + STAMPROFILE StatMonitorPfRZHandled; + /** Times we've failed interpreting the instruction. */ + STAMCOUNTER StatMonitorPfRZEmulateInstr; + /** Profiling the pgmPoolFlushPage calls made from the RC/R0 access handler. */ + STAMPROFILE StatMonitorPfRZFlushPage; + /** Times we've detected a page table reinit. */ + STAMCOUNTER StatMonitorPfRZFlushReinit; + /** Counting flushes for pages that are modified too often. */ + STAMCOUNTER StatMonitorPfRZFlushModOverflow; + /** Times we've detected fork(). */ + STAMCOUNTER StatMonitorPfRZFork; + /** Times we've failed interpreting a patch code instruction. */ + STAMCOUNTER StatMonitorPfRZIntrFailPatch1; + /** Times we've failed interpreting a patch code instruction during flushing. */ + STAMCOUNTER StatMonitorPfRZIntrFailPatch2; + /** The number of times we've seen rep prefixes we can't handle. */ + STAMCOUNTER StatMonitorPfRZRepPrefix; + /** Profiling the REP STOSD cases we've handled. */ + STAMPROFILE StatMonitorPfRZRepStosd; + + /** Profiling the R0/RC regular access handler. */ + STAMPROFILE StatMonitorRZ; + /** Profiling the pgmPoolFlushPage calls made from the regular access handler in R0/RC. */ + STAMPROFILE StatMonitorRZFlushPage; + /** Per access size counts indexed by size minus 1, last for larger. */ + STAMCOUNTER aStatMonitorRZSizes[16+3]; + /** Missaligned access counts indexed by offset - 1. */ + STAMCOUNTER aStatMonitorRZMisaligned[7]; + + /** Nr of handled PT faults. */ + STAMCOUNTER StatMonitorRZFaultPT; + /** Nr of handled PD faults. */ + STAMCOUNTER StatMonitorRZFaultPD; + /** Nr of handled PDPT faults. */ + STAMCOUNTER StatMonitorRZFaultPDPT; + /** Nr of handled PML4 faults. */ + STAMCOUNTER StatMonitorRZFaultPML4; + + /** Profiling the R3 access handler. */ + STAMPROFILE StatMonitorR3; + /** Profiling the pgmPoolFlushPage calls made from the R3 access handler. */ + STAMPROFILE StatMonitorR3FlushPage; + /** Per access size counts indexed by size minus 1, last for larger. */ + STAMCOUNTER aStatMonitorR3Sizes[16+3]; + /** Missaligned access counts indexed by offset - 1. */ + STAMCOUNTER aStatMonitorR3Misaligned[7]; + /** Nr of handled PT faults. */ + STAMCOUNTER StatMonitorR3FaultPT; + /** Nr of handled PD faults. */ + STAMCOUNTER StatMonitorR3FaultPD; + /** Nr of handled PDPT faults. */ + STAMCOUNTER StatMonitorR3FaultPDPT; + /** Nr of handled PML4 faults. 
*/ + STAMCOUNTER StatMonitorR3FaultPML4; + + /** Times we've called pgmPoolResetDirtyPages (and there were dirty page). */ + STAMCOUNTER StatResetDirtyPages; + /** Times we've called pgmPoolAddDirtyPage. */ + STAMCOUNTER StatDirtyPage; + /** Times we've had to flush duplicates for dirty page management. */ + STAMCOUNTER StatDirtyPageDupFlush; + /** Times we've had to flush because of overflow. */ + STAMCOUNTER StatDirtyPageOverFlowFlush; + + /** The high water mark for cModifiedPages. */ + uint16_t cModifiedPagesHigh; + uint16_t Alignment2[3]; /**< Align the next member on a 64-bit boundary. */ + + /** The number of cache hits. */ + STAMCOUNTER StatCacheHits; + /** The number of cache misses. */ + STAMCOUNTER StatCacheMisses; + /** The number of times we've got a conflict of 'kind' in the cache. */ + STAMCOUNTER StatCacheKindMismatches; + /** Number of times we've been out of pages. */ + STAMCOUNTER StatCacheFreeUpOne; + /** The number of cacheable allocations. */ + STAMCOUNTER StatCacheCacheable; + /** The number of uncacheable allocations. */ + STAMCOUNTER StatCacheUncacheable; +#else + uint32_t Alignment3; /**< Align the next member on a 64-bit boundary. */ +#endif + /** Profiling PGMR0PoolGrow(). */ + STAMPROFILE StatGrow; + /** The AVL tree for looking up a page by its HC physical address. */ + AVLOHCPHYSTREE HCPhysTree; + uint32_t Alignment4; /**< Align the next member on a 64-bit boundary. */ + /** Array of pages. (cMaxPages in length) + * The Id is the index into thist array. + */ + PGMPOOLPAGE aPages[PGMPOOL_IDX_FIRST]; +} PGMPOOL, *PPGMPOOL, **PPPGMPOOL; +AssertCompileMemberAlignment(PGMPOOL, iModifiedHead, 8); +AssertCompileMemberAlignment(PGMPOOL, aDirtyPages, 8); +AssertCompileMemberAlignment(PGMPOOL, cUsedPages, 8); +#ifdef VBOX_WITH_STATISTICS +AssertCompileMemberAlignment(PGMPOOL, StatAlloc, 8); +#endif +AssertCompileMemberAlignment(PGMPOOL, aPages, 8); + + +/** @def PGMPOOL_PAGE_2_PTR + * Maps a pool page pool into the current context. + * + * @returns VBox status code. + * @param a_pVM Pointer to the VM. + * @param a_pPage The pool page. + * + * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume of the + * small page window employeed by that function. Be careful. + * @remark There is no need to assert on the result. + */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 +# define PGMPOOL_PAGE_2_PTR(a_pVM, a_pPage) pgmPoolMapPageInlined((a_pVM), (a_pPage) RTLOG_COMMA_SRC_POS) +#elif defined(VBOX_STRICT) || 1 /* temporarily going strict here */ +# define PGMPOOL_PAGE_2_PTR(a_pVM, a_pPage) pgmPoolMapPageStrict(a_pPage, __FUNCTION__) +DECLINLINE(void *) pgmPoolMapPageStrict(PPGMPOOLPAGE a_pPage, const char *pszCaller) +{ + RT_NOREF(pszCaller); + AssertPtr(a_pPage); + AssertMsg(RT_VALID_PTR(a_pPage->CTX_SUFF(pvPage)), + ("enmKind=%d idx=%#x HCPhys=%RHp GCPhys=%RGp pvPageR3=%p pvPageR0=%p caller=%s\n", + a_pPage->enmKind, a_pPage->idx, a_pPage->Core.Key, a_pPage->GCPhys, a_pPage->pvPageR3, a_pPage->pvPageR0, pszCaller)); + return a_pPage->CTX_SUFF(pvPage); +} +#else +# define PGMPOOL_PAGE_2_PTR(pVM, a_pPage) ((a_pPage)->CTX_SUFF(pvPage)) +#endif + + +/** @def PGMPOOL_PAGE_2_PTR_V2 + * Maps a pool page pool into the current context, taking both VM and VMCPU. + * + * @returns VBox status code. + * @param a_pVM Pointer to the VM. + * @param a_pVCpu The current CPU. + * @param a_pPage The pool page. + * + * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume of the + * small page window employeed by that function. Be careful. 
+ * @remark There is no need to assert on the result.
+ */
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
+# define PGMPOOL_PAGE_2_PTR_V2(a_pVM, a_pVCpu, a_pPage) pgmPoolMapPageV2Inlined((a_pVM), (a_pVCpu), (a_pPage) RTLOG_COMMA_SRC_POS)
+#else
+# define PGMPOOL_PAGE_2_PTR_V2(a_pVM, a_pVCpu, a_pPage) PGMPOOL_PAGE_2_PTR((a_pVM), (a_pPage))
+#endif
+
+
+/** @name Per guest page tracking data.
+ * This is currently kept as a 16-bit word in the PGMPAGE structure, the idea
+ * though is to use more bits for it and split it up later on. But for now
+ * we'll play safe and change as little as possible.
+ *
+ * The 16-bit word has two parts:
+ *
+ * The first 14 bits form the @a idx field. It is either the index of a page in
+ * the shadow page pool, or an index into the extent list.
+ *
+ * The 2 topmost bits make up the @a cRefs field, which counts the number of
+ * shadow page pool references to the page. If cRefs equals
+ * PGMPOOL_CREFS_PHYSEXT, then the @a idx field is an index into the extent
+ * (misnomer) table and not the shadow page pool.
+ *
+ * See PGM_PAGE_GET_TRACKING and PGM_PAGE_SET_TRACKING for how to get and set
+ * the 16-bit word.
+ *
+ * @{ */
+/** The shift count for getting to the cRefs part. */
+#define PGMPOOL_TD_CREFS_SHIFT 14
+/** The mask applied after shifting the tracking data down by
+ * PGMPOOL_TD_CREFS_SHIFT. */
+#define PGMPOOL_TD_CREFS_MASK 0x3
+/** The cRefs value used to indicate that the idx is the head of a
+ * physical cross reference list. */
+#define PGMPOOL_TD_CREFS_PHYSEXT PGMPOOL_TD_CREFS_MASK
+/** The shift used to get idx. */
+#define PGMPOOL_TD_IDX_SHIFT 0
+/** The mask applied to the idx after shifting down by PGMPOOL_TD_IDX_SHIFT. */
+#define PGMPOOL_TD_IDX_MASK 0x3fff
+/** The idx value when we're out of PGMPOOLPHYSEXT entries and/or there are
+ * simply too many mappings of this page. */
+#define PGMPOOL_TD_IDX_OVERFLOWED PGMPOOL_TD_IDX_MASK
+
+/** @def PGMPOOL_TD_MAKE
+ * Makes a 16-bit tracking data word.
+ *
+ * @returns tracking data.
+ * @param cRefs The @a cRefs field. Must be within bounds!
+ * @param idx The @a idx field. Must also be within bounds! */
+#define PGMPOOL_TD_MAKE(cRefs, idx) ( ((cRefs) << PGMPOOL_TD_CREFS_SHIFT) | (idx) )
+
+/** @def PGMPOOL_TD_GET_CREFS
+ * Get the @a cRefs field from a tracking data word.
+ *
+ * @returns The @a cRefs field
+ * @param u16 The tracking data word.
+ * @remarks This will only return 1 or PGMPOOL_TD_CREFS_PHYSEXT for a
+ * non-zero @a u16. */
+#define PGMPOOL_TD_GET_CREFS(u16) ( ((u16) >> PGMPOOL_TD_CREFS_SHIFT) & PGMPOOL_TD_CREFS_MASK )
+
+/** @def PGMPOOL_TD_GET_IDX
+ * Get the @a idx field from a tracking data word.
+ *
+ * @returns The @a idx field
+ * @param u16 The tracking data word.
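+ *
+ * @par Example
+ *      An illustrative round-trip through the tracking-data macros; the
+ *      values below are made up for the example and are not taken from the
+ *      actual tracking code:
+ * @code
+ *      uint16_t const u16 = PGMPOOL_TD_MAKE(1, 0x123);
+ *      Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
+ *      Assert(PGMPOOL_TD_GET_IDX(u16) == 0x123);
+ * @endcode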
*/
+#define PGMPOOL_TD_GET_IDX(u16) ( ((u16) >> PGMPOOL_TD_IDX_SHIFT) & PGMPOOL_TD_IDX_MASK )
+/** @} */
+
+
+
+/** @name A20 gate macros
+ * @{ */
+#define PGM_WITH_A20
+#ifdef PGM_WITH_A20
+# define PGM_A20_IS_ENABLED(a_pVCpu) ((a_pVCpu)->pgm.s.fA20Enabled)
+# define PGM_A20_APPLY(a_pVCpu, a_GCPhys) ((a_GCPhys) & (a_pVCpu)->pgm.s.GCPhysA20Mask)
+# define PGM_A20_APPLY_TO_VAR(a_pVCpu, a_GCPhysVar) \
+ do { a_GCPhysVar &= (a_pVCpu)->pgm.s.GCPhysA20Mask; } while (0)
+# define PGM_A20_ASSERT_MASKED(pVCpu, a_GCPhys) Assert(PGM_A20_APPLY(pVCpu, a_GCPhys) == (a_GCPhys))
+#else
+# define PGM_A20_IS_ENABLED(a_pVCpu) (true)
+# define PGM_A20_APPLY(a_pVCpu, a_GCPhys) (a_GCPhys)
+# define PGM_A20_APPLY_TO_VAR(a_pVCpu, a_GCPhysVar) do { } while (0)
+# define PGM_A20_ASSERT_MASKED(pVCpu, a_GCPhys) do { } while (0)
+#endif
+/** @} */
+
+
+/**
+ * Roots and anchors for trees and lists employing self-relative offsets as
+ * pointers.
+ *
+ * When using self-relative offsets instead of pointers, the offsets need to be
+ * the same in all contexts. Thus the roots and anchors need to live on the
+ * hyper heap just like the nodes.
+ */
+typedef struct PGMTREES
+{
+ /** List of physical access handler types (offset pointers) of type
+ * PGMPHYSHANDLERTYPEINT. This is needed for relocations. */
+ RTLISTOFF32ANCHOR HeadPhysHandlerTypes;
+ /** Physical access handlers (AVL range+offsetptr tree). */
+ AVLROGCPHYSTREE PhysHandlers;
+} PGMTREES;
+/** Pointer to PGM trees. */
+typedef PGMTREES *PPGMTREES;
+
+
+/**
+ * Page fault guest state for the AMD64 paging mode.
+ */
+typedef struct PGMPTWALKCORE
+{
+ /** The guest virtual address that is being resolved by the walk
+ * (input). */
+ RTGCPTR GCPtr;
+
+ /** The guest physical address that is the result of the walk.
+ * @remarks only valid if fSucceeded is set. */
+ RTGCPHYS GCPhys;
+
+ /** Set if the walk succeeded, i.e. GCPhys is valid. */
+ bool fSucceeded;
+ /** The level the problem arose at.
+ * PTE is level 1, PDE is level 2, PDPE is level 3, PML4 is level 4, CR3 is
+ * level 8. This is 0 on success. */
+ uint8_t uLevel;
+ /** Set if the page isn't present. */
+ bool fNotPresent;
+ /** Encountered a bad physical address. */
+ bool fBadPhysAddr;
+ /** Set if there were reserved bit violations. */
+ bool fRsvdError;
+ /** Set if it involves a big page (2/4 MB). */
+ bool fBigPage;
+ /** Set if it involves a gigantic page (1 GB). */
+ bool fGigantPage;
+ /** The effective X86_PTE_US flag for the address. */
+ bool fEffectiveUS;
+ /** The effective X86_PTE_RW flag for the address. */
+ bool fEffectiveRW;
+ /** The effective X86_PTE_NX flag for the address. */
+ bool fEffectiveNX;
+ bool afPadding1[2];
+ /** Effective flags thus far: RW, US, PWT, PCD, A, ~NX >> 63.
+ * The NX bit is inverted and shifted down 63 places to bit 0. */
+ uint32_t fEffective;
+} PGMPTWALKCORE;
+
+/** @name PGMPTWALKCORE::fEffective bits.
+ * @{ */
+/** Effective execute bit (!NX). */
+#define PGMPTWALK_EFF_X UINT32_C(1)
+/** Effective write access bit. */
+#define PGMPTWALK_EFF_RW X86_PTE_RW
+/** Effective user-mode access bit. */
+#define PGMPTWALK_EFF_US X86_PTE_US
+/** Effective write through cache bit. */
+#define PGMPTWALK_EFF_PWT X86_PTE_PWT
+/** Effective cache disabled bit. */
+#define PGMPTWALK_EFF_PCD X86_PTE_PCD
+/** Effective accessed bit. */
+#define PGMPTWALK_EFF_A X86_PTE_A
+/** The dirty bit of the final entry. */
+#define PGMPTWALK_EFF_D X86_PTE_D
+/** The PAT bit of the final entry. */
+#define PGMPTWALK_EFF_PAT X86_PTE_PAT
+/** The global bit of the final entry.
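+ *
+ * @par Example
+ *      An illustrative sketch of how the PGMPTWALK_EFF_XXX flags in this
+ *      group can be combined; the walk variable is hypothetical and not taken
+ *      from the actual page-walk code:
+ * @code
+ *      PGMPTWALKCORE Walk;
+ *      // ... perform a guest page table walk that fills in Walk ...
+ *      uint32_t const fNeeded = PGMPTWALK_EFF_RW | PGMPTWALK_EFF_US;
+ *      bool const fUserWritable = (Walk.fEffective & fNeeded) == fNeeded;
+ * @endcode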
*/ +#define PGMPTWALK_EFF_G X86_PTE_G +/** @} */ + + +/** + * Guest page table walk for the AMD64 mode. + */ +typedef struct PGMPTWALKGSTAMD64 +{ + /** The common core. */ + PGMPTWALKCORE Core; + + PX86PML4 pPml4; + PX86PML4E pPml4e; + X86PML4E Pml4e; + + PX86PDPT pPdpt; + PX86PDPE pPdpe; + X86PDPE Pdpe; + + PX86PDPAE pPd; + PX86PDEPAE pPde; + X86PDEPAE Pde; + + PX86PTPAE pPt; + PX86PTEPAE pPte; + X86PTEPAE Pte; +} PGMPTWALKGSTAMD64; +/** Pointer to a AMD64 guest page table walk. */ +typedef PGMPTWALKGSTAMD64 *PPGMPTWALKGSTAMD64; +/** Pointer to a const AMD64 guest page table walk. */ +typedef PGMPTWALKGSTAMD64 const *PCPGMPTWALKGSTAMD64; + +/** + * Guest page table walk for the PAE mode. + */ +typedef struct PGMPTWALKGSTPAE +{ + /** The common core. */ + PGMPTWALKCORE Core; + + PX86PDPT pPdpt; + PX86PDPE pPdpe; + X86PDPE Pdpe; + + PX86PDPAE pPd; + PX86PDEPAE pPde; + X86PDEPAE Pde; + + PX86PTPAE pPt; + PX86PTEPAE pPte; + X86PTEPAE Pte; +} PGMPTWALKGSTPAE; +/** Pointer to a PAE guest page table walk. */ +typedef PGMPTWALKGSTPAE *PPGMPTWALKGSTPAE; +/** Pointer to a const AMD64 guest page table walk. */ +typedef PGMPTWALKGSTPAE const *PCPGMPTWALKGSTPAE; + +/** + * Guest page table walk for the 32-bit mode. + */ +typedef struct PGMPTWALKGST32BIT +{ + /** The common core. */ + PGMPTWALKCORE Core; + + PX86PD pPd; + PX86PDE pPde; + X86PDE Pde; + + PX86PT pPt; + PX86PTE pPte; + X86PTE Pte; +} PGMPTWALKGST32BIT; +/** Pointer to a 32-bit guest page table walk. */ +typedef PGMPTWALKGST32BIT *PPGMPTWALKGST32BIT; +/** Pointer to a const 32-bit guest page table walk. */ +typedef PGMPTWALKGST32BIT const *PCPGMPTWALKGST32BIT; + +/** + * Which part of PGMPTWALKGST that is valid. + */ +typedef enum PGMPTWALKGSTTYPE +{ + /** Customary invalid 0 value. */ + PGMPTWALKGSTTYPE_INVALID = 0, + /** PGMPTWALKGST::u.Amd64 is valid. */ + PGMPTWALKGSTTYPE_AMD64, + /** PGMPTWALKGST::u.Pae is valid. */ + PGMPTWALKGSTTYPE_PAE, + /** PGMPTWALKGST::u.Legacy is valid. */ + PGMPTWALKGSTTYPE_32BIT, + /** Customary 32-bit type hack. */ + PGMPTWALKGSTTYPE_32BIT_HACK = 0x7fff0000 +} PGMPTWALKGSTTYPE; + +/** + * Combined guest page table walk result. + */ +typedef struct PGMPTWALKGST +{ + union + { + /** The page walker core - always valid. */ + PGMPTWALKCORE Core; + /** The page walker for AMD64. */ + PGMPTWALKGSTAMD64 Amd64; + /** The page walker for PAE (32-bit). */ + PGMPTWALKGSTPAE Pae; + /** The page walker for 32-bit paging (called legacy due to C naming + * convension). */ + PGMPTWALKGST32BIT Legacy; + } u; + /** Indicates which part of the union is valid. */ + PGMPTWALKGSTTYPE enmType; +} PGMPTWALKGST; +/** Pointer to a combined guest page table walk result. */ +typedef PGMPTWALKGST *PPGMPTWALKGST; +/** Pointer to a read-only combined guest page table walk result. */ +typedef PGMPTWALKGST const *PCPGMPTWALKGST; + + +/** @name Paging mode macros + * @{ + */ +#ifdef IN_RING3 +# define PGM_CTX(a,b) a##R3##b +# define PGM_CTX_STR(a,b) a "R3" b +# define PGM_CTX_DECL(type) DECLCALLBACK(type) +#elif defined(IN_RING0) +# define PGM_CTX(a,b) a##R0##b +# define PGM_CTX_STR(a,b) a "R0" b +# define PGM_CTX_DECL(type) VMMDECL(type) +#else +# error "Not IN_RING3 or IN_RING0!" 
+#endif + +#define PGM_GST_NAME_REAL(name) PGM_CTX(pgm,GstReal##name) +#define PGM_GST_NAME_RC_REAL_STR(name) "pgmRCGstReal" #name +#define PGM_GST_NAME_R0_REAL_STR(name) "pgmR0GstReal" #name +#define PGM_GST_NAME_PROT(name) PGM_CTX(pgm,GstProt##name) +#define PGM_GST_NAME_RC_PROT_STR(name) "pgmRCGstProt" #name +#define PGM_GST_NAME_R0_PROT_STR(name) "pgmR0GstProt" #name +#define PGM_GST_NAME_32BIT(name) PGM_CTX(pgm,Gst32Bit##name) +#define PGM_GST_NAME_RC_32BIT_STR(name) "pgmRCGst32Bit" #name +#define PGM_GST_NAME_R0_32BIT_STR(name) "pgmR0Gst32Bit" #name +#define PGM_GST_NAME_PAE(name) PGM_CTX(pgm,GstPAE##name) +#define PGM_GST_NAME_RC_PAE_STR(name) "pgmRCGstPAE" #name +#define PGM_GST_NAME_R0_PAE_STR(name) "pgmR0GstPAE" #name +#define PGM_GST_NAME_AMD64(name) PGM_CTX(pgm,GstAMD64##name) +#define PGM_GST_NAME_RC_AMD64_STR(name) "pgmRCGstAMD64" #name +#define PGM_GST_NAME_R0_AMD64_STR(name) "pgmR0GstAMD64" #name +#define PGM_GST_DECL(type, name) PGM_CTX_DECL(type) PGM_GST_NAME(name) + +#define PGM_SHW_NAME_32BIT(name) PGM_CTX(pgm,Shw32Bit##name) +#define PGM_SHW_NAME_RC_32BIT_STR(name) "pgmRCShw32Bit" #name +#define PGM_SHW_NAME_R0_32BIT_STR(name) "pgmR0Shw32Bit" #name +#define PGM_SHW_NAME_PAE(name) PGM_CTX(pgm,ShwPAE##name) +#define PGM_SHW_NAME_RC_PAE_STR(name) "pgmRCShwPAE" #name +#define PGM_SHW_NAME_R0_PAE_STR(name) "pgmR0ShwPAE" #name +#define PGM_SHW_NAME_AMD64(name) PGM_CTX(pgm,ShwAMD64##name) +#define PGM_SHW_NAME_RC_AMD64_STR(name) "pgmRCShwAMD64" #name +#define PGM_SHW_NAME_R0_AMD64_STR(name) "pgmR0ShwAMD64" #name +#define PGM_SHW_NAME_NESTED_32BIT(name) PGM_CTX(pgm,ShwNested32Bit##name) +#define PGM_SHW_NAME_RC_NESTED_32BIT_STR(name) "pgmRCShwNested32Bit" #name +#define PGM_SHW_NAME_R0_NESTED_32BIT_STR(name) "pgmR0ShwNested32Bit" #name +#define PGM_SHW_NAME_NESTED_PAE(name) PGM_CTX(pgm,ShwNestedPAE##name) +#define PGM_SHW_NAME_RC_NESTED_PAE_STR(name) "pgmRCShwNestedPAE" #name +#define PGM_SHW_NAME_R0_NESTED_PAE_STR(name) "pgmR0ShwNestedPAE" #name +#define PGM_SHW_NAME_NESTED_AMD64(name) PGM_CTX(pgm,ShwNestedAMD64##name) +#define PGM_SHW_NAME_RC_NESTED_AMD64_STR(name) "pgmRCShwNestedAMD64" #name +#define PGM_SHW_NAME_R0_NESTED_AMD64_STR(name) "pgmR0ShwNestedAMD64" #name +#define PGM_SHW_NAME_EPT(name) PGM_CTX(pgm,ShwEPT##name) +#define PGM_SHW_NAME_RC_EPT_STR(name) "pgmRCShwEPT" #name +#define PGM_SHW_NAME_R0_EPT_STR(name) "pgmR0ShwEPT" #name +#define PGM_SHW_NAME_NONE(name) PGM_CTX(pgm,ShwNone##name) +#define PGM_SHW_NAME_RC_NONE_STR(name) "pgmRCShwNone" #name +#define PGM_SHW_NAME_R0_NONE_STR(name) "pgmR0ShwNone" #name +#define PGM_SHW_DECL(type, name) PGM_CTX_DECL(type) PGM_SHW_NAME(name) + +/* Shw_Gst */ +#define PGM_BTH_NAME_32BIT_REAL(name) PGM_CTX(pgm,Bth32BitReal##name) +#define PGM_BTH_NAME_32BIT_PROT(name) PGM_CTX(pgm,Bth32BitProt##name) +#define PGM_BTH_NAME_32BIT_32BIT(name) PGM_CTX(pgm,Bth32Bit32Bit##name) +#define PGM_BTH_NAME_PAE_REAL(name) PGM_CTX(pgm,BthPAEReal##name) +#define PGM_BTH_NAME_PAE_PROT(name) PGM_CTX(pgm,BthPAEProt##name) +#define PGM_BTH_NAME_PAE_32BIT(name) PGM_CTX(pgm,BthPAE32Bit##name) +#define PGM_BTH_NAME_PAE_PAE(name) PGM_CTX(pgm,BthPAEPAE##name) +#define PGM_BTH_NAME_AMD64_PROT(name) PGM_CTX(pgm,BthAMD64Prot##name) +#define PGM_BTH_NAME_AMD64_AMD64(name) PGM_CTX(pgm,BthAMD64AMD64##name) +#define PGM_BTH_NAME_NESTED_32BIT_REAL(name) PGM_CTX(pgm,BthNested32BitReal##name) +#define PGM_BTH_NAME_NESTED_32BIT_PROT(name) PGM_CTX(pgm,BthNested32BitProt##name) +#define PGM_BTH_NAME_NESTED_32BIT_32BIT(name) PGM_CTX(pgm,BthNested32Bit32Bit##name) 
+#define PGM_BTH_NAME_NESTED_32BIT_PAE(name) PGM_CTX(pgm,BthNested32BitPAE##name) +#define PGM_BTH_NAME_NESTED_32BIT_AMD64(name) PGM_CTX(pgm,BthNested32BitAMD64##name) +#define PGM_BTH_NAME_NESTED_PAE_REAL(name) PGM_CTX(pgm,BthNestedPAEReal##name) +#define PGM_BTH_NAME_NESTED_PAE_PROT(name) PGM_CTX(pgm,BthNestedPAEProt##name) +#define PGM_BTH_NAME_NESTED_PAE_32BIT(name) PGM_CTX(pgm,BthNestedPAE32Bit##name) +#define PGM_BTH_NAME_NESTED_PAE_PAE(name) PGM_CTX(pgm,BthNestedPAEPAE##name) +#define PGM_BTH_NAME_NESTED_PAE_AMD64(name) PGM_CTX(pgm,BthNestedPAEAMD64##name) +#define PGM_BTH_NAME_NESTED_AMD64_REAL(name) PGM_CTX(pgm,BthNestedAMD64Real##name) +#define PGM_BTH_NAME_NESTED_AMD64_PROT(name) PGM_CTX(pgm,BthNestedAMD64Prot##name) +#define PGM_BTH_NAME_NESTED_AMD64_32BIT(name) PGM_CTX(pgm,BthNestedAMD6432Bit##name) +#define PGM_BTH_NAME_NESTED_AMD64_PAE(name) PGM_CTX(pgm,BthNestedAMD64PAE##name) +#define PGM_BTH_NAME_NESTED_AMD64_AMD64(name) PGM_CTX(pgm,BthNestedAMD64AMD64##name) +#define PGM_BTH_NAME_EPT_REAL(name) PGM_CTX(pgm,BthEPTReal##name) +#define PGM_BTH_NAME_EPT_PROT(name) PGM_CTX(pgm,BthEPTProt##name) +#define PGM_BTH_NAME_EPT_32BIT(name) PGM_CTX(pgm,BthEPT32Bit##name) +#define PGM_BTH_NAME_EPT_PAE(name) PGM_CTX(pgm,BthEPTPAE##name) +#define PGM_BTH_NAME_EPT_AMD64(name) PGM_CTX(pgm,BthEPTAMD64##name) +#define PGM_BTH_NAME_NONE_REAL(name) PGM_CTX(pgm,BthNoneReal##name) +#define PGM_BTH_NAME_NONE_PROT(name) PGM_CTX(pgm,BthNoneProt##name) +#define PGM_BTH_NAME_NONE_32BIT(name) PGM_CTX(pgm,BthNone32Bit##name) +#define PGM_BTH_NAME_NONE_PAE(name) PGM_CTX(pgm,BthNonePAE##name) +#define PGM_BTH_NAME_NONE_AMD64(name) PGM_CTX(pgm,BthNoneAMD64##name) + +#define PGM_BTH_NAME_RC_32BIT_REAL_STR(name) "pgmRCBth32BitReal" #name +#define PGM_BTH_NAME_RC_32BIT_PROT_STR(name) "pgmRCBth32BitProt" #name +#define PGM_BTH_NAME_RC_32BIT_32BIT_STR(name) "pgmRCBth32Bit32Bit" #name +#define PGM_BTH_NAME_RC_PAE_REAL_STR(name) "pgmRCBthPAEReal" #name +#define PGM_BTH_NAME_RC_PAE_PROT_STR(name) "pgmRCBthPAEProt" #name +#define PGM_BTH_NAME_RC_PAE_32BIT_STR(name) "pgmRCBthPAE32Bit" #name +#define PGM_BTH_NAME_RC_PAE_PAE_STR(name) "pgmRCBthPAEPAE" #name +#define PGM_BTH_NAME_RC_AMD64_AMD64_STR(name) "pgmRCBthAMD64AMD64" #name +#define PGM_BTH_NAME_RC_NESTED_32BIT_REAL_STR(name) "pgmRCBthNested32BitReal" #name +#define PGM_BTH_NAME_RC_NESTED_32BIT_PROT_STR(name) "pgmRCBthNested32BitProt" #name +#define PGM_BTH_NAME_RC_NESTED_32BIT_32BIT_STR(name) "pgmRCBthNested32Bit32Bit" #name +#define PGM_BTH_NAME_RC_NESTED_32BIT_PAE_STR(name) "pgmRCBthNested32BitPAE" #name +#define PGM_BTH_NAME_RC_NESTED_32BIT_AMD64_STR(name) "pgmRCBthNested32BitAMD64" #name +#define PGM_BTH_NAME_RC_NESTED_PAE_REAL_STR(name) "pgmRCBthNestedPAEReal" #name +#define PGM_BTH_NAME_RC_NESTED_PAE_PROT_STR(name) "pgmRCBthNestedPAEProt" #name +#define PGM_BTH_NAME_RC_NESTED_PAE_32BIT_STR(name) "pgmRCBthNestedPAE32Bit" #name +#define PGM_BTH_NAME_RC_NESTED_PAE_PAE_STR(name) "pgmRCBthNestedPAEPAE" #name +#define PGM_BTH_NAME_RC_NESTED_PAE_AMD64_STR(name) "pgmRCBthNestedPAEAMD64" #name +#define PGM_BTH_NAME_RC_NESTED_AMD64_REAL_STR(name) "pgmRCBthNestedAMD64Real" #name +#define PGM_BTH_NAME_RC_NESTED_AMD64_PROT_STR(name) "pgmRCBthNestedAMD64Prot" #name +#define PGM_BTH_NAME_RC_NESTED_AMD64_32BIT_STR(name) "pgmRCBthNestedAMD6432Bit" #name +#define PGM_BTH_NAME_RC_NESTED_AMD64_PAE_STR(name) "pgmRCBthNestedAMD64PAE" #name +#define PGM_BTH_NAME_RC_NESTED_AMD64_AMD64_STR(name) "pgmRCBthNestedAMD64AMD64" #name +#define PGM_BTH_NAME_RC_EPT_REAL_STR(name) 
"pgmRCBthEPTReal" #name +#define PGM_BTH_NAME_RC_EPT_PROT_STR(name) "pgmRCBthEPTProt" #name +#define PGM_BTH_NAME_RC_EPT_32BIT_STR(name) "pgmRCBthEPT32Bit" #name +#define PGM_BTH_NAME_RC_EPT_PAE_STR(name) "pgmRCBthEPTPAE" #name +#define PGM_BTH_NAME_RC_EPT_AMD64_STR(name) "pgmRCBthEPTAMD64" #name + +#define PGM_BTH_NAME_R0_32BIT_REAL_STR(name) "pgmR0Bth32BitReal" #name +#define PGM_BTH_NAME_R0_32BIT_PROT_STR(name) "pgmR0Bth32BitProt" #name +#define PGM_BTH_NAME_R0_32BIT_32BIT_STR(name) "pgmR0Bth32Bit32Bit" #name +#define PGM_BTH_NAME_R0_PAE_REAL_STR(name) "pgmR0BthPAEReal" #name +#define PGM_BTH_NAME_R0_PAE_PROT_STR(name) "pgmR0BthPAEProt" #name +#define PGM_BTH_NAME_R0_PAE_32BIT_STR(name) "pgmR0BthPAE32Bit" #name +#define PGM_BTH_NAME_R0_PAE_PAE_STR(name) "pgmR0BthPAEPAE" #name +#define PGM_BTH_NAME_R0_AMD64_PROT_STR(name) "pgmR0BthAMD64Prot" #name +#define PGM_BTH_NAME_R0_AMD64_AMD64_STR(name) "pgmR0BthAMD64AMD64" #name +#define PGM_BTH_NAME_R0_NESTED_32BIT_REAL_STR(name) "pgmR0BthNested32BitReal" #name +#define PGM_BTH_NAME_R0_NESTED_32BIT_PROT_STR(name) "pgmR0BthNested32BitProt" #name +#define PGM_BTH_NAME_R0_NESTED_32BIT_32BIT_STR(name) "pgmR0BthNested32Bit32Bit" #name +#define PGM_BTH_NAME_R0_NESTED_32BIT_PAE_STR(name) "pgmR0BthNested32BitPAE" #name +#define PGM_BTH_NAME_R0_NESTED_32BIT_AMD64_STR(name) "pgmR0BthNested32BitAMD64" #name +#define PGM_BTH_NAME_R0_NESTED_PAE_REAL_STR(name) "pgmR0BthNestedPAEReal" #name +#define PGM_BTH_NAME_R0_NESTED_PAE_PROT_STR(name) "pgmR0BthNestedPAEProt" #name +#define PGM_BTH_NAME_R0_NESTED_PAE_32BIT_STR(name) "pgmR0BthNestedPAE32Bit" #name +#define PGM_BTH_NAME_R0_NESTED_PAE_PAE_STR(name) "pgmR0BthNestedPAEPAE" #name +#define PGM_BTH_NAME_R0_NESTED_PAE_AMD64_STR(name) "pgmR0BthNestedPAEAMD64" #name +#define PGM_BTH_NAME_R0_NESTED_AMD64_REAL_STR(name) "pgmR0BthNestedAMD64Real" #name +#define PGM_BTH_NAME_R0_NESTED_AMD64_PROT_STR(name) "pgmR0BthNestedAMD64Prot" #name +#define PGM_BTH_NAME_R0_NESTED_AMD64_32BIT_STR(name) "pgmR0BthNestedAMD6432Bit" #name +#define PGM_BTH_NAME_R0_NESTED_AMD64_PAE_STR(name) "pgmR0BthNestedAMD64PAE" #name +#define PGM_BTH_NAME_R0_NESTED_AMD64_AMD64_STR(name) "pgmR0BthNestedAMD64AMD64" #name +#define PGM_BTH_NAME_R0_EPT_REAL_STR(name) "pgmR0BthEPTReal" #name +#define PGM_BTH_NAME_R0_EPT_PROT_STR(name) "pgmR0BthEPTProt" #name +#define PGM_BTH_NAME_R0_EPT_32BIT_STR(name) "pgmR0BthEPT32Bit" #name +#define PGM_BTH_NAME_R0_EPT_PAE_STR(name) "pgmR0BthEPTPAE" #name +#define PGM_BTH_NAME_R0_EPT_AMD64_STR(name) "pgmR0BthEPTAMD64" #name + +#define PGM_BTH_DECL(type, name) PGM_CTX_DECL(type) PGM_BTH_NAME(name) +/** @} */ + + +/** + * Function pointers for guest paging. + */ +typedef struct PGMMODEDATAGST +{ + /** The guest mode type. */ + uint32_t uType; + DECLCALLBACKMEMBER(int, pfnGetPage)(PVMCPUCC pVCpu, RTGCPTR GCPtr, uint64_t *pfFlags, PRTGCPHYS pGCPhys); + DECLCALLBACKMEMBER(int, pfnModifyPage)(PVMCPUCC pVCpu, RTGCPTR GCPtr, size_t cbPages, uint64_t fFlags, uint64_t fMask); + DECLCALLBACKMEMBER(int, pfnGetPDE)(PVMCPUCC pVCpu, RTGCPTR GCPtr, PX86PDEPAE pPde); + DECLCALLBACKMEMBER(int, pfnEnter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3); + DECLCALLBACKMEMBER(int, pfnExit)(PVMCPUCC pVCpu); +#ifdef IN_RING3 + DECLCALLBACKMEMBER(int, pfnRelocate)(PVMCPUCC pVCpu, RTGCPTR offDelta); /**< Only in ring-3. */ +#endif +} PGMMODEDATAGST; + +/** The length of g_aPgmGuestModeData. 
*/ +#ifdef VBOX_WITH_64_BITS_GUESTS +# define PGM_GUEST_MODE_DATA_ARRAY_SIZE (PGM_TYPE_AMD64 + 1) +#else +# define PGM_GUEST_MODE_DATA_ARRAY_SIZE (PGM_TYPE_PAE + 1) +#endif +/** The guest mode data array. */ +extern PGMMODEDATAGST const g_aPgmGuestModeData[PGM_GUEST_MODE_DATA_ARRAY_SIZE]; + + +/** + * Function pointers for shadow paging. + */ +typedef struct PGMMODEDATASHW +{ + /** The shadow mode type. */ + uint32_t uType; + DECLCALLBACKMEMBER(int, pfnGetPage)(PVMCPUCC pVCpu, RTGCPTR GCPtr, uint64_t *pfFlags, PRTHCPHYS pHCPhys); + DECLCALLBACKMEMBER(int, pfnModifyPage)(PVMCPUCC pVCpu, RTGCPTR GCPtr, size_t cbPages, uint64_t fFlags, + uint64_t fMask, uint32_t fOpFlags); + DECLCALLBACKMEMBER(int, pfnEnter)(PVMCPUCC pVCpu, bool fIs64BitsPagingMode); + DECLCALLBACKMEMBER(int, pfnExit)(PVMCPUCC pVCpu); +#ifdef IN_RING3 + DECLCALLBACKMEMBER(int, pfnRelocate)(PVMCPUCC pVCpu, RTGCPTR offDelta); /**< Only in ring-3. */ +#endif +} PGMMODEDATASHW; + +/** The length of g_aPgmShadowModeData. */ +#define PGM_SHADOW_MODE_DATA_ARRAY_SIZE PGM_TYPE_END +/** The shadow mode data array. */ +extern PGMMODEDATASHW const g_aPgmShadowModeData[PGM_SHADOW_MODE_DATA_ARRAY_SIZE]; + + +/** + * Function pointers for guest+shadow paging. + */ +typedef struct PGMMODEDATABTH +{ + /** The shadow mode type. */ + uint32_t uShwType; + /** The guest mode type. */ + uint32_t uGstType; + + DECLCALLBACKMEMBER(int, pfnInvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage); + DECLCALLBACKMEMBER(int, pfnSyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal); + DECLCALLBACKMEMBER(int, pfnPrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage); + DECLCALLBACKMEMBER(int, pfnVerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fFlags, unsigned uError); + DECLCALLBACKMEMBER(int, pfnMapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3); + DECLCALLBACKMEMBER(int, pfnUnmapCR3)(PVMCPUCC pVCpu); + DECLCALLBACKMEMBER(int, pfnEnter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3); +#ifndef IN_RING3 + DECLCALLBACKMEMBER(int, pfnTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken); +#endif +#ifdef VBOX_STRICT + DECLCALLBACKMEMBER(unsigned, pfnAssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb); +#endif +} PGMMODEDATABTH; + +/** The length of g_aPgmBothModeData. */ +#define PGM_BOTH_MODE_DATA_ARRAY_SIZE ((PGM_TYPE_END - PGM_TYPE_FIRST_SHADOW) * PGM_TYPE_END) +/** The guest+shadow mode data array. */ +extern PGMMODEDATABTH const g_aPgmBothModeData[PGM_BOTH_MODE_DATA_ARRAY_SIZE]; + + +#ifdef VBOX_WITH_STATISTICS +/** + * PGM statistics. + * + * These lives on the heap when compiled in as they would otherwise waste + * unnecessary space in release builds. + */ +typedef struct PGMSTATS +{ + /* R3 only: */ + STAMCOUNTER StatR3DetectedConflicts; /**< R3: Number of times PGMR3MapHasConflicts() detected a conflict. */ + STAMPROFILE StatR3ResolveConflict; /**< R3: pgmR3SyncPTResolveConflict() profiling (includes the entire relocation). */ + + /* R3+RZ */ + STAMCOUNTER StatRZChunkR3MapTlbHits; /**< RC/R0: Ring-3/0 chunk mapper TLB hits. */ + STAMCOUNTER StatRZChunkR3MapTlbMisses; /**< RC/R0: Ring-3/0 chunk mapper TLB misses. */ + STAMCOUNTER StatRZPageMapTlbHits; /**< RC/R0: Ring-3/0 page mapper TLB hits. */ + STAMCOUNTER StatRZPageMapTlbMisses; /**< RC/R0: Ring-3/0 page mapper TLB misses. */ + STAMCOUNTER StatPageMapTlbFlushes; /**< ALL: Ring-3/0 page mapper TLB flushes. */ + STAMCOUNTER StatPageMapTlbFlushEntry; /**< ALL: Ring-3/0 page mapper TLB flushes. 
*/ + STAMCOUNTER StatR3ChunkR3MapTlbHits; /**< R3: Ring-3/0 chunk mapper TLB hits. */ + STAMCOUNTER StatR3ChunkR3MapTlbMisses; /**< R3: Ring-3/0 chunk mapper TLB misses. */ + STAMCOUNTER StatR3PageMapTlbHits; /**< R3: Ring-3/0 page mapper TLB hits. */ + STAMCOUNTER StatR3PageMapTlbMisses; /**< R3: Ring-3/0 page mapper TLB misses. */ + STAMCOUNTER StatRZRamRangeTlbHits; /**< RC/R0: RAM range TLB hits. */ + STAMCOUNTER StatRZRamRangeTlbMisses; /**< RC/R0: RAM range TLB misses. */ + STAMCOUNTER StatR3RamRangeTlbHits; /**< R3: RAM range TLB hits. */ + STAMCOUNTER StatR3RamRangeTlbMisses; /**< R3: RAM range TLB misses. */ + STAMCOUNTER StatR3PhysHandlerReset; /**< R3: The number of times PGMHandlerPhysicalReset is called. */ + STAMCOUNTER StatRZPhysHandlerReset; /**< RC/R0: The number of times PGMHandlerPhysicalReset is called. */ + STAMCOUNTER StatR3PhysHandlerLookupHits; /**< R3: Number of cache hits when looking up physical handlers. */ + STAMCOUNTER StatR3PhysHandlerLookupMisses; /**< R3: Number of cache misses when looking up physical handlers. */ + STAMCOUNTER StatRZPhysHandlerLookupHits; /**< RC/R0: Number of cache hits when lookup up physical handlers. */ + STAMCOUNTER StatRZPhysHandlerLookupMisses; /**< RC/R0: Number of cache misses when looking up physical handlers */ + STAMCOUNTER StatRZPageReplaceShared; /**< RC/R0: Times a shared page has been replaced by a private one. */ + STAMCOUNTER StatRZPageReplaceZero; /**< RC/R0: Times the zero page has been replaced by a private one. */ +/// @todo STAMCOUNTER StatRZPageHandyAllocs; /**< RC/R0: The number of times we've executed GMMR3AllocateHandyPages. */ + STAMCOUNTER StatR3PageReplaceShared; /**< R3: Times a shared page has been replaced by a private one. */ + STAMCOUNTER StatR3PageReplaceZero; /**< R3: Times the zero page has been replaced by a private one. */ +/// @todo STAMCOUNTER StatR3PageHandyAllocs; /**< R3: The number of times we've executed GMMR3AllocateHandyPages. */ + + /* RC only: */ + STAMCOUNTER StatRCInvlPgConflict; /**< RC: Number of times PGMInvalidatePage() detected a mapping conflict. */ + STAMCOUNTER StatRCInvlPgSyncMonCR3; /**< RC: Number of times PGMInvalidatePage() ran into PGM_SYNC_MONITOR_CR3. */ + + STAMCOUNTER StatRZPhysRead; + STAMCOUNTER StatRZPhysReadBytes; + STAMCOUNTER StatRZPhysWrite; + STAMCOUNTER StatRZPhysWriteBytes; + STAMCOUNTER StatR3PhysRead; + STAMCOUNTER StatR3PhysReadBytes; + STAMCOUNTER StatR3PhysWrite; + STAMCOUNTER StatR3PhysWriteBytes; + STAMCOUNTER StatRCPhysRead; + STAMCOUNTER StatRCPhysReadBytes; + STAMCOUNTER StatRCPhysWrite; + STAMCOUNTER StatRCPhysWriteBytes; + + STAMCOUNTER StatRZPhysSimpleRead; + STAMCOUNTER StatRZPhysSimpleReadBytes; + STAMCOUNTER StatRZPhysSimpleWrite; + STAMCOUNTER StatRZPhysSimpleWriteBytes; + STAMCOUNTER StatR3PhysSimpleRead; + STAMCOUNTER StatR3PhysSimpleReadBytes; + STAMCOUNTER StatR3PhysSimpleWrite; + STAMCOUNTER StatR3PhysSimpleWriteBytes; + STAMCOUNTER StatRCPhysSimpleRead; + STAMCOUNTER StatRCPhysSimpleReadBytes; + STAMCOUNTER StatRCPhysSimpleWrite; + STAMCOUNTER StatRCPhysSimpleWriteBytes; + + STAMCOUNTER StatTrackVirgin; /**< The number of first time shadowings. */ + STAMCOUNTER StatTrackAliased; /**< The number of times switching to cRef2, i.e. the page is being shadowed by two PTs. */ + STAMCOUNTER StatTrackAliasedMany; /**< The number of times we're tracking using cRef2. */ + STAMCOUNTER StatTrackAliasedLots; /**< The number of times we're hitting pages which has overflowed cRef2. 
*/ + STAMCOUNTER StatTrackNoExtentsLeft; /**< The number of times the extent list was exhausted. */ + STAMCOUNTER StatTrackOverflows; /**< The number of times the extent list grows to long. */ + STAMPROFILE StatTrackDeref; /**< Profiling of SyncPageWorkerTrackDeref (expensive). */ + + /** Time spent by the host OS for large page allocation. */ + STAMPROFILE StatAllocLargePage; + /** Time spent clearing the newly allocated large pages. */ + STAMPROFILE StatClearLargePage; + /** The number of times allocating a large pages takes more than the allowed period. */ + STAMCOUNTER StatLargePageOverflow; + /** pgmPhysIsValidLargePage profiling - R3 */ + STAMPROFILE StatR3IsValidLargePage; + /** pgmPhysIsValidLargePage profiling - RZ*/ + STAMPROFILE StatRZIsValidLargePage; + + STAMPROFILE StatChunkAging; + STAMPROFILE StatChunkFindCandidate; + STAMPROFILE StatChunkUnmap; + STAMPROFILE StatChunkMap; +} PGMSTATS; +#endif /* VBOX_WITH_STATISTICS */ + + +/** + * Converts a PGM pointer into a VM pointer. + * @returns Pointer to the VM structure the PGM is part of. + * @param pPGM Pointer to PGM instance data. + */ +#define PGM2VM(pPGM) ( (PVM)((char*)pPGM - pPGM->offVM) ) + +/** + * PGM Data (part of VM) + */ +typedef struct PGM +{ + /** Offset to the VM structure. */ + int32_t offVM; + /** Offset of the PGMCPU structure relative to VMCPU. */ + int32_t offVCpuPGM; + + /** @cfgm{/RamPreAlloc, boolean, false} + * Indicates whether the base RAM should all be allocated before starting + * the VM (default), or if it should be allocated when first written to. + */ + bool fRamPreAlloc; + /** Indicates whether write monitoring is currently in use. + * This is used to prevent conflicts between live saving and page sharing + * detection. */ + bool fPhysWriteMonitoringEngaged; + /** Set if the CPU has less than 52-bit physical address width. + * This is used */ + bool fLessThan52PhysicalAddressBits; + /** Set when nested paging is active. + * This is meant to save calls to HMIsNestedPagingActive and let the + * compilers optimize the code better. Whether we use nested paging or + * not is something we find out during VMM initialization and we won't + * change this later on. */ + bool fNestedPaging; + /** The host paging mode. (This is what SUPLib reports.) */ + SUPPAGINGMODE enmHostMode; + /** We're not in a state which permits writes to guest memory. + * (Only used in strict builds.) */ + bool fNoMorePhysWrites; + /** @cfgm{/PageFusionAllowed, boolean, false} + * Whether page fusion is allowed. */ + bool fPageFusionAllowed; + /** @cfgm{/PGM/PciPassThrough, boolean, false} + * Whether PCI passthrough is enabled. */ + bool fPciPassthrough; + /** The number of MMIO2 regions (serves as the next MMIO2 ID). */ + uint8_t cMmio2Regions; + /** Restore original ROM page content when resetting after loading state. + * The flag is set by pgmR3LoadRomRanges and cleared at reset. This + * enables the VM to start using an updated ROM without requiring powering + * down the VM, just rebooting or resetting it. */ + bool fRestoreRomPagesOnReset; + /** Whether to automatically clear all RAM pages on reset. */ + bool fZeroRamPagesOnReset; + /** Alignment padding. */ + bool afAlignment3[7]; + + /** Indicates that PGMR3FinalizeMappings has been called and that further + * PGMR3MapIntermediate calls will be rejected. */ + bool fFinalizedMappings; + /** If set no conflict checks are required. 
*/ + bool fMappingsFixed; + /** If set if restored as fixed but we were unable to re-fixate at the old + * location because of room or address incompatibilities. */ + bool fMappingsFixedRestored; + /** Size of fixed mapping. + * This is valid if either fMappingsFixed or fMappingsFixedRestored is set. */ + uint32_t cbMappingFixed; + /** Generation ID for the RAM ranges. This member is incremented everytime + * a RAM range is linked or unlinked. */ + uint32_t volatile idRamRangesGen; + + /** Base address (GC) of fixed mapping. + * This is valid if either fMappingsFixed or fMappingsFixedRestored is set. */ + RTGCPTR GCPtrMappingFixed; + /** The address of the previous RAM range mapping. */ + RTGCPTR GCPtrPrevRamRangeMapping; + + /** Physical access handler type for ROM protection. */ + PGMPHYSHANDLERTYPE hRomPhysHandlerType; + /** Alignment padding. */ + uint32_t u32Padding; + + /** 4 MB page mask; 32 or 36 bits depending on PSE-36 (identical for all VCPUs) */ + RTGCPHYS GCPhys4MBPSEMask; + /** Mask containing the invalid bits of a guest physical address. + * @remarks this does not stop at bit 52. */ + RTGCPHYS GCPhysInvAddrMask; + + + /** RAM range TLB for R3. */ + R3PTRTYPE(PPGMRAMRANGE) apRamRangesTlbR3[PGM_RAMRANGE_TLB_ENTRIES]; + /** Pointer to the list of RAM ranges (Phys GC -> Phys HC conversion) - for R3. + * This is sorted by physical address and contains no overlapping ranges. */ + R3PTRTYPE(PPGMRAMRANGE) pRamRangesXR3; + /** Root of the RAM range search tree for ring-3. */ + R3PTRTYPE(PPGMRAMRANGE) pRamRangeTreeR3; + /** PGM offset based trees - R3 Ptr. */ + R3PTRTYPE(PPGMTREES) pTreesR3; + /** Caching the last physical handler we looked up in R3. */ + R3PTRTYPE(PPGMPHYSHANDLER) pLastPhysHandlerR3; + /** Shadow Page Pool - R3 Ptr. */ + R3PTRTYPE(PPGMPOOL) pPoolR3; +#ifndef PGM_WITHOUT_MAPPINGS + /** Linked list of GC mappings - for HC. + * The list is sorted ascending on address. */ + R3PTRTYPE(PPGMMAPPING) pMappingsR3; +#endif + /** Pointer to the list of ROM ranges - for R3. + * This is sorted by physical address and contains no overlapping ranges. */ + R3PTRTYPE(PPGMROMRANGE) pRomRangesR3; + /** Pointer to the list of MMIO2 ranges - for R3. + * Registration order. */ + R3PTRTYPE(PPGMREGMMIO2RANGE) pRegMmioRangesR3; + /** MMIO2 lookup array for ring-3. Indexed by idMmio2 minus 1. */ + R3PTRTYPE(PPGMREGMMIO2RANGE) apMmio2RangesR3[PGM_MMIO2_MAX_RANGES]; + + /** RAM range TLB for R0. */ + R0PTRTYPE(PPGMRAMRANGE) apRamRangesTlbR0[PGM_RAMRANGE_TLB_ENTRIES]; + /** R0 pointer corresponding to PGM::pRamRangesXR3. */ + R0PTRTYPE(PPGMRAMRANGE) pRamRangesXR0; + /** Root of the RAM range search tree for ring-0. */ + R0PTRTYPE(PPGMRAMRANGE) pRamRangeTreeR0; + /** PGM offset based trees - R0 Ptr. */ + R0PTRTYPE(PPGMTREES) pTreesR0; + /** Caching the last physical handler we looked up in R0. */ + R0PTRTYPE(PPGMPHYSHANDLER) pLastPhysHandlerR0; + /** Shadow Page Pool - R0 Ptr. */ + R0PTRTYPE(PPGMPOOL) pPoolR0; +#ifndef PGM_WITHOUT_MAPPINGS + /** Linked list of GC mappings - for R0. + * The list is sorted ascending on address. */ + R0PTRTYPE(PPGMMAPPING) pMappingsR0; + RTR0PTR R0PtrAlignment0; +#endif + /** R0 pointer corresponding to PGM::pRomRangesR3. */ + R0PTRTYPE(PPGMROMRANGE) pRomRangesR0; + /** MMIO2 lookup array for ring-0. Indexed by idMmio2 minus 1. */ + R0PTRTYPE(PPGMREGMMIO2RANGE) apMmio2RangesR0[PGM_MMIO2_MAX_RANGES]; + +#ifndef PGM_WITHOUT_MAPPINGS + /** Pointer to the 5 page CR3 content mapping. 
+ * The first page is always the CR3 (in some form) while the 4 other pages + * are used for the PDs in PAE mode. */ + RTGCPTR GCPtrCR3Mapping; + + /** @name Intermediate Context + * @{ */ + /** Pointer to the intermediate page directory - Normal. */ + R3PTRTYPE(PX86PD) pInterPD; + /** Pointer to the intermediate page tables - Normal. + * There are two page tables, one for the identity mapping and one for + * the host context mapping (of the core code). */ + R3PTRTYPE(PX86PT) apInterPTs[2]; + /** Pointer to the intermediate page tables - PAE. */ + R3PTRTYPE(PX86PTPAE) apInterPaePTs[2]; + /** Pointer to the intermediate page directory - PAE. */ + R3PTRTYPE(PX86PDPAE) apInterPaePDs[4]; + /** Pointer to the intermediate page directory - PAE. */ + R3PTRTYPE(PX86PDPT) pInterPaePDPT; + /** Pointer to the intermediate page-map level 4 - AMD64. */ + R3PTRTYPE(PX86PML4) pInterPaePML4; + /** Pointer to the intermediate page directory - AMD64. */ + R3PTRTYPE(PX86PDPT) pInterPaePDPT64; + /** The Physical Address (HC) of the intermediate Page Directory - Normal. */ + RTHCPHYS HCPhysInterPD; + /** The Physical Address (HC) of the intermediate Page Directory Pointer Table - PAE. */ + RTHCPHYS HCPhysInterPaePDPT; + /** The Physical Address (HC) of the intermediate Page Map Level 4 table - AMD64. */ + RTHCPHYS HCPhysInterPaePML4; + /** @} */ +#endif + +#ifndef PGM_WITHOUT_MAPPINGS + /** Base address of the dynamic page mapping area. + * The array is MM_HYPER_DYNAMIC_SIZE bytes big. + * + * @todo The plan of keeping PGMRCDYNMAP private to PGMRZDynMap.cpp didn't + * work out. Some cleaning up of the initialization that would + * remove this memory is yet to be done... + */ + RCPTRTYPE(uint8_t *) pbDynPageMapBaseGC; + /** The address of the raw-mode context mapping cache. */ + RCPTRTYPE(PPGMRCDYNMAP) pRCDynMap; + /** The address of the ring-0 mapping cache if we're making use of it. */ + RTR0PTR pvR0DynMapUsed; +#endif + + /** Hack: Number of deprecated page mapping locks taken by the current lock + * owner via pgmPhysGCPhys2CCPtrInternalDepr. */ + uint32_t cDeprecatedPageLocks; + /** Alignment padding. */ + uint32_t au32Alignment2[1]; + + + /** PGM critical section. + * This protects the physical, ram ranges, and the page flag updating (some of + * it anyway). + */ + PDMCRITSECT CritSectX; + + /** + * Data associated with managing the ring-3 mappings of the allocation chunks. + */ + struct + { + /** The chunk mapping TLB. */ + PGMCHUNKR3MAPTLB Tlb; + /** The chunk tree, ordered by chunk id. */ +#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE) || defined(VBOX_WITH_RAM_IN_KERNEL) + R3PTRTYPE(PAVLU32NODECORE) pTree; +#else + R3R0PTRTYPE(PAVLU32NODECORE) pTree; +#endif +#if HC_ARCH_BITS == 32 + uint32_t u32Alignment0; +#endif + /** The number of mapped chunks. */ + uint32_t c; + /** @cfgm{/PGM/MaxRing3Chunks, uint32_t, host dependent} + * The maximum number of mapped chunks. On 64-bit this is unlimited by default, + * on 32-bit it defaults to 1 or 3 GB depending on the host. */ + uint32_t cMax; + /** The current time. This is incremented whenever a chunk is inserted. */ + uint32_t iNow; + /** Alignment padding. */ + uint32_t au32Alignment1[3]; + } ChunkR3Map; + + /** The page mapping TLB for ring-3. */ + PGMPAGER3MAPTLB PhysTlbR3; +#ifdef VBOX_WITH_RAM_IN_KERNEL + /** The page mapping TLB for ring-0. */ + PGMPAGER0MAPTLB PhysTlbR0; +#else + /** The page mapping TLB for ring-0 (still using ring-3 mappings). */ + PGMPAGER3MAPTLB PhysTlbR0; +#endif + + /** @name The zero page. 
+ * @{ */ + /** The host physical address of the zero page. */ + RTHCPHYS HCPhysZeroPg; + /** The ring-3 mapping of the zero page. */ + RTR3PTR pvZeroPgR3; + /** The ring-0 mapping of the zero page. */ + RTR0PTR pvZeroPgR0; + /** The GC mapping of the zero page. */ + RTRCPTR pvZeroPgRC; + RTRCPTR RCPtrAlignment3; + /** @}*/ + + /** @name The Invalid MMIO page. + * This page is filled with 0xfeedface. + * @{ */ + /** The host physical address of the invalid MMIO page. */ + RTHCPHYS HCPhysMmioPg; + /** The host pysical address of the invalid MMIO page plus all invalid + * physical address bits set. This is used to trigger X86_TRAP_PF_RSVD. + * @remarks Check fLessThan52PhysicalAddressBits before use. */ + RTHCPHYS HCPhysInvMmioPg; + /** The ring-3 mapping of the invalid MMIO page. */ + RTR3PTR pvMmioPgR3; +#if HC_ARCH_BITS == 32 + RTR3PTR R3PtrAlignment4; +#endif + /** @} */ + + + /** The number of handy pages. */ + uint32_t cHandyPages; + + /** The number of large handy pages. */ + uint32_t cLargeHandyPages; + + /** + * Array of handy pages. + * + * This array is used in a two way communication between pgmPhysAllocPage + * and GMMR0AllocateHandyPages, with PGMR3PhysAllocateHandyPages serving as + * an intermediary. + * + * The size of this array is important, see pgmPhysEnsureHandyPage for details. + * (The current size of 32 pages, means 128 KB of handy memory.) + */ + GMMPAGEDESC aHandyPages[PGM_HANDY_PAGES]; + + /** + * Array of large handy pages. (currently size 1) + * + * This array is used in a two way communication between pgmPhysAllocLargePage + * and GMMR0AllocateLargePage, with PGMR3PhysAllocateLargePage serving as + * an intermediary. + */ + GMMPAGEDESC aLargeHandyPage[1]; + + /** + * Live save data. + */ + struct + { + /** Per type statistics. */ + struct + { + /** The number of ready pages. */ + uint32_t cReadyPages; + /** The number of dirty pages. */ + uint32_t cDirtyPages; + /** The number of ready zero pages. */ + uint32_t cZeroPages; + /** The number of write monitored pages. */ + uint32_t cMonitoredPages; + } Rom, + Mmio2, + Ram; + /** The number of ignored pages in the RAM ranges (i.e. MMIO, MMIO2 and ROM). */ + uint32_t cIgnoredPages; + /** Indicates that a live save operation is active. */ + bool fActive; + /** Padding. */ + bool afReserved[2]; + /** The next history index. */ + uint8_t iDirtyPagesHistory; + /** History of the total amount of dirty pages. */ + uint32_t acDirtyPagesHistory[64]; + /** Short term dirty page average. */ + uint32_t cDirtyPagesShort; + /** Long term dirty page average. */ + uint32_t cDirtyPagesLong; + /** The number of saved pages. This is used to get some kind of estimate of the + * link speed so we can decide when we're done. It is reset after the first + * 7 passes so the speed estimate doesn't get inflated by the initial set of + * zero pages. */ + uint64_t cSavedPages; + /** The nanosecond timestamp when cSavedPages was 0. */ + uint64_t uSaveStartNS; + /** Pages per second (for statistics). */ + uint32_t cPagesPerSecond; + uint32_t cAlignment; + } LiveSave; + + /** @name Error injection. + * @{ */ + /** Inject handy page allocation errors pretending we're completely out of + * memory. */ + bool volatile fErrInjHandyPages; + /** Padding. */ + bool afReserved[3]; + /** @} */ + + /** @name Release Statistics + * @{ */ + uint32_t cAllPages; /**< The total number of pages. (Should be Private + Shared + Zero + Pure MMIO.) */ + uint32_t cPrivatePages; /**< The number of private pages. 
*/ + uint32_t cSharedPages; /**< The number of shared pages. */ + uint32_t cReusedSharedPages; /**< The number of reused shared pages. */ + uint32_t cZeroPages; /**< The number of zero backed pages. */ + uint32_t cPureMmioPages; /**< The number of pure MMIO pages. */ + uint32_t cMonitoredPages; /**< The number of write monitored pages. */ + uint32_t cWrittenToPages; /**< The number of previously write monitored pages. */ + uint32_t cWriteLockedPages; /**< The number of write locked pages. */ + uint32_t cReadLockedPages; /**< The number of read locked pages. */ + uint32_t cBalloonedPages; /**< The number of ballooned pages. */ + uint32_t cMappedChunks; /**< Number of times we mapped a chunk. */ + uint32_t cUnmappedChunks; /**< Number of times we unmapped a chunk. */ + uint32_t cLargePages; /**< The number of large pages. */ + uint32_t cLargePagesDisabled; /**< The number of disabled large pages. */ +/* uint32_t aAlignment4[1]; */ + + /** The number of times we were forced to change the hypervisor region location. */ + STAMCOUNTER cRelocations; + + STAMCOUNTER StatLargePageReused; /**< The number of large pages we've reused.*/ + STAMCOUNTER StatLargePageRefused; /**< The number of times we couldn't use a large page.*/ + STAMCOUNTER StatLargePageRecheck; /**< The number of times we rechecked a disabled large page.*/ + + STAMPROFILE StatShModCheck; /**< Profiles shared module checks. */ + /** @} */ + +#ifdef VBOX_WITH_STATISTICS + /** @name Statistics on the heap. + * @{ */ + R3PTRTYPE(PGMSTATS *) pStatsR3; + R0PTRTYPE(PGMSTATS *) pStatsR0; + /** @} */ +#endif +} PGM; +#ifndef IN_TSTVMSTRUCTGC /* HACK */ +# ifndef PGM_WITHOUT_MAPPINGS +AssertCompileMemberAlignment(PGM, paDynPageMap32BitPTEsGC, 8); +# endif +AssertCompileMemberAlignment(PGM, GCPtrMappingFixed, sizeof(RTGCPTR)); +# ifndef PGM_WITHOUT_MAPPINGS +AssertCompileMemberAlignment(PGM, HCPhysInterPD, 8); +# endif +AssertCompileMemberAlignment(PGM, CritSectX, 8); +AssertCompileMemberAlignment(PGM, ChunkR3Map, 16); +AssertCompileMemberAlignment(PGM, PhysTlbR3, 32); /** @todo 32 byte alignment! */ +AssertCompileMemberAlignment(PGM, PhysTlbR0, 32); +AssertCompileMemberAlignment(PGM, HCPhysZeroPg, 8); +AssertCompileMemberAlignment(PGM, aHandyPages, 8); +AssertCompileMemberAlignment(PGM, cRelocations, 8); +#endif /* !IN_TSTVMSTRUCTGC */ +/** Pointer to the PGM instance data. */ +typedef PGM *PPGM; + + + +typedef struct PGMCPUSTATS +{ + /* Common */ + STAMCOUNTER StatSyncPtPD[X86_PG_ENTRIES]; /**< SyncPT - PD distribution. */ + STAMCOUNTER StatSyncPagePD[X86_PG_ENTRIES]; /**< SyncPage - PD distribution. */ + + /* R0 only: */ + STAMPROFILE StatR0NpMiscfg; /**< R0: PGMR0Trap0eHandlerNPMisconfig() profiling. */ + STAMCOUNTER StatR0NpMiscfgSyncPage; /**< R0: SyncPage calls from PGMR0Trap0eHandlerNPMisconfig(). */ + + /* RZ only: */ + STAMPROFILE StatRZTrap0e; /**< RC/R0: PGMTrap0eHandler() profiling. */ + STAMPROFILE StatRZTrap0eTime2Ballooned; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is read access to a ballooned page. */ + STAMPROFILE StatRZTrap0eTime2CSAM; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is CSAM. */ + STAMPROFILE StatRZTrap0eTime2DirtyAndAccessed; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is dirty and/or accessed bit emulation. */ + STAMPROFILE StatRZTrap0eTime2GuestTrap; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is a guest trap. 
*/ + STAMPROFILE StatRZTrap0eTime2HndPhys; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is a physical handler. */ + STAMPROFILE StatRZTrap0eTime2HndUnhandled; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is access outside the monitored areas of a monitored page. */ + STAMPROFILE StatRZTrap0eTime2InvalidPhys; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is access to an invalid physical guest address. */ + STAMPROFILE StatRZTrap0eTime2MakeWritable; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is a page that needed to be made writable. */ + STAMPROFILE StatRZTrap0eTime2Mapping; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is the guest mappings. */ + STAMPROFILE StatRZTrap0eTime2Misc; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is not known. */ + STAMPROFILE StatRZTrap0eTime2OutOfSync; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is an out-of-sync page. */ + STAMPROFILE StatRZTrap0eTime2OutOfSyncHndPhys; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is an out-of-sync physical handler page. */ + STAMPROFILE StatRZTrap0eTime2OutOfSyncHndObs; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is an obsolete handler page. */ + STAMPROFILE StatRZTrap0eTime2SyncPT; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is lazy syncing of a PT. */ + STAMPROFILE StatRZTrap0eTime2WPEmulation; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is CR0.WP emulation. */ + STAMPROFILE StatRZTrap0eTime2Wp0RoUsHack; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is CR0.WP and netware hack to be enabled. */ + STAMPROFILE StatRZTrap0eTime2Wp0RoUsUnhack; /**< RC/R0: Profiling of the Trap0eHandler body when the cause is CR0.WP and netware hack to be disabled. */ + STAMCOUNTER StatRZTrap0eConflicts; /**< RC/R0: The number of times \#PF was caused by an undetected conflict. */ + STAMCOUNTER StatRZTrap0eHandlersMapping; /**< RC/R0: Number of traps due to access handlers in mappings. */ + STAMCOUNTER StatRZTrap0eHandlersOutOfSync; /**< RC/R0: Number of out-of-sync handled pages. */ + STAMCOUNTER StatRZTrap0eHandlersPhysAll; /**< RC/R0: Number of traps due to physical all-access handlers. */ + STAMCOUNTER StatRZTrap0eHandlersPhysAllOpt; /**< RC/R0: Number of the physical all-access handler traps using the optimization. */ + STAMCOUNTER StatRZTrap0eHandlersPhysWrite; /**< RC/R0: Number of traps due to write-physical access handlers. */ + STAMCOUNTER StatRZTrap0eHandlersUnhandled; /**< RC/R0: Number of traps due to access outside range of monitored page(s). */ + STAMCOUNTER StatRZTrap0eHandlersInvalid; /**< RC/R0: Number of traps due to access to invalid physical memory. 
*/ + STAMCOUNTER StatRZTrap0eUSNotPresentRead; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eUSNotPresentWrite; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eUSWrite; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eUSReserved; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eUSNXE; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eUSRead; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eSVNotPresentRead; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eSVNotPresentWrite; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eSVWrite; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eSVReserved; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eSNXE; /**< RC/R0: \#PF err kind */ + STAMCOUNTER StatRZTrap0eGuestPF; /**< RC/R0: Real guest \#PFs. */ + STAMCOUNTER StatRZTrap0eGuestPFMapping; /**< RC/R0: Real guest \#PF to HMA or other mapping. */ + STAMCOUNTER StatRZTrap0eWPEmulInRZ; /**< RC/R0: WP=0 virtualization trap, handled. */ + STAMCOUNTER StatRZTrap0eWPEmulToR3; /**< RC/R0: WP=0 virtualization trap, chickened out. */ + STAMCOUNTER StatRZTrap0ePD[X86_PG_ENTRIES]; /**< RC/R0: PD distribution of the \#PFs. */ + STAMCOUNTER StatRZGuestCR3WriteHandled; /**< RC/R0: The number of times WriteHandlerCR3() was successfully called. */ + STAMCOUNTER StatRZGuestCR3WriteUnhandled; /**< RC/R0: The number of times WriteHandlerCR3() was called and we had to fall back to the recompiler. */ + STAMCOUNTER StatRZGuestCR3WriteConflict; /**< RC/R0: The number of times WriteHandlerCR3() was called and a conflict was detected. */ + STAMCOUNTER StatRZGuestROMWriteHandled; /**< RC/R0: The number of times pgmPhysRomWriteHandler() was successfully called. */ + STAMCOUNTER StatRZGuestROMWriteUnhandled; /**< RC/R0: The number of times pgmPhysRomWriteHandler() was called and we had to fall back to the recompiler */ + STAMCOUNTER StatRZDynMapMigrateInvlPg; /**< RZ: invlpg in PGMR0DynMapMigrateAutoSet. */ + STAMPROFILE StatRZDynMapGCPageInl; /**< RZ: Calls to pgmRZDynMapGCPageInlined. */ + STAMCOUNTER StatRZDynMapGCPageInlHits; /**< RZ: Hash table lookup hits. */ + STAMCOUNTER StatRZDynMapGCPageInlMisses; /**< RZ: Misses that falls back to the code common. */ + STAMCOUNTER StatRZDynMapGCPageInlRamHits; /**< RZ: 1st ram range hits. */ + STAMCOUNTER StatRZDynMapGCPageInlRamMisses; /**< RZ: 1st ram range misses, takes slow path. */ + STAMPROFILE StatRZDynMapHCPageInl; /**< RZ: Calls to pgmRZDynMapHCPageInlined. */ + STAMCOUNTER StatRZDynMapHCPageInlHits; /**< RZ: Hash table lookup hits. */ + STAMCOUNTER StatRZDynMapHCPageInlMisses; /**< RZ: Misses that falls back to the code common. */ + STAMPROFILE StatRZDynMapHCPage; /**< RZ: Calls to pgmRZDynMapHCPageCommon. */ + STAMCOUNTER StatRZDynMapSetOptimize; /**< RZ: Calls to pgmRZDynMapOptimizeAutoSet. */ + STAMCOUNTER StatRZDynMapSetSearchFlushes; /**< RZ: Set search restoring to subset flushes. */ + STAMCOUNTER StatRZDynMapSetSearchHits; /**< RZ: Set search hits. */ + STAMCOUNTER StatRZDynMapSetSearchMisses; /**< RZ: Set search misses. */ + STAMCOUNTER StatRZDynMapPage; /**< RZ: Calls to pgmR0DynMapPage. */ + STAMCOUNTER StatRZDynMapPageHits0; /**< RZ: Hits at iPage+0. */ + STAMCOUNTER StatRZDynMapPageHits1; /**< RZ: Hits at iPage+1. */ + STAMCOUNTER StatRZDynMapPageHits2; /**< RZ: Hits at iPage+2. */ + STAMCOUNTER StatRZDynMapPageInvlPg; /**< RZ: invlpg. */ + STAMCOUNTER StatRZDynMapPageSlow; /**< RZ: Calls to pgmR0DynMapPageSlow. 
*/ + STAMCOUNTER StatRZDynMapPageSlowLoopHits; /**< RZ: Hits in the pgmR0DynMapPageSlow search loop. */ + STAMCOUNTER StatRZDynMapPageSlowLoopMisses; /**< RZ: Misses in the pgmR0DynMapPageSlow search loop. */ + //STAMCOUNTER StatRZDynMapPageSlowLostHits; /**< RZ: Lost hits. */ + STAMCOUNTER StatRZDynMapSubsets; /**< RZ: Times PGMDynMapPushAutoSubset was called. */ + STAMCOUNTER StatRZDynMapPopFlushes; /**< RZ: Times PGMDynMapPopAutoSubset flushes the subset. */ + STAMCOUNTER aStatRZDynMapSetFilledPct[11]; /**< RZ: Set fill distribution, percent. */ + + /* HC - R3 and (maybe) R0: */ + + /* RZ & R3: */ + STAMPROFILE StatRZSyncCR3; /**< RC/R0: PGMSyncCR3() profiling. */ + STAMPROFILE StatRZSyncCR3Handlers; /**< RC/R0: Profiling of the PGMSyncCR3() update handler section. */ + STAMCOUNTER StatRZSyncCR3Global; /**< RC/R0: The number of global CR3 syncs. */ + STAMCOUNTER StatRZSyncCR3NotGlobal; /**< RC/R0: The number of non-global CR3 syncs. */ + STAMCOUNTER StatRZSyncCR3DstCacheHit; /**< RC/R0: The number of times we got some kind of cache hit on a page table. */ + STAMCOUNTER StatRZSyncCR3DstFreed; /**< RC/R0: The number of times we've had to free a shadow entry. */ + STAMCOUNTER StatRZSyncCR3DstFreedSrcNP; /**< RC/R0: The number of times we've had to free a shadow entry for which the source entry was not present. */ + STAMCOUNTER StatRZSyncCR3DstNotPresent; /**< RC/R0: The number of times we've encountered a not present shadow entry for a present guest entry. */ + STAMCOUNTER StatRZSyncCR3DstSkippedGlobalPD; /**< RC/R0: The number of times a global page directory wasn't flushed. */ + STAMCOUNTER StatRZSyncCR3DstSkippedGlobalPT; /**< RC/R0: The number of times a page table with only global entries wasn't flushed. */ + STAMPROFILE StatRZSyncPT; /**< RC/R0: PGMSyncPT() profiling. */ + STAMCOUNTER StatRZSyncPTFailed; /**< RC/R0: The number of times PGMSyncPT() failed. */ + STAMCOUNTER StatRZSyncPT4K; /**< RC/R0: Number of 4KB syncs. */ + STAMCOUNTER StatRZSyncPT4M; /**< RC/R0: Number of 4MB syncs. */ + STAMCOUNTER StatRZSyncPagePDNAs; /**< RC/R0: The number of time we've marked a PD not present from SyncPage to virtualize the accessed bit. */ + STAMCOUNTER StatRZSyncPagePDOutOfSync; /**< RC/R0: The number of time we've encountered an out-of-sync PD in SyncPage. */ + STAMCOUNTER StatRZAccessedPage; /**< RC/R0: The number of pages marked not present for accessed bit emulation. */ + STAMPROFILE StatRZDirtyBitTracking; /**< RC/R0: Profiling the dirty bit tracking in CheckPageFault(). */ + STAMCOUNTER StatRZDirtyPage; /**< RC/R0: The number of pages marked read-only for dirty bit tracking. */ + STAMCOUNTER StatRZDirtyPageBig; /**< RC/R0: The number of pages marked read-only for dirty bit tracking. */ + STAMCOUNTER StatRZDirtyPageSkipped; /**< RC/R0: The number of pages already dirty or readonly. */ + STAMCOUNTER StatRZDirtyPageTrap; /**< RC/R0: The number of traps generated for dirty bit tracking. */ + STAMCOUNTER StatRZDirtyPageStale; /**< RC/R0: The number of traps generated for dirty bit tracking. (stale tlb entries) */ + STAMCOUNTER StatRZDirtyTrackRealPF; /**< RC/R0: The number of real pages faults during dirty bit tracking. */ + STAMCOUNTER StatRZDirtiedPage; /**< RC/R0: The number of pages marked dirty because of write accesses. */ + STAMCOUNTER StatRZPageAlreadyDirty; /**< RC/R0: The number of pages already marked dirty because of write accesses. */ + STAMPROFILE StatRZInvalidatePage; /**< RC/R0: PGMInvalidatePage() profiling. 
*/ + STAMCOUNTER StatRZInvalidatePage4KBPages; /**< RC/R0: The number of times PGMInvalidatePage() was called for a 4KB page. */ + STAMCOUNTER StatRZInvalidatePage4MBPages; /**< RC/R0: The number of times PGMInvalidatePage() was called for a 4MB page. */ + STAMCOUNTER StatRZInvalidatePage4MBPagesSkip; /**< RC/R0: The number of times PGMInvalidatePage() skipped a 4MB page. */ + STAMCOUNTER StatRZInvalidatePagePDMappings; /**< RC/R0: The number of times PGMInvalidatePage() was called for a page directory containing mappings (no conflict). */ + STAMCOUNTER StatRZInvalidatePagePDNAs; /**< RC/R0: The number of times PGMInvalidatePage() was called for a not accessed page directory. */ + STAMCOUNTER StatRZInvalidatePagePDNPs; /**< RC/R0: The number of times PGMInvalidatePage() was called for a not present page directory. */ + STAMCOUNTER StatRZInvalidatePagePDOutOfSync; /**< RC/R0: The number of times PGMInvalidatePage() was called for an out of sync page directory. */ + STAMCOUNTER StatRZInvalidatePageSizeChanges ; /**< RC/R0: The number of times PGMInvalidatePage() was called on a page size change (4KB <-> 2/4MB). */ + STAMCOUNTER StatRZInvalidatePageSkipped; /**< RC/R0: The number of times PGMInvalidatePage() was skipped due to not present shw or pending pending SyncCR3. */ + STAMCOUNTER StatRZPageOutOfSyncUser; /**< RC/R0: The number of times user page is out of sync was detected in \#PF or VerifyAccessSyncPage. */ + STAMCOUNTER StatRZPageOutOfSyncSupervisor; /**< RC/R0: The number of times supervisor page is out of sync was detected in in \#PF or VerifyAccessSyncPage. */ + STAMCOUNTER StatRZPageOutOfSyncUserWrite; /**< RC/R0: The number of times user page is out of sync was detected in \#PF. */ + STAMCOUNTER StatRZPageOutOfSyncSupervisorWrite; /**< RC/R0: The number of times supervisor page is out of sync was detected in in \#PF. */ + STAMCOUNTER StatRZPageOutOfSyncBallloon; /**< RC/R0: The number of times a ballooned page was accessed (read). */ + STAMPROFILE StatRZPrefetch; /**< RC/R0: PGMPrefetchPage. */ + STAMPROFILE StatRZFlushTLB; /**< RC/R0: Profiling of the PGMFlushTLB() body. */ + STAMCOUNTER StatRZFlushTLBNewCR3; /**< RC/R0: The number of times PGMFlushTLB was called with a new CR3, non-global. (switch) */ + STAMCOUNTER StatRZFlushTLBNewCR3Global; /**< RC/R0: The number of times PGMFlushTLB was called with a new CR3, global. (switch) */ + STAMCOUNTER StatRZFlushTLBSameCR3; /**< RC/R0: The number of times PGMFlushTLB was called with the same CR3, non-global. (flush) */ + STAMCOUNTER StatRZFlushTLBSameCR3Global; /**< RC/R0: The number of times PGMFlushTLB was called with the same CR3, global. (flush) */ + STAMPROFILE StatRZGstModifyPage; /**< RC/R0: Profiling of the PGMGstModifyPage() body */ + + STAMPROFILE StatR3SyncCR3; /**< R3: PGMSyncCR3() profiling. */ + STAMPROFILE StatR3SyncCR3Handlers; /**< R3: Profiling of the PGMSyncCR3() update handler section. */ + STAMCOUNTER StatR3SyncCR3Global; /**< R3: The number of global CR3 syncs. */ + STAMCOUNTER StatR3SyncCR3NotGlobal; /**< R3: The number of non-global CR3 syncs. */ + STAMCOUNTER StatR3SyncCR3DstFreed; /**< R3: The number of times we've had to free a shadow entry. */ + STAMCOUNTER StatR3SyncCR3DstFreedSrcNP; /**< R3: The number of times we've had to free a shadow entry for which the source entry was not present. */ + STAMCOUNTER StatR3SyncCR3DstNotPresent; /**< R3: The number of times we've encountered a not present shadow entry for a present guest entry. 
*/ + STAMCOUNTER StatR3SyncCR3DstSkippedGlobalPD; /**< R3: The number of times a global page directory wasn't flushed. */ + STAMCOUNTER StatR3SyncCR3DstSkippedGlobalPT; /**< R3: The number of times a page table with only global entries wasn't flushed. */ + STAMCOUNTER StatR3SyncCR3DstCacheHit; /**< R3: The number of times we got some kind of cache hit on a page table. */ + STAMPROFILE StatR3SyncPT; /**< R3: PGMSyncPT() profiling. */ + STAMCOUNTER StatR3SyncPTFailed; /**< R3: The number of times PGMSyncPT() failed. */ + STAMCOUNTER StatR3SyncPT4K; /**< R3: Number of 4KB syncs. */ + STAMCOUNTER StatR3SyncPT4M; /**< R3: Number of 4MB syncs. */ + STAMCOUNTER StatR3SyncPagePDNAs; /**< R3: The number of time we've marked a PD not present from SyncPage to virtualize the accessed bit. */ + STAMCOUNTER StatR3SyncPagePDOutOfSync; /**< R3: The number of time we've encountered an out-of-sync PD in SyncPage. */ + STAMCOUNTER StatR3AccessedPage; /**< R3: The number of pages marked not present for accessed bit emulation. */ + STAMPROFILE StatR3DirtyBitTracking; /**< R3: Profiling the dirty bit tracking in CheckPageFault(). */ + STAMCOUNTER StatR3DirtyPage; /**< R3: The number of pages marked read-only for dirty bit tracking. */ + STAMCOUNTER StatR3DirtyPageBig; /**< R3: The number of pages marked read-only for dirty bit tracking. */ + STAMCOUNTER StatR3DirtyPageSkipped; /**< R3: The number of pages already dirty or readonly. */ + STAMCOUNTER StatR3DirtyPageTrap; /**< R3: The number of traps generated for dirty bit tracking. */ + STAMCOUNTER StatR3DirtyTrackRealPF; /**< R3: The number of real pages faults during dirty bit tracking. */ + STAMCOUNTER StatR3DirtiedPage; /**< R3: The number of pages marked dirty because of write accesses. */ + STAMCOUNTER StatR3PageAlreadyDirty; /**< R3: The number of pages already marked dirty because of write accesses. */ + STAMPROFILE StatR3InvalidatePage; /**< R3: PGMInvalidatePage() profiling. */ + STAMCOUNTER StatR3InvalidatePage4KBPages; /**< R3: The number of times PGMInvalidatePage() was called for a 4KB page. */ + STAMCOUNTER StatR3InvalidatePage4MBPages; /**< R3: The number of times PGMInvalidatePage() was called for a 4MB page. */ + STAMCOUNTER StatR3InvalidatePage4MBPagesSkip; /**< R3: The number of times PGMInvalidatePage() skipped a 4MB page. */ + STAMCOUNTER StatR3InvalidatePagePDNAs; /**< R3: The number of times PGMInvalidatePage() was called for a not accessed page directory. */ + STAMCOUNTER StatR3InvalidatePagePDNPs; /**< R3: The number of times PGMInvalidatePage() was called for a not present page directory. */ + STAMCOUNTER StatR3InvalidatePagePDMappings; /**< R3: The number of times PGMInvalidatePage() was called for a page directory containing mappings (no conflict). */ + STAMCOUNTER StatR3InvalidatePagePDOutOfSync; /**< R3: The number of times PGMInvalidatePage() was called for an out of sync page directory. */ + STAMCOUNTER StatR3InvalidatePageSizeChanges ; /**< R3: The number of times PGMInvalidatePage() was called on a page size change (4KB <-> 2/4MB). */ + STAMCOUNTER StatR3InvalidatePageSkipped; /**< R3: The number of times PGMInvalidatePage() was skipped due to not present shw or pending pending SyncCR3. */ + STAMCOUNTER StatR3PageOutOfSyncUser; /**< R3: The number of times user page is out of sync was detected in \#PF or VerifyAccessSyncPage. */ + STAMCOUNTER StatR3PageOutOfSyncSupervisor; /**< R3: The number of times supervisor page is out of sync was detected in in \#PF or VerifyAccessSyncPage. 
*/ + STAMCOUNTER StatR3PageOutOfSyncUserWrite; /**< R3: The number of times user page is out of sync was detected in \#PF. */ + STAMCOUNTER StatR3PageOutOfSyncSupervisorWrite; /**< R3: The number of times supervisor page is out of sync was detected in \#PF. */ + STAMCOUNTER StatR3PageOutOfSyncBallloon; /**< R3: The number of times a ballooned page was accessed (read). */ + STAMPROFILE StatR3Prefetch; /**< R3: PGMPrefetchPage. */ + STAMPROFILE StatR3FlushTLB; /**< R3: Profiling of the PGMFlushTLB() body. */ + STAMCOUNTER StatR3FlushTLBNewCR3; /**< R3: The number of times PGMFlushTLB was called with a new CR3, non-global. (switch) */ + STAMCOUNTER StatR3FlushTLBNewCR3Global; /**< R3: The number of times PGMFlushTLB was called with a new CR3, global. (switch) */ + STAMCOUNTER StatR3FlushTLBSameCR3; /**< R3: The number of times PGMFlushTLB was called with the same CR3, non-global. (flush) */ + STAMCOUNTER StatR3FlushTLBSameCR3Global; /**< R3: The number of times PGMFlushTLB was called with the same CR3, global. (flush) */ + STAMPROFILE StatR3GstModifyPage; /**< R3: Profiling of the PGMGstModifyPage() body */ +} PGMCPUSTATS; + + +/** + * Converts a PGMCPU pointer into a VM pointer. + * @returns Pointer to the VM structure the PGMCPU is part of. + * @param pPGM Pointer to PGMCPU instance data. + */ +#define PGMCPU2VM(pPGM) ( (PVM)((char*)(pPGM) - (pPGM)->offVM) ) + +/** + * Converts a PGMCPU pointer into a PGM pointer. + * @returns Pointer to the PGM structure the PGMCPU is part of. + * @param pPGMCpu Pointer to PGMCPU instance data. + */ +#define PGMCPU2PGM(pPGMCpu) ( (PPGM)((char *)(pPGMCpu) - (pPGMCpu)->offPGM) ) + +/** + * PGMCPU Data (part of VMCPU). + */ +typedef struct PGMCPU +{ + /** Offset to the VM structure. */ + int32_t offVM; + /** Offset to the VMCPU structure. */ + int32_t offVCpu; + /** Offset of the PGM structure relative to VMCPU. */ + int32_t offPGM; + uint32_t uPadding0; /**< structure size alignment. */ + +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE + /** Automatically tracked physical memory mapping set. + * Ring-0 and strict raw-mode builds. */ + PGMMAPSET AutoSet; +#endif + + /** A20 gate mask. + * Our current approach to A20 emulation is to let REM do it and not bother + * anywhere else. The interesting guests will be operating with it enabled anyway. + * But should the need arise, we'll subject physical addresses to this mask. */ + RTGCPHYS GCPhysA20Mask; + /** A20 gate state - boolean! */ + bool fA20Enabled; + /** Mirror of the EFER.NXE bit. Managed by PGMNotifyNxeChanged. */ + bool fNoExecuteEnabled; + /** Unused bits. */ + bool afUnused[2]; + + /** What needs syncing (PGM_SYNC_*). + * This is used to queue operations for PGMSyncCR3, PGMInvalidatePage, + * PGMFlushTLB, and PGMR3Load. */ + uint32_t fSyncFlags; + + /** The shadow paging mode. */ + PGMMODE enmShadowMode; + /** The guest paging mode. */ + PGMMODE enmGuestMode; + /** Guest mode data table index (PGM_TYPE_XXX). */ + uint8_t volatile idxGuestModeData; + /** Shadow mode data table index (PGM_TYPE_XXX). */ + uint8_t volatile idxShadowModeData; + /** Both mode data table index (complicated). */ + uint8_t volatile idxBothModeData; + /** Alignment padding. */ + uint8_t abPadding[5]; + + /** The current physical address represented in the guest CR3 register. */ + RTGCPHYS GCPhysCR3; + + /** @name 32-bit Guest Paging. + * @{ */ + /** The guest's page directory, R3 pointer. */ + R3PTRTYPE(PX86PD) pGst32BitPdR3; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + /** The guest's page directory, R0 pointer.
*/ + R0PTRTYPE(PX86PD) pGst32BitPdR0; +#endif + /** Mask containing the MBZ bits of a big page PDE. */ + uint32_t fGst32BitMbzBigPdeMask; + /** Set if the page size extension (PSE) is enabled. */ + bool fGst32BitPageSizeExtension; + /** Alignment padding. */ + bool afAlignment2[3]; + /** @} */ + + /** @name PAE Guest Paging. + * @{ */ + /** The guest's page directory pointer table, R3 pointer. */ + R3PTRTYPE(PX86PDPT) pGstPaePdptR3; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + /** The guest's page directory pointer table, R0 pointer. */ + R0PTRTYPE(PX86PDPT) pGstPaePdptR0; +#endif + + /** The guest's page directories, R3 pointers. + * These are individual pointers and don't have to be adjacent. + * These don't have to be up-to-date - use pgmGstGetPaePD() to access them. */ + R3PTRTYPE(PX86PDPAE) apGstPaePDsR3[4]; + /** The guest's page directories, R0 pointers. + * Same restrictions as apGstPaePDsR3. */ +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + R0PTRTYPE(PX86PDPAE) apGstPaePDsR0[4]; +#endif + /** The physical addresses of the guest page directories (PAE) pointed to by apGstPagePDsHC/GC. + * @todo Remove this and use aGstPaePdpeRegs instead? */ + RTGCPHYS aGCPhysGstPaePDs[4]; + /** The values of the 4 PDPE CPU registers (PAE). */ + X86PDPE aGstPaePdpeRegs[4]; + /** The physical addresses of the monitored guest page directories (PAE). */ + RTGCPHYS aGCPhysGstPaePDsMonitored[4]; + /** Mask containing the MBZ PTE bits. */ + uint64_t fGstPaeMbzPteMask; + /** Mask containing the MBZ PDE bits. */ + uint64_t fGstPaeMbzPdeMask; + /** Mask containing the MBZ big page PDE bits. */ + uint64_t fGstPaeMbzBigPdeMask; + /** Mask containing the MBZ PDPE bits. */ + uint64_t fGstPaeMbzPdpeMask; + /** @} */ + + /** @name AMD64 Guest Paging. + * @{ */ + /** The guest's page directory pointer table, R3 pointer. */ + R3PTRTYPE(PX86PML4) pGstAmd64Pml4R3; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE + /** The guest's page directory pointer table, R0 pointer. */ + R0PTRTYPE(PX86PML4) pGstAmd64Pml4R0; +#else + RTR0PTR alignment6b; /**< alignment equalizer. */ +#endif + /** Mask containing the MBZ PTE bits. */ + uint64_t fGstAmd64MbzPteMask; + /** Mask containing the MBZ PDE bits. */ + uint64_t fGstAmd64MbzPdeMask; + /** Mask containing the MBZ big page PDE bits. */ + uint64_t fGstAmd64MbzBigPdeMask; + /** Mask containing the MBZ PDPE bits. */ + uint64_t fGstAmd64MbzPdpeMask; + /** Mask containing the MBZ big page PDPE bits. */ + uint64_t fGstAmd64MbzBigPdpeMask; + /** Mask containing the MBZ PML4E bits. */ + uint64_t fGstAmd64MbzPml4eMask; + /** Mask containing the PDPE bits that we shadow. */ + uint64_t fGstAmd64ShadowedPdpeMask; + /** Mask containing the PML4E bits that we shadow. */ + uint64_t fGstAmd64ShadowedPml4eMask; + /** @} */ + + /** @name PAE and AMD64 Guest Paging. + * @{ */ + /** Mask containing the PTE bits that we shadow. */ + uint64_t fGst64ShadowedPteMask; + /** Mask containing the PDE bits that we shadow. */ + uint64_t fGst64ShadowedPdeMask; + /** Mask containing the big page PDE bits that we shadow in the PDE. */ + uint64_t fGst64ShadowedBigPdeMask; + /** Mask containing the big page PDE bits that we shadow in the PTE. */ + uint64_t fGst64ShadowedBigPde4PteMask; + /** @} */ + + /** Pointer to the page of the current active CR3 - R3 Ptr. */ + R3PTRTYPE(PPGMPOOLPAGE) pShwPageCR3R3; + /** Pointer to the page of the current active CR3 - R0 Ptr. */ + R0PTRTYPE(PPGMPOOLPAGE) pShwPageCR3R0; + + /** For saving stack space, the disassembler state is allocated here instead of + * on the stack. 
*/ + DISCPUSTATE DisState; + + /** Counts the number of times the netware WP0+RO+US hack has been applied. */ + uint64_t cNetwareWp0Hacks; + + /** Counts the number of PGM pool access handler calls. */ + uint64_t cPoolAccessHandler; + + /** @name Release Statistics + * @{ */ + /** The number of times the guest has switched mode since last reset or statistics reset. */ + STAMCOUNTER cGuestModeChanges; + /** The number of times the A20 gate state has changed since last reset or statistics reset. */ + STAMCOUNTER cA20Changes; + /** @} */ + +#ifdef VBOX_WITH_STATISTICS /** @todo move this chunk to the heap. */ + /** @name Statistics + * @{ */ + /** R0: Pointer to the statistics. */ + R0PTRTYPE(PGMCPUSTATS *) pStatsR0; + /** R0: Which statistic this \#PF should be attributed to. */ + R0PTRTYPE(PSTAMPROFILE) pStatTrap0eAttributionR0; + /** R3: Pointer to the statistics. */ + R3PTRTYPE(PGMCPUSTATS *) pStatsR3; + /** Alignment padding. */ + RTR3PTR pPaddingR3; + /** @} */ +#endif /* VBOX_WITH_STATISTICS */ +} PGMCPU; +/** Pointer to the per-CPU PGM data. */ +typedef PGMCPU *PPGMCPU; + + +/** @name PGM::fSyncFlags Flags + * @note Was part of saved state a long time ago. + * @{ + */ +/* 0 used to be PGM_SYNC_UPDATE_PAGE_BIT_VIRTUAL */ +/** Always sync CR3. */ +#define PGM_SYNC_ALWAYS RT_BIT(1) +/** Check monitoring on next CR3 (re)load and invalidate page. + * @todo This is obsolete now. Remove after 2.2.0 is branched off. */ +#define PGM_SYNC_MONITOR_CR3 RT_BIT(2) +/** Check guest mapping in SyncCR3. */ +#define PGM_SYNC_MAP_CR3 RT_BIT(3) +/** Clear the page pool (a lightweight flush). */ +#define PGM_SYNC_CLEAR_PGM_POOL_BIT 8 +#define PGM_SYNC_CLEAR_PGM_POOL RT_BIT(PGM_SYNC_CLEAR_PGM_POOL_BIT) +/** @} */ + + +/** + * PGM GVM instance data. + */ +typedef struct PGMR0PERVM +{ + /** @name PGM Pool related stuff. + * @{ */ + /** Critical section for serializing pool growth. */ + RTCRITSECT PoolGrowCritSect; + /** The memory objects for the pool pages. */ + RTR0MEMOBJ ahPoolMemObjs[(PGMPOOL_IDX_LAST + PGMPOOL_CFG_MAX_GROW - 1) / PGMPOOL_CFG_MAX_GROW]; + /** The ring-3 mapping objects for the pool pages. */ + RTR0MEMOBJ ahPoolMapObjs[(PGMPOOL_IDX_LAST + PGMPOOL_CFG_MAX_GROW - 1) / PGMPOOL_CFG_MAX_GROW]; + /** @} */ +} PGMR0PERVM; + +RT_C_DECLS_BEGIN + +#if defined(VBOX_STRICT) && defined(IN_RING3) +int pgmLockDebug(PVMCC pVM, RT_SRC_POS_DECL); +# define pgmLock(a_pVM) pgmLockDebug(a_pVM, RT_SRC_POS) +#else +int pgmLock(PVMCC pVM); +#endif +void pgmUnlock(PVM pVM); +/** + * Asserts that the caller owns the PGM lock. + * This is the internal variant of PGMIsLockOwner. + * @param a_pVM Pointer to the VM. + */ +#define PGM_LOCK_ASSERT_OWNER(a_pVM) Assert(PDMCritSectIsOwner(&(a_pVM)->pgm.s.CritSectX)) +/** + * Asserts that the caller owns the PGM lock. + * This is the internal variant of PGMIsLockOwner. + * @param a_pVM Pointer to the VM. + * @param a_pVCpu The current CPU handle.
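+ *
+ * @par Example
+ * A minimal, hypothetical sketch of the lock/assert pattern (only pgmLock,
+ * pgmUnlock and the two assertion macros are real; pVM and pVCpu stand for
+ * whatever pointers the caller already has at hand):
+ * @code
+ *    pgmLock(pVM);                          // enter the PGM critical section
+ *    PGM_LOCK_ASSERT_OWNER(pVM);            // we must own the lock from here on
+ *    PGM_LOCK_ASSERT_OWNER_EX(pVM, pVCpu);  // same check, tied to the calling EMT
+ *    // ... work on RAM ranges, handlers, pool pages ...
+ *    pgmUnlock(pVM);                        // always paired with pgmLock
+ * @endcode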
+ */ +#define PGM_LOCK_ASSERT_OWNER_EX(a_pVM, a_pVCpu) Assert(PDMCritSectIsOwnerEx(&(a_pVM)->pgm.s.CritSectX, a_pVCpu)) + +#ifndef PGM_WITHOUT_MAPPINGS +int pgmR3MappingsFixInternal(PVM pVM, RTGCPTR GCPtrBase, uint32_t cb); +int pgmR3SyncPTResolveConflict(PVM pVM, PPGMMAPPING pMapping, PX86PD pPDSrc, RTGCPTR GCPtrOldMapping); +int pgmR3SyncPTResolveConflictPAE(PVM pVM, PPGMMAPPING pMapping, RTGCPTR GCPtrOldMapping); +int pgmMapResolveConflicts(PVM pVM); +PPGMMAPPING pgmGetMapping(PVM pVM, RTGCPTR GCPtr); +DECLCALLBACK(void) pgmR3MapInfo(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +#endif /* !PGM_WITHOUT_MAPPINGS */ + +int pgmHandlerPhysicalExCreate(PVMCC pVM, PGMPHYSHANDLERTYPE hType, RTR3PTR pvUserR3, RTR0PTR pvUserR0, + RTRCPTR pvUserRC, R3PTRTYPE(const char *) pszDesc, PPGMPHYSHANDLER *ppPhysHandler); +int pgmHandlerPhysicalExDup(PVMCC pVM, PPGMPHYSHANDLER pPhysHandlerSrc, PPGMPHYSHANDLER *ppPhysHandler); +int pgmHandlerPhysicalExRegister(PVMCC pVM, PPGMPHYSHANDLER pPhysHandler, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast); +int pgmHandlerPhysicalExDeregister(PVMCC pVM, PPGMPHYSHANDLER pPhysHandler, int fRestoreAsRAM); +int pgmHandlerPhysicalExDestroy(PVMCC pVM, PPGMPHYSHANDLER pHandler); +void pgmR3HandlerPhysicalUpdateAll(PVM pVM); +bool pgmHandlerPhysicalIsAll(PVMCC pVM, RTGCPHYS GCPhys); +void pgmHandlerPhysicalResetAliasedPage(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhysPage, bool fDoAccounting); +DECLCALLBACK(void) pgmR3InfoHandlers(PVM pVM, PCDBGFINFOHLP pHlp, const char *pszArgs); +int pgmR3InitSavedState(PVM pVM, uint64_t cbRam); + +int pgmPhysAllocPage(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys); +int pgmPhysAllocLargePage(PVMCC pVM, RTGCPHYS GCPhys); +int pgmPhysRecheckLargePage(PVMCC pVM, RTGCPHYS GCPhys, PPGMPAGE pLargePage); +int pgmPhysPageLoadIntoTlb(PVMCC pVM, RTGCPHYS GCPhys); +int pgmPhysPageLoadIntoTlbWithPage(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys); +void pgmPhysPageMakeWriteMonitoredWritable(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys); +int pgmPhysPageMakeWritable(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys); +int pgmPhysPageMakeWritableAndMap(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, void **ppv); +int pgmPhysPageMap(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, void **ppv); +int pgmPhysPageMapReadOnly(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, void const **ppv); +int pgmPhysPageMapByPageID(PVMCC pVM, uint32_t idPage, RTHCPHYS HCPhys, void **ppv); +int pgmPhysGCPhys2R3Ptr(PVMCC pVM, RTGCPHYS GCPhys, PRTR3PTR pR3Ptr); +int pgmPhysCr3ToHCPtr(PVM pVM, RTGCPHYS GCPhys, PRTR3PTR pR3Ptr); +int pgmPhysGCPhys2CCPtrInternalDepr(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, void **ppv); +int pgmPhysGCPhys2CCPtrInternal(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock); +int pgmPhysGCPhys2CCPtrInternalReadOnly(PVMCC pVM, PPGMPAGE pPage, RTGCPHYS GCPhys, const void **ppv, PPGMPAGEMAPLOCK pLock); +void pgmPhysReleaseInternalPageMappingLock(PVMCC pVM, PPGMPAGEMAPLOCK pLock); +PGM_ALL_CB2_PROTO(FNPGMPHYSHANDLER) pgmPhysRomWriteHandler; +#ifndef IN_RING3 +DECLEXPORT(FNPGMPHYSHANDLER) pgmPhysHandlerRedirectToHC; +DECLEXPORT(FNPGMRZPHYSPFHANDLER) pgmPhysPfHandlerRedirectToHC; +DECLEXPORT(FNPGMRZPHYSPFHANDLER) pgmPhysRomWritePfHandler; +#endif +int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys, + PGMPAGETYPE enmNewType); +void pgmPhysInvalidRamRangeTlbs(PVMCC pVM); +void pgmPhysInvalidatePageMapTLB(PVMCC pVM); +void pgmPhysInvalidatePageMapTLBEntry(PVM pVM, RTGCPHYS GCPhys); +PPGMRAMRANGE 
pgmPhysGetRangeSlow(PVM pVM, RTGCPHYS GCPhys); +PPGMRAMRANGE pgmPhysGetRangeAtOrAboveSlow(PVM pVM, RTGCPHYS GCPhys); +PPGMPAGE pgmPhysGetPageSlow(PVM pVM, RTGCPHYS GCPhys); +int pgmPhysGetPageExSlow(PVM pVM, RTGCPHYS GCPhys, PPPGMPAGE ppPage); +int pgmPhysGetPageAndRangeExSlow(PVM pVM, RTGCPHYS GCPhys, PPPGMPAGE ppPage, PPGMRAMRANGE *ppRam); + +#ifdef IN_RING3 +void pgmR3PhysRelinkRamRanges(PVM pVM); +int pgmR3PhysRamPreAllocate(PVM pVM); +int pgmR3PhysRamReset(PVM pVM); +int pgmR3PhysRomReset(PVM pVM); +int pgmR3PhysRamZeroAll(PVM pVM); +int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk); +int pgmR3PhysRamTerm(PVM pVM); +void pgmR3PhysRomTerm(PVM pVM); +void pgmR3PhysAssertSharedPageChecksums(PVM pVM); + +int pgmR3PoolInit(PVM pVM); +void pgmR3PoolRelocate(PVM pVM); +void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu); +void pgmR3PoolReset(PVM pVM); +void pgmR3PoolClearAll(PVM pVM, bool fFlushRemTlb); +DECLCALLBACK(VBOXSTRICTRC) pgmR3PoolClearAllRendezvous(PVM pVM, PVMCPU pVCpu, void *fpvFlushRemTbl); +void pgmR3PoolWriteProtectPages(PVM pVM); + +#endif /* IN_RING3 */ +#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 +int pgmRZDynMapHCPageCommon(PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL); +int pgmRZDynMapGCPageCommon(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL); +# ifdef LOG_ENABLED +void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint, RT_SRC_POS_DECL); +# else +void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint); +# endif +#endif +int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled, + uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage); +void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable); +void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable); +int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush = true /* DO NOT USE false UNLESS YOU KNOWN WHAT YOU'RE DOING!! 
*/); +void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys); +PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys); +PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys); +int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv); +int pgmPoolSyncCR3(PVMCPUCC pVCpu); +bool pgmPoolIsDirtyPageSlow(PVM pVM, RTGCPHYS GCPhys); +void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT); +int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs); +void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte); +uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte); +void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPoolPage, PPGMPAGE pPhysPage, uint16_t iPte); +void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage); +void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage); +PGM_ALL_CB2_PROTO(FNPGMPHYSHANDLER) pgmPoolAccessHandler; +#ifndef IN_RING3 +DECLEXPORT(FNPGMRZPHYSPFHANDLER) pgmRZPoolAccessPfHandler; +#endif + +void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage); +void pgmPoolResetDirtyPages(PVMCC pVM); +void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage); + +int pgmR3ExitShadowModeBeforePoolFlush(PVMCPU pVCpu); +int pgmR3ReEnterShadowModeAfterPoolFlush(PVM pVM, PVMCPU pVCpu); +void pgmR3RefreshShadowModeAfterA20Change(PVMCPU pVCpu); + +#ifndef PGM_WITHOUT_MAPPINGS +void pgmMapSetShadowPDEs(PVM pVM, PPGMMAPPING pMap, unsigned iNewPDE); +void pgmMapClearShadowPDEs(PVM pVM, PPGMPOOLPAGE pShwPageCR3, PPGMMAPPING pMap, unsigned iOldPDE, bool fDeactivateCR3); +int pgmMapActivateCR3(PVM pVM, PPGMPOOLPAGE pShwPageCR3); +int pgmMapDeactivateCR3(PVM pVM, PPGMPOOLPAGE pShwPageCR3); +#endif + +int pgmShwMakePageSupervisorAndWritable(PVMCPUCC pVCpu, RTGCPTR GCPtr, bool fBigPage, uint32_t fOpFlags); +int pgmShwSyncPaePDPtr(PVMCPUCC pVCpu, RTGCPTR GCPtr, X86PGPAEUINT uGstPdpe, PX86PDPAE *ppPD); +int pgmShwSyncNestedPageLocked(PVMCPUCC pVCpu, RTGCPHYS GCPhysFault, uint32_t cPages, PGMMODE enmShwPagingMode); + +int pgmGstLazyMap32BitPD(PVMCPUCC pVCpu, PX86PD *ppPd); +int pgmGstLazyMapPaePDPT(PVMCPUCC pVCpu, PX86PDPT *ppPdpt); +int pgmGstLazyMapPaePD(PVMCPUCC pVCpu, uint32_t iPdpt, PX86PDPAE *ppPd); +int pgmGstLazyMapPml4(PVMCPUCC pVCpu, PX86PML4 *ppPml4); +int pgmGstPtWalk(PVMCPUCC pVCpu, RTGCPTR GCPtr, PPGMPTWALKGST pWalk); +int pgmGstPtWalkNext(PVMCPUCC pVCpu, RTGCPTR GCPtr, PPGMPTWALKGST pWalk); + +# if defined(VBOX_STRICT) && HC_ARCH_BITS == 64 && defined(IN_RING3) +FNDBGCCMD pgmR3CmdCheckDuplicatePages; +FNDBGCCMD pgmR3CmdShowSharedModules; +# endif + +void pgmLogState(PVM pVM); + +RT_C_DECLS_END + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_PGMInternal_h */ + diff --git a/src/VBox/VMM/include/SELMInternal.h b/src/VBox/VMM/include/SELMInternal.h new file mode 100644 index 00000000..f74a5019 --- /dev/null +++ b/src/VBox/VMM/include/SELMInternal.h @@ -0,0 +1,62 @@ +/* $Id: SELMInternal.h $ */ +/** @file + * SELM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_SELMInternal_h +#define VMM_INCLUDED_SRC_include_SELMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/cpum.h> +#include <VBox/vmm/pgm.h> +#include <VBox/log.h> +#include <iprt/x86.h> + + + +/** @defgroup grp_selm_int Internals + * @ingroup grp_selm + * @internal + * @{ + */ + +/** The number of GDT entries allocated for our GDT. (full size) */ +#define SELM_GDT_ELEMENTS 8192 + + +/** + * SELM Data (part of VM) + * + * @note This is a very marginal component after kicking raw-mode. + */ +typedef struct SELM +{ +#ifdef VBOX_WITH_STATISTICS + STAMCOUNTER StatLoadHidSelGst; + STAMCOUNTER StatLoadHidSelShw; +#endif + STAMCOUNTER StatLoadHidSelReadErrors; + STAMCOUNTER StatLoadHidSelGstNoGood; +} SELM, *PSELM; + + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_SELMInternal_h */ diff --git a/src/VBox/VMM/include/SSMInternal.h b/src/VBox/VMM/include/SSMInternal.h new file mode 100644 index 00000000..d27bf340 --- /dev/null +++ b/src/VBox/VMM/include/SSMInternal.h @@ -0,0 +1,331 @@ +/* $Id: SSMInternal.h $ */ +/** @file + * SSM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_SSMInternal_h +#define VMM_INCLUDED_SRC_include_SSMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/ssm.h> +#include <iprt/critsect.h> + +RT_C_DECLS_BEGIN + +/** @defgroup grp_ssm_int Internals + * @ingroup grp_ssm + * @internal + * @{ + */ + + +/** + * Data unit callback type. + */ +typedef enum SSMUNITTYPE +{ + /** PDM Device. */ + SSMUNITTYPE_DEV = 1, + /** PDM Driver. */ + SSMUNITTYPE_DRV, + /** PDM USB device. */ + SSMUNITTYPE_USB, + /** VM Internal. */ + SSMUNITTYPE_INTERNAL, + /** External Wrapper. */ + SSMUNITTYPE_EXTERNAL +} SSMUNITTYPE; + +/** Pointer to a data unit descriptor. */ +typedef struct SSMUNIT *PSSMUNIT; + +/** + * Data unit descriptor. + */ +typedef struct SSMUNIT +{ + /** Pointer to the next one in the list. */ + PSSMUNIT pNext; + + /** Called in this save/load operation. + * The flag is used to determine whether there is need for a call to + * done or not. */ + bool fCalled; + /** Finished its live part. + * This is used to handle VERR_SSM_VOTE_FOR_GIVING_UP. */ + bool fDoneLive; + /** Callback interface type. */ + SSMUNITTYPE enmType; + /** Type specific data. */ + union + { + /** SSMUNITTYPE_DEV. */ + struct + { + /** Prepare live save. */ + PFNSSMDEVLIVEPREP pfnLivePrep; + /** Execute live save.
*/ + PFNSSMDEVLIVEEXEC pfnLiveExec; + /** Vote live save complete. */ + PFNSSMDEVLIVEVOTE pfnLiveVote; + /** Prepare save. */ + PFNSSMDEVSAVEPREP pfnSavePrep; + /** Execute save. */ + PFNSSMDEVSAVEEXEC pfnSaveExec; + /** Done save. */ + PFNSSMDEVSAVEDONE pfnSaveDone; + /** Prepare load. */ + PFNSSMDEVLOADPREP pfnLoadPrep; + /** Execute load. */ + PFNSSMDEVLOADEXEC pfnLoadExec; + /** Done load. */ + PFNSSMDEVLOADDONE pfnLoadDone; + /** Device instance. */ + PPDMDEVINS pDevIns; + } Dev; + + /** SSMUNITTYPE_DRV. */ + struct + { + /** Prepare live save. */ + PFNSSMDRVLIVEPREP pfnLivePrep; + /** Execute live save. */ + PFNSSMDRVLIVEEXEC pfnLiveExec; + /** Vote live save complete. */ + PFNSSMDRVLIVEVOTE pfnLiveVote; + /** Prepare save. */ + PFNSSMDRVSAVEPREP pfnSavePrep; + /** Execute save. */ + PFNSSMDRVSAVEEXEC pfnSaveExec; + /** Done save. */ + PFNSSMDRVSAVEDONE pfnSaveDone; + /** Prepare load. */ + PFNSSMDRVLOADPREP pfnLoadPrep; + /** Execute load. */ + PFNSSMDRVLOADEXEC pfnLoadExec; + /** Done load. */ + PFNSSMDRVLOADDONE pfnLoadDone; + /** Driver instance. */ + PPDMDRVINS pDrvIns; + } Drv; + + /** SSMUNITTYPE_USB. */ + struct + { + /** Prepare live save. */ + PFNSSMUSBLIVEPREP pfnLivePrep; + /** Execute live save. */ + PFNSSMUSBLIVEEXEC pfnLiveExec; + /** Vote live save complete. */ + PFNSSMUSBLIVEVOTE pfnLiveVote; + /** Prepare save. */ + PFNSSMUSBSAVEPREP pfnSavePrep; + /** Execute save. */ + PFNSSMUSBSAVEEXEC pfnSaveExec; + /** Done save. */ + PFNSSMUSBSAVEDONE pfnSaveDone; + /** Prepare load. */ + PFNSSMUSBLOADPREP pfnLoadPrep; + /** Execute load. */ + PFNSSMUSBLOADEXEC pfnLoadExec; + /** Done load. */ + PFNSSMUSBLOADDONE pfnLoadDone; + /** USB instance. */ + PPDMUSBINS pUsbIns; + } Usb; + + /** SSMUNITTYPE_INTERNAL. */ + struct + { + /** Prepare live save. */ + PFNSSMINTLIVEPREP pfnLivePrep; + /** Execute live save. */ + PFNSSMINTLIVEEXEC pfnLiveExec; + /** Vote live save complete. */ + PFNSSMINTLIVEVOTE pfnLiveVote; + /** Prepare save. */ + PFNSSMINTSAVEPREP pfnSavePrep; + /** Execute save. */ + PFNSSMINTSAVEEXEC pfnSaveExec; + /** Done save. */ + PFNSSMINTSAVEDONE pfnSaveDone; + /** Prepare load. */ + PFNSSMINTLOADPREP pfnLoadPrep; + /** Execute load. */ + PFNSSMINTLOADEXEC pfnLoadExec; + /** Done load. */ + PFNSSMINTLOADDONE pfnLoadDone; + } Internal; + + /** SSMUNITTYPE_EXTERNAL. */ + struct + { + /** Prepare live save. */ + PFNSSMEXTLIVEPREP pfnLivePrep; + /** Execute live save. */ + PFNSSMEXTLIVEEXEC pfnLiveExec; + /** Vote live save complete. */ + PFNSSMEXTLIVEVOTE pfnLiveVote; + /** Prepare save. */ + PFNSSMEXTSAVEPREP pfnSavePrep; + /** Execute save. */ + PFNSSMEXTSAVEEXEC pfnSaveExec; + /** Done save. */ + PFNSSMEXTSAVEDONE pfnSaveDone; + /** Prepare load. */ + PFNSSMEXTLOADPREP pfnLoadPrep; + /** Execute load. */ + PFNSSMEXTLOADEXEC pfnLoadExec; + /** Done load. */ + PFNSSMEXTLOADDONE pfnLoadDone; + /** User data. */ + void *pvUser; + } External; + + struct + { + /** Prepare live save. */ + PFNRT pfnLivePrep; + /** Execute live save. */ + PFNRT pfnLiveExec; + /** Vote live save complete. */ + PFNRT pfnLiveVote; + /** Prepare save. */ + PFNRT pfnSavePrep; + /** Execute save. */ + PFNRT pfnSaveExec; + /** Done save. */ + PFNRT pfnSaveDone; + /** Prepare load. */ + PFNRT pfnLoadPrep; + /** Execute load. */ + PFNRT pfnLoadExec; + /** Done load. */ + PFNRT pfnLoadDone; + /** User data. */ + void *pvKey; + } Common; + } u; + /** Data layout version. */ + uint32_t u32Version; + /** Instance number. */ + uint32_t u32Instance; + /** The offset of the final data unit. 
+ * This is used for constructing the directory. */ + RTFOFF offStream; + /** Critical section to be taken before working any of the callbacks. */ + PPDMCRITSECT pCritSect; + /** The guessed size of the data unit - used only for progress indication. */ + size_t cbGuess; + /** Name size. (bytes) */ + size_t cchName; + /** Name of this unit. (extends beyond the defined size) */ + char szName[1]; +} SSMUNIT; + +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLivePrep, u.Dev.pfnLivePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveExec, u.Dev.pfnLiveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveVote, u.Dev.pfnLiveVote); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSavePrep, u.Dev.pfnSavePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveExec, u.Dev.pfnSaveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveDone, u.Dev.pfnSaveDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadPrep, u.Dev.pfnLoadPrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadExec, u.Dev.pfnLoadExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadDone, u.Dev.pfnLoadDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pvKey, u.Dev.pDevIns); + +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLivePrep, u.Drv.pfnLivePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveExec, u.Drv.pfnLiveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveVote, u.Drv.pfnLiveVote); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSavePrep, u.Drv.pfnSavePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveExec, u.Drv.pfnSaveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveDone, u.Drv.pfnSaveDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadPrep, u.Drv.pfnLoadPrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadExec, u.Drv.pfnLoadExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadDone, u.Drv.pfnLoadDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pvKey, u.Drv.pDrvIns); + +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLivePrep, u.Usb.pfnLivePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveExec, u.Usb.pfnLiveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveVote, u.Usb.pfnLiveVote); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSavePrep, u.Usb.pfnSavePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveExec, u.Usb.pfnSaveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveDone, u.Usb.pfnSaveDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadPrep, u.Usb.pfnLoadPrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadExec, u.Usb.pfnLoadExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadDone, u.Usb.pfnLoadDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pvKey, u.Usb.pUsbIns); + +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLivePrep, u.Internal.pfnLivePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveExec, u.Internal.pfnLiveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveVote, u.Internal.pfnLiveVote); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSavePrep, u.Internal.pfnSavePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveExec, u.Internal.pfnSaveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveDone, u.Internal.pfnSaveDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadPrep, u.Internal.pfnLoadPrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadExec, u.Internal.pfnLoadExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadDone, u.Internal.pfnLoadDone); + 
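+/*
+ * Note: the AssertCompile2MemberOffsets checks in this block are what make the
+ * u.Common alias safe to use: every unit type keeps its nine callback pointers
+ * (and, where it has one, its instance/user pointer) at identical offsets, so
+ * generic SSM code can probe the callbacks without switching on enmType.  A
+ * minimal sketch of that pattern follows; the helper is hypothetical and not
+ * part of this header:
+ *
+ *     static bool ssmUnitHasLiveCallbacks(PSSMUNIT pUnit)
+ *     {
+ *         // Valid for Dev, Drv, Usb, Internal and External units alike,
+ *         // because the pfnLive* members all share one offset (asserted here).
+ *         return pUnit->u.Common.pfnLiveExec != NULL
+ *             || pUnit->u.Common.pfnLiveVote != NULL;
+ *     }
+ */
+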
+AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLivePrep, u.External.pfnLivePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveExec, u.External.pfnLiveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLiveVote, u.External.pfnLiveVote); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSavePrep, u.External.pfnSavePrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveExec, u.External.pfnSaveExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnSaveDone, u.External.pfnSaveDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadPrep, u.External.pfnLoadPrep); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadExec, u.External.pfnLoadExec); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pfnLoadDone, u.External.pfnLoadDone); +AssertCompile2MemberOffsets(SSMUNIT, u.Common.pvKey, u.External.pvUser); + + +/** + * SSM VM Instance data. + * Changes to this must be checked against the padding of the cfgm union in VM! + * + * @todo Move this to UVM. + */ +typedef struct SSM +{ + /** Critical section for serializing cancellation (pSSM). */ + RTCRITSECT CancelCritSect; + /** The handle of the current save or load operation. + * This is used by SSMR3Cancel. */ + PSSMHANDLE volatile pSSM; + + /** FIFO of data entity descriptors. */ + R3PTRTYPE(PSSMUNIT) pHead; + /** The number of registered units. */ + uint32_t cUnits; + /** For lazy init. */ + bool fInitialized; + /** Current pass (for STAM). */ + uint32_t uPass; + uint32_t u32Alignment; +} SSM; +/** Pointer to SSM VM instance data. */ +typedef SSM *PSSM; + + + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_SSMInternal_h */ + diff --git a/src/VBox/VMM/include/STAMInternal.h b/src/VBox/VMM/include/STAMInternal.h new file mode 100644 index 00000000..53d73a32 --- /dev/null +++ b/src/VBox/VMM/include/STAMInternal.h @@ -0,0 +1,177 @@ +/* $Id: STAMInternal.h $ */ +/** @file + * STAM Internal Header. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_STAMInternal_h +#define VMM_INCLUDED_SRC_include_STAMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/gvmm.h> +#include <VBox/vmm/gmm.h> +#include <iprt/list.h> +#include <iprt/semaphore.h> + + + +RT_C_DECLS_BEGIN + +/** @defgroup grp_stam_int Internals + * @ingroup grp_stam + * @internal + * @{ + */ + +/** Pointer to sample descriptor. */ +typedef struct STAMDESC *PSTAMDESC; +/** Pointer to a sample lookup node. */ +typedef struct STAMLOOKUP *PSTAMLOOKUP; + +/** + * Sample lookup node. + */ +typedef struct STAMLOOKUP +{ + /** The parent lookup record. This is NULL for the root node. */ + PSTAMLOOKUP pParent; + /** Array of children (using array for binary searching). */ + PSTAMLOOKUP *papChildren; + /** Pointer to the description node, if any. */ + PSTAMDESC pDesc; + /** Number of descendants with descriptors. (Used for freeing up sub-trees.) */ + uint32_t cDescsInTree; + /** The number of children.
*/ + uint16_t cChildren; + /** The index in the parent papChildren array. UINT16_MAX for the root node. */ + uint16_t iParent; + /** The path offset. */ + uint16_t off; + /** The size of the path component. */ + uint16_t cch; + /** The name (variable size). */ + char szName[1]; +} STAMLOOKUP; + + +/** + * Sample descriptor. + */ +typedef struct STAMDESC +{ + /** Our entry in the big linear list. */ + RTLISTNODE ListEntry; + /** Pointer to our lookup node. */ + PSTAMLOOKUP pLookup; + /** Sample name. */ + const char *pszName; + /** Sample type. */ + STAMTYPE enmType; + /** Visibility type. */ + STAMVISIBILITY enmVisibility; + /** Pointer to the sample data. */ + union STAMDESCSAMPLEDATA + { + /** Counter. */ + PSTAMCOUNTER pCounter; + /** Profile. */ + PSTAMPROFILE pProfile; + /** Advanced profile. */ + PSTAMPROFILEADV pProfileAdv; + /** Ratio, unsigned 32-bit. */ + PSTAMRATIOU32 pRatioU32; + /** unsigned 8-bit. */ + uint8_t *pu8; + /** unsigned 16-bit. */ + uint16_t *pu16; + /** unsigned 32-bit. */ + uint32_t *pu32; + /** unsigned 64-bit. */ + uint64_t *pu64; + /** Simple void pointer. */ + void *pv; + /** Boolean. */ + bool *pf; + /** Callback. */ + struct STAMDESCSAMPLEDATACALLBACKS + { + /** The sample pointer. */ + void *pvSample; + /** Pointer to the reset callback. */ + PFNSTAMR3CALLBACKRESET pfnReset; + /** Pointer to the print callback. */ + PFNSTAMR3CALLBACKPRINT pfnPrint; + } Callback; + } u; + /** Unit. */ + STAMUNIT enmUnit; + /** The refresh group number (STAM_REFRESH_GRP_XXX). */ + uint8_t iRefreshGroup; + /** Description. */ + const char *pszDesc; +} STAMDESC; + + +/** + * STAM data kept in the UVM. + */ +typedef struct STAMUSERPERVM +{ + /** List of samples. */ + RTLISTANCHOR List; + /** Root of the lookup tree. */ + PSTAMLOOKUP pRoot; + + /** RW Lock for the list and tree. */ + RTSEMRW RWSem; + + /** The copy of the GVMM statistics. */ + GVMMSTATS GVMMStats; + /** The number of registered host CPU leaves. */ + uint32_t cRegisteredHostCpus; + + /** Explicit alignment padding. */ + uint32_t uAlignment; + /** The copy of the GMM statistics. */ + GMMSTATS GMMStats; +} STAMUSERPERVM; +#ifdef IN_RING3 +AssertCompileMemberAlignment(STAMUSERPERVM, GMMStats, 8); +#endif + +/** Pointer to the STAM data kept in the UVM. */ +typedef STAMUSERPERVM *PSTAMUSERPERVM; + + +/** Locks the sample descriptors for reading. */ +#define STAM_LOCK_RD(pUVM) do { int rcSem = RTSemRWRequestRead(pUVM->stam.s.RWSem, RT_INDEFINITE_WAIT); AssertRC(rcSem); } while (0) +/** Locks the sample descriptors for writing. */ +#define STAM_LOCK_WR(pUVM) do { int rcSem = RTSemRWRequestWrite(pUVM->stam.s.RWSem, RT_INDEFINITE_WAIT); AssertRC(rcSem); } while (0) +/** Unlocks the sample descriptors after reading. */ +#define STAM_UNLOCK_RD(pUVM) do { int rcSem = RTSemRWReleaseRead(pUVM->stam.s.RWSem); AssertRC(rcSem); } while (0) +/** Unlocks the sample descriptors after writing. */ +#define STAM_UNLOCK_WR(pUVM) do { int rcSem = RTSemRWReleaseWrite(pUVM->stam.s.RWSem); AssertRC(rcSem); } while (0) +/** Lazy initialization. */ +#define STAM_LAZY_INIT(pUVM) do { } while (0) + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_STAMInternal_h */ diff --git a/src/VBox/VMM/include/TMInline.h b/src/VBox/VMM/include/TMInline.h new file mode 100644 index 00000000..6d5951a8 --- /dev/null +++ b/src/VBox/VMM/include/TMInline.h @@ -0,0 +1,59 @@ +/* $Id: TMInline.h $ */ +/** @file + * TM - Common Inlined functions.
+ */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_TMInline_h +#define VMM_INCLUDED_SRC_include_TMInline_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + + +/** + * Used to unlink a timer from the active list. + * + * @param pQueue The timer queue. + * @param pTimer The timer that needs linking. + * + * @remarks Called while owning the relevant queue lock. + */ +DECL_FORCE_INLINE(void) tmTimerQueueUnlinkActive(PTMTIMERQUEUE pQueue, PTMTIMER pTimer) +{ +#ifdef VBOX_STRICT + TMTIMERSTATE const enmState = pTimer->enmState; + Assert( pTimer->enmClock == TMCLOCK_VIRTUAL_SYNC + ? enmState == TMTIMERSTATE_ACTIVE + : enmState == TMTIMERSTATE_PENDING_SCHEDULE || enmState == TMTIMERSTATE_PENDING_STOP_SCHEDULE); +#endif + + const PTMTIMER pPrev = TMTIMER_GET_PREV(pTimer); + const PTMTIMER pNext = TMTIMER_GET_NEXT(pTimer); + if (pPrev) + TMTIMER_SET_NEXT(pPrev, pNext); + else + { + TMTIMER_SET_HEAD(pQueue, pNext); + pQueue->u64Expire = pNext ? pNext->u64Expire : INT64_MAX; + DBGFTRACE_U64_TAG(pTimer->CTX_SUFF(pVM), pQueue->u64Expire, "tmTimerQueueUnlinkActive"); + } + if (pNext) + TMTIMER_SET_PREV(pNext, pPrev); + pTimer->offNext = 0; + pTimer->offPrev = 0; +} + +#endif /* !VMM_INCLUDED_SRC_include_TMInline_h */ + diff --git a/src/VBox/VMM/include/TMInternal.h b/src/VBox/VMM/include/TMInternal.h new file mode 100644 index 00000000..b04ed38a --- /dev/null +++ b/src/VBox/VMM/include/TMInternal.h @@ -0,0 +1,843 @@ +/* $Id: TMInternal.h $ */ +/** @file + * TM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_TMInternal_h +#define VMM_INCLUDED_SRC_include_TMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <iprt/time.h> +#include <iprt/timer.h> +#include <iprt/assert.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/pdmcritsect.h> + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_tm_int Internal + * @ingroup grp_tm + * @internal + * @{ + */ + +/** Frequency of the real clock. */ +#define TMCLOCK_FREQ_REAL UINT32_C(1000) +/** Frequency of the virtual clock. */ +#define TMCLOCK_FREQ_VIRTUAL UINT32_C(1000000000) + + +/** + * Timer type. + */ +typedef enum TMTIMERTYPE +{ + /** Device timer. */ + TMTIMERTYPE_DEV = 1, + /** USB device timer. */ + TMTIMERTYPE_USB, + /** Driver timer. */ + TMTIMERTYPE_DRV, + /** Internal timer . */ + TMTIMERTYPE_INTERNAL, + /** External timer. 
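The tmTimerQueueUnlinkActive routine in TMInline.h above manipulates self-relative links: offNext and offPrev hold the signed byte distance to the neighbouring timer rather than a pointer, which keeps the links meaningful even when the memory is mapped at different addresses in different contexts. A standalone sketch of such an offset-linked list (simplified, hypothetical names; the real code uses the TMTIMER_GET/SET_* macros defined further down, and the real unlink routine additionally updates the queue head and its cached expire time):

#include <stdint.h>
#include <stddef.h>

typedef struct OFFNODE
{
    int32_t offNext;   /* byte offset to the next node, 0 = end of list   */
    int32_t offPrev;   /* byte offset to the previous node, 0 = list head */
} OFFNODE;

#define OFFNODE_NEXT(p)        ((p)->offNext ? (OFFNODE *)((intptr_t)(p) + (p)->offNext) : NULL)
#define OFFNODE_PREV(p)        ((p)->offPrev ? (OFFNODE *)((intptr_t)(p) + (p)->offPrev) : NULL)
#define OFFNODE_SET_NEXT(p, q) ((p)->offNext = (q) ? (int32_t)((intptr_t)(q) - (intptr_t)(p)) : 0)
#define OFFNODE_SET_PREV(p, q) ((p)->offPrev = (q) ? (int32_t)((intptr_t)(q) - (intptr_t)(p)) : 0)

/* Unlink pNode from whatever chain it is currently on. */
static void offNodeUnlink(OFFNODE *pNode)
{
    OFFNODE *pPrev = OFFNODE_PREV(pNode);
    OFFNODE *pNext = OFFNODE_NEXT(pNode);
    if (pPrev)
        OFFNODE_SET_NEXT(pPrev, pNext);
    if (pNext)
        OFFNODE_SET_PREV(pNext, pPrev);
    pNode->offNext = 0;
    pNode->offPrev = 0;
}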
*/ + TMTIMERTYPE_EXTERNAL +} TMTIMERTYPE; + +/** + * Timer state + */ +typedef enum TMTIMERSTATE +{ + /** Timer is stopped. */ + TMTIMERSTATE_STOPPED = 1, + /** Timer is active. */ + TMTIMERSTATE_ACTIVE, + /** Timer is expired, getting expire and unlinking. */ + TMTIMERSTATE_EXPIRED_GET_UNLINK, + /** Timer is expired and is being delivered. */ + TMTIMERSTATE_EXPIRED_DELIVER, + + /** Timer is stopped but still in the active list. + * Currently in the ScheduleTimers list. */ + TMTIMERSTATE_PENDING_STOP, + /** Timer is stopped but needs unlinking from the ScheduleTimers list. + * Currently in the ScheduleTimers list. */ + TMTIMERSTATE_PENDING_STOP_SCHEDULE, + /** Timer is being modified and will soon be pending scheduling. + * Currently in the ScheduleTimers list. */ + TMTIMERSTATE_PENDING_SCHEDULE_SET_EXPIRE, + /** Timer is pending scheduling. + * Currently in the ScheduleTimers list. */ + TMTIMERSTATE_PENDING_SCHEDULE, + /** Timer is being modified and will soon be pending rescheduling. + * Currently in the ScheduleTimers list and the active list. */ + TMTIMERSTATE_PENDING_RESCHEDULE_SET_EXPIRE, + /** Timer is modified and is now pending rescheduling. + * Currently in the ScheduleTimers list and the active list. */ + TMTIMERSTATE_PENDING_RESCHEDULE, + /** Timer is being destroyed. */ + TMTIMERSTATE_DESTROY, + /** Timer is free. */ + TMTIMERSTATE_FREE +} TMTIMERSTATE; + +/** Predicate that returns true if the give state is pending scheduling or + * rescheduling of any kind. Will reference the argument more than once! */ +#define TMTIMERSTATE_IS_PENDING_SCHEDULING(enmState) \ + ( (enmState) <= TMTIMERSTATE_PENDING_RESCHEDULE \ + && (enmState) >= TMTIMERSTATE_PENDING_SCHEDULE_SET_EXPIRE) + + +/** + * Internal representation of a timer. + * + * For correct serialization (without the use of semaphores and + * other blocking/slow constructs) certain rules applies to updating + * this structure: + * - For thread other than EMT only u64Expire, enmState and pScheduleNext* + * are changeable. Everything else is out of bounds. + * - Updating of u64Expire timer can only happen in the TMTIMERSTATE_STOPPED + * and TMTIMERSTATE_PENDING_RESCHEDULING_SET_EXPIRE states. + * - Timers in the TMTIMERSTATE_EXPIRED state are only accessible from EMT. + * - Actual destruction of a timer can only be done at scheduling time. + */ +typedef struct TMTIMER +{ + /** Expire time. */ + volatile uint64_t u64Expire; + /** Clock to apply to u64Expire. */ + TMCLOCK enmClock; + /** Timer callback type. */ + TMTIMERTYPE enmType; + /** Type specific data. */ + union + { + /** TMTIMERTYPE_DEV. */ + struct + { + /** Callback. */ + R3PTRTYPE(PFNTMTIMERDEV) pfnTimer; + /** Device instance. */ + PPDMDEVINSR3 pDevIns; + } Dev; + + /** TMTIMERTYPE_DEV. */ + struct + { + /** Callback. */ + R3PTRTYPE(PFNTMTIMERUSB) pfnTimer; + /** USB device instance. */ + PPDMUSBINS pUsbIns; + } Usb; + + /** TMTIMERTYPE_DRV. */ + struct + { + /** Callback. */ + R3PTRTYPE(PFNTMTIMERDRV) pfnTimer; + /** Device instance. */ + R3PTRTYPE(PPDMDRVINS) pDrvIns; + } Drv; + + /** TMTIMERTYPE_INTERNAL. */ + struct + { + /** Callback. */ + R3PTRTYPE(PFNTMTIMERINT) pfnTimer; + } Internal; + + /** TMTIMERTYPE_EXTERNAL. */ + struct + { + /** Callback. */ + R3PTRTYPE(PFNTMTIMEREXT) pfnTimer; + } External; + } u; + + /** Timer state. */ + volatile TMTIMERSTATE enmState; + /** Timer relative offset to the next timer in the schedule list. */ + int32_t volatile offScheduleNext; + + /** Timer relative offset to the next timer in the chain. 
*/ + int32_t offNext; + /** Timer relative offset to the previous timer in the chain. */ + int32_t offPrev; + + /** Pointer to the VM the timer belongs to - R3 Ptr. */ + PVMR3 pVMR3; + /** Pointer to the VM the timer belongs to - R0 Ptr. */ + R0PTRTYPE(PVMCC) pVMR0; + /** Pointer to the VM the timer belongs to - RC Ptr. */ + PVMRC pVMRC; + /** The timer frequency hint. This is 0 if not hint was given. */ + uint32_t volatile uHzHint; + + /** User argument. */ + RTR3PTR pvUser; + /** The critical section associated with the lock. */ + R3PTRTYPE(PPDMCRITSECT) pCritSect; + + /** Pointer to the next timer in the list of created or free timers. (TM::pTimers or TM::pFree) */ + PTMTIMERR3 pBigNext; + /** Pointer to the previous timer in the list of all created timers. (TM::pTimers) */ + PTMTIMERR3 pBigPrev; + /** Pointer to the timer description. */ + R3PTRTYPE(const char *) pszDesc; +#if HC_ARCH_BITS == 32 + uint32_t padding0; /**< pad structure to multiple of 8 bytes. */ +#endif +#ifdef VBOX_WITH_STATISTICS + STAMPROFILE StatTimer; + STAMPROFILE StatCritSectEnter; + STAMCOUNTER StatGet; + STAMCOUNTER StatSetAbsolute; + STAMCOUNTER StatSetRelative; + STAMCOUNTER StatStop; +#endif +} TMTIMER; +AssertCompileMemberSize(TMTIMER, enmState, sizeof(uint32_t)); + + +/** + * Updates a timer state in the correct atomic manner. + */ +#if 1 +# define TM_SET_STATE(pTimer, state) \ + ASMAtomicWriteU32((uint32_t volatile *)&(pTimer)->enmState, state) +#else +# define TM_SET_STATE(pTimer, state) \ + do { \ + uint32_t uOld1 = (pTimer)->enmState; \ + Log(("%s: %p: %d -> %d\n", __FUNCTION__, (pTimer), (pTimer)->enmState, state)); \ + uint32_t uOld2 = ASMAtomicXchgU32((uint32_t volatile *)&(pTimer)->enmState, state); \ + Assert(uOld1 == uOld2); \ + } while (0) +#endif + +/** + * Tries to updates a timer state in the correct atomic manner. + */ +#if 1 +# define TM_TRY_SET_STATE(pTimer, StateNew, StateOld, fRc) \ + (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)&(pTimer)->enmState, StateNew, StateOld) +#else +# define TM_TRY_SET_STATE(pTimer, StateNew, StateOld, fRc) \ + do { (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)&(pTimer)->enmState, StateNew, StateOld); \ + Log(("%s: %p: %d -> %d %RTbool\n", __FUNCTION__, (pTimer), StateOld, StateNew, fRc)); \ + } while (0) +#endif + +/** Get the previous timer. */ +#define TMTIMER_GET_PREV(pTimer) ((PTMTIMER)((pTimer)->offPrev ? (intptr_t)(pTimer) + (pTimer)->offPrev : 0)) +/** Get the next timer. */ +#define TMTIMER_GET_NEXT(pTimer) ((PTMTIMER)((pTimer)->offNext ? (intptr_t)(pTimer) + (pTimer)->offNext : 0)) +/** Set the previous timer link. */ +#define TMTIMER_SET_PREV(pTimer, pPrev) ((pTimer)->offPrev = (pPrev) ? (intptr_t)(pPrev) - (intptr_t)(pTimer) : 0) +/** Set the next timer link. */ +#define TMTIMER_SET_NEXT(pTimer, pNext) ((pTimer)->offNext = (pNext) ? (intptr_t)(pNext) - (intptr_t)(pTimer) : 0) + + +/** + * A timer queue. + * + * This is allocated on the hyper heap. + */ +typedef struct TMTIMERQUEUE +{ + /** The cached expire time for this queue. + * Updated by EMT when scheduling the queue or modifying the head timer. + * Assigned UINT64_MAX when there is no head timer. */ + uint64_t u64Expire; + /** Doubly linked list of active timers. + * + * When no scheduling is pending, this list is will be ordered by expire time (ascending). + * Access is serialized by only letting the emulation thread (EMT) do changes. + * + * The offset is relative to the queue structure. + */ + int32_t offActive; + /** List of timers pending scheduling of some kind. 
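TM_TRY_SET_STATE above is an optimistic state transition: the caller names the state it expects to find, and the compare-and-swap either performs the transition or reports that another thread got there first. A self-contained C11 sketch of the same pattern, including an intermediate *_SET_EXPIRE style state for writing the expire time (simplified enum and names, not the TM macros):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef enum
{
    TSTATE_STOPPED = 1,
    TSTATE_PENDING_SCHEDULE_SET_EXPIRE,
    TSTATE_PENDING_SCHEDULE
} TSTATE;

typedef struct TIMERSKETCH
{
    _Atomic uint32_t enmState;
    uint64_t         u64Expire;
} TIMERSKETCH;

/* Try STOPPED -> SET_EXPIRE -> PENDING_SCHEDULE; fail if another thread won the race. */
static bool timerTrySchedule(TIMERSKETCH *pTimer, uint64_t u64Expire)
{
    uint32_t uExpected = TSTATE_STOPPED;
    if (!atomic_compare_exchange_strong(&pTimer->enmState, &uExpected, TSTATE_PENDING_SCHEDULE_SET_EXPIRE))
        return false;                       /* someone else changed the state first      */
    pTimer->u64Expire = u64Expire;          /* safe: we own the intermediate state       */
    atomic_store(&pTimer->enmState, TSTATE_PENDING_SCHEDULE);
    return true;
}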
+ * + * Timer stats allowed in the list are TMTIMERSTATE_PENDING_STOPPING, + * TMTIMERSTATE_PENDING_DESTRUCTION, TMTIMERSTATE_PENDING_STOPPING_DESTRUCTION, + * TMTIMERSTATE_PENDING_RESCHEDULING and TMTIMERSTATE_PENDING_SCHEDULE. + * + * The offset is relative to the queue structure. + */ + int32_t volatile offSchedule; + /** The clock for this queue. */ + TMCLOCK enmClock; + /** Pad the structure up to 32 bytes. */ + uint32_t au32Padding[3]; +} TMTIMERQUEUE; + +/** Pointer to a timer queue. */ +typedef TMTIMERQUEUE *PTMTIMERQUEUE; + +/** Get the head of the active timer list. */ +#define TMTIMER_GET_HEAD(pQueue) ((PTMTIMER)((pQueue)->offActive ? (intptr_t)(pQueue) + (pQueue)->offActive : 0)) +/** Set the head of the active timer list. */ +#define TMTIMER_SET_HEAD(pQueue, pHead) ((pQueue)->offActive = pHead ? (intptr_t)pHead - (intptr_t)(pQueue) : 0) + + +/** + * CPU load data set. + * Mainly used by tmR3CpuLoadTimer. + */ +typedef struct TMCPULOADSTATE +{ + /** The percent of the period spent executing guest code. */ + uint8_t cPctExecuting; + /** The percent of the period spent halted. */ + uint8_t cPctHalted; + /** The percent of the period spent on other things. */ + uint8_t cPctOther; + /** Explicit alignment padding */ + uint8_t au8Alignment[1]; + /** Index into aHistory of the current entry. */ + uint16_t volatile idxHistory; + /** Number of valid history entries before idxHistory. */ + uint16_t volatile cHistoryEntries; + + /** Previous cNsTotal value. */ + uint64_t cNsPrevTotal; + /** Previous cNsExecuting value. */ + uint64_t cNsPrevExecuting; + /** Previous cNsHalted value. */ + uint64_t cNsPrevHalted; + /** Data for the last 30 min (given an interval of 1 second). */ + struct + { + uint8_t cPctExecuting; + /** The percent of the period spent halted. */ + uint8_t cPctHalted; + /** The percent of the period spent on other things. */ + uint8_t cPctOther; + } aHistory[30*60]; +} TMCPULOADSTATE; +AssertCompileSizeAlignment(TMCPULOADSTATE, 8); +AssertCompileMemberAlignment(TMCPULOADSTATE, cNsPrevTotal, 8); +/** Pointer to a CPU load data set. */ +typedef TMCPULOADSTATE *PTMCPULOADSTATE; + + +/** + * TSC mode. + * + * The main modes of how TM implements the TSC clock (TMCLOCK_TSC). + */ +typedef enum TMTSCMODE +{ + /** The guest TSC is an emulated, virtual TSC. */ + TMTSCMODE_VIRT_TSC_EMULATED = 1, + /** The guest TSC is an offset of the real TSC. */ + TMTSCMODE_REAL_TSC_OFFSET, + /** The guest TSC is dynamically derived through emulating or offsetting. */ + TMTSCMODE_DYNAMIC, + /** The native API provides it. */ + TMTSCMODE_NATIVE_API +} TMTSCMODE; +AssertCompileSize(TMTSCMODE, sizeof(uint32_t)); + + +/** + * Converts a TM pointer into a VM pointer. + * @returns Pointer to the VM structure the TM is part of. + * @param pTM Pointer to TM instance data. + */ +#define TM2VM(pTM) ( (PVM)((char*)pTM - pTM->offVM) ) + + +/** + * TM VM Instance data. + * Changes to this must checked against the padding of the cfgm union in VM! + */ +typedef struct TM +{ + /** Offset to the VM structure. + * See TM2VM(). */ + RTUINT offVM; + + /** The current TSC mode of the VM. + * Config variable: Mode (string). */ + TMTSCMODE enmTSCMode; + /** The original TSC mode of the VM. */ + TMTSCMODE enmOriginalTSCMode; + /** Alignment padding. */ + uint32_t u32Alignment0; + /** Whether the TSC is tied to the execution of code. + * Config variable: TSCTiedToExecution (bool) */ + bool fTSCTiedToExecution; + /** Modifier for fTSCTiedToExecution which pauses the TSC while halting if true. 
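The TMCPULOADSTATE history above is a fixed ring buffer: idxHistory points at the most recent per-second entry and cHistoryEntries grows until the buffer is full. A sketch of appending one sample (hypothetical helper, simplified fields):

#include <stdint.h>

#define HISTORY_MAX (30 * 60)   /* 30 minutes at one sample per second */

typedef struct CPULOADSKETCH
{
    uint16_t idxHistory;        /* index of the most recent entry */
    uint16_t cHistoryEntries;   /* number of valid entries        */
    struct { uint8_t cPctExecuting, cPctHalted, cPctOther; } aHistory[HISTORY_MAX];
} CPULOADSKETCH;

static void cpuLoadAddSample(CPULOADSKETCH *pState, uint8_t cPctExec, uint8_t cPctHalted, uint8_t cPctOther)
{
    uint16_t idx = (uint16_t)((pState->idxHistory + 1) % HISTORY_MAX);
    pState->aHistory[idx].cPctExecuting = cPctExec;
    pState->aHistory[idx].cPctHalted    = cPctHalted;
    pState->aHistory[idx].cPctOther     = cPctOther;
    pState->idxHistory = idx;
    if (pState->cHistoryEntries < HISTORY_MAX)
        pState->cHistoryEntries++;
}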
+ * Config variable: TSCNotTiedToHalt (bool) */ + bool fTSCNotTiedToHalt; + /** Whether TM TSC mode switching is allowed at runtime. */ + bool fTSCModeSwitchAllowed; + /** Whether the guest has enabled use of paravirtualized TSC. */ + bool fParavirtTscEnabled; + /** The ID of the virtual CPU that normally runs the timers. */ + VMCPUID idTimerCpu; + + /** The number of CPU clock ticks per second (TMCLOCK_TSC). + * Config variable: TSCTicksPerSecond (64-bit unsigned int) + * The config variable implies @c enmTSCMode would be + * TMTSCMODE_VIRT_TSC_EMULATED. */ + uint64_t cTSCTicksPerSecond; + /** The TSC difference introduced by pausing the VM. */ + uint64_t offTSCPause; + /** The TSC value when the last TSC was paused. */ + uint64_t u64LastPausedTSC; + /** CPU TSCs ticking indicator (one for each VCPU). */ + uint32_t volatile cTSCsTicking; + + /** Virtual time ticking enabled indicator (counter for each VCPU). (TMCLOCK_VIRTUAL) */ + uint32_t volatile cVirtualTicking; + /** Virtual time is not running at 100%. */ + bool fVirtualWarpDrive; + /** Virtual timer synchronous time ticking enabled indicator (bool). (TMCLOCK_VIRTUAL_SYNC) */ + bool volatile fVirtualSyncTicking; + /** Virtual timer synchronous time catch-up active. */ + bool volatile fVirtualSyncCatchUp; + /** Alignment padding. */ + bool afAlignment1[1]; + /** WarpDrive percentage. + * 100% is normal (fVirtualSyncNormal == true). When other than 100% we apply + * this percentage to the raw time source for the period it's been valid in, + * i.e. since u64VirtualWarpDriveStart. */ + uint32_t u32VirtualWarpDrivePercentage; + + /** The offset of the virtual clock relative to it's timesource. + * Only valid if fVirtualTicking is set. */ + uint64_t u64VirtualOffset; + /** The guest virtual time when fVirtualTicking is cleared. */ + uint64_t u64Virtual; + /** When the Warp drive was started or last adjusted. + * Only valid when fVirtualWarpDrive is set. */ + uint64_t u64VirtualWarpDriveStart; + /** The previously returned nano TS. + * This handles TSC drift on SMP systems and expired interval. + * This is a valid range u64NanoTS to u64NanoTS + 1000000000 (ie. 1sec). */ + uint64_t volatile u64VirtualRawPrev; + /** The ring-3 data structure for the RTTimeNanoTS workers used by tmVirtualGetRawNanoTS. */ + RTTIMENANOTSDATAR3 VirtualGetRawDataR3; + /** The ring-0 data structure for the RTTimeNanoTS workers used by tmVirtualGetRawNanoTS. */ + RTTIMENANOTSDATAR0 VirtualGetRawDataR0; + /** The ring-0 data structure for the RTTimeNanoTS workers used by tmVirtualGetRawNanoTS. */ + RTTIMENANOTSDATARC VirtualGetRawDataRC; + /** Pointer to the ring-3 tmVirtualGetRawNanoTS worker function. */ + R3PTRTYPE(PFNTIMENANOTSINTERNAL) pfnVirtualGetRawR3; + /** Pointer to the ring-0 tmVirtualGetRawNanoTS worker function. */ + R0PTRTYPE(PFNTIMENANOTSINTERNAL) pfnVirtualGetRawR0; + /** Pointer to the raw-mode tmVirtualGetRawNanoTS worker function. */ + RCPTRTYPE(PFNTIMENANOTSINTERNAL) pfnVirtualGetRawRC; + /** Alignment. */ + RTRCPTR AlignmentRCPtr; + /** The guest virtual timer synchronous time when fVirtualSyncTicking is cleared. + * When fVirtualSyncTicking is set it holds the last time returned to + * the guest (while the lock was held). */ + uint64_t volatile u64VirtualSync; + /** The offset of the timer synchronous virtual clock (TMCLOCK_VIRTUAL_SYNC) relative + * to the virtual clock (TMCLOCK_VIRTUAL). + * (This is accessed by the timer thread and must be updated atomically.) 
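While fVirtualTicking is set, the virtual clock is essentially the raw nanosecond source minus u64VirtualOffset, and with warp drive active the raw time elapsed since u64VirtualWarpDriveStart is additionally scaled by the warp percentage. A sketch of that arithmetic (illustrative only, not the exact TM formulas):

#include <stdint.h>

/* Virtual time while ticking: raw source time minus the stored offset. */
static uint64_t virtualGet(uint64_t u64RawNanoTS, uint64_t u64VirtualOffset)
{
    return u64RawNanoTS - u64VirtualOffset;
}

/* With warp drive active, the raw time elapsed since the warp start is scaled. */
static uint64_t virtualGetWarped(uint64_t u64RawNanoTS, uint64_t u64WarpStartRaw,
                                 uint64_t u64VirtualAtWarpStart, uint32_t u32WarpPct)
{
    uint64_t cNsElapsed = u64RawNanoTS - u64WarpStartRaw;
    return u64VirtualAtWarpStart + cNsElapsed * u32WarpPct / 100;
}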
*/ + uint64_t volatile offVirtualSync; + /** The offset into offVirtualSync that's been irrevocably given up by failed catch-up attempts. + * Thus the current lag is offVirtualSync - offVirtualSyncGivenUp. */ + uint64_t offVirtualSyncGivenUp; + /** The TMCLOCK_VIRTUAL at the previous TMVirtualGetSync call when catch-up is active. */ + uint64_t volatile u64VirtualSyncCatchUpPrev; + /** The current catch-up percentage. */ + uint32_t volatile u32VirtualSyncCatchUpPercentage; + /** How much slack when processing timers. */ + uint32_t u32VirtualSyncScheduleSlack; + /** When to stop catch-up. */ + uint64_t u64VirtualSyncCatchUpStopThreshold; + /** When to give up catch-up. */ + uint64_t u64VirtualSyncCatchUpGiveUpThreshold; +/** @def TM_MAX_CATCHUP_PERIODS + * The number of catchup rates. */ +#define TM_MAX_CATCHUP_PERIODS 10 + /** The aggressiveness of the catch-up relative to how far we've lagged behind. + * The idea is to have increasing catch-up percentage as the lag increases. */ + struct TMCATCHUPPERIOD + { + uint64_t u64Start; /**< When this period starts. (u64VirtualSyncOffset). */ + uint32_t u32Percentage; /**< The catch-up percent to apply. */ + uint32_t u32Alignment; /**< Structure alignment */ + } aVirtualSyncCatchUpPeriods[TM_MAX_CATCHUP_PERIODS]; + + /** The current max timer Hz hint. */ + uint32_t volatile uMaxHzHint; + /** Whether to recalulate the HzHint next time its queried. */ + bool volatile fHzHintNeedsUpdating; + /** Alignment */ + bool afAlignment2[3]; + /** @cfgm{/TM/HostHzMax, uint32_t, Hz, 0, UINT32_MAX, 20000} + * The max host Hz frequency hint returned by TMCalcHostTimerFrequency. */ + uint32_t cHostHzMax; + /** @cfgm{/TM/HostHzFudgeFactorTimerCpu, uint32_t, Hz, 0, UINT32_MAX, 111} + * The number of Hz TMCalcHostTimerFrequency adds for the timer CPU. */ + uint32_t cPctHostHzFudgeFactorTimerCpu; + /** @cfgm{/TM/HostHzFudgeFactorOtherCpu, uint32_t, Hz, 0, UINT32_MAX, 110} + * The number of Hz TMCalcHostTimerFrequency adds for the other CPUs. */ + uint32_t cPctHostHzFudgeFactorOtherCpu; + /** @cfgm{/TM/HostHzFudgeFactorCatchUp100, uint32_t, Hz, 0, UINT32_MAX, 300} + * The fudge factor (expressed in percent) that catch-up percentages below + * 100% is multiplied by. */ + uint32_t cPctHostHzFudgeFactorCatchUp100; + /** @cfgm{/TM/HostHzFudgeFactorCatchUp200, uint32_t, Hz, 0, UINT32_MAX, 250} + * The fudge factor (expressed in percent) that catch-up percentages + * 100%-199% is multiplied by. */ + uint32_t cPctHostHzFudgeFactorCatchUp200; + /** @cfgm{/TM/HostHzFudgeFactorCatchUp400, uint32_t, Hz, 0, UINT32_MAX, 200} + * The fudge factor (expressed in percent) that catch-up percentages + * 200%-399% is multiplied by. */ + uint32_t cPctHostHzFudgeFactorCatchUp400; + + /** The UTC offset in ns. + * This is *NOT* for converting UTC to local time. It is for converting real + * world UTC time to VM UTC time. This feature is indented for doing date + * testing of software and similar. + * @todo Implement warpdrive on UTC. */ + int64_t offUTC; + /** The last value TMR3UtcNow returned. */ + int64_t volatile nsLastUtcNow; + /** File to touch on UTC jump. */ + R3PTRTYPE(char *) pszUtcTouchFileOnJump; + /** Just to avoid dealing with 32-bit alignment trouble. 
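The aVirtualSyncCatchUpPeriods table above maps accumulated virtual-sync lag to an increasingly aggressive catch-up percentage. A sketch of how such a table can be consulted; the threshold and percentage values below are made up for illustration and are not the configured defaults:

#include <stdint.h>

typedef struct CATCHUPPERIOD
{
    uint64_t u64Start;       /* lag (ns) at which this period starts */
    uint32_t u32Percentage;  /* catch-up percentage to apply         */
} CATCHUPPERIOD;

static const CATCHUPPERIOD g_aPeriods[] =
{
    {     1000000,   5 },   /* >= 1 ms behind:   run 5% faster   */
    {    50000000,  25 },   /* >= 50 ms behind:  run 25% faster  */
    {   500000000, 100 },   /* >= 500 ms behind: run 100% faster */
};

/* Pick the catch-up percentage for the current lag (0 = no catch-up). */
static uint32_t catchUpPercentageForLag(uint64_t cNsLag)
{
    uint32_t uPct = 0;
    for (unsigned i = 0; i < sizeof(g_aPeriods) / sizeof(g_aPeriods[0]); i++)
        if (cNsLag >= g_aPeriods[i].u64Start)
            uPct = g_aPeriods[i].u32Percentage;
        else
            break;
    return uPct;
}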
*/ + R3PTRTYPE(char *) pszAlignment2b; + + /** Timer queues for the different clock types - R3 Ptr */ + R3PTRTYPE(PTMTIMERQUEUE) paTimerQueuesR3; + /** Timer queues for the different clock types - R0 Ptr */ + R0PTRTYPE(PTMTIMERQUEUE) paTimerQueuesR0; + /** Timer queues for the different clock types - RC Ptr */ + RCPTRTYPE(PTMTIMERQUEUE) paTimerQueuesRC; + + /** Pointer to our RC mapping of the GIP. */ + RCPTRTYPE(void *) pvGIPRC; + /** Pointer to our R3 mapping of the GIP. */ + R3PTRTYPE(void *) pvGIPR3; + + /** Pointer to a singly linked list of free timers. + * This chain is using the TMTIMER::pBigNext members. + * Only accessible from the emulation thread. */ + PTMTIMERR3 pFree; + + /** Pointer to a doubly linked list of created timers. + * This chain is using the TMTIMER::pBigNext and TMTIMER::pBigPrev members. + * Only accessible from the emulation thread. */ + PTMTIMERR3 pCreated; + + /** The schedule timer timer handle (runtime timer). + * This timer will do frequent check on pending queue schedules and + * raise VM_FF_TIMER to pull EMTs attention to them. + */ + R3PTRTYPE(PRTTIMER) pTimer; + /** Interval in milliseconds of the pTimer timer. */ + uint32_t u32TimerMillies; + + /** Indicates that queues are being run. */ + bool volatile fRunningQueues; + /** Indicates that the virtual sync queue is being run. */ + bool volatile fRunningVirtualSyncQueue; + /** Alignment */ + bool afAlignment3[2]; + + /** Lock serializing access to the timer lists. */ + PDMCRITSECT TimerCritSect; + /** Lock serializing access to the VirtualSync clock and the associated + * timer queue. */ + PDMCRITSECT VirtualSyncLock; + + /** CPU load state for all the virtual CPUs (tmR3CpuLoadTimer). */ + TMCPULOADSTATE CpuLoad; + + /** TMR3TimerQueuesDo + * @{ */ + STAMPROFILE StatDoQueues; + STAMPROFILEADV aStatDoQueues[TMCLOCK_MAX]; + /** @} */ + /** tmSchedule + * @{ */ + STAMPROFILE StatScheduleOneRZ; + STAMPROFILE StatScheduleOneR3; + STAMCOUNTER StatScheduleSetFF; + STAMCOUNTER StatPostponedR3; + STAMCOUNTER StatPostponedRZ; + /** @} */ + /** Read the time + * @{ */ + STAMCOUNTER StatVirtualGet; + STAMCOUNTER StatVirtualGetSetFF; + STAMCOUNTER StatVirtualSyncGet; + STAMCOUNTER StatVirtualSyncGetAdjLast; + STAMCOUNTER StatVirtualSyncGetELoop; + STAMCOUNTER StatVirtualSyncGetExpired; + STAMCOUNTER StatVirtualSyncGetLockless; + STAMCOUNTER StatVirtualSyncGetLocked; + STAMCOUNTER StatVirtualSyncGetSetFF; + STAMCOUNTER StatVirtualPause; + STAMCOUNTER StatVirtualResume; + /** @} */ + /** TMTimerPoll + * @{ */ + STAMCOUNTER StatPoll; + STAMCOUNTER StatPollAlreadySet; + STAMCOUNTER StatPollELoop; + STAMCOUNTER StatPollMiss; + STAMCOUNTER StatPollRunning; + STAMCOUNTER StatPollSimple; + STAMCOUNTER StatPollVirtual; + STAMCOUNTER StatPollVirtualSync; + /** @} */ + /** TMTimerSet sans virtual sync timers. + * @{ */ + STAMCOUNTER StatTimerSet; + STAMCOUNTER StatTimerSetOpt; + STAMPROFILE StatTimerSetRZ; + STAMPROFILE StatTimerSetR3; + STAMCOUNTER StatTimerSetStStopped; + STAMCOUNTER StatTimerSetStExpDeliver; + STAMCOUNTER StatTimerSetStActive; + STAMCOUNTER StatTimerSetStPendStop; + STAMCOUNTER StatTimerSetStPendStopSched; + STAMCOUNTER StatTimerSetStPendSched; + STAMCOUNTER StatTimerSetStPendResched; + STAMCOUNTER StatTimerSetStOther; + /** @} */ + /** TMTimerSet on virtual sync timers. 
+ * @{ */ + STAMCOUNTER StatTimerSetVs; + STAMPROFILE StatTimerSetVsRZ; + STAMPROFILE StatTimerSetVsR3; + STAMCOUNTER StatTimerSetVsStStopped; + STAMCOUNTER StatTimerSetVsStExpDeliver; + STAMCOUNTER StatTimerSetVsStActive; + /** @} */ + /** TMTimerSetRelative sans virtual sync timers + * @{ */ + STAMCOUNTER StatTimerSetRelative; + STAMPROFILE StatTimerSetRelativeRZ; + STAMPROFILE StatTimerSetRelativeR3; + STAMCOUNTER StatTimerSetRelativeOpt; + STAMCOUNTER StatTimerSetRelativeStStopped; + STAMCOUNTER StatTimerSetRelativeStExpDeliver; + STAMCOUNTER StatTimerSetRelativeStActive; + STAMCOUNTER StatTimerSetRelativeStPendStop; + STAMCOUNTER StatTimerSetRelativeStPendStopSched; + STAMCOUNTER StatTimerSetRelativeStPendSched; + STAMCOUNTER StatTimerSetRelativeStPendResched; + STAMCOUNTER StatTimerSetRelativeStOther; + /** @} */ + /** TMTimerSetRelative on virtual sync timers. + * @{ */ + STAMCOUNTER StatTimerSetRelativeVs; + STAMPROFILE StatTimerSetRelativeVsRZ; + STAMPROFILE StatTimerSetRelativeVsR3; + STAMCOUNTER StatTimerSetRelativeVsStStopped; + STAMCOUNTER StatTimerSetRelativeVsStExpDeliver; + STAMCOUNTER StatTimerSetRelativeVsStActive; + /** @} */ + /** TMTimerStop sans virtual sync. + * @{ */ + STAMPROFILE StatTimerStopRZ; + STAMPROFILE StatTimerStopR3; + /** @} */ + /** TMTimerStop on virtual sync timers. + * @{ */ + STAMPROFILE StatTimerStopVsRZ; + STAMPROFILE StatTimerStopVsR3; + /** @} */ + /** VirtualSync - Running and Catching Up + * @{ */ + STAMCOUNTER StatVirtualSyncRun; + STAMCOUNTER StatVirtualSyncRunRestart; + STAMPROFILE StatVirtualSyncRunSlack; + STAMCOUNTER StatVirtualSyncRunStop; + STAMCOUNTER StatVirtualSyncRunStoppedAlready; + STAMCOUNTER StatVirtualSyncGiveUp; + STAMCOUNTER StatVirtualSyncGiveUpBeforeStarting; + STAMPROFILEADV StatVirtualSyncCatchup; + STAMCOUNTER aStatVirtualSyncCatchupInitial[TM_MAX_CATCHUP_PERIODS]; + STAMCOUNTER aStatVirtualSyncCatchupAdjust[TM_MAX_CATCHUP_PERIODS]; + /** @} */ + /** TMR3VirtualSyncFF (non dedicated EMT). */ + STAMPROFILE StatVirtualSyncFF; + /** The timer callback. */ + STAMCOUNTER StatTimerCallbackSetFF; + STAMCOUNTER StatTimerCallback; + + /** Calls to TMCpuTickSet. */ + STAMCOUNTER StatTSCSet; + + /** TSC starts and stops. */ + STAMCOUNTER StatTSCPause; + STAMCOUNTER StatTSCResume; + + /** @name Reasons for refusing TSC offsetting in TMCpuTickCanUseRealTSC. + * @{ */ + STAMCOUNTER StatTSCNotFixed; + STAMCOUNTER StatTSCNotTicking; + STAMCOUNTER StatTSCCatchupLE010; + STAMCOUNTER StatTSCCatchupLE025; + STAMCOUNTER StatTSCCatchupLE100; + STAMCOUNTER StatTSCCatchupOther; + STAMCOUNTER StatTSCWarp; + STAMCOUNTER StatTSCUnderflow; + STAMCOUNTER StatTSCSyncNotTicking; + /** @} */ +} TM; +/** Pointer to TM VM instance data. */ +typedef TM *PTM; + +/** + * TM VMCPU Instance data. + * Changes to this must checked against the padding of the tm union in VM! + */ +typedef struct TMCPU +{ + /** Offset to the VMCPU structure. + * See TMCPU2VM(). */ + RTUINT offVMCPU; + + /** CPU timestamp ticking enabled indicator (bool). (RDTSC) */ + bool fTSCTicking; + bool afAlignment0[3]; /**< alignment padding */ + + /** The offset between the host tick (TSC/virtual depending on the TSC mode) and + * the guest tick. */ + uint64_t offTSCRawSrc; + + /** The guest TSC when fTicking is cleared. */ + uint64_t u64TSC; + + /** The last seen TSC by the guest. */ + uint64_t u64TSCLastSeen; + +#ifndef VBOX_WITHOUT_NS_ACCOUNTING + /** The nanosecond timestamp of the CPU start or resume. 
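With offTSCRawSrc the guest TSC is derived by subtracting a per-CPU offset from the host tick, and u64TSC latches the value while the clock is not ticking so the guest TSC stays continuous across pause and resume. A simplified sketch of that bookkeeping (hypothetical names, not the TM code):

#include <stdint.h>

typedef struct TSCSKETCH
{
    int      fTicking;       /* whether the guest TSC is currently ticking */
    uint64_t offRawSrc;      /* host tick minus guest tick while ticking   */
    uint64_t u64Paused;      /* latched guest TSC value while not ticking  */
} TSCSKETCH;

static uint64_t tscGetGuest(const TSCSKETCH *pState, uint64_t u64HostTick)
{
    return pState->fTicking ? u64HostTick - pState->offRawSrc : pState->u64Paused;
}

static void tscPause(TSCSKETCH *pState, uint64_t u64HostTick)
{
    pState->u64Paused = u64HostTick - pState->offRawSrc;
    pState->fTicking  = 0;
}

static void tscResume(TSCSKETCH *pState, uint64_t u64HostTick)
{
    pState->offRawSrc = u64HostTick - pState->u64Paused;   /* keep the guest TSC continuous */
    pState->fTicking  = 1;
}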
+ * This is recalculated when the VM is started so that + * cNsTotal = RTTimeNanoTS() - u64NsTsStartCpu. */ + uint64_t u64NsTsStartTotal; + /** The nanosecond timestamp of the last start-execute notification. */ + uint64_t u64NsTsStartExecuting; + /** The nanosecond timestamp of the last start-halt notification. */ + uint64_t u64NsTsStartHalting; + /** The cNsXXX generation. */ + uint32_t volatile uTimesGen; + /** Explicit alignment padding. */ + uint32_t u32Alignment; + /** The number of nanoseconds total run time. + * @remarks This is updated when cNsExecuting and cNsHalted are updated. */ + uint64_t cNsTotal; + /** The number of nanoseconds spent executing. */ + uint64_t cNsExecuting; + /** The number of nanoseconds being halted. */ + uint64_t cNsHalted; + /** The number of nanoseconds spent on other things. + * @remarks This is updated when cNsExecuting and cNsHalted are updated. */ + uint64_t cNsOther; + /** The number of halts. */ + uint64_t cPeriodsHalted; + /** The number of guest execution runs. */ + uint64_t cPeriodsExecuting; +# if defined(VBOX_WITH_STATISTICS) || defined(VBOX_WITH_NS_ACCOUNTING_STATS) + /** Resettable version of cNsTotal. */ + STAMCOUNTER StatNsTotal; + /** Resettable version of cNsExecuting. */ + STAMPROFILE StatNsExecuting; + /** Long execution intervals. */ + STAMPROFILE StatNsExecLong; + /** Short execution intervals . */ + STAMPROFILE StatNsExecShort; + /** Tiny execution intervals . */ + STAMPROFILE StatNsExecTiny; + /** Resettable version of cNsHalted. */ + STAMPROFILE StatNsHalted; + /** Resettable version of cNsOther. */ + STAMPROFILE StatNsOther; +# endif + + /** CPU load state for this virtual CPU (tmR3CpuLoadTimer). */ + TMCPULOADSTATE CpuLoad; +#endif +} TMCPU; +/** Pointer to TM VMCPU instance data. */ +typedef TMCPU *PTMCPU; + +const char *tmTimerState(TMTIMERSTATE enmState); +void tmTimerQueueSchedule(PVM pVM, PTMTIMERQUEUE pQueue); +#ifdef VBOX_STRICT +void tmTimerQueuesSanityChecks(PVM pVM, const char *pszWhere); +#endif + +uint64_t tmR3CpuTickGetRawVirtualNoCheck(PVM pVM); +int tmCpuTickPause(PVMCPUCC pVCpu); +int tmCpuTickPauseLocked(PVMCC pVM, PVMCPUCC pVCpu); +int tmCpuTickResume(PVMCC pVM, PVMCPUCC pVCpu); +int tmCpuTickResumeLocked(PVMCC pVM, PVMCPUCC pVCpu); + +int tmVirtualPauseLocked(PVMCC pVM); +int tmVirtualResumeLocked(PVMCC pVM); +DECLCALLBACK(DECLEXPORT(void)) tmVirtualNanoTSBad(PRTTIMENANOTSDATA pData, uint64_t u64NanoTS, + uint64_t u64DeltaPrev, uint64_t u64PrevNanoTS); +DECLCALLBACK(DECLEXPORT(uint64_t)) tmVirtualNanoTSRediscover(PRTTIMENANOTSDATA pData); +DECLCALLBACK(DECLEXPORT(uint64_t)) tmVirtualNanoTSBadCpuIndex(PRTTIMENANOTSDATA pData, uint16_t idApic, + uint16_t iCpuSet, uint16_t iGipCpu); + +/** + * Try take the timer lock, wait in ring-3 return VERR_SEM_BUSY in R0/RC. + * + * @retval VINF_SUCCESS on success (always in ring-3). + * @retval VERR_SEM_BUSY in RC and R0 if the semaphore is busy. + * + * @param a_pVM Pointer to the VM. + * + * @remarks The virtual sync timer queue requires the virtual sync lock. + */ +#define TM_LOCK_TIMERS(a_pVM) PDMCritSectEnter(&(a_pVM)->tm.s.TimerCritSect, VERR_SEM_BUSY) + +/** + * Try take the timer lock, no waiting. + * + * @retval VINF_SUCCESS on success. + * @retval VERR_SEM_BUSY if busy. + * + * @param a_pVM Pointer to the VM. + * + * @remarks The virtual sync timer queue requires the virtual sync lock. + */ +#define TM_TRY_LOCK_TIMERS(a_pVM) PDMCritSectTryEnter(&(a_pVM)->tm.s.TimerCritSect) + +/** Lock the timers (sans the virtual sync queue). 
*/ +#define TM_UNLOCK_TIMERS(a_pVM) do { PDMCritSectLeave(&(a_pVM)->tm.s.TimerCritSect); } while (0) + +/** Checks that the caller owns the timer lock. */ +#define TM_ASSERT_TIMER_LOCK_OWNERSHIP(a_pVM) \ + Assert(PDMCritSectIsOwner(&(a_pVM)->tm.s.TimerCritSect)) + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_TMInternal_h */ + diff --git a/src/VBox/VMM/include/TRPMInternal.h b/src/VBox/VMM/include/TRPMInternal.h new file mode 100644 index 00000000..ad7c0282 --- /dev/null +++ b/src/VBox/VMM/include/TRPMInternal.h @@ -0,0 +1,96 @@ +/* $Id: TRPMInternal.h $ */ +/** @file + * TRPM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_TRPMInternal_h +#define VMM_INCLUDED_SRC_include_TRPMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/types.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/cpum.h> +#include <VBox/vmm/pgm.h> + +RT_C_DECLS_BEGIN + + +/** @defgroup grp_trpm_int Internals + * @ingroup grp_trpm + * @internal + * @{ + */ + +/** + * TRPM Data (part of VM) + * + * @note This used to be a big deal when we had raw-mode, now it's a dud. :-) + */ +typedef struct TRPM +{ +#ifdef VBOX_WITH_STATISTICS + /** Statistics for interrupt handlers (allocated on the hypervisor heap) - R3 + * pointer. */ + R3PTRTYPE(PSTAMCOUNTER) paStatForwardedIRQR3; +#endif + uint64_t u64Dummy; +} TRPM; + +/** Pointer to TRPM Data. */ +typedef TRPM *PTRPM; + + +/** + * Per CPU data for TRPM. + */ +typedef struct TRPMCPU +{ + /** Active Interrupt or trap vector number. + * If not UINT32_MAX this indicates that we're currently processing a + * interrupt, trap, fault, abort, whatever which have arrived at that + * vector number. + */ + uint32_t uActiveVector; + + /** Active trap type. */ + TRPMEVENT enmActiveType; + + /** Errorcode for the active interrupt/trap. */ + uint32_t uActiveErrorCode; + + /** Instruction length for software interrupts and software exceptions + * (\#BP, \#OF) */ + uint8_t cbInstr; + + /** Whether this \#DB trap is caused due to INT1/ICEBP. */ + bool fIcebp; + + /** CR2 at the time of the active exception. */ + RTGCUINTPTR uActiveCR2; +} TRPMCPU; + +/** Pointer to TRPMCPU Data. */ +typedef TRPMCPU *PTRPMCPU; +/** Pointer to const TRPMCPU Data. */ +typedef const TRPMCPU *PCTRPMCPU; + +/** @} */ + +RT_C_DECLS_END + +#endif /* !VMM_INCLUDED_SRC_include_TRPMInternal_h */ diff --git a/src/VBox/VMM/include/VMInternal.h b/src/VBox/VMM/include/VMInternal.h new file mode 100644 index 00000000..4dc9c063 --- /dev/null +++ b/src/VBox/VMM/include/VMInternal.h @@ -0,0 +1,485 @@ +/* $Id: VMInternal.h $ */ +/** @file + * VM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_VMInternal_h +#define VMM_INCLUDED_SRC_include_VMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/vmm/vmapi.h> +#include <iprt/assert.h> +#include <iprt/critsect.h> +#include <setjmp.h> + + + +/** @defgroup grp_vm_int Internals + * @ingroup grp_vm + * @internal + * @{ + */ + + +/** + * VM state change callback. + */ +typedef struct VMATSTATE +{ + /** Pointer to the next one. */ + struct VMATSTATE *pNext; + /** Pointer to the callback. */ + PFNVMATSTATE pfnAtState; + /** The user argument. */ + void *pvUser; +} VMATSTATE; +/** Pointer to a VM state change callback. */ +typedef VMATSTATE *PVMATSTATE; + + +/** + * VM error callback. + */ +typedef struct VMATERROR +{ + /** Pointer to the next one. */ + struct VMATERROR *pNext; + /** Pointer to the callback. */ + PFNVMATERROR pfnAtError; + /** The user argument. */ + void *pvUser; +} VMATERROR; +/** Pointer to a VM error callback. */ +typedef VMATERROR *PVMATERROR; + + +/** + * Chunk of memory allocated off the hypervisor heap in which + * we copy the error details. + */ +typedef struct VMERROR +{ + /** The size of the chunk. */ + uint32_t cbAllocated; + /** The current offset into the chunk. + * We start by putting the filename and function immediately + * after the end of the buffer. */ + uint32_t off; + /** Offset from the start of this structure to the file name. */ + uint32_t offFile; + /** The line number. */ + uint32_t iLine; + /** Offset from the start of this structure to the function name. */ + uint32_t offFunction; + /** Offset from the start of this structure to the formatted message text. */ + uint32_t offMessage; + /** The VBox status code. */ + int32_t rc; +} VMERROR, *PVMERROR; + + +/** + * VM runtime error callback. + */ +typedef struct VMATRUNTIMEERROR +{ + /** Pointer to the next one. */ + struct VMATRUNTIMEERROR *pNext; + /** Pointer to the callback. */ + PFNVMATRUNTIMEERROR pfnAtRuntimeError; + /** The user argument. */ + void *pvUser; +} VMATRUNTIMEERROR; +/** Pointer to a VM error callback. */ +typedef VMATRUNTIMEERROR *PVMATRUNTIMEERROR; + + +/** + * Chunk of memory allocated off the hypervisor heap in which + * we copy the runtime error details. + */ +typedef struct VMRUNTIMEERROR +{ + /** The size of the chunk. */ + uint32_t cbAllocated; + /** The current offset into the chunk. + * We start by putting the error ID immediately + * after the end of the buffer. */ + uint32_t off; + /** Offset from the start of this structure to the error ID. */ + uint32_t offErrorId; + /** Offset from the start of this structure to the formatted message text. */ + uint32_t offMessage; + /** Error flags. */ + uint32_t fFlags; +} VMRUNTIMEERROR, *PVMRUNTIMEERROR; + +/** The halt method. */ +typedef enum +{ + /** The usual invalid value. */ + VMHALTMETHOD_INVALID = 0, + /** Use the method used during bootstrapping. */ + VMHALTMETHOD_BOOTSTRAP, + /** Use the default method. */ + VMHALTMETHOD_DEFAULT, + /** The old spin/yield/block method. */ + VMHALTMETHOD_OLD, + /** The first go at a block/spin method. */ + VMHALTMETHOD_1, + /** The first go at a more global approach. 
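The VMERROR and VMRUNTIMEERROR records above keep their strings in the same allocation and address them by byte offsets from the start of the structure, which makes the whole record self-contained and easy to copy. A standalone sketch of building and reading such a record (simplified, hypothetical helpers):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct ERRRECSKETCH
{
    uint32_t cbAllocated;   /* total size of this chunk             */
    uint32_t offMessage;    /* offset of the message text, 0 = none */
    int32_t  rc;            /* status code                          */
} ERRRECSKETCH;

static ERRRECSKETCH *errRecCreate(int32_t rc, const char *pszMsg)
{
    size_t        cchMsg = strlen(pszMsg) + 1;
    size_t        cb     = sizeof(ERRRECSKETCH) + cchMsg;
    ERRRECSKETCH *pRec   = (ERRRECSKETCH *)calloc(1, cb);
    if (!pRec)
        return NULL;
    pRec->cbAllocated = (uint32_t)cb;
    pRec->rc          = rc;
    pRec->offMessage  = sizeof(ERRRECSKETCH);
    memcpy((char *)pRec + pRec->offMessage, pszMsg, cchMsg);
    return pRec;
}

static const char *errRecGetMessage(const ERRRECSKETCH *pRec)
{
    return pRec->offMessage ? (const char *)pRec + pRec->offMessage : "";
}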
*/ + VMHALTMETHOD_GLOBAL_1, + /** The end of valid methods. (not inclusive of course) */ + VMHALTMETHOD_END, + /** The usual 32-bit max value. */ + VMHALTMETHOD_32BIT_HACK = 0x7fffffff +} VMHALTMETHOD; + + +/** + * VM Internal Data (part of the VM structure). + * + * @todo Move this and all related things to VMM. The VM component was, to some + * extent at least, a bad ad hoc design which should all have been put in + * VMM. @see pg_vm. + */ +typedef struct VMINT +{ + /** VM Error Message. */ + R3PTRTYPE(PVMERROR) pErrorR3; + /** VM Runtime Error Message. */ + R3PTRTYPE(PVMRUNTIMEERROR) pRuntimeErrorR3; + /** The VM was/is-being teleported and has not yet been fully resumed. */ + bool fTeleportedAndNotFullyResumedYet; + /** The VM should power off instead of reset. */ + bool fPowerOffInsteadOfReset; + /** Reset counter (soft + hard). */ + uint32_t cResets; + /** Hard reset counter. */ + uint32_t cHardResets; + /** Soft reset counter. */ + uint32_t cSoftResets; +} VMINT; +/** Pointer to the VM Internal Data (part of the VM structure). */ +typedef VMINT *PVMINT; + + +#ifdef IN_RING3 + +/** + * VM internal data kept in the UVM. + */ +typedef struct VMINTUSERPERVM +{ + /** Head of the standard request queue. Atomic. */ + volatile PVMREQ pNormalReqs; + /** Head of the priority request queue. Atomic. */ + volatile PVMREQ pPriorityReqs; + /** The last index used during alloc/free. */ + volatile uint32_t iReqFree; + /** Number of free request packets. */ + volatile uint32_t cReqFree; + /** Array of pointers to lists of free request packets. Atomic. */ + volatile PVMREQ apReqFree[16 - (HC_ARCH_BITS == 32 ? 5 : 4)]; + + /** The reference count of the UVM handle. */ + volatile uint32_t cUvmRefs; + + /** Number of active EMTs. */ + volatile uint32_t cActiveEmts; + +# ifdef VBOX_WITH_STATISTICS +# if HC_ARCH_BITS == 32 + uint32_t uPadding; +# endif + /** Number of VMR3ReqAlloc returning a new packet. */ + STAMCOUNTER StatReqAllocNew; + /** Number of VMR3ReqAlloc causing races. */ + STAMCOUNTER StatReqAllocRaces; + /** Number of VMR3ReqAlloc returning a recycled packet. */ + STAMCOUNTER StatReqAllocRecycled; + /** Number of VMR3ReqFree calls. */ + STAMCOUNTER StatReqFree; + /** Number of times the request was actually freed. */ + STAMCOUNTER StatReqFreeOverflow; + /** Number of requests served. */ + STAMCOUNTER StatReqProcessed; + /** Number of times there are more than one request and the others needed to be + * pushed back onto the list. */ + STAMCOUNTER StatReqMoreThan1; + /** Number of times we've raced someone when pushing the other requests back + * onto the list. */ + STAMCOUNTER StatReqPushBackRaces; +# endif + + /** Pointer to the support library session. + * Mainly for creation and destruction. */ + PSUPDRVSESSION pSession; + + /** Force EMT to terminate. */ + bool volatile fTerminateEMT; + + /** Critical section for pAtState and enmPrevVMState. */ + RTCRITSECT AtStateCritSect; + /** List of registered state change callbacks. */ + PVMATSTATE pAtState; + /** List of registered state change callbacks. */ + PVMATSTATE *ppAtStateNext; + /** The previous VM state. + * This is mainly used for the 'Resetting' state, but may come in handy later + * and when debugging. */ + VMSTATE enmPrevVMState; + + /** Reason for the most recent suspend operation. */ + VMSUSPENDREASON enmSuspendReason; + /** Reason for the most recent operation. */ + VMRESUMEREASON enmResumeReason; + + /** Critical section for pAtError and pAtRuntimeError. */ + RTCRITSECT AtErrorCritSect; + + /** List of registered error callbacks. 
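The request queue heads above (pNormalReqs, pPriorityReqs) are documented as atomic: producers push requests onto the head without taking a lock. A self-contained C11 sketch of such a push and of draining the list in one exchange (simplified; the real code uses IPRT atomics, separate normal and priority queues, and pushes unprocessed requests back):

#include <stdatomic.h>

typedef struct REQSKETCH
{
    struct REQSKETCH *pNext;
    int               iRequest;
} REQSKETCH;

/* Push a request onto the head of a lock-free singly linked list. */
static void reqPush(REQSKETCH *_Atomic *ppHead, REQSKETCH *pReq)
{
    REQSKETCH *pOldHead = atomic_load(ppHead);
    do
        pReq->pNext = pOldHead;
    while (!atomic_compare_exchange_weak(ppHead, &pOldHead, pReq));
}

/* Atomically take the whole list for processing (consumer side). */
static REQSKETCH *reqTakeAll(REQSKETCH *_Atomic *ppHead)
{
    return atomic_exchange(ppHead, (REQSKETCH *)0);
}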
*/ + PVMATERROR pAtError; + /** List of registered error callbacks. */ + PVMATERROR *ppAtErrorNext; + /** The error message count. + * This is incremented every time an error is raised. */ + uint32_t volatile cErrors; + + /** The runtime error message count. + * This is incremented every time a runtime error is raised. */ + uint32_t volatile cRuntimeErrors; + /** List of registered error callbacks. */ + PVMATRUNTIMEERROR pAtRuntimeError; + /** List of registered error callbacks. */ + PVMATRUNTIMEERROR *ppAtRuntimeErrorNext; + + /** @name Generic Halt data + * @{ + */ + /** The current halt method. + * Can be selected by CFGM option 'VM/HaltMethod'. */ + VMHALTMETHOD enmHaltMethod; + /** The index into g_aHaltMethods of the current halt method. */ + uint32_t volatile iHaltMethod; + /** @} */ + + /** @todo Do NOT add new members here or reuse the current, we need to store the config for + * each halt method separately because we're racing on SMP guest rigs. */ + union + { + /** + * Method 1 & 2 - Block whenever possible, and when lagging behind + * switch to spinning with regular blocking every 5-200ms (defaults) + * depending on the accumulated lag. The blocking interval is adjusted + * with the average oversleeping of the last 64 times. + * + * The difference between 1 and 2 is that we use native absolute + * time APIs for the blocking instead of the millisecond based IPRT + * interface. + */ + struct + { + /** The max interval without blocking (when spinning). */ + uint32_t u32MinBlockIntervalCfg; + /** The minimum interval between blocking (when spinning). */ + uint32_t u32MaxBlockIntervalCfg; + /** The value to divide the current lag by to get the raw blocking interval (when spinning). */ + uint32_t u32LagBlockIntervalDivisorCfg; + /** When to start spinning (lag / nano secs). */ + uint32_t u32StartSpinningCfg; + /** When to stop spinning (lag / nano secs). */ + uint32_t u32StopSpinningCfg; + } Method12; + + /** + * The GVMM manages halted and waiting EMTs. + */ + struct + { + /** The threshold between spinning and blocking. */ + uint32_t cNsSpinBlockThresholdCfg; + } Global1; + } Halt; + + /** Pointer to the DBGC instance data. */ + void *pvDBGC; + + /** TLS index for the VMINTUSERPERVMCPU pointer. */ + RTTLS idxTLS; + + /** The VM name. (Set after the config constructure has been called.) */ + char *pszName; + /** The VM UUID. (Set after the config constructure has been called.) */ + RTUUID Uuid; +} VMINTUSERPERVM; +# ifdef VBOX_WITH_STATISTICS +AssertCompileMemberAlignment(VMINTUSERPERVM, StatReqAllocNew, 8); +# endif + +/** Pointer to the VM internal data kept in the UVM. */ +typedef VMINTUSERPERVM *PVMINTUSERPERVM; + + +/** + * VMCPU internal data kept in the UVM. + * + * Almost a copy of VMINTUSERPERVM. Separate data properly later on. + */ +typedef struct VMINTUSERPERVMCPU +{ + /** Head of the normal request queue. Atomic. */ + volatile PVMREQ pNormalReqs; + /** Head of the priority request queue. Atomic. */ + volatile PVMREQ pPriorityReqs; + + /** The handle to the EMT thread. */ + RTTHREAD ThreadEMT; + /** The native of the EMT thread. */ + RTNATIVETHREAD NativeThreadEMT; + /** Wait event semaphore. */ + RTSEMEVENT EventSemWait; + /** Wait/Idle indicator. */ + bool volatile fWait; + /** Set if we've been thru vmR3Destroy and decremented the active EMT count + * already. */ + bool volatile fBeenThruVmDestroy; + /** Align the next bit. */ + bool afAlignment[HC_ARCH_BITS == 32 ? 
2 : 6]; + + /** @name Generic Halt data + * @{ + */ + /** The average time (ns) between two halts in the last second. (updated once per second) */ + uint32_t HaltInterval; + /** The average halt frequency for the last second. (updated once per second) */ + uint32_t HaltFrequency; + /** The number of halts in the current period. */ + uint32_t cHalts; + uint32_t padding; /**< alignment padding. */ + /** When we started counting halts in cHalts (RTTimeNanoTS). */ + uint64_t u64HaltsStartTS; + /** @} */ + + /** Union containing data and config for the different halt algorithms. */ + union + { + /** + * Method 1 & 2 - Block whenever possible, and when lagging behind + * switch to spinning with regular blocking every 5-200ms (defaults) + * depending on the accumulated lag. The blocking interval is adjusted + * with the average oversleeping of the last 64 times. + * + * The difference between 1 and 2 is that we use native absolute + * time APIs for the blocking instead of the millisecond based IPRT + * interface. + */ + struct + { + /** How many times we've blocked while cBlockedNS and cBlockedTooLongNS has been accumulating. */ + uint32_t cBlocks; + /** Align the next member. */ + uint32_t u32Alignment; + /** Avg. time spend oversleeping when blocking. (Re-calculated every so often.) */ + uint64_t cNSBlockedTooLongAvg; + /** Total time spend oversleeping when blocking. */ + uint64_t cNSBlockedTooLong; + /** Total time spent blocking. */ + uint64_t cNSBlocked; + /** The timestamp (RTTimeNanoTS) of the last block. */ + uint64_t u64LastBlockTS; + + /** When we started spinning relentlessly in order to catch up some of the oversleeping. + * This is 0 when we're not spinning. */ + uint64_t u64StartSpinTS; + } Method12; + +# if 0 + /** + * Method 3 & 4 - Same as method 1 & 2 respectivly, except that we + * sprinkle it with yields. + */ + struct + { + /** How many times we've blocked while cBlockedNS and cBlockedTooLongNS has been accumulating. */ + uint32_t cBlocks; + /** Avg. time spend oversleeping when blocking. (Re-calculated every so often.) */ + uint64_t cBlockedTooLongNSAvg; + /** Total time spend oversleeping when blocking. */ + uint64_t cBlockedTooLongNS; + /** Total time spent blocking. */ + uint64_t cBlockedNS; + /** The timestamp (RTTimeNanoTS) of the last block. */ + uint64_t u64LastBlockTS; + + /** How many times we've yielded while cBlockedNS and cBlockedTooLongNS has been accumulating. */ + uint32_t cYields; + /** Avg. time spend oversleeping when yielding. */ + uint32_t cYieldTooLongNSAvg; + /** Total time spend oversleeping when yielding. */ + uint64_t cYieldTooLongNS; + /** Total time spent yielding. */ + uint64_t cYieldedNS; + /** The timestamp (RTTimeNanoTS) of the last block. */ + uint64_t u64LastYieldTS; + + /** When we started spinning relentlessly in order to catch up some of the oversleeping. */ + uint64_t u64StartSpinTS; + } Method34; +# endif + } Halt; + + /** Profiling the halted state; yielding vs blocking. + * @{ */ + STAMPROFILE StatHaltYield; + STAMPROFILE StatHaltBlock; + STAMPROFILE StatHaltBlockOverslept; + STAMPROFILE StatHaltBlockInsomnia; + STAMPROFILE StatHaltBlockOnTime; + STAMPROFILE StatHaltTimers; + STAMPROFILE StatHaltPoll; + /** @} */ +} VMINTUSERPERVMCPU; +AssertCompileMemberAlignment(VMINTUSERPERVMCPU, u64HaltsStartTS, 8); +AssertCompileMemberAlignment(VMINTUSERPERVMCPU, Halt.Method12.cNSBlockedTooLongAvg, 8); +AssertCompileMemberAlignment(VMINTUSERPERVMCPU, StatHaltYield, 8); + +/** Pointer to the VM internal data kept in the UVM. 
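Halt methods 1 and 2 above compensate for oversleeping: the average amount by which past blocks overshot the requested interval is subtracted from the next request. A sketch of that adjustment (hypothetical names, simplified arithmetic):

#include <stdint.h>

typedef struct HALTSKETCH
{
    uint32_t cBlocks;              /* number of blocks accumulated so far      */
    uint64_t cNsOversleptTotal;    /* total time overslept across those blocks */
    uint64_t cNsOversleptAvg;      /* running average oversleep                */
} HALTSKETCH;

/* Record the outcome of one block and refresh the average. */
static void haltRecordBlock(HALTSKETCH *pState, uint64_t cNsRequested, uint64_t cNsActual)
{
    if (cNsActual > cNsRequested)
        pState->cNsOversleptTotal += cNsActual - cNsRequested;
    pState->cBlocks++;
    pState->cNsOversleptAvg = pState->cNsOversleptTotal / pState->cBlocks;
}

/* Shorten the next sleep request by the average oversleep. */
static uint64_t haltAdjustedSleep(const HALTSKETCH *pState, uint64_t cNsWanted)
{
    return cNsWanted > pState->cNsOversleptAvg ? cNsWanted - pState->cNsOversleptAvg : 0;
}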
*/ +typedef VMINTUSERPERVMCPU *PVMINTUSERPERVMCPU; + +#endif /* IN_RING3 */ + +RT_C_DECLS_BEGIN + +DECLCALLBACK(int) vmR3EmulationThread(RTTHREAD ThreadSelf, void *pvArg); +int vmR3SetHaltMethodU(PUVM pUVM, VMHALTMETHOD enmHaltMethod); +DECLCALLBACK(int) vmR3Destroy(PVM pVM); +DECLCALLBACK(void) vmR3SetErrorUV(PUVM pUVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list *args); +void vmSetErrorCopy(PVM pVM, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list args); +DECLCALLBACK(int) vmR3SetRuntimeError(PVM pVM, uint32_t fFlags, const char *pszErrorId, char *pszMessage); +DECLCALLBACK(int) vmR3SetRuntimeErrorV(PVM pVM, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list *pVa); +void vmSetRuntimeErrorCopy(PVM pVM, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list va); +void vmR3SetTerminated(PVM pVM); + +RT_C_DECLS_END + + +/** @} */ + +#endif /* !VMM_INCLUDED_SRC_include_VMInternal_h */ + diff --git a/src/VBox/VMM/include/VMMInternal.h b/src/VBox/VMM/include/VMMInternal.h new file mode 100644 index 00000000..aa3ee80c --- /dev/null +++ b/src/VBox/VMM/include/VMMInternal.h @@ -0,0 +1,589 @@ +/* $Id: VMMInternal.h $ */ +/** @file + * VMM - Internal header file. + */ + +/* + * Copyright (C) 2006-2020 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef VMM_INCLUDED_SRC_include_VMMInternal_h +#define VMM_INCLUDED_SRC_include_VMMInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#include <VBox/cdefs.h> +#include <VBox/sup.h> +#include <VBox/vmm/stam.h> +#include <VBox/vmm/vmm.h> +#include <VBox/log.h> +#include <iprt/critsect.h> + +#if !defined(IN_VMM_R3) && !defined(IN_VMM_R0) && !defined(IN_VMM_RC) +# error "Not in VMM! This is an internal header!" +#endif +#if HC_ARCH_BITS == 32 +# error "32-bit hosts are no longer supported. Go back to 6.0 or earlier!" +#endif + + + +/** @defgroup grp_vmm_int Internals + * @ingroup grp_vmm + * @internal + * @{ + */ + +/** @def VBOX_WITH_RC_RELEASE_LOGGING + * Enables RC release logging. */ +#define VBOX_WITH_RC_RELEASE_LOGGING + +/** @def VBOX_WITH_R0_LOGGING + * Enables Ring-0 logging (non-release). + * + * Ring-0 logging isn't 100% safe yet (thread id reuse / process exit cleanup), + * so you have to sign up here by adding your defined(DEBUG_<userid>) to the + * \#if, or by adding VBOX_WITH_R0_LOGGING to your LocalConfig.kmk. + */ +#if defined(DEBUG_sandervl) || defined(DEBUG_frank) || defined(DEBUG_ramshankar) || defined(DOXYGEN_RUNNING) +# define VBOX_WITH_R0_LOGGING +#endif + +/** @def VBOX_STRICT_VMM_STACK + * Enables VMM stack guard pages to catch stack over- and underruns. */ +#if defined(VBOX_STRICT) || defined(DOXYGEN_RUNNING) +# define VBOX_STRICT_VMM_STACK +#endif + + +/** + * The ring-0 logger instance wrapper. + * + * We need to be able to find the VM handle from the logger instance, so we wrap + * it in this structure. + */ +typedef struct VMMR0LOGGER +{ + /** Pointer to Pointer to the VM. */ + R0PTRTYPE(PVMCC) pVM; + /** Size of the allocated logger instance (Logger). 
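The VMMR0LOGGER wrapper exists so the VM handle can be recovered from a pointer to the embedded RTLOGGER instance, a container-of style calculation. A generic standalone sketch of that technique (not the actual VMM code):

#include <stddef.h>
#include <stdint.h>

typedef struct INNER { int iDummy; } INNER;

typedef struct WRAPPER
{
    void    *pvOwner;      /* e.g. the VM handle we want to reach back to */
    uint32_t cbInner;
    INNER    Inner;        /* embedded instance handed out to users       */
} WRAPPER;

/* Given a pointer to the embedded member, step back to the wrapper. */
#define WRAPPER_FROM_INNER(pInner) \
    ((WRAPPER *)((uint8_t *)(pInner) - offsetof(WRAPPER, Inner)))

static void *wrapperGetOwner(INNER *pInner)
{
    return WRAPPER_FROM_INNER(pInner)->pvOwner;
}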
*/ + uint32_t cbLogger; + /** Flag indicating whether we've create the logger Ring-0 instance yet. */ + bool fCreated; + /** Flag indicating whether we've disabled flushing (world switch) or not. */ + bool fFlushingDisabled; + /** Flag indicating whether we've registered the instance already. */ + bool fRegistered; + bool a8Alignment; + /** The CPU ID. */ + VMCPUID idCpu; +#if HC_ARCH_BITS == 64 + uint32_t u32Alignment; +#endif + /** The ring-0 logger instance. This extends beyond the size. */ + RTLOGGER Logger; +} VMMR0LOGGER; +/** Pointer to a ring-0 logger instance wrapper. */ +typedef VMMR0LOGGER *PVMMR0LOGGER; + + +/** + * Jump buffer for the setjmp/longjmp like constructs used to + * quickly 'call' back into Ring-3. + */ +typedef struct VMMR0JMPBUF +{ + /** Traditional jmp_buf stuff + * @{ */ +#if HC_ARCH_BITS == 32 + uint32_t ebx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t esp; + uint32_t eip; + uint32_t eflags; +#endif +#if HC_ARCH_BITS == 64 + uint64_t rbx; +# ifdef RT_OS_WINDOWS + uint64_t rsi; + uint64_t rdi; +# endif + uint64_t rbp; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rsp; + uint64_t rip; +# ifdef RT_OS_WINDOWS + uint128_t xmm6; + uint128_t xmm7; + uint128_t xmm8; + uint128_t xmm9; + uint128_t xmm10; + uint128_t xmm11; + uint128_t xmm12; + uint128_t xmm13; + uint128_t xmm14; + uint128_t xmm15; +# endif + uint64_t rflags; +#endif + /** @} */ + + /** Flag that indicates that we've done a ring-3 call. */ + bool fInRing3Call; + /** The number of bytes we've saved. */ + uint32_t cbSavedStack; + /** Pointer to the buffer used to save the stack. + * This is assumed to be 8KB. */ + RTR0PTR pvSavedStack; + /** Esp we we match against esp on resume to make sure the stack wasn't relocated. */ + RTHCUINTREG SpCheck; + /** The esp we should resume execution with after the restore. */ + RTHCUINTREG SpResume; + /** ESP/RSP at the time of the jump to ring 3. */ + RTHCUINTREG SavedEsp; + /** EBP/RBP at the time of the jump to ring 3. */ + RTHCUINTREG SavedEbp; + /** EIP/RIP within vmmR0CallRing3LongJmp for assisting unwinding. */ + RTHCUINTREG SavedEipForUnwind; + /** Unwind: The vmmR0CallRing3SetJmp return address value. */ + RTHCUINTREG UnwindRetPcValue; + /** Unwind: The vmmR0CallRing3SetJmp return address stack location. */ + RTHCUINTREG UnwindRetPcLocation; +#if HC_ARCH_BITS == 32 + /** Alignment padding. */ + uint32_t uPadding; +#endif + + /** Stats: Max amount of stack used. */ + uint32_t cbUsedMax; + /** Stats: Average stack usage. (Avg = cbUsedTotal / cUsedTotal) */ + uint32_t cbUsedAvg; + /** Stats: Total amount of stack used. */ + uint64_t cbUsedTotal; + /** Stats: Number of stack usages. */ + uint64_t cUsedTotal; +} VMMR0JMPBUF; +/** Pointer to a ring-0 jump buffer. */ +typedef VMMR0JMPBUF *PVMMR0JMPBUF; + + +/** + * VMM Data (part of VM) + */ +typedef struct VMM +{ + /** Whether we should use the periodic preemption timers. */ + bool fUsePeriodicPreemptionTimers; + /** Alignment padding. */ + bool afPadding0[7]; + + /** The EMT yield timer. */ + PTMTIMERR3 pYieldTimer; + /** The period to the next timeout when suspended or stopped. + * This is 0 when running. */ + uint32_t cYieldResumeMillies; + /** The EMT yield timer interval (milliseconds). */ + uint32_t cYieldEveryMillies; + /** The timestamp of the previous yield. (nano) */ + uint64_t u64LastYield; + + /** @name EMT Rendezvous + * @{ */ + /** Semaphore to wait on upon entering ordered execution. 
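The VMMR0JMPBUF above serves the same purpose as a setjmp/longjmp pair: the ring-0 entry point records a resume point, and deep callees unwind back to it with an operation code when ring-3 assistance is needed. A plain C analogy using <setjmp.h> (the real implementation is hand-written assembly that also saves and restores the stack, which standard setjmp does not do):

#include <setjmp.h>
#include <stdio.h>

static jmp_buf g_JmpBuf;

/* A deep callee that needs ring-3 assistance "returns" by long-jumping. */
static void deepWorker(void)
{
    /* ... hit a condition that only ring-3 can handle ... */
    longjmp(g_JmpBuf, 42 /* operation code */);
}

static int ring0Entry(void)
{
    int rc = setjmp(g_JmpBuf);
    if (rc != 0)
        return rc;          /* unwound here: hand the request to ring-3 */
    deepWorker();
    return 0;               /* normal completion */
}

int main(void)
{
    printf("ring0Entry -> %d\n", ring0Entry());
    return 0;
}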
*/ + R3PTRTYPE(PRTSEMEVENT) pahEvtRendezvousEnterOrdered; + /** Semaphore to wait on upon entering for one-by-one execution. */ + RTSEMEVENT hEvtRendezvousEnterOneByOne; + /** Semaphore to wait on upon entering for all-at-once execution. */ + RTSEMEVENTMULTI hEvtMulRendezvousEnterAllAtOnce; + /** Semaphore to wait on when done. */ + RTSEMEVENTMULTI hEvtMulRendezvousDone; + /** Semaphore the VMMR3EmtRendezvous caller waits on at the end. */ + RTSEMEVENT hEvtRendezvousDoneCaller; + /** Semaphore to wait on upon recursing. */ + RTSEMEVENTMULTI hEvtMulRendezvousRecursionPush; + /** Semaphore to wait on after done with recursion (caller restoring state). */ + RTSEMEVENTMULTI hEvtMulRendezvousRecursionPop; + /** Semaphore the initiator waits on while the EMTs are getting into position + * on hEvtMulRendezvousRecursionPush. */ + RTSEMEVENT hEvtRendezvousRecursionPushCaller; + /** Semaphore the initiator waits on while the EMTs sitting on + * hEvtMulRendezvousRecursionPop wakes up and leave. */ + RTSEMEVENT hEvtRendezvousRecursionPopCaller; + /** Callback. */ + R3PTRTYPE(PFNVMMEMTRENDEZVOUS) volatile pfnRendezvous; + /** The user argument for the callback. */ + RTR3PTR volatile pvRendezvousUser; + /** Flags. */ + volatile uint32_t fRendezvousFlags; + /** The number of EMTs that has entered. */ + volatile uint32_t cRendezvousEmtsEntered; + /** The number of EMTs that has done their job. */ + volatile uint32_t cRendezvousEmtsDone; + /** The number of EMTs that has returned. */ + volatile uint32_t cRendezvousEmtsReturned; + /** The status code. */ + volatile int32_t i32RendezvousStatus; + /** Spin lock. */ + volatile uint32_t u32RendezvousLock; + /** The recursion depth. */ + volatile uint32_t cRendezvousRecursions; + /** The number of EMTs that have entered the recursion routine. */ + volatile uint32_t cRendezvousEmtsRecursingPush; + /** The number of EMTs that have leaft the recursion routine. */ + volatile uint32_t cRendezvousEmtsRecursingPop; + /** Triggers rendezvous recursion in the other threads. */ + volatile bool fRendezvousRecursion; + + /** @} */ + + /** RTThreadPreemptIsPendingTrusty() result, set by vmmR0InitVM() for + * release logging purposes. */ + bool fIsPreemptPendingApiTrusty : 1; + /** The RTThreadPreemptIsPossible() result, set by vmmR0InitVM() for + * release logging purposes. */ + bool fIsPreemptPossible : 1; + + bool afAlignment2[2]; /**< Alignment padding. */ + + /** Buffer for storing the standard assertion message for a ring-0 assertion. + * Used for saving the assertion message text for the release log and guru + * meditation dump. */ + char szRing0AssertMsg1[512]; + /** Buffer for storing the custom message for a ring-0 assertion. */ + char szRing0AssertMsg2[256]; + + /** Number of VMMR0_DO_HM_RUN or VMMR0_DO_NEM_RUN calls. */ + STAMCOUNTER StatRunGC; + + /** Statistics for each of the RC/R0 return codes. 
+ * @{ */
+ STAMCOUNTER StatRZRetNormal;
+ STAMCOUNTER StatRZRetInterrupt;
+ STAMCOUNTER StatRZRetInterruptHyper;
+ STAMCOUNTER StatRZRetGuestTrap;
+ STAMCOUNTER StatRZRetRingSwitch;
+ STAMCOUNTER StatRZRetRingSwitchInt;
+ STAMCOUNTER StatRZRetStaleSelector;
+ STAMCOUNTER StatRZRetIRETTrap;
+ STAMCOUNTER StatRZRetEmulate;
+ STAMCOUNTER StatRZRetPatchEmulate;
+ STAMCOUNTER StatRZRetIORead;
+ STAMCOUNTER StatRZRetIOWrite;
+ STAMCOUNTER StatRZRetIOCommitWrite;
+ STAMCOUNTER StatRZRetMMIORead;
+ STAMCOUNTER StatRZRetMMIOWrite;
+ STAMCOUNTER StatRZRetMMIOCommitWrite;
+ STAMCOUNTER StatRZRetMMIOPatchRead;
+ STAMCOUNTER StatRZRetMMIOPatchWrite;
+ STAMCOUNTER StatRZRetMMIOReadWrite;
+ STAMCOUNTER StatRZRetMSRRead;
+ STAMCOUNTER StatRZRetMSRWrite;
+ STAMCOUNTER StatRZRetLDTFault;
+ STAMCOUNTER StatRZRetGDTFault;
+ STAMCOUNTER StatRZRetIDTFault;
+ STAMCOUNTER StatRZRetTSSFault;
+ STAMCOUNTER StatRZRetCSAMTask;
+ STAMCOUNTER StatRZRetSyncCR3;
+ STAMCOUNTER StatRZRetMisc;
+ STAMCOUNTER StatRZRetPatchInt3;
+ STAMCOUNTER StatRZRetPatchPF;
+ STAMCOUNTER StatRZRetPatchGP;
+ STAMCOUNTER StatRZRetPatchIretIRQ;
+ STAMCOUNTER StatRZRetRescheduleREM;
+ STAMCOUNTER StatRZRetToR3Total;
+ STAMCOUNTER StatRZRetToR3FF;
+ STAMCOUNTER StatRZRetToR3Unknown;
+ STAMCOUNTER StatRZRetToR3TMVirt;
+ STAMCOUNTER StatRZRetToR3HandyPages;
+ STAMCOUNTER StatRZRetToR3PDMQueues;
+ STAMCOUNTER StatRZRetToR3Rendezvous;
+ STAMCOUNTER StatRZRetToR3Timer;
+ STAMCOUNTER StatRZRetToR3DMA;
+ STAMCOUNTER StatRZRetToR3CritSect;
+ STAMCOUNTER StatRZRetToR3Iem;
+ STAMCOUNTER StatRZRetToR3Iom;
+ STAMCOUNTER StatRZRetTimerPending;
+ STAMCOUNTER StatRZRetInterruptPending;
+ STAMCOUNTER StatRZRetCallRing3;
+ STAMCOUNTER StatRZRetPATMDuplicateFn;
+ STAMCOUNTER StatRZRetPGMChangeMode;
+ STAMCOUNTER StatRZRetPendingRequest;
+ STAMCOUNTER StatRZRetPGMFlushPending;
+ STAMCOUNTER StatRZRetPatchTPR;
+ STAMCOUNTER StatRZCallPDMCritSectEnter;
+ STAMCOUNTER StatRZCallPDMLock;
+ STAMCOUNTER StatRZCallLogFlush;
+ STAMCOUNTER StatRZCallPGMPoolGrow;
+ STAMCOUNTER StatRZCallPGMMapChunk;
+ STAMCOUNTER StatRZCallPGMAllocHandy;
+ STAMCOUNTER StatRZCallVMSetError;
+ STAMCOUNTER StatRZCallVMSetRuntimeError;
+ STAMCOUNTER StatRZCallPGMLock;
+ /** @} */
+} VMM;
+/** Pointer to VMM. */
+typedef VMM *PVMM;
+
+
+/**
+ * VMMCPU Data (part of VMCPU)
+ */
+typedef struct VMMCPU
+{
+ /** The last RC/R0 return code. */
+ int32_t iLastGZRc;
+ /** Alignment padding. */
+ uint32_t u32Padding0;
+
+ /** VMM stack, pointer to the top of the stack in R3.
+ * Stack is allocated from the hypervisor heap and is page aligned
+ * and always writable in RC. */
+ R3PTRTYPE(uint8_t *) pbEMTStackR3;
+
+ /** Pointer to the R0 logger instance - R3 Ptr.
+ * This is NULL if logging is disabled. */
+ R3PTRTYPE(PVMMR0LOGGER) pR0LoggerR3;
+ /** Pointer to the R0 logger instance - R0 Ptr.
+ * This is NULL if logging is disabled. */
+ R0PTRTYPE(PVMMR0LOGGER) pR0LoggerR0;
+
+ /** Pointer to the R0 release logger instance - R3 Ptr.
+ * This is NULL if logging is disabled. */
+ R3PTRTYPE(PVMMR0LOGGER) pR0RelLoggerR3;
+ /** Pointer to the R0 release logger instance - R0 Ptr.
+ * This is NULL if logging is disabled. */
+ R0PTRTYPE(PVMMR0LOGGER) pR0RelLoggerR0;
+
+ /** Thread context switching hook (ring-0). */
+ RTTHREADCTXHOOK hCtxHook;
+
+ /** @name Rendezvous
+ * @{ */
+ /** Whether the EMT is executing a rendezvous right now. For detecting
+ * attempts at recursive rendezvous. */
+ bool volatile fInRendezvous;
+ bool afPadding1[10];
+ /** @} */
+
+ /** Whether we can HLT in VMMR0 rather than having to return to EM.
+ * Updated by vmR3SetHaltMethodU(). */
+ bool fMayHaltInRing0;
+ /** The minimum delta for which we can HLT in ring-0.
+ * The deadlines we can calculate are from TM, so, if it's too close
+ * we should just return to ring-3 and run the timer wheel, no point
+ * in spinning in ring-0.
+ * Updated by vmR3SetHaltMethodU(). */
+ uint32_t cNsSpinBlockThreshold;
+ /** Number of ring-0 halts (used for decaying the following values). */
+ uint32_t cR0Halts;
+ /** Number of ring-0 halts succeeding (VINF_SUCCESS) recently. */
+ uint32_t cR0HaltsSucceeded;
+ /** Number of ring-0 halts failing (VINF_EM_HALT) recently. */
+ uint32_t cR0HaltsToRing3;
+ /** Padding */
+ uint32_t u32Padding2;
+
+ /** @name Raw-mode context tracing data.
+ * @{ */
+ SUPDRVTRACERUSRCTX TracerCtx;
+ /** @} */
+
+ /** Alignment padding, making sure u64CallRing3Arg and CallRing3JmpBufR0 are nicely aligned. */
+ uint32_t au32Padding3[1];
+
+ /** @name Call Ring-3
+ * Formerly known as host calls.
+ * @{ */
+ /** The disable counter. */
+ uint32_t cCallRing3Disabled;
+ /** The pending operation. */
+ VMMCALLRING3 enmCallRing3Operation;
+ /** The result of the last operation. */
+ int32_t rcCallRing3;
+ /** The argument to the operation. */
+ uint64_t u64CallRing3Arg;
+ /** The Ring-0 notification callback. */
+ R0PTRTYPE(PFNVMMR0CALLRING3NOTIFICATION) pfnCallRing3CallbackR0;
+ /** The Ring-0 notification callback user argument. */
+ R0PTRTYPE(void *) pvCallRing3CallbackUserR0;
+ /** The Ring-0 jmp buffer.
+ * @remarks The size of this type isn't stable in assembly, so don't put
+ * anything that needs to be accessed from assembly after it. */
+ VMMR0JMPBUF CallRing3JmpBufR0;
+ /** @} */
+
+ STAMPROFILE StatR0HaltBlock;
+ STAMPROFILE StatR0HaltBlockOnTime;
+ STAMPROFILE StatR0HaltBlockOverslept;
+ STAMPROFILE StatR0HaltBlockInsomnia;
+ STAMCOUNTER StatR0HaltExec;
+ STAMCOUNTER StatR0HaltExecFromBlock;
+ STAMCOUNTER StatR0HaltExecFromSpin;
+ STAMCOUNTER StatR0HaltToR3;
+ STAMCOUNTER StatR0HaltToR3FromSpin;
+ STAMCOUNTER StatR0HaltToR3Other;
+ STAMCOUNTER StatR0HaltToR3PendingFF;
+ STAMCOUNTER StatR0HaltToR3SmallDelta;
+ STAMCOUNTER StatR0HaltToR3PostNoInt;
+ STAMCOUNTER StatR0HaltToR3PostPendingFF;
+} VMMCPU;
+AssertCompileMemberAlignment(VMMCPU, TracerCtx, 8);
+/** Pointer to VMMCPU. */
+typedef VMMCPU *PVMMCPU;
+
+
+
+RT_C_DECLS_BEGIN
+
+int vmmInitFormatTypes(void);
+void vmmTermFormatTypes(void);
+uint32_t vmmGetBuildType(void);
+
+#ifdef IN_RING3
+int vmmR3SwitcherInit(PVM pVM);
+void vmmR3SwitcherRelocate(PVM pVM, RTGCINTPTR offDelta);
+#endif /* IN_RING3 */
+
+#ifdef IN_RING0
+
+/**
+ * World switcher assembly routine.
+ * It will call VMMRCEntry().
+ *
+ * @returns return code from VMMRCEntry().
+ * @param pVM The cross context VM structure.
+ * @param uArg See VMMRCEntry().
+ * @internal
+ */
+DECLASM(int) vmmR0WorldSwitch(PVM pVM, unsigned uArg);
+
+/**
+ * Callback function for vmmR0CallRing3SetJmp.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+typedef DECLCALLBACK(int) FNVMMR0SETJMP(PVMCC pVM, PVMCPUCC pVCpu);
+/** Pointer to FNVMMR0SETJMP(). */
+typedef FNVMMR0SETJMP *PFNVMMR0SETJMP;
+
+/**
+ * The setjmp variant used for calling Ring-3.
+ *
+ * This differs from the normal setjmp in that it will resume VMMRZCallRing3 if we're
+ * in the middle of a ring-3 call. Another difference is the function pointer and
+ * argument. This has to do with resuming code and the stack frame of the caller.
+ *
+ * @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+ * @param pJmpBuf The jmp_buf to set.
+ * @param pfn The function to be called when not resuming.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+DECLASM(int) vmmR0CallRing3SetJmp(PVMMR0JMPBUF pJmpBuf, PFNVMMR0SETJMP pfn, PVM pVM, PVMCPU pVCpu);
+
+
+/**
+ * Callback function for vmmR0CallRing3SetJmp2.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param idCpu The ID of the calling EMT.
+ */
+typedef DECLCALLBACK(int) FNVMMR0SETJMP2(PGVM pGVM, VMCPUID idCpu);
+/** Pointer to FNVMMR0SETJMP2(). */
+typedef FNVMMR0SETJMP2 *PFNVMMR0SETJMP2;
+
+/**
+ * Same as vmmR0CallRing3SetJmp except for the function signature.
+ *
+ * @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+ * @param pJmpBuf The jmp_buf to set.
+ * @param pfn The function to be called when not resuming.
+ * @param pGVM The ring-0 VM structure.
+ * @param idCpu The ID of the calling EMT.
+ */
+DECLASM(int) vmmR0CallRing3SetJmp2(PVMMR0JMPBUF pJmpBuf, PFNVMMR0SETJMP2 pfn, PGVM pGVM, VMCPUID idCpu);
+
+
+/**
+ * Callback function for vmmR0CallRing3SetJmpEx.
+ *
+ * @returns VBox status code.
+ * @param pvUser The user argument.
+ */
+typedef DECLCALLBACK(int) FNVMMR0SETJMPEX(void *pvUser);
+/** Pointer to FNVMMR0SETJMPEX(). */
+typedef FNVMMR0SETJMPEX *PFNVMMR0SETJMPEX;
+
+/**
+ * Same as vmmR0CallRing3SetJmp except for the function signature.
+ *
+ * @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+ * @param pJmpBuf The jmp_buf to set.
+ * @param pfn The function to be called when not resuming.
+ * @param pvUser The argument of that function.
+ */
+DECLASM(int) vmmR0CallRing3SetJmpEx(PVMMR0JMPBUF pJmpBuf, PFNVMMR0SETJMPEX pfn, void *pvUser);
+
+
+/**
+ * Worker for VMMRZCallRing3.
+ * This will save the stack and registers.
+ *
+ * @returns rc.
+ * @param pJmpBuf Pointer to the jump buffer.
+ * @param rc The return code.
+ */
+DECLASM(int) vmmR0CallRing3LongJmp(PVMMR0JMPBUF pJmpBuf, int rc);
+
+/**
+ * Internal R0 logger worker: Logger wrapper.
+ */
+VMMR0DECL(void) vmmR0LoggerWrapper(const char *pszFormat, ...);
+
+/**
+ * Internal R0 logger worker: Flush logger.
+ *
+ * @param pLogger The logger instance to flush.
+ * @remark This function must be exported!
+ */
+VMMR0DECL(void) vmmR0LoggerFlush(PRTLOGGER pLogger);
+
+/**
+ * Internal R0 logger worker: Custom prefix.
+ *
+ * @returns Number of chars written.
+ *
+ * @param pLogger The logger instance.
+ * @param pchBuf The output buffer.
+ * @param cchBuf The size of the buffer.
+ * @param pvUser User argument (ignored).
+ */
+VMMR0DECL(size_t) vmmR0LoggerPrefix(PRTLOGGER pLogger, char *pchBuf, size_t cchBuf, void *pvUser);
+
+# ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+int vmmR0TripleFaultHackInit(void);
+void vmmR0TripleFaultHackTerm(void);
+# endif
+
+#endif /* IN_RING0 */
+
+RT_C_DECLS_END
+
+/** @} */
+
+#endif /* !VMM_INCLUDED_SRC_include_VMMInternal_h */
diff --git a/src/VBox/VMM/include/VMMInternal.mac b/src/VBox/VMM/include/VMMInternal.mac
new file mode 100644
index 00000000..10b64c78
--- /dev/null
+++ b/src/VBox/VMM/include/VMMInternal.mac
@@ -0,0 +1,141 @@
+; $Id: VMMInternal.mac $
+;; @file
+; VMM - Internal header file.
+;
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+%include "VBox/asmdefs.mac"
+%include "VBox/sup.mac"
+
+;
+; Determine the default stack switching unless specified explicitly.
+;
+%ifndef VMM_R0_SWITCH_STACK
+ %ifndef VMM_R0_NO_SWITCH_STACK
+ %ifdef RT_OS_DARWIN
+ %define VMM_R0_SWITCH_STACK
+ %endif
+ %endif
+%endif
+
+
+struc VMMR0JMPBUF
+%ifdef RT_ARCH_X86
+ ; traditional jmp_buf
+ .ebx resd 1
+ .esi resd 1
+ .edi resd 1
+ .ebp resd 1
+ .esp resd 1
+ .eip resd 1
+ .eflags resd 1
+
+ ; additional state and stack info.
+ .fInRing3Call resd 1
+ .cbSavedStack resd 1
+ .pvSavedStack resd 1
+ .SpCheck resd 1
+ .SpResume resd 1
+ .SavedEsp resd 1
+ .SavedEbp resd 1
+ .SavedEipForUnwind resd 1
+ .UnwindRetPcValue resd 1
+ .UnwindRetPcLocation resd 1
+%endif
+%ifdef RT_ARCH_AMD64
+ ; traditional jmp_buf
+ .rbx resq 1
+ %ifdef RT_OS_WINDOWS
+ .rsi resq 1
+ .rdi resq 1
+ %endif
+ .rbp resq 1
+ .r12 resq 1
+ .r13 resq 1
+ .r14 resq 1
+ .r15 resq 1
+ .rsp resq 1
+ .rip resq 1
+ %ifdef RT_OS_WINDOWS
+ .xmm6 resq 2
+ .xmm7 resq 2
+ .xmm8 resq 2
+ .xmm9 resq 2
+ .xmm10 resq 2
+ .xmm11 resq 2
+ .xmm12 resq 2
+ .xmm13 resq 2
+ .xmm14 resq 2
+ .xmm15 resq 2
+ %endif
+ .rflags resq 1
+
+ ; additional state and stack info.
+ .fInRing3Call resd 1
+ .cbSavedStack resd 1
+ .pvSavedStack resq 1
+ .SpCheck resq 1
+ .SpResume resq 1
+ .SavedEsp resq 1
+ .SavedEbp resq 1
+ .SavedEipForUnwind resq 1
+ .UnwindRetPcValue resq 1
+ .UnwindRetPcLocation resq 1
+%endif
+
+ ; Statistics
+ alignb 8
+ .cbUsedMax resd 1
+ .cbUsedAvg resd 1
+ .cbUsedTotal resq 1
+ .cUsedTotal resq 1
+endstruc
+
+
+struc VMMCPU
+
+ .iLastGZRc resd 1
+ alignb 8
+ .pbEMTStackR3 RTR3PTR_RES 1
+
+ .pR0LoggerR3 RTR3PTR_RES 1
+ .pR0LoggerR0 RTR0PTR_RES 1
+ .pR0RelLoggerR3 RTR3PTR_RES 1
+ .pR0RelLoggerR0 RTR0PTR_RES 1
+
+ .hCtxHook RTR0PTR_RES 1
+
+ .fInRendezvous resb 1
+ .afPadding1 resb 10
+ .fMayHaltInRing0 resb 1
+ .cNsSpinBlockThreshold resd 1
+ .cR0Halts resd 1
+ .cR0HaltsSucceeded resd 1
+ .cR0HaltsToRing3 resd 1
+
+ alignb 8
+ .TracerCtx resb SUPDRVTRACERUSRCTX64_size
+
+ .au32Padding3 resd 1
+
+ .cCallRing3Disabled resd 1
+ .enmCallRing3Operation resd 1
+ .rcCallRing3 resd 1
+ alignb 8
+ .u64CallRing3Arg resq 1
+ .pfnCallRing3CallbackR0 RTR0PTR_RES 1
+ .pvCallRing3CallbackUserR0 RTR0PTR_RES 1
+ ; .CallRing3JmpBufR0 resb no-can-do
+endstruc
+
diff --git a/src/VBox/VMM/include/VMMTracing.h b/src/VBox/VMM/include/VMMTracing.h
new file mode 100644
index 00000000..f07b4f89
--- /dev/null
+++ b/src/VBox/VMM/include/VMMTracing.h
@@ -0,0 +1,126 @@
+/* $Id: VMMTracing.h $ */
+/** @file
+ * VBoxVMM - Trace point macros for the VMM.
+ */
+
+/*
+ * Copyright (C) 2012-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_include_VMMTracing_h
+#define VMM_INCLUDED_SRC_include_VMMTracing_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+
+/*******************************************************************************
+* Header Files *
+*******************************************************************************/
+#ifdef DOXYGEN_RUNNING
+# undef VBOX_WITH_DTRACE
+# undef VBOX_WITH_DTRACE_R3
+# undef VBOX_WITH_DTRACE_R0
+# undef VBOX_WITH_DTRACE_RC
+# define DBGFTRACE_ENABLED
+#endif
+#include <VBox/vmm/dbgftrace.h>
+
+
+/*******************************************************************************
+* Defined Constants And Macros *
+*******************************************************************************/
+/** Gets the trace buffer handle from a VMCPU pointer. */
+#define VMCPU_TO_HTB(a_pVCpu) ((a_pVCpu)->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf))
+
+/** Gets the trace buffer handle from a VM pointer. */
+#define VM_TO_HTB(a_pVM) ((a_pVM)->CTX_SUFF(hTraceBuf))
+
+/** Macro wrapper for trace points that are disabled by default. */
+#define TP_COND_VMCPU(a_pVCpu, a_GrpSuff, a_TraceStmt) \
+ do { \
+ if (RT_UNLIKELY( (a_pVCpu)->fTraceGroups & VMMTPGROUP_##a_GrpSuff )) \
+ { \
+ RTTRACEBUF const hTB = (a_pVCpu)->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf); \
+ a_TraceStmt; \
+ } \
+ } while (0)
+
+/** @name VMM Trace Point Groups.
+ * @{ */
+#define VMMTPGROUP_EM RT_BIT(0)
+#define VMMTPGROUP_HM RT_BIT(1)
+#define VMMTPGROUP_TM RT_BIT(2)
+/** @} */
+
+
+
+/** @name Ring-3 trace points.
+ * @{
+ */
+#ifdef IN_RING3
+# ifdef VBOX_WITH_DTRACE_R3
+# include "dtrace/VBoxVMM.h"
+
+# elif defined(DBGFTRACE_ENABLED)
+# define VBOXVMM_EM_STATE_CHANGED(a_pVCpu, a_enmOldState, a_enmNewState, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-state-changed %d -> %d (rc=%d)", a_enmOldState, a_enmNewState, a_rc))
+# define VBOXVMM_EM_STATE_UNCHANGED(a_pVCpu, a_enmState, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-state-unchanged %d (rc=%d)", a_enmState, a_rc))
+# define VBOXVMM_EM_RAW_RUN_PRE(a_pVCpu, a_pCtx) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-raw-pre %04x:%08llx", (a_pCtx)->cs, (a_pCtx)->rip))
+# define VBOXVMM_EM_RAW_RUN_RET(a_pVCpu, a_pCtx, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-raw-ret %04x:%08llx rc=%d", (a_pCtx)->cs, (a_pCtx)->rip, (a_rc)))
+# define VBOXVMM_EM_FF_HIGH(a_pVCpu, a_fGlobal, a_fLocal, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-ff-high vm=%#x cpu=%#x rc=%d", (a_fGlobal), (a_fLocal), (a_rc)))
+# define VBOXVMM_EM_FF_ALL(a_pVCpu, a_fGlobal, a_fLocal, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-ff-all vm=%#x cpu=%#x rc=%d", (a_fGlobal), (a_fLocal), (a_rc)))
+# define VBOXVMM_EM_FF_ALL_RET(a_pVCpu, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-ff-all-ret %d", (a_rc)))
+# define VBOXVMM_EM_FF_RAW(a_pVCpu, a_fGlobal, a_fLocal) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-ff-raw vm=%#x cpu=%#x", (a_fGlobal), (a_fLocal)))
+# define VBOXVMM_EM_FF_RAW_RET(a_pVCpu, a_rc) \
+ TP_COND_VMCPU(a_pVCpu, EM, RTTraceBufAddMsgF(hTB, "em-ff-raw-ret %d", (a_rc)))
+
+# else
+# define VBOXVMM_EM_STATE_CHANGED(a_pVCpu, a_enmOldState, a_enmNewState, a_rc) do { } while (0)
+# define VBOXVMM_EM_STATE_UNCHANGED(a_pVCpu, a_enmState, a_rc) do { } while (0)
+# define VBOXVMM_EM_RAW_RUN_PRE(a_pVCpu, a_pCtx) do { } while (0)
+# define VBOXVMM_EM_RAW_RUN_RET(a_pVCpu, a_pCtx, a_rc) do { } while (0)
+# define VBOXVMM_EM_FF_HIGH(a_pVCpu, a_fGlobal, a_fLocal, a_rc) do { } while (0)
+# define VBOXVMM_EM_FF_ALL(a_pVCpu, a_fGlobal, a_fLocal, a_rc) do { } while (0)
+# define VBOXVMM_EM_FF_ALL_RET(a_pVCpu, a_rc) do { } while (0)
+# define VBOXVMM_EM_FF_RAW(a_pVCpu, a_fGlobal, a_fLocal) do { } while (0)
+# define VBOXVMM_EM_FF_RAW_RET(a_pVCpu, a_rc) do { } while (0)
+
+# endif
+#endif /* IN_RING3 */
+/** @} */
+
+
+/** @name Ring-0 trace points.
+ * @{
+ */
+#ifdef IN_RING0
+# ifdef VBOX_WITH_DTRACE_R0
+# include "VBoxVMMR0-dtrace.h"
+
+# elif defined(DBGFTRACE_ENABLED)
+
+# else
+
+# endif
+#endif /* IN_RING0 */
+/** @} */
+
+
+#endif /* !VMM_INCLUDED_SRC_include_VMMTracing_h */
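
The vmmR0CallRing3SetJmp/vmmR0CallRing3LongJmp pair declared in VMMInternal.h above, together with VMMR0JMPBUF, gives ring-0 code a setjmp/longjmp-style escape hatch back to the EMT's ring-3 loop. The sketch below reproduces only that control flow with the standard C setjmp/longjmp; every name prefixed with "sketch" is a hypothetical stand-in, and the example deliberately omits the stack save/restore that the real routines perform via pvSavedStack. It is an analogy, not VirtualBox code.

/* Standalone analogy for the vmmR0CallRing3SetJmp / vmmR0CallRing3LongJmp flow.
 * Hypothetical names; build with any C compiler. */
#include <setjmp.h>
#include <stdio.h>

#define SKETCH_RC_SUCCESS     0   /* stands in for VINF_SUCCESS */
#define SKETCH_RC_CALL_RING3  1   /* stands in for a "call ring-3" status code */

static jmp_buf g_JmpBuf;          /* plays the role of VMMR0JMPBUF */

/* Deep "ring-0" worker: bails out to the caller, like vmmR0CallRing3LongJmp. */
static void sketchDeepWorker(void)
{
    printf("worker: need ring-3 service, long-jumping out\n");
    longjmp(g_JmpBuf, SKETCH_RC_CALL_RING3);   /* never returns */
}

/* The FNVMMR0SETJMP-style callback that runs under the armed jump buffer. */
static int sketchGuestLoop(void)
{
    sketchDeepWorker();
    return SKETCH_RC_SUCCESS;                  /* not reached in this sketch */
}

/* Counterpart of vmmR0CallRing3SetJmp: arm the buffer, then run the callback. */
static int sketchCallWithSetJmp(int (*pfn)(void))
{
    int rc = setjmp(g_JmpBuf);   /* 0 when armed, SKETCH_RC_* when long-jumped to */
    if (rc == 0)
        return pfn();            /* normal path; the worker may long-jump instead */
    return rc;                   /* reached via longjmp */
}

int main(void)
{
    int rc = sketchCallWithSetJmp(sketchGuestLoop);
    if (rc == SKETCH_RC_CALL_RING3)
        printf("ring-3: servicing the request (rc=%d)\n", rc);
    return 0;
}

Unlike plain longjmp, the real long-jump path also records enough state in VMMR0JMPBUF (fInRing3Call, SpResume and the stack copy behind pvSavedStack) for vmmR0CallRing3SetJmp to resume the interrupted ring-0 frame once ring-3 has serviced the request.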
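The EMT-rendezvous block in the VMM structure above (the hEvtRendezvous* handles, pfnRendezvous and the cRendezvousEmtsEntered/Done/Returned counters) coordinates running one callback on every EMT, either one-by-one or all at once. The following self-contained sketch uses hypothetical names, pthreads and C11 atomics in place of the IPRT event semaphores, and spinning instead of blocking, purely to illustrate what the entered/done counters track in the all-at-once mode; it is an analogy, not VirtualBox code.

/* Standalone analogy for the all-at-once rendezvous bookkeeping.
 * Hypothetical names; build with -pthread. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_CPUS 4

static atomic_uint g_cEntered;                    /* like cRendezvousEmtsEntered */
static atomic_uint g_cDone;                       /* like cRendezvousEmtsDone */
static void (*g_pfnRendezvous)(unsigned idCpu);   /* like pfnRendezvous */

static void sketchWorker(unsigned idCpu)
{
    printf("EMT %u executing rendezvous callback\n", idCpu);
}

static void *sketchEmt(void *pv)
{
    unsigned idCpu = (unsigned)(uintptr_t)pv;

    /* Enter: wait until every EMT has arrived (all-at-once mode). */
    atomic_fetch_add(&g_cEntered, 1);
    while (atomic_load(&g_cEntered) < SKETCH_CPUS)
        ; /* spin; the real code blocks on hEvtMulRendezvousEnterAllAtOnce */

    /* Do the job, then report completion. */
    g_pfnRendezvous(idCpu);
    atomic_fetch_add(&g_cDone, 1);
    return NULL;
}

int main(void)
{
    pthread_t ahThreads[SKETCH_CPUS];
    g_pfnRendezvous = sketchWorker;

    for (unsigned i = 0; i < SKETCH_CPUS; i++)
        pthread_create(&ahThreads[i], NULL, sketchEmt, (void *)(uintptr_t)i);

    /* The initiator waits until all EMTs are done, as on hEvtRendezvousDoneCaller. */
    while (atomic_load(&g_cDone) < SKETCH_CPUS)
        ;
    for (unsigned i = 0; i < SKETCH_CPUS; i++)
        pthread_join(ahThreads[i], NULL);
    return 0;
}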
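TP_COND_VMCPU in VMMTracing.h above gates each trace point on a per-CPU group mask (VMMTPGROUP_EM/HM/TM), so a disabled group costs only a predicted-untaken branch while an enabled one formats a message into the trace buffer. The minimal stand-alone illustration below reproduces that gating pattern with a hypothetical fTraceGroups field and printf standing in for RTTraceBufAddMsgF; it shows the technique only, not the VirtualBox API.

/* Standalone analogy for group-masked trace points. Hypothetical names. */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical trace-point groups, mirroring VMMTPGROUP_EM/HM/TM. */
#define SKETCH_TPGROUP_EM  (UINT32_C(1) << 0)
#define SKETCH_TPGROUP_HM  (UINT32_C(1) << 1)
#define SKETCH_TPGROUP_TM  (UINT32_C(1) << 2)

typedef struct SKETCHCPU
{
    uint32_t fTraceGroups;   /* enabled trace-point groups for this CPU */
} SKETCHCPU;

/* Same shape as TP_COND_VMCPU: run the trace statement only if the group is enabled. */
#define SKETCH_TP_COND_CPU(a_pCpu, a_fGroup, a_TraceStmt) \
    do { \
        if ((a_pCpu)->fTraceGroups & (a_fGroup)) \
        { \
            a_TraceStmt; \
        } \
    } while (0)

/* A trace point that is compiled in but disabled by default. */
#define SKETCH_EM_STATE_CHANGED(a_pCpu, a_iOld, a_iNew, a_rc) \
    SKETCH_TP_COND_CPU(a_pCpu, SKETCH_TPGROUP_EM, \
        printf("em-state-changed %d -> %d (rc=%d)\n", (a_iOld), (a_iNew), (a_rc)))

int main(void)
{
    SKETCHCPU Cpu = { 0 };
    SKETCH_EM_STATE_CHANGED(&Cpu, 1, 2, 0);   /* group disabled: no output, near-zero cost */

    Cpu.fTraceGroups |= SKETCH_TPGROUP_EM;    /* enable the EM group at runtime */
    SKETCH_EM_STATE_CHANGED(&Cpu, 2, 3, 0);   /* now the trace statement fires */
    return 0;
}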