author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 14:19:18 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 14:19:18 +0000
commit     4035b1bfb1e5843a539a8b624d21952b756974d1 (patch)
tree       f1e9cd5bf548cbc57ff2fddfb2b4aa9ae95587e2 /src/VBox/VMM/VMMRZ
parent     Initial commit. (diff)
Adding upstream version 6.1.22-dfsg. (upstream/6.1.22-dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/VMM/VMMRZ')
-rw-r--r--  src/VBox/VMM/VMMRZ/CPUMRZ.cpp         144
-rw-r--r--  src/VBox/VMM/VMMRZ/CPUMRZA.asm        384
-rw-r--r--  src/VBox/VMM/VMMRZ/DBGFRZ.cpp         240
-rw-r--r--  src/VBox/VMM/VMMRZ/Makefile.kup         0
-rw-r--r--  src/VBox/VMM/VMMRZ/PGMRZDynMap.cpp   2692
-rw-r--r--  src/VBox/VMM/VMMRZ/VMMRZ.cpp          253
6 files changed, 3713 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMRZ/CPUMRZ.cpp b/src/VBox/VMM/VMMRZ/CPUMRZ.cpp
new file mode 100644
index 00000000..4e765df8
--- /dev/null
+++ b/src/VBox/VMM/VMMRZ/CPUMRZ.cpp
@@ -0,0 +1,144 @@
+/* $Id: CPUMRZ.cpp $ */
+/** @file
+ * CPUM - Raw-mode and ring-0 context.
+ */
+
+/*
+ * Copyright (C) 2016-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_CPUM
+#include <VBox/vmm/cpum.h>
+#include "CPUMInternal.h"
+#include <VBox/vmm/vmcc.h>
+
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <VBox/vmm/hm.h>
+#include <iprt/assert.h>
+#include <iprt/x86.h>
+
+
+
+
+/**
+ * Prepares the host FPU/SSE/AVX stuff for IEM action.
+ *
+ * This makes sure the FPU/SSE/AVX guest state is _not_ loaded in the CPU,
+ * that the FPU/SSE/AVX host state is saved, and that the FPU/SSE/AVX host
+ * features can be safely accessed.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZ_INT_DECL(void) CPUMRZFpuStatePrepareHostCpuForUse(PVMCPUCC pVCpu)
+{
+ pVCpu->cpum.s.fChanged |= CPUM_CHANGED_FPU_REM;
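+ /* Only three flag combinations are valid here: nothing loaded yet, only the
+ host state saved, or the guest state loaded on top of a saved host state.
+ A guest-only combination is unexpected and trips the assertion below. */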
+ switch (pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST))
+ {
+ case 0:
+ if (cpumRZSaveHostFPUState(&pVCpu->cpum.s) == VINF_CPUM_HOST_CR0_MODIFIED)
+ HMR0NotifyCpumModifiedHostCr0(pVCpu);
+ Log6(("CPUMRZFpuStatePrepareHostCpuForUse: #0 - %#x\n", ASMGetCR0()));
+ break;
+
+ case CPUM_USED_FPU_HOST:
+ Log6(("CPUMRZFpuStatePrepareHostCpuForUse: #1 - %#x\n", ASMGetCR0()));
+ break;
+
+ case CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST:
+ cpumRZSaveGuestFpuState(&pVCpu->cpum.s, true /*fLeaveFpuAccessible*/);
+#ifdef IN_RING0
+ HMR0NotifyCpumUnloadedGuestFpuState(pVCpu);
+#endif
+ Log6(("CPUMRZFpuStatePrepareHostCpuForUse: #2 - %#x\n", ASMGetCR0()));
+ break;
+
+ default:
+ AssertFailed();
+ }
+
+}
+
+
+/**
+ * Makes sure the FPU/SSE/AVX guest state is saved in CPUMCPU::Guest and will be
+ * reloaded before direct use.
+ *
+ * No promises are made about the FPU/SSE/AVX host features.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZ_INT_DECL(void) CPUMRZFpuStateActualizeForChange(PVMCPUCC pVCpu)
+{
+ CPUMRZFpuStatePrepareHostCpuForUse(pVCpu);
+}
+
+
+/**
+ * Makes sure the FPU/SSE/AVX state in CPUMCPU::Guest is up to date.
+ *
+ * This will not cause CPUM_USED_FPU_GUEST to change.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZ_INT_DECL(void) CPUMRZFpuStateActualizeForRead(PVMCPUCC pVCpu)
+{
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST)
+ {
+ cpumRZSaveGuestFpuState(&pVCpu->cpum.s, false /*fLeaveFpuAccessible*/);
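+ /* cpumRZSaveGuestFpuState clears CPUM_USED_FPU_GUEST, but the guest state
+ remains loaded in the CPU, so restore the flag to keep the promise that
+ this function does not change it. */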
+ pVCpu->cpum.s.fUseFlags |= CPUM_USED_FPU_GUEST;
+ Log7(("CPUMRZFpuStateActualizeForRead\n"));
+ }
+}
+
+
+/**
+ * Makes sure the XMM0..XMM15 and MXCSR state in CPUMCPU::Guest is up to date.
+ *
+ * This will not cause CPUM_USED_FPU_GUEST to change.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZ_INT_DECL(void) CPUMRZFpuStateActualizeSseForRead(PVMCPUCC pVCpu)
+{
+#if defined(VBOX_WITH_KERNEL_USING_XMM) && HC_ARCH_BITS == 64
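+ /* With VBOX_WITH_KERNEL_USING_XMM on a 64-bit host the guest XMM register
+ state is already kept current by the ring-0 world-switch code (see
+ CPUMRZA.asm), so there is nothing to do here. */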
+ NOREF(pVCpu);
+#else
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST)
+ {
+ cpumRZSaveGuestSseRegisters(&pVCpu->cpum.s);
+ Log7(("CPUMRZFpuStateActualizeSseForRead\n"));
+ }
+#endif
+}
+
+
+/**
+ * Makes sure the YMM0..YMM15 and MXCSR state in CPUMCPU::Guest is up to date.
+ *
+ * This will not cause CPUM_USED_FPU_GUEST to change.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZ_INT_DECL(void) CPUMRZFpuStateActualizeAvxForRead(PVMCPUCC pVCpu)
+{
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST)
+ {
+ cpumRZSaveGuestAvxRegisters(&pVCpu->cpum.s);
+ Log7(("CPUMRZFpuStateActualizeAvxForRead\n"));
+ }
+}
+
diff --git a/src/VBox/VMM/VMMRZ/CPUMRZA.asm b/src/VBox/VMM/VMMRZ/CPUMRZA.asm
new file mode 100644
index 00000000..14309a9f
--- /dev/null
+++ b/src/VBox/VMM/VMMRZ/CPUMRZA.asm
@@ -0,0 +1,384 @@
+ ; $Id: CPUMRZA.asm $
+;; @file
+; CPUM - Raw-mode and Ring-0 Context Assembly Routines.
+;
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "VBox/asmdefs.mac"
+%include "CPUMInternal.mac"
+%include "iprt/x86.mac"
+%include "VBox/vmm/cpum.mac"
+%include "VBox/err.mac"
+
+
+
+BEGINCODE
+
+
+;;
+; Saves the host FPU/SSE/AVX state.
+;
+; Will return with CR0.EM and CR0.TS cleared! This is the normal state in ring-0.
+;
+; @returns VINF_SUCCESS (0) or VINF_CPUM_HOST_CR0_MODIFIED. (EAX)
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+align 16
+BEGINPROC cpumRZSaveHostFPUState
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_MSC
+ mov r11, rcx
+ %else
+ mov r11, rdi
+ %endif
+ %define pCpumCpu r11
+ %define pXState r10
+%else
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+%endif
+
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ ;
+ ; We may have to update CR0, indirectly or directly. We must report any
+ ; changes to the VT-x code.
+ ;
+ CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC xCX, xAX, pCpumCpu ; xCX is the return value (xAX scratch)
+
+ ;
+ ; Save the host state (xsave/fxsave will cause thread FPU state to be
+ ; loaded on systems where we are allowed to use it in ring-0).
+ ;
+ CPUMR0_SAVE_HOST
+
+ or dword [pCpumCpu + CPUMCPU.fUseFlags], (CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM) ; The latter is not necessarily true, but normally it is.
+ popf
+
+ mov eax, ecx ; The return value from above.
+%ifdef RT_ARCH_X86
+ pop esi
+ pop ebx
+%endif
+ leave
+ ret
+%undef pCpumCpu
+%undef pXState
+ENDPROC cpumRZSaveHostFPUState
+
+
+;;
+; Saves the guest FPU/SSE/AVX state.
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+; @param fLeaveFpuAccessible x86:[ebp+c] gcc:sil msc:dl Whether to restore CR0 and XCR0 on
+; the way out. Only really applicable to RC.
+;
+; @remarks 64-bit Windows drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+align 16
+BEGINPROC cpumRZSaveGuestFpuState
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_MSC
+ mov r11, rcx
+ %else
+ mov r11, rdi
+ %endif
+ %define pCpumCpu r11
+ %define pXState r10
+%else
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+%endif
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ %ifdef IN_RC
+ mov ecx, cr0 ; ecx = saved cr0
+ test ecx, X86_CR0_TS | X86_CR0_EM
+ jz .skip_cr0_write
+ mov eax, ecx
+ and eax, ~(X86_CR0_TS | X86_CR0_EM)
+ mov cr0, eax
+.skip_cr0_write:
+ %endif
+
+ %ifndef VBOX_WITH_KERNEL_USING_XMM
+ CPUMR0_SAVE_GUEST
+ %else
+ ;
+ ; The XMM0..XMM15 registers have been saved already. We exploit the
+ ; host state here to temporarily save the non-volatile XMM registers,
+ ; so we can load the guest ones while saving. This is safe.
+ ;
+
+ ; Save caller's XMM registers.
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ movdqa [pXState + X86FXSTATE.xmm6 ], xmm6
+ movdqa [pXState + X86FXSTATE.xmm7 ], xmm7
+ movdqa [pXState + X86FXSTATE.xmm8 ], xmm8
+ movdqa [pXState + X86FXSTATE.xmm9 ], xmm9
+ movdqa [pXState + X86FXSTATE.xmm10], xmm10
+ movdqa [pXState + X86FXSTATE.xmm11], xmm11
+ movdqa [pXState + X86FXSTATE.xmm12], xmm12
+ movdqa [pXState + X86FXSTATE.xmm13], xmm13
+ movdqa [pXState + X86FXSTATE.xmm14], xmm14
+ movdqa [pXState + X86FXSTATE.xmm15], xmm15
+ stmxcsr [pXState + X86FXSTATE.MXCSR]
+
+ ; Load the guest XMM register values we already saved in HMR0VMXStartVMWrapXMM.
+ mov pXState, [pCpumCpu + CPUMCPU.Guest.pXStateR0]
+ movdqa xmm0, [pXState + X86FXSTATE.xmm0]
+ movdqa xmm1, [pXState + X86FXSTATE.xmm1]
+ movdqa xmm2, [pXState + X86FXSTATE.xmm2]
+ movdqa xmm3, [pXState + X86FXSTATE.xmm3]
+ movdqa xmm4, [pXState + X86FXSTATE.xmm4]
+ movdqa xmm5, [pXState + X86FXSTATE.xmm5]
+ movdqa xmm6, [pXState + X86FXSTATE.xmm6]
+ movdqa xmm7, [pXState + X86FXSTATE.xmm7]
+ movdqa xmm8, [pXState + X86FXSTATE.xmm8]
+ movdqa xmm9, [pXState + X86FXSTATE.xmm9]
+ movdqa xmm10, [pXState + X86FXSTATE.xmm10]
+ movdqa xmm11, [pXState + X86FXSTATE.xmm11]
+ movdqa xmm12, [pXState + X86FXSTATE.xmm12]
+ movdqa xmm13, [pXState + X86FXSTATE.xmm13]
+ movdqa xmm14, [pXState + X86FXSTATE.xmm14]
+ movdqa xmm15, [pXState + X86FXSTATE.xmm15]
+ ldmxcsr [pXState + X86FXSTATE.MXCSR]
+
+ CPUMR0_SAVE_GUEST
+
+ ; Restore caller's XMM registers.
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ movdqa xmm6, [pXState + X86FXSTATE.xmm6 ]
+ movdqa xmm7, [pXState + X86FXSTATE.xmm7 ]
+ movdqa xmm8, [pXState + X86FXSTATE.xmm8 ]
+ movdqa xmm9, [pXState + X86FXSTATE.xmm9 ]
+ movdqa xmm10, [pXState + X86FXSTATE.xmm10]
+ movdqa xmm11, [pXState + X86FXSTATE.xmm11]
+ movdqa xmm12, [pXState + X86FXSTATE.xmm12]
+ movdqa xmm13, [pXState + X86FXSTATE.xmm13]
+ movdqa xmm14, [pXState + X86FXSTATE.xmm14]
+ movdqa xmm15, [pXState + X86FXSTATE.xmm15]
+ ldmxcsr [pXState + X86FXSTATE.MXCSR]
+
+ %endif
+
+ and dword [pCpumCpu + CPUMCPU.fUseFlags], ~CPUM_USED_FPU_GUEST
+ %ifdef IN_RC
+ test byte [ebp + 0ch], 1 ; fLeaveFpuAccessible
+ jz .no_cr0_restore
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET ecx
+.no_cr0_restore:
+ %endif
+ popf
+%ifdef RT_ARCH_X86
+ pop esi
+ pop ebx
+%endif
+ leave
+ ret
+%undef pCpumCpu
+%undef pXState
+ENDPROC cpumRZSaveGuestFpuState
+
+
+;;
+; Saves the guest XMM0..15 registers and MXCSR.
+;
+; The purpose is to actualize the register state for read-only use, so CR0 is
+; restored in raw-mode context (i.e. the FPU/SSE/AVX CPU features may be
+; inaccessible again upon return).
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+align 16
+BEGINPROC cpumRZSaveGuestSseRegisters
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+%ifndef VBOX_WITH_KERNEL_USING_XMM
+ ;
+ ; Load xCX with the guest pXStateR0.
+ ;
+ %ifdef ASM_CALL64_GCC
+ mov xCX, rdi
+ %elifdef RT_ARCH_X86
+ mov xCX, dword [ebp + 8]
+ %endif
+ %ifdef IN_RING0
+ mov xCX, [xCX + CPUMCPU.Guest.pXStateR0]
+ %elifdef IN_RC
+ mov xCX, [xCX + CPUMCPU.Guest.pXStateRC]
+ %else
+ %error "Invalid context!"
+ %endif
+
+ %ifdef IN_RC
+ ; Temporarily grant access to the SSE state. xDX must be preserved until CR0 is restored!
+ mov edx, cr0
+ test edx, X86_CR0_TS | X86_CR0_EM
+ jz .skip_cr0_write
+ mov eax, edx
+ and eax, ~(X86_CR0_TS | X86_CR0_EM)
+ mov cr0, eax
+.skip_cr0_write:
+ %endif
+
+ ;
+ ; Do the job.
+ ;
+ stmxcsr [xCX + X86FXSTATE.MXCSR]
+ movdqa [xCX + X86FXSTATE.xmm0 ], xmm0
+ movdqa [xCX + X86FXSTATE.xmm1 ], xmm1
+ movdqa [xCX + X86FXSTATE.xmm2 ], xmm2
+ movdqa [xCX + X86FXSTATE.xmm3 ], xmm3
+ movdqa [xCX + X86FXSTATE.xmm4 ], xmm4
+ movdqa [xCX + X86FXSTATE.xmm5 ], xmm5
+ movdqa [xCX + X86FXSTATE.xmm6 ], xmm6
+ movdqa [xCX + X86FXSTATE.xmm7 ], xmm7
+ %if ARCH_BITS == 64
+ movdqa [xCX + X86FXSTATE.xmm8 ], xmm8
+ movdqa [xCX + X86FXSTATE.xmm9 ], xmm9
+ movdqa [xCX + X86FXSTATE.xmm10], xmm10
+ movdqa [xCX + X86FXSTATE.xmm11], xmm11
+ movdqa [xCX + X86FXSTATE.xmm12], xmm12
+ movdqa [xCX + X86FXSTATE.xmm13], xmm13
+ movdqa [xCX + X86FXSTATE.xmm14], xmm14
+ movdqa [xCX + X86FXSTATE.xmm15], xmm15
+ %endif
+
+ %ifdef IN_RC
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET edx ; Restore CR0 if we changed it above.
+ %endif
+
+%endif ; !VBOX_WITH_KERNEL_USING_XMM
+
+ leave
+ ret
+ENDPROC cpumRZSaveGuestSseRegisters
+
+;;
+; Saves the guest YMM0..15 registers.
+;
+; The purpose is to actualize the register state for read-only use, so CR0 is
+; restored in raw-mode context (i.e. the FPU/SSE/AVX CPU features may be
+; inaccessible again upon return).
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+align 16
+BEGINPROC cpumRZSaveGuestAvxRegisters
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+%ifdef IN_RC
+ push xBX
+%endif
+SEH64_END_PROLOGUE
+
+ ;
+ ; Load xCX with the guest pXStateR0.
+ ;
+%ifdef ASM_CALL64_GCC
+ mov xCX, rdi
+%elifdef RT_ARCH_X86
+ mov xCX, dword [ebp + 8]
+%endif
+%ifdef IN_RING0
+ mov xCX, [xCX + CPUMCPU.Guest.pXStateR0]
+%elifdef IN_RC
+ mov xCX, [xCX + CPUMCPU.Guest.pXStateRC]
+%else
+ %error "Invalid context!"
+%endif
+
+%ifdef IN_RC
+ ; Temporarily grant access to the SSE state. xBX must be preserved until CR0 is restored!
+ mov ebx, cr0
+ test ebx, X86_CR0_TS | X86_CR0_EM
+ jz .skip_cr0_write
+ mov eax, ebx
+ and eax, ~(X86_CR0_TS | X86_CR0_EM)
+ mov cr0, eax
+.skip_cr0_write:
+%endif
+
+ ;
+ ; Use XSAVE to do the job.
+ ;
+ ; Drivers shouldn't use AVX registers without saving+loading:
+ ; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+ ; However, the compiler docs have a different idea:
+ ; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+ ; We'll go with the former for now.
+ ;
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ mov eax, XSAVE_C_YMM
+%else
+ mov eax, XSAVE_C_YMM | XSAVE_C_SSE ; The SSE component includes MXCSR.
+%endif
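+ ; XSAVE takes its requested-feature bitmap in edx:eax; the components we
+ ; want all live in the low 32 bits, so edx is simply cleared.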
+ xor edx, edx
+%if ARCH_BITS == 64
+ o64 xsave [xCX]
+%else
+ xsave [xCX]
+%endif
+
+%ifdef IN_RC
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET ebx ; Restore CR0 if we changed it above.
+ pop xBX
+%endif
+ leave
+ ret
+ENDPROC cpumRZSaveGuestAvxRegisters
+
diff --git a/src/VBox/VMM/VMMRZ/DBGFRZ.cpp b/src/VBox/VMM/VMMRZ/DBGFRZ.cpp
new file mode 100644
index 00000000..44066fd5
--- /dev/null
+++ b/src/VBox/VMM/VMMRZ/DBGFRZ.cpp
@@ -0,0 +1,240 @@
+/* $Id: DBGFRZ.cpp $ */
+/** @file
+ * DBGF - Debugger Facility, RZ part.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_DBGF
+#include <VBox/vmm/dbgf.h>
+#include <VBox/vmm/selm.h>
+#ifdef IN_RC
+# include <VBox/vmm/trpm.h>
+#endif
+#include <VBox/log.h>
+#include "DBGFInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/err.h>
+#include <iprt/assert.h>
+
+#ifdef IN_RC
+DECLASM(void) TRPMRCHandlerAsmTrap03(void);
+#endif
+
+
+/**
+ * \#DB (Debug event) handler.
+ *
+ * @returns VBox status code.
+ * VINF_SUCCESS means we completely handled this trap,
+ * while other codes mean execution is passed back to the host context.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pRegFrame Pointer to the register frame for the trap.
+ * @param uDr6 The DR6 hypervisor register value.
+ * @param fAltStepping Alternative stepping indicator.
+ */
+VMMRZ_INT_DECL(int) DBGFRZTrap01Handler(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, RTGCUINTREG uDr6, bool fAltStepping)
+{
+#ifdef IN_RC
+ const bool fInHyper = !(pRegFrame->ss.Sel & X86_SEL_RPL) && !pRegFrame->eflags.Bits.u1VM;
+#else
+ NOREF(pRegFrame);
+ const bool fInHyper = false;
+#endif
+
+ /** @todo Intel docs say that X86_DR6_BS has the highest priority... */
+ /*
+ * A breakpoint?
+ */
+ AssertCompile(X86_DR6_B0 == 1 && X86_DR6_B1 == 2 && X86_DR6_B2 == 4 && X86_DR6_B3 == 8);
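+ /* The B0..B3 hit bits occupy DR6 bits 0..3, so RT_BIT_32(iBp) below picks
+ out the bit belonging to hardware breakpoint register iBp. */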
+ if ( (uDr6 & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3))
+ && pVM->dbgf.s.cEnabledHwBreakpoints > 0)
+ {
+ for (unsigned iBp = 0; iBp < RT_ELEMENTS(pVM->dbgf.s.aHwBreakpoints); iBp++)
+ {
+ if ( ((uint32_t)uDr6 & RT_BIT_32(iBp))
+ && pVM->dbgf.s.aHwBreakpoints[iBp].enmType == DBGFBPTYPE_REG)
+ {
+ pVCpu->dbgf.s.iActiveBp = pVM->dbgf.s.aHwBreakpoints[iBp].iBp;
+ pVCpu->dbgf.s.fSingleSteppingRaw = false;
+ LogFlow(("DBGFRZTrap01Handler: hit hw breakpoint %d at %04x:%RGv\n",
+ pVM->dbgf.s.aHwBreakpoints[iBp].iBp, pRegFrame->cs.Sel, pRegFrame->rip));
+
+ return fInHyper ? VINF_EM_DBG_HYPER_BREAKPOINT : VINF_EM_DBG_BREAKPOINT;
+ }
+ }
+ }
+
+ /*
+ * Single step?
+ * Are we single stepping or is it the guest?
+ */
+ if ( (uDr6 & X86_DR6_BS)
+ && (fInHyper || pVCpu->dbgf.s.fSingleSteppingRaw || fAltStepping))
+ {
+ pVCpu->dbgf.s.fSingleSteppingRaw = false;
+ LogFlow(("DBGFRZTrap01Handler: single step at %04x:%RGv\n", pRegFrame->cs.Sel, pRegFrame->rip));
+ return fInHyper ? VINF_EM_DBG_HYPER_STEPPED : VINF_EM_DBG_STEPPED;
+ }
+
+#ifdef IN_RC
+ /*
+ * Either an ICEBP in hypervisor code or a guest-related debug exception
+ * of sorts.
+ */
+ if (RT_UNLIKELY(fInHyper))
+ {
+ /*
+ * Is this a guest debug event that was delayed past a ring transition?
+ *
+ * Since we do not allow sysenter/syscall in raw-mode, the only
+ * non-trap/fault type transitions that can occur are through interrupt gates.
+ * Of those, only INT3 (#BP) has a DPL other than 0 with a CS.RPL of 0.
+ * See bugref:9171 and bs3-cpu-weird-1 for more details.
+ *
+ * We need to reconstruct the guest register state from the hypervisor one
+ * here, so here is the layout of the IRET frame on the stack:
+ * 20:[8] GS (V86 only)
+ * 1C:[7] FS (V86 only)
+ * 18:[6] DS (V86 only)
+ * 14:[5] ES (V86 only)
+ * 10:[4] SS
+ * 0c:[3] ESP
+ * 08:[2] EFLAGS
+ * 04:[1] CS
+ * 00:[0] EIP
+ */
+ if (pRegFrame->rip == (uintptr_t)TRPMRCHandlerAsmTrap03)
+ {
+ uint32_t const *pu32Stack = (uint32_t const *)pRegFrame->esp;
+ if ( (pu32Stack[2] & X86_EFL_VM)
+ || (pu32Stack[1] & X86_SEL_RPL))
+ {
+ LogFlow(("DBGFRZTrap01Handler: Detected guest #DB delayed past ring transition %04x:%RX32 %#x\n",
+ pu32Stack[1] & 0xffff, pu32Stack[0], pu32Stack[2]));
+ PCPUMCTX pGstCtx = CPUMQueryGuestCtxPtr(pVCpu);
+ pGstCtx->rip = pu32Stack[0];
+ pGstCtx->cs.Sel = pu32Stack[1];
+ pGstCtx->eflags.u = pu32Stack[2];
+ pGstCtx->rsp = pu32Stack[3];
+ pGstCtx->ss.Sel = pu32Stack[4];
+ if (pu32Stack[2] & X86_EFL_VM)
+ {
+ pGstCtx->es.Sel = pu32Stack[5];
+ pGstCtx->ds.Sel = pu32Stack[6];
+ pGstCtx->fs.Sel = pu32Stack[7];
+ pGstCtx->gs.Sel = pu32Stack[8];
+ }
+ else
+ {
+ pGstCtx->es.Sel = pRegFrame->es.Sel;
+ pGstCtx->ds.Sel = pRegFrame->ds.Sel;
+ pGstCtx->fs.Sel = pRegFrame->fs.Sel;
+ pGstCtx->gs.Sel = pRegFrame->gs.Sel;
+ }
+ pGstCtx->rax = pRegFrame->rax;
+ pGstCtx->rcx = pRegFrame->rcx;
+ pGstCtx->rdx = pRegFrame->rdx;
+ pGstCtx->rbx = pRegFrame->rbx;
+ pGstCtx->rsi = pRegFrame->rsi;
+ pGstCtx->rdi = pRegFrame->rdi;
+ pGstCtx->rbp = pRegFrame->rbp;
+
+ /*
+ * We should assert a #BP followed by a #DB here, but TRPM cannot
+ * do that. So, we'll just assert the #BP and ignore the #DB, even
+ * if that isn't strictly correct.
+ */
+ TRPMResetTrap(pVCpu);
+ TRPMAssertTrap(pVCpu, X86_XCPT_BP, TRPM_SOFTWARE_INT);
+ return VINF_EM_RAW_GUEST_TRAP;
+ }
+ }
+
+ LogFlow(("DBGFRZTrap01Handler: Unknown bp at %04x:%RGv\n", pRegFrame->cs.Sel, pRegFrame->rip));
+ return VERR_DBGF_HYPER_DB_XCPT;
+ }
+#endif
+
+ LogFlow(("DBGFRZTrap01Handler: guest debug event %#x at %04x:%RGv!\n", (uint32_t)uDr6, pRegFrame->cs.Sel, pRegFrame->rip));
+ return VINF_EM_RAW_GUEST_TRAP;
+}
+
+
+/**
+ * \#BP (Breakpoint) handler.
+ *
+ * @returns VBox status code.
+ * VINF_SUCCESS means we completely handled this trap,
+ * while other codes mean execution is passed back to the host context.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pRegFrame Pointer to the register frame for the trap.
+ */
+VMMRZ_INT_DECL(int) DBGFRZTrap03Handler(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame)
+{
+#ifdef IN_RC
+ const bool fInHyper = !(pRegFrame->ss.Sel & X86_SEL_RPL) && !pRegFrame->eflags.Bits.u1VM;
+#else
+ const bool fInHyper = false;
+#endif
+
+ /*
+ * Get the trap address and look it up in the breakpoint table.
+ * Don't bother if we don't have any breakpoints.
+ */
+ unsigned cToSearch = pVM->dbgf.s.Int3.cToSearch;
+ if (cToSearch > 0)
+ {
+ RTGCPTR pPc;
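+ /* In raw-mode #BP is a trap, so EIP points past the one-byte INT3 and is
+ adjusted by -1 below; in ring-0 the reported RIP already identifies the
+ instruction, so no adjustment is needed. */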
+ int rc = SELMValidateAndConvertCSAddr(pVCpu, pRegFrame->eflags, pRegFrame->ss.Sel, pRegFrame->cs.Sel, &pRegFrame->cs,
+#ifdef IN_RC
+ pRegFrame->eip - 1,
+#else
+ pRegFrame->rip /* no -1 in R0 */,
+#endif
+ &pPc);
+ AssertRCReturn(rc, rc);
+
+ unsigned iBp = pVM->dbgf.s.Int3.iStartSearch;
+ while (cToSearch-- > 0)
+ {
+ if ( pVM->dbgf.s.aBreakpoints[iBp].u.GCPtr == (RTGCUINTPTR)pPc
+ && pVM->dbgf.s.aBreakpoints[iBp].enmType == DBGFBPTYPE_INT3)
+ {
+ pVM->dbgf.s.aBreakpoints[iBp].cHits++;
+ pVCpu->dbgf.s.iActiveBp = pVM->dbgf.s.aBreakpoints[iBp].iBp;
+
+ LogFlow(("DBGFRZTrap03Handler: hit breakpoint %d at %RGv (%04x:%RGv) cHits=0x%RX64\n",
+ pVM->dbgf.s.aBreakpoints[iBp].iBp, pPc, pRegFrame->cs.Sel, pRegFrame->rip,
+ pVM->dbgf.s.aBreakpoints[iBp].cHits));
+ return fInHyper
+ ? VINF_EM_DBG_HYPER_BREAKPOINT
+ : VINF_EM_DBG_BREAKPOINT;
+ }
+ iBp++;
+ }
+ }
+
+ return fInHyper
+ ? VINF_EM_DBG_HYPER_ASSERTION
+ : VINF_EM_RAW_GUEST_TRAP;
+}
+
diff --git a/src/VBox/VMM/VMMRZ/Makefile.kup b/src/VBox/VMM/VMMRZ/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/VMM/VMMRZ/Makefile.kup
diff --git a/src/VBox/VMM/VMMRZ/PGMRZDynMap.cpp b/src/VBox/VMM/VMMRZ/PGMRZDynMap.cpp
new file mode 100644
index 00000000..0f4313bb
--- /dev/null
+++ b/src/VBox/VMM/VMMRZ/PGMRZDynMap.cpp
@@ -0,0 +1,2692 @@
+/* $Id: PGMRZDynMap.cpp $ */
+/** @file
+ * PGM - Page Manager and Monitor, dynamic mapping cache.
+ */
+
+/*
+ * Copyright (C) 2008-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PGM_DYNMAP
+#include <VBox/vmm/pgm.h>
+#include "PGMInternal.h"
+#include <VBox/vmm/vm.h>
+#include "PGMInline.h"
+#include <VBox/err.h>
+#include <VBox/param.h>
+#include <VBox/sup.h>
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/assert.h>
+#ifndef IN_RC
+# include <iprt/cpuset.h>
+# include <iprt/mem.h>
+# include <iprt/memobj.h>
+# include <iprt/mp.h>
+# include <iprt/semaphore.h>
+# include <iprt/spinlock.h>
+#endif
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#ifdef IN_RING0
+/** The max size of the mapping cache (in pages). */
+# define PGMR0DYNMAP_MAX_PAGES ((16*_1M) >> PAGE_SHIFT)
+/** The small segment size that is adopted on out-of-memory conditions with a
+ * single big segment. */
+# define PGMR0DYNMAP_SMALL_SEG_PAGES 128
+/** The number of pages we reserve per CPU. */
+# define PGMR0DYNMAP_PAGES_PER_CPU 256
+/** The minimum number of pages we reserve per CPU.
+ * This must be equal to or larger than the autoset size. */
+# define PGMR0DYNMAP_PAGES_PER_CPU_MIN 64
+/** Calculates the overload threshold (safety margin). Currently set at 50%. */
+# define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
+/** The number of guard pages.
+ * @remarks Never do tuning of the hashing or whatnot with a strict build! */
+# if defined(VBOX_STRICT)
+# define PGMR0DYNMAP_GUARD_PAGES 1
+# else
+# define PGMR0DYNMAP_GUARD_PAGES 0
+# endif
+#endif /* IN_RING0 */
+/** The dummy physical address of guard pages. */
+#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
+/** The dummy reference count of guard pages. (Must be non-zero.) */
+#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
+#if 0
+/** Define this to just clear the present bit on guard pages.
+ * The alternative is to replace the entire PTE with a bad not-present
+ * PTE. Either way, XNU will screw us. :-/ */
+# define PGMR0DYNMAP_GUARD_NP
+#endif
+/** The dummy PTE value for a page (32-bit legacy paging). */
+#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
+/** The dummy PTE value for a page (PAE/AMD64 paging). */
+#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
+
+#ifdef IN_RING0 /* Note! Assertions cause panics if preemption is disabled;
+ * disable this to work around that. */
+/**
+ * Acquires the spinlock.
+ */
+# define PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis) \
+ RTSpinlockAcquire((pThis)->hSpinlock)
+
+/**
+ * Releases the spinlock.
+ */
+# define PGMRZDYNMAP_SPINLOCK_RELEASE(pThis) \
+ RTSpinlockRelease((pThis)->hSpinlock)
+
+/**
+ * Re-acquires the spinlock.
+ */
+# define PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis) \
+ RTSpinlockAcquire((pThis)->hSpinlock)
+#else
+# define PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis) do { } while (0)
+# define PGMRZDYNMAP_SPINLOCK_RELEASE(pThis) do { } while (0)
+# define PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis) do { } while (0)
+#endif
+
+
+/** Converts a PGMCPUM::AutoSet pointer into a PVMCPU. */
+#define PGMRZDYNMAP_SET_2_VMCPU(pSet) (RT_FROM_MEMBER(pSet, VMCPU, pgm.s.AutoSet))
+
+/** Converts a PGMCPUM::AutoSet pointer into a PVM. */
+#define PGMRZDYNMAP_SET_2_VM(pSet) (PGMRZDYNMAP_SET_2_VMCPU(pSet)->CTX_SUFF(pVM))
+
+/** Converts a PGMCPUM::AutoSet pointer into the dynamic mapping cache pointer. */
+#ifdef IN_RC
+# define PGMRZDYNMAP_SET_2_DYNMAP(pSet) (PGMRZDYNMAP_SET_2_VM(pSet)->pgm.s.pRCDynMap)
+#else
+# define PGMRZDYNMAP_SET_2_DYNMAP(pSet) (g_pPGMR0DynMap)
+#endif
+
+/**
+ * Gets the set index of the current CPU.
+ *
+ * This always returns 0 when in raw-mode context because there is only ever
+ * one EMT in that context (at least presently).
+ */
+#ifdef IN_RC
+# define PGMRZDYNMAP_CUR_CPU() (0)
+#else
+# define PGMRZDYNMAP_CUR_CPU() RTMpCurSetIndex()
+#endif
+
+/** PGMRZDYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
+#define PGMRZDYNMAP_MAGIC UINT32_C(0x19640201)
+
+
+/** Zaps a set entry. */
+#define PGMRZDYNMAP_ZAP_ENTRY(pEntry) \
+ do \
+ { \
+ (pEntry)->iPage = UINT16_MAX; \
+ (pEntry)->cRefs = 0; \
+ (pEntry)->cInlinedRefs = 0; \
+ (pEntry)->cUnrefs = 0; \
+ } while (0)
+
+
+/** @def PGMRZDYNMAP_STRICT_RELEASE
+ * Define this to force pages to be released and made non-present ASAP after
+ * use. This should not normally be enabled as it is a bit expensive. */
+#if 0 || defined(DOXYGEN_RUNNING)
+# define PGMRZDYNMAP_STRICT_RELEASE
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#ifdef IN_RING0
+/**
+ * Ring-0 dynamic mapping cache segment.
+ *
+ * The dynamic mapping cache can be extended with additional segments if the
+ * load is found to be too high. This is done the next time a VM is created,
+ * under the protection of the init mutex. The arrays are reallocated and the
+ * new segment is added to the end of them. Nothing is rehashed of course, as the
+ * indexes / addresses must remain unchanged.
+ *
+ * This structure is only modified while owning the init mutex or during module
+ * init / term.
+ */
+typedef struct PGMR0DYNMAPSEG
+{
+ /** Pointer to the next segment. */
+ struct PGMR0DYNMAPSEG *pNext;
+ /** The memory object for the virtual address range that we're abusing. */
+ RTR0MEMOBJ hMemObj;
+ /** The start page in the cache. (I.e. index into the arrays.) */
+ uint16_t iPage;
+ /** The number of pages this segment contributes. */
+ uint16_t cPages;
+ /** The number of page tables. */
+ uint16_t cPTs;
+ /** The memory objects for the page tables. */
+ RTR0MEMOBJ ahMemObjPTs[1];
+} PGMR0DYNMAPSEG;
+/** Pointer to a ring-0 dynamic mapping cache segment. */
+typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
+
+
+/**
+ * Ring-0 dynamic mapping cache entry.
+ *
+ * @sa PGMRZDYNMAPENTRY, PGMRCDYNMAPENTRY.
+ */
+typedef struct PGMR0DYNMAPENTRY
+{
+ /** The physical address of the currently mapped page.
+ * This is duplicated for three reasons: cache locality, cache policy of the PT
+ * mappings and sanity checks. */
+ RTHCPHYS HCPhys;
+ /** Pointer to the page. */
+ void *pvPage;
+ /** The number of references. */
+ int32_t volatile cRefs;
+ /** PTE pointer union. */
+ union PGMR0DYNMAPENTRY_PPTE
+ {
+ /** PTE pointer, 32-bit legacy version. */
+ PX86PTE pLegacy;
+ /** PTE pointer, PAE version. */
+ PX86PTEPAE pPae;
+ /** PTE pointer, the void version. */
+ void *pv;
+ } uPte;
+ /** CPUs that haven't invalidated this entry after its last update. */
+ RTCPUSET PendingSet;
+} PGMR0DYNMAPENTRY;
+/** Pointer to a mapping cache entry for ring-0.
+ * @sa PPGMRZDYNMAPENTRY, PPGMRCDYNMAPENTRY */
+typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
+
+
+/**
+ * Dynamic mapping cache for ring-0.
+ *
+ * This is initialized during VMMR0 module init but no segments are allocated
+ * at that time. Segments will be added when the first VM is started and
+ * removed again when the last VM shuts down, thus avoiding consuming memory while
+ * dormant. At module termination, the remaining bits will be freed up.
+ *
+ * @sa PPGMRZDYNMAP, PGMRCDYNMAP.
+ */
+typedef struct PGMR0DYNMAP
+{
+ /** The usual magic number / eye catcher (PGMRZDYNMAP_MAGIC). */
+ uint32_t u32Magic;
+ /** Spinlock serializing the normal operation of the cache. */
+ RTSPINLOCK hSpinlock;
+ /** Array for tracking and managing the pages. */
+ PPGMR0DYNMAPENTRY paPages;
+ /** The cache size given as a number of pages. */
+ uint32_t cPages;
+ /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
+ bool fLegacyMode;
+ /** The current load.
+ * This does not include guard pages. */
+ uint32_t cLoad;
+ /** The max load ever.
+ * This is maintained to trigger the adding of more mapping space. */
+ uint32_t cMaxLoad;
+ /** Initialization / termination lock. */
+ RTSEMFASTMUTEX hInitLock;
+ /** The number of guard pages. */
+ uint32_t cGuardPages;
+ /** The number of users (protected by hInitLock). */
+ uint32_t cUsers;
+ /** Array containing a copy of the original page tables.
+ * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
+ void *pvSavedPTEs;
+ /** List of segments. */
+ PPGMR0DYNMAPSEG pSegHead;
+ /** The paging mode. */
+ SUPPAGINGMODE enmPgMode;
+} PGMR0DYNMAP;
+
+
+/**
+ * Paging level data.
+ */
+typedef struct PGMR0DYNMAPPGLVL
+{
+ uint32_t cLevels; /**< The number of levels. */
+ struct
+ {
+ RTHCPHYS HCPhys; /**< The address of the page for the current level,
+ * i.e. what hMemObj/hMapObj is currently mapping. */
+ RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
+ RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
+ RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
+ uint32_t fPtrShift; /**< The pointer shift count. */
+ uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
+ uint64_t fAndMask; /**< And mask to check entry flags. */
+ uint64_t fResMask; /**< The result from applying fAndMask. */
+ union
+ {
+ void *pv; /**< hMapObj address. */
+ PX86PGUINT paLegacy; /**< Legacy table view. */
+ PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
+ } u;
+ } a[4];
+} PGMR0DYNMAPPGLVL;
+/** Pointer to paging level data. */
+typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
+#endif
+
+/** Mapping cache entry for the current context.
+ * @sa PGMR0DYNMAPENTRY, PGMRCDYNMAPENTRY */
+typedef CTX_MID(PGM,DYNMAPENTRY) PGMRZDYNMAPENTRY;
+/** Pointer to a mapping cache entry for the current context.
+ * @sa PGMR0DYNMAPENTRY, PGMRCDYNMAPENTRY */
+typedef PGMRZDYNMAPENTRY *PPGMRZDYNMAPENTRY;
+
+/** Pointer to the mapping cache instance for the current context.
+ * @sa PGMR0DYNMAP, PGMRCDYNMAP */
+typedef CTX_MID(PGM,DYNMAP) *PPGMRZDYNMAP;
+
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef IN_RING0
+/** Pointer to the ring-0 dynamic mapping cache. */
+static PGMR0DYNMAP *g_pPGMR0DynMap;
+#endif
+/** For overflow testing. */
+static bool g_fPGMR0DynMapTestRunning = false;
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void pgmRZDynMapReleasePage(PPGMRZDYNMAP pThis, uint32_t iPage, uint32_t cRefs);
+#ifdef IN_RING0
+static int pgmR0DynMapSetup(PPGMRZDYNMAP pThis);
+static int pgmR0DynMapExpand(PPGMRZDYNMAP pThis);
+static void pgmR0DynMapTearDown(PPGMRZDYNMAP pThis);
+#endif
+#if 0 /*def DEBUG*/
+static int pgmR0DynMapTest(PVM pVM);
+#endif
+
+
+/**
+ * Initializes the auto mapping sets for a VM.
+ *
+ * @returns VINF_SUCCESS on success, VERR_PGM_DYNMAP_IPE on failure.
+ * @param pVM The cross context VM structure.
+ */
+static int pgmRZDynMapInitAutoSetsForVM(PVM pVM)
+{
+ VMCPUID idCpu = pVM->cCpus;
+ AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_PGM_DYNMAP_IPE);
+ while (idCpu-- > 0)
+ {
+ PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
+ uint32_t j = RT_ELEMENTS(pSet->aEntries);
+ while (j-- > 0)
+ {
+ pSet->aEntries[j].pvPage = NULL;
+ pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
+ }
+ pSet->cEntries = PGMMAPSET_CLOSED;
+ pSet->iSubset = UINT32_MAX;
+ pSet->iCpu = -1;
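+ /* Reset the hash table; 0xff in every byte marks all slots as unused. */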
+ memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+#ifdef IN_RING0
+
+/**
+ * Initializes the ring-0 dynamic mapping cache.
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) PGMR0DynMapInit(void)
+{
+ Assert(!g_pPGMR0DynMap);
+
+ /*
+ * Create and initialize the cache instance.
+ */
+ PPGMRZDYNMAP pThis = (PPGMRZDYNMAP)RTMemAllocZ(sizeof(*pThis));
+ AssertLogRelReturn(pThis, VERR_NO_MEMORY);
+ int rc = VINF_SUCCESS;
+ pThis->enmPgMode = SUPR0GetPagingMode();
+ switch (pThis->enmPgMode)
+ {
+ case SUPPAGINGMODE_32_BIT:
+ case SUPPAGINGMODE_32_BIT_GLOBAL:
+ pThis->fLegacyMode = false;
+ break;
+ case SUPPAGINGMODE_PAE:
+ case SUPPAGINGMODE_PAE_GLOBAL:
+ case SUPPAGINGMODE_PAE_NX:
+ case SUPPAGINGMODE_PAE_GLOBAL_NX:
+ case SUPPAGINGMODE_AMD64:
+ case SUPPAGINGMODE_AMD64_GLOBAL:
+ case SUPPAGINGMODE_AMD64_NX:
+ case SUPPAGINGMODE_AMD64_GLOBAL_NX:
+ pThis->fLegacyMode = false;
+ break;
+ default:
+ rc = VERR_PGM_DYNMAP_IPE;
+ break;
+ }
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTSemFastMutexCreate(&pThis->hInitLock);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTSpinlockCreate(&pThis->hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "PGMR0DynMap");
+ if (RT_SUCCESS(rc))
+ {
+ pThis->u32Magic = PGMRZDYNMAP_MAGIC;
+ g_pPGMR0DynMap = pThis;
+ return VINF_SUCCESS;
+ }
+ RTSemFastMutexDestroy(pThis->hInitLock);
+ }
+ }
+ RTMemFree(pThis);
+ return rc;
+}
+
+
+/**
+ * Terminates the ring-0 dynamic mapping cache.
+ */
+VMMR0DECL(void) PGMR0DynMapTerm(void)
+{
+ /*
+ * Destroy the cache.
+ *
+ * There are not supposed to be any races here; the loader should
+ * make sure about that. So, don't bother locking anything.
+ *
+ * The VM objects should all be destroyed by now, so there is no
+ * dangling users or anything like that to clean up. This routine
+ * is just a mirror image of PGMR0DynMapInit.
+ */
+ PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
+ if (pThis)
+ {
+ AssertPtr(pThis);
+ g_pPGMR0DynMap = NULL;
+
+ /* This should *never* happen, but in case it does try not to leak memory. */
+ AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
+ ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
+ pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
+ if (pThis->paPages)
+ pgmR0DynMapTearDown(pThis);
+
+ /* Free the associated resources. */
+ RTSemFastMutexDestroy(pThis->hInitLock);
+ pThis->hInitLock = NIL_RTSEMFASTMUTEX;
+ RTSpinlockDestroy(pThis->hSpinlock);
+ pThis->hSpinlock = NIL_RTSPINLOCK;
+ pThis->u32Magic = UINT32_MAX;
+ RTMemFree(pThis);
+ }
+}
+
+
+/**
+ * Initializes the dynamic mapping cache for a new VM.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
+{
+ AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
+
+ /*
+ * Initialize the auto sets.
+ */
+ int rc = pgmRZDynMapInitAutoSetsForVM(pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Do we need the cache? Skip the last bit if we don't.
+ */
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ return VINF_SUCCESS;
+
+ /*
+ * Reference and if necessary setup or expand the cache.
+ */
+ PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
+ AssertPtrReturn(pThis, VERR_PGM_DYNMAP_IPE);
+ rc = RTSemFastMutexRequest(pThis->hInitLock);
+ AssertLogRelRCReturn(rc, rc);
+
+ pThis->cUsers++;
+ if (pThis->cUsers == 1)
+ {
+ rc = pgmR0DynMapSetup(pThis);
+#if 0 /*def DEBUG*/
+ if (RT_SUCCESS(rc))
+ {
+ rc = pgmR0DynMapTest(pVM);
+ if (RT_FAILURE(rc))
+ pgmR0DynMapTearDown(pThis);
+ }
+#endif
+ }
+ else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
+ rc = pgmR0DynMapExpand(pThis);
+ if (RT_SUCCESS(rc))
+ pVM->pgm.s.pvR0DynMapUsed = pThis;
+ else
+ pThis->cUsers--;
+
+ RTSemFastMutexRelease(pThis->hInitLock);
+ return rc;
+}
+
+
+/**
+ * Terminates the dynamic mapping cache usage for a VM.
+ *
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
+{
+ /*
+ * Return immediately if we're not using the cache.
+ */
+ if (!pVM->pgm.s.pvR0DynMapUsed)
+ return;
+
+ PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
+ AssertPtrReturnVoid(pThis);
+
+ int rc = RTSemFastMutexRequest(pThis->hInitLock);
+ AssertLogRelRCReturnVoid(rc);
+
+ if (pVM->pgm.s.pvR0DynMapUsed == pThis)
+ {
+ pVM->pgm.s.pvR0DynMapUsed = NULL;
+
+#ifdef VBOX_STRICT
+ PGMR0DynMapAssertIntegrity();
+#endif
+
+ /*
+ * Clean up and check the auto sets.
+ */
+ VMCPUID idCpu = pVM->cCpus;
+ while (idCpu-- > 0)
+ {
+ PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
+ uint32_t j = pSet->cEntries;
+ if (j <= RT_ELEMENTS(pSet->aEntries))
+ {
+ /*
+ * The set is open, close it.
+ */
+ while (j-- > 0)
+ {
+ int32_t cRefs = pSet->aEntries[j].cRefs;
+ uint32_t iPage = pSet->aEntries[j].iPage;
+ LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
+ if (iPage < pThis->cPages && cRefs > 0)
+ pgmRZDynMapReleasePage(pThis, iPage, cRefs);
+ else
+ AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
+
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
+ }
+ pSet->cEntries = PGMMAPSET_CLOSED;
+ pSet->iSubset = UINT32_MAX;
+ pSet->iCpu = -1;
+ }
+ else
+ AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
+
+ j = RT_ELEMENTS(pSet->aEntries);
+ while (j-- > 0)
+ {
+ Assert(pSet->aEntries[j].iPage == UINT16_MAX);
+ Assert(!pSet->aEntries[j].cRefs);
+ }
+ }
+
+ /*
+ * Release our reference to the mapping cache.
+ */
+ Assert(pThis->cUsers > 0);
+ pThis->cUsers--;
+ if (!pThis->cUsers)
+ pgmR0DynMapTearDown(pThis);
+ }
+ else
+ AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
+
+ RTSemFastMutexRelease(pThis->hInitLock);
+}
+
+
+/**
+ * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
+ *
+ * @param idCpu The current CPU.
+ * @param pvUser1 The dynamic mapping cache instance.
+ * @param pvUser2 Unused, NULL.
+ */
+static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ Assert(!pvUser2);
+ PPGMRZDYNMAP pThis = (PPGMRZDYNMAP)pvUser1;
+ Assert(pThis == g_pPGMR0DynMap);
+ PPGMRZDYNMAPENTRY paPages = pThis->paPages;
+ uint32_t iPage = pThis->cPages;
+ while (iPage-- > 0)
+ ASMInvalidatePage((uintptr_t)paPages[iPage].pvPage);
+}
+
+
+/**
+ * Shoot down the TLBs for every single cache entry on all CPUs.
+ *
+ * @returns IPRT status code (RTMpOnAll).
+ * @param pThis The dynamic mapping cache instance.
+ */
+static int pgmR0DynMapTlbShootDown(PPGMRZDYNMAP pThis)
+{
+ int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
+ AssertRC(rc);
+ if (RT_FAILURE(rc))
+ {
+ uint32_t iPage = pThis->cPages;
+ while (iPage-- > 0)
+ ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
+ }
+ return rc;
+}
+
+
+/**
+ * Calculate the new cache size based on cMaxLoad statistics.
+ *
+ * @returns Number of pages.
+ * @param pThis The dynamic mapping cache instance.
+ * @param pcMinPages The minimal size in pages.
+ */
+static uint32_t pgmR0DynMapCalcNewSize(PPGMRZDYNMAP pThis, uint32_t *pcMinPages)
+{
+ Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
+
+ /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
+ RTCPUID cCpus = RTMpGetCount();
+ AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
+ uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
+ uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
+
+ /* adjust against cMaxLoad. */
+ AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
+ if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
+ pThis->cMaxLoad = 0;
+
+ while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
+ cPages += PGMR0DYNMAP_PAGES_PER_CPU;
+
+ if (pThis->cMaxLoad > cMinPages)
+ cMinPages = pThis->cMaxLoad;
+
+ /* adjust against max and current size. */
+ if (cPages < pThis->cPages)
+ cPages = pThis->cPages;
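+ /* Every usable page is accompanied by PGMR0DYNMAP_GUARD_PAGES guard pages
+ (zero in non-strict builds), hence the scaling below. */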
+ cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
+ if (cPages > PGMR0DYNMAP_MAX_PAGES)
+ cPages = PGMR0DYNMAP_MAX_PAGES;
+
+ if (cMinPages < pThis->cPages)
+ cMinPages = pThis->cPages;
+ cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
+ if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
+ cMinPages = PGMR0DYNMAP_MAX_PAGES;
+
+ Assert(cMinPages);
+ *pcMinPages = cMinPages;
+ return cPages;
+}
+
+
+/**
+ * Initializes the paging level data.
+ *
+ * @param pThis The dynamic mapping cache instance.
+ * @param pPgLvl The paging level data.
+ */
+void pgmR0DynMapPagingArrayInit(PPGMRZDYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
+{
+ RTCCUINTREG cr4 = ASMGetCR4();
+ switch (pThis->enmPgMode)
+ {
+ case SUPPAGINGMODE_32_BIT:
+ case SUPPAGINGMODE_32_BIT_GLOBAL:
+ pPgLvl->cLevels = 2;
+ pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
+ pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
+ pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
+ pPgLvl->a[0].fPtrMask = X86_PD_MASK;
+ pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
+
+ pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
+ pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
+ pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
+ pPgLvl->a[1].fPtrMask = X86_PT_MASK;
+ pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
+ break;
+
+ case SUPPAGINGMODE_PAE:
+ case SUPPAGINGMODE_PAE_GLOBAL:
+ case SUPPAGINGMODE_PAE_NX:
+ case SUPPAGINGMODE_PAE_GLOBAL_NX:
+ pPgLvl->cLevels = 3;
+ pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
+ pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
+ pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
+ pPgLvl->a[0].fAndMask = X86_PDPE_P;
+ pPgLvl->a[0].fResMask = X86_PDPE_P;
+
+ pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
+ pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
+ pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
+ pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
+ pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
+
+ pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
+ pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
+ pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
+ pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
+ pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
+ break;
+
+ case SUPPAGINGMODE_AMD64:
+ case SUPPAGINGMODE_AMD64_GLOBAL:
+ case SUPPAGINGMODE_AMD64_NX:
+ case SUPPAGINGMODE_AMD64_GLOBAL_NX:
+ pPgLvl->cLevels = 4;
+ pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
+ pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
+ pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
+ pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
+ pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
+
+ pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
+ pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
+ pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
+ pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
+ pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
+
+ pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
+ pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
+ pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
+ pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
+ pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
+
+ pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
+ pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
+ pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
+ pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
+ pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
+ break;
+
+ default:
+ AssertFailed();
+ pPgLvl->cLevels = 0;
+ break;
+ }
+
+ for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
+ {
+ pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
+ pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
+ pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
+ pPgLvl->a[i].u.pv = NULL;
+ }
+}
+
+
+/**
+ * Maps a PTE.
+ *
+ * This will update the segment structure when new PTs are mapped.
+ *
+ * It also assumes that we (for paranoid reasons) wish to establish a mapping
+ * chain from CR3 to the PT that corresponds to the processor we're
+ * currently running on, and go about this by running with interrupts disabled
+ * and restarting from CR3 for every change.
+ *
+ * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
+ * to re-enable interrupts.
+ * @param pThis The dynamic mapping cache instance.
+ * @param pPgLvl The paging level structure.
+ * @param pvPage The page.
+ * @param pSeg The segment.
+ * @param cMaxPTs The max number of PTs expected in the segment.
+ * @param ppvPTE Where to store the PTE address.
+ */
+static int pgmR0DynMapPagingArrayMapPte(PPGMRZDYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
+ PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
+{
+ Assert(!(ASMGetFlags() & X86_EFL_IF));
+ void *pvEntry = NULL;
+ X86PGPAEUINT uEntry = ASMGetCR3();
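+ /* Walk from CR3 down to the PT. Whenever a level has to be (re)mapped we
+ briefly re-enable interrupts and return VINF_TRY_AGAIN so the caller
+ restarts the walk from scratch. */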
+ for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
+ {
+ RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
+ if (pPgLvl->a[i].HCPhys != HCPhys)
+ {
+ /*
+ * Need to remap this level.
+ * The final level, the PT, will not be freed since that is what it's all about.
+ */
+ ASMIntEnable();
+ if (i + 1 == pPgLvl->cLevels)
+ AssertReturn(pSeg->cPTs < cMaxPTs, VERR_PGM_DYNMAP_IPE);
+ else
+ {
+ int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
+ pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
+ (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
+ RTMEM_PROT_WRITE | RTMEM_PROT_READ);
+ if (RT_SUCCESS(rc))
+ {
+ pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
+ AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
+ pPgLvl->a[i].HCPhys = HCPhys;
+ if (i + 1 == pPgLvl->cLevels)
+ pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
+ ASMIntDisable();
+ return VINF_TRY_AGAIN;
+ }
+
+ pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
+ }
+ else
+ pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
+ pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
+ return rc;
+ }
+
+ /*
+ * The next level.
+ */
+ uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
+ if (pThis->fLegacyMode)
+ {
+ pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
+ uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
+ }
+ else
+ {
+ pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
+ uEntry = pPgLvl->a[i].u.paPae[iEntry];
+ }
+
+ if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
+ {
+ LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
+ "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
+ i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
+ pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
+ return VERR_PGM_DYNMAP_IPE;
+ }
+ /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
+ }
+
+ /* made it thru without needing to remap anything. */
+ *ppvPTE = pvEntry;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up a guard page.
+ *
+ * @param pThis The dynamic mapping cache instance.
+ * @param pPage The page.
+ */
+DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMRZDYNMAP pThis, PPGMRZDYNMAPENTRY pPage)
+{
+ memset(pPage->pvPage, 0xfd, PAGE_SIZE);
+ pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
+ pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
+#ifdef PGMR0DYNMAP_GUARD_NP
+ ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
+#else
+ if (pThis->fLegacyMode)
+ ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
+ else
+ ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
+#endif
+ pThis->cGuardPages++;
+}
+
+
+/**
+ * Adds a new segment of the specified size.
+ *
+ * @returns VBox status code.
+ * @param pThis The dynamic mapping cache instance.
+ * @param cPages The size of the new segment, given as a page count.
+ */
+static int pgmR0DynMapAddSeg(PPGMRZDYNMAP pThis, uint32_t cPages)
+{
+ int rc2;
+ AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
+
+ /*
+ * Do the array reallocations first.
+ * (The pages array has to be replaced behind the spinlock of course.)
+ */
+ void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
+ if (!pvSavedPTEs)
+ return VERR_NO_MEMORY;
+ pThis->pvSavedPTEs = pvSavedPTEs;
+
+ void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
+ if (!pvPages)
+ {
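+ /* Try to shrink pvSavedPTEs back to its old size; if that fails, simply
+ keep the enlarged block. */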
+ pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
+ if (pvSavedPTEs)
+ pThis->pvSavedPTEs = pvSavedPTEs;
+ return VERR_NO_MEMORY;
+ }
+
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+
+ memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
+ void *pvToFree = pThis->paPages;
+ pThis->paPages = (PPGMRZDYNMAPENTRY)pvPages;
+
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ RTMemFree(pvToFree);
+
+ /*
+ * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
+ */
+ uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
+ PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF_DYN(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
+ if (!pSeg)
+ return VERR_NO_MEMORY;
+ pSeg->pNext = NULL;
+ pSeg->cPages = cPages;
+ pSeg->iPage = pThis->cPages;
+ pSeg->cPTs = 0;
+ int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
+ if (RT_SUCCESS(rc))
+ {
+ uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
+ AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
+ memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
+
+ /*
+ * Walk thru the pages and set them up with a mapping of their PTE and everything.
+ */
+ ASMIntDisable();
+ PGMR0DYNMAPPGLVL PgLvl;
+ pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
+ uint32_t const iEndPage = pSeg->iPage + cPages;
+ for (uint32_t iPage = pSeg->iPage;
+ iPage < iEndPage;
+ iPage++, pbPage += PAGE_SIZE)
+ {
+ /* Initialize the page data. */
+ pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
+ pThis->paPages[iPage].pvPage = pbPage;
+ pThis->paPages[iPage].cRefs = 0;
+ pThis->paPages[iPage].uPte.pPae = 0;
+#ifndef IN_RC
+ RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
+#endif
+
+ /* Map its page table, retry until we've got a clean run (paranoia). */
+ do
+ rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
+ &pThis->paPages[iPage].uPte.pv);
+ while (rc == VINF_TRY_AGAIN);
+ if (RT_FAILURE(rc))
+ break;
+
+ /* Save the PTE. */
+ if (pThis->fLegacyMode)
+ ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
+ else
+ ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
+
+#ifdef VBOX_STRICT
+ /* Check that we've got the right entry. */
+ RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
+ RTHCPHYS HCPhysPte = pThis->fLegacyMode
+ ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
+ : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
+ if (HCPhysPage != HCPhysPte)
+ {
+ LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
+ iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
+ rc = VERR_PGM_DYNMAP_IPE;
+ break;
+ }
+#endif
+ } /* for each page */
+ ASMIntEnable();
+
+ /* cleanup non-PT mappings */
+ for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
+ RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
+
+ if (RT_SUCCESS(rc))
+ {
+#if PGMR0DYNMAP_GUARD_PAGES > 0
+ /*
+ * Setup guard pages.
+ * (Note: TLBs will be shot down later on.)
+ */
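+ /* The layout is PGMR0DYNMAP_GUARD_PAGES guard pages followed by one
+ usable (guarded) page, repeated until the end of the segment. */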
+ uint32_t iPage = pSeg->iPage;
+ while (iPage < iEndPage)
+ {
+ for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
+ pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
+ iPage++; /* the guarded page */
+ }
+
+ /* Make sure the very last page is a guard page too. */
+ iPage = iEndPage - 1;
+ if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
+ pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
+#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
+
+ /*
+ * Commit it by adding the segment to the list and updating the page count.
+ */
+ pSeg->pNext = pThis->pSegHead;
+ pThis->pSegHead = pSeg;
+ pThis->cPages += cPages;
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Bail out.
+ */
+ while (pSeg->cPTs-- > 0)
+ {
+ rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
+ AssertRC(rc2);
+ pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
+ }
+
+ rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
+ AssertRC(rc2);
+ pSeg->hMemObj = NIL_RTR0MEMOBJ;
+ }
+ else if (rc == VERR_NO_PAGE_MEMORY || rc == VERR_NO_PHYS_MEMORY)
+ rc = VERR_NO_MEMORY;
+ RTMemFree(pSeg);
+
+ /* Don't bother resizing the arrays, but free them if we're the only user. */
+ if (!pThis->cPages)
+ {
+ RTMemFree(pThis->paPages);
+ pThis->paPages = NULL;
+ RTMemFree(pThis->pvSavedPTEs);
+ pThis->pvSavedPTEs = NULL;
+ }
+ return rc;
+}
+
+
+/**
+ * Called by PGMR0DynMapInitVM under the init lock.
+ *
+ * @returns VBox status code.
+ * @param pThis The dynamic mapping cache instance.
+ */
+static int pgmR0DynMapSetup(PPGMRZDYNMAP pThis)
+{
+ /*
+ * Calc the size and add a segment of that size.
+ */
+ uint32_t cMinPages;
+ uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
+ AssertReturn(cPages, VERR_PGM_DYNMAP_IPE);
+ int rc = pgmR0DynMapAddSeg(pThis, cPages);
+ if (rc == VERR_NO_MEMORY)
+ {
+ /*
+ * Try adding smaller segments.
+ */
+ do
+ rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
+ while (RT_SUCCESS(rc) && pThis->cPages < cPages);
+ if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
+ rc = VINF_SUCCESS;
+ if (rc == VERR_NO_MEMORY)
+ {
+ if (pThis->cPages)
+ pgmR0DynMapTearDown(pThis);
+ rc = VERR_PGM_DYNMAP_SETUP_ERROR;
+ }
+ }
+ Assert(ASMGetFlags() & X86_EFL_IF);
+
+#if PGMR0DYNMAP_GUARD_PAGES > 0
+ /* paranoia */
+ if (RT_SUCCESS(rc))
+ pgmR0DynMapTlbShootDown(pThis);
+#endif
+ return rc;
+}
+
+
+/**
+ * Called by PGMR0DynMapInitVM under the init lock.
+ *
+ * @returns VBox status code.
+ * @param pThis The dynamic mapping cache instance.
+ */
+static int pgmR0DynMapExpand(PPGMRZDYNMAP pThis)
+{
+ /*
+ * Calc the new target size and add a segment of the appropriate size.
+ */
+ uint32_t cMinPages;
+ uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
+ AssertReturn(cPages, VERR_PGM_DYNMAP_IPE);
+ if (pThis->cPages >= cPages)
+ return VINF_SUCCESS;
+
+ uint32_t cAdd = cPages - pThis->cPages;
+ int rc = pgmR0DynMapAddSeg(pThis, cAdd);
+ if (rc == VERR_NO_MEMORY)
+ {
+ /*
+ * Try adding smaller segments.
+ */
+ do
+ rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
+ while (RT_SUCCESS(rc) && pThis->cPages < cPages);
+ if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
+ rc = VINF_SUCCESS;
+ if (rc == VERR_NO_MEMORY)
+ rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
+ }
+ Assert(ASMGetFlags() & X86_EFL_IF);
+
+#if PGMR0DYNMAP_GUARD_PAGES > 0
+ /* paranoia */
+ if (RT_SUCCESS(rc))
+ pgmR0DynMapTlbShootDown(pThis);
+#endif
+ return rc;
+}
+
+
+/**
+ * Called by PGMR0DynMapTermVM under the init lock.
+ *
+ * @param pThis The dynamic mapping cache instance.
+ */
+static void pgmR0DynMapTearDown(PPGMRZDYNMAP pThis)
+{
+ /*
+ * Restore the original page table entries
+ */
+ PPGMRZDYNMAPENTRY paPages = pThis->paPages;
+ uint32_t iPage = pThis->cPages;
+ if (pThis->fLegacyMode)
+ {
+ X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
+ while (iPage-- > 0)
+ {
+ X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
+ X86PGUINT uOld2 = uOld; NOREF(uOld2);
+ X86PGUINT uNew = paSavedPTEs[iPage];
+ while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
+ AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
+ Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
+ }
+ }
+ else
+ {
+ X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
+ while (iPage-- > 0)
+ {
+ X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
+ X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
+ X86PGPAEUINT uNew = paSavedPTEs[iPage];
+ while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
+ AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
+ Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
+ }
+ }
+
+ /*
+ * Shoot down the TLBs on all CPUs before freeing them.
+ */
+ pgmR0DynMapTlbShootDown(pThis);
+
+ /*
+ * Free the segments.
+ */
+ while (pThis->pSegHead)
+ {
+ int rc;
+ PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
+ pThis->pSegHead = pSeg->pNext;
+
+ uint32_t iPT = pSeg->cPTs;
+ while (iPT-- > 0)
+ {
+ rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
+ pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
+ }
+ rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
+ pSeg->hMemObj = NIL_RTR0MEMOBJ;
+ pSeg->pNext = NULL;
+ pSeg->iPage = UINT16_MAX;
+ pSeg->cPages = 0;
+ pSeg->cPTs = 0;
+ RTMemFree(pSeg);
+ }
+
+ /*
+ * Free the arrays and restore the initial state.
+ * The cLoadMax value is left behind for the next setup.
+ */
+ RTMemFree(pThis->paPages);
+ pThis->paPages = NULL;
+ RTMemFree(pThis->pvSavedPTEs);
+ pThis->pvSavedPTEs = NULL;
+ pThis->cPages = 0;
+ pThis->cLoad = 0;
+ pThis->cGuardPages = 0;
+}
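+
+
+/*
+ * A hypothetical helper (example only, compiled out) showing the idiom used by
+ * the restore loops above: the 'Ex' flavour of the compare-exchange writes the
+ * value it actually found back into uOld, so if another writer raced us (which
+ * the AssertMsgFailed flags as a bug) the next iteration compares against the
+ * observed value and the store of the saved PTE still goes through.
+ */
+#if 0 /* example only */
+static void pgmR0DynMapExampleRestorePte(volatile uint32_t *pu32Pte, uint32_t uSaved)
+{
+    uint32_t uOld = *pu32Pte;
+    while (!ASMAtomicCmpXchgExU32(pu32Pte, uSaved, uOld, &uOld))
+        AssertMsgFailed(("PTE changed behind our back: %#x\n", uOld)); /* uOld now holds the actual value */
+}
+#endif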
+
+#endif /* IN_RING0 */
+#ifdef IN_RC
+
+/**
+ * Initializes the dynamic mapping cache in raw-mode context.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMRCDECL(int) PGMRCDynMapInit(PVM pVM)
+{
+ /*
+ * Allocate and initialize the instance data and page array.
+ */
+ PPGMRZDYNMAP pThis;
+ size_t const cPages = MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE;
+ size_t const cb = RT_ALIGN_Z(sizeof(*pThis), 32)
+ + sizeof(PGMRZDYNMAPENTRY) * cPages;
+ int rc = MMHyperAlloc(pVM, cb, 32, MM_TAG_PGM, (void **)&pThis);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ pThis->u32Magic = PGMRZDYNMAP_MAGIC;
+ pThis->paPages = RT_ALIGN_PT(pThis + 1, 32, PPGMRZDYNMAPENTRY);
+ pThis->cPages = cPages;
+ pThis->cLoad = 0;
+ pThis->cMaxLoad = 0;
+ pThis->cGuardPages = 0;
+ pThis->cUsers = 1;
+
+ for (size_t iPage = 0; iPage < cPages; iPage++)
+ {
+ pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
+ pThis->paPages[iPage].pvPage = pVM->pgm.s.pbDynPageMapBaseGC + iPage * PAGE_SIZE;
+ pThis->paPages[iPage].cRefs = 0;
+ pThis->paPages[iPage].uPte.pLegacy = &pVM->pgm.s.paDynPageMap32BitPTEsGC[iPage];
+ pThis->paPages[iPage].uPte.pPae = (PX86PTEPAE)&pVM->pgm.s.paDynPageMapPaePTEsGC[iPage];
+ }
+
+ pVM->pgm.s.pRCDynMap = pThis;
+
+ /*
+ * Initialize the autosets for the VM.
+ */
+ rc = pgmRZDynMapInitAutoSetsForVM(pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ return VINF_SUCCESS;
+}
+
+#endif /* IN_RC */
+
+/**
+ * Release references to a page, caller owns the spin lock.
+ *
+ * @param pThis The dynamic mapping cache instance.
+ * @param iPage The page.
+ * @param cRefs The number of references to release.
+ */
+DECLINLINE(void) pgmRZDynMapReleasePageLocked(PPGMRZDYNMAP pThis, uint32_t iPage, int32_t cRefs)
+{
+ cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
+ AssertMsg(cRefs >= 0, ("%d\n", cRefs));
+ if (!cRefs)
+ {
+ pThis->cLoad--;
+#ifdef PGMRZDYNMAP_STRICT_RELEASE
+ pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
+ ASMAtomicBitClear(pThis->paPages[iPage].uPte.pv, X86_PTE_BIT_P);
+ ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
+#endif
+ }
+}
+
+
+/**
+ * Release references to a page, caller does not own the spin lock.
+ *
+ * @param pThis The dynamic mapping cache instance.
+ * @param iPage The page.
+ * @param cRefs The number of references to release.
+ */
+static void pgmRZDynMapReleasePage(PPGMRZDYNMAP pThis, uint32_t iPage, uint32_t cRefs)
+{
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+ pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+}
+
+
+/**
+ * pgmR0DynMapPage worker that deals with the tedious bits.
+ *
+ * @returns The page index on success, UINT32_MAX on failure.
+ * @param pThis The dynamic mapping cache instance.
+ * @param HCPhys The address of the page to be mapped.
+ * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * For statistics.
+ * @param pfNew Set to @c true if a new entry was made and @c false if
+ * an old entry was found and reused.
+ */
+static uint32_t pgmR0DynMapPageSlow(PPGMRZDYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVMCPU pVCpu, bool *pfNew)
+{
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlow); RT_NOREF_PV(pVCpu);
+
+ /*
+ * Check if any of the first 3 pages are unreferenced since the caller
+ * already has made sure they aren't matching.
+ */
+#ifdef VBOX_WITH_STATISTICS
+ bool fLooped = false;
+#endif
+ uint32_t const cPages = pThis->cPages;
+ PPGMRZDYNMAPENTRY paPages = pThis->paPages;
+ uint32_t iFreePage;
+ if (!paPages[iPage].cRefs)
+ iFreePage = iPage;
+ else if (!paPages[(iPage + 1) % cPages].cRefs)
+ iFreePage = (iPage + 1) % cPages;
+ else if (!paPages[(iPage + 2) % cPages].cRefs)
+ iFreePage = (iPage + 2) % cPages;
+ else
+ {
+ /*
+ * Search for an unused or matching entry.
+ */
+ iFreePage = (iPage + 3) % cPages;
+ for (;;)
+ {
+ if (paPages[iFreePage].HCPhys == HCPhys)
+ {
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlowLoopHits);
+ *pfNew = false;
+ return iFreePage;
+ }
+ if (!paPages[iFreePage].cRefs)
+ break;
+
+ /* advance */
+ iFreePage = (iFreePage + 1) % cPages;
+ if (RT_UNLIKELY(iFreePage == iPage))
+ return UINT32_MAX;
+ }
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlowLoopMisses);
+#ifdef VBOX_WITH_STATISTICS
+ fLooped = true;
+#endif
+ }
+ Assert(iFreePage < cPages);
+
+#if 0 //def VBOX_WITH_STATISTICS
+ /* Check for lost hits. */
+ if (!fLooped)
+ for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
+ if (paPages[iPage2].HCPhys == HCPhys)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZDynMapPageSlowLostHits);
+#endif
+
+ /*
+ * Setup the new entry.
+ */
+ *pfNew = true;
+ /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
+ paPages[iFreePage].HCPhys = HCPhys;
+#ifndef IN_RC
+ RTCpuSetFill(&paPages[iFreePage].PendingSet);
+
+ if (pThis->fLegacyMode)
+#endif
+ {
+ X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
+ X86PGUINT uOld2 = uOld; NOREF(uOld2);
+ X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
+ | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
+ | (HCPhys & X86_PTE_PG_MASK);
+ while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
+ AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
+ Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
+ }
+#ifndef IN_RC
+ else
+#endif
+ {
+ X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
+ X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
+ X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
+ | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
+ | (HCPhys & X86_PTE_PAE_PG_MASK);
+ while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
+ AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
+ Assert(paPages[iFreePage].uPte.pPae->u == uNew);
+ /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
+ }
+ return iFreePage;
+}
+
+
+/**
+ * Maps a page into the pool.
+ *
+ * @returns Page index on success, UINT32_MAX on failure.
+ * @param pThis The dynamic mapping cache instance.
+ * @param HCPhys The address of the page to be mapped.
+ * @param iRealCpu The real cpu set index. (optimization)
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. For statistics.
+ * @param ppvPage Where to store the page address.
+ */
+DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMRZDYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVMCPU pVCpu, void **ppvPage)
+{
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+ AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPage);
+
+ /*
+ * Find an entry, if possible a matching one. The HCPhys address is hashed
+ * down to a page index, collisions are handled by linear searching.
+ * Optimized for a hit in the first 3 pages.
+ *
+ * Field easy hits here and defer the tedious searching and inserting
+ * to pgmR0DynMapPageSlow().
+ */
+ bool fNew = false;
+ uint32_t const cPages = pThis->cPages;
+ uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
+ PPGMRZDYNMAPENTRY paPages = pThis->paPages;
+ if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits0);
+ else
+ {
+ uint32_t iPage2 = (iPage + 1) % cPages;
+ if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
+ {
+ iPage = iPage2;
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits1);
+ }
+ else
+ {
+ iPage2 = (iPage + 2) % cPages;
+ if (paPages[iPage2].HCPhys == HCPhys)
+ {
+ iPage = iPage2;
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits2);
+ }
+ else
+ {
+ iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVCpu, &fNew);
+ if (RT_UNLIKELY(iPage == UINT32_MAX))
+ {
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ *ppvPage = NULL;
+ return iPage;
+ }
+ }
+ }
+ }
+
+ /*
+ * Reference it, update statistics and get the return address.
+ */
+ int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
+ if (cRefs == 1)
+ {
+ pThis->cLoad++;
+ if (pThis->cLoad > pThis->cMaxLoad)
+ pThis->cMaxLoad = pThis->cLoad;
+ AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
+ }
+ else if (RT_UNLIKELY(cRefs <= 0))
+ {
+ ASMAtomicDecS32(&paPages[iPage].cRefs);
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ *ppvPage = NULL;
+ AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%u HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
+ }
+ void *pvPage = paPages[iPage].pvPage;
+
+#ifndef IN_RC
+ /*
+ * Invalidate the entry?
+ */
+ bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
+ if (RT_UNLIKELY(fInvalidateIt))
+ RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
+#else
+ NOREF(iRealCpu);
+#endif
+
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+
+ /*
+ * Do the actual invalidation outside the spinlock.
+ */
+#ifdef IN_RC
+ if (RT_UNLIKELY(fNew))
+#else
+ if (RT_UNLIKELY(fInvalidateIt))
+#endif
+ {
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageInvlPg);
+ ASMInvalidatePage((uintptr_t)pvPage);
+ }
+
+ *ppvPage = pvPage;
+ return iPage;
+}
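+
+
+/*
+ * A hypothetical helper (example only, compiled out) that reduces the lookup
+ * scheme of pgmR0DynMapPage() and pgmR0DynMapPageSlow() above to its core:
+ * the physical address is hashed to a start index by shifting out the page
+ * offset and taking the result modulo the pool size, and collisions are
+ * handled by a linear scan that stops at the first matching or unreferenced
+ * entry.  The function name is made up for the example.
+ */
+#if 0 /* example only */
+static uint32_t pgmR0DynMapExampleLookup(PPGMRZDYNMAPENTRY paEntries, uint32_t cEntries, RTHCPHYS HCPhys)
+{
+    uint32_t const iStart = (uint32_t)((HCPhys >> PAGE_SHIFT) % cEntries);
+    uint32_t       iEntry = iStart;
+    do
+    {
+        if (   paEntries[iEntry].HCPhys == HCPhys   /* hit: reuse the existing mapping */
+            || !paEntries[iEntry].cRefs)            /* free: claim it for HCPhys */
+            return iEntry;
+        iEntry = (iEntry + 1) % cEntries;           /* linear probing */
+    } while (iEntry != iStart);
+    return UINT32_MAX;                              /* pool exhausted */
+}
+#endif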
+
+
+/**
+ * Assert the integrity of the pool.
+ *
+ * @returns VBox status code.
+ */
+static int pgmRZDynMapAssertIntegrity(PPGMRZDYNMAP pThis)
+{
+ /*
+ * Basic pool stuff that doesn't require any lock, just assumes we're a user.
+ */
+ if (!pThis)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_POINTER);
+ AssertReturn(pThis->u32Magic == PGMRZDYNMAP_MAGIC, VERR_INVALID_MAGIC);
+ if (!pThis->cUsers)
+ return VERR_INVALID_PARAMETER;
+
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+
+#define CHECK_RET(expr, a) \
+ do { \
+ if (RT_UNLIKELY(!(expr))) \
+ { \
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis); \
+ RTAssertMsg1Weak(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
+ RTAssertMsg2Weak a; \
+ return VERR_PGM_DYNMAP_IPE; \
+ } \
+ } while (0)
+
+ /*
+ * Check that the PTEs are correct.
+ */
+ uint32_t cGuard = 0;
+ uint32_t cLoad = 0;
+ PPGMRZDYNMAPENTRY paPages = pThis->paPages;
+
+#ifndef IN_RC
+ if (pThis->fLegacyMode)
+#endif
+ {
+#ifdef IN_RING0
+ PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
+#endif
+ uint32_t iPage = pThis->cPages;
+ while (iPage-- > 0)
+ {
+ CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
+ if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
+ && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
+ {
+#ifdef PGMR0DYNMAP_GUARD_NP
+ CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
+ ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
+#else
+ CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
+ ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
+#endif
+ cGuard++;
+ }
+ else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
+ {
+ CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
+ X86PGUINT uPte = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
+#ifdef IN_RING0
+ | (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
+#endif
+ | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
+ CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
+ ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
+ if (paPages[iPage].cRefs)
+ cLoad++;
+ }
+#if defined(IN_RING0) && !defined(PGMRZDYNMAP_STRICT_RELEASE)
+ else
+ CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
+ ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
+#endif
+ }
+ }
+#ifndef IN_RC
+ else
+#endif
+ {
+#ifdef IN_RING0
+ PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
+#endif
+ uint32_t iPage = pThis->cPages;
+ while (iPage-- > 0)
+ {
+ CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
+ if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
+ && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
+ {
+#ifdef PGMR0DYNMAP_GUARD_NP
+ CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
+ ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
+#else
+ CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
+ ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
+#endif
+ cGuard++;
+ }
+ else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
+ {
+ CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
+ X86PGPAEUINT uPte = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
+#ifdef IN_RING0
+ | (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
+#endif
+ | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
+ CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
+ ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
+ if (paPages[iPage].cRefs)
+ cLoad++;
+ }
+#ifdef IN_RING0
+ else
+ CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
+ ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
+#endif
+ }
+ }
+
+ CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
+ CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
+
+#undef CHECK_RET
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ return VINF_SUCCESS;
+}
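+
+
+/*
+ * A hypothetical helper (example only, compiled out) spelling out how the
+ * integrity check above recomputes the PTE it expects for a loaded entry:
+ * the fixed present/write/accessed/dirty bits, the caching attributes
+ * preserved from the saved host PTE, and the page frame taken from HCPhys.
+ */
+#if 0 /* example only */
+static X86PGPAEUINT pgmR0DynMapExampleExpectedPaePte(X86PGPAEUINT uSavedPte, RTHCPHYS HCPhys)
+{
+    return X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
+         | (uSavedPte & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
+         | (HCPhys & X86_PTE_PAE_PG_MASK);
+}
+#endif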
+
+#ifdef IN_RING0
+/**
+ * Assert the integrity of the pool.
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
+{
+ return pgmRZDynMapAssertIntegrity(g_pPGMR0DynMap);
+}
+#endif /* IN_RING0 */
+
+#ifdef IN_RC
+/**
+ * Assert the integrity of the pool.
+ *
+ * @returns VBox status code.
+ */
+VMMRCDECL(int) PGMRCDynMapAssertIntegrity(PVM pVM)
+{
+ return pgmRZDynMapAssertIntegrity((PPGMRZDYNMAP)pVM->pgm.s.pRCDynMap);
+}
+#endif /* IN_RC */
+
+
+/**
+ * As a final resort for a (somewhat) full auto set or full cache, try to
+ * merge duplicate entries and flush the ones we can.
+ *
+ * @param pSet The set.
+ */
+static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
+{
+ LogFlow(("pgmDynMapOptimizeAutoSet\n"));
+
+ for (uint32_t i = 0 ; i < pSet->cEntries; i++)
+ {
+ /*
+ * Try merge entries.
+ */
+ uint16_t const iPage = pSet->aEntries[i].iPage;
+ uint32_t j = i + 1;
+ while ( j < pSet->cEntries
+ && ( pSet->iSubset == UINT32_MAX
+ || pSet->iSubset < pSet->cEntries) )
+ {
+ if (pSet->aEntries[j].iPage != iPage)
+ j++;
+ else
+ {
+ uint32_t const cHardRefs = (uint32_t)pSet->aEntries[i].cRefs
+ + (uint32_t)pSet->aEntries[j].cRefs;
+ uint32_t cInlinedRefs = (uint32_t)pSet->aEntries[i].cInlinedRefs
+ + (uint32_t)pSet->aEntries[j].cInlinedRefs;
+ uint32_t cUnrefs = (uint32_t)pSet->aEntries[i].cUnrefs
+ + (uint32_t)pSet->aEntries[j].cUnrefs;
+ uint32_t cSub = RT_MIN(cUnrefs, cInlinedRefs);
+ cInlinedRefs -= cSub;
+ cUnrefs -= cSub;
+
+ if ( cHardRefs < UINT16_MAX
+ && cInlinedRefs < UINT16_MAX
+ && cUnrefs < UINT16_MAX)
+ {
+ /* merge j into i removing j. */
+ Log2(("pgmDynMapOptimizeAutoSet: Merging #%u into #%u\n", j, i));
+ pSet->aEntries[i].cRefs = cHardRefs;
+ pSet->aEntries[i].cInlinedRefs = cInlinedRefs;
+ pSet->aEntries[i].cUnrefs = cUnrefs;
+ pSet->cEntries--;
+ if (j < pSet->cEntries)
+ {
+ pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[pSet->cEntries]);
+ }
+ else
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
+ }
+#if 0 /* too complicated, skip it. */
+ else
+ {
+ /* migrate the max number of refs from j into i and quit the inner loop. */
+ uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
+ Assert(pSet->aEntries[j].cRefs > cMigrate);
+ pSet->aEntries[j].cRefs -= cMigrate;
+ pSet->aEntries[i].cRefs = UINT16_MAX - 1;
+ break;
+ }
+#endif
+ }
+ }
+
+ /*
+ * Try to make use of the unused hinting (cUnrefs) to evict entries
+ * from both the set as well as the mapping cache.
+ */
+
+ uint32_t const cTotalRefs = (uint32_t)pSet->aEntries[i].cRefs + pSet->aEntries[i].cInlinedRefs;
+ Log2(("pgmDynMapOptimizeAutoSet: #%u/%u/%u pvPage=%p iPage=%u cRefs=%u cInlinedRefs=%u cUnrefs=%u cTotalRefs=%u\n",
+ i,
+ pSet->iSubset,
+ pSet->cEntries,
+ pSet->aEntries[i].pvPage,
+ pSet->aEntries[i].iPage,
+ pSet->aEntries[i].cRefs,
+ pSet->aEntries[i].cInlinedRefs,
+ pSet->aEntries[i].cUnrefs,
+ cTotalRefs));
+ Assert(cTotalRefs >= pSet->aEntries[i].cUnrefs);
+
+ if ( cTotalRefs == pSet->aEntries[i].cUnrefs
+ && ( pSet->iSubset == UINT32_MAX
+ || pSet->iSubset < pSet->cEntries)
+ )
+ {
+ Log2(("pgmDynMapOptimizeAutoSet: Releasing iPage=%d/%p\n", pSet->aEntries[i].iPage, pSet->aEntries[i].pvPage));
+ //LogFlow(("pgmDynMapOptimizeAutoSet: Releasing iPage=%d/%p\n", pSet->aEntries[i].iPage, pSet->aEntries[i].pvPage));
+ pgmRZDynMapReleasePage(PGMRZDYNMAP_SET_2_DYNMAP(pSet),
+ pSet->aEntries[i].iPage,
+ pSet->aEntries[i].cRefs);
+ pSet->cEntries--;
+ if (i < pSet->cEntries)
+ {
+ pSet->aEntries[i] = pSet->aEntries[pSet->cEntries];
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[pSet->cEntries]);
+ }
+
+ i--;
+ }
+ }
+}
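+
+
+/*
+ * A hypothetical helper (example only, compiled out) for the merge step above:
+ * the counters of two entries referring to the same page are summed, the
+ * unreference hints are cancelled against the inlined references first, and
+ * the merge is only performed when all three results still fit the entry's
+ * 16-bit counter fields.
+ */
+#if 0 /* example only */
+static bool pgmDynMapExampleCanMerge(uint32_t cRefs1, uint32_t cInlined1, uint32_t cUnrefs1,
+                                     uint32_t cRefs2, uint32_t cInlined2, uint32_t cUnrefs2)
+{
+    uint32_t const cRefs    = cRefs1    + cRefs2;
+    uint32_t       cInlined = cInlined1 + cInlined2;
+    uint32_t       cUnrefs  = cUnrefs1  + cUnrefs2;
+    uint32_t const cSub     = RT_MIN(cUnrefs, cInlined);    /* cancel hints against inlined refs */
+    cInlined -= cSub;
+    cUnrefs  -= cSub;
+    return cRefs < UINT16_MAX && cInlined < UINT16_MAX && cUnrefs < UINT16_MAX;
+}
+#endif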
+
+
+
+
+/**
+ * Signals the start of a new set of mappings.
+ *
+ * Mostly for strictness. PGMDynMapHCPage won't work unless this
+ * API is called.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMDECL(void) PGMRZDynMapStartAutoSet(PVMCPU pVCpu)
+{
+ LogFlow(("PGMRZDynMapStartAutoSet:\n"));
+ Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
+ Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
+ pVCpu->pgm.s.AutoSet.cEntries = 0;
+ pVCpu->pgm.s.AutoSet.iCpu = PGMRZDYNMAP_CUR_CPU();
+}
+
+
+#ifdef IN_RING0
+/**
+ * Starts or migrates the autoset of a virtual CPU.
+ *
+ * This is used by HMR0Enter. When we've longjumped out of the HM
+ * execution loop with the set open, we'll migrate it when re-entering. Under
+ * normal circumstances, we'll start it so VMXR0LoadGuestState can access
+ * guest memory.
+ *
+ * @returns @c true if started, @c false if migrated.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @thread EMT
+ */
+VMMR0DECL(bool) PGMR0DynMapStartOrMigrateAutoSet(PVMCPU pVCpu)
+{
+ bool fStartIt = pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED;
+ if (fStartIt)
+ PGMRZDynMapStartAutoSet(pVCpu);
+ else
+ PGMR0DynMapMigrateAutoSet(pVCpu);
+ return fStartIt;
+}
+#endif /* IN_RING0 */
+
+
+/**
+ * Checks if the set has high load.
+ *
+ * @returns true on high load, otherwise false.
+ * @param pSet The set.
+ */
+DECLINLINE(bool) pgmRZDynMapHasHighLoad(PPGMMAPSET pSet)
+{
+#ifdef IN_RC
+ if (pSet->cEntries < MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE / 2)
+ return false;
+#endif
+
+ PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
+ uint32_t cUnusedPages = pThis->cPages - pThis->cLoad;
+#ifdef IN_RC
+ return cUnusedPages <= MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE * 36 / 100;
+#else
+ return cUnusedPages <= PGMR0DYNMAP_PAGES_PER_CPU_MIN;
+#endif
+}
+
+
+/**
+ * Worker that performs the actual flushing of the set.
+ *
+ * @param pSet The set to flush.
+ * @param cEntries The number of entries.
+ */
+DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
+{
+ /*
+ * Release any pages it's referencing.
+ */
+ if ( cEntries != 0
+ && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
+ {
+ PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+
+ uint32_t i = cEntries;
+ while (i-- > 0)
+ {
+ uint32_t iPage = pSet->aEntries[i].iPage;
+ Assert(iPage < pThis->cPages);
+ int32_t cRefs = pSet->aEntries[i].cRefs;
+ Assert(cRefs > 0);
+ pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
+
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[i]);
+ }
+
+ Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ }
+}
+
+
+/**
+ * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
+ * since the PGMDynMapStartAutoSet call.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMDECL(void) PGMRZDynMapReleaseAutoSet(PVMCPU pVCpu)
+{
+ PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
+
+ /*
+ * Close and flush the set.
+ */
+ uint32_t cEntries = pSet->cEntries;
+ AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
+ pSet->cEntries = PGMMAPSET_CLOSED;
+ pSet->iSubset = UINT32_MAX;
+ pSet->iCpu = -1;
+
+#ifdef IN_RC
+ if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
+ else
+#endif
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
+ if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100)
+ Log(("PGMRZDynMapReleaseAutoSet: cEntries=%d\n", cEntries));
+ else
+ LogFlow(("PGMRZDynMapReleaseAutoSet: cEntries=%d\n", cEntries));
+
+ pgmDynMapFlushAutoSetWorker(pSet, cEntries);
+}
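+
+
+/*
+ * Hypothetical usage sketch (example only, compiled out): the typical auto set
+ * life cycle on an EMT.  Every mapping made between the start and release
+ * calls is tracked in the set and unreferenced in one go.  Note that
+ * pgmRZDynMapHCPageCommon() is normally reached through the inlined
+ * pgmRZDynMapHCPageInlined() style wrappers rather than called directly.
+ */
+#if 0 /* example only */
+static void pgmRZDynMapExampleLifeCycle(PVMCPU pVCpu, RTHCPHYS HCPhys)
+{
+    PGMRZDynMapStartAutoSet(pVCpu);                 /* open the set */
+
+    void *pvPage;
+    int rc = pgmRZDynMapHCPageCommon(&pVCpu->pgm.s.AutoSet, HCPhys, &pvPage RTLOG_COMMA_SRC_POS);
+    if (RT_SUCCESS(rc))
+    {
+        /* ... access the guest page via pvPage ... */
+    }
+
+    PGMRZDynMapReleaseAutoSet(pVCpu);               /* release everything in one go */
+}
+#endif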
+
+
+/**
+ * Flushes the set if it's above a certain threshold.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMDECL(void) PGMRZDynMapFlushAutoSet(PVMCPU pVCpu)
+{
+ PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
+ AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
+
+ /*
+ * Only flush it if it's at least 45% full or the cache is under high load.
+ */
+ uint32_t cEntries = pSet->cEntries;
+ AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
+ Assert(pSet->iSubset == UINT32_MAX);
+#ifdef IN_RC
+ if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
+ else
+#endif
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
+ if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100
+ || pgmRZDynMapHasHighLoad(pSet))
+ {
+ pSet->cEntries = 0;
+ Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries));
+
+ pgmDynMapFlushAutoSetWorker(pSet, cEntries);
+ AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
+ }
+}
+
+
+#ifndef IN_RC
+/**
+ * Migrates the automatic mapping set of the current vCPU if it's active and
+ * necessary.
+ *
+ * This is called when re-entering the hardware assisted execution mode after a
+ * nip down to ring-3. We run the risk that the CPU might have changed and we
+ * will therefore make sure all the cache entries currently in the auto set will
+ * be valid on the new CPU. If the CPU didn't change, nothing will happen as all
+ * the entries will have been flagged as invalidated.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @thread EMT
+ */
+VMMR0DECL(void) PGMR0DynMapMigrateAutoSet(PVMCPU pVCpu)
+{
+ LogFlow(("PGMR0DynMapMigrateAutoSet\n"));
+ PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
+ int32_t iRealCpu = PGMRZDYNMAP_CUR_CPU();
+ if (pSet->iCpu != iRealCpu)
+ {
+ uint32_t i = pSet->cEntries;
+ if (i != PGMMAPSET_CLOSED)
+ {
+ AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
+ if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
+ {
+ PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+
+ while (i-- > 0)
+ {
+ Assert(pSet->aEntries[i].cRefs > 0);
+ uint32_t iPage = pSet->aEntries[i].iPage;
+ Assert(iPage < pThis->cPages);
+ if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
+ {
+ RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+
+ ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapMigrateInvlPg);
+
+ PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis);
+ }
+ }
+
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ }
+ }
+ pSet->iCpu = iRealCpu;
+ }
+}
+#endif /* !IN_RC */
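+
+
+/*
+ * A hypothetical helper (example only, compiled out, ring-0 only) condensing
+ * the PendingSet logic shared by pgmR0DynMapPage() and
+ * PGMR0DynMapMigrateAutoSet() above: whenever a pool entry is (re)loaded,
+ * every CPU is marked as pending, and the first use on a given CPU then
+ * clears its bit and flushes the stale TLB entry with invlpg.
+ */
+#if 0 /* example only */
+static void pgmR0DynMapExampleUseOnCpu(PPGMRZDYNMAPENTRY pEntry, int32_t iRealCpu)
+{
+    if (RTCpuSetIsMemberByIndex(&pEntry->PendingSet, iRealCpu))
+    {
+        RTCpuSetDelByIndex(&pEntry->PendingSet, iRealCpu);  /* only flush once per CPU */
+        ASMInvalidatePage((uintptr_t)pEntry->pvPage);       /* invlpg on this CPU */
+    }
+}
+#endif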
+
+
+/**
+ * Worker function that flushes the current subset.
+ *
+ * This is called when the set is popped or when the set
+ * hash a too high load. As also pointed out elsewhere, the
+ * whole subset thing is a hack for working around code that
+ * accesses too many pages. Like PGMPool.
+ *
+ * @param pSet The set which subset to flush.
+ */
+static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
+{
+ uint32_t iSubset = pSet->iSubset;
+ uint32_t i = pSet->cEntries;
+ Assert(i <= RT_ELEMENTS(pSet->aEntries));
+ if ( i > iSubset
+ && i <= RT_ELEMENTS(pSet->aEntries))
+ {
+ Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset));
+ pSet->cEntries = iSubset;
+
+ PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
+ PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
+
+ while (i-- > iSubset)
+ {
+ uint32_t iPage = pSet->aEntries[i].iPage;
+ Assert(iPage < pThis->cPages);
+ int32_t cRefs = pSet->aEntries[i].cRefs;
+ Assert(cRefs > 0);
+ pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
+
+ PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[i]);
+ }
+
+ PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
+ }
+}
+
+
+/**
+ * Creates a subset.
+ *
+ * A subset is a hack to avoid having to rewrite code that touches a lot of
+ * pages. It prevents the mapping set from being overflowed by automatically
+ * flushing previous mappings when a certain threshold is reached.
+ *
+ * Pages mapped after calling this function are only valid until the next page
+ * is mapped.
+ *
+ * @returns The index of the previous subset. Pass this to
+ * PGMDynMapPopAutoSubset when popping it.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMDECL(uint32_t) PGMRZDynMapPushAutoSubset(PVMCPU pVCpu)
+{
+ PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
+ AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
+ uint32_t iPrevSubset = pSet->iSubset;
+ LogFlow(("PGMRZDynMapPushAutoSubset: pVCpu=%p iPrevSubset=%u\n", pVCpu, iPrevSubset));
+
+ /*
+ * If it looks like we're approaching the max set size or mapping space,
+ * optimize the set to drop off unused pages.
+ */
+ if ( pSet->cEntries > RT_ELEMENTS(pSet->aEntries) * 60 / 100
+ || pgmRZDynMapHasHighLoad(pSet))
+ {
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
+ pgmDynMapOptimizeAutoSet(pSet);
+ }
+
+ pSet->iSubset = pSet->cEntries;
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSubsets);
+
+ AssertMsg(iPrevSubset <= pSet->iSubset || iPrevSubset == UINT32_MAX, ("iPrevSubset=%#x iSubset=%#x\n", iPrevSubset, pSet->iSubset));
+ return iPrevSubset;
+}
+
+
+/**
+ * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
+ */
+VMMDECL(void) PGMRZDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
+{
+ PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
+ uint32_t cEntries = pSet->cEntries;
+ LogFlow(("PGMRZDynMapPopAutoSubset: pVCpu=%p iPrevSubset=%u iSubset=%u cEntries=%u\n", pVCpu, iPrevSubset, pSet->iSubset, cEntries));
+ AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
+ AssertMsgReturnVoid(pSet->iSubset >= iPrevSubset || iPrevSubset == UINT32_MAX, ("iPrevSubset=%u iSubset=%u cEntries=%u\n", iPrevSubset, pSet->iSubset, cEntries));
+#ifdef IN_RC
+ if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
+ else
+#endif
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
+ if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100
+ && cEntries != pSet->iSubset)
+ {
+ pgmDynMapFlushSubset(pSet);
+ Assert(pSet->cEntries >= iPrevSubset || iPrevSubset == UINT32_MAX);
+ }
+ pSet->iSubset = iPrevSubset;
+}
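+
+
+/*
+ * Hypothetical usage sketch (example only, compiled out): code that touches a
+ * lot of pages, like the pool code mentioned above, brackets its work with a
+ * subset push/pop so older mappings can be flushed instead of overflowing the
+ * auto set.  Pages mapped inside the bracket must not be used after the pop.
+ */
+#if 0 /* example only */
+static void pgmRZDynMapExampleSubsetUser(PVMCPU pVCpu)
+{
+    uint32_t const iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
+
+    /* ... map and access a larger number of guest pages here ... */
+
+    PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
+}
+#endif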
+
+
+/**
+ * Indicates that the given page is unused and its mapping can be re-used.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param pvHint The page that is now unused. This does not have to
+ * point at the start of the page. NULL is ignored.
+ */
+#ifdef LOG_ENABLED
+void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint, RT_SRC_POS_DECL)
+#else
+void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint)
+#endif
+{
+ /*
+ * Ignore NULL pointers and mask off the page offset bits.
+ */
+ if (pvHint == NULL)
+ return;
+ pvHint = (void *)((uintptr_t)pvHint & ~(uintptr_t)PAGE_OFFSET_MASK);
+
+ PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
+ uint32_t iEntry = pSet->cEntries;
+ AssertReturnVoid(iEntry > 0);
+
+ /*
+ * Find the entry in the usual unrolled fashion.
+ */
+ /** @todo add a hint to the set which entry was used last since it's not
+ * always the last entry? */
+#define IS_MATCHING_ENTRY(pSet, iEntry, pvHint) \
+ ( (pSet)->aEntries[(iEntry)].pvPage == (pvHint) \
+ && (uint32_t)(pSet)->aEntries[(iEntry)].cRefs + (pSet)->aEntries[(iEntry)].cInlinedRefs \
+ > (pSet)->aEntries[(iEntry)].cUnrefs )
+ if ( iEntry >= 1 && IS_MATCHING_ENTRY(pSet, iEntry - 1, pvHint))
+ iEntry = iEntry - 1;
+ else if (iEntry >= 2 && IS_MATCHING_ENTRY(pSet, iEntry - 2, pvHint))
+ iEntry = iEntry - 2;
+ else if (iEntry >= 3 && IS_MATCHING_ENTRY(pSet, iEntry - 3, pvHint))
+ iEntry = iEntry - 3;
+ else if (iEntry >= 4 && IS_MATCHING_ENTRY(pSet, iEntry - 4, pvHint))
+ iEntry = iEntry - 4;
+ else if (iEntry >= 5 && IS_MATCHING_ENTRY(pSet, iEntry - 5, pvHint))
+ iEntry = iEntry - 5;
+ else if (iEntry >= 6 && IS_MATCHING_ENTRY(pSet, iEntry - 6, pvHint))
+ iEntry = iEntry - 6;
+ else if (iEntry >= 7 && IS_MATCHING_ENTRY(pSet, iEntry - 7, pvHint))
+ iEntry = iEntry - 7;
+ else
+ {
+ /*
+ * Loop till we find it.
+ */
+ bool fFound = false;
+ if (iEntry > 7)
+ {
+ iEntry -= 7;
+ while (iEntry-- > 0)
+ if (IS_MATCHING_ENTRY(pSet, iEntry, pvHint))
+ {
+ fFound = true;
+ break;
+ }
+ }
+ AssertMsgReturnVoid(fFound,
+ ("pvHint=%p cEntries=%#x iSubset=%#x\n"
+ "aEntries[0] = {%#x, %#x, %#x, %#x, %p}\n"
+ "aEntries[1] = {%#x, %#x, %#x, %#x, %p}\n"
+ "aEntries[2] = {%#x, %#x, %#x, %#x, %p}\n"
+ "aEntries[3] = {%#x, %#x, %#x, %#x, %p}\n"
+ "aEntries[4] = {%#x, %#x, %#x, %#x, %p}\n"
+ "aEntries[5] = {%#x, %#x, %#x, %#x, %p}\n"
+ ,
+ pvHint, pSet->cEntries, pSet->iSubset,
+ pSet->aEntries[0].iPage, pSet->aEntries[0].cRefs, pSet->aEntries[0].cInlinedRefs, pSet->aEntries[0].cUnrefs, pSet->aEntries[0].pvPage,
+ pSet->aEntries[1].iPage, pSet->aEntries[1].cRefs, pSet->aEntries[1].cInlinedRefs, pSet->aEntries[1].cUnrefs, pSet->aEntries[1].pvPage,
+ pSet->aEntries[2].iPage, pSet->aEntries[2].cRefs, pSet->aEntries[2].cInlinedRefs, pSet->aEntries[2].cUnrefs, pSet->aEntries[2].pvPage,
+ pSet->aEntries[3].iPage, pSet->aEntries[3].cRefs, pSet->aEntries[3].cInlinedRefs, pSet->aEntries[3].cUnrefs, pSet->aEntries[3].pvPage,
+ pSet->aEntries[4].iPage, pSet->aEntries[4].cRefs, pSet->aEntries[4].cInlinedRefs, pSet->aEntries[4].cUnrefs, pSet->aEntries[4].pvPage,
+ pSet->aEntries[5].iPage, pSet->aEntries[5].cRefs, pSet->aEntries[5].cInlinedRefs, pSet->aEntries[5].cUnrefs, pSet->aEntries[5].pvPage));
+ }
+#undef IS_MATCHING_ENTRY
+
+ /*
+ * Update it.
+ */
+ uint32_t const cTotalRefs = (uint32_t)pSet->aEntries[iEntry].cRefs + pSet->aEntries[iEntry].cInlinedRefs;
+ uint32_t const cUnrefs = pSet->aEntries[iEntry].cUnrefs;
+ LogFlow(("pgmRZDynMapUnusedHint: pvHint=%p #%u cRefs=%d cInlinedRefs=%d cUnrefs=%d (+1) cTotalRefs=%d %s(%d) %s\n",
+ pvHint, iEntry, pSet->aEntries[iEntry].cRefs, pSet->aEntries[iEntry].cInlinedRefs, cUnrefs, cTotalRefs, pszFile, iLine, pszFunction));
+ AssertReturnVoid(cTotalRefs > cUnrefs);
+
+ if (RT_LIKELY(cUnrefs < UINT16_MAX - 1))
+ pSet->aEntries[iEntry].cUnrefs++;
+ else if (pSet->aEntries[iEntry].cInlinedRefs)
+ {
+ uint32_t cSub = RT_MIN(pSet->aEntries[iEntry].cInlinedRefs, pSet->aEntries[iEntry].cUnrefs);
+ pSet->aEntries[iEntry].cInlinedRefs -= cSub;
+ pSet->aEntries[iEntry].cUnrefs -= cSub;
+ pSet->aEntries[iEntry].cUnrefs++;
+ }
+ else
+ Log(("pgmRZDynMapUnusedHint: pvHint=%p ignored because of overflow! %s(%d) %s\n", pvHint, pszFile, iLine, pszFunction));
+
+#ifdef PGMRZDYNMAP_STRICT_RELEASE
+ /*
+ * Optimize the set to trigger the unmapping and invalidation of the page.
+ */
+ if (cUnrefs + 1 == cTotalRefs)
+ pgmDynMapOptimizeAutoSet(pSet);
+#endif
+}
+
+
+/**
+ * Common worker code for pgmRZDynMapHCPageInlined, pgmRZDynMapHCPageV2Inlined
+ * and pgmR0DynMapGCPageOffInlined.
+ *
+ * @returns VINF_SUCCESS, bails out to ring-3 on failure.
+ * @param pSet The set.
+ * @param HCPhys The physical address of the page.
+ * @param ppv Where to store the address of the mapping on success.
+ *
+ * @remarks This is a very hot path.
+ */
+int pgmRZDynMapHCPageCommon(PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL)
+{
+ AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
+ PVMCPU pVCpu = PGMRZDYNMAP_SET_2_VMCPU(pSet);
+ STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
+
+ /*
+ * Map it.
+ */
+ void *pvPage;
+ PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
+ uint32_t iPage = pgmR0DynMapPage(pThis, HCPhys, pSet->iCpu, pVCpu, &pvPage);
+ if (RT_UNLIKELY(iPage == UINT32_MAX))
+ {
+ /*
+ * We're out of mapping space, so optimize our set to try to remedy the
+ * situation. (This only works if there are unreference hints.)
+ */
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
+ pgmDynMapOptimizeAutoSet(pSet);
+
+ iPage = pgmR0DynMapPage(pThis, HCPhys, pSet->iCpu, pVCpu, &pvPage);
+ if (RT_UNLIKELY(iPage == UINT32_MAX))
+ {
+ RTAssertMsg2Weak("pgmRZDynMapHCPageCommon: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
+ pThis->cLoad, pThis->cMaxLoad, pThis->cPages, pThis->cGuardPages);
+ if (!g_fPGMR0DynMapTestRunning)
+ VMMRZCallRing3NoCpu(PGMRZDYNMAP_SET_2_VM(pSet), VMMCALLRING3_VM_R0_ASSERTION, 0);
+ *ppv = NULL;
+ STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
+ return VERR_PGM_DYNMAP_FAILED;
+ }
+ }
+
+ /*
+ * Add the page to the auto reference set.
+ *
+ * The typical usage pattern means that the same pages will be mapped
+ * several times in the same set. We can catch most of these
+ * remappings by looking a few pages back into the set. (The searching
+ * and set optimizing path will hardly ever be used when doing this.)
+ */
+ AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
+ int32_t i = pSet->cEntries;
+ if (i-- < 5)
+ {
+ unsigned iEntry = pSet->cEntries++;
+ pSet->aEntries[iEntry].cRefs = 1;
+ pSet->aEntries[iEntry].cUnrefs = 0;
+ pSet->aEntries[iEntry].cInlinedRefs = 0;
+ pSet->aEntries[iEntry].iPage = iPage;
+ pSet->aEntries[iEntry].pvPage = pvPage;
+ pSet->aEntries[iEntry].HCPhys = HCPhys;
+ pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/0/0 iPage=%#x [a] %s(%d) %s\n",
+ pSet, HCPhys, iEntry, iEntry + 1, pvPage, 1, iPage, pszFile, iLine, pszFunction));
+ }
+ /* Any of the last 5 pages? */
+ else if ( pSet->aEntries[i - 0].iPage == iPage
+ && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
+ {
+ pSet->aEntries[i - 0].cRefs++;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [0] %s(%d) %s\n", pSet, HCPhys, i - 0, pSet->cEntries, pvPage, pSet->aEntries[i - 0].cRefs, pSet->aEntries[i - 0].cInlinedRefs, pSet->aEntries[i - 0].cUnrefs, iPage, pszFile, iLine, pszFunction));
+ }
+ else if ( pSet->aEntries[i - 1].iPage == iPage
+ && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
+ {
+ pSet->aEntries[i - 1].cRefs++;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [1] %s(%d) %s\n", pSet, HCPhys, i - 1, pSet->cEntries, pvPage, pSet->aEntries[i - 1].cRefs, pSet->aEntries[i - 1].cInlinedRefs, pSet->aEntries[i - 1].cUnrefs, iPage, pszFile, iLine, pszFunction));
+ }
+ else if ( pSet->aEntries[i - 2].iPage == iPage
+ && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
+ {
+ pSet->aEntries[i - 2].cRefs++;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [2] %s(%d) %s\n", pSet, HCPhys, i - 2, pSet->cEntries, pvPage, pSet->aEntries[i - 2].cRefs, pSet->aEntries[i - 2].cInlinedRefs, pSet->aEntries[i - 2].cUnrefs, iPage, pszFile, iLine, pszFunction));
+ }
+ else if ( pSet->aEntries[i - 3].iPage == iPage
+ && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
+ {
+ pSet->aEntries[i - 3].cRefs++;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [4] %s(%d) %s\n", pSet, HCPhys, i - 3, pSet->cEntries, pvPage, pSet->aEntries[i - 3].cRefs, pSet->aEntries[i - 3].cInlinedRefs, pSet->aEntries[i - 3].cUnrefs, iPage, pszFile, iLine, pszFunction));
+ }
+ else if ( pSet->aEntries[i - 4].iPage == iPage
+ && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
+ {
+ pSet->aEntries[i - 4].cRefs++;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [4] %s(%d) %s\n", pSet, HCPhys, i - 4, pSet->cEntries, pvPage, pSet->aEntries[i - 4].cRefs, pSet->aEntries[i - 4].cInlinedRefs, pSet->aEntries[i - 4].cUnrefs, iPage, pszFile, iLine, pszFunction));
+ }
+ /* Don't bother searching unless we're above a 60% load. */
+ else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100))
+ {
+ unsigned iEntry = pSet->cEntries++;
+ pSet->aEntries[iEntry].cRefs = 1;
+ pSet->aEntries[iEntry].cUnrefs = 0;
+ pSet->aEntries[iEntry].cInlinedRefs = 0;
+ pSet->aEntries[iEntry].iPage = iPage;
+ pSet->aEntries[iEntry].pvPage = pvPage;
+ pSet->aEntries[iEntry].HCPhys = HCPhys;
+ pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=1/0/0 iPage=%#x [b] %s(%d) %s\n", pSet, HCPhys, iEntry, pSet->cEntries, pvPage, iPage, pszFile, iLine, pszFunction));
+ }
+ else
+ {
+ /* Search the rest of the set. */
+ Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
+ i -= 4;
+ while (i-- > 0)
+ if ( pSet->aEntries[i].iPage == iPage
+ && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
+ {
+ pSet->aEntries[i].cRefs++;
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchHits);
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [c] %s(%d) %s\n", pSet, HCPhys, i, pSet->cEntries, pvPage, pSet->aEntries[i].cRefs, pSet->aEntries[i].cInlinedRefs, pSet->aEntries[i].cUnrefs, iPage, pszFile, iLine, pszFunction));
+ break;
+ }
+ if (i < 0)
+ {
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchMisses);
+#if 0 /* this is very bogus */
+ if (pSet->iSubset < pSet->cEntries)
+ {
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchFlushes);
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
+ pgmDynMapFlushSubset(pSet);
+ }
+#endif
+
+ if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
+ {
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
+ pgmDynMapOptimizeAutoSet(pSet);
+ }
+
+ if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
+ {
+ unsigned iEntry = pSet->cEntries++;
+ pSet->aEntries[iEntry].cRefs = 1;
+ pSet->aEntries[iEntry].cUnrefs = 0;
+ pSet->aEntries[iEntry].cInlinedRefs = 0;
+ pSet->aEntries[iEntry].iPage = iPage;
+ pSet->aEntries[iEntry].pvPage = pvPage;
+ pSet->aEntries[iEntry].HCPhys = HCPhys;
+ pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
+ LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=1/0/0 iPage=%#x [d] %s(%d) %s\n", pSet, HCPhys, iEntry, pSet->cEntries, pvPage, iPage, pszFile, iLine, pszFunction));
+ }
+ else
+ {
+ /* We're screwed. */
+ pgmRZDynMapReleasePage(pThis, iPage, 1);
+
+ RTAssertMsg2Weak("pgmRZDynMapHCPageCommon: set is full!\n");
+ if (!g_fPGMR0DynMapTestRunning)
+ VMMRZCallRing3NoCpu(PGMRZDYNMAP_SET_2_VM(pSet), VMMCALLRING3_VM_R0_ASSERTION, 0);
+ *ppv = NULL;
+ STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
+ return VERR_PGM_DYNMAP_FULL_SET;
+ }
+ }
+ }
+
+ *ppv = pvPage;
+ STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
+ return VINF_SUCCESS;
+}
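+
+
+/*
+ * A hypothetical helper (example only, compiled out) for the look-back trick
+ * used when adding to the set above: since the same page tends to be mapped
+ * again shortly after its previous use, the last few entries are checked
+ * explicitly before any full search or set optimization is attempted.
+ */
+#if 0 /* example only */
+static bool pgmRZDynMapExampleLookBack(PPGMMAPSET pSet, uint32_t iPage, uint32_t cLookBack)
+{
+    uint32_t i = pSet->cEntries;
+    while (i-- > 0 && cLookBack-- > 0)
+        if (   pSet->aEntries[i].iPage == iPage
+            && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
+        {
+            pSet->aEntries[i].cRefs++;      /* reuse the existing entry */
+            return true;
+        }
+    return false;                           /* caller adds a new entry or searches the rest */
+}
+#endif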
+
+
+#if 0 /*def DEBUG*/
+/** For pgmR0DynMapTest3PerCpu. */
+typedef struct PGMR0DYNMAPTEST
+{
+ uint32_t u32Expect;
+ uint32_t *pu32;
+ uint32_t volatile cFailures;
+} PGMR0DYNMAPTEST;
+typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
+
+/**
+ * Checks that the content of the page is the same on all CPUs, i.e. that there
+ * are no CPU specific PTs or similar nasty stuff involved.
+ *
+ * @param idCpu The current CPU.
+ * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure.
+ * @param pvUser2 Unused, ignored.
+ */
+static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
+ ASMInvalidatePage(pTest->pu32);
+ if (*pTest->pu32 != pTest->u32Expect)
+ ASMAtomicIncU32(&pTest->cFailures);
+ NOREF(pvUser2); NOREF(idCpu);
+}
+
+
+/**
+ * Performs some basic tests in debug builds.
+ */
+static int pgmR0DynMapTest(PVM pVM)
+{
+ LogRel(("pgmR0DynMapTest: ****** START ******\n"));
+ PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
+ PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
+ uint32_t i;
+
+ /*
+ * Assert internal integrity first.
+ */
+ LogRel(("Test #0\n"));
+ int rc = PGMR0DynMapAssertIntegrity();
+ if (RT_FAILURE(rc))
+ return rc;
+
+ void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
+ pVM->pgm.s.pvR0DynMapUsed = pThis;
+ g_fPGMR0DynMapTestRunning = true;
+
+ /*
+ * Simple test, map CR3 twice and check that we're getting the
+ * same mapping address back.
+ */
+ LogRel(("Test #1\n"));
+ ASMIntDisable();
+ PGMRZDynMapStartAutoSet(&pVM->aCpus[0]);
+
+ uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
+ void *pv = (void *)(intptr_t)-1;
+ void *pv2 = (void *)(intptr_t)-2;
+ rc = pgmRZDynMapHCPageCommon(pVM, cr3, &pv RTLOG_COMMA_SRC_POS);
+ int rc2 = pgmRZDynMapHCPageCommon(pVM, cr3, &pv2 RTLOG_COMMA_SRC_POS);
+ ASMIntEnable();
+ if ( RT_SUCCESS(rc2)
+ && RT_SUCCESS(rc)
+ && pv == pv2)
+ {
+ LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
+ rc = PGMR0DynMapAssertIntegrity();
+
+ /*
+ * Check that the simple set overflow code works by filling it
+ * with more CR3 mappings.
+ */
+ LogRel(("Test #2\n"));
+ ASMIntDisable();
+ PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
+ for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
+ {
+ pv2 = (void *)(intptr_t)-4;
+ rc = pgmRZDynMapHCPageCommon(pVM, cr3, &pv2 RTLOG_COMMA_SRC_POS);
+ }
+ ASMIntEnable();
+ if (RT_FAILURE(rc) || pv != pv2)
+ {
+ LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
+ if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
+ }
+ else if (pSet->cEntries != 5)
+ {
+ LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
+ rc = VERR_PGM_DYNMAP_IPE;
+ }
+ else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
+ || pSet->aEntries[3].cRefs != UINT16_MAX - 1
+ || pSet->aEntries[2].cRefs != 1
+ || pSet->aEntries[1].cRefs != 1
+ || pSet->aEntries[0].cRefs != 1)
+ {
+ LogRel(("failed(%d): bad set dist: ", __LINE__));
+ for (i = 0; i < pSet->cEntries; i++)
+ LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
+ LogRel(("\n"));
+ rc = VERR_PGM_DYNMAP_IPE;
+ }
+ if (RT_SUCCESS(rc))
+ rc = PGMR0DynMapAssertIntegrity();
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Trigger an set optimization run (exactly).
+ */
+ LogRel(("Test #3\n"));
+ ASMIntDisable();
+ PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
+ pv2 = NULL;
+ for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
+ {
+ pv2 = (void *)(intptr_t)(-5 - i);
+ rc = pgmRZDynMapHCPageCommon(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2 RTLOG_COMMA_SRC_POS);
+ }
+ ASMIntEnable();
+ if (RT_FAILURE(rc) || pv == pv2)
+ {
+ LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
+ if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
+ }
+ else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
+ {
+ LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
+ rc = VERR_PGM_DYNMAP_IPE;
+ }
+ LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
+ if (RT_SUCCESS(rc))
+ rc = PGMR0DynMapAssertIntegrity();
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Trigger an overflow error.
+ */
+ LogRel(("Test #4\n"));
+ ASMIntDisable();
+ PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
+ for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
+ {
+ rc = pgmRZDynMapHCPageCommon(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2 RTLOG_COMMA_SRC_POS);
+ if (RT_SUCCESS(rc))
+ rc = PGMR0DynMapAssertIntegrity();
+ if (RT_FAILURE(rc))
+ break;
+ }
+ ASMIntEnable();
+ if (rc == VERR_PGM_DYNMAP_FULL_SET)
+ {
+ /* flush the set. */
+ LogRel(("Test #5\n"));
+ ASMIntDisable();
+ PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
+ PGMRZDynMapReleaseAutoSet(&pVM->aCpus[0]);
+ PGMRZDynMapStartAutoSet(&pVM->aCpus[0]);
+ ASMIntEnable();
+
+ rc = PGMR0DynMapAssertIntegrity();
+ }
+ else
+ {
+ LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
+ rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
+ if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
+ }
+ }
+ }
+ }
+ else
+ {
+ LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
+ if (RT_SUCCESS(rc))
+ rc = rc2;
+ }
+
+ /*
+ * Check that everyone sees the same stuff.
+ */
+ if (RT_SUCCESS(rc))
+ {
+ LogRel(("Test #5\n"));
+ ASMIntDisable();
+ PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
+ RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
+ rc = pgmRZDynMapHCPageCommon(pVM, HCPhysPT, &pv RTLOG_COMMA_SRC_POS);
+ if (RT_SUCCESS(rc))
+ {
+ PGMR0DYNMAPTEST Test;
+ uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
+ Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
+ Test.u32Expect = *pu32Real;
+ ASMAtomicWriteU32(&Test.cFailures, 0);
+ ASMIntEnable();
+
+ rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
+ if (RT_FAILURE(rc))
+ LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
+ else if (Test.cFailures)
+ {
+ LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
+ Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
+ rc = VERR_PGM_DYNMAP_IPE;
+ }
+ else
+ LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
+ pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
+ }
+ else
+ {
+ ASMIntEnable();
+ LogRel(("failed(%d): rc=%Rrc\n", rc));
+ }
+ }
+
+ /*
+ * Clean up.
+ */
+ LogRel(("Cleanup.\n"));
+ ASMIntDisable();
+ PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
+ PGMRZDynMapFlushAutoSet(&pVM->aCpus[0]);
+ PGMRZDynMapReleaseAutoSet(&pVM->aCpus[0]);
+ ASMIntEnable();
+
+ if (RT_SUCCESS(rc))
+ rc = PGMR0DynMapAssertIntegrity();
+ else
+ PGMR0DynMapAssertIntegrity();
+
+ g_fPGMR0DynMapTestRunning = false;
+ LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
+ pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
+ pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
+ LogRel(("pgmR0DynMapTest: ****** END ******\n"));
+ return rc;
+}
+#endif /* DEBUG */
+
diff --git a/src/VBox/VMM/VMMRZ/VMMRZ.cpp b/src/VBox/VMM/VMMRZ/VMMRZ.cpp
new file mode 100644
index 00000000..fd8ccd5c
--- /dev/null
+++ b/src/VBox/VMM/VMMRZ/VMMRZ.cpp
@@ -0,0 +1,253 @@
+/* $Id: VMMRZ.cpp $ */
+/** @file
+ * VMM - Virtual Machine Monitor, Raw-mode and ring-0 context code.
+ */
+
+/*
+ * Copyright (C) 2009-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_VMM
+#include <VBox/vmm/vmm.h>
+#include "VMMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/err.h>
+
+#include <iprt/assert.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/string.h>
+
+
+/**
+ * Calls the ring-3 host code.
+ *
+ * @returns VBox status code of the ring-3 call.
+ * @retval VERR_VMM_RING3_CALL_DISABLED if called at the wrong time. This must
+ * be passed up the stack, or if that isn't possible then VMMRZCallRing3
+ * needs to change it into an assertion.
+ *
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param enmOperation The operation.
+ * @param uArg The argument to the operation.
+ */
+VMMRZDECL(int) VMMRZCallRing3(PVMCC pVM, PVMCPUCC pVCpu, VMMCALLRING3 enmOperation, uint64_t uArg)
+{
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /*
+ * Check if calling ring-3 has been disabled and only let fatal calls through.
+ */
+ if (RT_UNLIKELY( pVCpu->vmm.s.cCallRing3Disabled != 0
+ && enmOperation != VMMCALLRING3_VM_R0_ASSERTION))
+ {
+#ifndef IN_RING0
+ /*
+ * In most cases, it's sufficient to return a status code which will
+ * then be propagated up the call chain, usually encountering several
+ * AssertRC invocations along the way. Hitting one of those is more
+ * helpful than stopping here.
+ *
+ * However, some callers don't check the status code because they are called
+ * from void functions, and for these we'll turn this into a ring-0
+ * assertion host call.
+ */
+ if (enmOperation != VMMCALLRING3_REM_REPLAY_HANDLER_NOTIFICATIONS)
+ return VERR_VMM_RING3_CALL_DISABLED;
+#endif
+#ifdef IN_RC
+ RTStrPrintf(g_szRTAssertMsg1, sizeof(pVM->vmm.s.szRing0AssertMsg1),
+ "VMMRZCallRing3: enmOperation=%d uArg=%#llx idCpu=%#x cCallRing3Disabled=%#x\n",
+ enmOperation, uArg, pVCpu->idCpu, pVCpu->vmm.s.cCallRing3Disabled);
+#endif
+ RTStrPrintf(pVM->vmm.s.szRing0AssertMsg1, sizeof(pVM->vmm.s.szRing0AssertMsg1),
+ "VMMRZCallRing3: enmOperation=%d uArg=%#llx idCpu=%#x cCallRing3Disabled=%#x\n",
+ enmOperation, uArg, pVCpu->idCpu, pVCpu->vmm.s.cCallRing3Disabled);
+ enmOperation = VMMCALLRING3_VM_R0_ASSERTION;
+ }
+
+ /*
+ * The normal path.
+ */
+/** @todo profile this! */
+ pVCpu->vmm.s.enmCallRing3Operation = enmOperation;
+ pVCpu->vmm.s.u64CallRing3Arg = uArg;
+ pVCpu->vmm.s.rcCallRing3 = VERR_VMM_RING3_CALL_NO_RC;
+#ifdef IN_RC
+ pVM->vmm.s.pfnRCToHost(VINF_VMM_CALL_HOST);
+#else
+ int rc;
+ if (pVCpu->vmm.s.pfnCallRing3CallbackR0)
+ {
+ rc = pVCpu->vmm.s.pfnCallRing3CallbackR0(pVCpu, enmOperation, pVCpu->vmm.s.pvCallRing3CallbackUserR0);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ rc = vmmR0CallRing3LongJmp(&pVCpu->vmm.s.CallRing3JmpBufR0, VINF_VMM_CALL_HOST);
+ if (RT_FAILURE(rc))
+ return rc;
+#endif
+ return pVCpu->vmm.s.rcCallRing3;
+}
+
+
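+/*
+ * Illustrative sketch only (not part of the original file, kept out of the
+ * build with #if 0): shows how a ring-0 caller would typically use
+ * VMMRZCallRing3 - pick an operation from the VMMCALLRING3 enum and pass any
+ * failure status, including VERR_VMM_RING3_CALL_DISABLED, up its own call
+ * chain.  VMMCALLRING3_VMM_LOGGER_FLUSH is assumed to be available in this
+ * tree; the helper name is made up for the example.
+ */
+#if 0
+static int vmmRZSketchFlushLoggerViaRing3(PVMCC pVM, PVMCPUCC pVCpu)
+{
+    /* Jump to ring-3 and ask it to flush the ring-0 logger buffers. */
+    int rc = VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_VMM_LOGGER_FLUSH, 0 /*uArg*/);
+    if (RT_FAILURE(rc))
+        return rc;  /* Includes VERR_VMM_RING3_CALL_DISABLED; don't swallow it. */
+    return VINF_SUCCESS;
+}
+#endif
+
+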
+/**
+ * Simple wrapper that adds the pVCpu argument.
+ *
+ * @returns VBox status code of the ring-3 call.
+ * @retval VERR_VMM_RING3_CALL_DISABLED if called at the wrong time. This must
+ * be passed up the stack or, if that isn't possible, VMMRZCallRing3
+ * needs to change it into an assertion.
+ *
+ * @param pVM The cross context VM structure.
+ * @param enmOperation The operation.
+ * @param uArg The argument to the operation.
+ */
+VMMRZDECL(int) VMMRZCallRing3NoCpu(PVMCC pVM, VMMCALLRING3 enmOperation, uint64_t uArg)
+{
+ return VMMRZCallRing3(pVM, VMMGetCpu(pVM), enmOperation, uArg);
+}
+
+
+/**
+ * Disables all host calls, except certain fatal ones.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @thread EMT.
+ */
+VMMRZDECL(void) VMMRZCallRing3Disable(PVMCPUCC pVCpu)
+{
+ VMCPU_ASSERT_EMT(pVCpu);
+#if defined(LOG_ENABLED) && defined(IN_RING0)
+ RTCCUINTREG fFlags = ASMIntDisableFlags(); /* preemption consistency. */
+#endif
+
+ Assert(pVCpu->vmm.s.cCallRing3Disabled < 16);
+ if (ASMAtomicUoIncU32(&pVCpu->vmm.s.cCallRing3Disabled) == 1)
+ {
+ /** @todo it might make more sense to just disable logging here, then we
+ * won't flush away important bits... but that goes both ways really. */
+#ifdef IN_RC
+ pVCpu->pVMRC->vmm.s.fRCLoggerFlushingDisabled = true;
+#else
+# ifdef LOG_ENABLED
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = true;
+# endif
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled = true;
+#endif
+ }
+
+#if defined(LOG_ENABLED) && defined(IN_RING0)
+ ASMSetFlags(fFlags);
+#endif
+}
+
+
+/**
+ * Counters a previous VMMRZCallRing3Disable() call and re-enables host calls.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @thread EMT.
+ */
+VMMRZDECL(void) VMMRZCallRing3Enable(PVMCPUCC pVCpu)
+{
+ VMCPU_ASSERT_EMT(pVCpu);
+#if defined(LOG_ENABLED) && defined(IN_RING0)
+ RTCCUINTREG fFlags = ASMIntDisableFlags(); /* preemption consistency. */
+#endif
+
+ Assert(pVCpu->vmm.s.cCallRing3Disabled > 0);
+ if (ASMAtomicUoDecU32(&pVCpu->vmm.s.cCallRing3Disabled) == 0)
+ {
+#ifdef IN_RC
+ pVCpu->pVMRC->vmm.s.fRCLoggerFlushingDisabled = false;
+#else
+# ifdef LOG_ENABLED
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = false;
+# endif
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled = false;
+#endif
+ }
+
+#if defined(LOG_ENABLED) && defined(IN_RING0)
+ ASMSetFlags(fFlags);
+#endif
+}
+
+
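+/*
+ * Illustrative sketch only (not part of the original file, kept out of the
+ * build with #if 0): the intended usage pattern is to bracket code that must
+ * not long-jump to ring-3 with VMMRZCallRing3Disable()/VMMRZCallRing3Enable().
+ * The counter allows nesting; the helper name is made up for the example.
+ */
+#if 0
+static void vmmRZSketchNoRing3Section(PVMCPUCC pVCpu)
+{
+    VMMRZCallRing3Disable(pVCpu);       /* Bumps cCallRing3Disabled. */
+    Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+    /* ... code that must not trigger a ring-3 call goes here ... */
+
+    VMMRZCallRing3Enable(pVCpu);        /* Drops the count; host calls are possible again once it reaches zero. */
+}
+#endif
+
+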
+/**
+ * Checks whether it is possible to call the host context.
+ *
+ * @returns true if it's safe, false if it isn't.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMRZDECL(bool) VMMRZCallRing3IsEnabled(PVMCPUCC pVCpu)
+{
+ VMCPU_ASSERT_EMT(pVCpu);
+ Assert(pVCpu->vmm.s.cCallRing3Disabled <= 16);
+ return pVCpu->vmm.s.cCallRing3Disabled == 0;
+}
+
+
+/**
+ * Sets the ring-0 callback before doing the ring-3 call.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pfnCallback Pointer to the callback.
+ * @param pvUser The user argument.
+ *
+ * @returns VBox status code.
+ */
+VMMRZDECL(int) VMMRZCallRing3SetNotification(PVMCPUCC pVCpu, R0PTRTYPE(PFNVMMR0CALLRING3NOTIFICATION) pfnCallback, RTR0PTR pvUser)
+{
+ AssertPtrReturn(pVCpu, VERR_INVALID_POINTER);
+ AssertPtrReturn(pfnCallback, VERR_INVALID_POINTER);
+
+ if (pVCpu->vmm.s.pfnCallRing3CallbackR0)
+ return VERR_ALREADY_EXISTS;
+
+ pVCpu->vmm.s.pfnCallRing3CallbackR0 = pfnCallback;
+ pVCpu->vmm.s.pvCallRing3CallbackUserR0 = pvUser;
+ return VINF_SUCCESS;
+}
+
+
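+/*
+ * Illustrative sketch only (not part of the original file, kept out of the
+ * build with #if 0): register the notification callback just before the
+ * ring-3 call and remove it again afterwards.  The callback signature is
+ * taken from the invocation in VMMRZCallRing3 above; both helper names and
+ * the chosen operation are made up for the example.
+ */
+#if 0
+static DECLCALLBACK(int) vmmRZSketchRing3Notification(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
+{
+    RT_NOREF(pVCpu, enmOperation, pvUser);
+    return VINF_SUCCESS;    /* A failure status here makes VMMRZCallRing3 bail out before the long jump. */
+}
+
+static int vmmRZSketchCallWithNotification(PVMCC pVM, PVMCPUCC pVCpu)
+{
+    int rc = VMMRZCallRing3SetNotification(pVCpu, vmmRZSketchRing3Notification, NIL_RTR0PTR /*pvUser*/);
+    if (RT_SUCCESS(rc))     /* VERR_ALREADY_EXISTS if a callback is already registered. */
+    {
+        rc = VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_VMM_LOGGER_FLUSH, 0 /*uArg*/);
+        VMMRZCallRing3RemoveNotification(pVCpu);
+    }
+    return rc;
+}
+#endif
+
+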
+/**
+ * Removes the ring-0 callback.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZDECL(void) VMMRZCallRing3RemoveNotification(PVMCPUCC pVCpu)
+{
+ pVCpu->vmm.s.pfnCallRing3CallbackR0 = NULL;
+}
+
+
+/**
+ * Checks whether there is a ring-0 callback notification active.
+ *
+ * @returns true if the notification is active, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMRZDECL(bool) VMMRZCallRing3IsNotificationSet(PVMCPUCC pVCpu)
+{
+ return pVCpu->vmm.s.pfnCallRing3CallbackR0 != NULL;
+}
+