summaryrefslogtreecommitdiffstats
path: root/src/VBox/Devices/Bus/DevIommuAmd.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:17:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:17:27 +0000
commitf215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch)
tree6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Devices/Bus/DevIommuAmd.cpp
parentInitial commit. (diff)
downloadvirtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.tar.xz
virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.zip
Adding upstream version 7.0.14-dfsg.upstream/7.0.14-dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/Devices/Bus/DevIommuAmd.cpp')
-rw-r--r--src/VBox/Devices/Bus/DevIommuAmd.cpp7363
1 files changed, 7363 insertions, 0 deletions
diff --git a/src/VBox/Devices/Bus/DevIommuAmd.cpp b/src/VBox/Devices/Bus/DevIommuAmd.cpp
new file mode 100644
index 00000000..ca839e50
--- /dev/null
+++ b/src/VBox/Devices/Bus/DevIommuAmd.cpp
@@ -0,0 +1,7363 @@
+/* $Id: DevIommuAmd.cpp $ */
+/** @file
+ * IOMMU - Input/Output Memory Management Unit - AMD implementation.
+ */
+
+/*
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_DEV_IOMMU
+#include <VBox/msi.h>
+#include <VBox/iommu-amd.h>
+#include <VBox/vmm/pdmdev.h>
+
+#include <iprt/x86.h>
+#include <iprt/string.h>
+#include <iprt/avl.h>
+#ifdef IN_RING3
+# include <iprt/mem.h>
+#endif
+
+#include "VBoxDD.h"
+#include "DevIommuAmd.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Release log prefix string. */
+#define IOMMU_LOG_PFX "AMD-IOMMU"
+/** The current saved state version. */
+#define IOMMU_SAVED_STATE_VERSION 1
+/** The IOMMU device instance magic. */
+#define IOMMU_MAGIC 0x10acce55
+
+/** Enable the IOTLBE cache only in ring-3 for now, see @bugref{9654#c95}. */
+#ifdef IN_RING3
+# define IOMMU_WITH_IOTLBE_CACHE
+#endif
+/** Enable the interrupt cache (defined unconditionally, i.e. in all contexts). */
+#define IOMMU_WITH_IRTE_CACHE
+
+/* The DTE cache is mandatory for the IOTLB or interrupt cache to work.
+ * Note: since IOMMU_WITH_IRTE_CACHE is defined unconditionally above, this
+ * condition currently always holds and the DTE cache is always compiled in. */
+#if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE)
+# define IOMMU_WITH_DTE_CACHE
+/** The maximum number of device IDs in the cache (sizes IOMMU::aDeviceIds and
+ * IOMMU::aDteCache). */
+# define IOMMU_DEV_CACHE_COUNT 16
+/** An empty device ID (NIL DTE cache key).
+ * This relies on device ID 0 never being a valid device ID for a cached device. */
+# define IOMMU_DTE_CACHE_KEY_NIL 0
+#endif
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+/** The maximum number of IRTE cache entries. */
+# define IOMMU_IRTE_CACHE_COUNT 32
+/** A NIL IRTE cache entry key (all 32 bits set). */
+# define IOMMU_IRTE_CACHE_KEY_NIL (~(uint32_t)0U)
+/** Gets the device ID (high word) from an IRTE cache entry key. */
+#define IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(a_Key) RT_HIWORD(a_Key)
+/** Gets the offset into the IRTE table (low word) from an IRTE cache entry key. */
+# define IOMMU_IRTE_CACHE_KEY_GET_OFF(a_Key) RT_LOWORD(a_Key)
+/** Makes an IRTE cache entry key.
+ *
+ * Bits 31:16 are the device ID (Bus, Device, Function).
+ * Bits 15:0 are the offset into the IRTE table.
+ */
+# define IOMMU_IRTE_CACHE_KEY_MAKE(a_DevId, a_off) RT_MAKE_U32(a_off, a_DevId)
+#endif /* IOMMU_WITH_IRTE_CACHE */
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/** The maximum number of IOTLB entries. */
+# define IOMMU_IOTLBE_MAX 64
+/** The mask of bits covering the domain ID in the IOTLBE key (key bits 63:40). */
+# define IOMMU_IOTLB_DOMAIN_ID_MASK UINT64_C(0xffffff0000000000)
+/** The mask of bits covering the IOVA in the IOTLBE key (key bits 39:0). */
+# define IOMMU_IOTLB_IOVA_MASK (~IOMMU_IOTLB_DOMAIN_ID_MASK)
+/** The number of bits to shift for the domain ID of the IOTLBE key. */
+# define IOMMU_IOTLB_DOMAIN_ID_SHIFT 40
+/** A NIL IOTLB key. */
+# define IOMMU_IOTLB_KEY_NIL UINT64_C(0)
+/** Gets the domain ID from an IOTLB entry key. */
+# define IOMMU_IOTLB_KEY_GET_DOMAIN_ID(a_Key) ((a_Key) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT)
+/** Gets the IOVA (4K page aligned, i.e. with the page offset bits zero) from the
+ * IOTLB entry key. */
+# define IOMMU_IOTLB_KEY_GET_IOVA(a_Key) (((a_Key) & IOMMU_IOTLB_IOVA_MASK) << X86_PAGE_4K_SHIFT)
+/** Makes an IOTLB entry key.
+ *
+ * Address bits 63:52 of the IOVA are zero extended, so top 12 bits are free.
+ * Address bits 11:0 of the IOVA are offset into the minimum page size of 4K,
+ * so bottom 12 bits are free.
+ *
+ * Thus the top 24 bits of the key are available for the domain ID; the domain
+ * ID is shifted up by IOMMU_IOTLB_DOMAIN_ID_SHIFT so that bits 15:0 of the domain
+ * ID start at key bit 40.
+ * We use the bottom 40 bits of the key to hold bits 51:12 of the IOVA.
+ */
+# define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova) ( ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \
+ | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK))
+#endif /* IOMMU_WITH_IOTLBE_CACHE */
+
+#ifdef IOMMU_WITH_DTE_CACHE
+/** @name IOMMU_DTE_CACHE_F_XXX: DTE cache flags.
+ *
+ * Some of these flags are "basic" i.e. they correspond directly to their bits in
+ * the DTE. The rest of the flags are based on checks or operations on several DTE
+ * bits.
+ *
+ * The basic flags are:
+ * - VALID (DTE.V)
+ * - IO_PERM_READ (DTE.IR)
+ * - IO_PERM_WRITE (DTE.IW)
+ * - IO_PERM_RSVD (bit following DTE.IW reserved for future & to keep
+ * masking consistent)
+ * - SUPPRESS_ALL_IOPF (DTE.SA)
+ * - SUPPRESS_IOPF (DTE.SE)
+ * - INTR_MAP_VALID (DTE.IV)
+ * - IGNORE_UNMAPPED_INTR (DTE.IG)
+ *
+ * All flags fit in 16 bits (see DTECACHE::fFlags).
+ *
+ * @see iommuAmdGetBasicDevFlags()
+ * @{ */
+/** The DTE is present. */
+# define IOMMU_DTE_CACHE_F_PRESENT RT_BIT(0)
+/** The DTE is valid. */
+# define IOMMU_DTE_CACHE_F_VALID RT_BIT(1)
+/** The DTE permissions apply for address translations. */
+# define IOMMU_DTE_CACHE_F_IO_PERM RT_BIT(2)
+/** DTE permission - I/O read allowed. */
+# define IOMMU_DTE_CACHE_F_IO_PERM_READ RT_BIT(3)
+/** DTE permission - I/O write allowed. */
+# define IOMMU_DTE_CACHE_F_IO_PERM_WRITE RT_BIT(4)
+/** DTE permission - reserved. */
+# define IOMMU_DTE_CACHE_F_IO_PERM_RSVD RT_BIT(5)
+/** Address translation required. */
+# define IOMMU_DTE_CACHE_F_ADDR_TRANSLATE RT_BIT(6)
+/** Suppress all I/O page faults. */
+# define IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF RT_BIT(7)
+/** Suppress I/O page faults. */
+# define IOMMU_DTE_CACHE_F_SUPPRESS_IOPF RT_BIT(8)
+/** Interrupt map valid. */
+# define IOMMU_DTE_CACHE_F_INTR_MAP_VALID RT_BIT(9)
+/** Ignore unmapped interrupts. */
+# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR RT_BIT(10)
+/** An I/O page fault has been raised for this device. */
+# define IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED RT_BIT(11)
+/** Fixed and arbitrary interrupt control: Target Abort. */
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT RT_BIT(12)
+/** Fixed and arbitrary interrupt control: Forward unmapped. */
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_FWD_UNMAPPED RT_BIT(13)
+/** Fixed and arbitrary interrupt control: Remapped. */
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_REMAPPED RT_BIT(14)
+/** Fixed and arbitrary interrupt control: Reserved. */
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_RSVD RT_BIT(15)
+/** @} */
+
+/** The number of bits to shift I/O device flags for DTE permissions
+ * (bit position of IOMMU_DTE_CACHE_F_IO_PERM_READ). */
+# define IOMMU_DTE_CACHE_F_IO_PERM_SHIFT 3
+/** The mask of DTE permissions in I/O device flags (applied after shifting). */
+# define IOMMU_DTE_CACHE_F_IO_PERM_MASK 0x3
+/** The number of bits to shift I/O device flags for interrupt control bits
+ * (bit position of IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT). */
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT 12
+/** The mask of interrupt control bits in I/O device flags (applied after shifting). */
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_MASK 0x3
+/** The number of bits to shift for ignore-unmapped interrupts bit
+ * (bit position of IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR). */
+# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR_SHIFT 10
+
+/** Acquires the cache lock (IOMMU::CritSectCache); the enter status is checked
+ * with a release assertion. */
+# define IOMMU_CACHE_LOCK(a_pDevIns, a_pThis) \
+ do { \
+ int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VINF_SUCCESS); \
+ PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), &(a_pThis)->CritSectCache, rcLock); \
+ } while (0)
+
+/** Releases the cache lock. */
+# define IOMMU_CACHE_UNLOCK(a_pDevIns, a_pThis) PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache)
+#endif /* IOMMU_WITH_DTE_CACHE */
+
+/** Acquires the IOMMU lock (returns a_rcBusy on contention).
+ *
+ * @note This macro contains a @c return statement: when the lock cannot be
+ * acquired, the enclosing function returns the lock status code. It can
+ * therefore only be used in functions returning a status code. */
+#define IOMMU_LOCK_RET(a_pDevIns, a_pThisCC, a_rcBusy) \
+ do { \
+ int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), (a_rcBusy)); \
+ if (RT_LIKELY(rcLock == VINF_SUCCESS)) \
+ { /* likely */ } \
+ else \
+ return rcLock; \
+ } while (0)
+
+/** Acquires the IOMMU lock (can fail under extraordinary circumstance in ring-0).
+ * The lock status is checked with a release assertion. */
+#define IOMMU_LOCK(a_pDevIns, a_pThisCC) \
+ do { \
+ int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VINF_SUCCESS); \
+ PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), NULL, rcLock); \
+ } while (0)
+
+/** Asserts that the current thread owns the PDM lock. */
+# define IOMMU_ASSERT_LOCK_IS_OWNER(a_pDevIns, a_pThisCC) \
+ do \
+ { \
+ Assert((a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLockIsOwner((a_pDevIns))); \
+ NOREF(a_pThisCC); \
+ } while (0)
+
+/** Releases the PDM lock. */
+# define IOMMU_UNLOCK(a_pDevIns, a_pThisCC) (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock((a_pDevIns))
+
+/** Gets the maximum valid IOVA for the given I/O page-table level.
+ *
+ * Each page-table level contributes 9 address bits on top of the 4K page offset,
+ * so the result is (4K << (9 * a_Level)) - 1.
+ *
+ * The page size is widened to 64 bits before shifting. Shifting it at a narrower
+ * native width would overflow for the higher levels (assuming the usual 4K value
+ * of 0x1000, already from level 3 on a 32-bit type). With the 64-bit shift and
+ * the same assumption, level 6 wraps the shifted value to zero and the result is
+ * all ones.
+ *
+ * @param a_Level The I/O page-table level. */
+#define IOMMU_GET_MAX_VALID_IOVA(a_Level) (((uint64_t)X86_PAGE_4K_SIZE << ((a_Level) * 9)) - 1)
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * IOMMU operation (transaction).
+ *
+ * Identifies the kind of operation being performed (see IOMMUOPAUX::enmOp).
+ */
+typedef enum IOMMUOP
+{
+ /** Address translation request. */
+ IOMMUOP_TRANSLATE_REQ = 0,
+ /** Memory read request. */
+ IOMMUOP_MEM_READ,
+ /** Memory write request. */
+ IOMMUOP_MEM_WRITE,
+ /** Interrupt request. */
+ IOMMUOP_INTR_REQ,
+ /** Command. */
+ IOMMUOP_CMD
+} IOMMUOP;
+/** Pointer to an IOMMU operation. */
+typedef IOMMUOP *PIOMMUOP;
+
+/**
+ * I/O page lookup.
+ *
+ * Holds the result of translating an I/O virtual address page: the translated
+ * address, the number of offset bits and the permissions.
+ */
+typedef struct IOPAGELOOKUP
+{
+ /** The translated system physical address. */
+ RTGCPHYS GCPhysSpa;
+ /** The number of offset bits in the system physical address. */
+ uint8_t cShift;
+ /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
+ uint8_t fPerm;
+} IOPAGELOOKUP;
+/** Pointer to an I/O page lookup. */
+typedef IOPAGELOOKUP *PIOPAGELOOKUP;
+/** Pointer to a const I/O page lookup. */
+typedef IOPAGELOOKUP const *PCIOPAGELOOKUP;
+
+/**
+ * I/O address range.
+ *
+ * Describes an access as a start address, a size in bytes and the I/O
+ * permissions involved.
+ */
+typedef struct IOADDRRANGE
+{
+ /** The address (virtual or physical). */
+ uint64_t uAddr;
+ /** The size of the access in bytes. */
+ size_t cb;
+ /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
+ uint8_t fPerm;
+} IOADDRRANGE;
+/** Pointer to an I/O address range. */
+typedef IOADDRRANGE *PIOADDRRANGE;
+/** Pointer to a const I/O address range. */
+typedef IOADDRRANGE const *PCIOADDRRANGE;
+
+#ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * Device Table Entry Cache.
+ *
+ * One cache entry per cached device; kept at exactly 4 bytes (see the
+ * compile-time size assertion below).
+ */
+typedef struct DTECACHE
+{
+ /** This device's flags, see IOMMU_DTE_CACHE_F_XXX. */
+ uint16_t fFlags;
+ /** The domain ID assigned for this device by software. */
+ uint16_t idDomain;
+} DTECACHE;
+/** Pointer to a DTE cache entry. */
+typedef DTECACHE *PDTECACHE;
+/** Pointer to a const DTE cache entry.
+ * NOTE(review): despite the name and description, this typedef lacks the
+ * 'const' qualifier and is currently identical to PDTECACHE. It should
+ * presumably read 'DTECACHE const *' - confirm that no user writes through it
+ * before changing. */
+typedef DTECACHE *PCDTECACHE;
+AssertCompileSize(DTECACHE, 4);
+#endif /* IOMMU_WITH_DTE_CACHE */
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/**
+ * I/O TLB Entry.
+ * Keep this as small and aligned as possible.
+ *
+ * Each entry is simultaneously a node in an AVL tree (keyed by a 64-bit key) and
+ * a node in a least-recently-used list. The AVL node core must remain the first
+ * member (asserted below).
+ */
+typedef struct IOTLBE
+{
+ /** The AVL tree node. */
+ AVLU64NODECORE Core;
+ /** The least recently used (LRU) list node. */
+ RTLISTNODE NdLru;
+ /** The I/O page lookup results of the translation. */
+ IOPAGELOOKUP PageLookup;
+ /** Whether the entry needs to be evicted from the cache. */
+ bool fEvictPending;
+} IOTLBE;
+/** Pointer to an IOMMU I/O TLB entry struct. */
+typedef IOTLBE *PIOTLBE;
+/** Pointer to a const IOMMU I/O TLB entry struct. */
+typedef IOTLBE const *PCIOTLBE;
+AssertCompileSizeAlignment(IOTLBE, 8);
+AssertCompileMemberOffset(IOTLBE, Core, 0);
+#endif /* IOMMU_WITH_IOTLBE_CACHE */
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+/**
+ * Interrupt Remap Table Entry Cache.
+ *
+ * Pairs a cache key (device ID and IRTE table offset) with the cached IRTE.
+ */
+typedef struct IRTECACHE
+{
+ /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */
+ uint32_t uKey;
+ /** The IRTE. */
+ IRTE_T Irte;
+} IRTECACHE;
+/** Pointer to an IRTE cache struct. */
+typedef IRTECACHE *PIRTECACHE;
+/** Pointer to a const IRTE cache struct. */
+typedef IRTECACHE const *PCIRTECACHE;
+AssertCompileSizeAlignment(IRTECACHE, 4);
+#endif /* IOMMU_WITH_IRTE_CACHE */
+
+/**
+ * The shared IOMMU device state.
+ *
+ * Contains the device identification, the optional DTE/IRTE caches (compiled in
+ * depending on IOMMU_WITH_DTE_CACHE / IOMMU_WITH_IRTE_CACHE), the PCI and MMIO
+ * register state and, when VBOX_WITH_STATISTICS is defined, the statistics
+ * counters.
+ *
+ * @note Several members are asserted to be 8-byte aligned right after the
+ * structure definition; take care when adding or reordering members.
+ */
+typedef struct IOMMU
+{
+ /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */
+ uint32_t idxIommu;
+ /** IOMMU magic. */
+ uint32_t u32Magic;
+
+ /** The MMIO handle. */
+ IOMMMIOHANDLE hMmio;
+ /** The event semaphore the command thread waits on. */
+ SUPSEMEVENT hEvtCmdThread;
+ /** Whether the command thread has been signaled for wake up. */
+ bool volatile fCmdThreadSignaled;
+ /** Padding. */
+ bool afPadding0[3];
+ /** The IOMMU PCI address. */
+ PCIBDF uPciAddress;
+
+#ifdef IOMMU_WITH_DTE_CACHE
+ /** The critsect that protects the cache from concurrent access. */
+ PDMCRITSECT CritSectCache;
+ /** Array of device IDs (same number of entries as aDteCache). */
+ uint16_t aDeviceIds[IOMMU_DEV_CACHE_COUNT];
+ /** Array of DTE cache entries. */
+ DTECACHE aDteCache[IOMMU_DEV_CACHE_COUNT];
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+ /** Array of IRTE cache entries. */
+ IRTECACHE aIrteCache[IOMMU_IRTE_CACHE_COUNT];
+#endif
+
+ /** @name PCI: Base capability block registers.
+ * @{ */
+ IOMMU_BAR_T IommuBar; /**< IOMMU base address register. */
+ /** @} */
+
+ /** @name MMIO: Control and status registers.
+ * @{ */
+ DEV_TAB_BAR_T aDevTabBaseAddrs[8]; /**< Device table base address registers. */
+ CMD_BUF_BAR_T CmdBufBaseAddr; /**< Command buffer base address register. */
+ EVT_LOG_BAR_T EvtLogBaseAddr; /**< Event log base address register. */
+ IOMMU_CTRL_T Ctrl; /**< IOMMU control register. */
+ IOMMU_EXCL_RANGE_BAR_T ExclRangeBaseAddr; /**< IOMMU exclusion range base register. */
+ IOMMU_EXCL_RANGE_LIMIT_T ExclRangeLimit; /**< IOMMU exclusion range limit. */
+ IOMMU_EXT_FEAT_T ExtFeat; /**< IOMMU extended feature register. */
+ /** @} */
+
+ /** @name MMIO: Peripheral Page Request (PPR) Log registers.
+ * @{ */
+ PPR_LOG_BAR_T PprLogBaseAddr; /**< PPR Log base address register. */
+ IOMMU_HW_EVT_HI_T HwEvtHi; /**< IOMMU hardware event register (Hi). */
+ IOMMU_HW_EVT_LO_T HwEvtLo; /**< IOMMU hardware event register (Lo). */
+ IOMMU_HW_EVT_STATUS_T HwEvtStatus; /**< IOMMU hardware event status. */
+ /** @} */
+
+ /** @todo IOMMU: SMI filter. */
+
+ /** @name MMIO: Guest Virtual-APIC Log registers.
+ * @{ */
+ GALOG_BAR_T GALogBaseAddr; /**< Guest Virtual-APIC Log base address register. */
+ GALOG_TAIL_ADDR_T GALogTailAddr; /**< Guest Virtual-APIC Log Tail address register. */
+ /** @} */
+
+ /** @name MMIO: Alternate PPR and Event Log registers.
+ * @{ */
+ PPR_LOG_B_BAR_T PprLogBBaseAddr; /**< PPR Log B base address register. */
+ EVT_LOG_B_BAR_T EvtLogBBaseAddr; /**< Event Log B base address register. */
+ /** @} */
+
+ /** @name MMIO: Device-specific feature registers.
+ * @{ */
+ DEV_SPECIFIC_FEAT_T DevSpecificFeat; /**< Device-specific feature extension register (DSFX). */
+ DEV_SPECIFIC_CTRL_T DevSpecificCtrl; /**< Device-specific control extension register (DSCX). */
+ DEV_SPECIFIC_STATUS_T DevSpecificStatus; /**< Device-specific status extension register (DSSX). */
+ /** @} */
+
+ /** @name MMIO: MSI Capability Block registers.
+ * @{ */
+ MSI_MISC_INFO_T MiscInfo; /**< MSI Misc. info registers / MSI Vector registers. */
+ /** @} */
+
+ /** @name MMIO: Performance Optimization Control registers.
+ * @{ */
+ IOMMU_PERF_OPT_CTRL_T PerfOptCtrl; /**< IOMMU Performance optimization control register. */
+ /** @} */
+
+ /** @name MMIO: x2APIC Control registers.
+ * @{ */
+ IOMMU_XT_GEN_INTR_CTRL_T XtGenIntrCtrl; /**< IOMMU X2APIC General interrupt control register. */
+ IOMMU_XT_PPR_INTR_CTRL_T XtPprIntrCtrl; /**< IOMMU X2APIC PPR interrupt control register. */
+ IOMMU_XT_GALOG_INTR_CTRL_T XtGALogIntrCtrl; /**< IOMMU X2APIC Guest Log interrupt control register. */
+ /** @} */
+
+ /** @name MMIO: Memory Address Routing & Control (MARC) registers.
+ * @{ */
+ MARC_APER_T aMarcApers[4]; /**< MARC Aperture Registers. */
+ /** @} */
+
+ /** @name MMIO: Reserved register.
+ * @{ */
+ IOMMU_RSVD_REG_T RsvdReg; /**< IOMMU Reserved Register. */
+ /** @} */
+
+ /** @name MMIO: Command and Event Log pointer registers.
+ * @{ */
+ CMD_BUF_HEAD_PTR_T CmdBufHeadPtr; /**< Command buffer head pointer register. */
+ CMD_BUF_TAIL_PTR_T CmdBufTailPtr; /**< Command buffer tail pointer register. */
+ EVT_LOG_HEAD_PTR_T EvtLogHeadPtr; /**< Event log head pointer register. */
+ EVT_LOG_TAIL_PTR_T EvtLogTailPtr; /**< Event log tail pointer register. */
+ /** @} */
+
+ /** @name MMIO: Command and Event Status register.
+ * @{ */
+ IOMMU_STATUS_T Status; /**< IOMMU status register. */
+ /** @} */
+
+ /** @name MMIO: PPR Log Head and Tail pointer registers.
+ * @{ */
+ PPR_LOG_HEAD_PTR_T PprLogHeadPtr; /**< IOMMU PPR log head pointer register. */
+ PPR_LOG_TAIL_PTR_T PprLogTailPtr; /**< IOMMU PPR log tail pointer register. */
+ /** @} */
+
+ /** @name MMIO: Guest Virtual-APIC Log Head and Tail pointer registers.
+ * @{ */
+ GALOG_HEAD_PTR_T GALogHeadPtr; /**< Guest Virtual-APIC log head pointer register. */
+ GALOG_TAIL_PTR_T GALogTailPtr; /**< Guest Virtual-APIC log tail pointer register. */
+ /** @} */
+
+ /** @name MMIO: PPR Log B Head and Tail pointer registers.
+ * @{ */
+ PPR_LOG_B_HEAD_PTR_T PprLogBHeadPtr; /**< PPR log B head pointer register. */
+ PPR_LOG_B_TAIL_PTR_T PprLogBTailPtr; /**< PPR log B tail pointer register. */
+ /** @} */
+
+ /** @name MMIO: Event Log B Head and Tail pointer registers.
+ * @{ */
+ EVT_LOG_B_HEAD_PTR_T EvtLogBHeadPtr; /**< Event log B head pointer register. */
+ EVT_LOG_B_TAIL_PTR_T EvtLogBTailPtr; /**< Event log B tail pointer register. */
+ /** @} */
+
+ /** @name MMIO: PPR Log Overflow protection registers.
+ * @{ */
+ PPR_LOG_AUTO_RESP_T PprLogAutoResp; /**< PPR Log Auto Response register. */
+ PPR_LOG_OVERFLOW_EARLY_T PprLogOverflowEarly; /**< PPR Log Overflow Early Indicator register. */
+ PPR_LOG_B_OVERFLOW_EARLY_T PprLogBOverflowEarly; /**< PPR Log B Overflow Early Indicator register. */
+ /** @} */
+
+ /** @todo IOMMU: IOMMU Event counter registers. */
+
+#ifdef VBOX_WITH_STATISTICS
+ /** @name IOMMU: Stat counters.
+ * R3 = ring-3 context; RZ = the non-ring-3 contexts (presumably ring-0 and
+ * raw-mode - confirm against where these counters are incremented).
+ * @{ */
+ STAMCOUNTER StatMmioReadR3; /**< Number of MMIO reads in R3. */
+ STAMCOUNTER StatMmioReadRZ; /**< Number of MMIO reads in RZ. */
+ STAMCOUNTER StatMmioWriteR3; /**< Number of MMIO writes in R3. */
+ STAMCOUNTER StatMmioWriteRZ; /**< Number of MMIO writes in RZ. */
+
+ STAMCOUNTER StatMsiRemapR3; /**< Number of MSI remap requests in R3. */
+ STAMCOUNTER StatMsiRemapRZ; /**< Number of MSI remap requests in RZ. */
+
+ STAMCOUNTER StatMemReadR3; /**< Number of memory read translation requests in R3. */
+ STAMCOUNTER StatMemReadRZ; /**< Number of memory read translation requests in RZ. */
+ STAMCOUNTER StatMemWriteR3; /**< Number of memory write translation requests in R3. */
+ STAMCOUNTER StatMemWriteRZ; /**< Number of memory write translation requests in RZ. */
+
+ STAMCOUNTER StatMemBulkReadR3; /**< Number of memory read bulk translation requests in R3. */
+ STAMCOUNTER StatMemBulkReadRZ; /**< Number of memory read bulk translation requests in RZ. */
+ STAMCOUNTER StatMemBulkWriteR3; /**< Number of memory write bulk translation requests in R3. */
+ STAMCOUNTER StatMemBulkWriteRZ; /**< Number of memory write bulk translation requests in RZ. */
+
+ STAMCOUNTER StatCmd; /**< Number of commands processed in total. */
+ STAMCOUNTER StatCmdCompWait; /**< Number of Completion Wait commands processed. */
+ STAMCOUNTER StatCmdInvDte; /**< Number of Invalidate DTE commands processed. */
+ STAMCOUNTER StatCmdInvIommuPages; /**< Number of Invalidate IOMMU pages commands processed. */
+ STAMCOUNTER StatCmdInvIotlbPages; /**< Number of Invalidate IOTLB pages commands processed. */
+ STAMCOUNTER StatCmdInvIntrTable; /**< Number of Invalidate Interrupt Table commands processed. */
+ STAMCOUNTER StatCmdPrefIommuPages; /**< Number of Prefetch IOMMU Pages commands processed. */
+ STAMCOUNTER StatCmdCompletePprReq; /**< Number of Complete PPR Requests commands processed. */
+ STAMCOUNTER StatCmdInvIommuAll; /**< Number of Invalidate IOMMU All commands processed. */
+
+ STAMCOUNTER StatIotlbeCached; /**< Number of IOTLB entries in the cache. */
+ STAMCOUNTER StatIotlbeLazyEvictReuse; /**< Number of IOTLB entries re-used after lazy eviction. */
+
+ STAMPROFILEADV StatProfDteLookup; /**< Profiling of I/O page walk (from memory). */
+ STAMPROFILEADV StatProfIotlbeLookup; /**< Profiling of IOTLB entry lookup (from cache). */
+
+ STAMPROFILEADV StatProfIrteLookup; /**< Profiling of IRTE entry lookup (from memory). */
+ STAMPROFILEADV StatProfIrteCacheLookup; /**< Profiling of IRTE entry lookup (from cache). */
+
+ STAMCOUNTER StatAccessCacheHit; /**< Number of IOTLB cache hits. */
+ STAMCOUNTER StatAccessCacheHitFull; /**< Number of accesses that were fully looked up from the cache. */
+ STAMCOUNTER StatAccessCacheMiss; /**< Number of cache misses (resulting in DTE lookups). */
+ STAMCOUNTER StatAccessCacheNonContig; /**< Number of cache accesses resulting in non-contiguous access. */
+ STAMCOUNTER StatAccessCachePermDenied; /**< Number of cache accesses resulting in insufficient permissions. */
+ STAMCOUNTER StatAccessDteNonContig; /**< Number of DTE accesses resulting in non-contiguous access. */
+ STAMCOUNTER StatAccessDtePermDenied; /**< Number of DTE accesses resulting in insufficient permissions. */
+
+ STAMCOUNTER StatIntrCacheHit; /**< Number of interrupt cache hits. */
+ STAMCOUNTER StatIntrCacheMiss; /**< Number of interrupt cache misses. */
+
+ STAMCOUNTER StatNonStdPageSize; /**< Number of non-standard page size translations. */
+ STAMCOUNTER StatIopfs; /**< Number of I/O page faults. */
+ /** @} */
+#endif
+} IOMMU;
+/** Pointer to the IOMMU device state. */
+typedef IOMMU *PIOMMU;
+/** Pointer to the const IOMMU device state. */
+typedef const IOMMU *PCIOMMU;
+/* Compile-time layout checks: these members must start on 8-byte boundaries. */
+AssertCompileMemberAlignment(IOMMU, hMmio, 8);
+#ifdef IOMMU_WITH_DTE_CACHE
+AssertCompileMemberAlignment(IOMMU, CritSectCache, 8);
+AssertCompileMemberAlignment(IOMMU, aDeviceIds, 8);
+AssertCompileMemberAlignment(IOMMU, aDteCache, 8);
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+AssertCompileMemberAlignment(IOMMU, aIrteCache, 8);
+#endif
+AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
+AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8);
+AssertCompileMemberAlignment(IOMMU, CmdBufHeadPtr, 8);
+AssertCompileMemberAlignment(IOMMU, Status, 8);
+
+/**
+ * The ring-3 IOMMU device state.
+ *
+ * Besides the device instance, helper and command thread pointers, this holds
+ * the IOTLB cache bookkeeping when IOMMU_WITH_IOTLBE_CACHE is defined.
+ */
+typedef struct IOMMUR3
+{
+ /** Device instance. */
+ PPDMDEVINSR3 pDevInsR3;
+ /** The IOMMU helpers. */
+ R3PTRTYPE(PCPDMIOMMUHLPR3) pIommuHlpR3;
+ /** The command thread handle. */
+ R3PTRTYPE(PPDMTHREAD) pCmdThread;
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ /** Pointer to array of pre-allocated IOTLBEs. */
+ PIOTLBE paIotlbes;
+ /** Maps [DomainId,Iova] to [IOTLBE] (see IOMMU_IOTLB_KEY_MAKE for the key layout). */
+ AVLU64TREE TreeIotlbe;
+ /** LRU list anchor for IOTLB entries. */
+ RTLISTANCHOR LstLruIotlbe;
+ /** Index of the next unused IOTLB. */
+ uint32_t idxUnusedIotlbe;
+ /** Number of cached IOTLB entries in the tree. */
+ uint32_t cCachedIotlbes;
+#endif
+} IOMMUR3;
+/** Pointer to the ring-3 IOMMU device state. */
+typedef IOMMUR3 *PIOMMUR3;
+/** Pointer to the const ring-3 IOMMU device state. */
+typedef const IOMMUR3 *PCIOMMUR3;
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/* Compile-time layout checks: these members must start on 8-byte boundaries. */
+AssertCompileMemberAlignment(IOMMUR3, paIotlbes, 8);
+AssertCompileMemberAlignment(IOMMUR3, TreeIotlbe, 8);
+AssertCompileMemberAlignment(IOMMUR3, LstLruIotlbe, 8);
+#endif
+
+/**
+ * The ring-0 IOMMU device state.
+ *
+ * Holds only the ring-0 device instance and IOMMU helper pointers.
+ */
+typedef struct IOMMUR0
+{
+ /** Device instance. */
+ PPDMDEVINSR0 pDevInsR0;
+ /** The IOMMU helpers. */
+ R0PTRTYPE(PCPDMIOMMUHLPR0) pIommuHlpR0;
+} IOMMUR0;
+/** Pointer to the ring-0 IOMMU device state. */
+typedef IOMMUR0 *PIOMMUR0;
+
+/**
+ * The raw-mode IOMMU device state.
+ *
+ * Holds only the raw-mode device instance and IOMMU helper pointers.
+ */
+typedef struct IOMMURC
+{
+ /** Device instance. */
+ PPDMDEVINSRC pDevInsRC;
+ /** The IOMMU helpers. */
+ RCPTRTYPE(PCPDMIOMMUHLPRC) pIommuHlpRC;
+} IOMMURC;
+/** Pointer to the raw-mode IOMMU device state. */
+typedef IOMMURC *PIOMMURC;
+
+/** The IOMMU device state for the current context (selects one of IOMMUR3,
+ * IOMMUR0 or IOMMURC via CTX_SUFF). */
+typedef CTX_SUFF(IOMMU) IOMMUCC;
+/** Pointer to the IOMMU device state for the current context. */
+typedef CTX_SUFF(PIOMMU) PIOMMUCC;
+
+/**
+ * IOMMU register access.
+ *
+ * Bundles a register name with the callbacks used to read and write it.
+ */
+typedef struct IOMMUREGACC
+{
+ /** The register name (presumably used for logging/debugging - confirm against users). */
+ const char *pszName;
+ /** Read callback: takes the register offset and a pointer receiving the 64-bit value. */
+ VBOXSTRICTRC (*pfnRead)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value);
+ /** Write callback: takes the register offset and the 64-bit value to write. */
+ VBOXSTRICTRC (*pfnWrite)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value);
+} IOMMUREGACC;
+/** Pointer to an IOMMU register access. */
+typedef IOMMUREGACC *PIOMMUREGACC;
+/** Pointer to a const IOMMU register access. */
+typedef IOMMUREGACC const *PCIOMMUREGACC;
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/**
+ * IOTLBE flush argument.
+ *
+ * Carries the ring-3 state and the domain ID whose IOTLB entries are to be
+ * flushed.
+ */
+typedef struct IOTLBEFLUSHARG
+{
+ /** The ring-3 IOMMU device state. */
+ PIOMMUR3 pIommuR3;
+ /** The domain ID to flush. */
+ uint16_t idDomain;
+} IOTLBEFLUSHARG;
+/** Pointer to an IOTLBE flush argument. */
+typedef IOTLBEFLUSHARG *PIOTLBEFLUSHARG;
+/** Pointer to a const IOTLBE flush argument. */
+typedef IOTLBEFLUSHARG const *PCIOTLBEFLUSHARG;
+
+/**
+ * IOTLBE Info. argument.
+ *
+ * Carries the ring-3 state, the debug info helper and the domain ID whose IOTLB
+ * entries are to be dumped.
+ */
+typedef struct IOTLBEINFOARG
+{
+ /** The ring-3 IOMMU device state. */
+ PIOMMUR3 pIommuR3;
+ /** The info helper. */
+ PCDBGFINFOHLP pHlp;
+ /** The domain ID to dump IOTLB entry. */
+ uint16_t idDomain;
+} IOTLBEINFOARG;
+/** Pointer to an IOTLBE info argument. */
+typedef IOTLBEINFOARG *PIOTLBEINFOARG;
+/** Pointer to a const IOTLBE info argument. */
+typedef IOTLBEINFOARG const *PCIOTLBEINFOARG;
+#endif
+
+/**
+ * IOMMU operation auxiliary info.
+ *
+ * Describes the operation being performed and the device/domain it concerns.
+ */
+typedef struct IOMMUOPAUX
+{
+ /** The IOMMU operation being performed. */
+ IOMMUOP enmOp;
+ /** The device table entry (can be NULL). */
+ PCDTE_T pDte;
+ /** The device ID (bus, device, function). */
+ uint16_t idDevice;
+ /** The domain ID (when the DTE isn't provided). */
+ uint16_t idDomain;
+} IOMMUOPAUX;
+/** Pointer to an IOMMU operation auxiliary info struct. */
+typedef IOMMUOPAUX *PIOMMUOPAUX;
+/** Pointer to a const IOMMU operation auxiliary info struct. */
+typedef IOMMUOPAUX const *PCIOMMUOPAUX;
+
+/**
+ * I/O page lookup callback.
+ *
+ * @returns An int status code.
+ * @param pDevIns The device instance.
+ * @param uIovaPage The I/O virtual address of the page to look up (presumably
+ * page aligned - confirm against implementations).
+ * @param fPerm The I/O permissions (presumably IOMMU_IO_PERM_XXX - confirm).
+ * @param pAux The auxiliary info for the operation.
+ * @param pPageLookup The I/O page lookup structure (presumably receives the
+ * result - confirm against implementations).
+ */
+typedef DECLCALLBACKTYPE(int, FNIOPAGELOOKUP,(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
+ PIOPAGELOOKUP pPageLookup));
+/** Pointer to an I/O page lookup callback. */
+typedef FNIOPAGELOOKUP *PFNIOPAGELOOKUP;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef IN_RING3
+/**
+ * An array of the number of device table segments supported.
+ * Indexed by u2DevTabSegSup.
+ */
+static uint8_t const g_acDevTabSegs[] = { 0, 2, 4, 8 };
+#endif
+
+#if (defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)) || defined(LOG_ENABLED)
+/**
+ * The IOMMU I/O permission names.
+ * Indexed by the I/O permission value (masked with IOMMU_IO_PERM_MASK).
+ */
+static const char * const g_aszPerm[] = { "none", "read", "write", "read+write" };
+#endif
+
+/**
+ * An array of the masks to select the device table segment index from a device ID.
+ *
+ * Entry i selects the top 0, 1, 2 or 3 bits of the 16-bit device ID and pairs
+ * with entry i of g_auDevTabSegShifts. (Presumably indexed like g_acDevTabSegs -
+ * confirm against users.)
+ */
+static uint16_t const g_auDevTabSegMasks[] = { 0x0, 0x8000, 0xc000, 0xe000 };
+
+/**
+ * An array of the shift values to select the device table segment index from a
+ * device ID.
+ *
+ * Entry i is the right-shift that moves the bits selected by entry i of
+ * g_auDevTabSegMasks down to bit 0.
+ */
+static uint8_t const g_auDevTabSegShifts[] = { 0, 15, 14, 13 };
+
+/**
+ * The maximum size (inclusive) of each device table segment (0 to 7).
+ * Indexed by the device table segment index.
+ */
+static uint16_t const g_auDevTabSegMaxSizes[] = { 0x1ff, 0xff, 0x7f, 0x7f, 0x3f, 0x3f, 0x3f, 0x3f };
+
+
+#ifndef VBOX_DEVICE_STRUCT_TESTCASE
+/**
+ * Computes how many entries a buffer can hold from its encoded length.
+ *
+ * The result is two to the power of @a uEncodedLen.
+ *
+ * @returns The maximum number of buffer entries.
+ * @param   uEncodedLen     The buffer length as a power-of-2 exponent; asserted
+ *                          to be in the range 8 to 15 (both inclusive).
+ */
+DECLINLINE(uint32_t) iommuAmdGetBufMaxEntries(uint8_t uEncodedLen)
+{
+    Assert(uEncodedLen > 7);
+    Assert(uEncodedLen < 16);
+
+    /* 2 << (n - 1) == 2^n. */
+    int const cShift = uEncodedLen - 1;
+    return 2 << cShift;
+}
+
+
+/**
+ * Computes the total size of a buffer from a base register's encoded length.
+ *
+ * The size is the entry count (two to the power of @a uEncodedLen) multiplied
+ * by 16 (presumably the size of one buffer entry in bytes).
+ *
+ * @returns The buffer size in bytes.
+ * @param   uEncodedLen     The buffer length as a power-of-2 exponent; asserted
+ *                          to be in the range 8 to 15 (both inclusive).
+ */
+DECLINLINE(uint32_t) iommuAmdGetTotalBufLength(uint8_t uEncodedLen)
+{
+    Assert(uEncodedLen > 7);
+    Assert(uEncodedLen < 16);
+
+    int const cEntries = 2 << (uEncodedLen - 1);    /* 2^uEncodedLen entries */
+    return cEntries << 4;                           /* times 16 */
+}
+
+
+/**
+ * Counts the entries in the event log that have not been consumed yet.
+ *
+ * The count is the distance from the head index to the tail index, taking
+ * wrap-around of the tail behind the head into account.
+ *
+ * @returns The number of unconsumed event log entries.
+ * @param   pThis   The shared IOMMU device state.
+ */
+static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis)
+{
+    uint32_t const idxHead = pThis->EvtLogHeadPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
+    uint32_t const idxTail = pThis->EvtLogTailPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
+
+    /* The tail has wrapped around behind the head: add the ring capacity. */
+    if (idxTail < idxHead)
+    {
+        uint32_t const cCapacity = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
+        return cCapacity - idxHead + idxTail;
+    }
+
+    return idxTail - idxHead;
+}
+
+
+#if (defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)) || defined(LOG_ENABLED)
+/**
+ * Returns a human-readable name for the I/O permission of a memory access.
+ *
+ * @returns Pointer to a read-only permission name string from g_aszPerm.
+ * @param   fPerm   The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
+ */
+static const char *iommuAmdMemAccessGetPermName(uint8_t fPerm)
+{
+    /*
+     * An access is expected to be either a read or a write, never "none" or
+     * "read+write".
+     * NOTE(review): the assertion below only rejects "none" and out-of-range
+     * values; "read+write" (3) still passes. Confirm which one is intended.
+     */
+    Assert(fPerm > 0 && fPerm < RT_ELEMENTS(g_aszPerm));
+
+    unsigned const idxName = fPerm & IOMMU_IO_PERM_MASK;
+    return g_aszPerm[idxName];
+}
+#endif
+
+
+#ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * Derives the basic DTE cache flags (IOMMU_DTE_CACHE_F_XXX) from a device table
+ * entry.
+ *
+ * Translation related flags are only extracted when the DTE is valid, and
+ * interrupt related flags only when the interrupt map is valid.
+ *
+ * @returns The basic I/O device flags.
+ * @param   pDte    The device table entry.
+ */
+static uint16_t iommuAmdGetBasicDevFlags(PCDTE_T pDte)
+{
+    uint16_t fFlags = 0;
+
+    /*
+     * Bits 127:0 of the DTE: validity, I/O permissions and page-fault suppression.
+     */
+    if (pDte->n.u1Valid)
+    {
+        /* The permission bits are moved over as a group, so the masks must agree. */
+        AssertCompile(IOMMU_DTE_CACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
+        uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
+
+        fFlags |= IOMMU_DTE_CACHE_F_VALID;
+        fFlags |= fDtePerm << IOMMU_DTE_CACHE_F_IO_PERM_SHIFT;
+
+        /** @todo Skip the if checks here (shift/mask the relevant bits over). */
+        if (pDte->n.u1SuppressPfEvents)
+            fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_IOPF;
+        if (pDte->n.u1SuppressAllPfEvents)
+            fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF;
+    }
+
+    /*
+     * Bits 255:128 of the DTE: interrupt map validity and interrupt control.
+     */
+    if (pDte->n.u1IntrMapValid)
+    {
+        /* The interrupt control bits are moved over as a group as well. */
+        AssertCompile(IOMMU_DTE_CACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK);
+        uint16_t const fIntrCtrl = IOMMU_DTE_GET_INTR_CTRL(pDte);
+
+        fFlags |= IOMMU_DTE_CACHE_F_INTR_MAP_VALID;
+        fFlags |= fIntrCtrl << IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT;
+
+        /** @todo Skip the if check here (shift/mask the relevant bit over). */
+        if (pDte->n.u1IgnoreUnmappedIntrs)
+            fFlags |= IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR;
+    }
+
+    return fFlags;
+}
+#endif
+
+
+/**
+ * Produces the remapped (destination) MSI from a source MSI and an IRTE.
+ *
+ * @param   pMsiIn      The source MSI.
+ * @param   pMsiOut     Where to store the remapped MSI.
+ * @param   pIrte       The IRTE used for the remapping.
+ */
+static void iommuAmdIrteRemapMsi(PCMSIMSG pMsiIn, PMSIMSG pMsiOut, PCIRTE_T pIrte)
+{
+    /* Start from a copy of the source so that everything the IRTE doesn't supply is preserved. */
+    *pMsiOut = *pMsiIn;
+
+    /* MSI data fields supplied by the IRTE. */
+    pMsiOut->Data.n.u3DeliveryMode = pIrte->n.u3IntrType;
+    pMsiOut->Data.n.u8Vector       = pIrte->n.u8Vector;
+
+    /* MSI address fields supplied by the IRTE. */
+    pMsiOut->Addr.n.u8DestId       = pIrte->n.u8Dest;
+    pMsiOut->Addr.n.u1DestMode     = pIrte->n.u1DestMode;
+}
+
+
+#ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * Searches the DTE cache for the slot belonging to the given device ID.
+ *
+ * @returns The index of the entry, or the cache capacity if no entry was found.
+ * @param   pThis       The shared IOMMU device state.
+ * @param   idDevice    The device ID (bus, device, function).
+ */
+DECLINLINE(uint16_t) iommuAmdDteCacheEntryLookup(PIOMMU pThis, uint16_t idDevice)
+{
+    uint16_t const cEntries = RT_ELEMENTS(pThis->aDeviceIds);
+    uint16_t       idxEntry = 0;
+
+    /* Linear scan; stops at the first match or runs off the end (== capacity). */
+    while (   idxEntry < cEntries
+           && pThis->aDeviceIds[idxEntry] != idDevice)
+        idxEntry++;
+
+    return idxEntry;
+}
+
+
+/**
+ * Gets a free/unused DTE cache entry.
+ *
+ * @returns The index of an unused entry, or cache capacity if the cache is full.
+ * @param   pThis   The shared IOMMU device state.
+ */
+DECLINLINE(uint16_t) iommuAmdDteCacheEntryGetUnused(PCIOMMU pThis)
+{
+    /*
+     * A slot counts as unused when its device ID is 0. This ASSUMES device ID 0
+     * is the PCI host bridge or the IOMMU itself (the latter being an ugly hack)
+     * and cannot be a valid device ID.
+     */
+    uint16_t const cEntries = RT_ELEMENTS(pThis->aDeviceIds);
+    uint16_t       idxFree;
+    for (idxFree = 0; idxFree < cEntries; idxFree++)
+    {
+        if (pThis->aDeviceIds[idxFree] == 0)
+            break;
+    }
+    return idxFree;
+}
+
+
+/**
+ * Populates the DTE cache slot at the given index.
+ *
+ * @param   pThis       The shared IOMMU device state.
+ * @param   idxDte      The index of the DTE cache entry.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   fFlags      Device flags to set, see IOMMU_DTE_CACHE_F_XXX.
+ * @param   idDomain    The domain ID.
+ *
+ * @remarks Requires the cache lock to be taken.
+ */
+DECL_FORCE_INLINE(void) iommuAmdDteCacheAddAtIndex(PIOMMU pThis, uint16_t idxDte, uint16_t idDevice, uint16_t fFlags,
+                                                   uint16_t idDomain)
+{
+    /* Fill in the cached DTE data... */
+    pThis->aDteCache[idxDte].idDomain = idDomain;
+    pThis->aDteCache[idxDte].fFlags   = fFlags;
+
+    /* ...and record which device owns the slot (this is what lookups match on). */
+    pThis->aDeviceIds[idxDte] = idDevice;
+}
+
+
+/**
+ * Adds a DTE cache entry for a device that isn't cached yet.
+ *
+ * Nothing happens when the device ID already occupies a slot or when no unused
+ * slot is available.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   pDte        The device table entry.
+ */
+static void iommuAmdDteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte)
+{
+    PIOMMU         pThis    = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    uint16_t const idDomain = pDte->n.u16DomainId;
+    uint16_t const fFlags   = iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT;
+
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    if (iommuAmdDteCacheEntryLookup(pThis, idDevice) >= cDteCache)
+    {
+        /* Not found: claim a new/unused slot if there is one. */
+        uint16_t const idxFree = iommuAmdDteCacheEntryGetUnused(pThis);
+        if (idxFree < cDteCache)
+            iommuAmdDteCacheAddAtIndex(pThis, idxFree, idDevice, fFlags, idDomain);
+    }
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+/**
+ * Modifies the flags of the DTE cache entry at the given index.
+ *
+ * The OR-mask is applied first, the AND-mask afterwards, so a flag present in
+ * both masks ends up cleared.
+ *
+ * @param   pThis       The shared IOMMU device state.
+ * @param   idxDte      The index of the DTE cache entry.
+ * @param   fOrMask     Device flags to add to the existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ * @param   fAndMask    Device flags to remove from the existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ *
+ * @remarks Requires the cache lock to be taken.
+ */
+DECL_FORCE_INLINE(void) iommuAmdDteCacheUpdateFlagsForIndex(PIOMMU pThis, uint16_t idxDte, uint16_t fOrMask, uint16_t fAndMask)
+{
+    uint16_t const fCurFlags = pThis->aDteCache[idxDte].fFlags;
+    Assert(fCurFlags & IOMMU_DTE_CACHE_F_PRESENT);
+
+    uint16_t const fUpdatedFlags = (fCurFlags | fOrMask) & ~fAndMask;
+    pThis->aDteCache[idxDte].fFlags = fUpdatedFlags;
+}
+
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/**
+ * Updates the flags of the DTE cache entry of a device, creating the entry first
+ * when the device isn't cached yet. If the cache is full, nothing happens.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   pDte        The device table entry.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   fOrMask     Device flags to add to the existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ * @param   fAndMask    Device flags to remove from the existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ */
+static void iommuAmdDteCacheAddOrUpdateFlags(PPDMDEVINS pDevIns, PCDTE_T pDte, uint16_t idDevice, uint16_t fOrMask,
+                                             uint16_t fAndMask)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    uint16_t       idxDte    = iommuAmdDteCacheEntryLookup(pThis, idDevice);
+    if (idxDte >= cDteCache)
+    {
+        /* Not cached yet: build the complete flag set and add it to an unused slot. */
+        idxDte = iommuAmdDteCacheEntryGetUnused(pThis);
+        if (idxDte < cDteCache)
+        {
+            uint16_t const fFlags = (iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT | fOrMask) & ~fAndMask;
+            iommuAmdDteCacheAddAtIndex(pThis, idxDte, idDevice, fFlags, pDte->n.u16DomainId);
+        }
+        /* else: cache is full, shouldn't really happen. */
+    }
+    else
+        iommuAmdDteCacheUpdateFlagsForIndex(pThis, idxDte, fOrMask, fAndMask);
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+#endif
+
+
+/**
+ * Modifies the flags of the DTE cache entry of a device, if the device is cached.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   fOrMask     Device flags to add to the existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ * @param   fAndMask    Device flags to remove from the existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ */
+static void iommuAmdDteCacheUpdateFlags(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t fOrMask, uint16_t fAndMask)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    uint16_t const idxDte    = iommuAmdDteCacheEntryLookup(pThis, idDevice);
+    bool const     fCached   = idxDte < cDteCache;
+    if (fCached)
+        iommuAmdDteCacheUpdateFlagsForIndex(pThis, idxDte, fOrMask, fAndMask);
+    /* else: the device has no cache entry, nothing to update. */
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+# ifdef IN_RING3
+/**
+ * Removes a DTE cache entry.
+ *
+ * The slot is released completely: besides clearing the cached flags and domain
+ * ID, the device ID recorded for the slot is reset to 0. Without resetting the
+ * device ID, iommuAmdDteCacheEntryLookup() would keep matching the stale slot
+ * (which no longer has IOMMU_DTE_CACHE_F_PRESENT set), so the device could not
+ * be added to the cache again and iommuAmdDteCacheEntryGetUnused() would never
+ * hand the slot out to another device.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID to remove cache entries for.
+ */
+static void iommuAmdDteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    uint16_t const idxDte    = iommuAmdDteCacheEntryLookup(pThis, idDevice);
+    if (idxDte < cDteCache)
+    {
+        /* Device ID 0 marks the slot as unused (same state iommuAmdDteCacheRemoveAll leaves behind). */
+        pThis->aDeviceIds[idxDte]         = 0;
+        pThis->aDteCache[idxDte].fFlags   = 0;
+        pThis->aDteCache[idxDte].idDomain = 0;
+    }
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+/**
+ * Empties the device table entry cache.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ */
+static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+    /* Wipe both the cached DTE data and the device IDs that own the slots. */
+    RT_ZERO(pThis->aDteCache);
+    RT_ZERO(pThis->aDeviceIds);
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+# endif /* IN_RING3 */
+#endif /* IOMMU_WITH_DTE_CACHE */
+
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/**
+ * Makes the IOTLB entry the least recently used one, i.e. moves it to the front
+ * of the LRU list.
+ *
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   pIotlbe     The IOTLB entry to move.
+ */
+DECLINLINE(void) iommuAmdIotlbEntryMoveToLru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe)
+{
+    /* Already at the front of the list, nothing to do. */
+    if (RTListNodeIsFirst(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru))
+        return;
+
+    RTListNodeRemove(&pIotlbe->NdLru);
+    RTListPrepend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
+}
+
+
+/**
+ * Makes the IOTLB entry the most recently used one, i.e. moves it to the back of
+ * the LRU list.
+ *
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   pIotlbe     The IOTLB entry to move.
+ */
+DECLINLINE(void) iommuAmdIotlbEntryMoveToMru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe)
+{
+    /* Already at the back of the list, nothing to do. */
+    if (RTListNodeIsLast(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru))
+        return;
+
+    RTListNodeRemove(&pIotlbe->NdLru);
+    RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
+}
+
+
+# ifdef IN_RING3
+/**
+ * AVL tree enumeration callback that prints an IOTLB entry via the debug info
+ * helper when the entry belongs to the requested domain.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pNode       Pointer to an IOTLB entry to dump info.
+ * @param   pvUser      Pointer to an IOTLBEINFOARG.
+ */
+static DECLCALLBACK(int) iommuAmdR3IotlbEntryInfo(PAVLU64NODECORE pNode, void *pvUser)
+{
+    /* Validate. */
+    PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser;
+    AssertPtr(pArgs);
+    AssertPtr(pArgs->pIommuR3);
+    AssertPtr(pArgs->pHlp);
+    //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC);
+
+    /* Entries belonging to other domains are skipped. */
+    uint16_t const idNodeDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
+    if (idNodeDomain != pArgs->idDomain)
+        return VINF_SUCCESS;
+
+    /* Gather everything that gets printed. */
+    PCIOTLBE        pEntry        = (PCIOTLBE)pNode;
+    AVLU64KEY const uKey          = pEntry->Core.Key;
+    uint64_t const  uIova         = IOMMU_IOTLB_KEY_GET_IOVA(uKey);
+    RTGCPHYS const  GCPhysSpa     = pEntry->PageLookup.GCPhysSpa;
+    uint8_t const   cShift        = pEntry->PageLookup.cShift;
+    size_t const    cbPage        = RT_BIT_64(cShift);
+    uint8_t const   fPerm         = pEntry->PageLookup.fPerm;
+    const char     *pszPerm       = iommuAmdMemAccessGetPermName(fPerm);
+    bool const      fEvictPending = pEntry->fEvictPending;
+
+    /* Dump the entry. */
+    PCDBGFINFOHLP pHlp = pArgs->pHlp;
+    pHlp->pfnPrintf(pHlp, " Key = %#RX64 (%#RX64)\n", uKey, uIova);
+    pHlp->pfnPrintf(pHlp, " GCPhys = %#RGp\n", GCPhysSpa);
+    pHlp->pfnPrintf(pHlp, " cShift = %u (%zu bytes)\n", cShift, cbPage);
+    pHlp->pfnPrintf(pHlp, " fPerm = %#x (%s)\n", fPerm, pszPerm);
+    pHlp->pfnPrintf(pHlp, " fEvictPending = %RTbool\n", fEvictPending);
+
+    return VINF_SUCCESS;
+}
+# endif /* IN_RING3 */
+
+
+/**
+ * AVL tree enumeration callback that flags an IOTLB entry for later eviction when
+ * it is associated with the specified domain ID.
+ *
+ * The entry stays in the tree; it is only marked and moved to the least recently
+ * used position of the LRU list.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pNode       Pointer to an IOTLBE.
+ * @param   pvUser      Pointer to an IOTLBEFLUSHARG containing the domain ID.
+ */
+static DECLCALLBACK(int) iommuAmdIotlbEntryRemoveDomainId(PAVLU64NODECORE pNode, void *pvUser)
+{
+    /* Validate. */
+    PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser;
+    AssertPtr(pArgs);
+    AssertPtr(pArgs->pIommuR3);
+    //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC);
+
+    /* Leave entries of other domains alone. */
+    uint16_t const idNodeDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
+    if (idNodeDomain != pArgs->idDomain)
+        return VINF_SUCCESS;
+
+    /* Mark the entry as invalidated; it needs to be evicted later. */
+    PIOTLBE pEntry = (PIOTLBE)pNode;
+    pEntry->fEvictPending = true;
+    iommuAmdIotlbEntryMoveToLru(pArgs->pIommuR3, pEntry);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * AVL tree destruction callback that resets an IOTLB entry that's in the tree.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pNode       Pointer to an IOTLBE.
+ * @param   pvUser      Opaque data. Currently not used, will be NULL.
+ */
+static DECLCALLBACK(int) iommuAmdIotlbEntryDestroy(PAVLU64NODECORE pNode, void *pvUser)
+{
+    PIOTLBE pEntry = (PIOTLBE)pNode;
+    Assert(pEntry);
+    RT_NOREF(pvUser);
+
+    /* Forget the cached translation and any pending eviction. */
+    pEntry->fEvictPending = false;
+    RT_ZERO(pEntry->PageLookup);
+
+    /* Detach the entry from the LRU list by clearing its links. */
+    pEntry->NdLru.pPrev = NULL;
+    pEntry->NdLru.pNext = NULL;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Inserts an IOTLB entry into the cache, or refreshes the entry already cached
+ * for the same domain ID and I/O virtual address.
+ *
+ * When an entry with the same key already exists in the tree, that entry is
+ * updated in place and @a pIotlbe is left untouched.
+ *
+ * @param   pThis           The shared IOMMU device state.
+ * @param   pThisR3         The ring-3 IOMMU device state.
+ * @param   pIotlbe         The IOTLB entry to initialize and insert.
+ * @param   idDomain        The domain ID.
+ * @param   uIova           The I/O virtual address.
+ * @param   pPageLookup     The I/O page lookup result of the access.
+ */
+static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOMMUR3 pThisR3, PIOTLBE pIotlbe, uint16_t idDomain, uint64_t uIova,
+                                     PCIOPAGELOOKUP pPageLookup)
+{
+    AVLU64KEY const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova);
+    Assert(uKey != IOMMU_IOTLB_KEY_NIL);
+
+    /* An entry with this key is cached already: update it rather than inserting a duplicate. */
+    PIOTLBE pExisting = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey);
+    if (pExisting)
+    {
+        Assert(pExisting->Core.Key == uKey);
+        if (pExisting->fEvictPending)
+        {
+            /* The entry was awaiting lazy eviction; it becomes valid again. */
+            pExisting->fEvictPending = false;
+            STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse); NOREF(pThis);
+        }
+        pExisting->PageLookup = *pPageLookup;
+        return;
+    }
+
+    /* Initialize the IOTLB entry with results of the I/O page walk and insert it. */
+    pIotlbe->Core.Key   = uKey;
+    pIotlbe->PageLookup = *pPageLookup;
+    Assert(!pIotlbe->fEvictPending);
+
+    bool const fInserted = RTAvlU64Insert(&pThisR3->TreeIotlbe, &pIotlbe->Core);
+    Assert(fInserted); NOREF(fInserted);
+    Assert(pThisR3->cCachedIotlbes < IOMMU_IOTLBE_MAX);
+    ++pThisR3->cCachedIotlbes;
+    STAM_COUNTER_INC(&pThis->StatIotlbeCached); NOREF(pThis);
+}
+
+
+/**
+ * Takes the IOTLB entry with the given key out of the cache tree.
+ *
+ * The removed entry is reset (key, page lookup and pending-eviction state) but
+ * remains linked into the LRU list.
+ *
+ * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in
+ *          the tree.
+ * @param   pThis       The shared IOMMU device state.
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   uKey        The key of the IOTLB entry to remove.
+ */
+static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, PIOMMUR3 pThisR3, AVLU64KEY uKey)
+{
+    PIOTLBE pRemoved = (PIOTLBE)RTAvlU64Remove(&pThisR3->TreeIotlbe, uKey);
+    if (!pRemoved)
+        return NULL;
+
+    if (pRemoved->fEvictPending)
+        STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse);
+
+    /* Reset the entry. We must not erase the LRU node connections here! */
+    RT_ZERO(pRemoved->Core);
+    RT_ZERO(pRemoved->PageLookup);
+    pRemoved->fEvictPending = false;
+    Assert(pRemoved->Core.Key == IOMMU_IOTLB_KEY_NIL);
+
+    /* One cached entry less. */
+    Assert(pThisR3->cCachedIotlbes > 0);
+    --pThisR3->cCachedIotlbes;
+    STAM_COUNTER_DEC(&pThis->StatIotlbeCached); NOREF(pThis);
+
+    return pRemoved;
+}
+
+
+/**
+ * Looks up a usable IOTLB entry in the cache.
+ *
+ * @returns Pointer to IOTLB entry if found (and not pending eviction), NULL
+ *          otherwise.
+ * @param   pThis       The shared IOMMU device state.
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   idDomain    The domain ID.
+ * @param   uIova       The I/O virtual address.
+ */
+static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, PIOMMUR3 pThisR3, uint64_t idDomain, uint64_t uIova)
+{
+    RT_NOREF(pThis);
+
+    uint64_t const uKey   = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova);
+    PIOTLBE        pEntry = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey);
+    if (!pEntry)
+        return NULL;
+
+    /*
+     * Domain Id wildcard invalidations only mark entries for eviction later but don't
+     * remove them from the cache immediately. For an entry pending eviction, just
+     * report that nothing was found (rather than evicting now).
+     */
+    if (pEntry->fEvictPending)
+        return NULL;
+
+    return pEntry;
+}
+
+
+/**
+ * Caches the translation of a single 4K page in the IOTLB.
+ *
+ * @param   pThis           The shared IOMMU device state.
+ * @param   pThisR3         The ring-3 IOMMU device state.
+ * @param   idDomain        The domain ID.
+ * @param   uIovaPage       The I/O virtual address (must be 4K aligned).
+ * @param   pPageLookup     The I/O page lookup result of the access.
+ */
+static void iommuAmdIotlbAdd(PIOMMU pThis, PIOMMUR3 pThisR3, uint16_t idDomain, uint64_t uIovaPage, PCIOPAGELOOKUP pPageLookup)
+{
+    Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
+    Assert(pPageLookup);
+    Assert(pPageLookup->cShift <= 51);
+    Assert(pPageLookup->fPerm != IOMMU_IO_PERM_NONE);
+
+    /*
+     * Pick the entry to use: when the pre-allocated entries are exhausted the least
+     * recently used entry is recycled, otherwise the next unused pre-allocated entry
+     * is taken.
+     */
+    bool const fRecycleLru = pThisR3->idxUnusedIotlbe == IOMMU_IOTLBE_MAX;
+    PIOTLBE    pEntry;
+    if (fRecycleLru)
+    {
+        /* Grab the least recently used entry. */
+        pEntry = RTListGetFirst(&pThisR3->LstLruIotlbe, IOTLBE, NdLru);
+        Assert(pEntry);
+
+        /* If the entry is in the cache, remove it. */
+        if (pEntry->Core.Key != IOMMU_IOTLB_KEY_NIL)
+            iommuAmdIotlbEntryRemove(pThis, pThisR3, pEntry->Core.Key);
+    }
+    else
+    {
+        /* Grab an unused IOTLB entry from the pre-allocated list. */
+        pEntry = &pThisR3->paIotlbes[pThisR3->idxUnusedIotlbe];
+        ++pThisR3->idxUnusedIotlbe;
+    }
+
+    /* Initialize and insert the IOTLB entry into the cache. */
+    iommuAmdIotlbEntryInsert(pThis, pThisR3, pEntry, idDomain, uIovaPage, pPageLookup);
+
+    /* Place the entry in the most recently used slot. */
+    if (fRecycleLru)
+        iommuAmdIotlbEntryMoveToMru(pThisR3, pEntry);
+    else
+        RTListAppend(&pThisR3->LstLruIotlbe, &pEntry->NdLru);
+}
+
+
+/**
+ * Flushes the entire IOTLB cache.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ */
+static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns)
+{
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /* Only bother when something is cached. */
+    if (pThisR3->cCachedIotlbes > 0)
+    {
+        /* Tear down the tree (resetting each entry), then start over with an empty cache. */
+        RTAvlU64Destroy(&pThisR3->TreeIotlbe, iommuAmdIotlbEntryDestroy, NULL /* pvParam */);
+        pThisR3->cCachedIotlbes  = 0;
+        pThisR3->idxUnusedIotlbe = 0;
+        RTListInit(&pThisR3->LstLruIotlbe);
+        STAM_COUNTER_RESET(&pThis->StatIotlbeCached);
+    }
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+/**
+ * Invalidates the cached IOTLB entries of a domain for a range of I/O virtual
+ * addresses, one 4K page at a time.
+ *
+ * @param   pDevIns         The IOMMU instance data.
+ * @param   idDomain        The domain ID.
+ * @param   uIova           The I/O virtual address to invalidate.
+ * @param   cbInvalidate    The size of the invalidation (must be 4K aligned).
+ */
+static void iommuAmdIotlbRemoveRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIova, size_t cbInvalidate)
+{
+    /* Validate. */
+    Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
+    Assert(!(cbInvalidate & X86_PAGE_4K_OFFSET_MASK));
+    Assert(cbInvalidate >= X86_PAGE_4K_SIZE);
+
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /* At least one page is always processed; the loop ends once the size is used up. */
+    for (;;)
+    {
+        uint64_t const uKey     = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova);
+        PIOTLBE        pRemoved = iommuAmdIotlbEntryRemove(pThis, pThisR3, uKey);
+        if (pRemoved)
+            iommuAmdIotlbEntryMoveToLru(pThisR3, pRemoved); /* Recycle removed entries first. */
+
+        uIova        += X86_PAGE_4K_SIZE;
+        cbInvalidate -= X86_PAGE_4K_SIZE;
+        if (cbInvalidate == 0)
+            break;
+    }
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+/**
+ * Invalidates all IOTLB entries of the specified domain ID.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDomain    The domain ID.
+ */
+static void iommuAmdIotlbRemoveDomainId(PPDMDEVINS pDevIns, uint16_t idDomain)
+{
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /*
+     * We need to iterate the tree and search based on the domain ID.
+     * But it seems we cannot remove items while iterating the tree.
+     * Thus, we simply mark entries for eviction later but move them to the LRU
+     * so they will eventually get evicted and re-cycled as the cache gets re-populated.
+     */
+    IOTLBEFLUSHARG FlushArgs;
+    FlushArgs.idDomain = idDomain;
+    FlushArgs.pIommuR3 = pThisR3;
+    RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &FlushArgs);
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+/**
+ * Caches the translation of a contiguous range of I/O virtual addresses, adding
+ * or updating one IOTLB entry per 4K page.
+ *
+ * At most IOMMU_IOTLBE_MAX pages of the range are cached.
+ *
+ * @param   pDevIns         The IOMMU instance data.
+ * @param   idDomain        The domain ID.
+ * @param   uIovaPage       The I/O virtual address (must be 4K aligned).
+ * @param   cbContiguous    The size of the access.
+ * @param   pAddrOut        The translated I/O address lookup.
+ *
+ * @remarks All pages in the range specified by @c cbContiguous must have identical
+ *          permissions and page sizes.
+ */
+static void iommuAmdIotlbAddRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIovaPage, size_t cbContiguous,
+                                  PCIOPAGELOOKUP pAddrOut)
+{
+    Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
+
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    /* Round the access size up to whole 4K pages and cap the page count. */
+    size_t const cbIova = RT_ALIGN_Z(cbContiguous, X86_PAGE_4K_SIZE);
+    Assert(!(cbIova & X86_PAGE_4K_OFFSET_MASK));
+    Assert(cbIova >= X86_PAGE_4K_SIZE);
+    size_t const cPagesToAdd = RT_MIN(cbIova / X86_PAGE_4K_SIZE, IOMMU_IOTLBE_MAX);
+
+    /* Per-page lookup result, starting at the 4K page base of the translated address. */
+    IOPAGELOOKUP PageLookup;
+    PageLookup.fPerm     = pAddrOut->fPerm;
+    PageLookup.cShift    = pAddrOut->cShift;
+    PageLookup.GCPhysSpa = pAddrOut->GCPhysSpa & X86_PAGE_4K_BASE_MASK;
+
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+    /** @todo Re-check DTE cache? */
+    /*
+     * Add IOTLB entries for every page in the access.
+     * The page size and permissions are assumed to be identical to every
+     * page in this access.
+     */
+    for (size_t iPage = 0; iPage < cPagesToAdd; iPage++)
+    {
+        iommuAmdIotlbAdd(pThis, pThisR3, idDomain, uIovaPage, &PageLookup);
+        PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE;
+        uIovaPage            += X86_PAGE_4K_SIZE;
+    }
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+#endif /* IOMMU_WITH_IOTLBE_CACHE */
+
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+/**
+ * Searches the IRTE cache for the entry of the given device and IRTE offset.
+ *
+ * @returns Index of the found entry, or cache capacity if not found.
+ * @param   pThis       The shared IOMMU device state.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   offIrte     The offset into the interrupt remap table.
+ */
+static uint16_t iommuAmdIrteCacheEntryLookup(PCIOMMU pThis, uint16_t idDevice, uint16_t offIrte)
+{
+    /** @todo Consider sorting and binary search when the cache capacity grows.
+     *  For the IRTE cache this should be okay since typically guests do not alter the
+     *  interrupt remapping once programmed, so hopefully sorting shouldn't happen
+     *  often. */
+    uint32_t const uWantedKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte);
+    uint16_t const cEntries   = RT_ELEMENTS(pThis->aIrteCache);
+    uint16_t       idxEntry   = 0;
+
+    /* Linear scan; stops at the first match or runs off the end (== capacity). */
+    while (   idxEntry < cEntries
+           && pThis->aIrteCache[idxEntry].uKey != uWantedKey)
+        idxEntry++;
+
+    return idxEntry;
+}
+
+
+/**
+ * Finds a free/unused IRTE cache entry, i.e. one whose key is
+ * IOMMU_IRTE_CACHE_KEY_NIL.
+ *
+ * @returns The index of an unused entry, or cache capacity if the cache is full.
+ * @param   pThis   The shared IOMMU device state.
+ */
+static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis)
+{
+    uint16_t const cEntries = RT_ELEMENTS(pThis->aIrteCache);
+    for (uint16_t idxEntry = 0; idxEntry < cEntries; idxEntry++)
+    {
+        if (pThis->aIrteCache[idxEntry].uKey != IOMMU_IRTE_CACHE_KEY_NIL)
+            continue;
+
+        /* Unused entries are expected to have a zeroed IRTE. */
+        Assert(!pThis->aIrteCache[idxEntry].Irte.u32);
+        return idxEntry;
+    }
+    return cEntries;
+}
+
+
+/**
+ * Looks up the IRTE cache for the given MSI.
+ *
+ * The cache resolves the MSI (and @a pMsiOut is written) in these cases only:
+ *   - The device's DTE is cached with a valid interrupt map, the MSI delivery
+ *     mode is at most VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO and either:
+ *       - interrupt control is IOMMU_INTR_CTRL_REMAP and the IRTE is cached, in
+ *         which case the MSI is remapped using the cached IRTE, or
+ *       - interrupt control is IOMMU_INTR_CTRL_FWD_UNMAPPED, in which case the
+ *         MSI is copied through unmodified.
+ *   - The device's DTE is cached (present) but its interrupt map is not flagged
+ *     valid, in which case the MSI is copied through unmodified.
+ * Everything else is left to the slower/fallback path.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if the cache resolved the MSI and @a pMsiOut was written.
+ * @retval VERR_NOT_FOUND if the cache could not resolve the MSI; @a pMsiOut is
+ *         not written in this case.
+ *
+ * @param pDevIns The IOMMU instance data.
+ * @param idDevice The device ID (bus, device, function).
+ * @param enmOp The IOMMU operation being performed (currently unused).
+ * @param pMsiIn The source MSI.
+ * @param pMsiOut Where to store the remapped MSI.
+ */
+static int iommuAmdIrteCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
+{
+ RT_NOREF(enmOp); /* May need it if we have to report errors (currently we fallback to the slower path to do that). */
+
+ int rc = VERR_NOT_FOUND; /* Stays this way unless one of the cached cases below applies. */
+ /* Deal with such cases in the slower/fallback path. */
+ if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
+ { /* likely */ }
+ else
+ return rc; /* The cache lock has not been taken yet, so a plain return is fine here. */
+
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+ uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice);
+ if (idxDteCache < RT_ELEMENTS(pThis->aDteCache))
+ {
+ PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache];
+ if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
+ == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
+ {
+ Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE); /* Paranoia. */
+
+ /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
+ uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
+ if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
+ {
+ uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT)
+ & IOMMU_DTE_CACHE_F_INTR_CTRL_MASK;
+ if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
+ {
+ /* Interrupt table length has been verified prior to adding entries to the cache. */
+ uint16_t const offIrte = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
+ uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte);
+ if (idxIrteCache < RT_ELEMENTS(pThis->aIrteCache))
+ {
+ PCIRTE_T pIrte = &pThis->aIrteCache[idxIrteCache].Irte;
+ Assert(pIrte->n.u1RemapEnable);
+ Assert(pIrte->n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO);
+ iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
+ rc = VINF_SUCCESS;
+ }
+ }
+ else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
+ {
+ *pMsiOut = *pMsiIn; /* The output is a plain copy of the source MSI. */
+ rc = VINF_SUCCESS;
+ }
+ }
+ }
+ else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT) /* DTE is cached, but its interrupt map isn't flagged valid. */
+ {
+ *pMsiOut = *pMsiIn; /* The output is a plain copy of the source MSI. */
+ rc = VINF_SUCCESS;
+ }
+ }
+
+ IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+ return rc;
+}
+
+
+/**
+ * Caches the given IRTE, overwriting the entry already cached for the same
+ * device and IRTE offset if there is one.
+ *
+ * @returns VBox status code.
+ * @retval  VERR_OUT_OF_RESOURCES if the cache is full.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   offIrte     The offset into the interrupt remap table.
+ * @param   pIrte       The IRTE to cache.
+ */
+static int iommuAmdIrteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t offIrte, PCIRTE_T pIrte)
+{
+    Assert(offIrte != 0xffff);  /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    Assert(idDevice != pThis->uPciAddress);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /* Prefer the existing entry; fall back to an unused slot when there is none. */
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
+    uint16_t       idxSlot    = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte);
+    if (idxSlot >= cIrteCache)
+        idxSlot = iommuAmdIrteCacheEntryGetUnused(pThis);
+
+    int rc;
+    if (idxSlot < cIrteCache)
+    {
+        pThis->aIrteCache[idxSlot].Irte = *pIrte;
+        pThis->aIrteCache[idxSlot].uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte);
+        rc = VINF_SUCCESS;
+    }
+    else
+        rc = VERR_OUT_OF_RESOURCES;
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+    return rc;
+}
+
+
+# ifdef IN_RING3
+/**
+ * Invalidates every IRTE cache entry that belongs to the given device ID.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID (bus, device, function).
+ */
+static void iommuAmdIrteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /* There could be multiple IRTE entries for a device ID, so the whole cache is scanned. */
+    uint16_t const cEntries = RT_ELEMENTS(pThis->aIrteCache);
+    for (uint16_t idxEntry = 0; idxEntry < cEntries; idxEntry++)
+    {
+        PIRTECACHE pEntry = &pThis->aIrteCache[idxEntry];
+        if (IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pEntry->uKey) != idDevice)
+            continue;
+
+        pEntry->Irte.u32 = 0;
+        pEntry->uKey     = IOMMU_IRTE_CACHE_KEY_NIL;
+    }
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+
+
+/**
+ * Invalidates the entire IRTE cache.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ */
+static void iommuAmdIrteCacheRemoveAll(PPDMDEVINS pDevIns)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /* Reset every entry to the unused state: NIL key and a zeroed IRTE. */
+    uint16_t const cEntries = RT_ELEMENTS(pThis->aIrteCache);
+    for (uint16_t idxEntry = 0; idxEntry < cEntries; idxEntry++)
+    {
+        PIRTECACHE pEntry = &pThis->aIrteCache[idxEntry];
+        pEntry->Irte.u32 = 0;
+        pEntry->uKey     = IOMMU_IRTE_CACHE_KEY_NIL;
+    }
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+# endif /* IN_RING3 */
+#endif /* IOMMU_WITH_IRTE_CACHE */
+
+
+/**
+ * Takes an atomic snapshot of the control register without locking the IOMMU
+ * device.
+ *
+ * @returns The control register.
+ * @param   pThis   The shared IOMMU device state.
+ */
+DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis)
+{
+    /* Read the full 64-bit value in one atomic operation. */
+    IOMMU_CTRL_T CtrlSnapshot;
+    CtrlSnapshot.u64 = ASMAtomicReadU64((volatile uint64_t *)&pThis->Ctrl.u64);
+    return CtrlSnapshot;
+}
+
+
+/**
+ * Checks the MSI capability header of the IOMMU's PCI device to see whether MSI
+ * is enabled.
+ *
+ * @returns Whether MSI is enabled.
+ * @param   pDevIns     The IOMMU device instance.
+ *
+ * @note    There should be a PCIDevXxx function for this.
+ */
+static bool iommuAmdIsMsiEnabled(PPDMDEVINS pDevIns)
+{
+    /* Fetch the MSI capability header from PCI config space and test the enable bit. */
+    MSI_CAP_HDR_T CapHdr;
+    CapHdr.u32 = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_MSI_CAP_HDR);
+    return CapHdr.n.u1MsiEnable;
+}
+
+
+/**
+ * Signals a PCI target abort by setting the corresponding bit in the PCI status
+ * register of the IOMMU's PCI device.
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ */
+static void iommuAmdSetPciTargetAbort(PPDMDEVINS pDevIns)
+{
+    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+
+    /* Read-modify-write so the other status bits are preserved. */
+    uint16_t u16Status = PDMPciDevGetStatus(pPciDev);
+    u16Status |= VBOX_PCI_STATUS_SIG_TARGET_ABORT;
+    PDMPciDevSetStatus(pPciDev, u16Status);
+}
+
+
+/**
+ * Signals the command thread when there are commands waiting to be processed and
+ * the thread hasn't been signaled already.
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ *
+ * @remarks The IOMMU lock must be held while calling this!
+ */
+static void iommuAmdCmdThreadWakeUpIfNeeded(PPDMDEVINS pDevIns)
+{
+    Log4Func(("\n"));
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    /* The command buffer isn't running. */
+    if (!pThis->Status.n.u1CmdBufRunning)
+        return;
+
+    /* Head and tail are equal: no commands are pending. */
+    if (pThis->CmdBufTailPtr.n.off == pThis->CmdBufHeadPtr.n.off)
+        return;
+
+    /* Mark the thread as signaled; if it already was, don't signal it again. */
+    if (ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, true))
+        return;
+
+    Log4Func(("Signaling command thread\n"));
+    PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
+}
+
+
+/**
+ * Reads the Device Table Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdDevTabBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    /* The first entry of the device table base address array backs this register. */
+    *pu64Value = pThis->aDevTabBaseAddrs[0].u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Command Buffer Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdCmdBufBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->CmdBufBaseAddr.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Event Log Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->EvtLogBaseAddr.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Control Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->Ctrl.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Exclusion Range Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdExclRangeBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->ExclRangeBaseAddr.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Exclusion Range Limit Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdExclRangeLimit_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->ExclRangeLimit.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Extended Feature Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdExtFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->ExtFeat.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the PPR Log Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdPprLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->PprLogBaseAddr.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Hardware Event Register (Hi).
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdHwEvtHi_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    *pu64Value = pThis->HwEvtHi.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Hardware Event Register (Lo).
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The shared IOMMU device state.
+ * @param   offReg      The register offset (unused).
+ * @param   pu64Value   Where to store the register value.
+ */
+static VBOXSTRICTRC iommuAmdHwEvtLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+    RT_NOREF(offReg);
+
+    /* Unlike most other registers, this one is stored as a plain value rather than a union. */
+    *pu64Value = pThis->HwEvtLo;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Hardware Event Status Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdHwEvtStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->HwEvtStatus.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the GA Log Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdGALogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->GALogBaseAddr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the PPR Log B Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdPprLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->PprLogBBaseAddr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Event Log B Base Address Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->EvtLogBBaseAddr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads a Device Table Segment Base Address Register.
+ *
+ * The segment is selected by the register's MMIO offset relative to
+ * IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST (one register every 8 bytes).
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the segment register being read.
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdDevTabSegBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    RT_NOREF(pDevIns);
+
+    /* Work out which segment is being read; the segment registers start at array index 1. */
+    uint8_t const iSegZeroBased = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
+    uint8_t const iSegment      = iSegZeroBased + 1;
+    Assert(iSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
+
+    uint64_t const uRegValue = pThis->aDevTabBaseAddrs[iSegment].u64;
+    *pu64Value = uRegValue;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Device Specific Feature Extension (DSFX) Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdDevSpecificFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->DevSpecificFeat.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+/**
+ * Reads the Device Specific Control Extension (DSCX) Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdDevSpecificCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->DevSpecificCtrl.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Device Specific Status Extension (DSSX) Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdDevSpecificStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->DevSpecificStatus.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit)
+ * as one 64-bit value.
+ *
+ * The low dword comes from MiscInfo.au32[0] and the high dword from MiscInfo.au32[1].
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the combined 64-bit value.
+ */
+static VBOXSTRICTRC iommuAmdDevMsiVector_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint32_t const u32Vector0 = pThis->MiscInfo.au32[0];
+    uint32_t const u32Vector1 = pThis->MiscInfo.au32[1];
+    RT_NOREF(pDevIns, offReg);
+    *pu64Value = RT_MAKE_U64(u32Vector0, u32Vector1);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the MSI Capability Header Register (32-bit) and the MSI Address (Lo)
+ * Register (32-bit) as one 64-bit value.
+ *
+ * Both dwords are fetched from the PCI configuration space of the first PCI device
+ * of the instance; the capability header ends up in the low dword.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data (unused).
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the combined 64-bit value.
+ */
+static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+    RT_NOREF(pThis, offReg);
+
+    uint32_t const u32CapHdr = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
+    uint32_t const u32AddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
+    *pu64Value = RT_MAKE_U64(u32CapHdr, u32AddrLo);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the MSI Address (Hi) Register (32-bit) and the MSI Data Register (32-bit)
+ * as one 64-bit value.
+ *
+ * Both dwords are fetched from the PCI configuration space of the first PCI device
+ * of the instance; the address (Hi) ends up in the low dword.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data (unused).
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the combined 64-bit value.
+ */
+static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+    RT_NOREF(pThis, offReg);
+
+    uint32_t const u32AddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
+    uint32_t const u32Data   = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
+    *pu64Value = RT_MAKE_U64(u32AddrHi, u32Data);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Command Buffer Head Pointer Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->CmdBufHeadPtr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Command Buffer Tail Pointer Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdCmdBufTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->CmdBufTailPtr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Event Log Head Pointer Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->EvtLogHeadPtr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Event Log Tail Pointer Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->EvtLogTailPtr.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Status Register.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   pu64Value   Where to store the 64-bit register value.
+ */
+static VBOXSTRICTRC iommuAmdStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
+{
+    uint64_t const uRegValue = pThis->Status.u64;
+    *pu64Value = uRegValue;
+    RT_NOREF(pDevIns, offReg);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Device Table Base Address Register.
+ *
+ * Only the bits covered by IOMMU_DEV_TAB_BAR_VALID_MASK are stored; the value goes
+ * into entry 0 of the device table base address array.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdDevTabBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /* Store the value with every unrecognized bit cleared. */
+    uint64_t const uNewBar = u64Value & IOMMU_DEV_TAB_BAR_VALID_MASK;
+    pThis->aDevTabBaseAddrs[0].u64 = uNewBar;
+
+    /* Paranoia: the size field must not exceed the maximum for segment 0. */
+    Assert(pThis->aDevTabBaseAddrs[0].n.u9Size <= g_auDevTabSegMaxSizes[0]);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Command Buffer Base Address Register.
+ *
+ * The write is dropped while the command buffer is running, or when the length
+ * field of the (masked) value is below 8. An accepted write also resets the
+ * command buffer head and tail pointers to zero.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdCmdBufBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /*
+     * The AMD IOMMU spec. is not as explicit here as it is for the event log base
+     * address register, but it does state "CmdBufRun must be 0b to modify the command
+     * buffer registers properly". So writes are dropped while the buffer is running.
+     */
+    if (pThis->Status.n.u1CmdBufRunning)
+    {
+        LogFunc(("Setting CmdBufBar (%#RX64) when command buffer is running -> Ignored\n", u64Value));
+        return VINF_SUCCESS;
+    }
+
+    /* Strip every unrecognized bit. */
+    CMD_BUF_BAR_T NewBar;
+    NewBar.u64 = u64Value & IOMMU_CMD_BUF_BAR_VALID_MASK;
+
+    /* Reject length encodings below the accepted minimum. */
+    if (NewBar.n.u4Len < 8)
+    {
+        LogFunc(("Command buffer length (%#x) invalid -> Ignored\n", NewBar.n.u4Len));
+        return VINF_SUCCESS;
+    }
+
+    /* Commit the new base address. */
+    pThis->CmdBufBaseAddr.u64 = NewBar.u64;
+
+    /*
+     * A write to the command buffer base address resets the head and tail pointers.
+     * See AMD IOMMU spec. 2.4 "Commands".
+     */
+    pThis->CmdBufHeadPtr.u64 = 0;
+    pThis->CmdBufTailPtr.u64 = 0;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Event Log Base Address Register.
+ *
+ * The write is dropped while event logging is running, or when the length field
+ * of the (masked) value is below 8. An accepted write also resets the event log
+ * head and tail pointers to zero.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /*
+     * Writing this register while event logging is running is undefined behavior on
+     * the IOMMU; this emulation drops such writes.
+     * See AMD IOMMU spec. "Event Log Base Address Register".
+     */
+    if (pThis->Status.n.u1EvtLogRunning)
+    {
+        LogFunc(("Setting EvtLogBar (%#RX64) when event logging is running -> Ignored\n", u64Value));
+        return VINF_SUCCESS;
+    }
+
+    /* Strip every unrecognized bit. */
+    EVT_LOG_BAR_T NewBar;
+    NewBar.u64 = u64Value & IOMMU_EVT_LOG_BAR_VALID_MASK;
+
+    /* Reject length encodings below the accepted minimum. */
+    if (NewBar.n.u4Len < 8)
+    {
+        LogFunc(("Event log length (%#x) invalid -> Ignored\n", NewBar.n.u4Len));
+        return VINF_SUCCESS;
+    }
+
+    /* Commit the new base address. */
+    pThis->EvtLogBaseAddr.u64 = NewBar.u64;
+
+    /*
+     * A write to the event log base address resets the head and tail pointers.
+     * See AMD IOMMU spec. 2.5 "Event Logging".
+     */
+    pThis->EvtLogHeadPtr.u64 = 0;
+    pThis->EvtLogTailPtr.u64 = 0;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Control Register.
+ *
+ * The write is dropped when it enables more device table segments than the
+ * Extended Feature Register advertises. Otherwise the register is updated and the
+ * event-log-running and command-buffer-running status bits are brought in line
+ * with the new enable bits; the command thread is woken up if the command buffer
+ * has just been switched on.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdCtrl_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /* Strip every unrecognized bit. */
+    IOMMU_CTRL_T NewCtrl;
+    NewCtrl.u64 = u64Value & IOMMU_CTRL_VALID_MASK;
+
+    /* Refuse to enable more device table segments than are supported. */
+    if (NewCtrl.n.u3DevTabSegEn > pThis->ExtFeat.n.u2DevTabSegSup)
+    {
+        LogFunc(("Invalid number of device table segments enabled, exceeds %#x (%#RX64) -> Ignored!\n",
+                 pThis->ExtFeat.n.u2DevTabSegSup, NewCtrl.u64));
+        return VINF_SUCCESS;
+    }
+
+    /* Snapshot the previous value before committing the new one. */
+    IOMMU_CTRL_T const OldCtrl = pThis->Ctrl;
+    ASMAtomicWriteU64(&pThis->Ctrl.u64, NewCtrl.u64);
+
+    bool const fIommuEnNow    = NewCtrl.n.u1IommuEn;
+    bool const fIommuEnBefore = OldCtrl.n.u1IommuEn;
+    bool const fIommuToggled  = fIommuEnBefore != fIommuEnNow;
+
+    /* Start or stop event logging when either relevant enable bit changed. */
+    bool const fEvtLogEnBefore = OldCtrl.n.u1EvtLogEn;
+    bool const fEvtLogEnNow    = NewCtrl.n.u1EvtLogEn;
+    if (   fEvtLogEnBefore != fEvtLogEnNow
+        || fIommuToggled)
+    {
+        if (fIommuEnNow && fEvtLogEnNow)
+        {
+            /* Clear a stale overflow indication before flagging the log as running. */
+            ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_OVERFLOW);
+            ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_RUNNING);
+        }
+        else
+            ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_RUNNING);
+    }
+
+    /* Start or stop command buffer processing when either relevant enable bit changed. */
+    bool const fCmdBufEnBefore = OldCtrl.n.u1CmdBufEn;
+    bool const fCmdBufEnNow    = NewCtrl.n.u1CmdBufEn;
+    if (   fCmdBufEnBefore != fCmdBufEnNow
+        || fIommuToggled)
+    {
+        if (fCmdBufEnNow && fIommuEnNow)
+        {
+            ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_CMD_BUF_RUNNING);
+            LogFunc(("Command buffer enabled\n"));
+
+            /* Kick the command thread so it starts processing any pending commands. */
+            iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
+        }
+        else
+        {
+            ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
+            LogFunc(("Command buffer disabled\n"));
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Exclusion Range Base Address Register.
+ *
+ * Only the bits covered by IOMMU_EXCL_RANGE_BAR_VALID_MASK are stored.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdExclRangeBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    uint64_t const uNewBar = u64Value & IOMMU_EXCL_RANGE_BAR_VALID_MASK;
+    RT_NOREF(pDevIns, offReg);
+    pThis->ExclRangeBaseAddr.u64 = uNewBar;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Exclusion Range Limit Register.
+ *
+ * The value is masked with IOMMU_EXCL_RANGE_LIMIT_VALID_MASK and the low 12 bits
+ * are then forced to 1 before it is stored.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdExclRangeLimit_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+    uint64_t const uNewLimit = (u64Value & IOMMU_EXCL_RANGE_LIMIT_VALID_MASK) | UINT64_C(0xfff);
+    pThis->ExclRangeLimit.u64 = uNewLimit;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Hardware Event Register (Hi).
+ *
+ * The value is stored unmodified.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdHwEvtHi_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    /** @todo IOMMU: Unclear why the AMD IOMMU spec. marks this register as read/write. */
+    RT_NOREF(pDevIns, offReg);
+    pThis->HwEvtHi.u64 = u64Value;
+    LogFlowFunc(("Writing %#RX64 to hardware event (Hi) register!\n", u64Value));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Hardware Event Register (Lo).
+ *
+ * The value is stored unmodified.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdHwEvtLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    /** @todo IOMMU: Unclear why the AMD IOMMU spec. marks this register as read/write. */
+    RT_NOREF(pDevIns, offReg);
+    pThis->HwEvtLo = u64Value;
+    LogFlowFunc(("Writing %#RX64 to hardware event (Lo) register!\n", u64Value));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Hardware Event Status Register.
+ *
+ * Bits 0 (valid) and 1 (overflow) are RW1C: writing 1 clears the bit, writing 0
+ * leaves it alone. Nothing is changed unless the valid bit is currently set.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdHwEvtStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /* Strip every unrecognized bit from the written value. */
+    uint64_t const fWritten = u64Value & IOMMU_HW_EVT_STATUS_VALID_MASK;
+
+    /*
+     * The overflow bit (bit 1) is only meaningful while the valid bit (bit 0) is set,
+     * so the register is left untouched when the valid bit is clear.
+     */
+    uint64_t const fCurrent = pThis->HwEvtStatus.u64;
+    if (fCurrent & RT_BIT_64(0))
+    {
+        /* Clear exactly those of the two RW1C bits that were written as 1. */
+        uint64_t const fToClear = fWritten & (RT_BIT_64(0) | RT_BIT_64(1));
+        pThis->HwEvtStatus.u64 = fCurrent & ~fToClear;
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes a Device Table Segment Base Address Register.
+ *
+ * The segment is selected by the register's MMIO offset relative to
+ * IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST (one register every 8 bytes). The write is
+ * dropped when the size field exceeds the maximum allowed for that segment.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the segment register being written.
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdDevTabSegBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns);
+
+    /* Work out which segment is being written; the segment registers start at array index 1. */
+    uint8_t const iSegZeroBased = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
+    uint8_t const iSegment      = iSegZeroBased + 1;
+    Assert(iSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
+
+    /* Strip every unrecognized bit. */
+    DEV_TAB_BAR_T NewSegBar;
+    NewSegBar.u64 = u64Value & IOMMU_DEV_TAB_SEG_BAR_VALID_MASK;
+
+    /* Drop the write if the segment size exceeds the per-segment maximum. */
+    uint16_t const cSegSize    = NewSegBar.n.u9Size;
+    uint16_t const cSegSizeMax = g_auDevTabSegMaxSizes[iSegment];
+    if (cSegSize > cSegSizeMax)
+    {
+        LogFunc(("Device table segment (%u) size invalid (%#RX32) -> Ignored\n", iSegment, cSegSize));
+        return VINF_SUCCESS;
+    }
+
+    /* Commit the masked value. */
+    pThis->aDevTabBaseAddrs[iSegment].u64 = NewSegBar.u64;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
+ *
+ * Only the upper dword (MSI Vector Register 1) is stored, masked with
+ * IOMMU_MSI_VECTOR_1_VALID_MASK; the lower dword is discarded because
+ * MSI Vector Register 0 is read-only.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The combined 64-bit value being written.
+ */
+static VBOXSTRICTRC iommuAmdDevMsiVector_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /* MSI Vector Register 0 (low dword) is read-only, only Register 1 (high dword) is updated. */
+    uint32_t const u32Vector1 = u64Value >> 32;
+    pThis->MiscInfo.au32[1] = u32Vector1 & IOMMU_MSI_VECTOR_1_VALID_MASK;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the MSI Capability Header Register (32-bit) and the MSI Address (Lo)
+ * Register (32-bit).
+ *
+ * From the low dword only the MSI enable bit is taken over into the capability
+ * header in PCI configuration space. The high dword is masked with
+ * VBOX_MSI_ADDR_VALID_MASK and stored as the MSI address (Lo).
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data (unused).
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The combined 64-bit value being written.
+ */
+static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+    RT_NOREF(pThis, offReg);
+
+    uint32_t const u32CapHdrWritten = u64Value;
+    uint32_t const u32AddrLoWritten = u64Value >> 32;
+
+    /* MSI capability header: read-modify-write, only the enable bit is writable. */
+    MSI_CAP_HDR_T CapHdr;
+    CapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
+    CapHdr.n.u1MsiEnable = RT_BOOL(u32CapHdrWritten & IOMMU_MSI_CAP_HDR_MSI_EN_MASK);
+    PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR, CapHdr.u32);
+
+    /* MSI Address Lo: store the valid bits only. */
+    PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, u32AddrLoWritten & VBOX_MSI_ADDR_VALID_MASK);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the MSI Address (Hi) Register (32-bit) and the MSI Data Register (32-bit).
+ *
+ * The low dword is stored unmodified as the MSI address (Hi). The high dword is
+ * masked with VBOX_MSI_DATA_VALID_MASK and stored as the MSI data.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data (unused).
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The combined 64-bit value being written.
+ */
+static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+    RT_NOREF(pThis, offReg);
+
+    uint32_t const u32AddrHiWritten = u64Value;
+    uint32_t const u32DataWritten   = u64Value >> 32;
+
+    /* MSI Address Hi: stored as written. */
+    PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, u32AddrHiWritten);
+
+    /* MSI Data: store the valid bits only. */
+    PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, u32DataWritten & VBOX_MSI_DATA_VALID_MASK);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Command Buffer Head Pointer Register.
+ *
+ * The write is dropped while the command buffer is running, or when the masked
+ * offset lies at or beyond the current command buffer length. An accepted write
+ * updates the low dword of the head pointer and wakes up the command thread if
+ * needed.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /*
+     * IOMMU behavior is undefined when software writes this register when the command buffer is running.
+     * In our emulation, we ignore the write entirely.
+     * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
+     */
+    if (pThis->Status.n.u1CmdBufRunning)
+    {
+        LogFunc(("Setting CmdBufHeadPtr (%#RX64) when command buffer is running -> Ignored\n", u64Value));
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * IOMMU behavior is undefined when software writes a value outside the buffer length.
+     * In our emulation, we ignore the write entirely.
+     */
+    uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK;
+    uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
+    Assert(cbBuf <= _512K);
+    if (offBuf >= cbBuf)
+    {
+        /* Both arguments are 32-bit, so both must be formatted with %RX32. */
+        LogFunc(("Setting CmdBufHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
+        return VINF_SUCCESS;
+    }
+
+    /* Update the register (only the low dword holds the offset). */
+    pThis->CmdBufHeadPtr.au32[0] = offBuf;
+
+    /* The head moved, so there may now be commands to process. */
+    iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
+
+    Log4Func(("Set CmdBufHeadPtr to %#RX32\n", offBuf));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Command Buffer Tail Pointer Register.
+ *
+ * The write is dropped when the masked offset lies at or beyond the current
+ * command buffer length. An accepted write updates the low dword of the tail
+ * pointer and wakes up the command thread if needed.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdCmdBufTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /*
+     * Writing an offset outside the buffer is undefined behavior on the IOMMU; this
+     * emulation drops such writes.
+     * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
+     */
+    uint32_t const offNewTail = u64Value & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK;
+    uint32_t const cbCmdBuf   = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
+    Assert(cbCmdBuf <= _512K);
+    if (offNewTail < cbCmdBuf)
+    {
+        /*
+         * Advancing the tail to or past the head after queuing commands is also
+         * undefined behavior, but that is deliberately not enforced here. Software may
+         * legally shrink the queue (by lowering the offset), wrap the pointer around
+         * (when head isn't at 0) or empty the queue by making head and tail equal.
+         * Keeping the offset within the circular buffer bounds, which the masking and
+         * the length check above take care of, is considered sufficient.
+         */
+        pThis->CmdBufTailPtr.au32[0] = offNewTail;
+
+        iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
+
+        Log4Func(("Set CmdBufTailPtr to %#RX32\n", offNewTail));
+    }
+    else
+        LogFunc(("Setting CmdBufTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offNewTail, cbCmdBuf));
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Event Log Head Pointer Register.
+ *
+ * The write is dropped when the masked offset lies at or beyond the current event
+ * log buffer length. An accepted write updates the low dword of the head pointer.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /*
+     * Writing an offset outside the buffer is undefined behavior on the IOMMU; this
+     * emulation drops such writes.
+     * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
+     */
+    uint32_t const offNewHead = u64Value & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK;
+    uint32_t const cbEvtLog   = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
+    Assert(cbEvtLog <= _512K);
+    if (offNewHead < cbEvtLog)
+    {
+        /* Commit the new head offset (only the low dword holds the offset). */
+        pThis->EvtLogHeadPtr.au32[0] = offNewHead;
+
+        Log4Func(("Set EvtLogHeadPtr to %#RX32\n", offNewHead));
+    }
+    else
+        LogFunc(("Setting EvtLogHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offNewHead, cbEvtLog));
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Event Log Tail Pointer Register.
+ *
+ * The write is dropped while event logging is running, or when the masked offset
+ * lies at or beyond the current event log buffer length. An accepted write
+ * updates the low dword of the tail pointer.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdEvtLogTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    /* pThis is dereferenced below, so only the genuinely unused parameters are listed here. */
+    RT_NOREF(pDevIns, offReg);
+
+    /*
+     * IOMMU behavior is undefined when software writes this register when the event log is running.
+     * In our emulation, we ignore the write entirely.
+     * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
+     */
+    if (pThis->Status.n.u1EvtLogRunning)
+    {
+        LogFunc(("Setting EvtLogTailPtr (%#RX64) when event log is running -> Ignored\n", u64Value));
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * IOMMU behavior is undefined when software writes a value outside the buffer length.
+     * In our emulation, we ignore the write entirely.
+     */
+    uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK;
+    uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
+    Assert(cbBuf <= _512K);
+    if (offBuf >= cbBuf)
+    {
+        LogFunc(("Setting EvtLogTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
+        return VINF_SUCCESS;
+    }
+
+    /* Update the register (only the low dword holds the offset). */
+    pThis->EvtLogTailPtr.au32[0] = offBuf;
+
+    Log4Func(("Set EvtLogTailPtr to %#RX32\n", offBuf));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes the Status Register.
+ *
+ * Only the RW1C (write-1-to-clear) bits can be affected: writing 1 to such a bit
+ * clears it, writing 0 leaves it alone. All other bits are read-only and keep
+ * their current value.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pDevIns     The IOMMU device instance (unused).
+ * @param   pThis       The IOMMU instance data.
+ * @param   offReg      The MMIO offset of the register (unused).
+ * @param   u64Value    The value being written.
+ */
+static VBOXSTRICTRC iommuAmdStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
+{
+    RT_NOREF(pDevIns, offReg);
+
+    /* The RW1C bits that software wrote as 1, with every unrecognized bit stripped. */
+    uint64_t const fRw1cToClear = u64Value & IOMMU_STATUS_VALID_MASK & IOMMU_STATUS_RW1C_MASK;
+
+    /*
+     * Clearing exactly those bits leaves the read-only bits and the RW1C bits that
+     * were written as 0 untouched.
+     */
+    uint64_t const uOldStatus = pThis->Status.u64;
+    uint64_t const uNewStatus = uOldStatus & ~fRw1cToClear;
+
+    /* Commit the new status. */
+    ASMAtomicWriteU64(&pThis->Status.u64, uNewStatus);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Register access table 0.
+ * The MMIO offset of each entry must be a multiple of 8!
+ *
+ * Entries are indexed by (MMIO offset >> 3), so the order and count of the entries
+ * must match the register layout exactly (checked by the AssertCompile below).
+ * An entry without a write handler is treated as reserved or read-only by the
+ * register write path.
+ */
+static const IOMMUREGACC g_aRegAccess0[] =
+{
+    /* MMIO off.   Register name                           Read function                  Write function */
+    { /* 0x00  */ "DEV_TAB_BAR",                           iommuAmdDevTabBar_r,           iommuAmdDevTabBar_w           },
+    { /* 0x08  */ "CMD_BUF_BAR",                           iommuAmdCmdBufBar_r,           iommuAmdCmdBufBar_w           },
+    { /* 0x10  */ "EVT_LOG_BAR",                           iommuAmdEvtLogBar_r,           iommuAmdEvtLogBar_w           },
+    { /* 0x18  */ "CTRL",                                  iommuAmdCtrl_r,                iommuAmdCtrl_w                },
+    { /* 0x20  */ "EXCL_BAR",                              iommuAmdExclRangeBar_r,        iommuAmdExclRangeBar_w        },
+    { /* 0x28  */ "EXCL_RANGE_LIMIT",                      iommuAmdExclRangeLimit_r,      iommuAmdExclRangeLimit_w      },
+    { /* 0x30  */ "EXT_FEAT",                              iommuAmdExtFeat_r,             NULL                          },
+    { /* 0x38  */ "PPR_LOG_BAR",                           iommuAmdPprLogBar_r,           NULL                          },
+    { /* 0x40  */ "HW_EVT_HI",                             iommuAmdHwEvtHi_r,             iommuAmdHwEvtHi_w             },
+    { /* 0x48  */ "HW_EVT_LO",                             iommuAmdHwEvtLo_r,             iommuAmdHwEvtLo_w             },
+    { /* 0x50  */ "HW_EVT_STATUS",                         iommuAmdHwEvtStatus_r,         iommuAmdHwEvtStatus_w         },
+    { /* 0x58  */ NULL,                                    NULL,                          NULL                          },
+
+    { /* 0x60  */ "SMI_FLT_0",                             NULL,                          NULL                          },
+    { /* 0x68  */ "SMI_FLT_1",                             NULL,                          NULL                          },
+    { /* 0x70  */ "SMI_FLT_2",                             NULL,                          NULL                          },
+    { /* 0x78  */ "SMI_FLT_3",                             NULL,                          NULL                          },
+    { /* 0x80  */ "SMI_FLT_4",                             NULL,                          NULL                          },
+    { /* 0x88  */ "SMI_FLT_5",                             NULL,                          NULL                          },
+    { /* 0x90  */ "SMI_FLT_6",                             NULL,                          NULL                          },
+    { /* 0x98  */ "SMI_FLT_7",                             NULL,                          NULL                          },
+    { /* 0xa0  */ "SMI_FLT_8",                             NULL,                          NULL                          },
+    { /* 0xa8  */ "SMI_FLT_9",                             NULL,                          NULL                          },
+    { /* 0xb0  */ "SMI_FLT_10",                            NULL,                          NULL                          },
+    { /* 0xb8  */ "SMI_FLT_11",                            NULL,                          NULL                          },
+    { /* 0xc0  */ "SMI_FLT_12",                            NULL,                          NULL                          },
+    { /* 0xc8  */ "SMI_FLT_13",                            NULL,                          NULL                          },
+    { /* 0xd0  */ "SMI_FLT_14",                            NULL,                          NULL                          },
+    { /* 0xd8  */ "SMI_FLT_15",                            NULL,                          NULL                          },
+
+    { /* 0xe0  */ "GALOG_BAR",                             iommuAmdGALogBar_r,            NULL                          },
+    { /* 0xe8  */ "GALOG_TAIL_ADDR",                       NULL,                          NULL                          },
+    { /* 0xf0  */ "PPR_LOG_B_BAR",                         iommuAmdPprLogBBaseAddr_r,     NULL                          },
+    /* 0xf8 is the Event Log B base address register (see its read handler), hence the EVT_LOG_B name. */
+    { /* 0xf8  */ "EVT_LOG_B_BAR",                         iommuAmdEvtLogBBaseAddr_r,     NULL                          },
+
+    { /* 0x100 */ "DEV_TAB_SEG_1",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+    { /* 0x108 */ "DEV_TAB_SEG_2",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+    { /* 0x110 */ "DEV_TAB_SEG_3",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+    { /* 0x118 */ "DEV_TAB_SEG_4",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+    { /* 0x120 */ "DEV_TAB_SEG_5",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+    { /* 0x128 */ "DEV_TAB_SEG_6",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+    { /* 0x130 */ "DEV_TAB_SEG_7",                         iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
+
+    { /* 0x138 */ "DEV_SPECIFIC_FEAT",                     iommuAmdDevSpecificFeat_r,     NULL                          },
+    { /* 0x140 */ "DEV_SPECIFIC_CTRL",                     iommuAmdDevSpecificCtrl_r,     NULL                          },
+    { /* 0x148 */ "DEV_SPECIFIC_STATUS",                   iommuAmdDevSpecificStatus_r,   NULL                          },
+
+    { /* 0x150 */ "MSI_VECTOR_0 or MSI_VECTOR_1",          iommuAmdDevMsiVector_r,        iommuAmdDevMsiVector_w        },
+    { /* 0x158 */ "MSI_CAP_HDR or MSI_ADDR_LO",            iommuAmdMsiCapHdrAndAddrLo_r,  iommuAmdMsiCapHdrAndAddrLo_w  },
+    { /* 0x160 */ "MSI_ADDR_HI or MSI_DATA",               iommuAmdMsiAddrHiAndData_r,    iommuAmdMsiAddrHiAndData_w    },
+    { /* 0x168 */ "MSI_MAPPING_CAP_HDR or PERF_OPT_CTRL",  NULL,                          NULL                          },
+
+    { /* 0x170 */ "XT_GEN_INTR_CTRL",                      NULL,                          NULL                          },
+    { /* 0x178 */ "XT_PPR_INTR_CTRL",                      NULL,                          NULL                          },
+    { /* 0x180 */ "XT_GALOG_INT_CTRL",                     NULL,                          NULL                          },
+};
+AssertCompile(RT_ELEMENTS(g_aRegAccess0) == (IOMMU_MMIO_OFF_QWORD_TABLE_0_END - IOMMU_MMIO_OFF_QWORD_TABLE_0_START) / 8);
+
+/**
+ * Register access table 1.
+ * The MMIO offset of each entry must be a multiple of 8!
+ *
+ * Entries are indexed by ((MMIO offset - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3),
+ * so the order and count of the entries must match the register layout exactly
+ * (checked by the AssertCompile below). None of the registers in this table
+ * currently has a read or write handler.
+ */
+static const IOMMUREGACC g_aRegAccess1[] =
+{
+ /* MMIO offset Register name Read function Write function */
+ { /* 0x200 */ "MARC_APER_BAR_0", NULL, NULL },
+ { /* 0x208 */ "MARC_APER_RELOC_0", NULL, NULL },
+ { /* 0x210 */ "MARC_APER_LEN_0", NULL, NULL },
+ { /* 0x218 */ "MARC_APER_BAR_1", NULL, NULL },
+ { /* 0x220 */ "MARC_APER_RELOC_1", NULL, NULL },
+ { /* 0x228 */ "MARC_APER_LEN_1", NULL, NULL },
+ { /* 0x230 */ "MARC_APER_BAR_2", NULL, NULL },
+ { /* 0x238 */ "MARC_APER_RELOC_2", NULL, NULL },
+ { /* 0x240 */ "MARC_APER_LEN_2", NULL, NULL },
+ { /* 0x248 */ "MARC_APER_BAR_3", NULL, NULL },
+ { /* 0x250 */ "MARC_APER_RELOC_3", NULL, NULL },
+ { /* 0x258 */ "MARC_APER_LEN_3", NULL, NULL }
+};
+AssertCompile(RT_ELEMENTS(g_aRegAccess1) == (IOMMU_MMIO_OFF_QWORD_TABLE_1_END - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) / 8);
+
+/**
+ * Register access table 2.
+ * The MMIO offset of each entry must be a multiple of 8!
+ *
+ * Entries are indexed by ((MMIO offset - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3),
+ * so the order and count of the entries must match the register layout exactly
+ * (checked by the AssertCompile below). An entry without a write handler is
+ * treated as reserved or read-only by the register write path.
+ */
+static const IOMMUREGACC g_aRegAccess2[] =
+{
+ /* MMIO offset Register name Read Function Write function */
+ { /* 0x1ff8 */ "RSVD_REG", NULL, NULL },
+
+ { /* 0x2000 */ "CMD_BUF_HEAD_PTR", iommuAmdCmdBufHeadPtr_r, iommuAmdCmdBufHeadPtr_w },
+ { /* 0x2008 */ "CMD_BUF_TAIL_PTR", iommuAmdCmdBufTailPtr_r , iommuAmdCmdBufTailPtr_w },
+ { /* 0x2010 */ "EVT_LOG_HEAD_PTR", iommuAmdEvtLogHeadPtr_r, iommuAmdEvtLogHeadPtr_w },
+ { /* 0x2018 */ "EVT_LOG_TAIL_PTR", iommuAmdEvtLogTailPtr_r, iommuAmdEvtLogTailPtr_w },
+
+ { /* 0x2020 */ "STATUS", iommuAmdStatus_r, iommuAmdStatus_w },
+ { /* 0x2028 */ NULL, NULL, NULL },
+
+ { /* 0x2030 */ "PPR_LOG_HEAD_PTR", NULL, NULL },
+ { /* 0x2038 */ "PPR_LOG_TAIL_PTR", NULL, NULL },
+
+ { /* 0x2040 */ "GALOG_HEAD_PTR", NULL, NULL },
+ { /* 0x2048 */ "GALOG_TAIL_PTR", NULL, NULL },
+
+ { /* 0x2050 */ "PPR_LOG_B_HEAD_PTR", NULL, NULL },
+ { /* 0x2058 */ "PPR_LOG_B_TAIL_PTR", NULL, NULL },
+
+ { /* 0x2060 */ NULL, NULL, NULL },
+ { /* 0x2068 */ NULL, NULL, NULL },
+
+ { /* 0x2070 */ "EVT_LOG_B_HEAD_PTR", NULL, NULL },
+ { /* 0x2078 */ "EVT_LOG_B_TAIL_PTR", NULL, NULL },
+
+ { /* 0x2080 */ "PPR_LOG_AUTO_RESP", NULL, NULL },
+ { /* 0x2088 */ "PPR_LOG_OVERFLOW_EARLY", NULL, NULL },
+ { /* 0x2090 */ "PPR_LOG_B_OVERFLOW_EARLY", NULL, NULL }
+};
+AssertCompile(RT_ELEMENTS(g_aRegAccess2) == (IOMMU_MMIO_OFF_QWORD_TABLE_2_END - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) / 8);
+
+
+/**
+ * Looks up the register access structure for an MMIO offset.
+ *
+ * The offset is matched against the three register access tables in turn; within
+ * a table the entry index is the offset relative to the table start divided by 8.
+ *
+ * @returns The register access structure, or NULL if the offset is not covered by
+ *          any of the tables.
+ * @param   off     The MMIO offset of the register being accessed.
+ */
+static PCIOMMUREGACC iommuAmdGetRegAccess(uint32_t off)
+{
+    /* Table 0: everything below its end offset. */
+    if (off < IOMMU_MMIO_OFF_QWORD_TABLE_0_END)
+    {
+        uint32_t const iEntry = off >> 3;
+        Assert(iEntry < RT_ELEMENTS(g_aRegAccess0));
+        return &g_aRegAccess0[iEntry];
+    }
+
+    /* Table 1. */
+    if (   off <  IOMMU_MMIO_OFF_QWORD_TABLE_1_END
+        && off >= IOMMU_MMIO_OFF_QWORD_TABLE_1_START)
+    {
+        uint32_t const iEntry = (off - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3;
+        Assert(iEntry < RT_ELEMENTS(g_aRegAccess1));
+        return &g_aRegAccess1[iEntry];
+    }
+
+    /* Table 2. */
+    if (   off <  IOMMU_MMIO_OFF_QWORD_TABLE_2_END
+        && off >= IOMMU_MMIO_OFF_QWORD_TABLE_2_START)
+    {
+        uint32_t const iEntry = (off - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3;
+        Assert(iEntry < RT_ELEMENTS(g_aRegAccess2));
+        return &g_aRegAccess2[iEntry];
+    }
+
+    /* Not a known register. */
+    return NULL;
+}
+
+
+/**
+ * Writes an IOMMU register (32-bit and 64-bit).
+ *
+ * Writes to unknown offsets and to registers without a write handler (reserved or
+ * read-only) are ignored, as are 64-bit writes that are not 8-byte aligned.
+ * A 32-bit write is turned into a 64-bit write by reading the full 64-bit register
+ * first and merging the written dword into the matching half.
+ *
+ * @returns Strict VBox status code.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   off         MMIO byte offset to the register.
+ * @param   cb          The size of the write access.
+ * @param   uValue      The value being written.
+ *
+ * @thread  EMT.
+ */
+static VBOXSTRICTRC iommuAmdRegisterWrite(PPDMDEVINS pDevIns, uint32_t off, uint8_t cb, uint64_t uValue)
+{
+    /*
+     * Validate the access in case of IOM bug or incorrect assumption.
+     */
+    Assert(off < IOMMU_MMIO_REGION_SIZE);
+    AssertMsgReturn(cb == 4 || cb == 8, ("Invalid access size %u\n", cb), VINF_SUCCESS);
+    AssertMsgReturn(!(off & 3), ("Invalid offset %#x\n", off), VINF_SUCCESS);
+
+    Log4Func(("off=%#x cb=%u uValue=%#RX64\n", off, cb, uValue));
+
+    PIOMMU        pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUCC      pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+    PCIOMMUREGACC pReg    = iommuAmdGetRegAccess(off);
+    if (pReg)
+    { /* likely */ }
+    else
+    {
+        LogFunc(("Writing unknown register %#x with %#RX64 -> Ignored\n", off, uValue));
+        return VINF_SUCCESS;
+    }
+
+    /* If a write handler doesn't exist, it's either a reserved or read-only register. */
+    if (pReg->pfnWrite)
+    { /* likely */ }
+    else
+    {
+        LogFunc(("Writing reserved or read-only register off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * If the write access is 64-bits and aligned on a 64-bit boundary, dispatch right away.
+     * This handles writes to 64-bit registers as well as aligned, 64-bit writes to two
+     * consecutive 32-bit registers.
+     */
+    if (cb == 8)
+    {
+        if (!(off & 7))
+        {
+            IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE);
+            VBOXSTRICTRC rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue);
+            IOMMU_UNLOCK(pDevIns, pThisCC);
+            return rcStrict;
+        }
+
+        LogFunc(("Misaligned access while writing register at off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
+        return VINF_SUCCESS;
+    }
+
+    /* We shouldn't get sizes other than 32 bits here as we've specified so with IOM. */
+    Assert(cb == 4);
+    if (!(off & 7))
+    {
+        VBOXSTRICTRC rcStrict;
+        IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE);
+
+        /*
+         * Lower 32 bits of a 64-bit register or a 32-bit register is being written.
+         * Merge with higher 32 bits (after reading the full 64-bits) and perform a 64-bit write.
+         * Registers without a read handler contribute zero for the untouched half.
+         */
+        uint64_t u64Read;
+        if (pReg->pfnRead)
+            rcStrict = pReg->pfnRead(pDevIns, pThis, off, &u64Read);
+        else
+        {
+            rcStrict = VINF_SUCCESS;
+            u64Read = 0;
+        }
+
+        if (RT_SUCCESS(rcStrict))
+        {
+            uValue = (u64Read & UINT64_C(0xffffffff00000000)) | uValue;
+            rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue);
+        }
+        else
+            LogFunc(("Reading off %#x during split write failed! rc=%Rrc -> Ignored\n", off, VBOXSTRICTRC_VAL(rcStrict)));
+
+        IOMMU_UNLOCK(pDevIns, pThisCC);
+        return rcStrict;
+    }
+
+    /*
+     * Higher 32 bits of a 64-bit register or a 32-bit register at a 32-bit boundary is being written.
+     * Merge with lower 32 bits (after reading the full 64-bits) and perform a 64-bit write.
+     * The handlers are invoked with the 8-byte aligned offset (off - 4).
+     */
+    VBOXSTRICTRC rcStrict;
+    Assert(!(off & 3));
+    Assert(off & 7);
+    Assert(off >= 4);
+    uint64_t u64Read;
+    IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE);
+    if (pReg->pfnRead)
+        rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, &u64Read);
+    else
+    {
+        rcStrict = VINF_SUCCESS;
+        u64Read = 0;
+    }
+
+    if (RT_SUCCESS(rcStrict))
+    {
+        uValue = (uValue << 32) | (u64Read & UINT64_C(0xffffffff));
+        rcStrict = pReg->pfnWrite(pDevIns, pThis, off - 4, uValue);
+    }
+    else
+        LogFunc(("Reading off %#x during split write failed! rc=%Rrc -> Ignored\n", off, VBOXSTRICTRC_VAL(rcStrict)));
+
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+    return rcStrict;
+}
+
+
+/**
+ * Reads an IOMMU register (64-bit) given its MMIO offset.
+ *
+ * Register read handlers always produce 64 bits. When @a off is on an 8-byte
+ * boundary the handler result is returned unmodified, which means a read of a
+ * 32-bit register located there includes the lower half of the subsequent
+ * register in its upper 32 bits. When @a off is only 4-byte aligned, the 64 bits
+ * at the preceding 8-byte boundary are read and shifted down so that only the
+ * upper 32 bits are returned.
+ *
+ * This is because most registers are 64-bit and aligned on 8-byte boundaries but
+ * some are really 32-bit registers aligned on an 8-byte boundary. We cannot assume
+ * software will only perform 32-bit reads on those 32-bit registers that are
+ * aligned on 8-byte boundaries.
+ *
+ * @returns Strict VBox status code.
+ * @retval  VINF_IOM_MMIO_UNUSED_FF if no register is known at @a off.
+ * @retval  VINF_IOM_MMIO_UNUSED_00 if the register has no read handler.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   off         The MMIO offset of the register in bytes. Must be at least
+ *                      4-byte aligned and below IOMMU_MMIO_REGION_SIZE (asserted).
+ * @param   puResult    Where to store the value being read. Set to 0 when the
+ *                      read of the upper half of a register fails.
+ *
+ * @thread  EMT.
+ */
+static VBOXSTRICTRC iommuAmdRegisterRead(PPDMDEVINS pDevIns, uint32_t off, uint64_t *puResult)
+{
+    Assert(off < IOMMU_MMIO_REGION_SIZE);
+    Assert(!(off & 7) || !(off & 3));
+
+    Log4Func(("off=%#x\n", off));
+
+    PIOMMU      pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUCC    pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+    PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev);
+
+    PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
+    if (pReg)
+    { /* likely */ }
+    else
+    {
+        LogFunc(("Reading unknown register %#x -> Ignored\n", off));
+        return VINF_IOM_MMIO_UNUSED_FF;
+    }
+
+    /* If a read handler doesn't exist, it's a reserved or unknown register. */
+    if (pReg->pfnRead)
+    { /* likely */ }
+    else
+    {
+        LogFunc(("Reading reserved or unknown register off=%#x -> returning 0s\n", off));
+        return VINF_IOM_MMIO_UNUSED_00;
+    }
+
+    /*
+     * If the read access is aligned on a 64-bit boundary, read the full 64-bits and return.
+     * The caller takes care of truncating upper 32 bits for 32-bit reads.
+     */
+    if (!(off & 7))
+    {
+        IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ);
+        VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off, puResult);
+        IOMMU_UNLOCK(pDevIns, pThisCC);
+        return rcStrict;
+    }
+
+    /*
+     * High 32 bits of a 64-bit register or a 32-bit register at a non 64-bit boundary is being read.
+     * Read full 64 bits at the previous 64-bit boundary but return only the high 32 bits.
+     */
+    Assert(!(off & 3));
+    Assert(off & 7);
+    Assert(off >= 4);
+    IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ);
+    VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, puResult);
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+    if (RT_SUCCESS(rcStrict))
+        *puResult >>= 32;
+    else
+    {
+        /* Don't hand a partially read value back to the caller. */
+        *puResult = 0;
+        /* The newline terminates the message; it previously sat in the middle of it. */
+        LogFunc(("Reading off %#x during split read failed! rc=%Rrc -> Ignored\n", off, VBOXSTRICTRC_VAL(rcStrict)));
+    }
+
+    return rcStrict;
+}
+
+
+/**
+ * Signals the IOMMU's MSI interrupt, provided MSI is currently enabled.
+ *
+ * When MSI is not enabled this only emits the flow log statement.
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ *
+ * @thread  Any.
+ * @remarks The IOMMU lock may or may not be held.
+ */
+static void iommuAmdMsiInterruptRaise(PPDMDEVINS pDevIns)
+{
+    LogFlowFunc(("\n"));
+
+    /* Nothing to signal while MSI is disabled. */
+    if (!iommuAmdIsMsiEnabled(pDevIns))
+        return;
+
+    LogFunc(("Raising MSI\n"));
+    PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_HIGH);
+}
+
+#if 0
+/**
+ * Lowers the IOMMU's MSI interrupt line, provided MSI is currently enabled.
+ *
+ * Currently compiled out (enclosed in an "#if 0" block).
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ *
+ * @thread  Any.
+ * @remarks The IOMMU lock may or may not be held.
+ */
+static void iommuAmdMsiInterruptClear(PPDMDEVINS pDevIns)
+{
+    /* Nothing to clear while MSI is disabled. */
+    if (!iommuAmdIsMsiEnabled(pDevIns))
+        return;
+
+    PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_LOW);
+}
+#endif
+
+/**
+ * Appends an entry to the in-memory event log (a circular buffer).
+ *
+ * Nothing happens unless event logging is running and has not already overflowed.
+ * If the log has room, the event is written at the current tail offset, the tail
+ * pointer is advanced (wrapping at the total buffer length) and the event-log
+ * interrupt status bit is set. Otherwise the event-log overflow status bit is set.
+ * In both cases the MSI is raised when the event interrupt is enabled in the
+ * control register.
+ *
+ * @returns VBox status code. Always VINF_SUCCESS; a failure to write the entry
+ *          to memory is only logged.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pEvent      The event to log.
+ *
+ * @thread  Any.
+ * @remarks This function acquires and releases the IOMMU lock itself. Callers in
+ *          this file invoke it both with and without the lock already held, so
+ *          the lock is presumably recursive - confirm against IOMMU_LOCK.
+ */
+static int iommuAmdEvtLogEntryWrite(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
+{
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+
+    IOMMU_LOCK(pDevIns, pThisCC);
+
+    /* Bail out if event logging is inactive or the log has already overflowed. */
+    IOMMU_STATUS_T const Status = pThis->Status;
+    if (   !Status.n.u1EvtLogRunning
+        || Status.n.u1EvtOverflow)
+    {
+        IOMMU_UNLOCK(pDevIns, pThisCC);
+        return VINF_SUCCESS;
+    }
+
+    /* The tail pointer is the byte offset (into the circular buffer) of the next entry. */
+    uint32_t const cbEvt  = sizeof(*pEvent);
+    uint32_t const offEvt = pThis->EvtLogTailPtr.n.off;
+    Assert(!(offEvt & ~IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK));
+
+    /* Figure out whether there is room for one more entry. */
+    uint32_t const cEntriesMax = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
+    uint32_t const cEntries    = iommuAmdGetEvtLogEntryCount(pThis);
+    uint64_t       fStatusToSet;
+    if (cEntries + 1 >= cEntriesMax)
+    {
+        /* No room: report the overflow instead of writing the entry. */
+        fStatusToSet = IOMMU_STATUS_EVT_LOG_OVERFLOW;
+    }
+    else
+    {
+        /* Store the entry in guest memory at the tail of the log. */
+        RTGCPHYS const GCPhysEvtLog      = pThis->EvtLogBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
+        RTGCPHYS const GCPhysEvtLogEntry = GCPhysEvtLog + offEvt;
+        int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysEvtLogEntry, pEvent, cbEvt);
+        if (RT_FAILURE(rc))
+            LogFunc(("Failed to write event log entry at %#RGp. rc=%Rrc\n", GCPhysEvtLogEntry, rc));
+
+        /* Advance the tail, wrapping around at the end of the buffer. */
+        uint32_t const cbEvtLog = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
+        pThis->EvtLogTailPtr.n.off = (offEvt + cbEvt) % cbEvtLog;
+
+        /* Report that an entry was written. */
+        fStatusToSet = IOMMU_STATUS_EVT_LOG_INTR;
+    }
+
+    /* Publish the status bit, then interrupt software if it asked for event interrupts. */
+    ASMAtomicOrU64(&pThis->Status.u64, fStatusToSet);
+    if (pThis->Ctrl.n.u1EvtIntrEn)
+        iommuAmdMsiInterruptRaise(pDevIns);
+
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Records an event in the hardware error registers.
+ *
+ * Does nothing unless the extended feature register reports hardware error
+ * support. Otherwise the event is marked valid (flagging an overflow if a
+ * previous event was still marked valid) and its four 32-bit words are stored
+ * in the hardware event high/low registers.
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pEvent      The event.
+ *
+ * @thread  Any.
+ */
+static void iommuAmdHwErrorSet(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    /* Hardware error registers are only maintained when the feature is advertised. */
+    if (!pThis->ExtFeat.n.u1HwErrorSup)
+        return;
+
+    /* A still-valid earlier event is about to be overwritten: flag the overflow. */
+    if (pThis->HwEvtStatus.n.u1Valid)
+        pThis->HwEvtStatus.n.u1Overflow = 1;
+    pThis->HwEvtStatus.n.u1Valid = 1;
+
+    /* Words 0-1 of the event go to the high register, words 2-3 to the low register. */
+    pThis->HwEvtHi.u64 = RT_MAKE_U64(pEvent->au32[0], pEvent->au32[1]);
+    pThis->HwEvtLo     = RT_MAKE_U64(pEvent->au32[2], pEvent->au32[3]);
+
+    Assert(   pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_DEV_TAB_HW_ERROR
+           || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_PAGE_TAB_HW_ERROR
+           || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
+}
+
+
+/**
+ * Fills in a PAGE_TAB_HARDWARE_ERROR event.
+ *
+ * The event is zeroed first, so every field not assigned here (including the
+ * guest/nested bit) ends up as 0.
+ *
+ * @param   idDevice            The device ID (bus, device, function).
+ * @param   idDomain            The domain ID.
+ * @param   GCPhysPtEntity      The system physical address of the page table
+ *                              entity.
+ * @param   enmOp               The IOMMU operation being performed.
+ * @param   pEvtPageTabHwErr    Where to store the initialized event.
+ */
+static void iommuAmdPageTabHwErrorEventInit(uint16_t idDevice, uint16_t idDomain, RTGCPHYS GCPhysPtEntity, IOMMUOP enmOp,
+                                            PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
+{
+    memset(pEvtPageTabHwErr, 0, sizeof(*pEvtPageTabHwErr));
+
+    /* What kind of event this is and which address it concerns. */
+    pEvtPageTabHwErr->n.u4EvtCode = IOMMU_EVT_PAGE_TAB_HW_ERROR;
+    pEvtPageTabHwErr->n.u64Addr   = GCPhysPtEntity;
+
+    /* Who caused it. */
+    pEvtPageTabHwErr->n.u16DevId           = idDevice;
+    pEvtPageTabHwErr->n.u16DomainOrPasidLo = idDomain;
+
+    /* Flags derived from the operation being performed. */
+    pEvtPageTabHwErr->n.u1Interrupt   = enmOp == IOMMUOP_INTR_REQ;
+    pEvtPageTabHwErr->n.u1ReadWrite   = enmOp == IOMMUOP_MEM_WRITE;
+    pEvtPageTabHwErr->n.u1Translation = enmOp == IOMMUOP_TRANSLATE_REQ;
+
+    /* Commands are reported as data errors, everything else as target aborts. */
+    if (enmOp == IOMMUOP_CMD)
+        pEvtPageTabHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR;
+    else
+        pEvtPageTabHwErr->n.u2Type = HWEVTTYPE_TARGET_ABORT;
+}
+
+
+/**
+ * Raises a PAGE_TAB_HARDWARE_ERROR event.
+ *
+ * With the IOMMU lock held, the event is recorded in the hardware error
+ * registers and written to the event log. For any operation other than a
+ * command, a PCI target abort is signalled as well.
+ *
+ * @param   pDevIns             The IOMMU device instance.
+ * @param   enmOp               The IOMMU operation being performed.
+ * @param   pEvtPageTabHwErr    The page table hardware error event.
+ *
+ * @thread  Any.
+ */
+static void iommuAmdPageTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
+{
+    /* The event is passed on as a generic event, so both layouts must be the same size. */
+    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_PAGE_TAB_HW_ERR_T));
+    PCEVT_GENERIC_T pGenericEvt = (PCEVT_GENERIC_T)pEvtPageTabHwErr;
+    PIOMMUCC        pThisCC     = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+
+    IOMMU_LOCK(pDevIns, pThisCC);
+    iommuAmdHwErrorSet(pDevIns, pGenericEvt);
+    iommuAmdEvtLogEntryWrite(pDevIns, pGenericEvt);
+    if (enmOp != IOMMUOP_CMD)
+        iommuAmdSetPciTargetAbort(pDevIns);
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+
+    LogFunc(("Raised PAGE_TAB_HARDWARE_ERROR. idDevice=%#x idDomain=%#x GCPhysPtEntity=%#RGp enmOp=%u u2Type=%u\n",
+             pEvtPageTabHwErr->n.u16DevId, pEvtPageTabHwErr->n.u16DomainOrPasidLo, pEvtPageTabHwErr->n.u64Addr, enmOp,
+             pEvtPageTabHwErr->n.u2Type));
+}
+
+
+#ifdef IN_RING3
+/**
+ * Fills in a COMMAND_HARDWARE_ERROR event.
+ *
+ * The event is zeroed first; only the event code, the error type (always a data
+ * error) and the address are set.
+ *
+ * @param   GCPhysAddr      The system physical address the IOMMU attempted to access.
+ * @param   pEvtCmdHwErr    Where to store the initialized event.
+ */
+static void iommuAmdCmdHwErrorEventInit(RTGCPHYS GCPhysAddr, PEVT_CMD_HW_ERR_T pEvtCmdHwErr)
+{
+    memset(pEvtCmdHwErr, 0, sizeof(*pEvtCmdHwErr));
+
+    pEvtCmdHwErr->n.u4EvtCode = IOMMU_EVT_COMMAND_HW_ERROR;
+    pEvtCmdHwErr->n.u64Addr   = GCPhysAddr;
+    pEvtCmdHwErr->n.u2Type    = HWEVTTYPE_DATA_ERROR;
+}
+
+
+/**
+ * Raises a COMMAND_HARDWARE_ERROR event.
+ *
+ * With the IOMMU lock held, the event is recorded in the hardware error
+ * registers and written to the event log, after which the command-buffer-running
+ * bit is cleared in the status register.
+ *
+ * @param   pDevIns         The IOMMU device instance.
+ * @param   pEvtCmdHwErr    The command hardware error event.
+ *
+ * @thread  Any.
+ */
+static void iommuAmdCmdHwErrorEventRaise(PPDMDEVINS pDevIns, PCEVT_CMD_HW_ERR_T pEvtCmdHwErr)
+{
+    /* The event is passed on as a generic event, so both layouts must be the same size. */
+    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_CMD_HW_ERR_T));
+    PCEVT_GENERIC_T pGenericEvt = (PCEVT_GENERIC_T)pEvtCmdHwErr;
+    PIOMMU          pThis       = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUCC        pThisCC     = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+
+    IOMMU_LOCK(pDevIns, pThisCC);
+    iommuAmdHwErrorSet(pDevIns, pGenericEvt);
+    iommuAmdEvtLogEntryWrite(pDevIns, pGenericEvt);
+    /* Mark the command buffer as no longer running. */
+    ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+
+    LogFunc(("Raised COMMAND_HARDWARE_ERROR. GCPhysCmd=%#RGp u2Type=%u\n", pEvtCmdHwErr->n.u64Addr, pEvtCmdHwErr->n.u2Type));
+}
+#endif /* IN_RING3 */
+
+
+/**
+ * Fills in a DEV_TAB_HARDWARE_ERROR event.
+ *
+ * The event is zeroed first, so every field not assigned here ends up as 0.
+ *
+ * @param   idDevice            The device ID (bus, device, function).
+ * @param   GCPhysDte           The system physical address of the failed device table
+ *                              access.
+ * @param   enmOp               The IOMMU operation being performed.
+ * @param   pEvtDevTabHwErr     Where to store the initialized event.
+ */
+static void iommuAmdDevTabHwErrorEventInit(uint16_t idDevice, RTGCPHYS GCPhysDte, IOMMUOP enmOp,
+                                           PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
+{
+    memset(pEvtDevTabHwErr, 0, sizeof(*pEvtDevTabHwErr));
+
+    /* What kind of event this is, which device and which address it concerns. */
+    pEvtDevTabHwErr->n.u4EvtCode = IOMMU_EVT_DEV_TAB_HW_ERROR;
+    pEvtDevTabHwErr->n.u16DevId  = idDevice;
+    pEvtDevTabHwErr->n.u64Addr   = GCPhysDte;
+
+    /* Flags derived from the operation being performed. */
+    pEvtDevTabHwErr->n.u1Intr        = enmOp == IOMMUOP_INTR_REQ;
+    /** @todo IOMMU: Any other transaction type that can set read/write bit? */
+    pEvtDevTabHwErr->n.u1ReadWrite   = enmOp == IOMMUOP_MEM_WRITE;
+    pEvtDevTabHwErr->n.u1Translation = enmOp == IOMMUOP_TRANSLATE_REQ;
+
+    /* Commands are reported as data errors, everything else as target aborts. */
+    if (enmOp == IOMMUOP_CMD)
+        pEvtDevTabHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR;
+    else
+        pEvtDevTabHwErr->n.u2Type = HWEVTTYPE_TARGET_ABORT;
+}
+
+
+/**
+ * Raises a DEV_TAB_HARDWARE_ERROR event.
+ *
+ * With the IOMMU lock held, the event is recorded in the hardware error
+ * registers and written to the event log. For any operation other than a
+ * command, a PCI target abort is signalled as well.
+ *
+ * @param   pDevIns             The IOMMU device instance.
+ * @param   enmOp               The IOMMU operation being performed.
+ * @param   pEvtDevTabHwErr     The device table hardware error event.
+ *
+ * @thread  Any.
+ */
+static void iommuAmdDevTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
+{
+    /* The event is passed on as a generic event, so both layouts must be the same size. */
+    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_DEV_TAB_HW_ERROR_T));
+    PCEVT_GENERIC_T pGenericEvt = (PCEVT_GENERIC_T)pEvtDevTabHwErr;
+    PIOMMUCC        pThisCC     = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+
+    IOMMU_LOCK(pDevIns, pThisCC);
+    iommuAmdHwErrorSet(pDevIns, pGenericEvt);
+    iommuAmdEvtLogEntryWrite(pDevIns, pGenericEvt);
+    if (enmOp != IOMMUOP_CMD)
+        iommuAmdSetPciTargetAbort(pDevIns);
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+
+    LogFunc(("Raised DEV_TAB_HARDWARE_ERROR. idDevice=%#x GCPhysDte=%#RGp enmOp=%u u2Type=%u\n", pEvtDevTabHwErr->n.u16DevId,
+             pEvtDevTabHwErr->n.u64Addr, enmOp, pEvtDevTabHwErr->n.u2Type));
+}
+
+
+#ifdef IN_RING3
+/**
+ * Fills in an ILLEGAL_COMMAND_ERROR event.
+ *
+ * The event is zeroed first; only the event code and the address are set.
+ *
+ * @param   GCPhysCmd       The system physical address of the failed command
+ *                          access. The low 4 bits must be clear (asserted).
+ * @param   pEvtIllegalCmd  Where to store the initialized event.
+ */
+static void iommuAmdIllegalCmdEventInit(RTGCPHYS GCPhysCmd, PEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
+{
+    Assert(!(GCPhysCmd & UINT64_C(0xf)));
+
+    memset(pEvtIllegalCmd, 0, sizeof(*pEvtIllegalCmd));
+    pEvtIllegalCmd->n.u64Addr   = GCPhysCmd;
+    pEvtIllegalCmd->n.u4EvtCode = IOMMU_EVT_ILLEGAL_CMD_ERROR;
+}
+
+
+/**
+ * Raises an ILLEGAL_COMMAND_ERROR event.
+ *
+ * Writes the event to the event log and then clears the command-buffer-running
+ * bit in the status register.
+ *
+ * @param   pDevIns         The IOMMU device instance.
+ * @param   pEvtIllegalCmd  The illegal command error event.
+ */
+static void iommuAmdIllegalCmdEventRaise(PPDMDEVINS pDevIns, PCEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
+{
+    /*
+     * The event is handed to the event log as a generic event, so the layout that is
+     * actually being cast (the illegal command event, not the illegal DTE event) must
+     * have the same size as the generic one.
+     */
+    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(*pEvtIllegalCmd));
+    PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalCmd;
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
+
+    /* Mark the command buffer as no longer running. */
+    ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
+
+    LogFunc(("Raised ILLEGAL_COMMAND_ERROR. Addr=%#RGp\n", pEvtIllegalCmd->n.u64Addr));
+}
+#endif /* IN_RING3 */
+
+
+/**
+ * Fills in an ILLEGAL_DEV_TABLE_ENTRY event.
+ *
+ * The event is zeroed first, so every field not assigned here ends up as 0. The
+ * low two bits of the I/O virtual address are masked off before it is stored.
+ *
+ * @param   idDevice        The device ID (bus, device, function).
+ * @param   uIova           The I/O virtual address. The low 2 bits are expected
+ *                          to be clear (asserted).
+ * @param   fRsvdNotZero    Whether reserved bits are not zero. Pass @c false if the
+ *                          event was caused by an invalid level encoding in the
+ *                          DTE.
+ * @param   enmOp           The IOMMU operation being performed.
+ * @param   pEvtIllegalDte  Where to store the initialized event.
+ */
+static void iommuAmdIllegalDteEventInit(uint16_t idDevice, uint64_t uIova, bool fRsvdNotZero, IOMMUOP enmOp,
+                                        PEVT_ILLEGAL_DTE_T pEvtIllegalDte)
+{
+    memset(pEvtIllegalDte, 0, sizeof(*pEvtIllegalDte));
+
+    /* What kind of event this is and which device it concerns. */
+    pEvtIllegalDte->n.u4EvtCode = IOMMU_EVT_ILLEGAL_DEV_TAB_ENTRY;
+    pEvtIllegalDte->n.u16DevId  = idDevice;
+
+    /* Flags derived from the operation being performed and from the caller. */
+    pEvtIllegalDte->n.u1Interrupt   = enmOp == IOMMUOP_INTR_REQ;
+    pEvtIllegalDte->n.u1ReadWrite   = enmOp == IOMMUOP_MEM_WRITE;
+    pEvtIllegalDte->n.u1Translation = enmOp == IOMMUOP_TRANSLATE_REQ;
+    pEvtIllegalDte->n.u1RsvdNotZero = fRsvdNotZero;
+
+    /** @todo r=ramshankar: Not sure why the last 2 bits are marked as reserved by the
+     *        IOMMU spec here but not for this field for I/O page fault event. */
+    pEvtIllegalDte->n.u64Addr = uIova & ~UINT64_C(0x3);
+    Assert(!(uIova & UINT64_C(0x3)));
+}
+
+
+/**
+ * Raises an ILLEGAL_DEV_TABLE_ENTRY event.
+ *
+ * Writes the event to the event log and, for any operation other than a
+ * command, signals a PCI target abort.
+ *
+ * @param   pDevIns         The IOMMU instance data.
+ * @param   enmOp           The IOMMU operation being performed.
+ * @param   pEvtIllegalDte  The illegal device table entry event.
+ * @param   enmEvtType      The illegal device table entry event type. Only used
+ *                          for logging.
+ *
+ * @thread  Any.
+ */
+static void iommuAmdIllegalDteEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PCEVT_ILLEGAL_DTE_T pEvtIllegalDte,
+                                         EVT_ILLEGAL_DTE_TYPE_T enmEvtType)
+{
+    /* The event is passed on as a generic event, so both layouts must be the same size. */
+    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
+
+    iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvtIllegalDte);
+    if (enmOp != IOMMUOP_CMD)
+        iommuAmdSetPciTargetAbort(pDevIns);
+
+    LogFunc(("Raised ILLEGAL_DTE_EVENT. idDevice=%#x uIova=%#RX64 enmOp=%u enmEvtType=%u\n", pEvtIllegalDte->n.u16DevId,
+             pEvtIllegalDte->n.u64Addr, enmOp, enmEvtType));
+    NOREF(enmEvtType);
+}
+
+
+/**
+ * Fills in an IO_PAGE_FAULT event.
+ *
+ * The event is zeroed first, so every field not assigned here (PASID high bits,
+ * guest/nested, no-execute and user bits) ends up as 0.
+ *
+ * @param   idDevice            The device ID (bus, device, function).
+ * @param   idDomain            The domain ID.
+ * @param   uIova               The I/O virtual address being accessed.
+ * @param   fPresent            Transaction to a page marked as present (including
+ *                              DTE.V=1) or interrupt marked as remapped
+ *                              (IRTE.RemapEn=1).
+ * @param   fRsvdNotZero        Whether reserved bits are not zero. Pass @c false if
+ *                              the I/O page fault was caused by invalid level
+ *                              encoding.
+ * @param   fPermDenied         Permission denied for the address being accessed.
+ *                              May only be @c true when @a fPresent is too
+ *                              (asserted).
+ * @param   enmOp               The IOMMU operation being performed.
+ * @param   pEvtIoPageFault     Where to store the initialized event.
+ */
+static void iommuAmdIoPageFaultEventInit(uint16_t idDevice, uint16_t idDomain, uint64_t uIova, bool fPresent, bool fRsvdNotZero,
+                                         bool fPermDenied, IOMMUOP enmOp, PEVT_IO_PAGE_FAULT_T pEvtIoPageFault)
+{
+    Assert(!fPermDenied || fPresent);
+
+    /* u4PasidHi, u1GuestOrNested, u1NoExecute and u1User are left at 0 by this. */
+    memset(pEvtIoPageFault, 0, sizeof(*pEvtIoPageFault));
+
+    /* What kind of event this is and which address it concerns. */
+    pEvtIoPageFault->n.u4EvtCode = IOMMU_EVT_IO_PAGE_FAULT;
+    pEvtIoPageFault->n.u64Addr   = uIova;
+
+    /* Who caused it. */
+    pEvtIoPageFault->n.u16DevId           = idDevice;
+    pEvtIoPageFault->n.u16DomainOrPasidLo = idDomain;
+
+    /* Flags supplied by the caller. */
+    pEvtIoPageFault->n.u1Present     = fPresent;
+    pEvtIoPageFault->n.u1PermDenied  = fPermDenied;
+    pEvtIoPageFault->n.u1RsvdNotZero = fRsvdNotZero;
+
+    /* Flags derived from the operation being performed. */
+    pEvtIoPageFault->n.u1Interrupt   = enmOp == IOMMUOP_INTR_REQ;
+    pEvtIoPageFault->n.u1ReadWrite   = enmOp == IOMMUOP_MEM_WRITE;
+    pEvtIoPageFault->n.u1Translation = enmOp == IOMMUOP_TRANSLATE_REQ;
+}
+
+
+/**
+ * Raises an IO_PAGE_FAULT event.
+ *
+ * Depending on the event type and the operation, this writes the event to the
+ * event log (unless logging is suppressed, see below), records in the DTE cache
+ * that an I/O page fault was raised for the device (only when
+ * IOMMU_WITH_DTE_CACHE is defined) and/or signals a PCI target abort.
+ *
+ * Event logging is suppressed:
+ * - for memory reads/writes, when @a fIoDevFlags has the valid flag together
+ * with either the suppress-all-IOPF flag, or both the suppress-IOPF and the
+ * I/O-page-fault-raised flags;
+ * - for interrupt requests, when @a fIoDevFlags has both the interrupt-map-valid
+ * and ignore-unmapped-interrupt flags, or otherwise when @a pIrte is given and
+ * has its suppress-I/O-page-fault bit set;
+ * - never for commands.
+ *
+ * @param pDevIns The IOMMU instance data.
+ * @param fIoDevFlags The I/O device flags, see IOMMU_DTE_CACHE_F_XXX.
+ * @param pIrte The interrupt remapping table entry, can be NULL.
+ * @param enmOp The IOMMU operation being performed.
+ * @param pEvtIoPageFault The I/O page fault event.
+ * @param enmEvtType The I/O page fault event type.
+ *
+ * @thread Any.
+ */
+static void iommuAmdIoPageFaultEventRaise(PPDMDEVINS pDevIns, uint16_t fIoDevFlags, PCIRTE_T pIrte, IOMMUOP enmOp,
+ PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
+{
+ AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_IO_PAGE_FAULT_T));
+ PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault;
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ STAM_COUNTER_INC(&pThis->StatIopfs); NOREF(pThis); /* Counted for every call, even when logging ends up suppressed. */
+
+#ifdef IOMMU_WITH_DTE_CACHE
+# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) iommuAmdDteCacheUpdateFlags((a_pDevIns), (a_DevId), \
+ IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED, \
+ 0 /* fAndMask */)
+#else
+# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) do { } while (0)
+#endif
+
+ bool fSuppressEvtLogging = false; /* Decided below from the operation, the device flags and the IRTE. */
+ if ( enmOp == IOMMUOP_MEM_READ
+ || enmOp == IOMMUOP_MEM_WRITE)
+ {
+ uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_VALID
+ | IOMMU_DTE_CACHE_F_SUPPRESS_IOPF | IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED;
+ uint16_t const fSuppressAllIopf = IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF;
+ if ( (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
+ || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
+ {
+ fSuppressEvtLogging = true;
+ }
+ }
+ else if (enmOp == IOMMUOP_INTR_REQ)
+ {
+ uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_INTR_MAP_VALID | IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR;
+ if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
+ fSuppressEvtLogging = true;
+ else if (pIrte) /** @todo Make this compulsory and assert if it isn't provided. */
+ fSuppressEvtLogging = pIrte->n.u1SuppressIoPf;
+ }
+ /* else: Events are never suppressed for commands. */
+
+ switch (enmEvtType) /* No default case: event types not listed below result in no action. */
+ {
+ case kIoPageFaultType_PermDenied:
+ {
+ /* Cannot be triggered by a command. */
+ Assert(enmOp != IOMMUOP_CMD);
+ RT_FALL_THRU();
+ }
+ case kIoPageFaultType_DteRsvdPagingMode:
+ case kIoPageFaultType_PteInvalidPageSize:
+ case kIoPageFaultType_PteInvalidLvlEncoding:
+ case kIoPageFaultType_SkippedLevelIovaNotZero:
+ case kIoPageFaultType_PteRsvdNotZero:
+ case kIoPageFaultType_PteValidNotSet:
+ case kIoPageFaultType_DteTranslationDisabled:
+ case kIoPageFaultType_PasidInvalidRange:
+ {
+ /*
+ * For a translation request, the IOMMU doesn't signal an I/O page fault nor does it
+ * create an event log entry. See AMD IOMMU spec. 2.1.3.2 "I/O Page Faults".
+ */
+ if (enmOp != IOMMUOP_TRANSLATE_REQ)
+ {
+ if (!fSuppressEvtLogging)
+ {
+ iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
+ IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
+ }
+ if (enmOp != IOMMUOP_CMD) /* The target abort is signalled even when logging is suppressed. */
+ iommuAmdSetPciTargetAbort(pDevIns);
+ }
+ break;
+ }
+
+ case kIoPageFaultType_UserSupervisor:
+ {
+ /* Access is blocked and only creates an event log entry. */
+ if (!fSuppressEvtLogging)
+ {
+ iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
+ IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
+ }
+ break;
+ }
+
+ case kIoPageFaultType_IrteAddrInvalid:
+ case kIoPageFaultType_IrteRsvdNotZero:
+ case kIoPageFaultType_IrteRemapEn:
+ case kIoPageFaultType_IrteRsvdIntType:
+ case kIoPageFaultType_IntrReqAborted:
+ case kIoPageFaultType_IntrWithPasid:
+ {
+ /* Only triggered by interrupt requests. */
+ Assert(enmOp == IOMMUOP_INTR_REQ);
+ if (!fSuppressEvtLogging)
+ {
+ iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
+ IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
+ }
+ iommuAmdSetPciTargetAbort(pDevIns); /* Signalled even when logging is suppressed. */
+ break;
+ }
+
+ case kIoPageFaultType_SmiFilterMismatch:
+ {
+ /* Not supported and probably will never be, assert. */
+ AssertMsgFailed(("kIoPageFaultType_SmiFilterMismatch - Upstream SMI requests not supported/implemented."));
+ break;
+ }
+
+ case kIoPageFaultType_DevId_Invalid:
+ {
+ /* Cannot be triggered by a command. */
+ Assert(enmOp != IOMMUOP_CMD);
+ Assert(enmOp != IOMMUOP_TRANSLATE_REQ); /** @todo IOMMU: We don't support translation requests yet. */
+ if (!fSuppressEvtLogging)
+ {
+ iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
+ IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
+ }
+ if ( enmOp == IOMMUOP_MEM_READ
+ || enmOp == IOMMUOP_MEM_WRITE) /* Only memory accesses are target-aborted here. */
+ iommuAmdSetPciTargetAbort(pDevIns);
+ break;
+ }
+ }
+
+#undef IOMMU_DTE_CACHE_SET_PF_RAISED
+}
+
+
+/**
+ * Raises an IO_PAGE_FAULT event given the DTE.
+ *
+ * Convenience wrapper that derives the I/O device flags from the device table
+ * entry and forwards everything to iommuAmdIoPageFaultEventRaise.
+ *
+ * @param   pDevIns             The IOMMU instance data.
+ * @param   pDte                The device table entry. Must not be NULL (asserted).
+ * @param   pIrte               The interrupt remapping table entry, can be NULL.
+ * @param   enmOp               The IOMMU operation being performed.
+ * @param   pEvtIoPageFault     The I/O page fault event.
+ * @param   enmEvtType          The I/O page fault event type.
+ *
+ * @thread  Any.
+ */
+static void iommuAmdIoPageFaultEventRaiseWithDte(PPDMDEVINS pDevIns, PCDTE_T pDte, PCIRTE_T pIrte, IOMMUOP enmOp,
+                                                 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
+{
+    Assert(pDte);
+    iommuAmdIoPageFaultEventRaise(pDevIns, iommuAmdGetBasicDevFlags(pDte), pIrte, enmOp, pEvtIoPageFault, enmEvtType);
+}
+
+
+/**
+ * Reads the device table entry for the given device ID from guest memory.
+ *
+ * The device table segment, the offset of the entry within it and the segment
+ * size are computed from the IOMMU registers while holding the IOMMU lock; the
+ * memory read itself and any event raising happen after the lock is released.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS if the entry was read into @a pDte.
+ * @retval  VERR_IOMMU_DTE_READ_FAILED if reading guest memory failed; a
+ *          DEV_TAB_HARDWARE_ERROR event has been raised.
+ * @retval  VERR_IOMMU_DTE_BAD_OFFSET if the entry lies outside the device table
+ *          segment; an IO_PAGE_FAULT event has been raised.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   enmOp       The IOMMU operation being performed.
+ * @param   pDte        Where to store the device table entry.
+ *
+ * @thread  Any.
+ */
+static int iommuAmdDteRead(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PDTE_T pDte)
+{
+    PCIOMMU  pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
+
+    IOMMU_LOCK(pDevIns, pThisCC);
+
+    /* Determine the device table segment the device ID selects. */
+    uint8_t const idxSegsEn = pThis->Ctrl.n.u3DevTabSegEn;
+    Assert(idxSegsEn < RT_ELEMENTS(g_auDevTabSegShifts));
+
+    uint8_t const idxSeg = (idDevice & g_auDevTabSegMasks[idxSegsEn]) >> g_auDevTabSegShifts[idxSegsEn];
+    Assert(idxSeg < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
+    AssertCompile(RT_ELEMENTS(g_auDevTabSegShifts) == RT_ELEMENTS(g_auDevTabSegMasks));
+
+    /* Locate the entry: segment base plus the remaining device ID bits scaled by the entry size. */
+    RTGCPHYS const GCPhysDevTab = pThis->aDevTabBaseAddrs[idxSeg].n.u40Base << X86_PAGE_4K_SHIFT;
+    uint32_t const offDte       = (idDevice & ~g_auDevTabSegMasks[idxSegsEn]) * sizeof(DTE_T);
+    RTGCPHYS const GCPhysDte    = GCPhysDevTab + offDte;
+
+    /* Size of the segment in bytes, needed for the bounds check below. */
+    uint32_t const cbDevTabSeg = (pThis->aDevTabBaseAddrs[idxSeg].n.u9Size + 1) << X86_PAGE_4K_SHIFT;
+
+    IOMMU_UNLOCK(pDevIns, pThisCC);
+
+    /* The whole entry must lie inside the segment, otherwise raise an I/O page fault. */
+    if (offDte + sizeof(DTE_T) > cbDevTabSeg)
+    {
+        LogFunc(("Out-of-bounds device table entry. idDevice=%#x offDte=%u cbDevTabSeg=%u -> IOPF\n", idDevice, offDte, cbDevTabSeg));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, 0 /* idDomain */, 0 /* uIova */, false /* fPresent */, false /* fRsvdNotZero */,
+                                     false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaise(pDevIns, 0 /* fIoDevFlags */, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+                                      kIoPageFaultType_DevId_Invalid);
+        return VERR_IOMMU_DTE_BAD_OFFSET;
+    }
+
+    /* Fetch the entry from guest memory; a failed read raises a device table hardware error. */
+    Assert(!(GCPhysDevTab & X86_PAGE_4K_OFFSET_MASK));
+    int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDte, pDte, sizeof(*pDte));
+    if (RT_FAILURE(rc))
+    {
+        LogFunc(("Failed to read device table entry at %#RGp. rc=%Rrc -> DevTabHwError\n", GCPhysDte, rc));
+
+        EVT_DEV_TAB_HW_ERROR_T EvtDevTabHwErr;
+        iommuAmdDevTabHwErrorEventInit(idDevice, GCPhysDte, enmOp, &EvtDevTabHwErr);
+        iommuAmdDevTabHwErrorEventRaise(pDevIns, enmOp, &EvtDevTabHwErr);
+        return VERR_IOMMU_DTE_READ_FAILED;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Validates a device table entry before an address translation is attempted.
+ *
+ * The checks are, in order: the translation-valid bit, the permission bits
+ * against the requested permissions, a root page table level of 0 (translation
+ * disabled) and a root page table level above the supported maximum.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS if the DTE is valid and supports address translation.
+ * @retval  VINF_IOMMU_ADDR_TRANSLATION_DISABLED if the DTE is valid but address
+ *          translation is disabled.
+ * @retval  VERR_IOMMU_ADDR_TRANSLATION_FAILED if an error occurred and any
+ *          corresponding event was raised.
+ * @retval  VERR_IOMMU_ADDR_ACCESS_DENIED if the DTE denies the requested
+ *          permissions.
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   uIova       The I/O virtual address to translate.
+ * @param   fPerm       The I/O permissions for this access, see
+ *                      IOMMU_IO_PERM_XXX.
+ * @param   pDte        The device table entry.
+ * @param   enmOp       The IOMMU operation being performed.
+ *
+ * @thread  Any.
+ */
+static int iommuAmdPreTranslateChecks(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, uint8_t fPerm, PCDTE_T pDte,
+                                      IOMMUOP enmOp)
+{
+    /*
+     * The translation information in the DTE must be valid, otherwise raise an I/O page fault.
+     */
+    if (!pDte->n.u1TranslationValid)
+    {
+        /** @todo r=ramshankar: The AMD IOMMU spec. says page walk is terminated but
+         *        doesn't explicitly say whether an I/O page fault is raised. From other
+         *        places in the spec. it seems early page walk terminations (starting with
+         *        the DTE) return the state computed so far and raises an I/O page fault. So
+         *        returning an invalid translation rather than skipping translation. */
+        LogFunc(("Translation valid bit not set -> IOPF\n"));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
+                                     false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+                                             kIoPageFaultType_DteTranslationDisabled);
+        return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+    }
+
+    /*
+     * Every requested permission must be granted by the DTE.
+     * Note: This MUST be checked prior to checking the root page table level below!
+     */
+    uint8_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
+    if ((fPerm & fDtePerm) != fPerm)
+    {
+        LogFunc(("Permission denied by DTE (fPerm=%#x fDtePerm=%#x) -> IOPF\n", fPerm, fDtePerm));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
+                                     true /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+                                             kIoPageFaultType_PermDenied);
+        return VERR_IOMMU_ADDR_ACCESS_DENIED;
+    }
+
+    /*
+     * A root page table level of 0 means translation is disabled and GPA=SPA; the
+     * DTE.IR and DTE.IW bits control permissions (verified above).
+     */
+    uint8_t const uMaxLevel = pDte->n.u3Mode;
+    if (uMaxLevel == 0)
+    {
+        Assert((fPerm & fDtePerm) == fPerm);    /* Verify we've checked permissions. */
+        return VINF_IOMMU_ADDR_TRANSLATION_DISABLED;
+    }
+
+    /*
+     * A root page table level beyond the allowed host-address translation level
+     * terminates the page walk and fails the translation.
+     */
+    if (uMaxLevel > IOMMU_MAX_HOST_PT_LEVEL)
+    {
+        /** @todo r=ramshankar: I cannot make out from the AMD IOMMU spec. if I should be
+         *        raising an ILLEGAL_DEV_TABLE_ENTRY event or an IO_PAGE_FAULT event here.
+         *        I'm just going with I/O page fault. */
+        LogFunc(("Invalid root page table level %#x (idDevice=%#x) -> IOPF\n", uMaxLevel, idDevice));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
+                                     false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+                                             kIoPageFaultType_PteInvalidLvlEncoding);
+        return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+    }
+
+    /* The DTE allows translations for this device. */
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Walks the I/O page table to translate the I/O virtual address to a system
+ * physical address.
+ *
+ * The walk starts with the first quadword of the DTE (which holds the root
+ * table address and the starting level) and repeatedly reads the page table
+ * entity indexed by the IOVA bits of the current level. For every entity read
+ * the present bit, the next-level encoding, the reserved bits and the requested
+ * permissions are checked. The walk ends at a final PTE (next-level 0 or 7);
+ * otherwise the level transition and the IOVA bits of any skipped levels are
+ * validated before descending. Every failed check raises an event and aborts
+ * the walk.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if a final PTE granting the requested permissions was
+ * found; @a pPageLookup holds the result.
+ * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if an entity is not present or is
+ * malformed (an I/O page fault event is raised).
+ * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if an entity does not grant @a fPerm (an
+ * I/O page fault event is raised).
+ * @retval VERR_IOMMU_IPE_2 if a page table entity could not be read from guest
+ * memory (a page table hardware error event is raised).
+ *
+ * @param pDevIns The IOMMU device instance.
+ * @param uIova The I/O virtual address to translate. Must be 4K aligned.
+ * @param fPerm The I/O permissions for this access, see
+ * IOMMU_IO_PERM_XXX.
+ * @param idDevice The device ID (bus, device, function).
+ * @param pDte The device table entry.
+ * @param enmOp The IOMMU operation being performed.
+ * @param pPageLookup Where to store the results of the I/O page lookup. This
+ * is only updated when VINF_SUCCESS is returned.
+ *
+ * @thread Any.
+ */
+static int iommuAmdIoPageTableWalk(PPDMDEVINS pDevIns, uint64_t uIova, uint8_t fPerm, uint16_t idDevice, PCDTE_T pDte,
+ IOMMUOP enmOp, PIOPAGELOOKUP pPageLookup)
+{
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ Assert(pDte->n.u1Valid);
+ Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
+
+ /* The virtual address bits indexing table: entry N is the lowest IOVA bit used
+ * to index a level N table (and thus the default page shift of a level N PTE).
+ * Entries 0 and 7 are not valid table levels and are zero. */
+ static uint8_t const s_acIovaLevelShifts[] = { 0, 12, 21, 30, 39, 48, 57, 0 };
+ AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) > IOMMU_MAX_HOST_PT_LEVEL);
+
+ /*
+ * Traverse the I/O page table starting with the page directory in the DTE.
+ *
+ * The Valid (Present bit), Translation Valid and Mode (Next-Level bits) in
+ * the DTE have been validated already, see iommuAmdPreTranslateChecks.
+ */
+ IOPTENTITY_T PtEntity;
+ PtEntity.u64 = pDte->au64[0];
+ for (;;)
+ {
+ uint8_t const uLevel = PtEntity.n.u3NextLevel; /* Level of the table we are about to index. */
+
+ /* Read the page table entity at the current level. */
+ {
+ Assert(uLevel > 0 && uLevel < RT_ELEMENTS(s_acIovaLevelShifts));
+ Assert(uLevel <= IOMMU_MAX_HOST_PT_LEVEL);
+ uint16_t const idxPte = (uIova >> s_acIovaLevelShifts[uLevel]) & UINT64_C(0x1ff); /* 9 index bits per level. */
+ uint64_t const offPte = idxPte << 3; /* Each entity is 8 bytes. */
+ RTGCPHYS const GCPhysPtEntity = (PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK) + offPte;
+ int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysPtEntity, &PtEntity.u64, sizeof(PtEntity)); /* Overwrites the parent entity. */
+ if (RT_FAILURE(rc))
+ {
+ LogFunc(("Failed to read page table entry at %#RGp. rc=%Rrc -> PageTabHwError\n", GCPhysPtEntity, rc));
+ EVT_PAGE_TAB_HW_ERR_T EvtPageTabHwErr;
+ iommuAmdPageTabHwErrorEventInit(idDevice, pDte->n.u16DomainId, GCPhysPtEntity, enmOp, &EvtPageTabHwErr);
+ iommuAmdPageTabHwErrorEventRaise(pDevIns, enmOp, &EvtPageTabHwErr);
+ return VERR_IOMMU_IPE_2;
+ }
+ }
+
+ /* Check present bit. */
+ if (PtEntity.n.u1Present)
+ { /* likely */ }
+ else
+ {
+ LogFunc(("Page table entry not present. idDevice=%#x uIova=%#RX64 -> IOPF\n", idDevice, uIova));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
+ false /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_PermDenied); /* NOTE(review): a non-present entry is reported with the PermDenied fault type - confirm this is intended. */
+ return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+
+ /* Validate the encoding of the next level. */
+ uint8_t const uNextLevel = PtEntity.n.u3NextLevel;
+#if IOMMU_MAX_HOST_PT_LEVEL < 6
+ if (uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL)
+ { /* likely */ }
+ else
+ {
+ LogFunc(("Next-level/paging-mode field of the paging entity invalid. uNextLevel=%#x -> IOPF\n", uNextLevel));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, true /* fRsvdNotZero */,
+ false /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_PteInvalidLvlEncoding);
+ return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+#endif
+
+ /* Check reserved bits: final PTEs (next level 0 or 7) and PDEs use different reserved masks. */
+ uint64_t const fRsvdMask = uNextLevel == 0 || uNextLevel == 7 ? IOMMU_PTE_RSVD_MASK : IOMMU_PDE_RSVD_MASK;
+ if (!(PtEntity.u64 & fRsvdMask))
+ { /* likely */ }
+ else
+ {
+ LogFunc(("Page table entity (%#RX64 level=%u) reserved bits set -> IOPF\n", PtEntity.u64, uNextLevel));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, true /* fRsvdNotZero */,
+ false /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_PteRsvdNotZero);
+ return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+
+ /* Check permission bits: every requested permission must be granted by this entity. */
+ uint8_t const fPtePerm = (PtEntity.u64 >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
+ if ((fPerm & fPtePerm) == fPerm)
+ { /* likely */ }
+ else
+ {
+ LogFunc(("Page table entry access denied. idDevice=%#x fPerm=%#x fPtePerm=%#x -> IOPF\n", idDevice, fPerm, fPtePerm));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
+ true /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_PermDenied);
+ return VERR_IOMMU_ADDR_ACCESS_DENIED;
+ }
+
+ /* If the next level is 0 or 7, this is the final level PTE. */
+ if (uNextLevel == 0)
+ {
+ /* The page size of the translation is the default size for the level. */
+ uint8_t const cShift = s_acIovaLevelShifts[uLevel];
+ RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
+ pPageLookup->GCPhysSpa = GCPhysPte & X86_GET_PAGE_BASE_MASK(cShift);
+ pPageLookup->cShift = cShift;
+ pPageLookup->fPerm = fPtePerm;
+ return VINF_SUCCESS;
+ }
+ if (uNextLevel == 7)
+ {
+ /* The default page size of the translation is overridden: scan upwards from
+ * bit 12 for the first clear address bit. Because of the post-increment,
+ * cShift ends up one past that bit when the loop terminates. */
+ uint8_t cShift = X86_PAGE_4K_SHIFT;
+ RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
+ while (GCPhysPte & RT_BIT_64(cShift++))
+ ;
+
+ /* The page size must be larger than the default size and lower than the default size of the higher level. */
+ if ( cShift > s_acIovaLevelShifts[uLevel]
+ && cShift < s_acIovaLevelShifts[uLevel + 1])
+ {
+ pPageLookup->GCPhysSpa = GCPhysPte & X86_GET_PAGE_BASE_MASK(cShift);
+ pPageLookup->cShift = cShift;
+ pPageLookup->fPerm = fPtePerm;
+ STAM_COUNTER_INC(&pThis->StatNonStdPageSize); NOREF(pThis);
+ return VINF_SUCCESS;
+ }
+
+ LogFunc(("Page size invalid. idDevice=%#x cShift=%u -> IOPF\n", idDevice, cShift));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
+ false /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_PteInvalidPageSize);
+ return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+
+ /* Validate level transition: the walk must strictly descend, which also guarantees termination. */
+ if (uNextLevel < uLevel)
+ { /* likely */ }
+ else
+ {
+ LogFunc(("Next level (%#x) must be less than the current level (%#x) -> IOPF\n", uNextLevel, uLevel));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
+ false /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_PteInvalidLvlEncoding);
+ return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+
+ /* Ensure IOVA bits of skipped levels (if any) are zero. */
+ uint64_t const fIovaSkipMask = IOMMU_GET_MAX_VALID_IOVA(uLevel - 1) - IOMMU_GET_MAX_VALID_IOVA(uNextLevel);
+ if (!(uIova & fIovaSkipMask))
+ { /* likely */ }
+ else
+ {
+ LogFunc(("IOVA of skipped levels are not zero. uIova=%#RX64 fSkipMask=%#RX64 -> IOPF\n", uIova, fIovaSkipMask));
+ EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+ iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
+ false /* fPermDenied */, enmOp, &EvtIoPageFault);
+ iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+ kIoPageFaultType_SkippedLevelIovaNotZero);
+ return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+
+ /* Traverse to the next level (PtEntity now acts as the parent entity). */
+ }
+}
+
+
+/**
+ * I/O page lookup callback which resolves a single page by walking the I/O page
+ * tables in guest memory.
+ *
+ * This is a thin, profiled wrapper around iommuAmdIoPageTableWalk() taking the
+ * device ID, DTE and operation from the auxiliary lookup information.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS when the page is found and has the right permissions.
+ * @retval  VERR_IOMMU_ADDR_TRANSLATION_FAILED when address translation fails.
+ * @retval  VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
+ *          insufficient to what is requested.
+ *
+ * @param   pDevIns         The IOMMU instance data.
+ * @param   uIovaPage       The I/O virtual address of the page to look up (must be
+ *                          4K aligned).
+ * @param   fPerm           The I/O permissions for this access, see
+ *                          IOMMU_IO_PERM_XXX.
+ * @param   pAux            The auxiliary information required during lookup.
+ * @param   pPageLookup     Where to store the looked up I/O page.
+ */
+static DECLCALLBACK(int) iommuAmdDteLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
+                                               PIOPAGELOOKUP pPageLookup)
+{
+    /* Sanity. */
+    AssertPtr(pAux);
+    AssertPtr(pPageLookup);
+    Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
+
+    /* The shared state is only needed for the profiling macros. */
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    NOREF(pThis);
+
+    /* Walk the page tables, timing the whole walk. */
+    STAM_PROFILE_ADV_START(&pThis->StatProfDteLookup, a);
+    int const rcWalk = iommuAmdIoPageTableWalk(pDevIns, uIovaPage, fPerm, pAux->idDevice, pAux->pDte, pAux->enmOp,
+                                               pPageLookup);
+    STAM_PROFILE_ADV_STOP(&pThis->StatProfDteLookup, a);
+
+    return rcWalk;
+}
+
+
+/**
+ * Looks up a range of I/O virtual addresses, one 4K page at a time.
+ *
+ * The lookup continues for as long as every further page translates to the 4K
+ * page directly following the previous one and has identical permissions and
+ * page size. It stops at the first page breaking that rule
+ * (VERR_OUT_OF_RANGE), at the first page the lookup callback fails for (the
+ * callback status is returned), or when the whole range has been covered.
+ *
+ * @returns VBox status code.
+ * @param   pDevIns             The IOMMU instance data.
+ * @param   pfnIoPageLookup     The lookup function to use.
+ * @param   pAddrIn             The I/O address range to lookup.
+ * @param   pAux                The auxiliary information required by the lookup
+ *                              function.
+ * @param   pAddrOut            Where to store the translated I/O address page
+ *                              lookup. The address is that of the first byte of
+ *                              the range (NIL_RTGCPHYS if not even the first page
+ *                              could be looked up); page size and permissions
+ *                              are those of the last page successfully accepted.
+ * @param   pcbContiguous       Where to store the size of the access, i.e. the
+ *                              number of bytes covered before the lookup stopped.
+ */
+static int iommuAmdLookupIoAddrRange(PPDMDEVINS pDevIns, PFNIOPAGELOOKUP pfnIoPageLookup, PCIOADDRRANGE pAddrIn,
+                                     PCIOMMUOPAUX pAux, PIOPAGELOOKUP pAddrOut, size_t *pcbContiguous)
+{
+    /* Immutable inputs. */
+    uint64_t const uIovaFirst = pAddrIn->uAddr;
+    size_t const   cbAccess   = pAddrIn->cb;
+    uint8_t const  fPermReq   = pAddrIn->fPerm;
+    size_t const   cb4K       = X86_PAGE_4K_SIZE;
+
+    /* Iteration state. */
+    uint64_t uIovaCur    = uIovaFirst & X86_PAGE_4K_BASE_MASK;      /* The 4K page currently being looked up. */
+    uint64_t offInPage   = uIovaFirst & X86_PAGE_4K_OFFSET_MASK;    /* Only non-zero for the first page. */
+    size_t   cbLeft      = cbAccess;                                /* Bytes not yet covered. */
+    RTGCPHYS GCPhysFirst = NIL_RTGCPHYS;                            /* Translation of the first byte. */
+
+    IOPAGELOOKUP LastLookup;
+    RT_ZERO(LastLookup);
+
+    int rc;
+    for (;;)
+    {
+        /* Translate the current 4K page; bail out with the callback status on failure. */
+        IOPAGELOOKUP CurLookup;
+        rc = pfnIoPageLookup(pDevIns, uIovaCur, fPermReq, pAux, &CurLookup);
+        if (RT_FAILURE(rc))
+            break;
+
+        /*
+         * Sanity check the translation.
+         * The IOTLB cache preserves the original page sizes even though the IOVAs are split into 4K pages.
+         */
+        Assert(PageLookupShiftValid: CurLookup.cShift >= X86_PAGE_4K_SHIFT && CurLookup.cShift <= 51);
+        Assert(   pfnIoPageLookup != iommuAmdDteLookupPage
+               || !(CurLookup.GCPhysSpa & X86_GET_PAGE_OFFSET_MASK(CurLookup.cShift)));
+        Assert((CurLookup.fPerm & fPermReq) == fPermReq);
+
+        if (cbLeft == cbAccess)
+        {
+            /* First page: remember the translated address of the first byte of the access. */
+            GCPhysFirst = CurLookup.GCPhysSpa | (uIovaFirst & X86_GET_PAGE_OFFSET_MASK(CurLookup.cShift));
+        }
+        else if (   CurLookup.GCPhysSpa != LastLookup.GCPhysSpa + cb4K
+                 || CurLookup.fPerm     != LastLookup.fPerm
+                 || CurLookup.cShift    != LastLookup.cShift)
+        {
+            /*
+             * Subsequent pages must be physically contiguous with the previous page and
+             * share its permissions and page size: a single permission is used when the
+             * range is added to the IOTLB cache, and the page size tells how many offset
+             * bits to copy into the translated address when 4K pages are later retrieved
+             * from that cache.
+             */
+            Assert(cbLeft > 0);
+            rc = VERR_OUT_OF_RANGE;
+            break;
+        }
+
+        /* This page is part of the result. */
+        LastLookup = CurLookup;
+
+        /* Done if the access ends within this page; PDM doesn't expect more than it asked for. */
+        size_t const cbThisPage = cb4K - offInPage;
+        if (cbLeft <= cbThisPage)
+        {
+            cbLeft = 0;
+            break;
+        }
+
+        /* Advance to the next page, which is always accessed from offset 0. */
+        cbLeft    -= cbThisPage;
+        uIovaCur  += cb4K;
+        offInPage  = 0;
+    }
+
+    /* Report what was covered, regardless of why we stopped. */
+    *pcbContiguous      = cbAccess - cbLeft;
+    pAddrOut->fPerm     = LastLookup.fPerm;
+    pAddrOut->cShift    = LastLookup.cShift;
+    pAddrOut->GCPhysSpa = GCPhysFirst;
+    return rc;
+}
+
+
+/**
+ * Looks up an I/O virtual address from the device table.
+ *
+ * Reads the DTE of the device from guest memory and, depending on its contents,
+ * either forwards the address untranslated (DTE not valid, or translation
+ * disabled for the device) or translates it by walking the I/O page tables via
+ * iommuAmdLookupIoAddrRange(). When IOMMU_WITH_IOTLBE_CACHE is defined, the DTE
+ * cache flags and the IOTLB are updated with the outcome.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if at least the start of the access could be translated
+ * (or needs no translation); @a pcbContiguous tells how much of it.
+ * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if the DTE could not be read or has
+ * reserved bits set (illegal DTE event raised in the latter case).
+ *
+ * Other failure statuses from the pre-translation checks and the page lookup
+ * are passed through unchanged.
+ *
+ * @param pDevIns The IOMMU instance data.
+ * @param idDevice The device ID (bus, device, function).
+ * @param uIova The I/O virtual address to lookup.
+ * @param cbIova The size of the access.
+ * @param fPerm The I/O permissions for this access, see
+ * IOMMU_IO_PERM_XXX.
+ * @param enmOp The IOMMU operation being performed.
+ * @param pGCPhysSpa Where to store the translated system physical address.
+ * @param pcbContiguous Where to store the number of contiguous bytes translated
+ * and permission-checked.
+ *
+ * @thread Any.
+ */
+static int iommuAmdDteLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm, IOMMUOP enmOp,
+ PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
+{
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
+ size_t cbContiguous = 0;
+
+ /* Read the device table entry from memory. */
+ DTE_T Dte;
+ int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte);
+ if (RT_SUCCESS(rc))
+ {
+ if (Dte.n.u1Valid)
+ {
+ /* Validate bits 127:0 of the device table entry when DTE.V is 1. */
+ uint64_t const fRsvd0 = Dte.au64[0] & ~(IOMMU_DTE_QWORD_0_VALID_MASK & ~IOMMU_DTE_QWORD_0_FEAT_MASK);
+ uint64_t const fRsvd1 = Dte.au64[1] & ~(IOMMU_DTE_QWORD_1_VALID_MASK & ~IOMMU_DTE_QWORD_1_FEAT_MASK);
+ if (RT_LIKELY(!fRsvd0 && !fRsvd1))
+ {
+ /*
+ * Check if the DTE is configured for translating addresses.
+ * Note: Addresses cannot be subject to exclusion as we do -not- support remote IOTLBs,
+ * so there's no need to check the address exclusion base/limit here.
+ */
+ rc = iommuAmdPreTranslateChecks(pDevIns, idDevice, uIova, fPerm, &Dte, enmOp);
+ if (rc == VINF_SUCCESS)
+ {
+ IOADDRRANGE AddrIn;
+ AddrIn.uAddr = uIova;
+ AddrIn.cb = cbIova;
+ AddrIn.fPerm = fPerm;
+
+ IOMMUOPAUX Aux;
+ Aux.enmOp = enmOp;
+ Aux.pDte = &Dte;
+ Aux.idDevice = idDevice;
+ Aux.idDomain = Dte.n.u16DomainId;
+
+ /* Lookup the address from the DTE and I/O page tables.*/
+ IOPAGELOOKUP AddrOut;
+ rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdDteLookupPage, &AddrIn, &Aux, &AddrOut, &cbContiguous);
+ GCPhysSpa = AddrOut.GCPhysSpa; /* NIL_RTGCPHYS if not even the first page could be translated. */
+
+ /*
+ * If we stopped since translation resulted in non-contiguous physical addresses
+ * or permissions aren't identical for all pages in the access, what we translated
+ * thus far is still valid.
+ */
+ if (rc == VERR_OUT_OF_RANGE)
+ {
+ Assert(cbContiguous > 0 && cbContiguous < cbIova);
+ rc = VINF_SUCCESS;
+ STAM_COUNTER_INC(&pThis->StatAccessDteNonContig); NOREF(pThis);
+ }
+ else if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
+ STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ if (RT_SUCCESS(rc))
+ {
+ /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */
+ iommuAmdDteCacheAddOrUpdateFlags(pDevIns, &Dte, idDevice, IOMMU_DTE_CACHE_F_ADDR_TRANSLATE,
+ 0 /* fAndMask */);
+ /* Update IOTLB for the contiguous range of I/O virtual addresses. */
+ iommuAmdIotlbAddRange(pDevIns, Aux.idDomain, uIova & X86_PAGE_4K_BASE_MASK, cbContiguous, &AddrOut);
+ }
+#endif
+ }
+ else if (rc == VINF_IOMMU_ADDR_TRANSLATION_DISABLED)
+ {
+ /*
+ * Translation is disabled for this device (root paging mode is 0).
+ * GPA=SPA, but the permission bits are important and controls accesses.
+ */
+ GCPhysSpa = uIova;
+ cbContiguous = cbIova;
+ rc = VINF_SUCCESS;
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ /* Update that addresses permissions of DTE apply (but omit address translation). */
+ iommuAmdDteCacheAddOrUpdateFlags(pDevIns, &Dte, idDevice, IOMMU_DTE_CACHE_F_IO_PERM,
+ IOMMU_DTE_CACHE_F_ADDR_TRANSLATE);
+#endif
+ }
+ else
+ {
+ /* Address translation failed or access is denied. */
+ Assert(rc == VERR_IOMMU_ADDR_ACCESS_DENIED || rc == VERR_IOMMU_ADDR_TRANSLATION_FAILED);
+ GCPhysSpa = NIL_RTGCPHYS;
+ cbContiguous = 0;
+ STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied); /* NOTE(review): also counted when rc is VERR_IOMMU_ADDR_TRANSLATION_FAILED - confirm intended. */
+ }
+ }
+ else
+ {
+ /* Invalid reserved bits in the DTE, raise an error event. */
+ LogFunc(("Invalid DTE reserved bits (u64[0]=%#RX64 u64[1]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
+ EVT_ILLEGAL_DTE_T Event;
+ iommuAmdIllegalDteEventInit(idDevice, uIova, true /* fRsvdNotZero */, enmOp, &Event);
+ iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
+ rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+ }
+ else
+ {
+ /*
+ * The DTE is not valid, forward addresses untranslated.
+ * See AMD IOMMU spec. "Table 5: Feature Enablement for Address Translation".
+ * Note that rc still holds the (successful) status of the DTE read here.
+ */
+ GCPhysSpa = uIova;
+ cbContiguous = cbIova;
+ }
+ }
+ else
+ {
+ LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc));
+ rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+ }
+
+ /* Common exit: the outputs are written on every path, including failures. */
+ *pGCPhysSpa = GCPhysSpa;
+ *pcbContiguous = cbContiguous;
+ AssertMsg(rc != VINF_SUCCESS || cbContiguous > 0, ("cbContiguous=%zu\n", cbContiguous));
+ return rc;
+}
+
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+/**
+ * I/O page lookup callback which resolves a single page from the IOTLB cache.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS when the page is found and has the right permissions.
+ * @retval  VERR_NOT_FOUND when the page is not found.
+ * @retval  VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
+ *          insufficient to what is requested.
+ *
+ * @param   pDevIns         The IOMMU instance data.
+ * @param   uIovaPage       The I/O virtual address to lookup in the cache (must be
+ *                          4K aligned).
+ * @param   fPerm           The I/O permissions for this access, see
+ *                          IOMMU_IO_PERM_XXX.
+ * @param   pAux            The auxiliary information required during lookup (only
+ *                          the domain ID is used here).
+ * @param   pPageLookup     Where to store the looked up I/O page. Written whenever
+ *                          a cache entry exists, even if permissions turn out to
+ *                          be insufficient.
+ */
+static DECLCALLBACK(int) iommuAmdCacheLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
+                                                 PIOPAGELOOKUP pPageLookup)
+{
+    Assert(pAux);
+    Assert(pPageLookup);
+    Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
+
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+
+    /* Query the IOTLB for the (domain, page) pair, timing just the lookup. */
+    STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a);
+    PCIOTLBE pEntry = iommuAmdIotlbLookup(pThis, pThisR3, pAux->idDomain, uIovaPage);
+    STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a);
+
+    /* Not cached. */
+    if (!pEntry)
+        return VERR_NOT_FOUND;
+
+    /* Cached: hand out the entry and verify all requested permissions are granted. */
+    *pPageLookup = pEntry->PageLookup;
+    if ((pPageLookup->fPerm & fPerm) != fPerm)
+        return VERR_IOMMU_ADDR_ACCESS_DENIED;
+
+    STAM_COUNTER_INC(&pThis->StatAccessCacheHit);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Looks up a memory access from the DTE and IOTLB caches.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS if the access was cached and permissions are verified.
+ * @retval  VERR_OUT_OF_RANGE if the access resulted in a non-contiguous physical
+ *          address region.
+ * @retval  VERR_NOT_FOUND if the access was not cached.
+ * @retval  VERR_IOMMU_ADDR_ACCESS_DENIED if the access was cached but permissions
+ *          are insufficient.
+ *
+ * @param   pDevIns         The IOMMU instance data.
+ * @param   idDevice        The device ID (bus, device, function).
+ * @param   uIova           The I/O virtual address to lookup.
+ * @param   cbIova          The size of the access.
+ * @param   fPerm           The I/O permissions for this access, see
+ *                          IOMMU_IO_PERM_XXX.
+ * @param   enmOp           The IOMMU operation being performed.
+ * @param   pGCPhysSpa      Where to store the translated system physical address.
+ * @param   pcbContiguous   Where to store the number of contiguous bytes translated
+ *                          and permission-checked.
+ */
+static int iommuAmdIotlbCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm,
+                                    IOMMUOP enmOp, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    /*
+     * The cache lock is held across both the DTE cache and the IOTLB lookups (if any)
+     * so that the DTE cache cannot be invalidated while the IOTLB is being consulted.
+     */
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    /* Start out pessimistic ("not cached"); the branches below override this as appropriate. */
+    int rc         = VERR_NOT_FOUND;
+    *pGCPhysSpa    = NIL_RTGCPHYS;
+    *pcbContiguous = 0;
+
+    /* Lookup the DTE cache entry. */
+    uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice);
+    if (idxDte < RT_ELEMENTS(pThis->aDteCache))
+    {
+        PCDTECACHE pEntry = &pThis->aDteCache[idxDte];
+        if (   (pEntry->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE))
+            == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE))
+        {
+            /* The device requires address translation: consult the IOTLB page by page. */
+            IOMMUOPAUX Aux;
+            Aux.enmOp    = enmOp;
+            Aux.pDte     = NULL;
+            Aux.idDevice = idDevice;
+            Aux.idDomain = pEntry->idDomain;
+
+            IOADDRRANGE AddrIn;
+            AddrIn.uAddr = uIova;
+            AddrIn.cb    = cbIova;
+            AddrIn.fPerm = fPerm;
+
+            IOPAGELOOKUP AddrOut;
+            rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, pcbContiguous);
+            *pGCPhysSpa = AddrOut.GCPhysSpa;
+            Assert(*pcbContiguous <= cbIova);
+        }
+        else if (   (pEntry->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM))
+                 == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM))
+        {
+            /* Address translation is disabled, but DTE permissions apply. */
+            Assert(!(pEntry->fFlags & IOMMU_DTE_CACHE_F_ADDR_TRANSLATE));
+            uint8_t const fDtePerm = (pEntry->fFlags >> IOMMU_DTE_CACHE_F_IO_PERM_SHIFT) & IOMMU_DTE_CACHE_F_IO_PERM_MASK;
+            if ((fDtePerm & fPerm) == fPerm)
+            {
+                *pGCPhysSpa    = uIova;
+                *pcbContiguous = cbIova;
+                rc = VINF_SUCCESS;
+            }
+            else
+                rc = VERR_IOMMU_ADDR_ACCESS_DENIED;     /* Outputs stay at their "failed" defaults. */
+        }
+        else if (pEntry->fFlags & IOMMU_DTE_CACHE_F_PRESENT)
+        {
+            /* Forward addresses untranslated, without checking permissions. */
+            *pGCPhysSpa    = uIova;
+            *pcbContiguous = cbIova;
+            rc = VINF_SUCCESS;
+        }
+        /* else: the slot is not in use, i.e. not cached. */
+    }
+    /* else: no DTE cache entry for this device, i.e. not cached. */
+
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+
+    return rc;
+}
+#endif /* IOMMU_WITH_IOTLBE_CACHE */
+
+
+/**
+ * Derives the IOMMU operation and the required I/O permission from the PDM
+ * access flags, and bumps the matching read/write statistics counter.
+ *
+ * An access is treated as a write when PDMIOMMU_MEM_F_WRITE is set and as a
+ * read otherwise.
+ *
+ * @param   pThis       The shared IOMMU device state.
+ * @param   fFlags      The PDM IOMMU flags, PDMIOMMU_MEM_F_XXX.
+ * @param   penmOp      Where to store the IOMMU operation.
+ * @param   pfPerm      Where to store the IOMMU I/O permission.
+ * @param   fBulk       Whether this is a bulk read or write (only selects which
+ *                      statistics counter is incremented).
+ */
+DECLINLINE(void) iommuAmdMemAccessGetPermAndOp(PIOMMU pThis, uint32_t fFlags, PIOMMUOP penmOp, uint8_t *pfPerm, bool fBulk)
+{
+    bool const fIsWrite = (fFlags & PDMIOMMU_MEM_F_WRITE) != 0;
+
+    /* Anything that isn't a write is expected to be a read. */
+    Assert(fIsWrite || (fFlags & PDMIOMMU_MEM_F_READ));
+
+    *penmOp = fIsWrite ? IOMMUOP_MEM_WRITE   : IOMMUOP_MEM_READ;
+    *pfPerm = fIsWrite ? IOMMU_IO_PERM_WRITE : IOMMU_IO_PERM_READ;
+
+#ifdef VBOX_WITH_STATISTICS
+    if (fIsWrite)
+    {
+        if (fBulk)
+            STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkWrite));
+        else
+            STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemWrite));
+    }
+    else
+    {
+        if (fBulk)
+            STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkRead));
+        else
+            STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemRead));
+    }
+#else
+    RT_NOREF2(pThis, fBulk);
+#endif
+}
+
+
+/**
+ * Memory access transaction from a device.
+ *
+ * When the IOMMU is enabled the access is first looked up in the IOTLB cache
+ * (if compiled in) and, on a complete miss, translated via the device table and
+ * I/O page tables. When the IOMMU is disabled addresses are forwarded
+ * untranslated.
+ *
+ * On success the translation returned always belongs to @a uIova itself and
+ * covers the first @a *pcbContiguous bytes of the access. This may be less than
+ * @a cbIova, in which case the caller is expected to resume the access at
+ * @a uIova + @a *pcbContiguous.
+ *
+ * @returns VBox status code.
+ * @param   pDevIns         The IOMMU device instance.
+ * @param   idDevice        The device ID (bus, device, function).
+ * @param   uIova           The I/O virtual address being accessed.
+ * @param   cbIova          The size of the access.
+ * @param   fFlags          The access flags, see PDMIOMMU_MEM_F_XXX.
+ * @param   pGCPhysSpa      Where to store the translated system physical address.
+ * @param   pcbContiguous   Where to store the number of contiguous bytes translated
+ *                          and permission-checked.
+ *
+ * @thread  Any.
+ */
+static DECLCALLBACK(int) iommuAmdMemAccess(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova,
+                                           uint32_t fFlags, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
+{
+    /* Validate. */
+    AssertPtr(pDevIns);
+    AssertPtr(pGCPhysSpa);
+    AssertPtr(pcbContiguous);
+    Assert(cbIova > 0);
+    Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
+    if (Ctrl.n.u1IommuEn)
+    {
+        IOMMUOP enmOp;
+        uint8_t fPerm;
+        iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, false /* fBulk */);
+        LogFlowFunc(("%s: idDevice=%#x uIova=%#RX64 cb=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, uIova, cbIova));
+
+        int rc;
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+        /* Lookup the IOVA from the cache. */
+        rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
+        if (rc == VINF_SUCCESS)
+        {
+            /* All pages in the access were found in the cache with sufficient permissions. */
+            Assert(*pcbContiguous == cbIova);
+            Assert(*pGCPhysSpa != NIL_RTGCPHYS);
+            STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull);
+            return VINF_SUCCESS;
+        }
+        if (rc != VERR_OUT_OF_RANGE)
+        { /* likely */ }
+        else
+        {
+            /* Access stopped since translations resulted in non-contiguous memory, let caller resume access. */
+            Assert(*pcbContiguous > 0 && *pcbContiguous < cbIova);
+            STAM_COUNTER_INC(&pThis->StatAccessCacheNonContig);
+            return VINF_SUCCESS;
+        }
+
+        /*
+         * Access incomplete as not all pages were in the cache, or permissions were
+         * denied for a page (which typically doesn't happen).
+         */
+        AssertMsg(*pcbContiguous < cbIova, ("Invalid size: cbContiguous=%zu cbIova=%zu\n", *pcbContiguous, cbIova));
+        if (*pcbContiguous > 0)
+        {
+            /*
+             * Partial hit: the leading part of the access was found in the cache and is
+             * fully permission-checked, so hand that out and let the caller resume at the
+             * first page that missed (or was denied). That page then takes the slow path
+             * below on the next call, raising any required event.
+             *
+             * We must not continue with the slow path for the remainder here: it would
+             * overwrite the outputs with the translation of 'uIova + *pcbContiguous',
+             * which the caller would wrongly apply to 'uIova'.
+             */
+            Assert(*pGCPhysSpa != NIL_RTGCPHYS);
+            return VINF_SUCCESS;
+        }
+
+        /* Nothing usable in the cache; go through the slower path and raise the required event (if any). */
+        /* We currently are including any permission denied pages as cache misses too.*/
+        STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
+#endif
+
+        /* Lookup the IOVA from the device table. */
+        rc = iommuAmdDteLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
+        if (RT_SUCCESS(rc))
+        { /* likely */ }
+        else
+        {
+            Assert(rc != VERR_OUT_OF_RANGE);
+            LogFunc(("DTE lookup failed! idDevice=%#x uIova=%#RX64 fPerm=%u cbIova=%zu rc=%#Rrc\n", idDevice, uIova, fPerm,
+                     cbIova, rc));
+        }
+
+        return rc;
+    }
+
+    /* Addresses are forwarded without translation when the IOMMU is disabled. */
+    *pGCPhysSpa    = uIova;
+    *pcbContiguous = cbIova;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Memory access bulk (one or more 4K pages) request from a device.
+ *
+ * Each address is translated independently as an X86_PAGE_SIZE sized access:
+ * first via the IOTLB cache (if compiled in), then via the device table and I/O
+ * page tables. Translation stops at the first address that fails; entries of
+ * @a paGCPhysSpa beyond that index are left untouched.
+ *
+ * @returns VBox status code.
+ * @param   pDevIns         The IOMMU device instance.
+ * @param   idDevice        The device ID (bus, device, function).
+ * @param   cIovas          The number of addresses being accessed.
+ * @param   pauIovas        The I/O virtual addresses for each page being accessed.
+ * @param   fFlags          The access flags, see PDMIOMMU_MEM_F_XXX.
+ * @param   paGCPhysSpa     Where to store the translated physical addresses.
+ *
+ * @thread  Any.
+ */
+static DECLCALLBACK(int) iommuAmdMemBulkAccess(PPDMDEVINS pDevIns, uint16_t idDevice, size_t cIovas, uint64_t const *pauIovas,
+                                               uint32_t fFlags, PRTGCPHYS paGCPhysSpa)
+{
+    /* Validate. */
+    AssertPtr(pDevIns);
+    Assert(cIovas > 0);
+    AssertPtr(pauIovas);
+    AssertPtr(paGCPhysSpa);
+    Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
+
+    /* Addresses are forwarded without translation when the IOMMU is disabled. */
+    if (!Ctrl.n.u1IommuEn)
+    {
+        for (size_t idx = 0; idx < cIovas; idx++)
+            paGCPhysSpa[idx] = pauIovas[idx];
+        return VINF_SUCCESS;
+    }
+
+    IOMMUOP enmOp;
+    uint8_t fPerm;
+    iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, true /* fBulk */);
+    LogFlowFunc(("%s: idDevice=%#x cIovas=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, cIovas));
+
+    for (size_t idx = 0; idx < cIovas; idx++)
+    {
+        size_t cbContig;
+        int    rc;
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+        /* Fast path: the IOTLB cache. */
+        rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, pauIovas[idx], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[idx],
+                                      &cbContig);
+        if (rc == VINF_SUCCESS)
+        {
+            Assert(cbContig == X86_PAGE_SIZE);
+            Assert(paGCPhysSpa[idx] != NIL_RTGCPHYS);
+            STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull);
+            continue;
+        }
+        Assert(rc == VERR_NOT_FOUND || rc == VERR_IOMMU_ADDR_ACCESS_DENIED);
+        STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
+#endif
+
+        /* Slow path: the device table and I/O page tables. Give up on the first failure. */
+        rc = iommuAmdDteLookup(pDevIns, idDevice, pauIovas[idx], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[idx], &cbContig);
+        if (RT_FAILURE(rc))
+        {
+            LogFunc(("Failed! idDevice=%#x uIova=%#RX64 fPerm=%u rc=%Rrc\n", idDevice, pauIovas[idx], fPerm, rc));
+            return rc;
+        }
+        Assert(cbContig == X86_PAGE_SIZE);
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads an interrupt remapping table entry from guest memory given its DTE.
+ *
+ * The table location and size come from the DTE, the offset of the entry within
+ * the table from the MSI data. An I/O page fault event is raised when the entry
+ * would lie (partly) outside the table.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS if the IRTE was read.
+ * @retval  VERR_IOMMU_ADDR_TRANSLATION_FAILED if the IRTE is outside the table.
+ * @retval  VERR_IOMMU_IPE_4 if reading guest memory failed (no event is raised).
+ *
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   pDte        The device table entry.
+ * @param   GCPhysIn    The source MSI address (used for reporting errors).
+ * @param   uDataIn     The source MSI data.
+ * @param   enmOp       The IOMMU operation being performed.
+ * @param   pIrte       Where to store the interrupt remapping table entry.
+ *
+ * @thread  Any.
+ */
+static int iommuAmdIrteRead(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, RTGCPHYS GCPhysIn, uint32_t uDataIn,
+                            IOMMUOP enmOp, PIRTE_T pIrte)
+{
+    /* Ensure the IRTE length is valid. */
+    Assert(pDte->n.u4IntrTableLength < IOMMU_DTE_INTR_TAB_LEN_MAX);
+
+    /* Locate the table and the entry within it. */
+    uint16_t const offIrte         = IOMMU_GET_IRTE_OFF(uDataIn);
+    uint16_t const cbIntrTable     = IOMMU_DTE_GET_INTR_TAB_LEN(pDte);
+    RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK;
+
+    /* The IRTE must fall completely within the interrupt table. */
+    if (offIrte + sizeof(IRTE_T) > cbIntrTable)
+    {
+        LogFunc(("IRTE exceeds table length (GCPhysIntrTable=%#RGp cbIntrTable=%u offIrte=%#x uDataIn=%#x) -> IOPF\n",
+                 GCPhysIntrTable, cbIntrTable, offIrte, uDataIn));
+
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, GCPhysIn, false /* fPresent */, false /* fRsvdNotZero */,
+                                     false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+                                             kIoPageFaultType_IrteAddrInvalid);
+        return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+    }
+
+    /* Read the IRTE from memory. */
+    RTGCPHYS const GCPhysIrte = GCPhysIntrTable + offIrte;
+    Assert(!(GCPhysIrte & 3));
+    int const rcRead = PDMDevHlpPCIPhysRead(pDevIns, GCPhysIrte, pIrte, sizeof(*pIrte));
+    if (RT_FAILURE(rcRead))
+    {
+        /** @todo The IOMMU spec. does not tell what kind of error is reported in this
+         *        situation. Is it an I/O page fault or a device table hardware error?
+         *        There's no interrupt table hardware error event, but it's unclear what
+         *        we should do here. */
+        LogFunc(("Failed to read interrupt table entry at %#RGp. rc=%Rrc -> ???\n", GCPhysIrte, rcRead));
+        return VERR_IOMMU_IPE_4;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Remaps the interrupt using the interrupt remapping table.
+ *
+ * Reads the IRTE selected by the MSI data and, provided remapping is enabled,
+ * the entry is not in guest mode and its interrupt type is supported, produces
+ * the remapped MSI. Each of these conditions failing raises an I/O page fault
+ * event of the corresponding type.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS if the MSI was remapped.
+ * @retval  VERR_IOMMU_ADDR_TRANSLATION_FAILED if the IRTE does not permit
+ *          remapping (an I/O page fault event is raised).
+ *
+ * Failures from reading the IRTE are passed through unchanged.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   pDte        The device table entry.
+ * @param   enmOp       The IOMMU operation being performed.
+ * @param   pMsiIn      The source MSI.
+ * @param   pMsiOut     Where to store the remapped MSI.
+ *
+ * @thread  Any.
+ */
+static int iommuAmdIntrRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, IOMMUOP enmOp, PCMSIMSG pMsiIn,
+                             PMSIMSG pMsiOut)
+{
+    Assert(pDte->n.u2IntrCtrl == IOMMU_INTR_CTRL_REMAP);
+
+    /* Fetch the IRTE; any event for a failure here has been dealt with by the reader. */
+    IRTE_T Irte;
+    uint32_t const uMsiInData = pMsiIn->Data.u32;
+    int const rcRead = iommuAmdIrteRead(pDevIns, idDevice, pDte, pMsiIn->Addr.u64, uMsiInData, enmOp, &Irte);
+    if (RT_FAILURE(rcRead))
+        return rcRead;
+
+    /* Remapping must be enabled in the IRTE. */
+    if (!Irte.n.u1RemapEnable)
+    {
+        LogFunc(("Remapping disabled -> IOPF\n"));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
+                                     false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRemapEn);
+        return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+    }
+
+    /* Guest mode is not supported and is reported as a reserved bit being set. */
+    if (Irte.n.u1GuestMode)
+    {
+        LogFunc(("Guest mode not supported -> IOPF\n"));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
+                                     true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdNotZero);
+        return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+    }
+
+    /* Only interrupt types up to and including lowest priority are accepted. */
+    if (Irte.n.u3IntrType > VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
+    {
+        LogFunc(("Interrupt type (%#x) invalid -> IOPF\n", Irte.n.u3IntrType));
+        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
+        iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
+                                     true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
+        iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault,
+                                             kIoPageFaultType_IrteRsvdIntType);
+        return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
+    }
+
+    /* All good: remap the MSI and (optionally) remember the IRTE. */
+    iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte);
+#ifdef IOMMU_WITH_IRTE_CACHE
+    iommuAmdIrteCacheAdd(pDevIns, idDevice, IOMMU_GET_IRTE_OFF(uMsiInData), &Irte);
+#endif
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Looks up an MSI interrupt from the interrupt remapping table.
+ *
+ * Reads the device table entry (DTE) of the device and, depending on the DTE
+ * interrupt controls and the MSI delivery mode, either remaps the MSI through the
+ * interrupt remapping table, passes it through unmodified, or rejects it.
+ *
+ * @returns VBox status code.
+ * @retval  VINF_SUCCESS if @a pMsiOut holds the remapped or passed-through MSI.
+ * @retval  VERR_IOMMU_INTR_REMAP_FAILED if the DTE could not be read or is illegal,
+ *          the MSI address is outside the MSI region, or the MSI delivery mode is
+ *          invalid.
+ * @retval  VERR_IOMMU_INTR_REMAP_DENIED if the DTE disallows the interrupt (a PCI
+ *          target abort is signalled).
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   enmOp       The IOMMU operation being performed.
+ * @param   pMsiIn      The source MSI.
+ * @param   pMsiOut     Where to store the remapped MSI.
+ *
+ * @remarks For fixed and lowest-priority interrupts with IntCtl set to remap, the
+ *          status of iommuAmdIntrRemap() is returned unchanged.
+ *
+ * @thread  Any.
+ */
+static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
+{
+    LogFlowFunc(("idDevice=%#x (%#x:%#x:%#x) enmOp=%u\n", idDevice, ((idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK),
+                 ((idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (idDevice & VBOX_PCI_DEVFN_FUN_MASK),
+                 enmOp));
+
+    /* Read the device table entry from memory. */
+    DTE_T Dte;
+    int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte);
+    if (RT_FAILURE(rc))
+    {
+        LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc));
+        return VERR_IOMMU_INTR_REMAP_FAILED;
+    }
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+    iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte);
+#endif
+
+    /* If the interrupt map of the DTE is not valid (DTE.IV is 0), interrupts are forwarded without remapping. */
+    if (!Dte.n.u1IntrMapValid)
+    {
+        LogFlowFunc(("DTE interrupt map not valid\n"));
+        *pMsiOut = *pMsiIn;
+        return VINF_SUCCESS;
+    }
+
+    /* Validate bits 255:128 of the device table entry when DTE.IV is 1. */
+    uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK;
+    uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK;
+    if (RT_LIKELY(!fRsvd0 && !fRsvd1))
+    { /* likely */ }
+    else
+    {
+        LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
+        EVT_ILLEGAL_DTE_T Event;
+        iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
+        iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
+        return VERR_IOMMU_INTR_REMAP_FAILED;
+    }
+
+    /*
+     * LINT0/LINT1 pins cannot be driven by PCI(e) devices. Perhaps for a Southbridge
+     * that's connected through HyperTransport it might be possible; but for us, it
+     * doesn't seem we need to specially handle these pins.
+     */
+
+    /*
+     * Validate the MSI source address.
+     *
+     * 64-bit MSIs are supported by the PCI and AMD IOMMU spec. However as far as the
+     * CPU is concerned, the MSI region is fixed and we must ensure no other device
+     * claims the region as I/O space.
+     *
+     * See PCI spec. 6.1.4. "Message Signaled Interrupt (MSI) Support".
+     * See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support".
+     * See Intel spec. 10.11.1 "Message Address Register Format".
+     */
+    if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) != VBOX_MSI_ADDR_BASE)
+    {
+        /** @todo should we cause a PCI target abort here? */
+        LogRelMax(10, ("%s: MSI address region invalid %#RX64\n", IOMMU_LOG_PFX, pMsiIn->Addr.u64));
+        return VERR_IOMMU_INTR_REMAP_FAILED;
+    }
+
+    /*
+     * The IOMMU remaps fixed and arbitrated interrupts using the IRTE.
+     * See AMD IOMMU spec. "2.2.5.1 Interrupt Remapping Tables, Guest Virtual APIC Not Enabled".
+     */
+    uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
+    bool fPassThru = false;
+    switch (u8DeliveryMode)
+    {
+        case VBOX_MSI_DELIVERY_MODE_FIXED:
+        case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO:
+        {
+            uint8_t const uIntrCtrl = Dte.n.u2IntrCtrl;
+            if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
+            {
+                /* Validate the encoded interrupt table length when IntCtl specifies remapping. */
+                uint8_t const uIntrTabLen = Dte.n.u4IntrTableLength;
+                if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
+                {
+                    /*
+                     * We don't support guest interrupt remapping yet. When we do, we'll need to
+                     * check Ctrl.u1GstVirtApicEn and use the guest Virtual APIC Table Root Pointer
+                     * in the DTE rather than the Interrupt Root Table Pointer. Since the caller
+                     * already reads the control register, add that as a parameter when we eventually
+                     * support guest interrupt remapping. For now, just assert.
+                     */
+                    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+                    Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
+                    NOREF(pThis);
+
+                    return iommuAmdIntrRemap(pDevIns, idDevice, &Dte, enmOp, pMsiIn, pMsiOut);
+                }
+
+                LogFunc(("Invalid interrupt table length %#x -> Illegal DTE\n", uIntrTabLen));
+                EVT_ILLEGAL_DTE_T Event;
+                iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, false /* fRsvdNotZero */, enmOp, &Event);
+                iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntTabLen);
+                return VERR_IOMMU_INTR_REMAP_FAILED;
+            }
+
+            if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
+            {
+                fPassThru = true;
+                break;
+            }
+
+            if (uIntrCtrl == IOMMU_INTR_CTRL_TARGET_ABORT)
+            {
+                LogRelMax(10, ("%s: Remapping disallowed for fixed/arbitrated interrupt %#x -> Target abort\n",
+                               IOMMU_LOG_PFX, pMsiIn->Data.n.u8Vector));
+                iommuAmdSetPciTargetAbort(pDevIns);
+                return VERR_IOMMU_INTR_REMAP_DENIED;
+            }
+
+            Assert(uIntrCtrl == IOMMU_INTR_CTRL_RSVD); /* Paranoia. */
+            LogRelMax(10, ("%s: IntCtl mode invalid %#x -> Illegal DTE\n", IOMMU_LOG_PFX, uIntrCtrl));
+            EVT_ILLEGAL_DTE_T Event;
+            iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
+            iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntCtl);
+            return VERR_IOMMU_INTR_REMAP_FAILED;
+        }
+
+        /* SMIs are passed through unmapped. We don't implement SMI filters. */
+        case VBOX_MSI_DELIVERY_MODE_SMI:        fPassThru = true;                       break;
+        case VBOX_MSI_DELIVERY_MODE_NMI:        fPassThru = Dte.n.u1NmiPassthru;        break;
+        case VBOX_MSI_DELIVERY_MODE_INIT:       fPassThru = Dte.n.u1InitPassthru;       break;
+        case VBOX_MSI_DELIVERY_MODE_EXT_INT:    fPassThru = Dte.n.u1ExtIntPassthru;     break;
+        default:
+        {
+            LogRelMax(10, ("%s: MSI data delivery mode invalid %#x -> Target abort\n", IOMMU_LOG_PFX,
+                           u8DeliveryMode));
+            iommuAmdSetPciTargetAbort(pDevIns);
+            return VERR_IOMMU_INTR_REMAP_FAILED;
+        }
+    }
+
+    /*
+     * For those other than fixed and arbitrated interrupts, destination mode must be 0 (physical).
+     * See AMD IOMMU spec. The note below Table 19: "IOMMU Controls and Actions for Upstream Interrupts".
+     */
+    if (   u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO
+        || !pMsiIn->Addr.n.u1DestMode)
+    {
+        if (fPassThru)
+        {
+            *pMsiOut = *pMsiIn;
+            return VINF_SUCCESS;
+        }
+        LogRelMax(10, ("%s: Remapping/passthru disallowed for interrupt %#x -> Target abort\n", IOMMU_LOG_PFX,
+                       pMsiIn->Data.n.u8Vector));
+    }
+    else
+    {
+        /* The message used to contain a stray newline before "-> Target abort", splitting the log entry in two. */
+        LogRelMax(10, ("%s: Logical destination mode invalid for delivery mode %#x -> Target abort\n",
+                       IOMMU_LOG_PFX, u8DeliveryMode));
+    }
+
+    iommuAmdSetPciTargetAbort(pDevIns);
+    return VERR_IOMMU_INTR_REMAP_DENIED;
+}
+
+
+/**
+ * Interrupt remap request from a device.
+ *
+ * When the IOMMU is disabled the MSI is copied to @a pMsiOut unchanged. Otherwise
+ * the IRTE cache (when compiled in) is consulted first and the in-memory interrupt
+ * tables are used on a cache miss.
+ *
+ * @returns VBox status code.
+ * @retval  VERR_IOMMU_CANNOT_CALL_SELF if @a idDevice is the IOMMU itself.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   idDevice    The device ID (bus, device, function).
+ * @param   pMsiIn      The source MSI.
+ * @param   pMsiOut     Where to store the remapped MSI.
+ */
+static DECLCALLBACK(int) iommuAmdMsiRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
+{
+    /* Validate. */
+    Assert(pDevIns);
+    Assert(pMsiIn);
+    Assert(pMsiOut);
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    /* If this MSI was generated by the IOMMU itself, it's not subject to remapping, see @bugref{9654#c104}. */
+    if (idDevice == pThis->uPciAddress)
+        return VERR_IOMMU_CANNOT_CALL_SELF;
+
+    /* Interrupts are forwarded without remapping when the IOMMU is disabled. */
+    IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
+    if (!Ctrl.n.u1IommuEn)
+    {
+        *pMsiOut = *pMsiIn;
+        return VINF_SUCCESS;
+    }
+
+    STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap));
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+    /* Try the IRTE cache first; any failure there is treated as a miss. */
+    STAM_PROFILE_ADV_START(&pThis->StatProfIrteCacheLookup, a);
+    int const rcCache = iommuAmdIrteCacheLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
+    STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteCacheLookup, a);
+    if (RT_SUCCESS(rcCache))
+    {
+        STAM_COUNTER_INC(&pThis->StatIntrCacheHit);
+        return VINF_SUCCESS;
+    }
+    STAM_COUNTER_INC(&pThis->StatIntrCacheMiss);
+#endif
+
+    /* Fall back to the device table and interrupt remapping table in memory. */
+    STAM_PROFILE_ADV_START(&pThis->StatProfIrteLookup, a);
+    int const rcLookup = iommuAmdIntrTableLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
+    STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteLookup, a);
+    return rcLookup;
+}
+
+
+/**
+ * @callback_method_impl{FNIOMMMIONEWWRITE}
+ *
+ * Widens the 4 or 8 byte value being written to 64 bits and hands it to the
+ * register write handler.
+ */
+static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb)
+{
+    NOREF(pvUser);
+    Assert(cb == 4 || cb == 8);
+    Assert(!(off & (cb - 1)));
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); NOREF(pThis);
+
+    /* Anything other than an 8 byte access is read as a 32-bit value. */
+    uint64_t uValue;
+    if (cb == 8)
+        uValue = *(uint64_t const *)pv;
+    else
+        uValue = *(uint32_t const *)pv;
+
+    return iommuAmdRegisterWrite(pDevIns, off, cb, uValue);
+}
+
+
+/**
+ * @callback_method_impl{FNIOMMMIONEWREAD}
+ *
+ * Reads the register as a 64-bit value and stores either all of it or its low
+ * 32 bits depending on the access size. The output buffer is left untouched
+ * unless the register read returns VINF_SUCCESS.
+ */
+static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb)
+{
+    NOREF(pvUser);
+    Assert(cb == 4 || cb == 8);
+    Assert(!(off & (cb - 1)));
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); NOREF(pThis);
+
+    uint64_t uResult;
+    VBOXSTRICTRC rcStrict = iommuAmdRegisterRead(pDevIns, off, &uResult);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        switch (cb)
+        {
+            case 8:
+                *(uint64_t *)pv = uResult;
+                break;
+            default:
+                *(uint32_t *)pv = (uint32_t)uResult;
+                break;
+        }
+    }
+
+    return rcStrict;
+}
+
+
+#ifdef IN_RING3
+/**
+ * Processes a single IOMMU command, dispatching on the opcode in the command.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if the command was carried out.
+ * @retval VERR_IOMMU_CMD_INVALID_FORMAT if reserved bits are set in the command
+ * (an illegal command event is stored in @a pEvtError).
+ * @retval VERR_IOMMU_CMD_NOT_SUPPORTED if the opcode is unrecognized, is always
+ * rejected here (prefetch, PPR completion) or depends on a capability that
+ * is not advertised (an illegal command event is stored in @a pEvtError).
+ * @retval VERR_IOMMU_CMD_HW_ERROR if writing the completion-wait store data failed
+ * (a command hardware error event is stored in @a pEvtError).
+ * @retval VERR_NOT_IMPLEMENTED for INVALIDATE_IOTLB_PAGES when the capability header
+ * reports IOTLB support; @a pEvtError is not written on this path.
+ * @param pDevIns The IOMMU device instance.
+ * @param pCmd The command to process.
+ * @param GCPhysCmd The system physical address of the command.
+ * @param pEvtError Where to store the error event in case of failures. This
+ * function only initializes the event; it does not raise it.
+ *
+ * @thread Command thread.
+ */
+static int iommuAmdR3CmdProcess(PPDMDEVINS pDevIns, PCCMD_GENERIC_T pCmd, RTGCPHYS GCPhysCmd, PEVT_GENERIC_T pEvtError)
+{
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+
+ STAM_COUNTER_INC(&pThis->StatCmd);
+
+ uint8_t const bCmd = pCmd->n.u4Opcode;
+ switch (bCmd)
+ {
+ case IOMMU_CMD_COMPLETION_WAIT:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdCompWait);
+
+ PCCMD_COMWAIT_T pCmdComWait = (PCCMD_COMWAIT_T)pCmd;
+ AssertCompile(sizeof(*pCmdComWait) == sizeof(*pCmd));
+
+ /* Validate reserved bits in the command (only the first quadword is checked here). */
+ if (!(pCmdComWait->au64[0] & ~IOMMU_CMD_COM_WAIT_QWORD_0_VALID_MASK))
+ {
+ /* If Completion Store is requested, write the StoreData to the specified address. */
+ if (pCmdComWait->n.u1Store)
+ {
+ RTGCPHYS const GCPhysStore = RT_MAKE_U64(pCmdComWait->n.u29StoreAddrLo << 3, pCmdComWait->n.u20StoreAddrHi);
+ uint64_t const u64Data = pCmdComWait->n.u64StoreData;
+ int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysStore, &u64Data, sizeof(u64Data));
+ if (RT_FAILURE(rc))
+ {
+ LogFunc(("Cmd(%#x): Failed to write StoreData (%#RX64) to %#RGp, rc=%Rrc\n", bCmd, u64Data,
+ GCPhysStore, rc));
+ iommuAmdCmdHwErrorEventInit(GCPhysStore, (PEVT_CMD_HW_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_HW_ERROR;
+ }
+ }
+
+ /* If the command requests an interrupt, set the completion-wait status bit; the MSI itself is
+ * only raised when completion wait interrupts are enabled in the control register. */
+ if (pCmdComWait->n.u1Interrupt)
+ {
+ IOMMU_LOCK(pDevIns, pThisR3);
+ ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_COMPLETION_WAIT_INTR);
+ bool const fRaiseInt = pThis->Ctrl.n.u1CompWaitIntrEn;
+ IOMMU_UNLOCK(pDevIns, pThisR3);
+ if (fRaiseInt)
+ iommuAmdMsiInterruptRaise(pDevIns);
+ }
+ return VINF_SUCCESS;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_INVALID_FORMAT;
+ }
+
+ case IOMMU_CMD_INV_DEV_TAB_ENTRY:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdInvDte);
+ PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd;
+ AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd));
+
+ /* Validate reserved bits in the command. */
+ if ( !(pCmdInvDte->au64[0] & ~IOMMU_CMD_INV_DTE_QWORD_0_VALID_MASK)
+ && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK))
+ {
+#ifdef IOMMU_WITH_DTE_CACHE
+ iommuAmdDteCacheRemove(pDevIns, pCmdInvDte->n.u16DevId);
+#endif
+ return VINF_SUCCESS;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_INVALID_FORMAT;
+ }
+
+ case IOMMU_CMD_INV_IOMMU_PAGES:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages);
+ PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd;
+ AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd));
+
+ /* Validate reserved bits in the command. */
+ if ( !(pCmdInvPages->au64[0] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_0_VALID_MASK)
+ && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK))
+ {
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi);
+ uint16_t const idDomain = pCmdInvPages->n.u16DomainId;
+ uint8_t cShift;
+ if (!pCmdInvPages->n.u1Size)
+ cShift = X86_PAGE_4K_SHIFT;
+ else
+ {
+ /* Find the first clear bit starting from bit 12 to 64 of the I/O virtual address.
+ * NOTE(review): the helper called below is ASMBitLastSetU64 although the intent stated
+ * here is the first (lowest) clear bit - TODO confirm whether ASMBitFirstSetU64 was
+ * intended and how iommuAmdIotlbRemoveRange copes with large, unaligned ranges. */
+ unsigned const uFirstZeroBit = ASMBitLastSetU64(~(uIova >> X86_PAGE_4K_SHIFT));
+ cShift = X86_PAGE_4K_SHIFT + uFirstZeroBit;
+
+ /*
+ * For the address 0x7ffffffffffff000, cShift would be 76 (12+64) and the code below
+ * would do the right thing by clearing the entire cache for the specified domain ID.
+ *
+ * However, for the address 0xfffffffffffff000, cShift would be computed as 12.
+ * IOMMU behavior is undefined in this case, so it's safe to invalidate just one page.
+ * A debug-time assert is in place here to let us know if any software tries this.
+ *
+ * NOTE(review): the two worked examples above have not been re-derived against the
+ * expression actually used - verify them when touching this code.
+ *
+ * See AMD IOMMU spec. 2.4.3 "INVALIDATE_IOMMU_PAGES".
+ * See AMD IOMMU spec. Table 14: "Example Page Size Encodings".
+ */
+ Assert(uIova != UINT64_C(0xfffffffffffff000));
+ }
+
+ /*
+ * Validate invalidation size.
+ * See AMD IOMMU spec. 2.2.3 "I/O Page Tables for Host Translations".
+ */
+ if ( cShift >= 12 /* 4 KB */
+ && cShift <= 51 /* 2 PB */)
+ {
+ /* Remove the range of I/O virtual addresses requesting to be invalidated. */
+ size_t const cbIova = RT_BIT_64(cShift);
+ iommuAmdIotlbRemoveRange(pDevIns, idDomain, uIova, cbIova);
+ }
+ else
+ {
+ /*
+ * The guest provided size is invalid or exceeds the largest, meaningful page size.
+ * In such situations we must remove all ranges for the specified domain ID.
+ */
+ iommuAmdIotlbRemoveDomainId(pDevIns, idDomain);
+ }
+#endif
+ return VINF_SUCCESS;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_INVALID_FORMAT;
+ }
+
+ case IOMMU_CMD_INV_IOTLB_PAGES:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdInvIotlbPages);
+
+ uint32_t const uCapHdr = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_CAP_HDR);
+ if (RT_BF_GET(uCapHdr, IOMMU_BF_CAPHDR_IOTLB_SUP))
+ {
+ /** @todo IOMMU: Implement remote IOTLB invalidation. */
+ return VERR_NOT_IMPLEMENTED;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_NOT_SUPPORTED;
+ }
+
+ case IOMMU_CMD_INV_INTR_TABLE:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable);
+
+ PCCMD_INV_INTR_TABLE_T pCmdInvIntrTable = (PCCMD_INV_INTR_TABLE_T)pCmd;
+ AssertCompile(sizeof(*pCmdInvIntrTable) == sizeof(*pCmd));
+
+ /* Validate reserved bits in the command. */
+ if ( !(pCmdInvIntrTable->au64[0] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK)
+ && !(pCmdInvIntrTable->au64[1] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK))
+ {
+#ifdef IOMMU_WITH_IRTE_CACHE
+ iommuAmdIrteCacheRemove(pDevIns, pCmdInvIntrTable->u.u16DevId);
+#endif
+ return VINF_SUCCESS;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_INVALID_FORMAT;
+ }
+
+ case IOMMU_CMD_PREFETCH_IOMMU_PAGES:
+ {
+ /* Linux doesn't use prefetching of IOMMU pages, so we don't bother for now. */
+ STAM_COUNTER_INC(&pThis->StatCmdPrefIommuPages);
+ Assert(!pThis->ExtFeat.n.u1PrefetchSup);
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_NOT_SUPPORTED;
+ }
+
+ case IOMMU_CMD_COMPLETE_PPR_REQ:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdCompletePprReq);
+
+ /* We don't support PPR requests yet. */
+ Assert(!pThis->ExtFeat.n.u1PprSup);
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_NOT_SUPPORTED;
+ }
+
+ case IOMMU_CMD_INV_IOMMU_ALL:
+ {
+ STAM_COUNTER_INC(&pThis->StatCmdInvIommuAll);
+ if (pThis->ExtFeat.n.u1InvAllSup)
+ {
+ PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd;
+ AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd));
+
+ /* Validate reserved bits in the command. */
+ if ( !(pCmdInvAll->au64[0] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_0_VALID_MASK)
+ && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK))
+ {
+#ifdef IOMMU_WITH_DTE_CACHE
+ iommuAmdDteCacheRemoveAll(pDevIns);
+#endif
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ iommuAmdIotlbRemoveAll(pDevIns);
+#endif
+ return VINF_SUCCESS;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_INVALID_FORMAT;
+ }
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_NOT_SUPPORTED;
+ }
+ }
+
+ STAM_COUNTER_DEC(&pThis->StatCmd);
+ LogFunc(("Cmd(%#x): Unrecognized\n", bCmd));
+ iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+ return VERR_IOMMU_CMD_NOT_SUPPORTED;
+}
+
+
+/**
+ * The IOMMU command thread.
+ *
+ * Sleeps until signalled, then copies the guest command buffer into a host
+ * buffer and processes every command between the head and tail offsets. The
+ * first failing command stops processing of the current batch and, for the
+ * illegal-command and command-hardware-error statuses, raises the matching
+ * event.
+ *
+ * @returns VBox status code.
+ * @retval  VERR_NO_MEMORY if the host command buffer could not be allocated.
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThread     The command thread.
+ *
+ * @remarks A failure of the event semaphore wait (other than VERR_INTERRUPTED)
+ *          terminates the thread with that status; the host buffer is released
+ *          on that path as well.
+ */
+static DECLCALLBACK(int) iommuAmdR3CmdThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
+{
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+
+    if (pThread->enmState == PDMTHREADSTATE_INITIALIZING)
+        return VINF_SUCCESS;
+
+    /*
+     * Pre-allocate the maximum command buffer size supported by the IOMMU.
+     * This avoids thrashing the heap as well as not wasting time allocating
+     * and freeing buffers while processing commands.
+     */
+    size_t const cbMaxCmdBuf = sizeof(CMD_GENERIC_T) * iommuAmdGetBufMaxEntries(15);
+    void *pvCmds = RTMemAllocZ(cbMaxCmdBuf);
+    AssertPtrReturn(pvCmds, VERR_NO_MEMORY);
+
+    while (pThread->enmState == PDMTHREADSTATE_RUNNING)
+    {
+        /*
+         * Sleep perpetually until we are woken up to process commands.
+         */
+        bool const fSignaled = ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, false);
+        if (!fSignaled)
+        {
+            int rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtCmdThread, RT_INDEFINITE_WAIT);
+            /* Free the host buffer before bailing out so the early return doesn't leak it. */
+            AssertLogRelMsgReturnStmt(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), RTMemFree(pvCmds), rc);
+            if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING))
+                break;
+            Log4Func(("Woken up with rc=%Rrc\n", rc));
+            ASMAtomicWriteBool(&pThis->fCmdThreadSignaled, false);
+        }
+
+        /*
+         * Fetch and process IOMMU commands.
+         */
+        /** @todo r=ramshankar: We currently copy all commands from guest memory into a
+         *        temporary host buffer before processing them as a batch. If we want to
+         *        save on host memory a bit, we could (once PGM has the necessary APIs)
+         *        lock the page mappings and access them directly. */
+        IOMMU_LOCK(pDevIns, pThisR3);
+
+        if (pThis->Status.n.u1CmdBufRunning)
+        {
+            /* Get the offsets we need to read commands from memory (circular buffer offset). */
+            uint32_t const cbCmdBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
+            uint32_t const offTail  = pThis->CmdBufTailPtr.n.off;
+            uint32_t       offHead  = pThis->CmdBufHeadPtr.n.off;
+
+            /* Validate. */
+            Assert(!(offHead & ~IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK));
+            Assert(offHead < cbCmdBuf);
+            Assert(cbCmdBuf <= cbMaxCmdBuf);
+
+            if (offHead != offTail)
+            {
+                /* Read the entire command buffer from memory (avoids multiple PGM calls). */
+                RTGCPHYS const GCPhysCmdBufBase = pThis->CmdBufBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
+
+                /* The lock is dropped for the duration of the guest memory read. */
+                IOMMU_UNLOCK(pDevIns, pThisR3);
+                int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysCmdBufBase, pvCmds, cbCmdBuf);
+                IOMMU_LOCK(pDevIns, pThisR3);
+
+                if (RT_SUCCESS(rc))
+                {
+                    /* Indicate to software we've fetched all commands from the buffer. */
+                    pThis->CmdBufHeadPtr.n.off = offTail;
+
+                    /* Allow IOMMU to do other work while we process commands. */
+                    IOMMU_UNLOCK(pDevIns, pThisR3);
+
+                    /* Process the fetched commands. */
+                    EVT_GENERIC_T EvtError;
+                    do
+                    {
+                        PCCMD_GENERIC_T pCmd = (PCCMD_GENERIC_T)((uintptr_t)pvCmds + offHead);
+                        rc = iommuAmdR3CmdProcess(pDevIns, pCmd, GCPhysCmdBufBase + offHead, &EvtError);
+                        if (RT_FAILURE(rc))
+                        {
+                            if (   rc == VERR_IOMMU_CMD_NOT_SUPPORTED
+                                || rc == VERR_IOMMU_CMD_INVALID_FORMAT)
+                            {
+                                Assert(EvtError.n.u4EvtCode == IOMMU_EVT_ILLEGAL_CMD_ERROR);
+                                iommuAmdIllegalCmdEventRaise(pDevIns, (PCEVT_ILLEGAL_CMD_ERR_T)&EvtError);
+                            }
+                            else if (rc == VERR_IOMMU_CMD_HW_ERROR)
+                            {
+                                Assert(EvtError.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
+                                LogFunc(("Raising command hardware error. Cmd=%#x -> COMMAND_HW_ERROR\n", pCmd->n.u4Opcode));
+                                iommuAmdCmdHwErrorEventRaise(pDevIns, (PCEVT_CMD_HW_ERR_T)&EvtError);
+                            }
+                            /* Any failure stops processing of the remaining commands in this batch. */
+                            break;
+                        }
+
+                        /* Move to the next command in the circular buffer. */
+                        offHead = (offHead + sizeof(CMD_GENERIC_T)) % cbCmdBuf;
+                    } while (offHead != offTail);
+                }
+                else
+                {
+                    LogFunc(("Failed to read command at %#RGp. rc=%Rrc -> COMMAND_HW_ERROR\n", GCPhysCmdBufBase, rc));
+                    EVT_CMD_HW_ERR_T EvtCmdHwErr;
+                    iommuAmdCmdHwErrorEventInit(GCPhysCmdBufBase, &EvtCmdHwErr);
+                    iommuAmdCmdHwErrorEventRaise(pDevIns, &EvtCmdHwErr);
+
+                    IOMMU_UNLOCK(pDevIns, pThisR3);
+                }
+            }
+            else
+                IOMMU_UNLOCK(pDevIns, pThisR3);
+        }
+        else
+            IOMMU_UNLOCK(pDevIns, pThisR3);
+    }
+
+    RTMemFree(pvCmds);
+    LogFlowFunc(("Command thread terminating\n"));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Wakes up the command thread so it can respond to a state change.
+ *
+ * This signals the event semaphore the command thread waits on.
+ *
+ * @returns VBox status code (the status of signalling the semaphore).
+ * @param   pDevIns     The IOMMU device instance.
+ * @param   pThread     The command thread (unused).
+ */
+static DECLCALLBACK(int) iommuAmdR3CmdThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
+{
+    PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    Log4Func(("\n"));
+
+    int const rcSignal = PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
+    RT_NOREF(pThread);
+    return rcSignal;
+}
+
+
+/**
+ * @callback_method_impl{FNPCICONFIGREAD}
+ *
+ * Forwards the read to the generic PCI config read helper and logs the result.
+ */
+static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
+                                                          unsigned cb, uint32_t *pu32Value)
+{
+    /** @todo IOMMU: PCI config read stat counter. */
+    VBOXSTRICTRC rcRead = PDMDevHlpPCIConfigRead(pDevIns, pPciDev, uAddress, cb, pu32Value);
+
+    Log3Func(("uAddress=%#x (cb=%u) -> %#x. rc=%Rrc\n", uAddress, cb, *pu32Value, VBOXSTRICTRC_VAL(rcRead)));
+    return rcRead;
+}
+
+
+/**
+ * Sets up the IOMMU MMIO region (usually in response to an IOMMU base address
+ * register write).
+ *
+ * Nothing is done if the region is already mapped at the address given by the
+ * IOMMU BAR. Otherwise any existing mapping is removed before the region is
+ * mapped at the new address.
+ *
+ * @returns VBox status code (the unmap or map failure status, if any).
+ * @param   pDevIns     The IOMMU instance data.
+ *
+ * @remarks Call this function only when the IOMMU BAR is enabled.
+ */
+static int iommuAmdR3MmioSetup(PPDMDEVINS pDevIns)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    Assert(pThis->IommuBar.n.u1Enable);
+    Assert(pThis->hMmio != NIL_IOMMMIOHANDLE);      /* Paranoia. Ensure we have a valid IOM MMIO handle. */
+    Assert(!pThis->ExtFeat.n.u1PerfCounterSup);     /* Base is 16K aligned when performance counters aren't supported. */
+    RTGCPHYS const GCPhysMmioBase     = RT_MAKE_U64(pThis->IommuBar.au32[0] & 0xffffc000, pThis->IommuBar.au32[1]);
+    RTGCPHYS const GCPhysMmioBasePrev = PDMDevHlpMmioGetMappingAddress(pDevIns, pThis->hMmio);
+
+    /* If the MMIO region is already mapped at the specified address, we're done. */
+    Assert(GCPhysMmioBase != NIL_RTGCPHYS);
+    if (GCPhysMmioBasePrev == GCPhysMmioBase)
+        return VINF_SUCCESS;
+
+    /* Unmap the previous MMIO region (which is at a different address). */
+    if (GCPhysMmioBasePrev != NIL_RTGCPHYS)
+    {
+        LogFlowFunc(("Unmapping previous MMIO region at %#RGp\n", GCPhysMmioBasePrev));
+        int rc = PDMDevHlpMmioUnmap(pDevIns, pThis->hMmio);
+        if (RT_FAILURE(rc))
+        {
+            LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBasePrev, rc));
+            return rc;
+        }
+    }
+
+    /* Map the newly specified MMIO region. */
+    LogFlowFunc(("Mapping MMIO region at %#RGp\n", GCPhysMmioBase));
+    int rc = PDMDevHlpMmioMap(pDevIns, pThis->hMmio, GCPhysMmioBase);
+    if (RT_FAILURE(rc))
+    {
+        /* This is the map path; the message previously said "unmap", copied from the block above. */
+        LogFunc(("Failed to map MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBase, rc));
+        return rc;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @callback_method_impl{FNPCICONFIGWRITE}
+ *
+ * Writes to the IOMMU specific read-only registers are dropped. Writes to the
+ * base address registers update the IOMMU BAR (only while it is disabled) and
+ * everything else goes to the generic PCI config write helper.
+ */
+static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
+                                                           unsigned cb, uint32_t u32Value)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+
+    /*
+     * Discard writes to read-only registers that are specific to the IOMMU.
+     * Other common PCI registers are handled by the generic code, see devpciR3IsConfigByteWritable().
+     * See PCI spec. 6.1. "Configuration Space Organization".
+     */
+    bool const fReadOnlyReg =    uAddress == IOMMU_PCI_OFF_CAP_HDR          /* All bits are read-only. */
+                              || uAddress == IOMMU_PCI_OFF_RANGE_REG        /* We don't have any devices integrated with the IOMMU. */
+                              || uAddress == IOMMU_PCI_OFF_MISCINFO_REG_0   /* We don't support MSI-X. */
+                              || uAddress == IOMMU_PCI_OFF_MISCINFO_REG_1;  /* We don't support guest-address translation. */
+    if (fReadOnlyReg)
+    {
+        LogFunc(("PCI config write (%#RX32) to read-only register %#x -> Ignored\n", u32Value, uAddress));
+        return VINF_SUCCESS;
+    }
+
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+    IOMMU_LOCK(pDevIns, pThisR3);
+
+    VBOXSTRICTRC rcStrict = VINF_SUCCESS;
+    if (uAddress == IOMMU_PCI_OFF_BASE_ADDR_REG_LO)
+    {
+        if (pThis->IommuBar.n.u1Enable)
+        {
+            LogFunc(("Writing Base Address (Lo) when it's already enabled -> Ignored\n"));
+        }
+        else
+        {
+            /* Take the new low dword; if this write sets the enable bit, (re)map the MMIO region. */
+            pThis->IommuBar.au32[0] = u32Value & IOMMU_BAR_VALID_MASK;
+            if (pThis->IommuBar.n.u1Enable)
+                rcStrict = iommuAmdR3MmioSetup(pDevIns);
+        }
+    }
+    else if (uAddress == IOMMU_PCI_OFF_BASE_ADDR_REG_HI)
+    {
+        if (pThis->IommuBar.n.u1Enable)
+        {
+            LogFunc(("Writing Base Address (Hi) when it's already enabled -> Ignored\n"));
+        }
+        else
+        {
+            AssertCompile((IOMMU_BAR_VALID_MASK >> 32) == 0xffffffff);
+            pThis->IommuBar.au32[1] = u32Value;
+        }
+    }
+    else
+    {
+        /* 64-bit MSI addresses must always be enabled for the IOMMU. */
+        if (uAddress == IOMMU_PCI_OFF_MSI_CAP_HDR)
+            u32Value |= RT_BIT(23);
+        rcStrict = PDMDevHlpPCIConfigWrite(pDevIns, pPciDev, uAddress, cb, u32Value);
+    }
+
+    IOMMU_UNLOCK(pDevIns, pThisR3);
+
+    Log3Func(("uAddress=%#x (cb=%u) with %#x. rc=%Rrc\n", uAddress, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict)));
+    return rcStrict;
+}
+
+
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+ PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+ PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+
+ bool const fVerbose = RTStrCmp(pszArgs, "verbose") == 0;
+
+ pHlp->pfnPrintf(pHlp, "AMD-IOMMU:\n");
+ /* Device Table Base Addresses (all segments). */
+ for (unsigned i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
+ {
+ DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
+ pHlp->pfnPrintf(pHlp, " Device Table BAR %u = %#RX64\n", i, DevTabBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Size = %#x (%u bytes)\n", DevTabBar.n.u9Size,
+ IOMMU_GET_DEV_TAB_LEN(&DevTabBar));
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ }
+ }
+ /* Command Buffer Base Address Register. */
+ {
+ CMD_BUF_BAR_T const CmdBufBar = pThis->CmdBufBaseAddr;
+ uint8_t const uEncodedLen = CmdBufBar.n.u4Len;
+ uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
+ uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
+ pHlp->pfnPrintf(pHlp, " Command Buffer BAR = %#RX64\n", CmdBufBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ CmdBufBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
+ cEntries, cbBuffer);
+ }
+ }
+ /* Event Log Base Address Register. */
+ {
+ EVT_LOG_BAR_T const EvtLogBar = pThis->EvtLogBaseAddr;
+ uint8_t const uEncodedLen = EvtLogBar.n.u4Len;
+ uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
+ uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
+ pHlp->pfnPrintf(pHlp, " Event Log BAR = %#RX64\n", EvtLogBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ EvtLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
+ cEntries, cbBuffer);
+ }
+ }
+ /* IOMMU Control Register. */
+ {
+ IOMMU_CTRL_T const Ctrl = pThis->Ctrl;
+ pHlp->pfnPrintf(pHlp, " Control = %#RX64\n", Ctrl.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " IOMMU enable = %RTbool\n", Ctrl.n.u1IommuEn);
+ pHlp->pfnPrintf(pHlp, " HT Tunnel translation enable = %RTbool\n", Ctrl.n.u1HtTunEn);
+ pHlp->pfnPrintf(pHlp, " Event log enable = %RTbool\n", Ctrl.n.u1EvtLogEn);
+ pHlp->pfnPrintf(pHlp, " Event log interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
+ pHlp->pfnPrintf(pHlp, " Completion wait interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
+ pHlp->pfnPrintf(pHlp, " Invalidation timeout = %u\n", Ctrl.n.u3InvTimeOut);
+ pHlp->pfnPrintf(pHlp, " Pass posted write = %RTbool\n", Ctrl.n.u1PassPW);
+ pHlp->pfnPrintf(pHlp, " Respose Pass posted write = %RTbool\n", Ctrl.n.u1ResPassPW);
+ pHlp->pfnPrintf(pHlp, " Coherent = %RTbool\n", Ctrl.n.u1Coherent);
+ pHlp->pfnPrintf(pHlp, " Isochronous = %RTbool\n", Ctrl.n.u1Isoc);
+ pHlp->pfnPrintf(pHlp, " Command buffer enable = %RTbool\n", Ctrl.n.u1CmdBufEn);
+ pHlp->pfnPrintf(pHlp, " PPR log enable = %RTbool\n", Ctrl.n.u1PprLogEn);
+ pHlp->pfnPrintf(pHlp, " PPR interrupt enable = %RTbool\n", Ctrl.n.u1PprIntrEn);
+ pHlp->pfnPrintf(pHlp, " PPR enable = %RTbool\n", Ctrl.n.u1PprEn);
+ pHlp->pfnPrintf(pHlp, " Guest translation eanble = %RTbool\n", Ctrl.n.u1GstTranslateEn);
+ pHlp->pfnPrintf(pHlp, " Guest virtual-APIC enable = %RTbool\n", Ctrl.n.u1GstVirtApicEn);
+ pHlp->pfnPrintf(pHlp, " CRW = %#x\n", Ctrl.n.u4Crw);
+ pHlp->pfnPrintf(pHlp, " SMI filter enable = %RTbool\n", Ctrl.n.u1SmiFilterEn);
+ pHlp->pfnPrintf(pHlp, " Self-writeback disable = %RTbool\n", Ctrl.n.u1SelfWriteBackDis);
+ pHlp->pfnPrintf(pHlp, " SMI filter log enable = %RTbool\n", Ctrl.n.u1SmiFilterLogEn);
+ pHlp->pfnPrintf(pHlp, " Guest virtual-APIC mode enable = %#x\n", Ctrl.n.u3GstVirtApicModeEn);
+ pHlp->pfnPrintf(pHlp, " Guest virtual-APIC GA log enable = %RTbool\n", Ctrl.n.u1GstLogEn);
+ pHlp->pfnPrintf(pHlp, " Guest virtual-APIC interrupt enable = %RTbool\n", Ctrl.n.u1GstIntrEn);
+ pHlp->pfnPrintf(pHlp, " Dual PPR log enable = %#x\n", Ctrl.n.u2DualPprLogEn);
+ pHlp->pfnPrintf(pHlp, " Dual event log enable = %#x\n", Ctrl.n.u2DualEvtLogEn);
+ pHlp->pfnPrintf(pHlp, " Device table segmentation enable = %#x\n", Ctrl.n.u3DevTabSegEn);
+ pHlp->pfnPrintf(pHlp, " Privilege abort enable = %#x\n", Ctrl.n.u2PrivAbortEn);
+ pHlp->pfnPrintf(pHlp, " PPR auto response enable = %RTbool\n", Ctrl.n.u1PprAutoRespEn);
+ pHlp->pfnPrintf(pHlp, " MARC enable = %RTbool\n", Ctrl.n.u1MarcEn);
+ pHlp->pfnPrintf(pHlp, " Block StopMark enable = %RTbool\n", Ctrl.n.u1BlockStopMarkEn);
+ pHlp->pfnPrintf(pHlp, " PPR auto response always-on enable = %RTbool\n", Ctrl.n.u1PprAutoRespAlwaysOnEn);
+ pHlp->pfnPrintf(pHlp, " Domain IDPNE = %RTbool\n", Ctrl.n.u1DomainIDPNE);
+ pHlp->pfnPrintf(pHlp, " Enhanced PPR handling = %RTbool\n", Ctrl.n.u1EnhancedPpr);
+ pHlp->pfnPrintf(pHlp, " Host page table access/dirty bit update = %#x\n", Ctrl.n.u2HstAccDirtyBitUpdate);
+ pHlp->pfnPrintf(pHlp, " Guest page table dirty bit disable = %RTbool\n", Ctrl.n.u1GstDirtyUpdateDis);
+ pHlp->pfnPrintf(pHlp, " x2APIC enable = %RTbool\n", Ctrl.n.u1X2ApicEn);
+ pHlp->pfnPrintf(pHlp, " x2APIC interrupt enable = %RTbool\n", Ctrl.n.u1X2ApicIntrGenEn);
+ pHlp->pfnPrintf(pHlp, " Guest page table access bit update = %RTbool\n", Ctrl.n.u1GstAccessUpdateDis);
+ }
+ }
+ /* Exclusion Base Address Register. */
+ {
+ IOMMU_EXCL_RANGE_BAR_T const ExclRangeBar = pThis->ExclRangeBaseAddr;
+ pHlp->pfnPrintf(pHlp, " Exclusion BAR = %#RX64\n", ExclRangeBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Exclusion enable = %RTbool\n", ExclRangeBar.n.u1ExclEnable);
+ pHlp->pfnPrintf(pHlp, " Allow all devices = %RTbool\n", ExclRangeBar.n.u1AllowAll);
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ ExclRangeBar.n.u40ExclRangeBase << X86_PAGE_4K_SHIFT);
+ }
+ }
+ /* Exclusion Range Limit Register. */
+ {
+ IOMMU_EXCL_RANGE_LIMIT_T const ExclRangeLimit = pThis->ExclRangeLimit;
+ pHlp->pfnPrintf(pHlp, " Exclusion Range Limit = %#RX64\n", ExclRangeLimit.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Range limit = %#RX64\n",
+ (ExclRangeLimit.n.u40ExclRangeLimit << X86_PAGE_4K_SHIFT) | X86_PAGE_4K_OFFSET_MASK);
+ }
+ }
+ /* Extended Feature Register. */
+ {
+ IOMMU_EXT_FEAT_T ExtFeat = pThis->ExtFeat;
+ pHlp->pfnPrintf(pHlp, " Extended Feature Register = %#RX64\n", ExtFeat.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Prefetch support = %RTbool\n", ExtFeat.n.u1PrefetchSup);
+ pHlp->pfnPrintf(pHlp, " PPR support = %RTbool\n", ExtFeat.n.u1PprSup);
+ pHlp->pfnPrintf(pHlp, " x2APIC support = %RTbool\n", ExtFeat.n.u1X2ApicSup);
+ pHlp->pfnPrintf(pHlp, " NX and privilege level support = %RTbool\n", ExtFeat.n.u1NoExecuteSup);
+ pHlp->pfnPrintf(pHlp, " Guest translation support = %RTbool\n", ExtFeat.n.u1GstTranslateSup);
+ pHlp->pfnPrintf(pHlp, " Invalidate-All command support = %RTbool\n", ExtFeat.n.u1InvAllSup);
+ pHlp->pfnPrintf(pHlp, " Guest virtual-APIC support = %RTbool\n", ExtFeat.n.u1GstVirtApicSup);
+ pHlp->pfnPrintf(pHlp, " Hardware error register support = %RTbool\n", ExtFeat.n.u1HwErrorSup);
+ pHlp->pfnPrintf(pHlp, " Performance counters support = %RTbool\n", ExtFeat.n.u1PerfCounterSup);
+ pHlp->pfnPrintf(pHlp, " Host address translation size = %#x\n", ExtFeat.n.u2HostAddrTranslateSize);
+ pHlp->pfnPrintf(pHlp, " Guest address translation size = %#x\n", ExtFeat.n.u2GstAddrTranslateSize);
+ pHlp->pfnPrintf(pHlp, " Guest CR3 root table level support = %#x\n", ExtFeat.n.u2GstCr3RootTblLevel);
+ pHlp->pfnPrintf(pHlp, " SMI filter register support = %#x\n", ExtFeat.n.u2SmiFilterSup);
+ pHlp->pfnPrintf(pHlp, " SMI filter register count = %#x\n", ExtFeat.n.u3SmiFilterCount);
+ pHlp->pfnPrintf(pHlp, " Guest virtual-APIC modes support = %#x\n", ExtFeat.n.u3GstVirtApicModeSup);
+ pHlp->pfnPrintf(pHlp, " Dual PPR log support = %#x\n", ExtFeat.n.u2DualPprLogSup);
+ pHlp->pfnPrintf(pHlp, " Dual event log support = %#x\n", ExtFeat.n.u2DualEvtLogSup);
+ pHlp->pfnPrintf(pHlp, " Maximum PASID = %#x\n", ExtFeat.n.u5MaxPasidSup);
+ pHlp->pfnPrintf(pHlp, " User/supervisor page protection support = %RTbool\n", ExtFeat.n.u1UserSupervisorSup);
+ pHlp->pfnPrintf(pHlp, " Device table segments supported = %#x (%u)\n", ExtFeat.n.u2DevTabSegSup,
+ g_acDevTabSegs[ExtFeat.n.u2DevTabSegSup]);
+ pHlp->pfnPrintf(pHlp, " PPR log overflow early warning support = %RTbool\n", ExtFeat.n.u1PprLogOverflowWarn);
+ pHlp->pfnPrintf(pHlp, " PPR auto response support = %RTbool\n", ExtFeat.n.u1PprAutoRespSup);
+ pHlp->pfnPrintf(pHlp, " MARC support = %#x\n", ExtFeat.n.u2MarcSup);
+ pHlp->pfnPrintf(pHlp, " Block StopMark message support = %RTbool\n", ExtFeat.n.u1BlockStopMarkSup);
+ pHlp->pfnPrintf(pHlp, " Performance optimization support = %RTbool\n", ExtFeat.n.u1PerfOptSup);
+ pHlp->pfnPrintf(pHlp, " MSI capability MMIO access support = %RTbool\n", ExtFeat.n.u1MsiCapMmioSup);
+ pHlp->pfnPrintf(pHlp, " Guest I/O protection support = %RTbool\n", ExtFeat.n.u1GstIoSup);
+ pHlp->pfnPrintf(pHlp, " Host access support = %RTbool\n", ExtFeat.n.u1HostAccessSup);
+ pHlp->pfnPrintf(pHlp, " Enhanced PPR handling support = %RTbool\n", ExtFeat.n.u1EnhancedPprSup);
+ pHlp->pfnPrintf(pHlp, " Attribute forward supported = %RTbool\n", ExtFeat.n.u1AttrForwardSup);
+ pHlp->pfnPrintf(pHlp, " Host dirty support = %RTbool\n", ExtFeat.n.u1HostDirtySup);
+ pHlp->pfnPrintf(pHlp, " Invalidate IOTLB type support = %RTbool\n", ExtFeat.n.u1InvIoTlbTypeSup);
+ pHlp->pfnPrintf(pHlp, " Guest page table access bit hw disable = %RTbool\n", ExtFeat.n.u1GstUpdateDisSup);
+ pHlp->pfnPrintf(pHlp, " Force physical dest for remapped intr. = %RTbool\n", ExtFeat.n.u1ForcePhysDstSup);
+ }
+ }
+ /* PPR Log Base Address Register. */
+ {
+ PPR_LOG_BAR_T PprLogBar = pThis->PprLogBaseAddr;
+ uint8_t const uEncodedLen = PprLogBar.n.u4Len;
+ uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
+ uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
+ pHlp->pfnPrintf(pHlp, " PPR Log BAR = %#RX64\n", PprLogBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ PprLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
+ cEntries, cbBuffer);
+ }
+ }
+ /* Hardware Event (Hi) Register. */
+ {
+ IOMMU_HW_EVT_HI_T HwEvtHi = pThis->HwEvtHi;
+ pHlp->pfnPrintf(pHlp, " Hardware Event (Hi) = %#RX64\n", HwEvtHi.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " First operand = %#RX64\n", HwEvtHi.n.u60FirstOperand);
+ pHlp->pfnPrintf(pHlp, " Event code = %#RX8\n", HwEvtHi.n.u4EvtCode);
+ }
+ }
+ /* Hardware Event (Lo) Register. */
+ pHlp->pfnPrintf(pHlp, " Hardware Event (Lo) = %#RX64\n", pThis->HwEvtLo);
+ /* Hardware Event Status. */
+ {
+ IOMMU_HW_EVT_STATUS_T HwEvtStatus = pThis->HwEvtStatus;
+ pHlp->pfnPrintf(pHlp, " Hardware Event Status = %#RX64\n", HwEvtStatus.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Valid = %RTbool\n", HwEvtStatus.n.u1Valid);
+ pHlp->pfnPrintf(pHlp, " Overflow = %RTbool\n", HwEvtStatus.n.u1Overflow);
+ }
+ }
+ /* Guest Virtual-APIC Log Base Address Register. */
+ {
+ GALOG_BAR_T const GALogBar = pThis->GALogBaseAddr;
+ uint8_t const uEncodedLen = GALogBar.n.u4Len;
+ uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
+ uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
+ pHlp->pfnPrintf(pHlp, " Guest Log BAR = %#RX64\n", GALogBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ GALogBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
+ cEntries, cbBuffer);
+ }
+ }
+ /* Guest Virtual-APIC Log Tail Address Register. */
+ {
+ GALOG_TAIL_ADDR_T GALogTail = pThis->GALogTailAddr;
+ pHlp->pfnPrintf(pHlp, " Guest Log Tail Address = %#RX64\n", GALogTail.u64);
+ if (fVerbose)
+ pHlp->pfnPrintf(pHlp, " Tail address = %#RX64\n", GALogTail.n.u40GALogTailAddr);
+ }
+ /* PPR Log B Base Address Register. */
+ {
+ PPR_LOG_B_BAR_T PprLogBBar = pThis->PprLogBBaseAddr;
+ uint8_t const uEncodedLen = PprLogBBar.n.u4Len;
+ uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
+ uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
+ pHlp->pfnPrintf(pHlp, " PPR Log B BAR = %#RX64\n", PprLogBBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ PprLogBBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
+ cEntries, cbBuffer);
+ }
+ }
+ /* Event Log B Base Address Register. */
+ {
+ EVT_LOG_B_BAR_T EvtLogBBar = pThis->EvtLogBBaseAddr;
+ uint8_t const uEncodedLen = EvtLogBBar.n.u4Len;
+ uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
+ uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
+ pHlp->pfnPrintf(pHlp, " Event Log B BAR = %#RX64\n", EvtLogBBar.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
+ EvtLogBBar.n.u40Base << X86_PAGE_4K_SHIFT);
+ pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
+ cEntries, cbBuffer);
+ }
+ }
+ /* Device-Specific Feature Extension Register. */
+ {
+ DEV_SPECIFIC_FEAT_T const DevSpecificFeat = pThis->DevSpecificFeat;
+ pHlp->pfnPrintf(pHlp, " Device-specific Feature = %#RX64\n", DevSpecificFeat.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Feature = %#RX32\n", DevSpecificFeat.n.u24DevSpecFeat);
+ pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificFeat.n.u4RevMinor);
+ pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificFeat.n.u4RevMajor);
+ }
+ }
+ /* Device-Specific Control Extension Register. */
+ {
+ DEV_SPECIFIC_CTRL_T const DevSpecificCtrl = pThis->DevSpecificCtrl;
+ pHlp->pfnPrintf(pHlp, " Device-specific Control = %#RX64\n", DevSpecificCtrl.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Control = %#RX32\n", DevSpecificCtrl.n.u24DevSpecCtrl);
+ pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificCtrl.n.u4RevMinor);
+ pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificCtrl.n.u4RevMajor);
+ }
+ }
+ /* Device-Specific Status Extension Register. */
+ {
+ DEV_SPECIFIC_STATUS_T const DevSpecificStatus = pThis->DevSpecificStatus;
+ pHlp->pfnPrintf(pHlp, " Device-specific Status = %#RX64\n", DevSpecificStatus.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Status = %#RX32\n", DevSpecificStatus.n.u24DevSpecStatus);
+ pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificStatus.n.u4RevMinor);
+ pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificStatus.n.u4RevMajor);
+ }
+ }
+ /* Miscellaneous Information Register (Lo and Hi). */
+ {
+ MSI_MISC_INFO_T const MiscInfo = pThis->MiscInfo;
+ pHlp->pfnPrintf(pHlp, " Misc. Info. Register = %#RX64\n", MiscInfo.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Event Log MSI number = %#x\n", MiscInfo.n.u5MsiNumEvtLog);
+ pHlp->pfnPrintf(pHlp, " Guest Virtual-Address Size = %#x\n", MiscInfo.n.u3GstVirtAddrSize);
+ pHlp->pfnPrintf(pHlp, " Physical Address Size = %#x\n", MiscInfo.n.u7PhysAddrSize);
+ pHlp->pfnPrintf(pHlp, " Virtual-Address Size = %#x\n", MiscInfo.n.u7VirtAddrSize);
+ pHlp->pfnPrintf(pHlp, " HT Transport ATS Range Reserved = %RTbool\n", MiscInfo.n.u1HtAtsResv);
+ pHlp->pfnPrintf(pHlp, " PPR MSI number = %#x\n", MiscInfo.n.u5MsiNumPpr);
+ pHlp->pfnPrintf(pHlp, " GA Log MSI number = %#x\n", MiscInfo.n.u5MsiNumGa);
+ }
+ }
+ /* MSI Capability Header. */
+ {
+ MSI_CAP_HDR_T MsiCapHdr;
+ MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
+ pHlp->pfnPrintf(pHlp, " MSI Capability Header = %#RX32\n", MsiCapHdr.u32);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiCapHdr.n.u8MsiCapId);
+ pHlp->pfnPrintf(pHlp, " Capability Ptr (PCI config offset) = %#x\n", MsiCapHdr.n.u8MsiCapPtr);
+ pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", MsiCapHdr.n.u1MsiEnable);
+ pHlp->pfnPrintf(pHlp, " Multi-message capability = %#x\n", MsiCapHdr.n.u3MsiMultiMessCap);
+ pHlp->pfnPrintf(pHlp, " Multi-message enable = %#x\n", MsiCapHdr.n.u3MsiMultiMessEn);
+ }
+ }
+ /* MSI Address Register (Lo and Hi). */
+ {
+ uint32_t const uMsiAddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
+ uint32_t const uMsiAddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
+ MSIADDR MsiAddr;
+ MsiAddr.u64 = RT_MAKE_U64(uMsiAddrLo, uMsiAddrHi);
+ pHlp->pfnPrintf(pHlp, " MSI Address = %#RX64\n", MsiAddr.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Destination mode = %#x\n", MsiAddr.n.u1DestMode);
+ pHlp->pfnPrintf(pHlp, " Redirection hint = %#x\n", MsiAddr.n.u1RedirHint);
+ pHlp->pfnPrintf(pHlp, " Destination Id = %#x\n", MsiAddr.n.u8DestId);
+ pHlp->pfnPrintf(pHlp, " Address = %#RX32\n", MsiAddr.n.u12Addr);
+ pHlp->pfnPrintf(pHlp, " Address (Hi) / Rsvd? = %#RX32\n", MsiAddr.n.u32Rsvd0);
+ }
+ }
+ /* MSI Data. */
+ {
+ MSIDATA MsiData;
+ MsiData.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
+ pHlp->pfnPrintf(pHlp, " MSI Data = %#RX32\n", MsiData.u32);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", MsiData.n.u8Vector,
+ MsiData.n.u8Vector);
+ pHlp->pfnPrintf(pHlp, " Delivery mode = %#x\n", MsiData.n.u3DeliveryMode);
+ pHlp->pfnPrintf(pHlp, " Level = %#x\n", MsiData.n.u1Level);
+ pHlp->pfnPrintf(pHlp, " Trigger mode = %s\n", MsiData.n.u1TriggerMode ?
+ "level" : "edge");
+ }
+ }
+ /* MSI Mapping Capability Header (HyperTransport, reporting all 0s currently). */
+ {
+ MSI_MAP_CAP_HDR_T MsiMapCapHdr;
+ MsiMapCapHdr.u32 = 0;
+ pHlp->pfnPrintf(pHlp, " MSI Mapping Capability Header = %#RX32\n", MsiMapCapHdr.u32);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiMapCapHdr.n.u8MsiMapCapId);
+ pHlp->pfnPrintf(pHlp, " Map enable = %RTbool\n", MsiMapCapHdr.n.u1MsiMapEn);
+ pHlp->pfnPrintf(pHlp, " Map fixed = %RTbool\n", MsiMapCapHdr.n.u1MsiMapFixed);
+ pHlp->pfnPrintf(pHlp, " Map capability type = %#x\n", MsiMapCapHdr.n.u5MapCapType);
+ }
+ }
+ /* Performance Optimization Control Register. */
+ {
+ IOMMU_PERF_OPT_CTRL_T const PerfOptCtrl = pThis->PerfOptCtrl;
+ pHlp->pfnPrintf(pHlp, " Performance Optimization Control = %#RX32\n", PerfOptCtrl.u32);
+ if (fVerbose)
+ pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PerfOptCtrl.n.u1PerfOptEn);
+ }
+ /* XT (x2APIC) General Interrupt Control Register. */
+ {
+ IOMMU_XT_GEN_INTR_CTRL_T const XtGenIntrCtrl = pThis->XtGenIntrCtrl;
+ pHlp->pfnPrintf(pHlp, " XT General Interrupt Control = %#RX64\n", XtGenIntrCtrl.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
+ !XtGenIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
+ pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
+ RT_MAKE_U64(XtGenIntrCtrl.n.u24X2ApicIntrDstLo, XtGenIntrCtrl.n.u7X2ApicIntrDstHi));
+ pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGenIntrCtrl.n.u8X2ApicIntrVector);
+ pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
+ !XtGenIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
+ }
+ }
+ /* XT (x2APIC) PPR Interrupt Control Register. */
+ {
+ IOMMU_XT_PPR_INTR_CTRL_T const XtPprIntrCtrl = pThis->XtPprIntrCtrl;
+ pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtPprIntrCtrl.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
+ !XtPprIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
+ pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
+ RT_MAKE_U64(XtPprIntrCtrl.n.u24X2ApicIntrDstLo, XtPprIntrCtrl.n.u7X2ApicIntrDstHi));
+ pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtPprIntrCtrl.n.u8X2ApicIntrVector);
+ pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
+ !XtPprIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
+ }
+ }
+ /* XT (X2APIC) GA Log Interrupt Control Register. */
+ {
+ IOMMU_XT_GALOG_INTR_CTRL_T const XtGALogIntrCtrl = pThis->XtGALogIntrCtrl;
+ pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtGALogIntrCtrl.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n",
+ !XtGALogIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical");
+ pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n",
+ RT_MAKE_U64(XtGALogIntrCtrl.n.u24X2ApicIntrDstLo, XtGALogIntrCtrl.n.u7X2ApicIntrDstHi));
+ pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGALogIntrCtrl.n.u8X2ApicIntrVector);
+ pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n",
+ !XtGALogIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated");
+ }
+ }
+ /* MARC Registers. */
+ {
+ for (unsigned i = 0; i < RT_ELEMENTS(pThis->aMarcApers); i++)
+ {
+ pHlp->pfnPrintf(pHlp, " MARC Aperature %u:\n", i);
+ MARC_APER_BAR_T const MarcAperBar = pThis->aMarcApers[i].Base;
+ pHlp->pfnPrintf(pHlp, " Base = %#RX64\n", MarcAperBar.n.u40MarcBaseAddr << X86_PAGE_4K_SHIFT);
+
+ MARC_APER_RELOC_T const MarcAperReloc = pThis->aMarcApers[i].Reloc;
+ pHlp->pfnPrintf(pHlp, " Reloc = %#RX64 (addr: %#RX64, read-only: %RTbool, enable: %RTbool)\n",
+ MarcAperReloc.u64, MarcAperReloc.n.u40MarcRelocAddr << X86_PAGE_4K_SHIFT,
+ MarcAperReloc.n.u1ReadOnly, MarcAperReloc.n.u1RelocEn);
+
+ MARC_APER_LEN_T const MarcAperLen = pThis->aMarcApers[i].Length;
+ pHlp->pfnPrintf(pHlp, " Length = %u pages\n", MarcAperLen.n.u40MarcLength);
+ }
+ }
+ /* Reserved Register. */
+ pHlp->pfnPrintf(pHlp, " Reserved Register = %#RX64\n", pThis->RsvdReg);
+ /* Command Buffer Head Pointer Register. */
+ {
+ CMD_BUF_HEAD_PTR_T const CmdBufHeadPtr = pThis->CmdBufHeadPtr;
+ pHlp->pfnPrintf(pHlp, " Command Buffer Head Pointer = %#RX64 (off: %#x)\n", CmdBufHeadPtr.u64,
+ CmdBufHeadPtr.n.off);
+ }
+ /* Command Buffer Tail Pointer Register. */
+ {
+ CMD_BUF_HEAD_PTR_T const CmdBufTailPtr = pThis->CmdBufTailPtr;
+ pHlp->pfnPrintf(pHlp, " Command Buffer Tail Pointer = %#RX64 (off: %#x)\n", CmdBufTailPtr.u64,
+ CmdBufTailPtr.n.off);
+ }
+ /* Event Log Head Pointer Register. */
+ {
+ EVT_LOG_HEAD_PTR_T const EvtLogHeadPtr = pThis->EvtLogHeadPtr;
+ pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogHeadPtr.u64,
+ EvtLogHeadPtr.n.off);
+ }
+ /* Event Log Tail Pointer Register. */
+ {
+ EVT_LOG_TAIL_PTR_T const EvtLogTailPtr = pThis->EvtLogTailPtr;
+ pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogTailPtr.u64,
+ EvtLogTailPtr.n.off);
+ }
+ /* Status Register. */
+ {
+ IOMMU_STATUS_T const Status = pThis->Status;
+ pHlp->pfnPrintf(pHlp, " Status Register = %#RX64\n", Status.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Event log overflow = %RTbool\n", Status.n.u1EvtOverflow);
+ pHlp->pfnPrintf(pHlp, " Event log interrupt = %RTbool\n", Status.n.u1EvtLogIntr);
+ pHlp->pfnPrintf(pHlp, " Completion wait interrupt = %RTbool\n", Status.n.u1CompWaitIntr);
+ pHlp->pfnPrintf(pHlp, " Event log running = %RTbool\n", Status.n.u1EvtLogRunning);
+ pHlp->pfnPrintf(pHlp, " Command buffer running = %RTbool\n", Status.n.u1CmdBufRunning);
+ pHlp->pfnPrintf(pHlp, " PPR overflow = %RTbool\n", Status.n.u1PprOverflow);
+ pHlp->pfnPrintf(pHlp, " PPR interrupt = %RTbool\n", Status.n.u1PprIntr);
+ pHlp->pfnPrintf(pHlp, " PPR log running = %RTbool\n", Status.n.u1PprLogRunning);
+ pHlp->pfnPrintf(pHlp, " Guest log running = %RTbool\n", Status.n.u1GstLogRunning);
+ pHlp->pfnPrintf(pHlp, " Guest log interrupt = %RTbool\n", Status.n.u1GstLogIntr);
+ pHlp->pfnPrintf(pHlp, " PPR log B overflow = %RTbool\n", Status.n.u1PprOverflowB);
+ pHlp->pfnPrintf(pHlp, " PPR log active = %RTbool\n", Status.n.u1PprLogActive);
+ pHlp->pfnPrintf(pHlp, " Event log B overflow = %RTbool\n", Status.n.u1EvtOverflowB);
+ pHlp->pfnPrintf(pHlp, " Event log active = %RTbool\n", Status.n.u1EvtLogActive);
+ pHlp->pfnPrintf(pHlp, " PPR log B overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarlyB);
+ pHlp->pfnPrintf(pHlp, " PPR log overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarly);
+ }
+ }
+ /* PPR Log Head Pointer. */
+ {
+ PPR_LOG_HEAD_PTR_T const PprLogHeadPtr = pThis->PprLogHeadPtr;
+ pHlp->pfnPrintf(pHlp, " PPR Log Head Pointer = %#RX64 (off: %#x)\n", PprLogHeadPtr.u64,
+ PprLogHeadPtr.n.off);
+ }
+ /* PPR Log Tail Pointer. */
+ {
+ PPR_LOG_TAIL_PTR_T const PprLogTailPtr = pThis->PprLogTailPtr;
+ pHlp->pfnPrintf(pHlp, " PPR Log Tail Pointer = %#RX64 (off: %#x)\n", PprLogTailPtr.u64,
+ PprLogTailPtr.n.off);
+ }
+ /* Guest Virtual-APIC Log Head Pointer. */
+ {
+ GALOG_HEAD_PTR_T const GALogHeadPtr = pThis->GALogHeadPtr;
+ pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Head Pointer = %#RX64 (off: %#x)\n", GALogHeadPtr.u64,
+ GALogHeadPtr.n.u12GALogPtr);
+ }
+ /* Guest Virtual-APIC Log Tail Pointer. */
+ {
+ GALOG_HEAD_PTR_T const GALogTailPtr = pThis->GALogTailPtr;
+ pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Tail Pointer = %#RX64 (off: %#x)\n", GALogTailPtr.u64,
+ GALogTailPtr.n.u12GALogPtr);
+ }
+ /* PPR Log B Head Pointer. */
+ {
+ PPR_LOG_B_HEAD_PTR_T const PprLogBHeadPtr = pThis->PprLogBHeadPtr;
+ pHlp->pfnPrintf(pHlp, " PPR Log B Head Pointer = %#RX64 (off: %#x)\n", PprLogBHeadPtr.u64,
+ PprLogBHeadPtr.n.off);
+ }
+ /* PPR Log B Tail Pointer. */
+ {
+ PPR_LOG_B_TAIL_PTR_T const PprLogBTailPtr = pThis->PprLogBTailPtr;
+ pHlp->pfnPrintf(pHlp, " PPR Log B Tail Pointer = %#RX64 (off: %#x)\n", PprLogBTailPtr.u64,
+ PprLogBTailPtr.n.off);
+ }
+ /* Event Log B Head Pointer. */
+ {
+ EVT_LOG_B_HEAD_PTR_T const EvtLogBHeadPtr = pThis->EvtLogBHeadPtr;
+ pHlp->pfnPrintf(pHlp, " Event Log B Head Pointer = %#RX64 (off: %#x)\n", EvtLogBHeadPtr.u64,
+ EvtLogBHeadPtr.n.off);
+ }
+ /* Event Log B Tail Pointer. */
+ {
+ EVT_LOG_B_TAIL_PTR_T const EvtLogBTailPtr = pThis->EvtLogBTailPtr;
+ pHlp->pfnPrintf(pHlp, " Event Log B Tail Pointer = %#RX64 (off: %#x)\n", EvtLogBTailPtr.u64,
+ EvtLogBTailPtr.n.off);
+ }
+ /* PPR Log Auto Response Register. */
+ {
+ PPR_LOG_AUTO_RESP_T const PprLogAutoResp = pThis->PprLogAutoResp;
+ pHlp->pfnPrintf(pHlp, " PPR Log Auto Response Register = %#RX64\n", PprLogAutoResp.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Code = %#x\n", PprLogAutoResp.n.u4AutoRespCode);
+ pHlp->pfnPrintf(pHlp, " Mask Gen. = %RTbool\n", PprLogAutoResp.n.u1AutoRespMaskGen);
+ }
+ }
+ /* PPR Log Overflow Early Warning Indicator Register. */
+ {
+ PPR_LOG_OVERFLOW_EARLY_T const PprLogOverflowEarly = pThis->PprLogOverflowEarly;
+ pHlp->pfnPrintf(pHlp, " PPR Log overflow early warning = %#RX64\n", PprLogOverflowEarly.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogOverflowEarly.n.u15Threshold);
+ pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogOverflowEarly.n.u1IntrEn);
+ pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogOverflowEarly.n.u1Enable);
+ }
+ }
+ /* PPR Log B Overflow Early Warning Indicator Register. */
+ {
+ PPR_LOG_OVERFLOW_EARLY_T const PprLogBOverflowEarly = pThis->PprLogBOverflowEarly;
+ pHlp->pfnPrintf(pHlp, " PPR Log B overflow early warning = %#RX64\n", PprLogBOverflowEarly.u64);
+ if (fVerbose)
+ {
+ pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogBOverflowEarly.n.u15Threshold);
+ pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogBOverflowEarly.n.u1IntrEn);
+ pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogBOverflowEarly.n.u1Enable);
+ }
+ }
+}
+
+
+/**
+ * Dumps the DTE via the info callback helper.
+ *
+ * Prints one line per DTE field through @a pHlp, each line starting with
+ * @a pszPrefix, followed by a terminating empty line.  Nothing is printed when
+ * any of the three arguments is NULL (presumably AssertReturnVoid also raises
+ * an assertion in that case).
+ *
+ * @param   pHlp        The info helper.
+ * @param   pDte        The device table entry.
+ * @param   pszPrefix   The string prefix.
+ */
+static void iommuAmdR3DbgInfoDteWorker(PCDBGFINFOHLP pHlp, PCDTE_T pDte, const char *pszPrefix)
+{
+    AssertReturnVoid(pHlp);
+    AssertReturnVoid(pDte);
+    AssertReturnVoid(pszPrefix);
+
+    pHlp->pfnPrintf(pHlp, "%sValid = %RTbool\n", pszPrefix, pDte->n.u1Valid);
+    pHlp->pfnPrintf(pHlp, "%sTranslation Valid = %RTbool\n", pszPrefix, pDte->n.u1TranslationValid);
+    pHlp->pfnPrintf(pHlp, "%sHost Access Dirty = %#x\n", pszPrefix, pDte->n.u2Had);
+    pHlp->pfnPrintf(pHlp, "%sPaging Mode = %u\n", pszPrefix, pDte->n.u3Mode);
+    pHlp->pfnPrintf(pHlp, "%sPage Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, pDte->n.u40PageTableRootPtrLo,
+                    pDte->n.u40PageTableRootPtrLo << 12);
+    pHlp->pfnPrintf(pHlp, "%sPPR enable = %RTbool\n", pszPrefix, pDte->n.u1Ppr);
+    pHlp->pfnPrintf(pHlp, "%sGuest PPR Resp w/ PASID = %RTbool\n", pszPrefix, pDte->n.u1GstPprRespPasid);
+    pHlp->pfnPrintf(pHlp, "%sGuest I/O Prot Valid = %RTbool\n", pszPrefix, pDte->n.u1GstIoValid);
+    pHlp->pfnPrintf(pHlp, "%sGuest Translation Valid = %RTbool\n", pszPrefix, pDte->n.u1GstTranslateValid);
+    pHlp->pfnPrintf(pHlp, "%sGuest Levels Translated = %#x\n", pszPrefix, pDte->n.u2GstMode);
+
+    /*
+     * Assemble the guest CR3 table root address from its three pieces.  Each piece is
+     * widened to RTGCPHYS before shifting: the narrow bit-fields may otherwise be
+     * promoted to int only, in which case the << 31 of the high part overflows and the
+     * argument handed to the %RGp specifier does not have the width of an RTGCPHYS.
+     */
+    RTGCPHYS const GCPhysGstCr3Root = ((RTGCPHYS)pDte->n.u21GstCr3TableRootPtrHi  << 31)
+                                    | ((RTGCPHYS)pDte->n.u16GstCr3TableRootPtrMid << 15)
+                                    | ((RTGCPHYS)pDte->n.u3GstCr3TableRootPtrLo   << 12);
+    pHlp->pfnPrintf(pHlp, "%sGuest Root Page Table Ptr = %#x %#x %#x (addr=%#RGp)\n", pszPrefix,
+                    pDte->n.u3GstCr3TableRootPtrLo, pDte->n.u16GstCr3TableRootPtrMid, pDte->n.u21GstCr3TableRootPtrHi,
+                    GCPhysGstCr3Root);
+
+    pHlp->pfnPrintf(pHlp, "%sI/O Read = %s\n", pszPrefix, pDte->n.u1IoRead ? "allowed" : "denied");
+    pHlp->pfnPrintf(pHlp, "%sI/O Write = %s\n", pszPrefix, pDte->n.u1IoWrite ? "allowed" : "denied");
+    pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd0);
+    pHlp->pfnPrintf(pHlp, "%sDomain ID = %u (%#x)\n", pszPrefix, pDte->n.u16DomainId, pDte->n.u16DomainId);
+    pHlp->pfnPrintf(pHlp, "%sIOTLB Enable = %RTbool\n", pszPrefix, pDte->n.u1IoTlbEnable);
+    pHlp->pfnPrintf(pHlp, "%sSuppress I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressPfEvents);
+    pHlp->pfnPrintf(pHlp, "%sSuppress all I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressAllPfEvents);
+    pHlp->pfnPrintf(pHlp, "%sPort I/O Control = %#x\n", pszPrefix, pDte->n.u2IoCtl);
+    pHlp->pfnPrintf(pHlp, "%sIOTLB Cache Hint = %s\n", pszPrefix, pDte->n.u1Cache ? "no caching" : "cache");
+    pHlp->pfnPrintf(pHlp, "%sSnoop Disable = %RTbool\n", pszPrefix, pDte->n.u1SnoopDisable);
+    pHlp->pfnPrintf(pHlp, "%sAllow Exclusion = %RTbool\n", pszPrefix, pDte->n.u1AllowExclusion);
+    /* u2SysMgt is a multi-bit field, so it is printed in hex like the other 2-bit fields
+       of this dump rather than as a boolean. */
+    pHlp->pfnPrintf(pHlp, "%sSysMgt Message Enable = %#x\n", pszPrefix, pDte->n.u2SysMgt);
+    pHlp->pfnPrintf(pHlp, "%sInterrupt Map Valid = %RTbool\n", pszPrefix, pDte->n.u1IntrMapValid);
+
+    /* The entry/byte counts are only derived when the encoded length is below the maximum. */
+    uint8_t const uIntrTabLen = pDte->n.u4IntrTableLength;
+    if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
+    {
+        uint16_t const cEntries    = IOMMU_DTE_GET_INTR_TAB_ENTRIES(pDte);
+        uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte);
+        pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (%u entries, %u bytes)\n", pszPrefix, uIntrTabLen, cEntries,
+                        cbIntrTable);
+    }
+    else
+        pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (invalid!)\n", pszPrefix, uIntrTabLen);
+    pHlp->pfnPrintf(pHlp, "%sIgnore Unmapped Interrupts = %RTbool\n", pszPrefix, pDte->n.u1IgnoreUnmappedIntrs);
+    pHlp->pfnPrintf(pHlp, "%sInterrupt Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix,
+                    pDte->n.u46IntrTableRootPtr, pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK);
+    pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u4Rsvd0);
+    pHlp->pfnPrintf(pHlp, "%sINIT passthru = %RTbool\n", pszPrefix, pDte->n.u1InitPassthru);
+    pHlp->pfnPrintf(pHlp, "%sExtInt passthru = %RTbool\n", pszPrefix, pDte->n.u1ExtIntPassthru);
+    pHlp->pfnPrintf(pHlp, "%sNMI passthru = %RTbool\n", pszPrefix, pDte->n.u1NmiPassthru);
+    pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd2);
+    pHlp->pfnPrintf(pHlp, "%sInterrupt Control = %#x\n", pszPrefix, pDte->n.u2IntrCtrl);
+    pHlp->pfnPrintf(pHlp, "%sLINT0 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint0Passthru);
+    pHlp->pfnPrintf(pHlp, "%sLINT1 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint1Passthru);
+    pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u32Rsvd0);
+    pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u22Rsvd0);
+    pHlp->pfnPrintf(pHlp, "%sAttribute Override Valid = %RTbool\n", pszPrefix, pDte->n.u1AttrOverride);
+    pHlp->pfnPrintf(pHlp, "%sMode0FC = %#x\n", pszPrefix, pDte->n.u1Mode0FC);
+    pHlp->pfnPrintf(pHlp, "%sSnoop Attribute = %#x\n", pszPrefix, pDte->n.u8SnoopAttr);
+    pHlp->pfnPrintf(pHlp, "\n");
+}
+
+
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ *
+ * Parses @a pszArgs as a 16-bit device ID, reads the DTE for that device with
+ * iommuAmdDteRead() and dumps it using iommuAmdR3DbgInfoDteWorker().  A
+ * diagnostic line is printed instead when the argument is missing, cannot be
+ * parsed, or the DTE read does not succeed.
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoDte(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    /* No argument, nothing to look up. */
+    if (!pszArgs)
+    {
+        pHlp->pfnPrintf(pHlp, "Missing device ID.\n");
+        return;
+    }
+
+    /* The argument must convert to a 16-bit device ID. */
+    uint16_t uDevId = 0;
+    int rcStatus = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &uDevId);
+    if (!RT_SUCCESS(rcStatus))
+    {
+        pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit device ID. rc=%Rrc\n", rcStatus);
+        return;
+    }
+
+    /* Fetch the entry and bail out with a message if that fails. */
+    DTE_T DevTabEntry;
+    rcStatus = iommuAmdDteRead(pDevIns, uDevId, IOMMUOP_TRANSLATE_REQ, &DevTabEntry);
+    if (!RT_SUCCESS(rcStatus))
+    {
+        pHlp->pfnPrintf(pHlp, "Failed to read DTE for device ID %u (%#x). rc=%Rrc\n", uDevId, uDevId, rcStatus);
+        return;
+    }
+
+    pHlp->pfnPrintf(pHlp, "DTE for device %#x\n", uDevId);
+    iommuAmdR3DbgInfoDteWorker(pHlp, &DevTabEntry, " ");
+}
+
+
+# ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ *
+ * Prints the capacity of the DTE cache and then, for every slot holding a
+ * non-zero device ID, the device (with its bus:device.function breakdown), the
+ * cached flags and the domain ID.  The walk is done between IOMMU_CACHE_LOCK
+ * and IOMMU_CACHE_UNLOCK.  @a pszArgs is not used.
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoDteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    RT_NOREF(pszArgs);
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    uint16_t const cSlots = RT_ELEMENTS(pThis->aDeviceIds);
+    pHlp->pfnPrintf(pHlp, "DTE Cache: Capacity=%u entries\n", cSlots);
+    for (uint16_t idxSlot = 0; idxSlot < cSlots; idxSlot++)
+    {
+        /* Slots with a device ID of zero are not reported. */
+        uint16_t const uDevId = pThis->aDeviceIds[idxSlot];
+        if (!uDevId)
+            continue;
+
+        pHlp->pfnPrintf(pHlp, " Entry[%u]: Device=%#x (BDF %02x:%02x.%d)\n", idxSlot, uDevId,
+                        (uDevId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
+                        (uDevId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
+                        uDevId & VBOX_PCI_DEVFN_FUN_MASK);
+
+        /* The cached DTE data lives at the same index as the device ID. */
+        PCDTECACHE pCached = &pThis->aDteCache[idxSlot];
+        pHlp->pfnPrintf(pHlp, " Flags = %#x\n", pCached->fFlags);
+        pHlp->pfnPrintf(pHlp, " Domain Id = %u\n", pCached->idDomain);
+        pHlp->pfnPrintf(pHlp, "\n");
+    }
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+# endif /* IOMMU_WITH_DTE_CACHE */
+
+
+# ifdef IOMMU_WITH_IOTLBE_CACHE
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ *
+ * Parses @a pszArgs as a 16-bit domain ID and runs iommuAmdR3IotlbEntryInfo
+ * over every node of the IOTLB entry tree, handing it the domain ID and the
+ * info helper.  A diagnostic line is printed instead when the argument is
+ * missing or cannot be parsed.
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoIotlb(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    /* No argument, nothing to enumerate. */
+    if (!pszArgs)
+    {
+        pHlp->pfnPrintf(pHlp, "Missing domain ID.\n");
+        return;
+    }
+
+    /* The argument must convert to a 16-bit domain ID. */
+    uint16_t uDomainId = 0;
+    int const rcParse = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &uDomainId);
+    if (!RT_SUCCESS(rcParse))
+    {
+        pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit domain ID. rc=%Rrc\n", rcParse);
+        return;
+    }
+
+    pHlp->pfnPrintf(pHlp, "IOTLBEs for domain %u (%#x):\n", uDomainId, uDomainId);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+
+    /* Context passed to the per-node callback. */
+    IOTLBEINFOARG CallbackArgs;
+    CallbackArgs.pIommuR3 = pThisR3;
+    CallbackArgs.pHlp     = pHlp;
+    CallbackArgs.idDomain = uDomainId;
+
+    /* The tree is walked between cache lock and unlock. */
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+    RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &CallbackArgs);
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+# endif /* IOMMU_WITH_IOTLBE_CACHE */
+
+
+# ifdef IOMMU_WITH_IRTE_CACHE
+/**
+ * Translates the interrupt type field of an IRTE into a printable name.
+ *
+ * @returns "Fixed" or "Arbitrated" for the two recognized delivery modes,
+ *          "<Reserved>" for every other value.
+ * @param   uIntrType       The interrupt type (as specified in the IRTE).
+ */
+static const char *iommuAmdIrteGetIntrTypeName(uint8_t uIntrType)
+{
+    if (uIntrType == VBOX_MSI_DELIVERY_MODE_FIXED)
+        return "Fixed";
+    if (uIntrType == VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
+        return "Arbitrated";
+    return "<Reserved>";
+}
+
+
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ *
+ * Prints the capacity of the IRTE cache and then, for every slot whose key is
+ * not IOMMU_IRTE_CACHE_KEY_NIL, the IRTE offset and device decoded from the key
+ * followed by the fields of the cached IRTE.  The walk is done between
+ * IOMMU_CACHE_LOCK and IOMMU_CACHE_UNLOCK.  @a pszArgs is not used.
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoIrteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    RT_NOREF(pszArgs);
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_CACHE_LOCK(pDevIns, pThis);
+
+    uint16_t const cSlots = RT_ELEMENTS(pThis->aIrteCache);
+    pHlp->pfnPrintf(pHlp, "IRTE Cache: Capacity=%u entries\n", cSlots);
+    for (uint16_t idxSlot = 0; idxSlot < cSlots; idxSlot++)
+    {
+        /* Slots carrying the NIL key are not reported. */
+        PCIRTECACHE    pSlot    = &pThis->aIrteCache[idxSlot];
+        uint32_t const uSlotKey = pSlot->uKey;
+        if (uSlotKey == IOMMU_IRTE_CACHE_KEY_NIL)
+            continue;
+
+        /* The key encodes both the device ID and the IRTE offset. */
+        uint16_t const uDevId      = IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(uSlotKey);
+        uint16_t const offCachedIrte = IOMMU_IRTE_CACHE_KEY_GET_OFF(uSlotKey);
+        pHlp->pfnPrintf(pHlp, " Entry[%u]: Offset=%#x Device=%#x (BDF %02x:%02x.%d)\n",
+                        idxSlot, offCachedIrte, uDevId,
+                        (uDevId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
+                        (uDevId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
+                        uDevId & VBOX_PCI_DEVFN_FUN_MASK);
+
+        PCIRTE_T pCachedIrte = &pSlot->Irte;
+        pHlp->pfnPrintf(pHlp, " Remap Enable = %RTbool\n", pCachedIrte->n.u1RemapEnable);
+        pHlp->pfnPrintf(pHlp, " Suppress IOPF = %RTbool\n", pCachedIrte->n.u1SuppressIoPf);
+        pHlp->pfnPrintf(pHlp, " Interrupt Type = %#x (%s)\n", pCachedIrte->n.u3IntrType,
+                        iommuAmdIrteGetIntrTypeName(pCachedIrte->n.u3IntrType));
+        pHlp->pfnPrintf(pHlp, " Request EOI = %RTbool\n", pCachedIrte->n.u1ReqEoi);
+        pHlp->pfnPrintf(pHlp, " Destination mode = %s\n", pCachedIrte->n.u1DestMode ? "Logical" : "Physical");
+        pHlp->pfnPrintf(pHlp, " Destination Id = %u\n", pCachedIrte->n.u8Dest);
+        pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", pCachedIrte->n.u8Vector, pCachedIrte->n.u8Vector);
+        pHlp->pfnPrintf(pHlp, "\n");
+    }
+    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
+}
+# endif /* IOMMU_WITH_IRTE_CACHE */
+
+
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ *
+ * Dumps the device table entries (DTEs) that have address translation enabled.
+ *
+ * First counts the device table segments whose base address is non-zero and
+ * prints that count.  Then, for each such segment, the whole table is read via
+ * PDMDevHlpPCIPhysRead into a temporary heap buffer and every DTE that is
+ * valid, has a valid translation and a non-zero mode field is printed using
+ * iommuAmdR3DbgInfoDteWorker.  The argument string is ignored.
+ *
+ * A failed read of one segment is reported and the remaining segments are
+ * still processed; a failed allocation is reported and ends the dump.
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoDevTabs(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    RT_NOREF(pszArgs);
+
+    PCIOMMU     pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+    NOREF(pPciDev);
+
+    /* Pass 1: count the segments that have a base address programmed. */
+    uint8_t cSegments = 0;
+    for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
+    {
+        DEV_TAB_BAR_T const DevTabBar    = pThis->aDevTabBaseAddrs[i];
+        RTGCPHYS const      GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT;
+        if (GCPhysDevTab)
+            ++cSegments;
+    }
+
+    pHlp->pfnPrintf(pHlp, "AMD-IOMMU device tables with address translations enabled:\n");
+    pHlp->pfnPrintf(pHlp, " DTE Segments=%u\n", cSegments);
+    if (!cSegments)
+        return;
+
+    /* Pass 2: read each programmed segment and print the translating DTEs. */
+    for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
+    {
+        DEV_TAB_BAR_T const DevTabBar    = pThis->aDevTabBaseAddrs[i];
+        RTGCPHYS const      GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT;
+        if (!GCPhysDevTab)
+            continue;
+
+        uint32_t const cbDevTab = IOMMU_GET_DEV_TAB_LEN(&DevTabBar);
+        uint32_t const cDtes    = cbDevTab / sizeof(DTE_T);
+
+        void *pvDevTab = RTMemAllocZ(cbDevTab);
+        if (RT_UNLIKELY(!pvDevTab))
+        {
+            /* cbDevTab is a 32-bit value, so it is formatted with %u (not %zu). */
+            pHlp->pfnPrintf(pHlp, " Allocating %u bytes for reading the device table failed!\n", cbDevTab);
+            return;
+        }
+
+        int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDevTab, pvDevTab, cbDevTab);
+        if (RT_SUCCESS(rc))
+        {
+            for (uint32_t idxDte = 0; idxDte < cDtes; idxDte++)
+            {
+                PCDTE_T pDte = (PCDTE_T)((uintptr_t)pvDevTab + idxDte * sizeof(DTE_T));
+                if (   pDte->n.u1Valid
+                    && pDte->n.u1TranslationValid
+                    && pDte->n.u3Mode != 0)
+                {
+                    /* The DTE index is decoded as a bus:device.function triple for display. */
+                    pHlp->pfnPrintf(pHlp, " DTE %u (BDF %02x:%02x.%d)\n", idxDte,
+                                    (idxDte >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
+                                    (idxDte >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
+                                    idxDte & VBOX_PCI_DEVFN_FUN_MASK);
+                    iommuAmdR3DbgInfoDteWorker(pHlp, pDte, " ");
+                    pHlp->pfnPrintf(pHlp, "\n");
+                }
+            }
+            pHlp->pfnPrintf(pHlp, "\n");
+        }
+        else
+        {
+            /* cbDevTab is a 32-bit value, so it is formatted with %u (not %zu). */
+            pHlp->pfnPrintf(pHlp, " Failed to read table at %#RGp of size %u bytes. rc=%Rrc!\n", GCPhysDevTab,
+                            cbDevTab, rc);
+        }
+
+        RTMemFree(pvDevTab);
+    }
+}
+
+
+/**
+ * @callback_method_impl{FNSSMDEVSAVEEXEC}
+ *
+ * Writes the IOMMU register state to the saved-state stream.
+ *
+ * The sequence of values written here is the sequence iommuAmdR3LoadExec reads
+ * back, so any change in order, width or count must be mirrored there.  The
+ * stream is terminated with a 32-bit end marker (UINT32_MAX).
+ *
+ * Only the status of the final write (the end marker) is returned; the status
+ * codes of the preceding writes are not inspected here.
+ */
+static DECLCALLBACK(int) iommuAmdR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM)
+{
+ PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3;
+ LogFlowFunc(("\n"));
+
+ /* First, save ExtFeat and other registers that cannot be modified by the guest. */
+ pHlp->pfnSSMPutU64(pSSM, pThis->ExtFeat.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificFeat.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificCtrl.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificStatus.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->MiscInfo.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->RsvdReg);
+
+ /* Next, save all registers that can be modified by the guest. */
+ pHlp->pfnSSMPutU64(pSSM, pThis->IommuBar.u64);
+
+ /* The device table base address array is prefixed with its element count. */
+ uint8_t const cDevTabBaseAddrs = RT_ELEMENTS(pThis->aDevTabBaseAddrs);
+ pHlp->pfnSSMPutU8(pSSM, cDevTabBaseAddrs);
+ for (uint8_t i = 0; i < cDevTabBaseAddrs; i++)
+ pHlp->pfnSSMPutU64(pSSM, pThis->aDevTabBaseAddrs[i].u64);
+
+ /* Refuse to save a command buffer length below 8; reset programs 8 and the loader rejects anything smaller. */
+ AssertReturn(pThis->CmdBufBaseAddr.n.u4Len >= 8, VERR_IOMMU_IPE_4);
+ pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufBaseAddr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBaseAddr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->Ctrl.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->ExclRangeBaseAddr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->ExclRangeLimit.u64);
+#if 0
+ pHlp->pfnSSMPutU64(pSSM, pThis->ExtFeat.u64); /* read-only, done already (above). */
+#endif
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBaseAddr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtHi.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtLo);
+ pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtStatus.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->GALogBaseAddr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->GALogTailAddr.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBBaseAddr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBBaseAddr.u64);
+
+#if 0
+ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificFeat.u64); /* read-only, done already (above). */
+ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificCtrl.u64); /* read-only, done already (above). */
+ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificStatus.u64); /* read-only, done already (above). */
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->MiscInfo.u64); /* read-only, done already (above). */
+#endif
+ /* Note: this register is 32 bits wide, unlike its neighbours. */
+ pHlp->pfnSSMPutU32(pSSM, pThis->PerfOptCtrl.u32);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->XtGenIntrCtrl.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->XtPprIntrCtrl.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->XtGALogIntrCtrl.u64);
+
+ /* The MARC aperture array is prefixed with its element count (stored as 8 bits); each element is Base, Reloc, Length. */
+ size_t const cMarcApers = RT_ELEMENTS(pThis->aMarcApers);
+ pHlp->pfnSSMPutU8(pSSM, cMarcApers);
+ for (size_t i = 0; i < cMarcApers; i++)
+ {
+ pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Base.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Reloc.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Length.u64);
+ }
+
+#if 0
+ pHlp->pfnSSMPutU64(pSSM, pThis->RsvdReg); /* read-only, done already (above). */
+#endif
+
+ /* Head/tail pointers of the command buffer and event log. */
+ pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufHeadPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufTailPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogHeadPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogTailPtr.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->Status.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogHeadPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogTailPtr.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->GALogHeadPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->GALogTailPtr.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBHeadPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBTailPtr.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBHeadPtr.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBTailPtr.u64);
+
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogAutoResp.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogOverflowEarly.u64);
+ pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBOverflowEarly.u64);
+
+ /* End marker; the loader verifies it reads back UINT32_MAX. */
+ return pHlp->pfnSSMPutU32(pSSM, UINT32_MAX);
+}
+
+
+/**
+ * @callback_method_impl{FNSSMDEVLOADEXEC}
+ *
+ * Restores the IOMMU register state from the saved-state stream.
+ *
+ * Values are read in the same order iommuAmdR3SaveExec writes them.  Registers
+ * that have a valid-bits mask are masked after loading, and a number of
+ * consistency checks (segment counts and sizes, buffer lengths, head/tail
+ * offsets, end marker) reject streams that this code could not have produced.
+ *
+ * @returns VBox status code.
+ * @retval  VERR_WRONG_ORDER if @a uPass is not SSM_PASS_FINAL.
+ * @retval  VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION if @a uVersion is not
+ *          IOMMU_SAVED_STATE_VERSION.
+ * @retval  VERR_SSM_UNEXPECTED_DATA if a loaded value fails validation.
+ * @retval  VERR_SSM_DATA_UNIT_FORMAT_CHANGED if the end marker cannot be read.
+ */
+static DECLCALLBACK(int) iommuAmdR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
+{
+    PIOMMU        pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PCPDMDEVHLPR3 pHlp  = pDevIns->pHlpR3;
+    int const     rcErr = VERR_SSM_UNEXPECTED_DATA;
+    LogFlowFunc(("\n"));
+
+    /* Validate. */
+    AssertReturn(uPass == SSM_PASS_FINAL, VERR_WRONG_ORDER);
+    if (uVersion != IOMMU_SAVED_STATE_VERSION)
+    {
+        LogRel(("%s: Invalid saved-state version %#x\n", IOMMU_LOG_PFX, uVersion));
+        return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
+    }
+
+    /* Load ExtFeat and other read-only registers first. */
+    int rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExtFeat.u64);
+    AssertRCReturn(rc, rc);
+    AssertLogRelMsgReturn(pThis->ExtFeat.n.u2HostAddrTranslateSize < 0x3,
+                          ("ExtFeat.HATS register invalid %#RX64\n", pThis->ExtFeat.u64), rcErr);
+    /* NOTE(review): the status of the next five reads is not checked here; presumably a
+       failure stays recorded in the SSM handle and surfaces at the next checked read - confirm. */
+    pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificFeat.u64);
+    pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificCtrl.u64);
+    pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificStatus.u64);
+    pHlp->pfnSSMGetU64(pSSM, &pThis->MiscInfo.u64);
+    pHlp->pfnSSMGetU64(pSSM, &pThis->RsvdReg);
+
+    /* IOMMU base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->IommuBar.u64);
+    AssertRCReturn(rc, rc);
+    pThis->IommuBar.u64 &= IOMMU_BAR_VALID_MASK;
+
+    /* Device table base address registers. */
+    uint8_t cDevTabBaseAddrs;
+    rc = pHlp->pfnSSMGetU8(pSSM, &cDevTabBaseAddrs);
+    AssertRCReturn(rc, rc);
+    AssertLogRelMsgReturn(cDevTabBaseAddrs > 0 && cDevTabBaseAddrs <= RT_ELEMENTS(pThis->aDevTabBaseAddrs),
+                          ("Device table segment count invalid %#x\n", cDevTabBaseAddrs), rcErr);
+    AssertCompile(RT_ELEMENTS(pThis->aDevTabBaseAddrs) == RT_ELEMENTS(g_auDevTabSegMaxSizes));
+    for (uint8_t i = 0; i < cDevTabBaseAddrs; i++)
+    {
+        rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aDevTabBaseAddrs[i].u64);
+        AssertRCReturn(rc, rc);
+        pThis->aDevTabBaseAddrs[i].u64 &= IOMMU_DEV_TAB_BAR_VALID_MASK;
+        uint16_t const uSegSize    = pThis->aDevTabBaseAddrs[i].n.u9Size;
+        uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[i];
+        AssertLogRelMsgReturn(uSegSize <= uMaxSegSize,
+                              ("Device table [%u] segment size invalid %u (max %u)\n", i, uSegSize, uMaxSegSize), rcErr);
+    }
+
+    /* Command buffer base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    pThis->CmdBufBaseAddr.u64 &= IOMMU_CMD_BUF_BAR_VALID_MASK;
+    AssertLogRelMsgReturn(pThis->CmdBufBaseAddr.n.u4Len >= 8,
+                          ("Command buffer base address invalid %#RX64\n", pThis->CmdBufBaseAddr.u64), rcErr);
+
+    /* Event log base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    pThis->EvtLogBaseAddr.u64 &= IOMMU_EVT_LOG_BAR_VALID_MASK;
+    AssertLogRelMsgReturn(pThis->EvtLogBaseAddr.n.u4Len >= 8,
+                          ("Event log base address invalid %#RX64\n", pThis->EvtLogBaseAddr.u64), rcErr);
+
+    /* Control register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->Ctrl.u64);
+    AssertRCReturn(rc, rc);
+    pThis->Ctrl.u64 &= IOMMU_CTRL_VALID_MASK;
+    AssertLogRelMsgReturn(pThis->Ctrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup,
+                          ("Control register invalid %#RX64\n", pThis->Ctrl.u64), rcErr);
+
+    /* Exclusion range base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExclRangeBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    pThis->ExclRangeBaseAddr.u64 &= IOMMU_EXCL_RANGE_BAR_VALID_MASK;
+
+    /* Exclusion range limit register (the low 12 bits are forced to ones after masking). */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExclRangeLimit.u64);
+    AssertRCReturn(rc, rc);
+    pThis->ExclRangeLimit.u64 &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK;
+    pThis->ExclRangeLimit.u64 |= UINT64_C(0xfff);
+
+#if 0
+    pHlp->pfnSSMGetU64(pSSM, &pThis->ExtFeat.u64);              /* read-only, done already (above). */
+#endif
+
+    /* PPR log base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprSup);
+
+    /* Hardware event (Hi) register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtHi.u64);
+    AssertRCReturn(rc, rc);
+
+    /* Hardware event (Lo) register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtLo);
+    AssertRCReturn(rc, rc);
+
+    /* Hardware event status register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtStatus.u64);
+    AssertRCReturn(rc, rc);
+    pThis->HwEvtStatus.u64 &= IOMMU_HW_EVT_STATUS_VALID_MASK;
+
+    /* Guest Virtual-APIC log base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
+
+    /* Guest Virtual-APIC log tail address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogTailAddr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
+
+    /* PPR log-B base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprSup);
+
+    /* Event log-B base address register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBBaseAddr.u64);
+    AssertRCReturn(rc, rc);
+    /* Checks the dual event log capability, matching the event log-B head/tail checks below. */
+    Assert(!pThis->ExtFeat.n.u2DualEvtLogSup);
+
+#if 0
+    pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificFeat.u64);      /* read-only, done already (above). */
+    pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificCtrl.u64);      /* read-only, done already (above). */
+    pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificStatus.u64);    /* read-only, done already (above). */
+
+    pHlp->pfnSSMGetU64(pSSM, &pThis->MiscInfo.u64);             /* read-only, done already (above). */
+#endif
+
+    /* Performance optimization control register. */
+    rc = pHlp->pfnSSMGetU32(pSSM, &pThis->PerfOptCtrl.u32);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PerfOptSup);
+
+    /* x2APIC registers. */
+    {
+        Assert(!pThis->ExtFeat.n.u1X2ApicSup);
+
+        /* x2APIC general interrupt control register. */
+        rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtGenIntrCtrl.u64);  /* Status must be captured for the check below. */
+        AssertRCReturn(rc, rc);
+
+        /* x2APIC PPR interrupt control register. */
+        rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtPprIntrCtrl.u64);
+        AssertRCReturn(rc, rc);
+
+        /* x2APIC GA log interrupt control register. */
+        rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtGALogIntrCtrl.u64);
+        AssertRCReturn(rc, rc);
+    }
+
+    /* MARC (Memory Access and Routing) registers. */
+    {
+        uint8_t cMarcApers;
+        rc = pHlp->pfnSSMGetU8(pSSM, &cMarcApers);
+        AssertRCReturn(rc, rc);
+        AssertLogRelMsgReturn(cMarcApers > 0 && cMarcApers <= RT_ELEMENTS(pThis->aMarcApers),
+                              ("MARC register count invalid %#x\n", cMarcApers), rcErr);
+        for (uint8_t i = 0; i < cMarcApers; i++)
+        {
+            rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Base.u64);
+            AssertRCReturn(rc, rc);
+
+            rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Reloc.u64);
+            AssertRCReturn(rc, rc);
+
+            rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Length.u64);
+            AssertRCReturn(rc, rc);
+        }
+        Assert(!pThis->ExtFeat.n.u2MarcSup);
+    }
+
+#if 0
+    pHlp->pfnSSMGetU64(pSSM, &pThis->RsvdReg);      /* read-only, done already (above). */
+#endif
+
+    /* Command buffer head pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufHeadPtr.u64);
+    AssertRCReturn(rc, rc);
+    {
+        /*
+         * IOMMU behavior is undefined when software writes a value outside the buffer length.
+         * In our emulation, since we ignore the write entirely (see iommuAmdCmdBufHeadPtr_w)
+         * we shouldn't see such values in the saved state.
+         */
+        uint32_t const offBuf = pThis->CmdBufHeadPtr.u64 & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK;
+        uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
+        Assert(cbBuf <= _512K);
+        AssertLogRelMsgReturn(offBuf < cbBuf,
+                              ("Command buffer head pointer invalid %#RX64\n", pThis->CmdBufHeadPtr.u64), rcErr);
+    }
+
+    /* Command buffer tail pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufTailPtr.u64);
+    AssertRCReturn(rc, rc);
+    {
+        uint32_t const offBuf = pThis->CmdBufTailPtr.u64 & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK;
+        uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
+        Assert(cbBuf <= _512K);
+        AssertLogRelMsgReturn(offBuf < cbBuf,
+                              ("Command buffer tail pointer invalid %#RX64\n", pThis->CmdBufTailPtr.u64), rcErr);
+    }
+
+    /* Event log head pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogHeadPtr.u64);
+    AssertRCReturn(rc, rc);
+    {
+        uint32_t const offBuf = pThis->EvtLogHeadPtr.u64 & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK;
+        uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
+        Assert(cbBuf <= _512K);
+        AssertLogRelMsgReturn(offBuf < cbBuf,
+                              ("Event log head pointer invalid %#RX64\n", pThis->EvtLogHeadPtr.u64), rcErr);
+    }
+
+    /* Event log tail pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogTailPtr.u64);
+    AssertRCReturn(rc, rc);
+    {
+        uint32_t const offBuf = pThis->EvtLogTailPtr.u64 & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK;
+        uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
+        Assert(cbBuf <= _512K);
+        AssertLogRelMsgReturn(offBuf < cbBuf,
+                              ("Event log tail pointer invalid %#RX64\n", pThis->EvtLogTailPtr.u64), rcErr);
+    }
+
+    /* Status register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->Status.u64);
+    AssertRCReturn(rc, rc);
+    pThis->Status.u64 &= IOMMU_STATUS_VALID_MASK;
+
+    /* PPR log head pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogHeadPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprSup);
+
+    /* PPR log tail pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogTailPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprSup);
+
+    /* Guest Virtual-APIC log head pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogHeadPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
+
+    /* Guest Virtual-APIC log tail pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogTailPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
+
+    /* PPR log-B head pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBHeadPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprSup);
+
+    /* PPR log-B tail pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBTailPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprSup);
+
+    /* Event log-B head pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBHeadPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u2DualEvtLogSup);
+
+    /* Event log-B tail pointer register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBTailPtr.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u2DualEvtLogSup);
+
+    /* PPR log auto response register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogAutoResp.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprAutoRespSup);
+
+    /* PPR log overflow early indicator register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogOverflowEarly.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprLogOverflowWarn);
+
+    /* PPR log-B overflow early indicator register. */
+    rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBOverflowEarly.u64);
+    AssertRCReturn(rc, rc);
+    Assert(!pThis->ExtFeat.n.u1PprLogOverflowWarn);
+
+    /* End marker. */
+    {
+        uint32_t uEndMarker;
+        rc = pHlp->pfnSSMGetU32(pSSM, &uEndMarker);
+        AssertLogRelMsgRCReturn(rc, ("Failed to read end marker. rc=%Rrc\n", rc), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
+        AssertLogRelMsgReturn(uEndMarker == UINT32_MAX, ("End marker invalid (%#x expected %#x)\n", uEndMarker, UINT32_MAX),
+                              rcErr);
+    }
+
+    return rc;
+}
+
+
+/**
+ * @callback_method_impl{FNSSMDEVLOADDONE}
+ *
+ * Runs after the register state has been loaded: under the IOMMU lock it sets
+ * up the MMIO mapping when the loaded IOMMU BAR has its enable bit set and
+ * gives the command thread a chance to wake up.  A summary of the restored
+ * read-only registers is written to the release log afterwards.
+ *
+ * @returns VINF_SUCCESS when the BAR is disabled, otherwise the status of
+ *          iommuAmdR3MmioSetup; VERR_INVALID_POINTER if either instance data
+ *          pointer fails the pointer check.
+ */
+static DECLCALLBACK(int) iommuAmdR3LoadDone(PPDMDEVINS pDevIns, PSSMHANDLE pSSM)
+{
+    RT_NOREF(pSSM);
+    LogFlowFunc(("\n"));
+
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+
+    /* Sanity. */
+    AssertPtrReturn(pThis, VERR_INVALID_POINTER);
+    AssertPtrReturn(pThisR3, VERR_INVALID_POINTER);
+
+    int rc = VINF_SUCCESS;
+    IOMMU_LOCK(pDevIns, pThisR3);
+
+    /* The MMIO region is only mapped when the restored BAR is enabled. */
+    if (pThis->IommuBar.n.u1Enable)
+        rc = iommuAmdR3MmioSetup(pDevIns);
+
+    /* Commands may have been pending when the state was saved. */
+    iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
+
+    IOMMU_UNLOCK(pDevIns, pThisR3);
+
+    LogRel(("%s: Restored: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX,
+            pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor,
+            pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor,
+            pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor,
+            pThis->ExtFeat.u64));
+    return rc;
+}
+
+
+/**
+ * @interface_method_impl{PDMDEVREG,pfnReset}
+ *
+ * Puts all guest-writable IOMMU registers back to their reset values while
+ * holding the IOMMU lock, clears the IOMMU base address in both the device
+ * state and PCI config space, and finally empties whichever of the DTE, IOTLB
+ * and IRTE caches are compiled in (after the lock has been released).
+ */
+static DECLCALLBACK(void) iommuAmdR3Reset(PPDMDEVINS pDevIns)
+{
+ /*
+ * Resets read-write portion of the IOMMU state.
+ *
+ * NOTE! State not initialized here is expected to be initialized during
+ * device construction and remain read-only through the lifetime of the VM.
+ */
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+ PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+ PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+ LogFlowFunc(("\n"));
+
+ IOMMU_LOCK(pDevIns, pThisR3);
+
+ RT_ZERO(pThis->aDevTabBaseAddrs);
+
+ /* Length field is reset to 8, the smallest value the saved-state loader accepts for this register. */
+ pThis->CmdBufBaseAddr.u64 = 0;
+ pThis->CmdBufBaseAddr.n.u4Len = 8;
+
+ /* Same minimum length as the command buffer. */
+ pThis->EvtLogBaseAddr.u64 = 0;
+ pThis->EvtLogBaseAddr.n.u4Len = 8;
+
+ /* Control register: everything cleared except the coherent bit. */
+ pThis->Ctrl.u64 = 0;
+ pThis->Ctrl.n.u1Coherent = 1;
+ Assert(!pThis->ExtFeat.n.u1BlockStopMarkSup);
+
+ pThis->ExclRangeBaseAddr.u64 = 0;
+ pThis->ExclRangeLimit.u64 = 0;
+
+ pThis->PprLogBaseAddr.u64 = 0;
+ pThis->PprLogBaseAddr.n.u4Len = 8;
+
+ pThis->HwEvtHi.u64 = 0;
+ pThis->HwEvtLo = 0;
+ pThis->HwEvtStatus.u64 = 0;
+
+ pThis->GALogBaseAddr.u64 = 0;
+ pThis->GALogBaseAddr.n.u4Len = 8;
+ pThis->GALogTailAddr.u64 = 0;
+
+ pThis->PprLogBBaseAddr.u64 = 0;
+ pThis->PprLogBBaseAddr.n.u4Len = 8;
+
+ pThis->EvtLogBBaseAddr.u64 = 0;
+ pThis->EvtLogBBaseAddr.n.u4Len = 8;
+
+ pThis->PerfOptCtrl.u32 = 0;
+
+ pThis->XtGenIntrCtrl.u64 = 0;
+ pThis->XtPprIntrCtrl.u64 = 0;
+ pThis->XtGALogIntrCtrl.u64 = 0;
+
+ RT_ZERO(pThis->aMarcApers);
+
+ /* All head/tail pointers go back to offset zero. */
+ pThis->CmdBufHeadPtr.u64 = 0;
+ pThis->CmdBufTailPtr.u64 = 0;
+ pThis->EvtLogHeadPtr.u64 = 0;
+ pThis->EvtLogTailPtr.u64 = 0;
+
+ pThis->Status.u64 = 0;
+
+ pThis->PprLogHeadPtr.u64 = 0;
+ pThis->PprLogTailPtr.u64 = 0;
+
+ pThis->GALogHeadPtr.u64 = 0;
+ pThis->GALogTailPtr.u64 = 0;
+
+ pThis->PprLogBHeadPtr.u64 = 0;
+ pThis->PprLogBTailPtr.u64 = 0;
+
+ pThis->EvtLogBHeadPtr.u64 = 0;
+ pThis->EvtLogBTailPtr.u64 = 0;
+
+ pThis->PprLogAutoResp.u64 = 0;
+ pThis->PprLogOverflowEarly.u64 = 0;
+ pThis->PprLogBOverflowEarly.u64 = 0;
+
+ /* Clear the IOMMU base address both in the device state and in the PCI config space registers. */
+ pThis->IommuBar.u64 = 0;
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0);
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0);
+
+ /* The PCI command register is set to bus master only, the same value the constructor programs. */
+ PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER);
+
+ IOMMU_UNLOCK(pDevIns, pThisR3);
+
+ /* The caches are emptied after the IOMMU lock has been dropped. */
+#ifdef IOMMU_WITH_DTE_CACHE
+ iommuAmdDteCacheRemoveAll(pDevIns);
+#endif
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ iommuAmdIotlbRemoveAll(pDevIns);
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+ iommuAmdIrteCacheRemoveAll(pDevIns);
+#endif
+}
+
+
+/**
+ * @interface_method_impl{PDMDEVREG,pfnDestruct}
+ *
+ * Releases what the device instance still holds: the event semaphore handle
+ * hEvtCmdThread is closed if it is not NIL and, when the IOTLB cache is
+ * compiled in, the IOTLB entry array is returned to the MM heap.  Both steps
+ * run under the IOMMU lock.
+ *
+ * @returns VINF_SUCCESS.
+ */
+static DECLCALLBACK(int) iommuAmdR3Destruct(PPDMDEVINS pDevIns)
+{
+    PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    LogFlowFunc(("\n"));
+
+    IOMMU_LOCK(pDevIns, pThisR3);
+
+    /* Close the event semaphore and mark the handle as gone. */
+    if (NIL_SUPSEMEVENT != pThis->hEvtCmdThread)
+    {
+        PDMDevHlpSUPSemEventClose(pDevIns, pThis->hEvtCmdThread);
+        pThis->hEvtCmdThread = NIL_SUPSEMEVENT;
+    }
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+    /* Free the IOTLB entry array and reset the bookkeeping that refers to it. */
+    if (pThisR3->paIotlbes != NULL)
+    {
+        PDMDevHlpMMHeapFree(pDevIns, pThisR3->paIotlbes);
+        pThisR3->idxUnusedIotlbe = 0;
+        pThisR3->paIotlbes       = NULL;
+    }
+#endif
+
+    IOMMU_UNLOCK(pDevIns, pThisR3);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * @interface_method_impl{PDMDEVREG,pfnConstruct}
+ */
+static DECLCALLBACK(int) iommuAmdR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
+{
+ PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
+
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+ PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
+ PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3;
+
+ pThis->u32Magic = IOMMU_MAGIC;
+ pThisR3->pDevInsR3 = pDevIns;
+
+ LogFlowFunc(("iInstance=%d\n", iInstance));
+
+ /*
+ * Validate and read the configuration.
+ */
+ PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, "PCIAddress", "");
+ int rc = pHlp->pfnCFGMQueryU32Def(pCfg, "PCIAddress", &pThis->uPciAddress, NIL_PCIBDF);
+ if (RT_FAILURE(rc))
+ return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed to query 32-bit integer \"PCIAddress\""));
+ if (!PCIBDF_IS_VALID(pThis->uPciAddress))
+ return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed \"PCIAddress\" of the AMD IOMMU cannot be invalid"));
+
+ /*
+ * Register the IOMMU with PDM.
+ */
+ PDMIOMMUREGR3 IommuReg;
+ RT_ZERO(IommuReg);
+ IommuReg.u32Version = PDM_IOMMUREGCC_VERSION;
+ IommuReg.pfnMemAccess = iommuAmdMemAccess;
+ IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess;
+ IommuReg.pfnMsiRemap = iommuAmdMsiRemap;
+ IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION;
+ rc = PDMDevHlpIommuRegister(pDevIns, &IommuReg, &pThisR3->CTX_SUFF(pIommuHlp), &pThis->idxIommu);
+ if (RT_FAILURE(rc))
+ return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as an IOMMU device"));
+ if (pThisR3->CTX_SUFF(pIommuHlp)->u32Version != PDM_IOMMUHLPR3_VERSION)
+ return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS,
+ N_("IOMMU helper version mismatch; got %#x expected %#x"),
+ pThisR3->CTX_SUFF(pIommuHlp)->u32Version, PDM_IOMMUHLPR3_VERSION);
+ if (pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd != PDM_IOMMUHLPR3_VERSION)
+ return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS,
+ N_("IOMMU helper end-version mismatch; got %#x expected %#x"),
+ pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd, PDM_IOMMUHLPR3_VERSION);
+ AssertPtr(pThisR3->pIommuHlpR3->pfnLock);
+ AssertPtr(pThisR3->pIommuHlpR3->pfnUnlock);
+ AssertPtr(pThisR3->pIommuHlpR3->pfnLockIsOwner);
+ AssertPtr(pThisR3->pIommuHlpR3->pfnSendMsi);
+
+ /*
+ * We will use PDM's critical section (via helpers) for the IOMMU device.
+ */
+ rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Initialize read-only PCI configuration space.
+ */
+ PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
+ PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+
+ /* Header. */
+ PDMPciDevSetVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* AMD */
+ PDMPciDevSetDeviceId(pPciDev, IOMMU_PCI_DEVICE_ID); /* VirtualBox IOMMU device */
+ PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER); /* Enable bus master (as we directly access main memory) */
+ PDMPciDevSetStatus(pPciDev, VBOX_PCI_STATUS_CAP_LIST); /* Capability list supported */
+ PDMPciDevSetRevisionId(pPciDev, IOMMU_PCI_REVISION_ID); /* VirtualBox specific device implementation revision */
+ PDMPciDevSetClassBase(pPciDev, VBOX_PCI_CLASS_SYSTEM); /* System Base Peripheral */
+ PDMPciDevSetClassSub(pPciDev, VBOX_PCI_SUB_SYSTEM_IOMMU); /* IOMMU */
+ PDMPciDevSetClassProg(pPciDev, 0x0); /* IOMMU Programming interface */
+ PDMPciDevSetHeaderType(pPciDev, 0x0); /* Single function, type 0 */
+ PDMPciDevSetSubSystemId(pPciDev, IOMMU_PCI_DEVICE_ID); /* AMD */
+ PDMPciDevSetSubSystemVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* VirtualBox IOMMU device */
+ PDMPciDevSetCapabilityList(pPciDev, IOMMU_PCI_OFF_CAP_HDR); /* Offset into capability registers */
+ PDMPciDevSetInterruptPin(pPciDev, 0x1); /* INTA#. */
+ PDMPciDevSetInterruptLine(pPciDev, 0x0); /* For software compatibility; no effect on hardware */
+
+ /* Capability Header. */
+ /* NOTE! Fields (e.g, EFR) must match what we expose in the ACPI tables. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_CAP_HDR,
+ RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_ID, 0xf) /* RO - Secure Device capability block */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_PTR, IOMMU_PCI_OFF_MSI_CAP_HDR) /* RO - Next capability offset */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_TYPE, 0x3) /* RO - IOMMU capability block */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_REV, 0x1) /* RO - IOMMU interface revision */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_IOTLB_SUP, 0x0) /* RO - Remote IOTLB support */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_HT_TUNNEL, 0x0) /* RO - HyperTransport Tunnel support */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_NP_CACHE, 0x0) /* RO - Cache NP page table entries */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_EFR_SUP, 0x1) /* RO - Extended Feature Register support */
+ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_EXT, 0x1)); /* RO - Misc. Information Register support */
+
+ /* Base Address Register. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0x0); /* RW - Base address (Lo) and enable bit */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0x0); /* RW - Base address (Hi) */
+
+ /* IOMMU Range Register. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_RANGE_REG, 0x0); /* RW - Range register (implemented as RO by us) */
+
+ /* Misc. Information Register. */
+ /* NOTE! Fields (e.g, GVA size) must match what we expose in the ACPI tables. */
+ uint32_t const uMiscInfoReg0 = RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM, 0) /* RO - MSI number */
+ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_GVA_SIZE, 2) /* RO - Guest Virt. Addr size (2=48 bits) */
+ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_PA_SIZE, 48) /* RO - Physical Addr size (48 bits) */
+ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_VA_SIZE, 64) /* RO - Virt. Addr size (64 bits) */
+ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_HT_ATS_RESV, 0) /* RW - HT ATS reserved */
+ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM_PPR, 0); /* RW - PPR interrupt number */
+ uint32_t const uMiscInfoReg1 = 0;
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_0, uMiscInfoReg0);
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_1, uMiscInfoReg1);
+
+ /* MSI Capability Header register. */
+ PDMMSIREG MsiReg;
+ RT_ZERO(MsiReg);
+ MsiReg.cMsiVectors = 1;
+ MsiReg.iMsiCapOffset = IOMMU_PCI_OFF_MSI_CAP_HDR;
+ MsiReg.iMsiNextOffset = 0; /* IOMMU_PCI_OFF_MSI_MAP_CAP_HDR */
+ MsiReg.fMsi64bit = 1; /* 64-bit addressing support is mandatory; See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". */
+
+ /* MSI Address (Lo, Hi) and MSI data are read-write PCI config registers handled by our generic PCI config space code. */
+#if 0
+ /* MSI Address Lo. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, 0); /* RW - MSI message address (Lo) */
+ /* MSI Address Hi. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, 0); /* RW - MSI message address (Hi) */
+ /* MSI Data. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, 0); /* RW - MSI data */
+#endif
+
+#if 0
+ /** @todo IOMMU: I don't know if we need to support this, enable later if
+ * required. */
+ /* MSI Mapping Capability Header register. */
+ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_MAP_CAP_HDR,
+ RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_ID, 0x8) /* RO - Capability ID */
+ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_PTR, 0x0) /* RO - Offset to next capability (NULL) */
+ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_EN, 0x1) /* RO - MSI mapping capability enable */
+ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_FIXED, 0x1) /* RO - MSI mapping range is fixed */
+ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_TYPE, 0x15)); /* RO - MSI mapping capability */
+ /* When implementing don't forget to copy this to its MMIO shadow register (MsiMapCapHdr) in iommuAmdR3Init. */
+#endif
+
+ /*
+ * Register the PCI function with PDM.
+ */
+ rc = PDMDevHlpPCIRegister(pDevIns, pPciDev);
+ AssertLogRelRCReturn(rc, rc);
+
+ /*
+ * Register MSI support for the PCI device.
+ * This must be done -after- registering it as a PCI device!
+ */
+ rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Intercept PCI config. space accesses.
+ */
+ rc = PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, iommuAmdR3PciConfigRead, iommuAmdR3PciConfigWrite);
+ AssertLogRelRCReturn(rc, rc);
+
+ /*
+ * Create the MMIO region.
+ * Mapping of the region is done when software configures it via PCI config space.
+ */
+ rc = PDMDevHlpMmioCreate(pDevIns, IOMMU_MMIO_REGION_SIZE, pPciDev, 0 /* iPciRegion */, iommuAmdMmioWrite, iommuAmdMmioRead,
+ NULL /* pvUser */,
+ IOMMMIO_FLAGS_READ_DWORD_QWORD
+ | IOMMMIO_FLAGS_WRITE_DWORD_QWORD_READ_MISSING
+ | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_READ
+ | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_WRITE,
+ "AMD-IOMMU", &pThis->hMmio);
+ AssertLogRelRCReturn(rc, rc);
+
+ /*
+ * Register saved state handlers.
+ */
+ rc = PDMDevHlpSSMRegisterEx(pDevIns, IOMMU_SAVED_STATE_VERSION, sizeof(IOMMU), NULL /* pszBefore */,
+ NULL /* pfnLivePrep */, NULL /* pfnLiveExec */, NULL /* pfnLiveVote */,
+ NULL /* pfnSavePrep */, iommuAmdR3SaveExec, NULL /* pfnSaveDone */,
+ NULL /* pfnLoadPrep */, iommuAmdR3LoadExec, iommuAmdR3LoadDone);
+ AssertLogRelRCReturn(rc, rc);
+
+ /*
+ * Register debugger info items.
+ */
+ PDMDevHlpDBGFInfoRegister(pDevIns, "iommu", "Display IOMMU state.", iommuAmdR3DbgInfo);
+ PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device (from memory). Arguments: DeviceID.", iommuAmdR3DbgInfoDte);
+ PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display I/O device tables with translation enabled.", iommuAmdR3DbgInfoDevTabs);
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. Arguments: DomainID.", iommuAmdR3DbgInfoIotlb);
+#endif
+#ifdef IOMMU_WITH_DTE_CACHE
+ PDMDevHlpDBGFInfoRegister(pDevIns, "iommudtecache", "Display the DTE cache.", iommuAmdR3DbgInfoDteCache);
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+ PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtecache", "Display the IRTE cache.", iommuAmdR3DbgInfoIrteCache);
+#endif
+
+# ifdef VBOX_WITH_STATISTICS
+ /*
+ * Statistics.
+ */
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadR3, STAMTYPE_COUNTER, "R3/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in R3");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadRZ, STAMTYPE_COUNTER, "RZ/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteR3, STAMTYPE_COUNTER, "R3/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in R3.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteRZ, STAMTYPE_COUNTER, "RZ/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapR3, STAMTYPE_COUNTER, "R3/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in R3.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRZ, STAMTYPE_COUNTER, "RZ/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadR3, STAMTYPE_COUNTER, "R3/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in R3.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadRZ, STAMTYPE_COUNTER, "RZ/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteR3, STAMTYPE_COUNTER, "R3/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in R3.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteRZ, STAMTYPE_COUNTER, "RZ/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadR3, STAMTYPE_COUNTER, "R3/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in R3.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadRZ, STAMTYPE_COUNTER, "RZ/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteR3, STAMTYPE_COUNTER, "R3/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in R3.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteRZ, STAMTYPE_COUNTER, "RZ/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in RZ.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmd, STAMTYPE_COUNTER, "R3/Commands", STAMUNIT_OCCURENCES, "Number of commands processed (total).");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompWait, STAMTYPE_COUNTER, "R3/Commands/CompWait", STAMUNIT_OCCURENCES, "Number of Completion Wait commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvDte, STAMTYPE_COUNTER, "R3/Commands/InvDte", STAMUNIT_OCCURENCES, "Number of Invalidate DTE commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuPages, STAMTYPE_COUNTER, "R3/Commands/InvIommuPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU Pages commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIotlbPages, STAMTYPE_COUNTER, "R3/Commands/InvIotlbPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOTLB Pages commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIntrTable, STAMTYPE_COUNTER, "R3/Commands/InvIntrTable", STAMUNIT_OCCURENCES, "Number of Invalidate Interrupt Table commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdPrefIommuPages, STAMTYPE_COUNTER, "R3/Commands/PrefIommuPages", STAMUNIT_OCCURENCES, "Number of Prefetch IOMMU Pages commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompletePprReq, STAMTYPE_COUNTER, "R3/Commands/CompletePprReq", STAMUNIT_OCCURENCES, "Number of Complete PPR Requests commands processed.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuAll, STAMTYPE_COUNTER, "R3/Commands/InvIommuAll", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU All commands processed.");
+
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeCached, STAMTYPE_COUNTER, "IOTLB/Cached", STAMUNIT_OCCURENCES, "Number of IOTLB entries in the cache.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeLazyEvictReuse, STAMTYPE_COUNTER, "IOTLB/LazyEvictReuse", STAMUNIT_OCCURENCES, "Number of IOTLB entries reused after lazy eviction.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfDteLookup, STAMTYPE_PROFILE, "Profile/DteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling DTE lookup.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIotlbeLookup, STAMTYPE_PROFILE, "Profile/IotlbeLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IOTLBE lookup.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteLookup, STAMTYPE_PROFILE, "Profile/IrteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE lookup.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteCacheLookup, STAMTYPE_PROFILE, "Profile/IrteCacheLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE cache lookup.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "MemAccess/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "MemAccess/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "MemAccess/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "MemAccess/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "MemAccess/CacheAddrDenied", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "MemAccess/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "MemAccess/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Interrupt/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Interrupt/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
+
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatNonStdPageSize, STAMTYPE_COUNTER, "MemAccess/NonStdPageSize", STAMUNIT_OCCURENCES, "Number of non-standard page size translations.");
+ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIopfs, STAMTYPE_COUNTER, "MemAccess/IOPFs", STAMUNIT_OCCURENCES, "Number of I/O page faults.");
+# endif
+
+ /*
+ * Create the command thread and its event semaphore.
+ */
+ char szDevIommu[64];
+ RT_ZERO(szDevIommu);
+ RTStrPrintf(szDevIommu, sizeof(szDevIommu), "IOMMU-%u", iInstance);
+ rc = PDMDevHlpThreadCreate(pDevIns, &pThisR3->pCmdThread, pThis, iommuAmdR3CmdThread, iommuAmdR3CmdThreadWakeUp,
+ 0 /* cbStack */, RTTHREADTYPE_IO, szDevIommu);
+ AssertLogRelRCReturn(rc, rc);
+
+ rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtCmdThread);
+ AssertLogRelRCReturn(rc, rc);
+
+#ifdef IOMMU_WITH_DTE_CACHE
+ /*
+ * Initialize the critsect of the cache.
+ */
+ rc = PDMDevHlpCritSectInit(pDevIns, &pThis->CritSectCache, RT_SRC_POS, "IOMMUCache-#%u", pDevIns->iInstance);
+ AssertLogRelRCReturn(rc, rc);
+
+ /* Several places in this code rely on this basic assumption - assert it! */
+ AssertCompile(RT_ELEMENTS(pThis->aDeviceIds) == RT_ELEMENTS(pThis->aDteCache));
+#endif
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+ /*
+ * Allocate IOTLB entries.
+ * This is allocated upfront since we expect a relatively small number of entries,
+ * is more cache-line efficient and easier to track least recently used entries for
+ * eviction when the cache is full. This also avoids unpredictable behavior during
+ * the lifetime of the VM if the hyperheap gets full.
+ */
+ size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
+ pThisR3->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
+ if (!pThisR3->paIotlbes)
+ return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
+ N_("Failed to allocate %zu bytes from the hyperheap for the IOTLB cache."), cbIotlbes);
+ RTListInit(&pThisR3->LstLruIotlbe);
+ LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbIotlbes));
+#endif
+
+ /*
+ * Initialize read-only registers.
+ * NOTE! Fields here must match their corresponding field in the ACPI tables.
+ */
+ /* Don't remove the commented lines below as it lets us see all features at a glance. */
+ pThis->ExtFeat.u64 = 0;
+ //pThis->ExtFeat.n.u1PrefetchSup = 0;
+ //pThis->ExtFeat.n.u1PprSup = 0;
+ //pThis->ExtFeat.n.u1X2ApicSup = 0;
+ //pThis->ExtFeat.n.u1NoExecuteSup = 0;
+ //pThis->ExtFeat.n.u1GstTranslateSup = 0;
+ pThis->ExtFeat.n.u1InvAllSup = 1;
+ //pThis->ExtFeat.n.u1GstVirtApicSup = 0;
+ pThis->ExtFeat.n.u1HwErrorSup = 1;
+ //pThis->ExtFeat.n.u1PerfCounterSup = 0;
+ AssertCompile((IOMMU_MAX_HOST_PT_LEVEL & 0x3) < 3);
+ pThis->ExtFeat.n.u2HostAddrTranslateSize = (IOMMU_MAX_HOST_PT_LEVEL & 0x3);
+ //pThis->ExtFeat.n.u2GstAddrTranslateSize = 0; /* Requires GstTranslateSup */
+ //pThis->ExtFeat.n.u2GstCr3RootTblLevel = 0; /* Requires GstTranslateSup */
+ //pThis->ExtFeat.n.u2SmiFilterSup = 0;
+ //pThis->ExtFeat.n.u3SmiFilterCount = 0;
+ //pThis->ExtFeat.n.u3GstVirtApicModeSup = 0; /* Requires GstVirtApicSup */
+ //pThis->ExtFeat.n.u2DualPprLogSup = 0;
+ //pThis->ExtFeat.n.u2DualEvtLogSup = 0;
+ //pThis->ExtFeat.n.u5MaxPasidSup = 0; /* Requires GstTranslateSup */
+ //pThis->ExtFeat.n.u1UserSupervisorSup = 0;
+ AssertCompile(IOMMU_MAX_DEV_TAB_SEGMENTS <= 3);
+ pThis->ExtFeat.n.u2DevTabSegSup = IOMMU_MAX_DEV_TAB_SEGMENTS;
+ //pThis->ExtFeat.n.u1PprLogOverflowWarn = 0;
+ //pThis->ExtFeat.n.u1PprAutoRespSup = 0;
+ //pThis->ExtFeat.n.u2MarcSup = 0;
+ //pThis->ExtFeat.n.u1BlockStopMarkSup = 0;
+ //pThis->ExtFeat.n.u1PerfOptSup = 0;
+ pThis->ExtFeat.n.u1MsiCapMmioSup = 1;
+ //pThis->ExtFeat.n.u1GstIoSup = 0;
+ //pThis->ExtFeat.n.u1HostAccessSup = 0;
+ //pThis->ExtFeat.n.u1EnhancedPprSup = 0;
+ //pThis->ExtFeat.n.u1AttrForwardSup = 0;
+ //pThis->ExtFeat.n.u1HostDirtySup = 0;
+ //pThis->ExtFeat.n.u1InvIoTlbTypeSup = 0;
+ //pThis->ExtFeat.n.u1GstUpdateDisSup = 0;
+ //pThis->ExtFeat.n.u1ForcePhysDstSup = 0;
+
+ pThis->DevSpecificFeat.u64 = 0;
+ pThis->DevSpecificFeat.n.u4RevMajor = IOMMU_DEVSPEC_FEAT_MAJOR_VERSION;
+ pThis->DevSpecificFeat.n.u4RevMinor = IOMMU_DEVSPEC_FEAT_MINOR_VERSION;
+
+ pThis->DevSpecificCtrl.u64 = 0;
+ pThis->DevSpecificCtrl.n.u4RevMajor = IOMMU_DEVSPEC_CTRL_MAJOR_VERSION;
+ pThis->DevSpecificCtrl.n.u4RevMinor = IOMMU_DEVSPEC_CTRL_MINOR_VERSION;
+
+ pThis->DevSpecificStatus.u64 = 0;
+ pThis->DevSpecificStatus.n.u4RevMajor = IOMMU_DEVSPEC_STATUS_MAJOR_VERSION;
+ pThis->DevSpecificStatus.n.u4RevMinor = IOMMU_DEVSPEC_STATUS_MINOR_VERSION;
+
+ pThis->MiscInfo.u64 = RT_MAKE_U64(uMiscInfoReg0, uMiscInfoReg1);
+
+ pThis->RsvdReg = 0;
+
+ /*
+ * Initialize parts of the IOMMU state as it would during reset.
+ * Also initializes non-zero initial values like IRTE cache keys.
+ * Must be called -after- initializing PCI config. space registers.
+ */
+ iommuAmdR3Reset(pDevIns);
+
+ LogRel(("%s: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX,
+ pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor,
+ pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor,
+ pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor,
+ pThis->ExtFeat.u64));
+ return VINF_SUCCESS;
+}
+
+#else
+
+/**
+ * @callback_method_impl{PDMDEVREGR0,pfnConstruct}
+ *
+ * Non-ring-3 context constructor: g_DeviceIommuAmd installs this function as
+ * pfnConstruct in both its IN_RING0 and IN_RC branches. It sets the device
+ * critical section, hooks up the MMIO handlers and the IOMMU callbacks for this
+ * context, and then validates the IOMMU helper table that PDM hands back.
+ *
+ * @returns VINF_SUCCESS on success. On failure: the status returned by the PDM
+ *          helper that failed, VERR_IOMMU_IPE_1 if no IOMMU helper table was
+ *          provided, or VERR_VERSION_MISMATCH if the helper table's version
+ *          markers are not the expected ones. (PDMDEV_CHECK_VERSIONS_RETURN may
+ *          also return early; its status is not visible here.)
+ * @param   pDevIns     The device instance.
+ */
+static DECLCALLBACK(int) iommuAmdRZConstruct(PPDMDEVINS pDevIns)
+{
+ PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
+ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); /* Shared instance data. */
+ PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); /* Instance data of the current context. */
+ pThisCC->CTX_SUFF(pDevIns) = pDevIns; /* Remember the device instance in the context-specific data. */
+
+ /* We will use PDM's critical section (via helpers) for the IOMMU device.
+  * The device's own critical section is therefore set to the NOP critical section
+  * obtained from PDMDevHlpCritSectGetNop. */
+ int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns));
+ AssertRCReturn(rc, rc);
+
+ /* Set up the MMIO RZ handlers.
+  * pThis->hMmio is the handle filled in by the PDMDevHlpMmioCreate call earlier in
+  * this file; the same write/read handlers are installed for this context. */
+ rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, iommuAmdMmioWrite, iommuAmdMmioRead, NULL /* pvUser */);
+ AssertRCReturn(rc, rc);
+
+ /* Set up the IOMMU RZ callbacks.
+  * The registration record is zeroed first and carries PDM_IOMMUREGCC_VERSION in
+  * both its first (u32Version) and last (u32TheEnd) members. */
+ PDMIOMMUREGCC IommuReg;
+ RT_ZERO(IommuReg);
+ IommuReg.u32Version = PDM_IOMMUREGCC_VERSION;
+ IommuReg.idxIommu = pThis->idxIommu; /* Index taken from the shared instance data. */
+ IommuReg.pfnMemAccess = iommuAmdMemAccess;
+ IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess;
+ IommuReg.pfnMsiRemap = iommuAmdMsiRemap;
+ IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION;
+ rc = PDMDevHlpIommuSetUpContext(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp)); /* Receives the helper table pointer. */
+ AssertRCReturn(rc, rc);
+ AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp), VERR_IOMMU_IPE_1); /* A helper table must have been returned. */
+ AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32Version == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); /* Leading version marker. */
+ AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); /* Trailing version marker. */
+ /* NOTE(review): the four checks below use AssertPtr rather than AssertPtrReturn,
+  * so unlike the checks above they specify no status code to return on failure. */
+ AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnLock);
+ AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnUnlock);
+ AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnLockIsOwner);
+ AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnSendMsi);
+ return VINF_SUCCESS;
+}
+#endif
+
+
+/**
+ * The device registration structure.
+ *
+ * The leading members are common to every build context. The callback table that
+ * follows is selected by the preprocessor: ring-3 (IN_RING3), ring-0 (IN_RING0) or
+ * raw-mode context (IN_RC); any other configuration is a compile-time error.
+ *
+ * The initializers are positional (the member names appear only in comments), so
+ * the order of the entries must not be changed independently of the structure
+ * declaration.
+ */
+const PDMDEVREG g_DeviceIommuAmd =
+{
+ /* .u32Version = */ PDM_DEVREG_VERSION,
+ /* .uReserved0 = */ 0,
+ /* .szName = */ "iommu-amd",
+ /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE,
+ /* .fClass = */ PDM_DEVREG_CLASS_PCI_BUILTIN,
+ /* .cMaxInstances = */ 1,
+ /* .uSharedVersion = */ 42,
+ /* .cbInstanceShared = */ sizeof(IOMMU),
+ /* .cbInstanceCC = */ sizeof(IOMMUCC),
+ /* .cbInstanceRC = */ sizeof(IOMMURC),
+ /* .cMaxPciDevices = */ 1,
+ /* .cMaxMsixVectors = */ 0,
+ /* .pszDescription = */ "IOMMU (AMD)",
+#if defined(IN_RING3)
+ /* Ring-3: module names plus the construct, destruct and reset callbacks; every
+  * other callback slot is left NULL. */
+ /* .pszRCMod = */ "VBoxDDRC.rc",
+ /* .pszR0Mod = */ "VBoxDDR0.r0",
+ /* .pfnConstruct = */ iommuAmdR3Construct,
+ /* .pfnDestruct = */ iommuAmdR3Destruct,
+ /* .pfnRelocate = */ NULL,
+ /* .pfnMemSetup = */ NULL,
+ /* .pfnPowerOn = */ NULL,
+ /* .pfnReset = */ iommuAmdR3Reset,
+ /* .pfnSuspend = */ NULL,
+ /* .pfnResume = */ NULL,
+ /* .pfnAttach = */ NULL,
+ /* .pfnDetach = */ NULL,
+ /* .pfnQueryInterface = */ NULL,
+ /* .pfnInitComplete = */ NULL,
+ /* .pfnPowerOff = */ NULL,
+ /* .pfnSoftReset = */ NULL,
+ /* .pfnReserved0 = */ NULL,
+ /* .pfnReserved1 = */ NULL,
+ /* .pfnReserved2 = */ NULL,
+ /* .pfnReserved3 = */ NULL,
+ /* .pfnReserved4 = */ NULL,
+ /* .pfnReserved5 = */ NULL,
+ /* .pfnReserved6 = */ NULL,
+ /* .pfnReserved7 = */ NULL,
+#elif defined(IN_RING0)
+ /* Ring-0: only the constructor is provided. */
+ /* .pfnEarlyConstruct = */ NULL,
+ /* .pfnConstruct = */ iommuAmdRZConstruct,
+ /* .pfnDestruct = */ NULL,
+ /* .pfnFinalDestruct = */ NULL,
+ /* .pfnRequest = */ NULL,
+ /* .pfnReserved0 = */ NULL,
+ /* .pfnReserved1 = */ NULL,
+ /* .pfnReserved2 = */ NULL,
+ /* .pfnReserved3 = */ NULL,
+ /* .pfnReserved4 = */ NULL,
+ /* .pfnReserved5 = */ NULL,
+ /* .pfnReserved6 = */ NULL,
+ /* .pfnReserved7 = */ NULL,
+#elif defined(IN_RC)
+ /* Raw-mode context: shares the constructor used for ring-0. */
+ /* .pfnConstruct = */ iommuAmdRZConstruct,
+ /* .pfnReserved0 = */ NULL,
+ /* .pfnReserved1 = */ NULL,
+ /* .pfnReserved2 = */ NULL,
+ /* .pfnReserved3 = */ NULL,
+ /* .pfnReserved4 = */ NULL,
+ /* .pfnReserved5 = */ NULL,
+ /* .pfnReserved6 = */ NULL,
+ /* .pfnReserved7 = */ NULL,
+#else
+# error "Not in IN_RING3, IN_RING0 or IN_RC!"
+#endif
+ /* .u32VersionEnd = */ PDM_DEVREG_VERSION /* Same value as .u32Version at the top. */
+};
+
+#endif /* !VBOX_DEVICE_STRUCT_TESTCASE */
+